From c7f1cf1d58cf50118c18b1afc4c938eafba81492 Mon Sep 17 00:00:00 2001
From: Gabe Black <gblack@eecs.umich.edu>
Date: Fri, 13 Apr 2007 13:59:31 +0000
Subject: Remove most of the special handling for delay slots since they have
 to be squashed anyway on a mispredict. This is because the NNPC value they
 saw when executing was incorrect.

--HG--
extra : convert_revision : b42c4eb28b4fbba66c65cbd0a5033bf886c1532d
---
 src/cpu/base_dyn_inst.hh      |  2 ++
 src/cpu/o3/comm.hh            |  5 ---
 src/cpu/o3/commit_impl.hh     | 75 -------------------------------------------
 src/cpu/o3/cpu.cc             | 12 ++-----
 src/cpu/o3/cpu.hh             |  3 +-
 src/cpu/o3/decode_impl.hh     | 65 +------------------------------------
 src/cpu/o3/fetch.hh           |  3 +-
 src/cpu/o3/fetch_impl.hh      | 33 +++----------------
 src/cpu/o3/iew.hh             |  3 --
 src/cpu/o3/iew_impl.hh        | 74 ++++--------------------------------------
 src/cpu/o3/inst_queue_impl.hh |  4 ---
 src/cpu/o3/rename_impl.hh     | 49 ++--------------------------
 12 files changed, 20 insertions(+), 308 deletions(-)
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index eed05c2f1..b02038b3e 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -709,7 +709,9 @@ class BaseDynInst : public FastAlloc, public RefCounted
     /** Set the next NPC of this instruction (the target in Mips or Sparc).*/
     void setNextNPC(uint64_t val)
     {
+#if ISA_HAS_DELAY_SLOT
         nextNPC = val;
+#endif
     }
 
     /** Sets the ASID. */
diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh
index d96919007..8d7bb95f4 100644
--- a/src/cpu/o3/comm.hh
+++ b/src/cpu/o3/comm.hh
@@ -87,7 +87,6 @@ struct DefaultIEWDefaultCommit {
     bool squash[Impl::MaxThreads];
     bool branchMispredict[Impl::MaxThreads];
     bool branchTaken[Impl::MaxThreads];
-    bool squashDelaySlot[Impl::MaxThreads];
     uint64_t mispredPC[Impl::MaxThreads];
     uint64_t nextPC[Impl::MaxThreads];
     uint64_t nextNPC[Impl::MaxThreads];
@@ -114,7 +113,6 @@ struct TimeBufStruct {
         uint64_t branchAddr;
 
         InstSeqNum doneSeqNum;
-        InstSeqNum bdelayDoneSeqNum;
 
         // @todo: Might want to package this kind of branch stuff into a single
         // struct as it is used pretty frequently.
@@ -169,9 +167,6 @@ struct TimeBufStruct {
         // retired or squashed sequence number.
         InstSeqNum doneSeqNum;
 
-        InstSeqNum bdelayDoneSeqNum;
-        bool squashDelaySlot;
-
         //Just in case we want to do a commit/squash on a cycle
         //(necessary for multiple ROBs?)
         bool commitInsts;
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index 65625065d..9dd5ed291 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -741,38 +741,15 @@ DefaultCommit<Impl>::commit()
             // then use one older sequence number.
             InstSeqNum squashed_inst = fromIEW->squashedSeqNum[tid];
 
-#if ISA_HAS_DELAY_SLOT
-            InstSeqNum bdelay_done_seq_num = squashed_inst;
-            bool squash_bdelay_slot = fromIEW->squashDelaySlot[tid];
-            bool branchMispredict = fromIEW->branchMispredict[tid];
-
-            // Squashing/not squashing the branch delay slot only makes
-            // sense when you're squashing from a branch, ie from a branch
-            // mispredict.
-            if (branchMispredict && !squash_bdelay_slot) {
-                bdelay_done_seq_num++;
-            }
-#endif
-
             if (fromIEW->includeSquashInst[tid] == true) {
                 squashed_inst--;
-#if ISA_HAS_DELAY_SLOT
-                bdelay_done_seq_num--;
-#endif
             }
 
             // All younger instructions will be squashed. Set the sequence
             // number as the youngest instruction in the ROB.
             youngestSeqNum[tid] = squashed_inst;
 
-#if ISA_HAS_DELAY_SLOT
-            rob->squash(bdelay_done_seq_num, tid);
-            toIEW->commitInfo[tid].squashDelaySlot = squash_bdelay_slot;
-            toIEW->commitInfo[tid].bdelayDoneSeqNum = bdelay_done_seq_num;
-#else
             rob->squash(squashed_inst, tid);
-            toIEW->commitInfo[tid].squashDelaySlot = true;
-#endif
             changedROBNumEntries[tid] = true;
 
             toIEW->commitInfo[tid].doneSeqNum = squashed_inst;
@@ -809,10 +786,6 @@ DefaultCommit<Impl>::commit()
 
         // Try to commit any instructions.
         commitInsts();
-    } else {
-#if ISA_HAS_DELAY_SLOT
-        skidInsert();
-#endif
     }
 
     //Check for any activity
@@ -1164,37 +1137,13 @@ DefaultCommit<Impl>::getInsts()
 {
     DPRINTF(Commit, "Getting instructions from Rename stage.\n");
 
-#if ISA_HAS_DELAY_SLOT
-    // Read any renamed instructions and place them into the ROB.
-    int insts_to_process = std::min((int)renameWidth,
-                               (int)(fromRename->size + skidBuffer.size()));
-    int rename_idx = 0;
-
-    DPRINTF(Commit, "%i insts available to process. Rename Insts:%i "
-            "SkidBuffer Insts:%i\n", insts_to_process, fromRename->size,
-            skidBuffer.size());
-#else
     // Read any renamed instructions and place them into the ROB.
     int insts_to_process = std::min((int)renameWidth, fromRename->size);
-#endif
-
 
     for (int inst_num = 0; inst_num < insts_to_process; ++inst_num) {
         DynInstPtr inst;
 
-#if ISA_HAS_DELAY_SLOT
-        // Get insts from skidBuffer or from Rename
-        if (skidBuffer.size() > 0) {
-            DPRINTF(Commit, "Grabbing skidbuffer inst.\n");
-            inst = skidBuffer.front();
-            skidBuffer.pop();
-        } else {
-            DPRINTF(Commit, "Grabbing rename inst.\n");
-            inst = fromRename->insts[rename_idx++];
-        }
-#else
         inst = fromRename->insts[inst_num];
-#endif
         int tid = inst->threadNumber;
 
         if (!inst->isSquashed() &&
@@ -1216,30 +1165,6 @@ DefaultCommit<Impl>::getInsts()
                     inst->readPC(), inst->seqNum, tid);
         }
     }
-
-#if ISA_HAS_DELAY_SLOT
-    if (rename_idx < fromRename->size) {
-        DPRINTF(Commit,"Placing Rename Insts into skidBuffer.\n");
-
-        for (;
-             rename_idx < fromRename->size;
-             rename_idx++) {
-            DynInstPtr inst = fromRename->insts[rename_idx];
-
-            if (!inst->isSquashed()) {
-                DPRINTF(Commit, "Inserting PC %#x [sn:%i] [tid:%i] into ",
-                        "skidBuffer.\n", inst->readPC(), inst->seqNum,
-                        inst->threadNumber);
-                skidBuffer.push(inst);
-            } else {
-                DPRINTF(Commit, "Instruction PC %#x [sn:%i] [tid:%i] was "
-                        "squashed, skipping.\n",
-                        inst->readPC(), inst->seqNum, inst->threadNumber);
-            }
-        }
-    }
-#endif
-
 }
 
 template <class Impl>
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 2e6a43f9c..b2b4645d2 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -696,7 +696,7 @@ FullO3CPU<Impl>::removeThread(unsigned tid)
 
     // Squash Throughout Pipeline
     InstSeqNum squash_seq_num = commit.rob->readHeadInst(tid)->seqNum;
-    fetch.squash(0, sizeof(TheISA::MachInst), squash_seq_num, true, tid);
+    fetch.squash(0, sizeof(TheISA::MachInst), squash_seq_num, tid);
     decode.squash(tid);
     rename.squash(squash_seq_num, tid);
     iew.squash(tid);
@@ -1226,9 +1226,7 @@ FullO3CPU<Impl>::removeFrontInst(DynInstPtr &inst)
 
 template <class Impl>
 void
-FullO3CPU<Impl>::removeInstsNotInROB(unsigned tid,
-                                     bool squash_delay_slot,
-                                     const InstSeqNum &delay_slot_seq_num)
+FullO3CPU<Impl>::removeInstsNotInROB(unsigned tid)
 {
     DPRINTF(O3CPU, "Thread %i: Deleting instructions from instruction"
             " list.\n", tid);
@@ -1259,12 +1257,6 @@ FullO3CPU<Impl>::removeInstsNotInROB(unsigned tid,
     while (inst_it != end_it) {
         assert(!instList.empty());
 
-#if ISA_HAS_DELAY_SLOT
-        if(!squash_delay_slot &&
-           delay_slot_seq_num >= (*inst_it)->seqNum) {
-            break;
-        }
-#endif
         squashInstIt(inst_it, tid);
 
         inst_it--;
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index e71d05c8e..4b247e6e3 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -468,8 +468,7 @@ class FullO3CPU : public BaseO3CPU
 
     /** Remove all instructions that are not currently in the ROB.
      *  There's also an option to not squash delay slot instructions.*/
-    void removeInstsNotInROB(unsigned tid, bool squash_delay_slot,
-                             const InstSeqNum &delay_slot_seq_num);
+    void removeInstsNotInROB(unsigned tid);
 
     /** Remove all instructions younger than the given sequence number. */
     void removeInstsUntil(const InstSeqNum &seq_num,unsigned tid);
diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh
index 314864f94..c9d0a1885 100644
--- a/src/cpu/o3/decode_impl.hh
+++ b/src/cpu/o3/decode_impl.hh
@@ -49,8 +49,6 @@ DefaultDecode<Impl>::DefaultDecode(O3CPU *_cpu, Params *params)
         stalls[i].rename = false;
         stalls[i].iew = false;
         stalls[i].commit = false;
-
-        squashAfterDelaySlot[i] = false;
     }
 
     // @todo: Make into a parameter
@@ -278,17 +276,12 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid)
 #if ISA_HAS_DELAY_SLOT
     toFetch->decodeInfo[tid].branchTaken = inst->readNextNPC() !=
         (inst->readNextPC() + sizeof(TheISA::MachInst));
-
-    toFetch->decodeInfo[tid].bdelayDoneSeqNum = bdelayDoneSeqNum[tid];
-    squashAfterDelaySlot[tid] = false;
-
-    InstSeqNum squash_seq_num = bdelayDoneSeqNum[tid];
 #else
     toFetch->decodeInfo[tid].branchTaken =
         inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst));
+#endif
 
     InstSeqNum squash_seq_num = inst->seqNum;
-#endif
 
     // Might have to tell fetch to unblock.
     if (decodeStatus[tid] == Blocked ||
@@ -309,30 +302,10 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid)
     // Clear the instruction list and skid buffer in case they have any
     // insts in them.
     while (!insts[tid].empty()) {
-
-#if ISA_HAS_DELAY_SLOT
-        if (insts[tid].front()->seqNum <= squash_seq_num) {
-            DPRINTF(Decode, "[tid:%i]: Cannot remove incoming decode "
-                    "instructions before delay slot [sn:%i]. %i insts"
-                    "left in decode.\n", tid, squash_seq_num,
-                    insts[tid].size());
-            break;
-        }
-#endif
         insts[tid].pop();
     }
 
     while (!skidBuffer[tid].empty()) {
-
-#if ISA_HAS_DELAY_SLOT
-        if (skidBuffer[tid].front()->seqNum <= squash_seq_num) {
-            DPRINTF(Decode, "[tid:%i]: Cannot remove skidBuffer "
-                    "instructions before delay slot [sn:%i]. %i insts"
-                    "left in decode.\n", tid, squash_seq_num,
-                    insts[tid].size());
-            break;
-        }
-#endif
         skidBuffer[tid].pop();
     }
 
@@ -760,48 +733,12 @@ DefaultDecode<Impl>::decodeInsts(unsigned tid)
 
                 // Might want to set some sort of boolean and just do
                 // a check at the end
-#if !ISA_HAS_DELAY_SLOT
                 squash(inst, inst->threadNumber);
                 Addr target = inst->branchTarget();
                 inst->setPredTarg(target, target + sizeof(TheISA::MachInst));
                 break;
-#else
-                // If mispredicted as taken, then ignore delay slot
-                // instruction... else keep delay slot and squash
-                // after it is sent to rename
-                if (inst->readPredTaken() && inst->isCondDelaySlot()) {
-                    DPRINTF(Decode, "[tid:%i]: Conditional delay slot inst."
-                            "[sn:%i] PC %#x mispredicted as taken.\n", tid,
-                            inst->seqNum, inst->PC);
-                    bdelayDoneSeqNum[tid] = inst->seqNum;
-                    squash(inst, inst->threadNumber);
-                    Addr target = inst->branchTarget();
-                    inst->setPredTarg(target,
-                            target + sizeof(TheISA::MachInst));
-                    break;
-                } else {
-                    DPRINTF(Decode, "[tid:%i]: Misprediction detected at "
-                            "[sn:%i] PC %#x, will squash after delay slot "
-                            "inst. is sent to Rename\n",
-                            tid, inst->seqNum, inst->PC);
-                    bdelayDoneSeqNum[tid] = inst->seqNum + 1;
-                    squashAfterDelaySlot[tid] = true;
-                    squashInst[tid] = inst;
-                    continue;
-                }
-#endif
             }
         }
-
-        if (squashAfterDelaySlot[tid]) {
-            assert(!inst->isSquashed());
-            squash(squashInst[tid], squashInst[tid]->threadNumber);
-            Addr target = squashInst[tid]->branchTarget();
-            squashInst[tid]->setPredTarg(target,
-                    target + sizeof(TheISA::MachInst));
-            assert(!inst->isSquashed());
-            break;
-        }
     }
 
     // If we didn't process all instructions, then we will need to block
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index 241935416..bb0057e7c 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -263,8 +263,7 @@ class DefaultFetch
      * squash should be the commit stage.
      */
     void squash(const Addr &new_PC, const Addr &new_NPC,
-                const InstSeqNum &seq_num,
-                bool squash_delay_slot, unsigned tid);
+                const InstSeqNum &seq_num, unsigned tid);
 
     /** Ticks the fetch stage, processing all inputs signals and fetching
      * as many instructions as possible.
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index e16f97558..25498c7f3 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -774,20 +774,14 @@ DefaultFetch<Impl>::updateFetchStatus()
 template <class Impl>
 void
 DefaultFetch<Impl>::squash(const Addr &new_PC, const Addr &new_NPC,
-                           const InstSeqNum &seq_num,
-                           bool squash_delay_slot, unsigned tid)
+                           const InstSeqNum &seq_num, unsigned tid)
 {
     DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid);
 
     doSquash(new_PC, new_NPC, tid);
 
-#if ISA_HAS_DELAY_SLOT
-    // Tell the CPU to remove any instructions that are not in the ROB.
-    cpu->removeInstsNotInROB(tid, squash_delay_slot, seq_num);
-#else
     // Tell the CPU to remove any instructions that are not in the ROB.
-    cpu->removeInstsNotInROB(tid, true, 0);
-#endif
+    cpu->removeInstsNotInROB(tid);
 }
 
 template <class Impl>
@@ -896,17 +890,10 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
 
         DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
                 "from commit.\n",tid);
-
-#if ISA_HAS_DELAY_SLOT
-    InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].bdelayDoneSeqNum;
-#else
-    InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].doneSeqNum;
-#endif
         // In any case, squash.
         squash(fromCommit->commitInfo[tid].nextPC,
                fromCommit->commitInfo[tid].nextNPC,
-               doneSeqNum,
-               fromCommit->commitInfo[tid].squashDelaySlot,
+               fromCommit->commitInfo[tid].doneSeqNum,
                tid);
 
         // Also check if there's a mispredict that happened.
@@ -955,18 +942,13 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
 
         if (fetchStatus[tid] != Squashing) {
 
-#if ISA_HAS_DELAY_SLOT
-            InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].bdelayDoneSeqNum;
-#else
-            InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].doneSeqNum;
-#endif
             DPRINTF(Fetch, "Squashing from decode with PC = %#x, NPC = %#x\n",
                     fromDecode->decodeInfo[tid].nextPC,
                     fromDecode->decodeInfo[tid].nextNPC);
             // Squash unless we're already squashing
             squashFromDecode(fromDecode->decodeInfo[tid].nextPC,
                              fromDecode->decodeInfo[tid].nextNPC,
-                             doneSeqNum,
+                             fromDecode->decodeInfo[tid].doneSeqNum,
                              tid);
 
             return true;
@@ -1157,9 +1139,6 @@ DefaultFetch<Impl>::fetch(bool &status_change)
                                      instruction->readPC());
 
             ///FIXME This needs to be more robust in dealing with delay slots
-#if !ISA_HAS_DELAY_SLOT
-//	    predicted_branch |=
-#endif
             lookupAndUpdateNextPC(instruction, next_PC, next_NPC);
             predicted_branch |= (next_PC != fetch_NPC);
 
@@ -1213,11 +1192,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
         PC[tid] = next_PC;
         nextPC[tid] = next_NPC;
         nextNPC[tid] = next_NPC + instSize;
-#if ISA_HAS_DELAY_SLOT
-        DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, PC[tid]);
-#else
         DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, next_PC);
-#endif
     } else {
         // We shouldn't be in an icache miss and also have a fault (an ITB
         // miss)
diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh
index ce2991cfb..eef5a15d2 100644
--- a/src/cpu/o3/iew.hh
+++ b/src/cpu/o3/iew.hh
@@ -402,9 +402,6 @@ class DefaultIEW
     /** Records if there is a fetch redirect on this cycle for each thread. */
     bool fetchRedirect[Impl::MaxThreads];
 
-    /** Keeps track of the last valid branch delay slot instss for threads */
-    InstSeqNum bdelayDoneSeqNum[Impl::MaxThreads];
-
     /** Used to track if all instructions have been dispatched this cycle.
      * If they have not, then blocking must have occurred, and the instructions
      * would already be added to the skid buffer.
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index 62e656e93..050785818 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -69,7 +69,6 @@ DefaultIEW<Impl>::DefaultIEW(O3CPU *_cpu, Params *params)
         dispatchStatus[i] = Running;
         stalls[i].commit = false;
         fetchRedirect[i] = false;
-        bdelayDoneSeqNum[i] = 0;
     }
 
     wbMax = wbWidth * params->wbDepth;
@@ -410,31 +409,14 @@ DefaultIEW<Impl>::squash(unsigned tid)
     instQueue.squash(tid);
 
     // Tell the LDSTQ to start squashing.
-#if ISA_HAS_DELAY_SLOT
-    ldstQueue.squash(fromCommit->commitInfo[tid].bdelayDoneSeqNum, tid);
-#else
     ldstQueue.squash(fromCommit->commitInfo[tid].doneSeqNum, tid);
-#endif
     updatedQueues = true;
 
     // Clear the skid buffer in case it has any data in it.
     DPRINTF(IEW, "[tid:%i]: Removing skidbuffer instructions until [sn:%i].\n",
-            tid, fromCommit->commitInfo[tid].bdelayDoneSeqNum);
+            tid, fromCommit->commitInfo[tid].doneSeqNum);
 
     while (!skidBuffer[tid].empty()) {
-#if ISA_HAS_DELAY_SLOT
-        if (skidBuffer[tid].front()->seqNum <=
-            fromCommit->commitInfo[tid].bdelayDoneSeqNum) {
-            DPRINTF(IEW, "[tid:%i]: Cannot remove skidbuffer instructions "
-                    "that occur before delay slot [sn:%i].\n",
-                    fromCommit->commitInfo[tid].bdelayDoneSeqNum,
-                    tid);
-            break;
-        } else {
-            DPRINTF(IEW, "[tid:%i]: Removing instruction [sn:%i] from "
-                    "skidBuffer.\n", tid, skidBuffer[tid].front()->seqNum);
-        }
-#endif
         if (skidBuffer[tid].front()->isLoad() ||
             skidBuffer[tid].front()->isStore() ) {
             toRename->iewInfo[tid].dispatchedToLSQ++;
@@ -445,8 +427,6 @@ DefaultIEW<Impl>::squash(unsigned tid)
         skidBuffer[tid].pop();
     }
 
-    bdelayDoneSeqNum[tid] = fromCommit->commitInfo[tid].bdelayDoneSeqNum;
-
     emptyRenameInsts(tid);
 }
 
@@ -462,38 +442,18 @@ DefaultIEW<Impl>::squashDueToBranch(DynInstPtr &inst, unsigned tid)
     toCommit->mispredPC[tid] = inst->readPC();
     toCommit->branchMispredict[tid] = true;
 
-    int instSize = sizeof(TheISA::MachInst);
 #if ISA_HAS_DELAY_SLOT
-    bool branch_taken =
+    int instSize = sizeof(TheISA::MachInst);
+    toCommit->branchTaken[tid] =
         !(inst->readNextPC() + instSize == inst->readNextNPC() &&
           (inst->readNextPC() == inst->readPC() + instSize ||
            inst->readNextPC() == inst->readPC() + 2 * instSize));
-    DPRINTF(Sparc, "Branch taken = %s [sn:%i]\n",
-            branch_taken ? "true": "false", inst->seqNum);
-
-    toCommit->branchTaken[tid] = branch_taken;
-
-    bool squashDelaySlot = true;
-//	(inst->readNextPC() != inst->readPC() + sizeof(TheISA::MachInst));
-    DPRINTF(Sparc, "Squash delay slot = %s [sn:%i]\n",
-            squashDelaySlot ? "true": "false", inst->seqNum);
-    toCommit->squashDelaySlot[tid] = squashDelaySlot;
-    //If we're squashing the delay slot, we need to pick back up at NextPC.
-    //Otherwise, NextPC isn't being squashed, so we should pick back up at
-    //NextNPC.
-    if (squashDelaySlot) {
-        toCommit->nextPC[tid] = inst->readNextPC();
-        toCommit->nextNPC[tid] = inst->readNextNPC();
-    } else {
-        toCommit->nextPC[tid] = inst->readNextNPC();
-        toCommit->nextNPC[tid] = inst->readNextNPC() + instSize;
-    }
 #else
     toCommit->branchTaken[tid] = inst->readNextPC() !=
         (inst->readPC() + sizeof(TheISA::MachInst));
-    toCommit->nextPC[tid] = inst->readNextPC();
-    toCommit->nextNPC[tid] = inst->readNextPC() + instSize;
 #endif
+    toCommit->nextPC[tid] = inst->readNextPC();
+    toCommit->nextNPC[tid] = inst->readNextNPC();
 
     toCommit->includeSquashInst[tid] = false;
 
@@ -510,11 +470,7 @@ DefaultIEW<Impl>::squashDueToMemOrder(DynInstPtr &inst, unsigned tid)
     toCommit->squash[tid] = true;
     toCommit->squashedSeqNum[tid] = inst->seqNum;
     toCommit->nextPC[tid] = inst->readNextPC();
-#if ISA_HAS_DELAY_SLOT
     toCommit->nextNPC[tid] = inst->readNextNPC();
-#else
-    toCommit->nextNPC[tid] = inst->readNextPC() + sizeof(TheISA::MachInst);
-#endif
     toCommit->branchMispredict[tid] = false;
 
     toCommit->includeSquashInst[tid] = false;
@@ -532,11 +488,7 @@ DefaultIEW<Impl>::squashDueToMemBlocked(DynInstPtr &inst, unsigned tid)
     toCommit->squash[tid] = true;
     toCommit->squashedSeqNum[tid] = inst->seqNum;
     toCommit->nextPC[tid] = inst->readPC();
-#if ISA_HAS_DELAY_SLOT
     toCommit->nextNPC[tid] = inst->readNextPC();
-#else
-    toCommit->nextNPC[tid] = inst->readPC() + sizeof(TheISA::MachInst);
-#endif
     toCommit->branchMispredict[tid] = false;
 
     // Must include the broadcasted SN in the squash.
@@ -880,10 +832,8 @@ DefaultIEW<Impl>::sortInsts()
 {
     int insts_from_rename = fromRename->size;
 #ifdef DEBUG
-#if !ISA_HAS_DELAY_SLOT
     for (int i = 0; i < numThreads; i++)
         assert(insts[i].empty());
-#endif
 #endif
     for (int i = 0; i < insts_from_rename; ++i) {
         insts[fromRename->insts[i]->threadNumber].push(fromRename->insts[i]);
@@ -894,21 +844,9 @@ template <class Impl>
 void
 DefaultIEW<Impl>::emptyRenameInsts(unsigned tid)
 {
-    DPRINTF(IEW, "[tid:%i]: Removing incoming rename instructions until "
-            "[sn:%i].\n", tid, bdelayDoneSeqNum[tid]);
+    DPRINTF(IEW, "[tid:%i]: Removing incoming rename instructions\n", tid);
 
     while (!insts[tid].empty()) {
-#if ISA_HAS_DELAY_SLOT
-        if (insts[tid].front()->seqNum <= bdelayDoneSeqNum[tid]) {
-            DPRINTF(IEW, "[tid:%i]: Done removing, cannot remove instruction"
-                    " that occurs at or before delay slot [sn:%i].\n",
-                    tid, bdelayDoneSeqNum[tid]);
-            break;
-        } else {
-            DPRINTF(IEW, "[tid:%i]: Removing incoming rename instruction "
-                    "[sn:%i].\n", tid, insts[tid].front()->seqNum);
-        }
-#endif
 
         if (insts[tid].front()->isLoad() ||
             insts[tid].front()->isStore() ) {
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh
index 10c3287f2..bdf5f07aa 100644
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -1005,11 +1005,7 @@ InstructionQueue<Impl>::squash(unsigned tid)
 
     // Read instruction sequence number of last instruction out of the
     // time buffer.
-#if ISA_HAS_DELAY_SLOT
-    squashedSeqNum[tid] = fromCommit->commitInfo[tid].bdelayDoneSeqNum;
-#else
     squashedSeqNum[tid] = fromCommit->commitInfo[tid].doneSeqNum;
-#endif
 
     // Call doSquash if there are insts in the IQ
     if (count[tid] > 0) {
diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh
index 431705e19..6e7180b1e 100644
--- a/src/cpu/o3/rename_impl.hh
+++ b/src/cpu/o3/rename_impl.hh
@@ -356,47 +356,12 @@ DefaultRename<Impl>::squash(const InstSeqNum &squash_seq_num, unsigned tid)
     }
 
     // Clear the instruction list and skid buffer in case they have any
-    // insts in them. Since we support multiple ISAs, we cant just:
-    // "insts[tid].clear();" or "skidBuffer[tid].clear()" since there is
-    // a possible delay slot inst for different architectures
-    // insts[tid].clear();
-#if ISA_HAS_DELAY_SLOT
-    DPRINTF(Rename, "[tid:%i] Squashing incoming decode instructions until "
-            "[sn:%i].\n",tid, squash_seq_num);
-    ListIt ilist_it = insts[tid].begin();
-    while (ilist_it != insts[tid].end()) {
-        if ((*ilist_it)->seqNum > squash_seq_num) {
-            (*ilist_it)->setSquashed();
-            DPRINTF(Rename, "Squashing incoming decode instruction, "
-                    "[tid:%i] [sn:%i] PC %08p.\n", tid, (*ilist_it)->seqNum, (*ilist_it)->PC);
-        }
-        ilist_it++;
-    }
-#else
+    // insts in them.
     insts[tid].clear();
-#endif
 
     // Clear the skid buffer in case it has any data in it.
-    // See comments above.
-    //     skidBuffer[tid].clear();
-#if ISA_HAS_DELAY_SLOT
-    DPRINTF(Rename, "[tid:%i] Squashing incoming skidbuffer instructions "
-            "until [sn:%i].\n", tid, squash_seq_num);
-    ListIt slist_it = skidBuffer[tid].begin();
-    while (slist_it != skidBuffer[tid].end()) {
-        if ((*slist_it)->seqNum > squash_seq_num) {
-            (*slist_it)->setSquashed();
-            DPRINTF(Rename, "Squashing skidbuffer instruction, [tid:%i] [sn:%i]"
-                    "PC %08p.\n", tid, (*slist_it)->seqNum, (*slist_it)->PC);
-        }
-        slist_it++;
-    }
-    resumeUnblocking = (skidBuffer[tid].size() != 0);
-    DPRINTF(Rename, "Resume unblocking set to %s\n",
-            resumeUnblocking ? "true" : "false");
-#else
     skidBuffer[tid].clear();
-#endif
+
     doSquash(squash_seq_num, tid);
 }
 
@@ -776,10 +741,8 @@ DefaultRename<Impl>::sortInsts()
 {
     int insts_from_decode = fromDecode->size;
 #ifdef DEBUG
-#if !ISA_HAS_DELAY_SLOT
     for (int i=0; i < numThreads; i++)
         assert(insts[i].empty());
-#endif
 #endif
     for (int i = 0; i < insts_from_decode; ++i) {
         DynInstPtr inst = fromDecode->insts[i];
@@ -1248,13 +1211,7 @@ DefaultRename<Impl>::checkSignalsAndUpdate(unsigned tid)
         DPRINTF(Rename, "[tid:%u]: Squashing instructions due to squash from "
                 "commit.\n", tid);
 
-#if ISA_HAS_DELAY_SLOT
-        InstSeqNum squashed_seq_num = fromCommit->commitInfo[tid].bdelayDoneSeqNum;
-#else
-        InstSeqNum squashed_seq_num = fromCommit->commitInfo[tid].doneSeqNum;
-#endif
-
-        squash(squashed_seq_num, tid);
+        squash(fromCommit->commitInfo[tid].doneSeqNum, tid);
 
         return true;
     }
-- 
cgit v1.2.3


From e9c6012acf729ef55b37dda76e011b5a284b6988 Mon Sep 17 00:00:00 2001
From: Gabe Black <gblack@eecs.umich.edu>
Date: Fri, 13 Apr 2007 14:00:42 +0000
Subject: Adjust references to reflect differences without special delay slot
 handling. Performance actually went up slightly.

--HG--
extra : convert_revision : 504f6185ddc89881aa41deb7fd934da8038d1ed2
---
 .../ref/sparc/linux/o3-timing/m5stats.txt          | 263 ++++++++++-----------
 .../02.insttest/ref/sparc/linux/o3-timing/stderr   |   1 -
 .../02.insttest/ref/sparc/linux/o3-timing/stdout   |   8 +-
 3 files changed, 135 insertions(+), 137 deletions(-)

diff --git a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/m5stats.txt b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/m5stats.txt
index 7c0d31494..4c5655a33 100644
--- a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/m5stats.txt
+++ b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/m5stats.txt
@@ -1,17 +1,17 @@
 
 ---------- Begin Simulation Statistics ----------
 global.BPredUnit.BTBCorrect                         0                       # Number of correct BTB predictions (this stat may not work properly.
-global.BPredUnit.BTBHits                         2990                       # Number of BTB hits
-global.BPredUnit.BTBLookups                      7055                       # Number of BTB lookups
+global.BPredUnit.BTBHits                         3021                       # Number of BTB hits
+global.BPredUnit.BTBLookups                      7086                       # Number of BTB lookups
 global.BPredUnit.RASInCorrect                       0                       # Number of incorrect RAS predictions.
 global.BPredUnit.condIncorrect                   2077                       # Number of conditional branches incorrect
-global.BPredUnit.condPredicted                   7846                       # Number of conditional branches predicted
-global.BPredUnit.lookups                         7846                       # Number of BP lookups
+global.BPredUnit.condPredicted                   7877                       # Number of conditional branches predicted
+global.BPredUnit.lookups                         7877                       # Number of BP lookups
 global.BPredUnit.usedRAS                            0                       # Number of times the RAS was used to get a target.
-host_inst_rate                                  15119                       # Simulator instruction rate (inst/s)
-host_mem_usage                                 154868                       # Number of bytes of host memory used
-host_seconds                                     0.73                       # Real time elapsed on the host
-host_tick_rate                                1956796                       # Simulator tick rate (ticks/s)
+host_inst_rate                                   4388                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 179936                       # Number of bytes of host memory used
+host_seconds                                     2.50                       # Real time elapsed on the host
+host_tick_rate                                 568121                       # Simulator tick rate (ticks/s)
 memdepunit.memDep.conflictingLoads                 12                       # Number of conflicting loads.
 memdepunit.memDep.conflictingStores                 0                       # Number of conflicting stores.
 memdepunit.memDep.insertedLoads                  3250                       # Number of loads inserted to the mem dependence unit.
@@ -19,22 +19,22 @@ memdepunit.memDep.insertedStores                 2817                       # Nu
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
 sim_insts                                       10976                       # Number of instructions simulated
 sim_seconds                                  0.000001                       # Number of seconds simulated
-sim_ticks                                     1421211                       # Number of ticks simulated
+sim_ticks                                     1421207                       # Number of ticks simulated
 system.cpu.commit.COM:branches                   2152                       # Number of branches committed
-system.cpu.commit.COM:bw_lim_events               172                       # number cycles where commit BW limit reached
+system.cpu.commit.COM:bw_lim_events               225                       # number cycles where commit BW limit reached
 system.cpu.commit.COM:bw_limited                    0                       # number of insts not committed due to BW limits
 system.cpu.commit.COM:committed_per_cycle.start_dist                     # Number of insts commited each cycle
-system.cpu.commit.COM:committed_per_cycle.samples       221349                      
+system.cpu.commit.COM:committed_per_cycle.samples       220766                      
 system.cpu.commit.COM:committed_per_cycle.min_value            0                      
-                               0       215844   9751.30%           
-                               1         2970    134.18%           
-                               2         1290     58.28%           
-                               3          631     28.51%           
-                               4          208      9.40%           
-                               5           90      4.07%           
-                               6          133      6.01%           
+                               0       215368   9755.49%           
+                               1         2915    132.04%           
+                               2         1196     54.18%           
+                               3          673     30.48%           
+                               4          208      9.42%           
+                               5           79      3.58%           
+                               6           91      4.12%           
                                7           11      0.50%           
-                               8          172      7.77%           
+                               8          225     10.19%           
 system.cpu.commit.COM:committed_per_cycle.max_value            8                      
 system.cpu.commit.COM:committed_per_cycle.end_dist
 
@@ -49,65 +49,65 @@ system.cpu.commit.commitNonSpecStalls             327                       # Th
 system.cpu.commit.commitSquashedInsts           14263                       # The number of squashed insts skipped by commit
 system.cpu.committedInsts                       10976                       # Number of Instructions Simulated
 system.cpu.committedInsts_total                 10976                       # Number of Instructions Simulated
-system.cpu.cpi                             129.483509                       # CPI: Cycles Per Instruction
-system.cpu.cpi_total                       129.483509                       # CPI: Total CPI of All Threads
-system.cpu.dcache.ReadReq_accesses               2737                       # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_avg_miss_latency  6585.044776                       # average ReadReq miss latency
-system.cpu.dcache.ReadReq_avg_mshr_miss_latency  6511.939394                       # average ReadReq mshr miss latency
-system.cpu.dcache.ReadReq_hits                   2603                       # number of ReadReq hits
-system.cpu.dcache.ReadReq_miss_latency         882396                       # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_rate          0.048959                       # miss rate for ReadReq accesses
+system.cpu.cpi                             129.483145                       # CPI: Cycles Per Instruction
+system.cpu.cpi_total                       129.483145                       # CPI: Total CPI of All Threads
+system.cpu.dcache.ReadReq_accesses               2738                       # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_avg_miss_latency  6586.074627                       # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency  6513.166667                       # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_hits                   2604                       # number of ReadReq hits
+system.cpu.dcache.ReadReq_miss_latency         882534                       # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_rate          0.048941                       # miss rate for ReadReq accesses
 system.cpu.dcache.ReadReq_misses                  134                       # number of ReadReq misses
 system.cpu.dcache.ReadReq_mshr_hits                68                       # number of ReadReq MSHR hits
-system.cpu.dcache.ReadReq_mshr_miss_latency       429788                       # number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_rate     0.024114                       # mshr miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_mshr_miss_latency       429869                       # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_rate     0.024105                       # mshr miss rate for ReadReq accesses
 system.cpu.dcache.ReadReq_mshr_misses              66                       # number of ReadReq MSHR misses
 system.cpu.dcache.SwapReq_accesses                  6                       # number of SwapReq accesses(hits+misses)
 system.cpu.dcache.SwapReq_hits                      6                       # number of SwapReq hits
 system.cpu.dcache.WriteReq_accesses              1292                       # number of WriteReq accesses(hits+misses)
-system.cpu.dcache.WriteReq_avg_miss_latency  7960.583924                       # average WriteReq miss latency
-system.cpu.dcache.WriteReq_avg_mshr_miss_latency  7136.918605                       # average WriteReq mshr miss latency
+system.cpu.dcache.WriteReq_avg_miss_latency  7962.583924                       # average WriteReq miss latency
+system.cpu.dcache.WriteReq_avg_mshr_miss_latency  7138.593023                       # average WriteReq mshr miss latency
 system.cpu.dcache.WriteReq_hits                   869                       # number of WriteReq hits
-system.cpu.dcache.WriteReq_miss_latency       3367327                       # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_latency       3368173                       # number of WriteReq miss cycles
 system.cpu.dcache.WriteReq_miss_rate         0.327399                       # miss rate for WriteReq accesses
 system.cpu.dcache.WriteReq_misses                 423                       # number of WriteReq misses
 system.cpu.dcache.WriteReq_mshr_hits              337                       # number of WriteReq MSHR hits
-system.cpu.dcache.WriteReq_mshr_miss_latency       613775                       # number of WriteReq MSHR miss cycles
+system.cpu.dcache.WriteReq_mshr_miss_latency       613919                       # number of WriteReq MSHR miss cycles
 system.cpu.dcache.WriteReq_mshr_miss_rate     0.066563                       # mshr miss rate for WriteReq accesses
 system.cpu.dcache.WriteReq_mshr_misses             86                       # number of WriteReq MSHR misses
 system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.dcache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.dcache.avg_refs                  22.881579                       # Average number of references to valid blocks.
+system.cpu.dcache.avg_refs                  22.888158                       # Average number of references to valid blocks.
 system.cpu.dcache.blocked_no_mshrs                  0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_no_targets                0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.dcache.cache_copies                      0                       # number of cache copies performed
-system.cpu.dcache.demand_accesses                4029                       # number of demand (read+write) accesses
-system.cpu.dcache.demand_avg_miss_latency  7629.664273                       # average overall miss latency
-system.cpu.dcache.demand_avg_mshr_miss_latency  6865.546053                       # average overall mshr miss latency
-system.cpu.dcache.demand_hits                    3472                       # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency         4249723                       # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_rate           0.138248                       # miss rate for demand accesses
+system.cpu.dcache.demand_accesses                4030                       # number of demand (read+write) accesses
+system.cpu.dcache.demand_avg_miss_latency  7631.430880                       # average overall miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency  6867.026316                       # average overall mshr miss latency
+system.cpu.dcache.demand_hits                    3473                       # number of demand (read+write) hits
+system.cpu.dcache.demand_miss_latency         4250707                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_rate           0.138213                       # miss rate for demand accesses
 system.cpu.dcache.demand_misses                   557                       # number of demand (read+write) misses
 system.cpu.dcache.demand_mshr_hits                405                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency      1043563                       # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_rate      0.037726                       # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_miss_latency      1043788                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_rate      0.037717                       # mshr miss rate for demand accesses
 system.cpu.dcache.demand_mshr_misses              152                       # number of demand (read+write) MSHR misses
 system.cpu.dcache.fast_writes                       0                       # number of fast writes performed
 system.cpu.dcache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.dcache.overall_accesses               4029                       # number of overall (read+write) accesses
-system.cpu.dcache.overall_avg_miss_latency  7629.664273                       # average overall miss latency
-system.cpu.dcache.overall_avg_mshr_miss_latency  6865.546053                       # average overall mshr miss latency
+system.cpu.dcache.overall_accesses               4030                       # number of overall (read+write) accesses
+system.cpu.dcache.overall_avg_miss_latency  7631.430880                       # average overall miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency  6867.026316                       # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits                   3472                       # number of overall hits
-system.cpu.dcache.overall_miss_latency        4249723                       # number of overall miss cycles
-system.cpu.dcache.overall_miss_rate          0.138248                       # miss rate for overall accesses
+system.cpu.dcache.overall_hits                   3473                       # number of overall hits
+system.cpu.dcache.overall_miss_latency        4250707                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_rate          0.138213                       # miss rate for overall accesses
 system.cpu.dcache.overall_misses                  557                       # number of overall misses
 system.cpu.dcache.overall_mshr_hits               405                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency      1043563                       # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_rate     0.037726                       # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_miss_latency      1043788                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_rate     0.037717                       # mshr miss rate for overall accesses
 system.cpu.dcache.overall_mshr_misses             152                       # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.dcache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
@@ -123,50 +123,50 @@ system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.dcache.replacements                      0                       # number of replacements
 system.cpu.dcache.sampled_refs                    152                       # Sample count of references to valid blocks.
 system.cpu.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse                 90.938737                       # Cycle average of tags in use
-system.cpu.dcache.total_refs                     3478                       # Total number of references to valid blocks.
+system.cpu.dcache.tagsinuse                 90.938565                       # Cycle average of tags in use
+system.cpu.dcache.total_refs                     3479                       # Total number of references to valid blocks.
 system.cpu.dcache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.dcache.writebacks                        0                       # number of writebacks
-system.cpu.decode.DECODE:BlockedCycles         192719                       # Number of cycles decode is blocked
-system.cpu.decode.DECODE:DecodedInsts           39774                       # Number of instructions handled by decode
-system.cpu.decode.DECODE:IdleCycles             20128                       # Number of cycles decode is idle
-system.cpu.decode.DECODE:RunCycles               8238                       # Number of cycles decode is running
+system.cpu.decode.DECODE:BlockedCycles         192302                       # Number of cycles decode is blocked
+system.cpu.decode.DECODE:DecodedInsts           39763                       # Number of instructions handled by decode
+system.cpu.decode.DECODE:IdleCycles             19973                       # Number of cycles decode is idle
+system.cpu.decode.DECODE:RunCycles               8441                       # Number of cycles decode is running
 system.cpu.decode.DECODE:SquashCycles            3162                       # Number of cycles decode is squashing
-system.cpu.decode.DECODE:UnblockCycles            264                       # Number of cycles decode is unblocking
-system.cpu.fetch.Branches                        7846                       # Number of branches that fetch encountered
+system.cpu.decode.DECODE:UnblockCycles             50                       # Number of cycles decode is unblocking
+system.cpu.fetch.Branches                        7877                       # Number of branches that fetch encountered
 system.cpu.fetch.CacheLines                      5085                       # Number of cache lines fetched
-system.cpu.fetch.Cycles                         14399                       # Number of cycles fetch has run and was not squashing or blocked
+system.cpu.fetch.Cycles                         14430                       # Number of cycles fetch has run and was not squashing or blocked
 system.cpu.fetch.IcacheSquashes                   745                       # Number of outstanding Icache misses that were squashed
-system.cpu.fetch.Insts                          43304                       # Number of instructions fetch has processed
+system.cpu.fetch.Insts                          43366                       # Number of instructions fetch has processed
 system.cpu.fetch.SquashCycles                    2134                       # Number of cycles fetch has spent squashing
-system.cpu.fetch.branchRate                  0.034947                       # Number of branch fetches per cycle
+system.cpu.fetch.branchRate                  0.035176                       # Number of branch fetches per cycle
 system.cpu.fetch.icacheStallCycles               5085                       # Number of cycles fetch is stalled on an Icache miss
-system.cpu.fetch.predictedBranches               2990                       # Number of branches that fetch has predicted taken
-system.cpu.fetch.rate                        0.192881                       # Number of inst fetches per cycle
+system.cpu.fetch.predictedBranches               3021                       # Number of branches that fetch has predicted taken
+system.cpu.fetch.rate                        0.193660                       # Number of inst fetches per cycle
 system.cpu.fetch.rateDist.start_dist                           # Number of instructions fetched each cycle (Total)
-system.cpu.fetch.rateDist.samples              224511                      
+system.cpu.fetch.rateDist.samples              223928                      
 system.cpu.fetch.rateDist.min_value                 0                      
-                               0       215198   9585.19%           
-                               1         2258    100.57%           
-                               2          627     27.93%           
-                               3          958     42.67%           
-                               4          553     24.63%           
-                               5          816     36.35%           
-                               6          951     42.36%           
-                               7          280     12.47%           
-                               8         2870    127.83%           
+                               0       214584   9582.72%           
+                               1         2258    100.84%           
+                               2          658     29.38%           
+                               3          958     42.78%           
+                               4          553     24.70%           
+                               5          816     36.44%           
+                               6          951     42.47%           
+                               7          280     12.50%           
+                               8         2870    128.17%           
 system.cpu.fetch.rateDist.max_value                 8                      
 system.cpu.fetch.rateDist.end_dist
 
 system.cpu.icache.ReadReq_accesses               5085                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_avg_miss_latency  5148.266776                       # average ReadReq miss latency
-system.cpu.icache.ReadReq_avg_mshr_miss_latency  4502.972752                       # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_avg_miss_latency  5150.152209                       # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency  4503.673025                       # average ReadReq mshr miss latency
 system.cpu.icache.ReadReq_hits                   4474                       # number of ReadReq hits
-system.cpu.icache.ReadReq_miss_latency        3145591                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_latency        3146743                       # number of ReadReq miss cycles
 system.cpu.icache.ReadReq_miss_rate          0.120157                       # miss rate for ReadReq accesses
 system.cpu.icache.ReadReq_misses                  611                       # number of ReadReq misses
 system.cpu.icache.ReadReq_mshr_hits               244                       # number of ReadReq MSHR hits
-system.cpu.icache.ReadReq_mshr_miss_latency      1652591                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_latency      1652848                       # number of ReadReq MSHR miss cycles
 system.cpu.icache.ReadReq_mshr_miss_rate     0.072173                       # mshr miss rate for ReadReq accesses
 system.cpu.icache.ReadReq_mshr_misses             367                       # number of ReadReq MSHR misses
 system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
@@ -178,29 +178,29 @@ system.cpu.icache.blocked_cycles_no_mshrs            0                       # n
 system.cpu.icache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.icache.cache_copies                      0                       # number of cache copies performed
 system.cpu.icache.demand_accesses                5085                       # number of demand (read+write) accesses
-system.cpu.icache.demand_avg_miss_latency  5148.266776                       # average overall miss latency
-system.cpu.icache.demand_avg_mshr_miss_latency  4502.972752                       # average overall mshr miss latency
+system.cpu.icache.demand_avg_miss_latency  5150.152209                       # average overall miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency  4503.673025                       # average overall mshr miss latency
 system.cpu.icache.demand_hits                    4474                       # number of demand (read+write) hits
-system.cpu.icache.demand_miss_latency         3145591                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_latency         3146743                       # number of demand (read+write) miss cycles
 system.cpu.icache.demand_miss_rate           0.120157                       # miss rate for demand accesses
 system.cpu.icache.demand_misses                   611                       # number of demand (read+write) misses
 system.cpu.icache.demand_mshr_hits                244                       # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_miss_latency      1652591                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_latency      1652848                       # number of demand (read+write) MSHR miss cycles
 system.cpu.icache.demand_mshr_miss_rate      0.072173                       # mshr miss rate for demand accesses
 system.cpu.icache.demand_mshr_misses              367                       # number of demand (read+write) MSHR misses
 system.cpu.icache.fast_writes                       0                       # number of fast writes performed
 system.cpu.icache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.icache.no_allocate_misses                0                       # Number of misses that were no-allocate
 system.cpu.icache.overall_accesses               5085                       # number of overall (read+write) accesses
-system.cpu.icache.overall_avg_miss_latency  5148.266776                       # average overall miss latency
-system.cpu.icache.overall_avg_mshr_miss_latency  4502.972752                       # average overall mshr miss latency
+system.cpu.icache.overall_avg_miss_latency  5150.152209                       # average overall miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency  4503.673025                       # average overall mshr miss latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.icache.overall_hits                   4474                       # number of overall hits
-system.cpu.icache.overall_miss_latency        3145591                       # number of overall miss cycles
+system.cpu.icache.overall_miss_latency        3146743                       # number of overall miss cycles
 system.cpu.icache.overall_miss_rate          0.120157                       # miss rate for overall accesses
 system.cpu.icache.overall_misses                  611                       # number of overall misses
 system.cpu.icache.overall_mshr_hits               244                       # number of overall MSHR hits
-system.cpu.icache.overall_mshr_miss_latency      1652591                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_latency      1652848                       # number of overall MSHR miss cycles
 system.cpu.icache.overall_mshr_miss_rate     0.072173                       # mshr miss rate for overall accesses
 system.cpu.icache.overall_mshr_misses             367                       # number of overall MSHR misses
 system.cpu.icache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
@@ -217,35 +217,35 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.icache.replacements                      1                       # number of replacements
 system.cpu.icache.sampled_refs                    363                       # Sample count of references to valid blocks.
 system.cpu.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse                172.869174                       # Cycle average of tags in use
+system.cpu.icache.tagsinuse                172.868641                       # Cycle average of tags in use
 system.cpu.icache.total_refs                     4474                       # Total number of references to valid blocks.
 system.cpu.icache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.icache.writebacks                        0                       # number of writebacks
-system.cpu.idleCycles                         1196701                       # Total number of cycles that the CPU has spent unscheduled due to idling
-system.cpu.iew.EXEC:branches                     3576                       # Number of branches executed
+system.cpu.idleCycles                         1197280                       # Total number of cycles that the CPU has spent unscheduled due to idling
+system.cpu.iew.EXEC:branches                     3577                       # Number of branches executed
 system.cpu.iew.EXEC:nop                             0                       # number of nop insts executed
-system.cpu.iew.EXEC:rate                     0.092548                       # Inst execution rate
-system.cpu.iew.EXEC:refs                         5257                       # number of memory reference insts executed
+system.cpu.iew.EXEC:rate                     0.092802                       # Inst execution rate
+system.cpu.iew.EXEC:refs                         5258                       # number of memory reference insts executed
 system.cpu.iew.EXEC:stores                       2386                       # Number of stores executed
 system.cpu.iew.EXEC:swp                             0                       # number of swp insts executed
 system.cpu.iew.WB:consumers                      9737                       # num instructions consuming a value
-system.cpu.iew.WB:count                         19769                       # cumulative count of insts written-back
+system.cpu.iew.WB:count                         19771                       # cumulative count of insts written-back
 system.cpu.iew.WB:fanout                     0.790901                       # average fanout of values written-back
 system.cpu.iew.WB:penalized                         0                       # number of instrctions required to write to 'other' IQ
 system.cpu.iew.WB:penalized_rate                    0                       # fraction of instructions written-back that wrote to 'other' IQ
 system.cpu.iew.WB:producers                      7701                       # num instructions producing a value
-system.cpu.iew.WB:rate                       0.088054                       # insts written-back per cycle
-system.cpu.iew.WB:sent                          20061                       # cumulative count of insts sent to commit
-system.cpu.iew.branchMispredicts                 2593                       # Number of branch mispredicts detected at execute
+system.cpu.iew.WB:rate                       0.088292                       # insts written-back per cycle
+system.cpu.iew.WB:sent                          20063                       # cumulative count of insts sent to commit
+system.cpu.iew.branchMispredicts                 2594                       # Number of branch mispredicts detected at execute
 system.cpu.iew.iewBlockCycles                     476                       # Number of cycles IEW is blocking
 system.cpu.iew.iewDispLoadInsts                  3250                       # Number of dispatched load instructions
 system.cpu.iew.iewDispNonSpecInsts                617                       # Number of dispatched non-speculative instructions
-system.cpu.iew.iewDispSquashedInsts              2705                       # Number of squashed instructions skipped by dispatch
+system.cpu.iew.iewDispSquashedInsts              2694                       # Number of squashed instructions skipped by dispatch
 system.cpu.iew.iewDispStoreInsts                 2817                       # Number of dispatched store instructions
 system.cpu.iew.iewDispatchedInsts               25240                       # Number of instructions dispatched to IQ
-system.cpu.iew.iewExecLoadInsts                  2871                       # Number of load instructions executed
-system.cpu.iew.iewExecSquashedInsts              1780                       # Number of squashed instructions skipped in execute
-system.cpu.iew.iewExecutedInsts                 20778                       # Number of executed instructions
+system.cpu.iew.iewExecLoadInsts                  2872                       # Number of load instructions executed
+system.cpu.iew.iewExecSquashedInsts              1777                       # Number of squashed instructions skipped in execute
+system.cpu.iew.iewExecutedInsts                 20781                       # Number of executed instructions
 system.cpu.iew.iewIQFullEvents                      7                       # Number of times the IQ has become full, causing a stall
 system.cpu.iew.iewIdleCycles                        0                       # Number of cycles IEW is idle
 system.cpu.iew.iewLSQFullEvents                     0                       # Number of times the LSQ has become full, causing a stall
@@ -262,7 +262,7 @@ system.cpu.iew.lsq.thread.0.rescheduledLoads            0
 system.cpu.iew.lsq.thread.0.squashedLoads         1788                       # Number of loads squashed
 system.cpu.iew.lsq.thread.0.squashedStores         1519                       # Number of stores squashed
 system.cpu.iew.memOrderViolationEvents             54                       # Number of memory order violations
-system.cpu.iew.predictedNotTakenIncorrect          962                       # Number of branches that were predicted not taken incorrectly
+system.cpu.iew.predictedNotTakenIncorrect          963                       # Number of branches that were predicted not taken incorrectly
 system.cpu.iew.predictedTakenIncorrect           1631                       # Number of branches that were predicted taken incorrectly
 system.cpu.ipc                               0.007723                       # IPC: Instructions Per Cycle
 system.cpu.ipc_total                         0.007723                       # IPC: Total IPC of All Threads
@@ -302,21 +302,21 @@ system.cpu.iq.ISSUE:fu_full.start_dist
                     InstPrefetch            0      0.00%            # attempts to use FU when none available
 system.cpu.iq.ISSUE:fu_full.end_dist
 system.cpu.iq.ISSUE:issued_per_cycle.start_dist                     # Number of insts issued each cycle
-system.cpu.iq.ISSUE:issued_per_cycle.samples       224511                      
+system.cpu.iq.ISSUE:issued_per_cycle.samples       223928                      
 system.cpu.iq.ISSUE:issued_per_cycle.min_value            0                      
-                               0       215315   9590.40%           
-                               1         4124    183.69%           
-                               2         1297     57.77%           
-                               3         1306     58.17%           
-                               4         1190     53.00%           
-                               5          707     31.49%           
-                               6          433     19.29%           
-                               7           83      3.70%           
-                               8           56      2.49%           
+                               0       214838   9594.07%           
+                               1         3976    177.56%           
+                               2         1244     55.55%           
+                               3         1359     60.69%           
+                               4         1316     58.77%           
+                               5          612     27.33%           
+                               6          444     19.83%           
+                               7           83      3.71%           
+                               8           56      2.50%           
 system.cpu.iq.ISSUE:issued_per_cycle.max_value            8                      
 system.cpu.iq.ISSUE:issued_per_cycle.end_dist
 
-system.cpu.iq.ISSUE:rate                     0.100476                       # Inst issue rate
+system.cpu.iq.ISSUE:rate                     0.100738                       # Inst issue rate
 system.cpu.iq.iqInstsAdded                      24623                       # Number of instructions added to the IQ (excludes non-spec)
 system.cpu.iq.iqInstsIssued                     22558                       # Number of instructions issued
 system.cpu.iq.iqNonSpecInstsAdded                 617                       # Number of non-speculative instructions added to the IQ
@@ -325,12 +325,12 @@ system.cpu.iq.iqSquashedInstsIssued               174                       # Nu
 system.cpu.iq.iqSquashedNonSpecRemoved            290                       # Number of squashed non-spec instructions that were removed
 system.cpu.iq.iqSquashedOperandsExamined         5834                       # Number of squashed operands that are examined and possibly removed from graph
 system.cpu.l2cache.ReadReq_accesses               513                       # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_avg_miss_latency  4754.779727                       # average ReadReq miss latency
-system.cpu.l2cache.ReadReq_avg_mshr_miss_latency  2343.506823                       # average ReadReq mshr miss latency
-system.cpu.l2cache.ReadReq_miss_latency       2439202                       # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_avg_miss_latency  4755.715400                       # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_avg_mshr_miss_latency  2343.752437                       # average ReadReq mshr miss latency
+system.cpu.l2cache.ReadReq_miss_latency       2439682                       # number of ReadReq miss cycles
 system.cpu.l2cache.ReadReq_miss_rate                1                       # miss rate for ReadReq accesses
 system.cpu.l2cache.ReadReq_misses                 513                       # number of ReadReq misses
-system.cpu.l2cache.ReadReq_mshr_miss_latency      1202219                       # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_latency      1202345                       # number of ReadReq MSHR miss cycles
 system.cpu.l2cache.ReadReq_mshr_miss_rate            1                       # mshr miss rate for ReadReq accesses
 system.cpu.l2cache.ReadReq_mshr_misses            513                       # number of ReadReq MSHR misses
 system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
@@ -342,29 +342,29 @@ system.cpu.l2cache.blocked_cycles_no_mshrs            0                       #
 system.cpu.l2cache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.l2cache.cache_copies                     0                       # number of cache copies performed
 system.cpu.l2cache.demand_accesses                513                       # number of demand (read+write) accesses
-system.cpu.l2cache.demand_avg_miss_latency  4754.779727                       # average overall miss latency
-system.cpu.l2cache.demand_avg_mshr_miss_latency  2343.506823                       # average overall mshr miss latency
+system.cpu.l2cache.demand_avg_miss_latency  4755.715400                       # average overall miss latency
+system.cpu.l2cache.demand_avg_mshr_miss_latency  2343.752437                       # average overall mshr miss latency
 system.cpu.l2cache.demand_hits                      0                       # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency        2439202                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_latency        2439682                       # number of demand (read+write) miss cycles
 system.cpu.l2cache.demand_miss_rate                 1                       # miss rate for demand accesses
 system.cpu.l2cache.demand_misses                  513                       # number of demand (read+write) misses
 system.cpu.l2cache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
-system.cpu.l2cache.demand_mshr_miss_latency      1202219                       # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_latency      1202345                       # number of demand (read+write) MSHR miss cycles
 system.cpu.l2cache.demand_mshr_miss_rate            1                       # mshr miss rate for demand accesses
 system.cpu.l2cache.demand_mshr_misses             513                       # number of demand (read+write) MSHR misses
 system.cpu.l2cache.fast_writes                      0                       # number of fast writes performed
 system.cpu.l2cache.mshr_cap_events                  0                       # number of times MSHR cap was activated
 system.cpu.l2cache.no_allocate_misses               0                       # Number of misses that were no-allocate
 system.cpu.l2cache.overall_accesses               513                       # number of overall (read+write) accesses
-system.cpu.l2cache.overall_avg_miss_latency  4754.779727                       # average overall miss latency
-system.cpu.l2cache.overall_avg_mshr_miss_latency  2343.506823                       # average overall mshr miss latency
+system.cpu.l2cache.overall_avg_miss_latency  4755.715400                       # average overall miss latency
+system.cpu.l2cache.overall_avg_mshr_miss_latency  2343.752437                       # average overall mshr miss latency
 system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.l2cache.overall_hits                     0                       # number of overall hits
-system.cpu.l2cache.overall_miss_latency       2439202                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_latency       2439682                       # number of overall miss cycles
 system.cpu.l2cache.overall_miss_rate                1                       # miss rate for overall accesses
 system.cpu.l2cache.overall_misses                 513                       # number of overall misses
 system.cpu.l2cache.overall_mshr_hits                0                       # number of overall MSHR hits
-system.cpu.l2cache.overall_mshr_miss_latency      1202219                       # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_latency      1202345                       # number of overall MSHR miss cycles
 system.cpu.l2cache.overall_mshr_miss_rate            1                       # mshr miss rate for overall accesses
 system.cpu.l2cache.overall_mshr_misses            513                       # number of overall MSHR misses
 system.cpu.l2cache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
@@ -381,28 +381,27 @@ system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.l2cache.replacements                     0                       # number of replacements
 system.cpu.l2cache.sampled_refs                   512                       # Sample count of references to valid blocks.
 system.cpu.l2cache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse               262.946375                       # Cycle average of tags in use
+system.cpu.l2cache.tagsinuse               262.945674                       # Cycle average of tags in use
 system.cpu.l2cache.total_refs                       0                       # Total number of references to valid blocks.
 system.cpu.l2cache.warmup_cycle                     0                       # Cycle when the warmup percentage was hit.
 system.cpu.l2cache.writebacks                       0                       # number of writebacks
-system.cpu.numCycles                           224511                       # number of cpu cycles simulated
+system.cpu.numCycles                           223928                       # number of cpu cycles simulated
 system.cpu.rename.RENAME:BlockCycles              960                       # Number of cycles rename is blocking
 system.cpu.rename.RENAME:CommittedMaps           9868                       # Number of HB maps that are committed
 system.cpu.rename.RENAME:IQFullEvents               2                       # Number of times rename has blocked due to IQ full
-system.cpu.rename.RENAME:IdleCycles             20098                       # Number of cycles rename is idle
-system.cpu.rename.RENAME:LSQFullEvents            481                       # Number of times rename has blocked due to LSQ full
+system.cpu.rename.RENAME:IdleCycles             21302                       # Number of cycles rename is idle
+system.cpu.rename.RENAME:LSQFullEvents            411                       # Number of times rename has blocked due to LSQ full
 system.cpu.rename.RENAME:ROBFullEvents              4                       # Number of times rename has blocked due to ROB full
 system.cpu.rename.RENAME:RenameLookups          46931                       # Number of register rename lookups that rename has made
-system.cpu.rename.RENAME:RenamedInsts           31260                       # Number of instructions processed by rename
+system.cpu.rename.RENAME:RenamedInsts           31249                       # Number of instructions processed by rename
 system.cpu.rename.RENAME:RenamedOperands        25831                       # Number of destination operands rename has renamed
-system.cpu.rename.RENAME:RunCycles               7921                       # Number of cycles rename is running
+system.cpu.rename.RENAME:RunCycles               7136                       # Number of cycles rename is running
 system.cpu.rename.RENAME:SquashCycles            3162                       # Number of cycles rename is squashing
-system.cpu.rename.RENAME:SquashedInsts           8042                       # Number of squashed instructions processed by rename
-system.cpu.rename.RENAME:UnblockCycles           1212                       # Number of cycles rename is unblocking
+system.cpu.rename.RENAME:UnblockCycles            614                       # Number of cycles rename is unblocking
 system.cpu.rename.RENAME:UndoneMaps             15963                       # Number of HB maps that are undone due to squashing
-system.cpu.rename.RENAME:serializeStallCycles       190573                       # count of cycles rename stalled for serializing inst
+system.cpu.rename.RENAME:serializeStallCycles       190754                       # count of cycles rename stalled for serializing inst
 system.cpu.rename.RENAME:serializingInsts          638                       # count of serializing insts renamed
-system.cpu.rename.RENAME:skidInsts               5594                       # count of insts added to the skid buffer
+system.cpu.rename.RENAME:skidInsts               5529                       # count of insts added to the skid buffer
 system.cpu.rename.RENAME:tempSerializingInsts          629                       # count of temporary serializing insts renamed
 system.cpu.timesIdled                             289                       # Number of times that the entire CPU went into an idle state and unscheduled itself
 system.cpu.workload.PROG:num_syscalls               8                       # Number of system calls
diff --git a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stderr b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stderr
index 48affb0e2..7873672f2 100644
--- a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stderr
+++ b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stderr
@@ -1,4 +1,3 @@
 warn: More than two loadable segments in ELF object.
 warn: Ignoring segment @ 0x0 length 0x0.
-0: system.remote_gdb.listener: listening for remote gdb on port 7003
 warn: Entering event queue @ 0.  Starting simulation...
diff --git a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stdout b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stdout
index 6cba2ba7e..38b0c1787 100644
--- a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stdout
+++ b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stdout
@@ -16,9 +16,9 @@ The Regents of The University of Michigan
 All Rights Reserved
 
 
-M5 compiled Apr  9 2007 03:06:26
-M5 started Mon Apr  9 03:06:54 2007
-M5 executing on zizzer.eecs.umich.edu
+M5 compiled Apr 13 2007 13:56:34
+M5 started Fri Apr 13 13:56:35 2007
+M5 executing on ahchoo.blinky.homelinux.org
 command line: build/SPARC_SE/m5.fast -d build/SPARC_SE/tests/fast/quick/02.insttest/sparc/linux/o3-timing tests/run.py quick/02.insttest/sparc/linux/o3-timing
 Global frequency set at 1000000000000 ticks per second
-Exiting @ tick 1421211 because target called exit()
+Exiting @ tick 1421207 because target called exit()
-- 
cgit v1.2.3


From 3140dd88bc588ea51aadeb2dd58d33cc9a40883a Mon Sep 17 00:00:00 2001
From: Gabe Black <gblack@eecs.umich.edu>
Date: Sat, 14 Apr 2007 17:07:24 +0000
Subject: Make the fsr a serializing register. Other control registers probably
 need this as well.

--HG--
extra : convert_revision : edd3f9a83cc2722b6e0eff0eff4a8e034b0f6ec6
---
 src/arch/sparc/isa/operands.isa | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/arch/sparc/isa/operands.isa b/src/arch/sparc/isa/operands.isa
index 58d616a7a..110b37d15 100644
--- a/src/arch/sparc/isa/operands.isa
+++ b/src/arch/sparc/isa/operands.isa
@@ -187,7 +187,7 @@ def operands {{
     'Hver':		('ControlReg', 'udw', 'MISCREG_HVER', None, 74),
     'StrandStsReg':	('ControlReg', 'udw', 'MISCREG_STRAND_STS_REG', None, 75),
 
-    'Fsr':		('ControlReg', 'udw', 'MISCREG_FSR', None, 80),
+    'Fsr':		('ControlReg', 'udw', 'MISCREG_FSR', (None, None, ['IsSerializeAfter','IsSerializing','IsNonSpeculative']), 80),
     # Mem gets a large number so it's always last
     'Mem': 		('Mem', 'udw', None, ('IsMemRef', 'IsLoad', 'IsStore'), 100)
 
-- 
cgit v1.2.3


From 5a3dcc172a9fd661330909815b163eb6f4d6a2d8 Mon Sep 17 00:00:00 2001
From: Gabe Black <gblack@eecs.umich.edu>
Date: Sat, 14 Apr 2007 17:08:24 +0000
Subject: Make register indexes larger so they can actually hold all the legal
 values. Oops!

--HG--
extra : convert_revision : 7689b2e1f7468e4acb8be0f242f74002c79e7960
---
 src/arch/sparc/types.hh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/arch/sparc/types.hh b/src/arch/sparc/types.hh
index 15386adca..8bd50b7e8 100644
--- a/src/arch/sparc/types.hh
+++ b/src/arch/sparc/types.hh
@@ -59,7 +59,7 @@ namespace SparcISA
 
     typedef int RegContextVal;
 
-    typedef uint8_t RegIndex;
+    typedef uint16_t RegIndex;
 }
 
 #endif
-- 
cgit v1.2.3


From c3081d9c1c36e1a08c173048783d191fa19463de Mon Sep 17 00:00:00 2001
From: Gabe Black <gblack@eecs.umich.edu>
Date: Sat, 14 Apr 2007 17:13:18 +0000
Subject: Add support for microcode and pull out the special branch delay slot
 handling. Branch delay slots need to be squashed on a mispredict as well
 because the NNPC they saw was incorrect.

--HG--
extra : convert_revision : 8b9c603616bcad254417a7a3fa3edfb4c8728719
---
 src/cpu/base_dyn_inst.hh          | 57 +++++++++++++++++++++---
 src/cpu/base_dyn_inst_impl.hh     | 55 +++++++++++++++++++++--
 src/cpu/o3/comm.hh                | 21 +++++----
 src/cpu/o3/commit.hh              | 34 ++++++++++++---
 src/cpu/o3/commit_impl.hh         | 11 ++---
 src/cpu/o3/cpu.cc                 | 30 ++++++++++++-
 src/cpu/o3/cpu.hh                 | 22 +++++++---
 src/cpu/o3/decode_impl.hh         |  4 +-
 src/cpu/o3/fetch.hh               | 17 ++++----
 src/cpu/o3/fetch_impl.hh          | 91 +++++++++++++++++++++++++--------------
 src/cpu/o3/iew_impl.hh            |  1 +
 src/cpu/o3/rename_impl.hh         |  6 ++-
 src/cpu/o3/sparc/dyn_inst.hh      | 10 ++++-
 src/cpu/o3/sparc/dyn_inst_impl.hh | 17 +++++++-
 14 files changed, 291 insertions(+), 85 deletions(-)

diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index b02038b3e..1311e5cf2 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -209,6 +209,9 @@ class BaseDynInst : public FastAlloc, public RefCounted
     /** PC of this instruction. */
     Addr PC;
 
+    /** Micro PC of this instruction. */
+    Addr microPC;
+
   protected:
     /** Next non-speculative PC.  It is not filled in at fetch, but rather
      *  once the target of the branch is truly known (either decode or
@@ -219,12 +222,18 @@ class BaseDynInst : public FastAlloc, public RefCounted
     /** Next non-speculative NPC. Target PC for Mips or Sparc. */
     Addr nextNPC;
 
+    /** Next non-speculative micro PC. */
+    Addr nextMicroPC;
+
     /** Predicted next PC. */
     Addr predPC;
 
     /** Predicted next NPC. */
     Addr predNPC;
 
+    /** Predicted next microPC */
+    Addr predMicroPC;
+
     /** If this is a branch that was predicted taken */
     bool predTaken;
 
@@ -340,6 +349,17 @@ class BaseDynInst : public FastAlloc, public RefCounted
     {
         _flatDestRegIdx[idx] = flattened_dest;
     }
+    /** BaseDynInst constructor given a StaticInstPtr and explicit PC values.
+     *  @param staticInst A StaticInstPtr to the underlying instruction.
+     *  @param PC The PC of the instruction.
+     *  @param pred_PC The predicted next PC.
+     *  @param pred_NPC The predicted next NPC.
+     *  @param seq_num The sequence number of the instruction.
+     *  @param cpu Pointer to the instruction's CPU.
+     */
+    BaseDynInst(StaticInstPtr staticInst, Addr PC, Addr NPC, Addr microPC,
+            Addr pred_PC, Addr pred_NPC, Addr pred_MicroPC,
+            InstSeqNum seq_num, ImplCPU *cpu);
 
     /** BaseDynInst constructor given a binary instruction.
      *  @param inst The binary instruction.
@@ -349,8 +369,8 @@ class BaseDynInst : public FastAlloc, public RefCounted
      *  @param seq_num The sequence number of the instruction.
      *  @param cpu Pointer to the instruction's CPU.
      */
-    BaseDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC,
-            Addr pred_PC, Addr pred_NPC,
+    BaseDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC, Addr microPC,
+            Addr pred_PC, Addr pred_NPC, Addr pred_MicroPC,
             InstSeqNum seq_num, ImplCPU *cpu);
 
     /** BaseDynInst constructor given a StaticInst pointer.
@@ -402,11 +422,18 @@ class BaseDynInst : public FastAlloc, public RefCounted
 #endif
     }
 
+    Addr readNextMicroPC()
+    {
+        return nextMicroPC;
+    }
+
     /** Set the predicted target of this current instruction. */
-    void setPredTarg(Addr predicted_PC, Addr predicted_NPC)
+    void setPredTarg(Addr predicted_PC, Addr predicted_NPC,
+            Addr predicted_MicroPC)
     {
         predPC = predicted_PC;
         predNPC = predicted_NPC;
+        predMicroPC = predicted_MicroPC;
     }
 
     /** Returns the predicted PC immediately after the branch. */
@@ -415,6 +442,9 @@ class BaseDynInst : public FastAlloc, public RefCounted
     /** Returns the predicted PC two instructions after the branch */
     Addr readPredNPC() { return predNPC; }
 
+    /** Returns the predicted micro PC after the branch */
+    Addr readPredMicroPC() { return predMicroPC; }
+
     /** Returns whether the instruction was predicted taken or not. */
     bool readPredTaken()
     {
@@ -430,7 +460,8 @@ class BaseDynInst : public FastAlloc, public RefCounted
     bool mispredicted()
     {
         return readPredPC() != readNextPC() ||
-            readPredNPC() != readNextNPC();
+            readPredNPC() != readNextNPC() ||
+            readPredMicroPC() != readNextMicroPC();
     }
 
     //
@@ -467,6 +498,12 @@ class BaseDynInst : public FastAlloc, public RefCounted
     bool isQuiesce() const { return staticInst->isQuiesce(); }
     bool isIprAccess() const { return staticInst->isIprAccess(); }
     bool isUnverifiable() const { return staticInst->isUnverifiable(); }
+    bool isMacroOp() const { return staticInst->isMacroOp(); }
+    bool isMicroOp() const { return staticInst->isMicroOp(); }
+    bool isDelayedCommit() const { return staticInst->isDelayedCommit(); }
+    bool isLastMicroOp() const { return staticInst->isLastMicroOp(); }
+    bool isFirstMicroOp() const { return staticInst->isFirstMicroOp(); }
+    bool isMicroBranch() const { return staticInst->isMicroBranch(); }
 
     /** Temporarily sets this instruction as a serialize before instruction. */
     void setSerializeBefore() { status.set(SerializeBefore); }
@@ -700,20 +737,28 @@ class BaseDynInst : public FastAlloc, public RefCounted
     /** Read the PC of this instruction. */
     const Addr readPC() const { return PC; }
 
+    /**Read the micro PC of this instruction. */
+    const Addr readMicroPC() const { return microPC; }
+
     /** Set the next PC of this instruction (its actual target). */
-    void setNextPC(uint64_t val)
+    void setNextPC(Addr val)
     {
         nextPC = val;
     }
 
     /** Set the next NPC of this instruction (the target in Mips or Sparc).*/
-    void setNextNPC(uint64_t val)
+    void setNextNPC(Addr val)
     {
 #if ISA_HAS_DELAY_SLOT
         nextNPC = val;
 #endif
     }
 
+    void setNextMicroPC(Addr val)
+    {
+        nextMicroPC = val;
+    }
+
     /** Sets the ASID. */
     void setASID(short addr_space_id) { asid = addr_space_id; }
 
diff --git a/src/cpu/base_dyn_inst_impl.hh b/src/cpu/base_dyn_inst_impl.hh
index a1c866336..acf8af9cf 100644
--- a/src/cpu/base_dyn_inst_impl.hh
+++ b/src/cpu/base_dyn_inst_impl.hh
@@ -62,19 +62,66 @@ my_hash_t thishash;
 #endif
 
 template <class Impl>
-BaseDynInst<Impl>::BaseDynInst(TheISA::ExtMachInst machInst,
+BaseDynInst<Impl>::BaseDynInst(StaticInstPtr _staticInst,
                                Addr inst_PC, Addr inst_NPC,
+                               Addr inst_MicroPC,
                                Addr pred_PC, Addr pred_NPC,
+                               Addr pred_MicroPC,
                                InstSeqNum seq_num, ImplCPU *cpu)
-  : staticInst(machInst), traceData(NULL), cpu(cpu)
+  : staticInst(_staticInst), traceData(NULL), cpu(cpu)
 {
     seqNum = seq_num;
 
+    bool nextIsMicro =
+        staticInst->isMicroOp() && !staticInst->isLastMicroOp();
+
     PC = inst_PC;
-    nextPC = inst_NPC;
-    nextNPC = nextPC + sizeof(TheISA::MachInst);
+    microPC = inst_MicroPC;
+    if (nextIsMicro) {
+        nextPC = inst_PC;
+        nextNPC = inst_NPC;
+        nextMicroPC = microPC + 1;
+    } else {
+        nextPC = inst_NPC;
+        nextNPC = nextPC + sizeof(TheISA::MachInst);
+        nextMicroPC = 0;
+    }
+    predPC = pred_PC;
+    predNPC = pred_NPC;
+    predMicroPC = pred_MicroPC;
+    predTaken = false;
+
+    initVars();
+}
+
+template <class Impl>
+BaseDynInst<Impl>::BaseDynInst(TheISA::ExtMachInst inst,
+                               Addr inst_PC, Addr inst_NPC,
+                               Addr inst_MicroPC,
+                               Addr pred_PC, Addr pred_NPC,
+                               Addr pred_MicroPC,
+                               InstSeqNum seq_num, ImplCPU *cpu)
+  : staticInst(inst), traceData(NULL), cpu(cpu)
+{
+    seqNum = seq_num;
+
+    bool nextIsMicro =
+        staticInst->isMicroOp() && !staticInst->isLastMicroOp();
+
+    PC = inst_PC;
+    microPC = inst_MicroPC;
+    if (nextIsMicro) {
+        nextPC = inst_PC;
+        nextNPC = inst_NPC;
+        nextMicroPC = microPC + 1;
+    } else {
+        nextPC = inst_NPC;
+        nextNPC = nextPC + sizeof(TheISA::MachInst);
+        nextMicroPC = 0;
+    }
     predPC = pred_PC;
     predNPC = pred_NPC;
+    predMicroPC = pred_MicroPC;
     predTaken = false;
 
     initVars();
diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh
index 8d7bb95f4..fb772060b 100644
--- a/src/cpu/o3/comm.hh
+++ b/src/cpu/o3/comm.hh
@@ -87,9 +87,10 @@ struct DefaultIEWDefaultCommit {
     bool squash[Impl::MaxThreads];
     bool branchMispredict[Impl::MaxThreads];
     bool branchTaken[Impl::MaxThreads];
-    uint64_t mispredPC[Impl::MaxThreads];
-    uint64_t nextPC[Impl::MaxThreads];
-    uint64_t nextNPC[Impl::MaxThreads];
+    Addr mispredPC[Impl::MaxThreads];
+    Addr nextPC[Impl::MaxThreads];
+    Addr nextNPC[Impl::MaxThreads];
+    Addr nextMicroPC[Impl::MaxThreads];
     InstSeqNum squashedSeqNum[Impl::MaxThreads];
 
     bool includeSquashInst[Impl::MaxThreads];
@@ -118,9 +119,10 @@ struct TimeBufStruct {
         // struct as it is used pretty frequently.
         bool branchMispredict;
         bool branchTaken;
-        uint64_t mispredPC;
-        uint64_t nextPC;
-        uint64_t nextNPC;
+        Addr mispredPC;
+        Addr nextPC;
+        Addr nextNPC;
+        Addr nextMicroPC;
 
         unsigned branchCount;
     };
@@ -158,9 +160,10 @@ struct TimeBufStruct {
 
         bool branchMispredict;
         bool branchTaken;
-        uint64_t mispredPC;
-        uint64_t nextPC;
-        uint64_t nextNPC;
+        Addr mispredPC;
+        Addr nextPC;
+        Addr nextNPC;
+        Addr nextMicroPC;
 
         // Represents the instruction that has either been retired or
         // squashed.  Similar to having a single bus that broadcasts the
diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh
index fba618c14..27bdd20c5 100644
--- a/src/cpu/o3/commit.hh
+++ b/src/cpu/o3/commit.hh
@@ -279,25 +279,37 @@ class DefaultCommit
     /** Returns the PC of the head instruction of the ROB.
      * @todo: Probably remove this function as it returns only thread 0.
      */
-    uint64_t readPC() { return PC[0]; }
+    Addr readPC() { return PC[0]; }
 
     /** Returns the PC of a specific thread. */
-    uint64_t readPC(unsigned tid) { return PC[tid]; }
+    Addr readPC(unsigned tid) { return PC[tid]; }
 
     /** Sets the PC of a specific thread. */
-    void setPC(uint64_t val, unsigned tid) { PC[tid] = val; }
+    void setPC(Addr val, unsigned tid) { PC[tid] = val; }
+
+    /** Reads the micro PC of a specific thread. */
+    Addr readMicroPC(unsigned tid) { return microPC[tid]; }
+
+    /** Sets the micro PC of a specific thread */
+    void setMicroPC(Addr val, unsigned tid) { microPC[tid] = val; }
 
     /** Reads the next PC of a specific thread. */
-    uint64_t readNextPC(unsigned tid) { return nextPC[tid]; }
+    Addr readNextPC(unsigned tid) { return nextPC[tid]; }
 
     /** Sets the next PC of a specific thread. */
-    void setNextPC(uint64_t val, unsigned tid) { nextPC[tid] = val; }
+    void setNextPC(Addr val, unsigned tid) { nextPC[tid] = val; }
 
     /** Reads the next NPC of a specific thread. */
-    uint64_t readNextNPC(unsigned tid) { return nextNPC[tid]; }
+    Addr readNextNPC(unsigned tid) { return nextNPC[tid]; }
 
     /** Sets the next NPC of a specific thread. */
-    void setNextNPC(uint64_t val, unsigned tid) { nextNPC[tid] = val; }
+    void setNextNPC(Addr val, unsigned tid) { nextNPC[tid] = val; }
+
+    /** Reads the next micro PC of a specific thread. */
+    Addr readNextMicroPC(unsigned tid) { return nextMicroPC[tid]; }
+
+    /** Sets the next micro PC of a specific thread. */
+    void setNextMicroPC(Addr val, unsigned tid) { nextMicroPC[tid] = val; }
 
   private:
     /** Time buffer interface. */
@@ -402,12 +414,20 @@ class DefaultCommit
      */
     Addr PC[Impl::MaxThreads];
 
+    /** The commit micro PC of each thread.  Refers to the instruction that
+     * is currently being processed/committed.
+     */
+    Addr microPC[Impl::MaxThreads];
+
     /** The next PC of each thread. */
     Addr nextPC[Impl::MaxThreads];
 
     /** The next NPC of each thread. */
     Addr nextNPC[Impl::MaxThreads];
 
+    /** The next micro PC of each thread. */
+    Addr nextMicroPC[Impl::MaxThreads];
+
     /** The sequence number of the youngest valid instruction in the ROB. */
     InstSeqNum youngestSeqNum[Impl::MaxThreads];
 
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index 9dd5ed291..fc24d7edc 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -124,7 +124,7 @@ DefaultCommit<Impl>::DefaultCommit(O3CPU *_cpu, Params *params)
         committedStores[i] = false;
         trapSquash[i] = false;
         tcSquash[i] = false;
-        PC[i] = nextPC[i] = nextNPC[i] = 0;
+        microPC[i] = nextMicroPC[i] = PC[i] = nextPC[i] = nextNPC[i] = 0;
     }
 #if FULL_SYSTEM
     interrupt = NoFault;
@@ -508,6 +508,7 @@ DefaultCommit<Impl>::squashAll(unsigned tid)
 
     toIEW->commitInfo[tid].nextPC = PC[tid];
     toIEW->commitInfo[tid].nextNPC = nextPC[tid];
+    toIEW->commitInfo[tid].nextMicroPC = nextMicroPC[tid];
 }
 
 template <class Impl>
@@ -768,6 +769,7 @@ DefaultCommit<Impl>::commit()
 
             toIEW->commitInfo[tid].nextPC = fromIEW->nextPC[tid];
             toIEW->commitInfo[tid].nextNPC = fromIEW->nextNPC[tid];
+            toIEW->commitInfo[tid].nextMicroPC = fromIEW->nextMicroPC[tid];
 
             toIEW->commitInfo[tid].mispredPC = fromIEW->mispredPC[tid];
 
@@ -877,6 +879,7 @@ DefaultCommit<Impl>::commitInsts()
             PC[tid] = head_inst->readPC();
             nextPC[tid] = head_inst->readNextPC();
             nextNPC[tid] = head_inst->readNextNPC();
+            nextMicroPC[tid] = head_inst->readNextMicroPC();
 
             // Increment the total number of non-speculative instructions
             // executed.
@@ -905,12 +908,10 @@ DefaultCommit<Impl>::commitInsts()
                 }
 
                 PC[tid] = nextPC[tid];
-#if ISA_HAS_DELAY_SLOT
                 nextPC[tid] = nextNPC[tid];
                 nextNPC[tid] = nextNPC[tid] + sizeof(TheISA::MachInst);
-#else
-                nextPC[tid] = nextPC[tid] + sizeof(TheISA::MachInst);
-#endif
+                microPC[tid] = nextMicroPC[tid];
+                nextMicroPC[tid] = microPC[tid] + 1;
 
 #if FULL_SYSTEM
                 int count = 0;
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index b2b4645d2..59978a065 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -696,7 +696,7 @@ FullO3CPU<Impl>::removeThread(unsigned tid)
 
     // Squash Throughout Pipeline
     InstSeqNum squash_seq_num = commit.rob->readHeadInst(tid)->seqNum;
-    fetch.squash(0, sizeof(TheISA::MachInst), squash_seq_num, tid);
+    fetch.squash(0, sizeof(TheISA::MachInst), 0, squash_seq_num, tid);
     decode.squash(tid);
     rename.squash(squash_seq_num, tid);
     iew.squash(tid);
@@ -1150,6 +1150,20 @@ FullO3CPU<Impl>::setPC(Addr new_PC,unsigned tid)
     commit.setPC(new_PC, tid);
 }
 
+template <class Impl>
+uint64_t
+FullO3CPU<Impl>::readMicroPC(unsigned tid)
+{
+    return commit.readMicroPC(tid);
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::setMicroPC(Addr new_PC,unsigned tid)
+{
+    commit.setMicroPC(new_PC, tid);
+}
+
 template <class Impl>
 uint64_t
 FullO3CPU<Impl>::readNextPC(unsigned tid)
@@ -1178,6 +1192,20 @@ FullO3CPU<Impl>::setNextNPC(uint64_t val,unsigned tid)
     commit.setNextNPC(val, tid);
 }
 
+template <class Impl>
+uint64_t
+FullO3CPU<Impl>::readNextMicroPC(unsigned tid)
+{
+    return commit.readNextMicroPC(tid);
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::setNextMicroPC(Addr new_PC,unsigned tid)
+{
+    commit.setNextMicroPC(new_PC, tid);
+}
+
 template <class Impl>
 typename FullO3CPU<Impl>::ListIt
 FullO3CPU<Impl>::addInst(DynInstPtr &inst)
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index 4b247e6e3..bff78bf9e 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -433,22 +433,34 @@ class FullO3CPU : public BaseO3CPU
     void setArchFloatRegInt(int reg_idx, uint64_t val, unsigned tid);
 
     /** Reads the commit PC of a specific thread. */
-    uint64_t readPC(unsigned tid);
+    Addr readPC(unsigned tid);
 
     /** Sets the commit PC of a specific thread. */
     void setPC(Addr new_PC, unsigned tid);
 
+    /** Reads the commit micro PC of a specific thread. */
+    Addr readMicroPC(unsigned tid);
+
+    /** Sets the commit micro PC of a specific thread. */
+    void setMicroPC(Addr new_microPC, unsigned tid);
+
     /** Reads the next PC of a specific thread. */
-    uint64_t readNextPC(unsigned tid);
+    Addr readNextPC(unsigned tid);
 
     /** Sets the next PC of a specific thread. */
-    void setNextPC(uint64_t val, unsigned tid);
+    void setNextPC(Addr val, unsigned tid);
 
     /** Reads the next NPC of a specific thread. */
-    uint64_t readNextNPC(unsigned tid);
+    Addr readNextNPC(unsigned tid);
 
     /** Sets the next NPC of a specific thread. */
-    void setNextNPC(uint64_t val, unsigned tid);
+    void setNextNPC(Addr val, unsigned tid);
+
+    /** Reads the commit next micro PC of a specific thread. */
+    Addr readNextMicroPC(unsigned tid);
+
+    /** Sets the commit next micro PC of a specific thread. */
+    void setNextMicroPC(Addr val, unsigned tid);
 
     /** Function to add instruction onto the head of the list of the
      *  instructions.  Used when new instructions are fetched.
diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh
index c9d0a1885..ce6738456 100644
--- a/src/cpu/o3/decode_impl.hh
+++ b/src/cpu/o3/decode_impl.hh
@@ -273,6 +273,7 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid)
     ///explicitly for ISAs with delay slots.
     toFetch->decodeInfo[tid].nextNPC =
         inst->branchTarget() + sizeof(TheISA::MachInst);
+    toFetch->decodeInfo[tid].nextMicroPC = inst->readMicroPC();
 #if ISA_HAS_DELAY_SLOT
     toFetch->decodeInfo[tid].branchTaken = inst->readNextNPC() !=
         (inst->readNextPC() + sizeof(TheISA::MachInst));
@@ -735,7 +736,8 @@ DefaultDecode<Impl>::decodeInsts(unsigned tid)
                 // a check at the end
                 squash(inst, inst->threadNumber);
                 Addr target = inst->branchTarget();
-                inst->setPredTarg(target, target + sizeof(TheISA::MachInst));
+                //The micro pc after an instruction level branch should be 0
+                inst->setPredTarg(target, target + sizeof(TheISA::MachInst), 0);
                 break;
             }
         }
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index bb0057e7c..7645a226c 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -227,7 +227,7 @@ class DefaultFetch
      * @param next_NPC Used for ISAs which use delay slots.
      * @return Whether or not a branch was predicted as taken.
      */
-    bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, Addr &next_NPC);
+    bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, Addr &next_NPC, Addr &next_MicroPC);
 
     /**
      * Fetches the cache line that contains fetch_PC.  Returns any
@@ -242,12 +242,14 @@ class DefaultFetch
     bool fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid);
 
     /** Squashes a specific thread and resets the PC. */
-    inline void doSquash(const Addr &new_PC, const Addr &new_NPC, unsigned tid);
+    inline void doSquash(const Addr &new_PC, const Addr &new_NPC,
+                         const Addr &new_MicroPC, unsigned tid);
 
     /** Squashes a specific thread and resets the PC. Also tells the CPU to
      * remove any instructions between fetch and decode that should be sqaushed.
      */
     void squashFromDecode(const Addr &new_PC, const Addr &new_NPC,
+                          const Addr &new_MicroPC,
                           const InstSeqNum &seq_num, unsigned tid);
 
     /** Checks if a thread is stalled. */
@@ -263,6 +265,7 @@ class DefaultFetch
      * squash should be the commit stage.
      */
     void squash(const Addr &new_PC, const Addr &new_NPC,
+                const Addr &new_MicroPC,
                 const InstSeqNum &seq_num, unsigned tid);
 
     /** Ticks the fetch stage, processing all inputs signals and fetching
@@ -346,16 +349,12 @@ class DefaultFetch
     /** Per-thread fetch PC. */
     Addr PC[Impl::MaxThreads];
 
+    /** Per-thread fetch micro PC. */
+    Addr microPC[Impl::MaxThreads];
+
     /** Per-thread next PC. */
     Addr nextPC[Impl::MaxThreads];
 
-    /** Per-thread next Next PC.
-     *  This is not a real register but is used for
-     *  architectures that use a branch-delay slot.
-     *  (such as MIPS or Sparc)
-     */
-    Addr nextNPC[Impl::MaxThreads];
-
     /** Memory request used to access cache. */
     RequestPtr memReq[Impl::MaxThreads];
 
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index 25498c7f3..d1f38e38b 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -312,7 +312,7 @@ DefaultFetch<Impl>::initStage()
     for (int tid = 0; tid < numThreads; tid++) {
         PC[tid] = cpu->readPC(tid);
         nextPC[tid] = cpu->readNextPC(tid);
-        nextNPC[tid] = cpu->readNextNPC(tid);
+        microPC[tid] = cpu->readMicroPC(tid);
     }
 
     for (int tid=0; tid < numThreads; tid++) {
@@ -439,11 +439,7 @@ DefaultFetch<Impl>::takeOverFrom()
         stalls[i].commit = 0;
         PC[i] = cpu->readPC(i);
         nextPC[i] = cpu->readNextPC(i);
-#if ISA_HAS_DELAY_SLOT
-        nextNPC[i] = cpu->readNextNPC(i);
-#else
-        nextNPC[i] = nextPC[i] + sizeof(TheISA::MachInst);
-#endif
+        microPC[i] = cpu->readMicroPC(i);
         fetchStatus[i] = Running;
     }
     numInst = 0;
@@ -493,7 +489,7 @@ DefaultFetch<Impl>::switchToInactive()
 template <class Impl>
 bool
 DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC,
-                                          Addr &next_NPC)
+                                          Addr &next_NPC, Addr &next_MicroPC)
 {
     // Do branch prediction check here.
     // A bit of a misnomer...next_PC is actually the current PC until
@@ -501,13 +497,22 @@ DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC,
     bool predict_taken;
 
     if (!inst->isControl()) {
-        next_PC  = next_NPC;
-        next_NPC = next_NPC + instSize;
-        inst->setPredTarg(next_PC, next_NPC);
+        if (inst->isMicroOp() && !inst->isLastMicroOp()) {
+            next_MicroPC++;
+        } else {
+            next_PC  = next_NPC;
+            next_NPC = next_NPC + instSize;
+            next_MicroPC = 0;
+        }
+        inst->setPredTarg(next_PC, next_NPC, next_MicroPC);
         inst->setPredTaken(false);
         return false;
     }
 
+    //Assume for now that all control flow is to a different macroop which
+    //would reset the micro pc to 0.
+    next_MicroPC = 0;
+
     int tid = inst->threadNumber;
     Addr pred_PC = next_PC;
     predict_taken = branchPred.predict(inst, pred_PC, tid);
@@ -534,7 +539,7 @@ DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC,
 #endif
 /*    DPRINTF(Fetch, "[tid:%i]: Branch predicted to go to %#x and then %#x.\n",
             tid, next_PC, next_NPC);*/
-    inst->setPredTarg(next_PC, next_NPC);
+    inst->setPredTarg(next_PC, next_NPC, next_MicroPC);
     inst->setPredTaken(predict_taken);
 
     ++fetchedBranches;
@@ -658,14 +663,14 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
 template <class Impl>
 inline void
 DefaultFetch<Impl>::doSquash(const Addr &new_PC,
-        const Addr &new_NPC, unsigned tid)
+        const Addr &new_NPC, const Addr &new_microPC, unsigned tid)
 {
     DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x, NPC to: %#x.\n",
             tid, new_PC, new_NPC);
 
     PC[tid] = new_PC;
     nextPC[tid] = new_NPC;
-    nextNPC[tid] = new_NPC + instSize;
+    microPC[tid] = new_microPC;
 
     // Clear the icache miss if it's outstanding.
     if (fetchStatus[tid] == IcacheWaitResponse) {
@@ -693,12 +698,12 @@ DefaultFetch<Impl>::doSquash(const Addr &new_PC,
 template<class Impl>
 void
 DefaultFetch<Impl>::squashFromDecode(const Addr &new_PC, const Addr &new_NPC,
-                                     const InstSeqNum &seq_num,
-                                     unsigned tid)
+                                     const Addr &new_MicroPC,
+                                     const InstSeqNum &seq_num, unsigned tid)
 {
     DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n",tid);
 
-    doSquash(new_PC, new_NPC, tid);
+    doSquash(new_PC, new_NPC, new_MicroPC, tid);
 
     // Tell the CPU to remove any instructions that are in flight between
     // fetch and decode.
@@ -774,11 +779,12 @@ DefaultFetch<Impl>::updateFetchStatus()
 template <class Impl>
 void
 DefaultFetch<Impl>::squash(const Addr &new_PC, const Addr &new_NPC,
+                           const Addr &new_MicroPC,
                            const InstSeqNum &seq_num, unsigned tid)
 {
     DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid);
 
-    doSquash(new_PC, new_NPC, tid);
+    doSquash(new_PC, new_NPC, new_MicroPC, tid);
 
     // Tell the CPU to remove any instructions that are not in the ROB.
     cpu->removeInstsNotInROB(tid);
@@ -893,6 +899,7 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
         // In any case, squash.
         squash(fromCommit->commitInfo[tid].nextPC,
                fromCommit->commitInfo[tid].nextNPC,
+               fromCommit->commitInfo[tid].nextMicroPC,
                fromCommit->commitInfo[tid].doneSeqNum,
                tid);
 
@@ -948,6 +955,7 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
             // Squash unless we're already squashing
             squashFromDecode(fromDecode->decodeInfo[tid].nextPC,
                              fromDecode->decodeInfo[tid].nextNPC,
+                             fromDecode->decodeInfo[tid].nextMicroPC,
                              fromDecode->decodeInfo[tid].doneSeqNum,
                              tid);
 
@@ -1002,9 +1010,9 @@ DefaultFetch<Impl>::fetch(bool &status_change)
     DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid);
 
     // The current PC.
-    Addr &fetch_PC = PC[tid];
-
-    Addr &fetch_NPC = nextPC[tid];
+    Addr fetch_PC = PC[tid];
+    Addr fetch_NPC = nextPC[tid];
+    Addr fetch_MicroPC = microPC[tid];
 
     // Fault code for memory access.
     Fault fault = NoFault;
@@ -1063,6 +1071,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
 
     Addr next_PC = fetch_PC;
     Addr next_NPC = fetch_NPC;
+    Addr next_MicroPC = fetch_MicroPC;
 
     InstSeqNum inst_seq;
     MachInst inst;
@@ -1070,6 +1079,9 @@ DefaultFetch<Impl>::fetch(bool &status_change)
     // @todo: Fix this hack.
     unsigned offset = (fetch_PC & cacheBlkMask) & ~3;
 
+    StaticInstPtr staticInst = NULL;
+    StaticInstPtr macroop = NULL;
+
     if (fault == NoFault) {
         // If the read of the first instruction was successful, then grab the
         // instructions from the rest of the cache line and put them into the
@@ -1104,19 +1116,29 @@ DefaultFetch<Impl>::fetch(bool &status_change)
             // Make sure this is a valid index.
             assert(offset <= cacheBlkSize - instSize);
 
-            // Get the instruction from the array of the cache line.
-            inst = TheISA::gtoh(*reinterpret_cast<TheISA::MachInst *>
-                        (&cacheData[tid][offset]));
+            if (!macroop) {
+                // Get the instruction from the array of the cache line.
+                inst = TheISA::gtoh(*reinterpret_cast<TheISA::MachInst *>
+                            (&cacheData[tid][offset]));
 
-            predecoder.setTC(cpu->thread[tid]->getTC());
-            predecoder.moreBytes(fetch_PC, 0, inst);
+                predecoder.setTC(cpu->thread[tid]->getTC());
+                predecoder.moreBytes(fetch_PC, 0, inst);
 
-            ext_inst = predecoder.getExtMachInst();
+                ext_inst = predecoder.getExtMachInst();
+                staticInst = StaticInstPtr(ext_inst);
+                if (staticInst->isMacroOp())
+                    macroop = staticInst;
+            }
+            if (macroop) {
+                staticInst = macroop->fetchMicroOp(fetch_MicroPC);
+                if (staticInst->isLastMicroOp())
+                    macroop = NULL;
+            }
 
             // Create a new DynInst from the instruction fetched.
-            DynInstPtr instruction = new DynInst(ext_inst,
-                                                 fetch_PC, fetch_NPC,
-                                                 next_PC, next_NPC,
+            DynInstPtr instruction = new DynInst(staticInst,
+                                                 fetch_PC, fetch_NPC, fetch_MicroPC,
+                                                 next_PC, next_NPC, next_MicroPC,
                                                  inst_seq, cpu);
             instruction->setTid(tid);
 
@@ -1139,7 +1161,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
                                      instruction->readPC());
 
             ///FIXME This needs to be more robust in dealing with delay slots
-            lookupAndUpdateNextPC(instruction, next_PC, next_NPC);
+            lookupAndUpdateNextPC(instruction, next_PC, next_NPC, next_MicroPC);
             predicted_branch |= (next_PC != fetch_NPC);
 
             // Add instruction to the CPU's list of instructions.
@@ -1157,6 +1179,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
             // Move to the next instruction, unless we have a branch.
             fetch_PC = next_PC;
             fetch_NPC = next_NPC;
+            fetch_MicroPC = next_MicroPC;
 
             if (instruction->isQuiesce()) {
                 DPRINTF(Fetch, "Quiesce instruction encountered, halting fetch!",
@@ -1167,7 +1190,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
                 break;
             }
 
-            offset += instSize;
+            if (!macroop)
+                offset += instSize;
         }
 
         if (offset >= cacheBlkSize) {
@@ -1191,7 +1215,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
     if (fault == NoFault) {
         PC[tid] = next_PC;
         nextPC[tid] = next_NPC;
-        nextNPC[tid] = next_NPC + instSize;
+        microPC[tid] = next_MicroPC;
         DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, next_PC);
     } else {
         // We shouldn't be in an icache miss and also have a fault (an ITB
@@ -1210,8 +1234,9 @@ DefaultFetch<Impl>::fetch(bool &status_change)
         // We will use a nop in order to carry the fault.
         ext_inst = TheISA::NoopMachInst;
 
+        StaticInstPtr staticInst = new StaticInst(ext_inst);
         // Create a new DynInst from the dummy nop.
-        DynInstPtr instruction = new DynInst(ext_inst,
+        DynInstPtr instruction = new DynInst(staticInst,
                                              fetch_PC, fetch_NPC,
                                              next_PC, next_NPC,
                                              inst_seq, cpu);
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index 050785818..399c44909 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -454,6 +454,7 @@ DefaultIEW<Impl>::squashDueToBranch(DynInstPtr &inst, unsigned tid)
 #endif
     toCommit->nextPC[tid] = inst->readNextPC();
     toCommit->nextNPC[tid] = inst->readNextNPC();
+    toCommit->nextMicroPC[tid] = inst->readNextMicroPC();
 
     toCommit->includeSquashInst[tid] = false;
 
diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh
index 6e7180b1e..d78de2c87 100644
--- a/src/cpu/o3/rename_impl.hh
+++ b/src/cpu/o3/rename_impl.hh
@@ -963,6 +963,7 @@ DefaultRename<Impl>::renameSrcRegs(DynInstPtr &inst,unsigned tid)
             // Floating point and Miscellaneous registers need their indexes
             // adjusted to account for the expanded number of flattened int regs.
             flat_src_reg = src_reg - TheISA::FP_Base_DepTag + TheISA::NumIntRegs;
+            DPRINTF(Rename, "Adjusting reg index from %d to %d.\n", src_reg, flat_src_reg);
         }
 
         inst->flattenSrcReg(src_idx, flat_src_reg);
@@ -979,9 +980,11 @@ DefaultRename<Impl>::renameSrcRegs(DynInstPtr &inst,unsigned tid)
 
         // See if the register is ready or not.
         if (scoreboard->getReg(renamed_reg) == true) {
-            DPRINTF(Rename, "[tid:%u]: Register is ready.\n", tid);
+            DPRINTF(Rename, "[tid:%u]: Register %d is ready.\n", tid, renamed_reg);
 
             inst->markSrcRegReady(src_idx);
+        } else {
+            DPRINTF(Rename, "[tid:%u]: Register %d is not ready.\n", tid, renamed_reg);
         }
 
         ++renameRenameLookups;
@@ -1008,6 +1011,7 @@ DefaultRename<Impl>::renameDestRegs(DynInstPtr &inst,unsigned tid)
             // Floating point and Miscellaneous registers need their indexes
             // adjusted to account for the expanded number of flattened int regs.
             flat_dest_reg = dest_reg - TheISA::FP_Base_DepTag + TheISA::NumIntRegs;
+            DPRINTF(Rename, "Adjusting reg index from %d to %d.\n", dest_reg, flat_dest_reg);
         }
 
         inst->flattenDestReg(dest_idx, flat_dest_reg);
diff --git a/src/cpu/o3/sparc/dyn_inst.hh b/src/cpu/o3/sparc/dyn_inst.hh
index 72242b161..a7ab6cd79 100644
--- a/src/cpu/o3/sparc/dyn_inst.hh
+++ b/src/cpu/o3/sparc/dyn_inst.hh
@@ -56,8 +56,14 @@ class SparcDynInst : public BaseDynInst<Impl>
 
   public:
     /** BaseDynInst constructor given a binary instruction. */
-    SparcDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC,
-            Addr Pred_PC, Addr Pred_NPC, InstSeqNum seq_num, O3CPU *cpu);
+    SparcDynInst(StaticInstPtr staticInst, Addr PC, Addr NPC, Addr microPC,
+            Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
+            InstSeqNum seq_num, O3CPU *cpu);
+
+    /** BaseDynInst constructor given a binary instruction. */
+    SparcDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC, Addr microPC,
+            Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
+            InstSeqNum seq_num, O3CPU *cpu);
 
     /** BaseDynInst constructor given a static inst pointer. */
     SparcDynInst(StaticInstPtr &_staticInst);
diff --git a/src/cpu/o3/sparc/dyn_inst_impl.hh b/src/cpu/o3/sparc/dyn_inst_impl.hh
index c4d30b6f4..6bfe97717 100644
--- a/src/cpu/o3/sparc/dyn_inst_impl.hh
+++ b/src/cpu/o3/sparc/dyn_inst_impl.hh
@@ -30,11 +30,24 @@
 
 #include "cpu/o3/sparc/dyn_inst.hh"
 
+template <class Impl>
+SparcDynInst<Impl>::SparcDynInst(StaticInstPtr staticInst,
+        Addr PC, Addr NPC, Addr microPC,
+        Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
+        InstSeqNum seq_num, O3CPU *cpu)
+    : BaseDynInst<Impl>(staticInst, PC, NPC, microPC,
+            Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu)
+{
+    initVars();
+}
+
 template <class Impl>
 SparcDynInst<Impl>::SparcDynInst(TheISA::ExtMachInst inst,
-        Addr PC, Addr NPC, Addr Pred_PC, Addr Pred_NPC,
+        Addr PC, Addr NPC, Addr microPC,
+        Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
         InstSeqNum seq_num, O3CPU *cpu)
-    : BaseDynInst<Impl>(inst, PC, NPC, Pred_PC, Pred_NPC, seq_num, cpu)
+    : BaseDynInst<Impl>(inst, PC, NPC, microPC,
+            Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu)
 {
     initVars();
 }
-- 
cgit v1.2.3


From 308b2f0ce3215eaaed69da937555008f9ed36835 Mon Sep 17 00:00:00 2001
From: Gabe Black <gblack@eecs.umich.edu>
Date: Sun, 15 Apr 2007 21:51:05 +0000
Subject: Add extra constructors to Alpha and MIPS

--HG--
extra : convert_revision : 26ea87bfe9e5c27134eb9a15bf9e4629afae6c69
---
 src/cpu/o3/alpha/dyn_inst.hh      |  9 +++++++--
 src/cpu/o3/alpha/dyn_inst_impl.hh | 19 +++++++++++++++++--
 src/cpu/o3/mips/dyn_inst.hh       | 10 ++++++++--
 src/cpu/o3/mips/dyn_inst_impl.hh  | 18 +++++++++++++++---
 4 files changed, 47 insertions(+), 9 deletions(-)

diff --git a/src/cpu/o3/alpha/dyn_inst.hh b/src/cpu/o3/alpha/dyn_inst.hh
index 20759d849..a6fb7b885 100644
--- a/src/cpu/o3/alpha/dyn_inst.hh
+++ b/src/cpu/o3/alpha/dyn_inst.hh
@@ -73,8 +73,13 @@ class AlphaDynInst : public BaseDynInst<Impl>
 
   public:
     /** BaseDynInst constructor given a binary instruction. */
-    AlphaDynInst(ExtMachInst inst, Addr PC, Addr NPC,
-                 Addr Pred_PC, Addr Pred_NPC,
+    AlphaDynInst(StaticInstPtr staticInst, Addr PC, Addr NPC, Addr microPC,
+                 Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
+                 InstSeqNum seq_num, O3CPU *cpu);
+
+    /** BaseDynInst constructor given a binary instruction. */
+    AlphaDynInst(ExtMachInst inst, Addr PC, Addr NPC, Addr microPC,
+                 Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
                  InstSeqNum seq_num, O3CPU *cpu);
 
     /** BaseDynInst constructor given a static inst pointer. */
diff --git a/src/cpu/o3/alpha/dyn_inst_impl.hh b/src/cpu/o3/alpha/dyn_inst_impl.hh
index fdce1ade5..6dfe0ccdd 100644
--- a/src/cpu/o3/alpha/dyn_inst_impl.hh
+++ b/src/cpu/o3/alpha/dyn_inst_impl.hh
@@ -31,10 +31,25 @@
 #include "cpu/o3/alpha/dyn_inst.hh"
 
 template <class Impl>
-AlphaDynInst<Impl>::AlphaDynInst(ExtMachInst inst, Addr PC, Addr NPC,
+AlphaDynInst<Impl>::AlphaDynInst(StaticInstPtr staticInst,
+                                 Addr PC, Addr NPC, Addr microPC,
                                  Addr Pred_PC, Addr Pred_NPC,
+                                 Addr Pred_MicroPC,
                                  InstSeqNum seq_num, O3CPU *cpu)
-    : BaseDynInst<Impl>(inst, PC, NPC, Pred_PC, Pred_NPC, seq_num, cpu)
+    : BaseDynInst<Impl>(staticInst, PC, NPC, microPC,
+            Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu)
+{
+    initVars();
+}
+
+template <class Impl>
+AlphaDynInst<Impl>::AlphaDynInst(ExtMachInst inst,
+                                 Addr PC, Addr NPC, Addr microPC,
+                                 Addr Pred_PC, Addr Pred_NPC,
+                                 Addr Pred_MicroPC,
+                                 InstSeqNum seq_num, O3CPU *cpu)
+    : BaseDynInst<Impl>(inst, PC, NPC, microPC,
+            Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu)
 {
     initVars();
 }
diff --git a/src/cpu/o3/mips/dyn_inst.hh b/src/cpu/o3/mips/dyn_inst.hh
index 366b4bb23..cf78c0941 100755
--- a/src/cpu/o3/mips/dyn_inst.hh
+++ b/src/cpu/o3/mips/dyn_inst.hh
@@ -69,10 +69,16 @@ class MipsDynInst : public BaseDynInst<Impl>
     };
 
   public:
+    /** BaseDynInst constructor given a static instruction. */
+    MipsDynInst(StaticInstPtr staticInst,
+                Addr PC, Addr NPC, Addr microPC,
+                Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
+                InstSeqNum seq_num, O3CPU *cpu);
+
     /** BaseDynInst constructor given a binary instruction. */
     MipsDynInst(ExtMachInst inst,
-                Addr PC, Addr NPC,
-                Addr Pred_PC, Addr Pred_NPC,
+                Addr PC, Addr NPC, Addr microPC,
+                Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
                 InstSeqNum seq_num, O3CPU *cpu);
 
     /** BaseDynInst constructor given a static inst pointer. */
diff --git a/src/cpu/o3/mips/dyn_inst_impl.hh b/src/cpu/o3/mips/dyn_inst_impl.hh
index c0f9ae771..7e8697b32 100755
--- a/src/cpu/o3/mips/dyn_inst_impl.hh
+++ b/src/cpu/o3/mips/dyn_inst_impl.hh
@@ -30,12 +30,24 @@
 
 #include "cpu/o3/mips/dyn_inst.hh"
 
+template <class Impl>
+MipsDynInst<Impl>::MipsDynInst(StaticInstPtr staticInst,
+                               Addr PC, Addr NPC, Addr microPC,
+                               Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
+                               InstSeqNum seq_num, O3CPU *cpu)
+    : BaseDynInst<Impl>(staticInst, PC, NPC, microPC,
+            Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu)
+{
+    initVars();
+}
+
 template <class Impl>
 MipsDynInst<Impl>::MipsDynInst(ExtMachInst inst,
-                               Addr PC, Addr NPC,
-                               Addr Pred_PC, Addr Pred_NPC,
+                               Addr PC, Addr NPC, Addr microPC,
+                               Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
                                InstSeqNum seq_num, O3CPU *cpu)
-    : BaseDynInst<Impl>(inst, PC, NPC, Pred_PC, Pred_NPC, seq_num, cpu)
+    : BaseDynInst<Impl>(inst, PC, NPC, microPC,
+            Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu)
 {
     initVars();
 }
-- 
cgit v1.2.3


From 8248af53b19a633ae6d9aa8cd6b5a12cfa3b1644 Mon Sep 17 00:00:00 2001
From: Gabe Black <gblack@eecs.umich.edu>
Date: Sun, 15 Apr 2007 21:52:38 +0000
Subject: Make an inner loop which pulls microops out of macroops. These aren't
 checked for control flow because we can pull out microops until we run out of
 buffer. This prevents microops from being interpreted as branches because
 the pc doesn't become npc.

--HG--
extra : convert_revision : 9fff7c6c32900692bbc567ecb75701c9c73da259
---
 src/cpu/o3/fetch_impl.hh | 127 ++++++++++++++++++++++++-----------------------
 1 file changed, 64 insertions(+), 63 deletions(-)

diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index d1f38e38b..3ae7bc402 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -1094,11 +1094,9 @@ DefaultFetch<Impl>::fetch(bool &status_change)
         // ended this fetch block.
         bool predicted_branch = false;
 
-        for (;
-             offset < cacheBlkSize &&
-                 numInst < fetchWidth &&
-                 !predicted_branch;
-             ++numInst) {
+        while (offset < cacheBlkSize &&
+               numInst < fetchWidth &&
+               !predicted_branch) {
 
             // If we're branching after this instruction, quite fetching
             // from the same block then.
@@ -1109,10 +1107,6 @@ DefaultFetch<Impl>::fetch(bool &status_change)
                         fetch_PC, fetch_NPC);
             }
 
-
-            // Get a sequence number.
-            inst_seq = cpu->getAndIncrementInstSeq();
-
             // Make sure this is a valid index.
             assert(offset <= cacheBlkSize - instSize);
 
@@ -1129,80 +1123,87 @@ DefaultFetch<Impl>::fetch(bool &status_change)
                 if (staticInst->isMacroOp())
                     macroop = staticInst;
             }
-            if (macroop) {
-                staticInst = macroop->fetchMicroOp(fetch_MicroPC);
-                if (staticInst->isLastMicroOp())
-                    macroop = NULL;
-            }
+            do {
+                if (macroop) {
+                    staticInst = macroop->fetchMicroOp(fetch_MicroPC);
+                    if (staticInst->isLastMicroOp())
+                        macroop = NULL;
+                }
 
-            // Create a new DynInst from the instruction fetched.
-            DynInstPtr instruction = new DynInst(staticInst,
-                                                 fetch_PC, fetch_NPC, fetch_MicroPC,
-                                                 next_PC, next_NPC, next_MicroPC,
-                                                 inst_seq, cpu);
-            instruction->setTid(tid);
+                // Get a sequence number.
+                inst_seq = cpu->getAndIncrementInstSeq();
 
-            instruction->setASID(tid);
+                // Create a new DynInst from the instruction fetched.
+                DynInstPtr instruction = new DynInst(staticInst,
+                                                     fetch_PC, fetch_NPC, fetch_MicroPC,
+                                                     next_PC, next_NPC, next_MicroPC,
+                                                     inst_seq, cpu);
+                instruction->setTid(tid);
 
-            instruction->setThreadState(cpu->thread[tid]);
+                instruction->setASID(tid);
 
-            DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x created "
-                    "[sn:%lli]\n",
-                    tid, instruction->readPC(), inst_seq);
+                instruction->setThreadState(cpu->thread[tid]);
 
-            //DPRINTF(Fetch, "[tid:%i]: MachInst is %#x\n", tid, ext_inst);
+                DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x created "
+                        "[sn:%lli]\n",
+                        tid, instruction->readPC(), inst_seq);
 
-            DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n",
-                    tid, instruction->staticInst->disassemble(fetch_PC));
+                //DPRINTF(Fetch, "[tid:%i]: MachInst is %#x\n", tid, ext_inst);
 
-            instruction->traceData =
-                Trace::getInstRecord(curTick, cpu->tcBase(tid),
-                                     instruction->staticInst,
-                                     instruction->readPC());
+                DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n",
+                        tid, instruction->staticInst->disassemble(fetch_PC));
 
-            ///FIXME This needs to be more robust in dealing with delay slots
-            lookupAndUpdateNextPC(instruction, next_PC, next_NPC, next_MicroPC);
-            predicted_branch |= (next_PC != fetch_NPC);
+                instruction->traceData =
+                    Trace::getInstRecord(curTick, cpu->tcBase(tid),
+                                         instruction->staticInst,
+                                         instruction->readPC());
 
-            // Add instruction to the CPU's list of instructions.
-            instruction->setInstListIt(cpu->addInst(instruction));
+                ///FIXME This needs to be more robust in dealing with delay slots
+                predicted_branch |=
+                    lookupAndUpdateNextPC(instruction, next_PC, next_NPC, next_MicroPC);
 
-            // Write the instruction to the first slot in the queue
-            // that heads to decode.
-            toDecode->insts[numInst] = instruction;
+                // Add instruction to the CPU's list of instructions.
+                instruction->setInstListIt(cpu->addInst(instruction));
 
-            toDecode->size++;
+                // Write the instruction to the first slot in the queue
+                // that heads to decode.
+                toDecode->insts[numInst] = instruction;
 
-            // Increment stat of fetched instructions.
-            ++fetchedInsts;
+                toDecode->size++;
 
-            // Move to the next instruction, unless we have a branch.
-            fetch_PC = next_PC;
-            fetch_NPC = next_NPC;
-            fetch_MicroPC = next_MicroPC;
+                // Increment stat of fetched instructions.
+                ++fetchedInsts;
 
-            if (instruction->isQuiesce()) {
-                DPRINTF(Fetch, "Quiesce instruction encountered, halting fetch!",
-                        curTick);
-                fetchStatus[tid] = QuiescePending;
-                ++numInst;
-                status_change = true;
-                break;
-            }
+                // Move to the next instruction, unless we have a branch.
+                fetch_PC = next_PC;
+                fetch_NPC = next_NPC;
+                fetch_MicroPC = next_MicroPC;
+
+                if (instruction->isQuiesce()) {
+                    DPRINTF(Fetch, "Quiesce instruction encountered, halting fetch!",
+                            curTick);
+                    fetchStatus[tid] = QuiescePending;
+                    ++numInst;
+                    status_change = true;
+                    break;
+                }
 
-            if (!macroop)
-                offset += instSize;
+                ++numInst;
+            } while (staticInst->isMicroOp() &&
+                     !staticInst->isLastMicroOp() &&
+                     numInst < fetchWidth);
+            offset += instSize;
         }
 
-        if (offset >= cacheBlkSize) {
-            DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of cache "
-                    "block.\n", tid);
+        if (predicted_branch) {
+            DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch "
+                    "instruction encountered.\n", tid);
         } else if (numInst >= fetchWidth) {
             DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth "
                     "for this cycle.\n", tid);
-        } else if (predicted_branch) {
-            DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch "
-                    "instruction encountered.\n", tid);
+        } else if (offset >= cacheBlkSize) {
+            DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of cache "
+                    "block.\n", tid);
         }
     }
 
-- 
cgit v1.2.3


From cea543576082ed860e8dae17519ace48e5b2c78a Mon Sep 17 00:00:00 2001
From: Gabe Black <gblack@eecs.umich.edu>
Date: Sun, 22 Apr 2007 17:43:45 +0000
Subject: Make the GSR into a renamed control register. It should be split into
 a renamed part and a control part for the different bitfields, but the
 renamed part is all that's actually used.

--HG--
extra : convert_revision : ffeb4f874bd4430255064f6e8bcb135309932ff8
---
 src/arch/sparc/isa/operands.isa | 3 ++-
 src/arch/sparc/isa_traits.hh    | 2 +-
 src/arch/sparc/sparc_traits.hh  | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/arch/sparc/isa/operands.isa b/src/arch/sparc/isa/operands.isa
index 110b37d15..a627a2e6f 100644
--- a/src/arch/sparc/isa/operands.isa
+++ b/src/arch/sparc/isa/operands.isa
@@ -149,7 +149,8 @@ def operands {{
     'Fprs':		('ControlReg', 'udw', 'MISCREG_FPRS', None, 43),
     'Pcr':		('ControlReg', 'udw', 'MISCREG_PCR', None, 44),
     'Pic':		('ControlReg', 'udw', 'MISCREG_PIC', None, 45),
-    'Gsr':		('ControlReg', 'udw', 'MISCREG_GSR', None, 46),
+#   'Gsr':		('ControlReg', 'udw', 'MISCREG_GSR', (None, None, ['IsSerializeAfter','IsSerializing','IsNonSpeculative']), 46),
+    'Gsr':		('IntReg', 'udw', 'NumIntArchRegs + 8', None, 46),
     'Softint':		('ControlReg', 'udw', 'MISCREG_SOFTINT', None, 47),
     'SoftintSet':	('ControlReg', 'udw', 'MISCREG_SOFTINT_SET', None, 48),
     'SoftintClr':	('ControlReg', 'udw', 'MISCREG_SOFTINT_CLR', None, 49),
diff --git a/src/arch/sparc/isa_traits.hh b/src/arch/sparc/isa_traits.hh
index d0b8827f3..8b3ec36a6 100644
--- a/src/arch/sparc/isa_traits.hh
+++ b/src/arch/sparc/isa_traits.hh
@@ -58,7 +58,7 @@ namespace SparcISA
 
     // These enumerate all the registers for dependence tracking.
     enum DependenceTags {
-        FP_Base_DepTag = 32*3+8,
+        FP_Base_DepTag = 32*3+9,
         Ctrl_Base_DepTag = FP_Base_DepTag + 64
     };
 
diff --git a/src/arch/sparc/sparc_traits.hh b/src/arch/sparc/sparc_traits.hh
index d89ec1119..715c08c03 100644
--- a/src/arch/sparc/sparc_traits.hh
+++ b/src/arch/sparc/sparc_traits.hh
@@ -42,7 +42,7 @@ namespace SparcISA
     // Number of register windows, can legally be 3 to 32
     const int NWindows = 8;
     //const int NumMicroIntRegs = 1;
-    const int NumMicroIntRegs = 8;
+    const int NumMicroIntRegs = 9;
 
 //    const int NumRegularIntRegs = MaxGL * 8 + NWindows * 16;
 //    const int NumMicroIntRegs = 1;
-- 
cgit v1.2.3


From acc62514b1a4244182a7e5fad8ca03505389d94d Mon Sep 17 00:00:00 2001
From: Gabe Black <gblack@eecs.umich.edu>
Date: Sun, 22 Apr 2007 17:50:43 +0000
Subject: Make the floating point zero register special handling only apply for
 ALPHA.

--HG--
extra : convert_revision : 4f393a5471656b29cecbacfcb337992239775915
---
 src/cpu/o3/free_list.hh  |  2 ++
 src/cpu/o3/regfile.hh    |  4 ++++
 src/cpu/o3/rename_map.cc |  4 ++++
 src/cpu/o3/scoreboard.cc | 15 +++++++++++++++
 4 files changed, 25 insertions(+)

diff --git a/src/cpu/o3/free_list.hh b/src/cpu/o3/free_list.hh
index c669b0b34..42fc0c533 100644
--- a/src/cpu/o3/free_list.hh
+++ b/src/cpu/o3/free_list.hh
@@ -168,7 +168,9 @@ SimpleFreeList::addReg(PhysRegIndex freed_reg)
         if (freed_reg != TheISA::ZeroReg)
             freeIntRegs.push(freed_reg);
     } else if (freed_reg < numPhysicalRegs) {
+#if THE_ISA == ALPHA_ISA
         if (freed_reg != (TheISA::ZeroReg + numPhysicalIntRegs))
+#endif
             freeFloatRegs.push(freed_reg);
     }
 }
diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh
index b5b1cd021..75d3fa6eb 100644
--- a/src/cpu/o3/regfile.hh
+++ b/src/cpu/o3/regfile.hh
@@ -179,7 +179,9 @@ class PhysRegFile
         DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n",
                 int(reg_idx), (uint64_t)val);
 
+#if THE_ISA == ALPHA_ISA
         if (reg_idx != TheISA::ZeroReg)
+#endif
             floatRegFile[reg_idx].d = val;
     }
 
@@ -194,7 +196,9 @@ class PhysRegFile
         DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n",
                 int(reg_idx), (uint64_t)val);
 
+#if THE_ISA == ALPHA_ISA
         if (reg_idx != TheISA::ZeroReg)
+#endif
             floatRegFile[reg_idx].d = val;
     }
 
diff --git a/src/cpu/o3/rename_map.cc b/src/cpu/o3/rename_map.cc
index b436ec1c3..e6649ce3e 100644
--- a/src/cpu/o3/rename_map.cc
+++ b/src/cpu/o3/rename_map.cc
@@ -165,17 +165,21 @@ SimpleRenameMap::rename(RegIndex arch_reg)
 
         // If it's not referencing the zero register, then rename the
         // register.
+#if THE_ISA == ALPHA_ISA
         if (arch_reg != floatZeroReg) {
+#endif
             renamed_reg = freeList->getFloatReg();
 
             floatRenameMap[arch_reg].physical_reg = renamed_reg;
 
             assert(renamed_reg < numPhysicalRegs &&
                    renamed_reg >= numPhysicalIntRegs);
+#if THE_ISA == ALPHA_ISA
         } else {
             // Otherwise return the zero register so nothing bad happens.
             renamed_reg = floatZeroReg;
         }
+#endif
     } else {
         // Subtract off the base offset for miscellaneous registers.
         arch_reg = arch_reg - numLogicalRegs;
diff --git a/src/cpu/o3/scoreboard.cc b/src/cpu/o3/scoreboard.cc
index 1859b35a4..e7f8b7949 100644
--- a/src/cpu/o3/scoreboard.cc
+++ b/src/cpu/o3/scoreboard.cc
@@ -29,6 +29,7 @@
  *          Kevin Lim
  */
 
+#include "arch/isa_specific.hh"
 #include "cpu/o3/scoreboard.hh"
 
 Scoreboard::Scoreboard(unsigned activeThreads,
@@ -79,11 +80,18 @@ Scoreboard::name() const
 bool
 Scoreboard::getReg(PhysRegIndex phys_reg)
 {
+#if THE_ISA == ALPHA_ISA
     // Always ready if int or fp zero reg.
     if (phys_reg == zeroRegIdx ||
         phys_reg == (zeroRegIdx + numPhysicalIntRegs)) {
         return 1;
     }
+#else
+    // Always ready if int zero reg.
+    if (phys_reg == zeroRegIdx) {
+        return 1;
+    }
+#endif
 
     return regScoreBoard[phys_reg];
 }
@@ -99,11 +107,18 @@ Scoreboard::setReg(PhysRegIndex phys_reg)
 void
 Scoreboard::unsetReg(PhysRegIndex ready_reg)
 {
+#if THE_ISA == ALPHA_ISA
     if (ready_reg == zeroRegIdx ||
         ready_reg == (zeroRegIdx + numPhysicalIntRegs)) {
         // Don't do anything if int or fp zero reg.
         return;
     }
+#else
+    if (ready_reg == zeroRegIdx) {
+        // Don't do anything if int zero reg.
+        return;
+    }
+#endif
 
     regScoreBoard[ready_reg] = 0;
 }
-- 
cgit v1.2.3


From f0929006965514982603fe58ebc3211acf021cce Mon Sep 17 00:00:00 2001
From: Gabe Black <gblack@eecs.umich.edu>
Date: Sun, 22 Apr 2007 19:30:47 +0000
Subject: Change mcf to use smred inputs so it doesn't take two days to run in
 o3.

--HG--
extra : convert_revision : 6bf58f5ea12afc56dff6237640fbeded58b9951e
---
 tests/long/10.mcf/test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/long/10.mcf/test.py b/tests/long/10.mcf/test.py
index ffe2758f1..cb8acff22 100644
--- a/tests/long/10.mcf/test.py
+++ b/tests/long/10.mcf/test.py
@@ -29,5 +29,5 @@
 m5.AddToPath('../configs/common')
 from cpu2000 import mcf
 
-workload = mcf(isa, opsys, 'lgred')
+workload = mcf(isa, opsys, 'smred')
 root.system.cpu.workload = workload.makeLiveProcess()
-- 
cgit v1.2.3

-- 
cgit v1.2.3


From dc1c9e03007f084caabc995b41616603e0a004dc Mon Sep 17 00:00:00 2001
From: Gabe Black <gblack@eecs.umich.edu>
Date: Tue, 8 May 2007 13:02:19 +0000
Subject: Add a hack to truncate addresses to 32 bits in SE. Paging should be
 changed to use the architecture's TLB, at which point this can be removed.

--HG--
extra : convert_revision : 54f3c18e5aead727d0ac244ed00fd97d3ca8ad75
---
 src/arch/sparc/isa/formats/mem/basicmem.isa |  6 ++++--
 src/arch/sparc/isa/formats/mem/blockmem.isa |  6 ++++--
 src/arch/sparc/isa/formats/mem/swap.isa     |  8 ++++++--
 src/arch/sparc/isa/formats/mem/util.isa     | 12 +++++++++++-
 src/arch/sparc/process.cc                   |  4 ++--
 5 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/src/arch/sparc/isa/formats/mem/basicmem.isa b/src/arch/sparc/isa/formats/mem/basicmem.isa
index 2f62c7bef..aa6c4cdea 100644
--- a/src/arch/sparc/isa/formats/mem/basicmem.isa
+++ b/src/arch/sparc/isa/formats/mem/basicmem.isa
@@ -57,10 +57,12 @@ let {{
         addrCalcImm = 'EA = Rs1 + imm;'
         iop = InstObjParams(name, Name, 'Mem',
                 {"code": code, "postacc_code" : postacc_code,
-                 "fault_check": faultCode, "ea_code": addrCalcReg}, opt_flags)
+                 "fault_check": faultCode, "ea_code": addrCalcReg,
+                 "EA_trunc": TruncateEA}, opt_flags)
         iop_imm = InstObjParams(name, Name + "Imm", 'MemImm',
                 {"code": code, "postacc_code" : postacc_code,
-                "fault_check": faultCode, "ea_code": addrCalcImm}, opt_flags)
+                 "fault_check": faultCode, "ea_code": addrCalcImm,
+                 "EA_trunc": TruncateEA}, opt_flags)
         header_output = MemDeclare.subst(iop) + MemDeclare.subst(iop_imm)
         decoder_output = BasicConstructor.subst(iop) + BasicConstructor.subst(iop_imm)
         decode_block = ROrImmDecode.subst(iop)
diff --git a/src/arch/sparc/isa/formats/mem/blockmem.isa b/src/arch/sparc/isa/formats/mem/blockmem.isa
index e19016bd0..ea74ef179 100644
--- a/src/arch/sparc/isa/formats/mem/blockmem.isa
+++ b/src/arch/sparc/isa/formats/mem/blockmem.isa
@@ -298,11 +298,13 @@ let {{
             iop = InstObjParams(name, Name, 'BlockMem',
                     {"code": pcedCode, "ea_code": addrCalcReg,
                     "fault_check": faultCode, "micro_pc": microPc,
-                    "set_flags": flag_code}, opt_flags)
+                    "set_flags": flag_code, "EA_trunc" : TruncateEA},
+                    opt_flags)
             iop_imm = InstObjParams(name, Name + 'Imm', 'BlockMemImm',
                     {"code": pcedCode, "ea_code": addrCalcImm,
                     "fault_check": faultCode, "micro_pc": microPc,
-                    "set_flags": flag_code}, opt_flags)
+                    "set_flags": flag_code, "EA_trunc" : TruncateEA},
+                    opt_flags)
             decoder_output += BlockMemMicroConstructor.subst(iop)
             decoder_output += BlockMemMicroConstructor.subst(iop_imm)
             exec_output += doDualSplitExecute(
diff --git a/src/arch/sparc/isa/formats/mem/swap.isa b/src/arch/sparc/isa/formats/mem/swap.isa
index b71542a2b..3814d1030 100644
--- a/src/arch/sparc/isa/formats/mem/swap.isa
+++ b/src/arch/sparc/isa/formats/mem/swap.isa
@@ -51,6 +51,7 @@ def template SwapExecute {{
             }
             if(storeCond && fault == NoFault)
             {
+                %(EA_trunc)s
                 fault = xc->write((uint%(mem_acc_size)s_t)Mem,
                         EA, %(asi_val)s, &mem_data);
             }
@@ -91,6 +92,7 @@ def template SwapInitiateAcc {{
             }
             if(fault == NoFault)
             {
+                %(EA_trunc)s
                 fault = xc->write((uint%(mem_acc_size)s_t)Mem,
                         EA, %(asi_val)s, &mem_data);
             }
@@ -157,12 +159,14 @@ let {{
         addrCalcReg = 'EA = Rs1;'
         iop = InstObjParams(name, Name, 'Mem',
                 {"code": code, "postacc_code" : postacc_code,
-                 "fault_check": faultCode, "ea_code": addrCalcReg}, opt_flags)
+                 "fault_check": faultCode, "ea_code": addrCalcReg,
+                 "EA_trunc" : TruncateEA}, opt_flags)
         header_output = MemDeclare.subst(iop)
         decoder_output = BasicConstructor.subst(iop)
         decode_block = BasicDecode.subst(iop)
         microParams = {"code": code, "postacc_code" : postacc_code,
-            "ea_code" : addrCalcReg, "fault_check" : faultCode}
+            "ea_code" : addrCalcReg, "fault_check" : faultCode,
+            "EA_trunc" : TruncateEA}
         exec_output = doSplitExecute(execute, name, Name, asi,
                 ["IsStoreConditional"], microParams);
         return (header_output, decoder_output, exec_output, decode_block)
diff --git a/src/arch/sparc/isa/formats/mem/util.isa b/src/arch/sparc/isa/formats/mem/util.isa
index dfe937371..38cde9a50 100644
--- a/src/arch/sparc/isa/formats/mem/util.isa
+++ b/src/arch/sparc/isa/formats/mem/util.isa
@@ -149,6 +149,7 @@ def template LoadExecute {{
             %(fault_check)s;
             if(fault == NoFault)
             {
+                %(EA_trunc)s
                 fault = xc->read(EA, (%(mem_acc_type)s%(mem_acc_size)s_t&)Mem, %(asi_val)s);
             }
             if(fault == NoFault)
@@ -179,6 +180,7 @@ def template LoadInitiateAcc {{
             %(fault_check)s;
             if(fault == NoFault)
             {
+                %(EA_trunc)s
                 fault = xc->read(EA, (%(mem_acc_type)s%(mem_acc_size)s_t&)Mem, %(asi_val)s);
             }
             return fault;
@@ -224,6 +226,7 @@ def template StoreExecute {{
             }
             if(storeCond && fault == NoFault)
             {
+                %(EA_trunc)s
                 fault = xc->write((%(mem_acc_type)s%(mem_acc_size)s_t)Mem,
                         EA, %(asi_val)s, 0);
             }
@@ -257,6 +260,7 @@ def template StoreInitiateAcc {{
             }
             if(storeCond && fault == NoFault)
             {
+                %(EA_trunc)s
                 fault = xc->write((%(mem_acc_type)s%(mem_acc_size)s_t)Mem,
                         EA, %(asi_val)s, 0);
             }
@@ -317,6 +321,11 @@ let {{
             fault = new PrivilegedAction;
     '''
 
+    TruncateEA = '''
+#if !FULL_SYSTEM
+                EA = Pstate<3:> ? EA<31:0> : EA;
+#endif
+    '''
 }};
 
 //A simple function to generate the name of the macro op of a certain
@@ -346,7 +355,8 @@ let {{
                 (eaRegCode, nameReg, NameReg),
                 (eaImmCode, nameImm, NameImm)):
             microParams = {"code": code, "postacc_code" : postacc_code,
-                "ea_code": eaCode, "fault_check": faultCode}
+                "ea_code": eaCode, "fault_check": faultCode,
+                "EA_trunc" : TruncateEA}
             executeCode += doSplitExecute(execute, name, Name,
                     asi, opt_flags, microParams)
         return executeCode
diff --git a/src/arch/sparc/process.cc b/src/arch/sparc/process.cc
index e4774ab54..11fa9be28 100644
--- a/src/arch/sparc/process.cc
+++ b/src/arch/sparc/process.cc
@@ -87,8 +87,8 @@ Sparc32LiveProcess::startup()
 
     //From the SPARC ABI
 
-    //The process runs in user mode
-    threadContexts[0]->setMiscReg(MISCREG_PSTATE, 0x02);
+    //The process runs in user mode with 32 bit addresses
+    threadContexts[0]->setMiscReg(MISCREG_PSTATE, 0x0a);
 
     //Setup default FP state
     threadContexts[0]->setMiscRegNoEffect(MISCREG_FSR, 0);
-- 
cgit v1.2.3


From c2ac0fd89b9928f653e1485b2432cd71b455d7c5 Mon Sep 17 00:00:00 2001
From: Gabe Black <gblack@eecs.umich.edu>
Date: Tue, 8 May 2007 17:19:33 +0000
Subject: Fix insertBits so it doesn't shift things into oblivion

--HG--
extra : convert_revision : 8833b60e3fc94c917fbdb7a99f3d90155907b44e
---
 src/base/bitfield.hh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/base/bitfield.hh b/src/base/bitfield.hh
index 69cce2245..518bad6b8 100644
--- a/src/base/bitfield.hh
+++ b/src/base/bitfield.hh
@@ -96,8 +96,9 @@ inline
 T
 insertBits(T val, int first, int last, B bit_val)
 {
+    T t_bit_val = bit_val;
     T bmask = mask(first - last + 1) << last;
-    return ((bit_val << last) & bmask) | (val & ~bmask);
+    return ((t_bit_val << last) & bmask) | (val & ~bmask);
 }
 
 /**
-- 
cgit v1.2.3


From debf04aef1b0f662e981507545cdac956dd22a47 Mon Sep 17 00:00:00 2001
From: Gabe Black <gblack@eecs.umich.edu>
Date: Sat, 12 May 2007 15:11:44 -0700
Subject: Make sure all addresses used in syscalls are truncated to 32 bits.
 Actually -all- arguments are truncated to 32 bits, but we should be able to
 get away with it.

--HG--
extra : convert_revision : 3b8766c68a4ab36e2e769fac4812657f3f7e0d1c
---
 src/cpu/o3/sparc/cpu_impl.hh | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/cpu/o3/sparc/cpu_impl.hh b/src/cpu/o3/sparc/cpu_impl.hh
index 50d980f55..2e398577e 100644
--- a/src/cpu/o3/sparc/cpu_impl.hh
+++ b/src/cpu/o3/sparc/cpu_impl.hh
@@ -272,7 +272,10 @@ SparcO3CPU<Impl>::getSyscallArg(int i, int tid)
 {
     TheISA::IntReg idx = TheISA::flattenIntIndex(this->tcBase(tid),
             SparcISA::ArgumentReg0 + i);
-    return this->readArchIntReg(idx, tid);
+    TheISA::IntReg val = this->readArchIntReg(idx, tid);
+    if (bits(this->readMiscRegNoEffect(SparcISA::MISCREG_PSTATE, tid), 3, 3))
+        val = bits(val, 31, 0);
+    return val;
 }
 
 template <class Impl>
-- 
cgit v1.2.3


From df7730b6774a730d554bfaa469ad95eeeffd3dc9 Mon Sep 17 00:00:00 2001
From: Gabe Black <gblack@eecs.umich.edu>
Date: Wed, 20 Jun 2007 19:46:45 -0700
Subject: Fix compiler errors.

--HG--
extra : convert_revision : 2b10076a24cb36cb748e299011ae691f09c158cd
---
 src/cpu/base_dyn_inst.hh      |  8 ++++----
 src/cpu/base_dyn_inst_impl.hh |  6 +++---
 src/cpu/o3/fetch_impl.hh      | 23 +++++++++++------------
 3 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index 1311e5cf2..a55c1e3c0 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -498,11 +498,11 @@ class BaseDynInst : public FastAlloc, public RefCounted
     bool isQuiesce() const { return staticInst->isQuiesce(); }
     bool isIprAccess() const { return staticInst->isIprAccess(); }
     bool isUnverifiable() const { return staticInst->isUnverifiable(); }
-    bool isMacroOp() const { return staticInst->isMacroOp(); }
-    bool isMicroOp() const { return staticInst->isMicroOp(); }
+    bool isMacroop() const { return staticInst->isMacroop(); }
+    bool isMicroop() const { return staticInst->isMicroop(); }
     bool isDelayedCommit() const { return staticInst->isDelayedCommit(); }
-    bool isLastMicroOp() const { return staticInst->isLastMicroOp(); }
-    bool isFirstMicroOp() const { return staticInst->isFirstMicroOp(); }
+    bool isLastMicroop() const { return staticInst->isLastMicroop(); }
+    bool isFirstMicroop() const { return staticInst->isFirstMicroop(); }
     bool isMicroBranch() const { return staticInst->isMicroBranch(); }
 
     /** Temporarily sets this instruction as a serialize before instruction. */
diff --git a/src/cpu/base_dyn_inst_impl.hh b/src/cpu/base_dyn_inst_impl.hh
index acf8af9cf..5c18ae694 100644
--- a/src/cpu/base_dyn_inst_impl.hh
+++ b/src/cpu/base_dyn_inst_impl.hh
@@ -73,7 +73,7 @@ BaseDynInst<Impl>::BaseDynInst(StaticInstPtr _staticInst,
     seqNum = seq_num;
 
     bool nextIsMicro =
-        staticInst->isMicroOp() && !staticInst->isLastMicroOp();
+        staticInst->isMicroop() && !staticInst->isLastMicroop();
 
     PC = inst_PC;
     microPC = inst_MicroPC;
@@ -101,12 +101,12 @@ BaseDynInst<Impl>::BaseDynInst(TheISA::ExtMachInst inst,
                                Addr pred_PC, Addr pred_NPC,
                                Addr pred_MicroPC,
                                InstSeqNum seq_num, ImplCPU *cpu)
-  : staticInst(inst), traceData(NULL), cpu(cpu)
+  : staticInst(inst, inst_PC), traceData(NULL), cpu(cpu)
 {
     seqNum = seq_num;
 
     bool nextIsMicro =
-        staticInst->isMicroOp() && !staticInst->isLastMicroOp();
+        staticInst->isMicroop() && !staticInst->isLastMicroop();
 
     PC = inst_PC;
     microPC = inst_MicroPC;
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index 0fd1e7bac..857a08629 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -498,7 +498,7 @@ DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC,
     bool predict_taken;
 
     if (!inst->isControl()) {
-        if (inst->isMicroOp() && !inst->isLastMicroOp()) {
+        if (inst->isMicroop() && !inst->isLastMicroop()) {
             next_MicroPC++;
         } else {
             next_PC  = next_NPC;
@@ -1120,14 +1120,14 @@ DefaultFetch<Impl>::fetch(bool &status_change)
                 predecoder.moreBytes(fetch_PC, fetch_PC, 0, inst);
 
                 ext_inst = predecoder.getExtMachInst();
-                staticInst = StaticInstPtr(ext_inst);
-                if (staticInst->isMacroOp())
+                staticInst = StaticInstPtr(ext_inst, fetch_PC);
+                if (staticInst->isMacroop())
                     macroop = staticInst;
             }
             do {
                 if (macroop) {
-                    staticInst = macroop->fetchMicroOp(fetch_MicroPC);
-                    if (staticInst->isLastMicroOp())
+                    staticInst = macroop->fetchMicroop(fetch_MicroPC);
+                    if (staticInst->isLastMicroop())
                         macroop = NULL;
                 }
 
@@ -1194,8 +1194,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
                 }
 
                 ++numInst;
-            } while (staticInst->isMicroOp() &&
-                     !staticInst->isLastMicroOp() &&
+            } while (staticInst->isMicroop() &&
+                     !staticInst->isLastMicroop() &&
                      numInst < fetchWidth);
             offset += instSize;
         }
@@ -1240,13 +1240,12 @@ DefaultFetch<Impl>::fetch(bool &status_change)
         // We will use a nop in order to carry the fault.
         ext_inst = TheISA::NoopMachInst;
 
-        StaticInstPtr staticInst = new StaticInst(ext_inst);
         // Create a new DynInst from the dummy nop.
-        DynInstPtr instruction = new DynInst(staticInst,
-                                             fetch_PC, fetch_NPC,
-                                             next_PC, next_NPC,
+        DynInstPtr instruction = new DynInst(ext_inst,
+                                             fetch_PC, fetch_NPC, fetch_MicroPC,
+                                             next_PC, next_NPC, next_MicroPC,
                                              inst_seq, cpu);
-        instruction->setPredTarg(next_PC, next_NPC);
+        instruction->setPredTarg(next_PC, next_NPC, 1);
         instruction->setTid(tid);
 
         instruction->setASID(tid);
-- 
cgit v1.2.3


From afd00820004984de085a727e60e25742a69d9c51 Mon Sep 17 00:00:00 2001
From: Gabe Black <gblack@eecs.umich.edu>
Date: Wed, 20 Jun 2007 19:48:25 -0700
Subject: long is too long

--HG--
extra : convert_revision : 7342cd4a1700a247f30d6f85fc6c2685341ba20e
---
 tests/long/20.parser/test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/long/20.parser/test.py b/tests/long/20.parser/test.py
index 82ab71c90..8e745ec26 100644
--- a/tests/long/20.parser/test.py
+++ b/tests/long/20.parser/test.py
@@ -29,5 +29,5 @@
 m5.AddToPath('../configs/common')
 from cpu2000 import parser
 
-workload = parser(isa, opsys, 'lgred')
+workload = parser(isa, opsys, 'mdred')
 root.system.cpu.workload = workload.makeLiveProcess()
-- 
cgit v1.2.3


From ec24de8b59e174b93b7c42669d71fe61db296688 Mon Sep 17 00:00:00 2001
From: Gabe Black <gblack@eecs.umich.edu>
Date: Thu, 21 Jun 2007 20:35:26 +0000
Subject: Get rid of an unnecessary include file.

--HG--
extra : convert_revision : d8d139180917f54006a5a79df4a0f206ddd39fed
---
 src/arch/x86/isa/includes.isa | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/arch/x86/isa/includes.isa b/src/arch/x86/isa/includes.isa
index 3ef204850..4f27c72f5 100644
--- a/src/arch/x86/isa/includes.isa
+++ b/src/arch/x86/isa/includes.isa
@@ -103,7 +103,6 @@ output header {{
 #include "base/misc.hh"
 #include "cpu/static_inst.hh"
 #include "mem/packet.hh"
-#include "mem/request.hh"  // some constructors use MemReq flags
 #include "sim/faults.hh"
 }};
 
-- 
cgit v1.2.3


From 70d6044527d6e6dfaf2de6674ae412706b6e131c Mon Sep 17 00:00:00 2001
From: Gabe Black <gblack@eecs.umich.edu>
Date: Thu, 21 Jun 2007 20:35:27 +0000
Subject: Make symbols for regular registers.

--HG--
extra : convert_revision : 28a6df1efe4298877dc2b20179caeb25dfdc4622
---
 src/arch/x86/isa/insts/control_transfer/call.py          | 4 ++--
 src/arch/x86/isa/insts/data_transfer/stack_operations.py | 8 ++++----
 src/arch/x86/isa/microasm.isa                            | 3 +++
 3 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/arch/x86/isa/insts/control_transfer/call.py b/src/arch/x86/isa/insts/control_transfer/call.py
index 1372f7dba..530162bfd 100644
--- a/src/arch/x86/isa/insts/control_transfer/call.py
+++ b/src/arch/x86/isa/insts/control_transfer/call.py
@@ -61,8 +61,8 @@ def macroop CALL_I
 
     limm t2, imm
     rdip t1
-    subi "INTREG_RSP", "INTREG_RSP", dsz
-    st t1, ss, [0, t0, "INTREG_RSP"]
+    subi rsp, rsp, dsz
+    st t1, ss, [0, t0, rsp]
     wrip t1, t2
 };
 '''
diff --git a/src/arch/x86/isa/insts/data_transfer/stack_operations.py b/src/arch/x86/isa/insts/data_transfer/stack_operations.py
index ca2443752..585437b8c 100644
--- a/src/arch/x86/isa/insts/data_transfer/stack_operations.py
+++ b/src/arch/x86/isa/insts/data_transfer/stack_operations.py
@@ -58,16 +58,16 @@ def macroop POP_R {
     # Make the default data size of pops 64 bits in 64 bit mode
     .adjust_env oszIn64Override
 
-    ld reg, ss, [0, t0, "INTREG_RSP"]
-    addi "INTREG_RSP", "INTREG_RSP", dsz
+    ld reg, ss, [0, t0, rsp]
+    addi rsp, rsp, dsz
 };
 
 def macroop PUSH_R {
     # Make the default data size of pops 64 bits in 64 bit mode
     .adjust_env oszIn64Override
 
-    subi "INTREG_RSP", "INTREG_RSP", dsz
-    st reg, ss, [0, t0, "INTREG_RSP"]
+    subi rsp, rsp, dsz
+    st reg, ss, [0, t0, rsp]
 };
 '''
 #let {{
diff --git a/src/arch/x86/isa/microasm.isa b/src/arch/x86/isa/microasm.isa
index 4e06f4391..ee2b92f53 100644
--- a/src/arch/x86/isa/microasm.isa
+++ b/src/arch/x86/isa/microasm.isa
@@ -91,6 +91,9 @@ let {{
         "osz" : "env.operandSize",
         "ssz" : "env.stackSize"
     }
+
+    for reg in ('ax', 'bx', 'cx', 'dx', 'sp', 'bp', 'si', 'di'):
+        assembler.symbols["r%s" % reg] = "INTREG_R%s" % reg.upper()
     assembler.symbols.update(symbols)
 
     # Code literal which forces a default 64 bit operand size in 64 bit mode.
-- 
cgit v1.2.3


From 8e6abaed797d567b4ce009abac63ba19f87efa28 Mon Sep 17 00:00:00 2001
From: Gabe Black <gblack@eecs.umich.edu>
Date: Fri, 22 Jun 2007 15:06:10 -0400
Subject: Update of reference outputs. SPARC_SE o3 gzip didn't have reference
 outputs, mcf has a reduced input size, and most of the other changes are for
 a change in how branch mispredicts work which makes things more accurate.

--HG--
extra : convert_revision : 33ad6a220945b344d2fc5c6abef8e67467e0c0ec
---
 .../00.gzip/ref/sparc/linux/o3-timing/config.ini   |  391 +++
 .../00.gzip/ref/sparc/linux/o3-timing/config.out   |  366 +++
 .../00.gzip/ref/sparc/linux/o3-timing/m5stats.txt  |  423 +++
 .../long/00.gzip/ref/sparc/linux/o3-timing/stderr  |    6 +
 .../long/00.gzip/ref/sparc/linux/o3-timing/stdout  |   44 +
 .../ref/sparc/linux/simple-atomic/config.ini       |    4 +-
 .../ref/sparc/linux/simple-atomic/config.out       |    2 +-
 .../ref/sparc/linux/simple-atomic/m5stats.txt      |   22 +-
 .../10.mcf/ref/sparc/linux/simple-atomic/mcf.out   | 3165 ++++----------------
 .../10.mcf/ref/sparc/linux/simple-atomic/stdout    |   28 +-
 .../ref/sparc/linux/simple-timing/config.ini       |   13 +-
 .../ref/sparc/linux/simple-timing/config.out       |    8 +-
 .../ref/sparc/linux/simple-timing/m5stats.txt      |  288 +-
 .../10.mcf/ref/sparc/linux/simple-timing/mcf.out   | 3165 ++++----------------
 .../10.mcf/ref/sparc/linux/simple-timing/stdout    |   28 +-
 .../00.hello/ref/alpha/linux/o3-timing/config.ini  |   13 +
 .../00.hello/ref/alpha/linux/o3-timing/m5stats.txt |  343 +--
 .../00.hello/ref/alpha/linux/o3-timing/stdout      |   10 +-
 .../00.hello/ref/alpha/tru64/o3-timing/config.ini  |   13 +
 .../00.hello/ref/alpha/tru64/o3-timing/m5stats.txt |  161 +-
 .../00.hello/ref/alpha/tru64/o3-timing/stdout      |   10 +-
 .../ref/alpha/linux/o3-timing/config.ini           |   13 +
 .../ref/alpha/linux/o3-timing/m5stats.txt          |  687 ++---
 .../ref/alpha/linux/o3-timing/stdout               |   10 +-
 .../ref/sparc/linux/o3-timing/config.ini           |   21 +-
 .../ref/sparc/linux/o3-timing/config.out           |    6 +-
 .../ref/sparc/linux/o3-timing/m5stats.txt          |  409 ++-
 .../02.insttest/ref/sparc/linux/o3-timing/stdout   |    6 +-
 28 files changed, 3372 insertions(+), 6283 deletions(-)
 create mode 100644 tests/long/00.gzip/ref/sparc/linux/o3-timing/config.ini
 create mode 100644 tests/long/00.gzip/ref/sparc/linux/o3-timing/config.out
 create mode 100644 tests/long/00.gzip/ref/sparc/linux/o3-timing/m5stats.txt
 create mode 100644 tests/long/00.gzip/ref/sparc/linux/o3-timing/stderr
 create mode 100644 tests/long/00.gzip/ref/sparc/linux/o3-timing/stdout

diff --git a/tests/long/00.gzip/ref/sparc/linux/o3-timing/config.ini b/tests/long/00.gzip/ref/sparc/linux/o3-timing/config.ini
new file mode 100644
index 000000000..585239418
--- /dev/null
+++ b/tests/long/00.gzip/ref/sparc/linux/o3-timing/config.ini
@@ -0,0 +1,391 @@
+[root]
+type=Root
+children=system
+dummy=0
+
+[system]
+type=System
+children=cpu membus physmem
+mem_mode=atomic
+physmem=system.physmem
+
+[system.cpu]
+type=DerivO3CPU
+children=dcache fuPool icache l2cache toL2Bus workload
+BTBEntries=4096
+BTBTagSize=16
+LFSTSize=1024
+LQEntries=32
+RASSize=16
+SQEntries=32
+SSITSize=1024
+activity=0
+backComSize=5
+cachePorts=200
+choiceCtrBits=2
+choicePredictorSize=8192
+clock=500
+commitToDecodeDelay=1
+commitToFetchDelay=1
+commitToIEWDelay=1
+commitToRenameDelay=1
+commitWidth=8
+cpu_id=0
+decodeToFetchDelay=1
+decodeToRenameDelay=1
+decodeWidth=8
+defer_registration=false
+dispatchWidth=8
+fetchToDecodeDelay=1
+fetchTrapLatency=1
+fetchWidth=8
+forwardComSize=5
+fuPool=system.cpu.fuPool
+function_trace=false
+function_trace_start=0
+globalCtrBits=2
+globalHistoryBits=13
+globalPredictorSize=8192
+iewToCommitDelay=1
+iewToDecodeDelay=1
+iewToFetchDelay=1
+iewToRenameDelay=1
+instShiftAmt=2
+issueToExecuteDelay=1
+issueWidth=8
+localCtrBits=2
+localHistoryBits=11
+localHistoryTableSize=2048
+localPredictorSize=2048
+max_insts_all_threads=0
+max_insts_any_thread=0
+max_loads_all_threads=0
+max_loads_any_thread=0
+numIQEntries=64
+numPhysFloatRegs=256
+numPhysIntRegs=256
+numROBEntries=192
+numRobs=1
+numThreads=1
+phase=0
+predType=tournament
+progress_interval=0
+renameToDecodeDelay=1
+renameToFetchDelay=1
+renameToIEWDelay=2
+renameToROBDelay=1
+renameWidth=8
+smtCommitPolicy=RoundRobin
+smtFetchPolicy=SingleThread
+smtIQPolicy=Partitioned
+smtIQThreshold=100
+smtLSQPolicy=Partitioned
+smtLSQThreshold=100
+smtNumFetchingThreads=1
+smtROBPolicy=Partitioned
+smtROBThreshold=100
+squashWidth=8
+system=system
+trapLatency=13
+wbDepth=1
+wbWidth=8
+workload=system.cpu.workload
+dcache_port=system.cpu.dcache.cpu_side
+icache_port=system.cpu.icache.cpu_side
+
+[system.cpu.dcache]
+type=BaseCache
+adaptive_compression=false
+addr_range=0:18446744073709551615
+assoc=2
+block_size=64
+compressed_bus=false
+compression_latency=0
+hash_delay=1
+latency=1000
+lifo=false
+max_miss_count=0
+mshrs=10
+prefetch_access=false
+prefetch_cache_check_push=true
+prefetch_data_accesses_only=false
+prefetch_degree=1
+prefetch_latency=10000
+prefetch_miss=false
+prefetch_past_page=false
+prefetch_policy=none
+prefetch_serial_squash=false
+prefetch_use_cpu_id=true
+prefetcher_size=100
+prioritizeRequests=false
+protocol=Null
+repl=Null
+size=262144
+split=false
+split_size=0
+store_compressed=false
+subblock_size=0
+tgts_per_mshr=20
+trace_addr=0
+two_queue=false
+write_buffers=8
+cpu_side=system.cpu.dcache_port
+mem_side=system.cpu.toL2Bus.port[1]
+
+[system.cpu.fuPool]
+type=FUPool
+children=FUList0 FUList1 FUList2 FUList3 FUList4 FUList5 FUList6 FUList7
+FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUList2 system.cpu.fuPool.FUList3 system.cpu.fuPool.FUList4 system.cpu.fuPool.FUList5 system.cpu.fuPool.FUList6 system.cpu.fuPool.FUList7
+
+[system.cpu.fuPool.FUList0]
+type=FUDesc
+children=opList0
+count=6
+opList=system.cpu.fuPool.FUList0.opList0
+
+[system.cpu.fuPool.FUList0.opList0]
+type=OpDesc
+issueLat=1
+opClass=IntAlu
+opLat=1
+
+[system.cpu.fuPool.FUList1]
+type=FUDesc
+children=opList0 opList1
+count=2
+opList=system.cpu.fuPool.FUList1.opList0 system.cpu.fuPool.FUList1.opList1
+
+[system.cpu.fuPool.FUList1.opList0]
+type=OpDesc
+issueLat=1
+opClass=IntMult
+opLat=3
+
+[system.cpu.fuPool.FUList1.opList1]
+type=OpDesc
+issueLat=19
+opClass=IntDiv
+opLat=20
+
+[system.cpu.fuPool.FUList2]
+type=FUDesc
+children=opList0 opList1 opList2
+count=4
+opList=system.cpu.fuPool.FUList2.opList0 system.cpu.fuPool.FUList2.opList1 system.cpu.fuPool.FUList2.opList2
+
+[system.cpu.fuPool.FUList2.opList0]
+type=OpDesc
+issueLat=1
+opClass=FloatAdd
+opLat=2
+
+[system.cpu.fuPool.FUList2.opList1]
+type=OpDesc
+issueLat=1
+opClass=FloatCmp
+opLat=2
+
+[system.cpu.fuPool.FUList2.opList2]
+type=OpDesc
+issueLat=1
+opClass=FloatCvt
+opLat=2
+
+[system.cpu.fuPool.FUList3]
+type=FUDesc
+children=opList0 opList1 opList2
+count=2
+opList=system.cpu.fuPool.FUList3.opList0 system.cpu.fuPool.FUList3.opList1 system.cpu.fuPool.FUList3.opList2
+
+[system.cpu.fuPool.FUList3.opList0]
+type=OpDesc
+issueLat=1
+opClass=FloatMult
+opLat=4
+
+[system.cpu.fuPool.FUList3.opList1]
+type=OpDesc
+issueLat=12
+opClass=FloatDiv
+opLat=12
+
+[system.cpu.fuPool.FUList3.opList2]
+type=OpDesc
+issueLat=24
+opClass=FloatSqrt
+opLat=24
+
+[system.cpu.fuPool.FUList4]
+type=FUDesc
+children=opList0
+count=0
+opList=system.cpu.fuPool.FUList4.opList0
+
+[system.cpu.fuPool.FUList4.opList0]
+type=OpDesc
+issueLat=1
+opClass=MemRead
+opLat=1
+
+[system.cpu.fuPool.FUList5]
+type=FUDesc
+children=opList0
+count=0
+opList=system.cpu.fuPool.FUList5.opList0
+
+[system.cpu.fuPool.FUList5.opList0]
+type=OpDesc
+issueLat=1
+opClass=MemWrite
+opLat=1
+
+[system.cpu.fuPool.FUList6]
+type=FUDesc
+children=opList0 opList1
+count=4
+opList=system.cpu.fuPool.FUList6.opList0 system.cpu.fuPool.FUList6.opList1
+
+[system.cpu.fuPool.FUList6.opList0]
+type=OpDesc
+issueLat=1
+opClass=MemRead
+opLat=1
+
+[system.cpu.fuPool.FUList6.opList1]
+type=OpDesc
+issueLat=1
+opClass=MemWrite
+opLat=1
+
+[system.cpu.fuPool.FUList7]
+type=FUDesc
+children=opList0
+count=1
+opList=system.cpu.fuPool.FUList7.opList0
+
+[system.cpu.fuPool.FUList7.opList0]
+type=OpDesc
+issueLat=3
+opClass=IprAccess
+opLat=3
+
+[system.cpu.icache]
+type=BaseCache
+adaptive_compression=false
+addr_range=0:18446744073709551615
+assoc=2
+block_size=64
+compressed_bus=false
+compression_latency=0
+hash_delay=1
+latency=1000
+lifo=false
+max_miss_count=0
+mshrs=10
+prefetch_access=false
+prefetch_cache_check_push=true
+prefetch_data_accesses_only=false
+prefetch_degree=1
+prefetch_latency=10000
+prefetch_miss=false
+prefetch_past_page=false
+prefetch_policy=none
+prefetch_serial_squash=false
+prefetch_use_cpu_id=true
+prefetcher_size=100
+prioritizeRequests=false
+protocol=Null
+repl=Null
+size=131072
+split=false
+split_size=0
+store_compressed=false
+subblock_size=0
+tgts_per_mshr=20
+trace_addr=0
+two_queue=false
+write_buffers=8
+cpu_side=system.cpu.icache_port
+mem_side=system.cpu.toL2Bus.port[0]
+
+[system.cpu.l2cache]
+type=BaseCache
+adaptive_compression=false
+addr_range=0:18446744073709551615
+assoc=2
+block_size=64
+compressed_bus=false
+compression_latency=0
+hash_delay=1
+latency=1000
+lifo=false
+max_miss_count=0
+mshrs=10
+prefetch_access=false
+prefetch_cache_check_push=true
+prefetch_data_accesses_only=false
+prefetch_degree=1
+prefetch_latency=10000
+prefetch_miss=false
+prefetch_past_page=false
+prefetch_policy=none
+prefetch_serial_squash=false
+prefetch_use_cpu_id=true
+prefetcher_size=100
+prioritizeRequests=false
+protocol=Null
+repl=Null
+size=2097152
+split=false
+split_size=0
+store_compressed=false
+subblock_size=0
+tgts_per_mshr=5
+trace_addr=0
+two_queue=false
+write_buffers=8
+cpu_side=system.cpu.toL2Bus.port[2]
+mem_side=system.membus.port[1]
+
+[system.cpu.toL2Bus]
+type=Bus
+block_size=64
+bus_id=0
+clock=1000
+responder_set=false
+width=64
+port=system.cpu.icache.mem_side system.cpu.dcache.mem_side system.cpu.l2cache.cpu_side
+
+[system.cpu.workload]
+type=LiveProcess
+cmd=gzip input.log 1
+cwd=build/SPARC_SE/tests/fast/long/00.gzip/sparc/linux/o3-timing
+egid=100
+env=
+euid=100
+executable=/dist/m5/cpu2000/binaries/sparc/linux/gzip
+gid=100
+input=cin
+output=cout
+pid=100
+ppid=99
+system=system
+uid=100
+
+[system.membus]
+type=Bus
+block_size=64
+bus_id=0
+clock=1000
+responder_set=false
+width=64
+port=system.physmem.port[0] system.cpu.l2cache.mem_side
+
+[system.physmem]
+type=PhysicalMemory
+file=
+latency=1
+range=0:134217727
+zero=false
+port=system.membus.port[0]
+
diff --git a/tests/long/00.gzip/ref/sparc/linux/o3-timing/config.out b/tests/long/00.gzip/ref/sparc/linux/o3-timing/config.out
new file mode 100644
index 000000000..b8a2728b3
--- /dev/null
+++ b/tests/long/00.gzip/ref/sparc/linux/o3-timing/config.out
@@ -0,0 +1,366 @@
+[root]
+type=Root
+dummy=0
+
+[system.physmem]
+type=PhysicalMemory
+file=
+range=[0,134217727]
+latency=1
+zero=false
+
+[system]
+type=System
+physmem=system.physmem
+mem_mode=atomic
+
+[system.membus]
+type=Bus
+bus_id=0
+clock=1000
+width=64
+responder_set=false
+block_size=64
+
+[system.cpu.workload]
+type=LiveProcess
+cmd=gzip input.log 1
+executable=/dist/m5/cpu2000/binaries/sparc/linux/gzip
+input=cin
+output=cout
+env=
+cwd=build/SPARC_SE/tests/fast/long/00.gzip/sparc/linux/o3-timing
+system=system
+uid=100
+euid=100
+gid=100
+egid=100
+pid=100
+ppid=99
+
+[system.cpu.fuPool.FUList0.opList0]
+type=OpDesc
+opClass=IntAlu
+opLat=1
+issueLat=1
+
+[system.cpu.fuPool.FUList0]
+type=FUDesc
+opList=system.cpu.fuPool.FUList0.opList0
+count=6
+
+[system.cpu.fuPool.FUList1.opList0]
+type=OpDesc
+opClass=IntMult
+opLat=3
+issueLat=1
+
+[system.cpu.fuPool.FUList1.opList1]
+type=OpDesc
+opClass=IntDiv
+opLat=20
+issueLat=19
+
+[system.cpu.fuPool.FUList1]
+type=FUDesc
+opList=system.cpu.fuPool.FUList1.opList0 system.cpu.fuPool.FUList1.opList1
+count=2
+
+[system.cpu.fuPool.FUList2.opList0]
+type=OpDesc
+opClass=FloatAdd
+opLat=2
+issueLat=1
+
+[system.cpu.fuPool.FUList2.opList1]
+type=OpDesc
+opClass=FloatCmp
+opLat=2
+issueLat=1
+
+[system.cpu.fuPool.FUList2.opList2]
+type=OpDesc
+opClass=FloatCvt
+opLat=2
+issueLat=1
+
+[system.cpu.fuPool.FUList2]
+type=FUDesc
+opList=system.cpu.fuPool.FUList2.opList0 system.cpu.fuPool.FUList2.opList1 system.cpu.fuPool.FUList2.opList2
+count=4
+
+[system.cpu.fuPool.FUList3.opList0]
+type=OpDesc
+opClass=FloatMult
+opLat=4
+issueLat=1
+
+[system.cpu.fuPool.FUList3.opList1]
+type=OpDesc
+opClass=FloatDiv
+opLat=12
+issueLat=12
+
+[system.cpu.fuPool.FUList3.opList2]
+type=OpDesc
+opClass=FloatSqrt
+opLat=24
+issueLat=24
+
+[system.cpu.fuPool.FUList3]
+type=FUDesc
+opList=system.cpu.fuPool.FUList3.opList0 system.cpu.fuPool.FUList3.opList1 system.cpu.fuPool.FUList3.opList2
+count=2
+
+[system.cpu.fuPool.FUList4.opList0]
+type=OpDesc
+opClass=MemRead
+opLat=1
+issueLat=1
+
+[system.cpu.fuPool.FUList4]
+type=FUDesc
+opList=system.cpu.fuPool.FUList4.opList0
+count=0
+
+[system.cpu.fuPool.FUList5.opList0]
+type=OpDesc
+opClass=MemWrite
+opLat=1
+issueLat=1
+
+[system.cpu.fuPool.FUList5]
+type=FUDesc
+opList=system.cpu.fuPool.FUList5.opList0
+count=0
+
+[system.cpu.fuPool.FUList6.opList0]
+type=OpDesc
+opClass=MemRead
+opLat=1
+issueLat=1
+
+[system.cpu.fuPool.FUList6.opList1]
+type=OpDesc
+opClass=MemWrite
+opLat=1
+issueLat=1
+
+[system.cpu.fuPool.FUList6]
+type=FUDesc
+opList=system.cpu.fuPool.FUList6.opList0 system.cpu.fuPool.FUList6.opList1
+count=4
+
+[system.cpu.fuPool.FUList7.opList0]
+type=OpDesc
+opClass=IprAccess
+opLat=3
+issueLat=3
+
+[system.cpu.fuPool.FUList7]
+type=FUDesc
+opList=system.cpu.fuPool.FUList7.opList0
+count=1
+
+[system.cpu.fuPool]
+type=FUPool
+FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUList2 system.cpu.fuPool.FUList3 system.cpu.fuPool.FUList4 system.cpu.fuPool.FUList5 system.cpu.fuPool.FUList6 system.cpu.fuPool.FUList7
+
+[system.cpu]
+type=DerivO3CPU
+clock=500
+phase=0
+numThreads=1
+cpu_id=0
+activity=0
+workload=system.cpu.workload
+checker=null
+max_insts_any_thread=0
+max_insts_all_threads=0
+max_loads_any_thread=0
+max_loads_all_threads=0
+progress_interval=0
+cachePorts=200
+decodeToFetchDelay=1
+renameToFetchDelay=1
+iewToFetchDelay=1
+commitToFetchDelay=1
+fetchWidth=8
+renameToDecodeDelay=1
+iewToDecodeDelay=1
+commitToDecodeDelay=1
+fetchToDecodeDelay=1
+decodeWidth=8
+iewToRenameDelay=1
+commitToRenameDelay=1
+decodeToRenameDelay=1
+renameWidth=8
+commitToIEWDelay=1
+renameToIEWDelay=2
+issueToExecuteDelay=1
+dispatchWidth=8
+issueWidth=8
+wbWidth=8
+wbDepth=1
+fuPool=system.cpu.fuPool
+iewToCommitDelay=1
+renameToROBDelay=1
+commitWidth=8
+squashWidth=8
+trapLatency=13
+backComSize=5
+forwardComSize=5
+predType=tournament
+localPredictorSize=2048
+localCtrBits=2
+localHistoryTableSize=2048
+localHistoryBits=11
+globalPredictorSize=8192
+globalCtrBits=2
+globalHistoryBits=13
+choicePredictorSize=8192
+choiceCtrBits=2
+BTBEntries=4096
+BTBTagSize=16
+RASSize=16
+LQEntries=32
+SQEntries=32
+LFSTSize=1024
+SSITSize=1024
+numPhysIntRegs=256
+numPhysFloatRegs=256
+numIQEntries=64
+numROBEntries=192
+smtNumFetchingThreads=1
+smtFetchPolicy=SingleThread
+smtLSQPolicy=Partitioned
+smtLSQThreshold=100
+smtIQPolicy=Partitioned
+smtIQThreshold=100
+smtROBPolicy=Partitioned
+smtROBThreshold=100
+smtCommitPolicy=RoundRobin
+instShiftAmt=2
+defer_registration=false
+function_trace=false
+function_trace_start=0
+
+[system.cpu.icache]
+type=BaseCache
+size=131072
+assoc=2
+block_size=64
+latency=1000
+mshrs=10
+tgts_per_mshr=20
+write_buffers=8
+prioritizeRequests=false
+protocol=null
+trace_addr=0
+hash_delay=1
+repl=null
+compressed_bus=false
+store_compressed=false
+adaptive_compression=false
+compression_latency=0
+block_size=64
+max_miss_count=0
+addr_range=[0,18446744073709551615]
+split=false
+split_size=0
+lifo=false
+two_queue=false
+prefetch_miss=false
+prefetch_access=false
+prefetcher_size=100
+prefetch_past_page=false
+prefetch_serial_squash=false
+prefetch_latency=10000
+prefetch_degree=1
+prefetch_policy=none
+prefetch_cache_check_push=true
+prefetch_use_cpu_id=true
+prefetch_data_accesses_only=false
+
+[system.cpu.dcache]
+type=BaseCache
+size=262144
+assoc=2
+block_size=64
+latency=1000
+mshrs=10
+tgts_per_mshr=20
+write_buffers=8
+prioritizeRequests=false
+protocol=null
+trace_addr=0
+hash_delay=1
+repl=null
+compressed_bus=false
+store_compressed=false
+adaptive_compression=false
+compression_latency=0
+block_size=64
+max_miss_count=0
+addr_range=[0,18446744073709551615]
+split=false
+split_size=0
+lifo=false
+two_queue=false
+prefetch_miss=false
+prefetch_access=false
+prefetcher_size=100
+prefetch_past_page=false
+prefetch_serial_squash=false
+prefetch_latency=10000
+prefetch_degree=1
+prefetch_policy=none
+prefetch_cache_check_push=true
+prefetch_use_cpu_id=true
+prefetch_data_accesses_only=false
+
+[system.cpu.l2cache]
+type=BaseCache
+size=2097152
+assoc=2
+block_size=64
+latency=1000
+mshrs=10
+tgts_per_mshr=5
+write_buffers=8
+prioritizeRequests=false
+protocol=null
+trace_addr=0
+hash_delay=1
+repl=null
+compressed_bus=false
+store_compressed=false
+adaptive_compression=false
+compression_latency=0
+block_size=64
+max_miss_count=0
+addr_range=[0,18446744073709551615]
+split=false
+split_size=0
+lifo=false
+two_queue=false
+prefetch_miss=false
+prefetch_access=false
+prefetcher_size=100
+prefetch_past_page=false
+prefetch_serial_squash=false
+prefetch_latency=10000
+prefetch_degree=1
+prefetch_policy=none
+prefetch_cache_check_push=true
+prefetch_use_cpu_id=true
+prefetch_data_accesses_only=false
+
+[system.cpu.toL2Bus]
+type=Bus
+bus_id=0
+clock=1000
+width=64
+responder_set=false
+block_size=64
+
diff --git a/tests/long/00.gzip/ref/sparc/linux/o3-timing/m5stats.txt b/tests/long/00.gzip/ref/sparc/linux/o3-timing/m5stats.txt
new file mode 100644
index 000000000..929354b82
--- /dev/null
+++ b/tests/long/00.gzip/ref/sparc/linux/o3-timing/m5stats.txt
@@ -0,0 +1,423 @@
+
+---------- Begin Simulation Statistics ----------
+global.BPredUnit.BTBCorrect                         0                       # Number of correct BTB predictions (this stat may not work properly.
+global.BPredUnit.BTBHits                    155497873                       # Number of BTB hits
+global.BPredUnit.BTBLookups                 176569029                       # Number of BTB lookups
+global.BPredUnit.RASInCorrect                       0                       # Number of incorrect RAS predictions.
+global.BPredUnit.condIncorrect               90327270                       # Number of conditional branches incorrect
+global.BPredUnit.condPredicted              223339092                       # Number of conditional branches predicted
+global.BPredUnit.lookups                    223339092                       # Number of BP lookups
+global.BPredUnit.usedRAS                            0                       # Number of times the RAS was used to get a target.
+host_inst_rate                                  54106                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 156124                       # Number of bytes of host memory used
+host_seconds                                 27529.37                       # Real time elapsed on the host
+host_tick_rate                               45674334                       # Simulator tick rate (ticks/s)
+memdepunit.memDep.conflictingLoads          464625781                       # Number of conflicting loads.
+memdepunit.memDep.conflictingStores         155659586                       # Number of conflicting stores.
+memdepunit.memDep.insertedLoads             751805606                       # Number of loads inserted to the mem dependence unit.
+memdepunit.memDep.insertedStores            305482201                       # Number of stores inserted to the mem dependence unit.
+sim_freq                                 1000000000000                       # Frequency of simulated ticks
+sim_insts                                  1489514762                       # Number of instructions simulated
+sim_seconds                                  1.257386                       # Number of seconds simulated
+sim_ticks                                1257385552000                       # Number of ticks simulated
+system.cpu.commit.COM:branches               86246390                       # Number of branches committed
+system.cpu.commit.COM:bw_lim_events           9313657                       # number cycles where commit BW limit reached
+system.cpu.commit.COM:bw_limited                    0                       # number of insts not committed due to BW limits
+system.cpu.commit.COM:committed_per_cycle.start_dist                     # Number of insts commited each cycle
+system.cpu.commit.COM:committed_per_cycle.samples   2273477268                      
+system.cpu.commit.COM:committed_per_cycle.min_value            0                      
+                               0   1413600532   6217.79%           
+                               1    557883273   2453.88%           
+                               2    123364539    542.62%           
+                               3    120963543    532.06%           
+                               4     18884040     83.06%           
+                               5     12171132     53.54%           
+                               6      9965158     43.83%           
+                               7      7331394     32.25%           
+                               8      9313657     40.97%           
+system.cpu.commit.COM:committed_per_cycle.max_value            8                      
+system.cpu.commit.COM:committed_per_cycle.end_dist
+
+system.cpu.commit.COM:count                1489514762                       # Number of instructions committed
+system.cpu.commit.COM:loads                 402511689                       # Number of loads committed
+system.cpu.commit.COM:membars                   51356                       # Number of memory barriers committed
+system.cpu.commit.COM:refs                  569359657                       # Number of memory references committed
+system.cpu.commit.COM:swp_count                     0                       # Number of s/w prefetches committed
+system.cpu.commit.branchMispredicts          90327270                       # The number of times a branch was mispredicted
+system.cpu.commit.commitCommittedInsts     1489514762                       # The number of committed instructions
+system.cpu.commit.commitNonSpecStalls         2243499                       # The number of times commit has been forced to stall to communicate backwards
+system.cpu.commit.commitSquashedInsts      1399513618                       # The number of squashed insts skipped by commit
+system.cpu.committedInsts                  1489514762                       # Number of Instructions Simulated
+system.cpu.committedInsts_total            1489514762                       # Number of Instructions Simulated
+system.cpu.cpi                               1.688316                       # CPI: Cycles Per Instruction
+system.cpu.cpi_total                         1.688316                       # CPI: Total CPI of All Threads
+system.cpu.dcache.ReadReq_accesses          431095835                       # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_avg_miss_latency  2842.252413                       # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency  2392.500580                       # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_hits              430168385                       # number of ReadReq hits
+system.cpu.dcache.ReadReq_miss_latency     2636047000                       # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_rate          0.002151                       # miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_misses               927450                       # number of ReadReq misses
+system.cpu.dcache.ReadReq_mshr_hits            694672                       # number of ReadReq MSHR hits
+system.cpu.dcache.ReadReq_mshr_miss_latency    556921500                       # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_rate     0.000540                       # mshr miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_mshr_misses          232778                       # number of ReadReq MSHR misses
+system.cpu.dcache.SwapReq_accesses               1326                       # number of SwapReq accesses(hits+misses)
+system.cpu.dcache.SwapReq_avg_miss_latency         3500                       # average SwapReq miss latency
+system.cpu.dcache.SwapReq_avg_mshr_miss_latency         2500                       # average SwapReq mshr miss latency
+system.cpu.dcache.SwapReq_hits                   1319                       # number of SwapReq hits
+system.cpu.dcache.SwapReq_miss_latency          24500                       # number of SwapReq miss cycles
+system.cpu.dcache.SwapReq_miss_rate          0.005279                       # miss rate for SwapReq accesses
+system.cpu.dcache.SwapReq_misses                    7                       # number of SwapReq misses
+system.cpu.dcache.SwapReq_mshr_miss_latency        17500                       # number of SwapReq MSHR miss cycles
+system.cpu.dcache.SwapReq_mshr_miss_rate     0.005279                       # mshr miss rate for SwapReq accesses
+system.cpu.dcache.SwapReq_mshr_misses               7                       # number of SwapReq MSHR misses
+system.cpu.dcache.WriteReq_accesses         166846642                       # number of WriteReq accesses(hits+misses)
+system.cpu.dcache.WriteReq_avg_miss_latency  3889.592412                       # average WriteReq miss latency
+system.cpu.dcache.WriteReq_avg_mshr_miss_latency  3171.120393                       # average WriteReq mshr miss latency
+system.cpu.dcache.WriteReq_hits             165155866                       # number of WriteReq hits
+system.cpu.dcache.WriteReq_miss_latency    6576429500                       # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_rate         0.010134                       # miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_misses             1690776                       # number of WriteReq misses
+system.cpu.dcache.WriteReq_mshr_hits          1420478                       # number of WriteReq MSHR hits
+system.cpu.dcache.WriteReq_mshr_miss_latency    857147500                       # number of WriteReq MSHR miss cycles
+system.cpu.dcache.WriteReq_mshr_miss_rate     0.001620                       # mshr miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_mshr_misses         270298                       # number of WriteReq MSHR misses
+system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
+system.cpu.dcache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
+system.cpu.dcache.avg_refs                1183.354576                       # Average number of references to valid blocks.
+system.cpu.dcache.blocked_no_mshrs                  0                       # number of cycles access was blocked
+system.cpu.dcache.blocked_no_targets                0                       # number of cycles access was blocked
+system.cpu.dcache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
+system.cpu.dcache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
+system.cpu.dcache.cache_copies                      0                       # number of cache copies performed
+system.cpu.dcache.demand_accesses           597942477                       # number of demand (read+write) accesses
+system.cpu.dcache.demand_avg_miss_latency  3518.594842                       # average overall miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency  2810.845677                       # average overall mshr miss latency
+system.cpu.dcache.demand_hits               595324251                       # number of demand (read+write) hits
+system.cpu.dcache.demand_miss_latency      9212476500                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_rate           0.004379                       # miss rate for demand accesses
+system.cpu.dcache.demand_misses               2618226                       # number of demand (read+write) misses
+system.cpu.dcache.demand_mshr_hits            2115150                       # number of demand (read+write) MSHR hits
+system.cpu.dcache.demand_mshr_miss_latency   1414069000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_rate      0.000841                       # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_misses           503076                       # number of demand (read+write) MSHR misses
+system.cpu.dcache.fast_writes                       0                       # number of fast writes performed
+system.cpu.dcache.mshr_cap_events                   0                       # number of times MSHR cap was activated
+system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
+system.cpu.dcache.overall_accesses          597942477                       # number of overall (read+write) accesses
+system.cpu.dcache.overall_avg_miss_latency  3518.594842                       # average overall miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency  2810.845677                       # average overall mshr miss latency
+system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
+system.cpu.dcache.overall_hits              595324251                       # number of overall hits
+system.cpu.dcache.overall_miss_latency     9212476500                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_rate          0.004379                       # miss rate for overall accesses
+system.cpu.dcache.overall_misses              2618226                       # number of overall misses
+system.cpu.dcache.overall_mshr_hits           2115150                       # number of overall MSHR hits
+system.cpu.dcache.overall_mshr_miss_latency   1414069000                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_rate     0.000841                       # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_misses          503076                       # number of overall MSHR misses
+system.cpu.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
+system.cpu.dcache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
+system.cpu.dcache.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
+system.cpu.dcache.prefetcher.num_hwpf_already_in_mshr            0                       # number of hwpf that were already in mshr
+system.cpu.dcache.prefetcher.num_hwpf_already_in_prefetcher            0                       # number of hwpf that were already in the prefetch queue
+system.cpu.dcache.prefetcher.num_hwpf_evicted            0                       # number of hwpf removed due to no buffer left
+system.cpu.dcache.prefetcher.num_hwpf_identified            0                       # number of hwpf identified
+system.cpu.dcache.prefetcher.num_hwpf_issued            0                       # number of hwpf issued
+system.cpu.dcache.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
+system.cpu.dcache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
+system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
+system.cpu.dcache.replacements                 498987                       # number of replacements
+system.cpu.dcache.sampled_refs                 503083                       # Sample count of references to valid blocks.
+system.cpu.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
+system.cpu.dcache.tagsinuse               4095.797134                       # Cycle average of tags in use
+system.cpu.dcache.total_refs                595325570                       # Total number of references to valid blocks.
+system.cpu.dcache.warmup_cycle               77974000                       # Cycle when the warmup percentage was hit.
+system.cpu.dcache.writebacks                   335737                       # number of writebacks
+system.cpu.decode.DECODE:BlockedCycles      435745843                       # Number of cycles decode is blocked
+system.cpu.decode.DECODE:DecodedInsts      3276032607                       # Number of instructions handled by decode
+system.cpu.decode.DECODE:IdleCycles        1073744654                       # Number of cycles decode is idle
+system.cpu.decode.DECODE:RunCycles          761619600                       # Number of cycles decode is running
+system.cpu.decode.DECODE:SquashCycles       241293837                       # Number of cycles decode is squashing
+system.cpu.decode.DECODE:UnblockCycles        2367171                       # Number of cycles decode is unblocking
+system.cpu.fetch.Branches                   223339092                       # Number of branches that fetch encountered
+system.cpu.fetch.CacheLines                 355860305                       # Number of cache lines fetched
+system.cpu.fetch.Cycles                    1166695920                       # Number of cycles fetch has run and was not squashing or blocked
+system.cpu.fetch.IcacheSquashes              14770227                       # Number of outstanding Icache misses that were squashed
+system.cpu.fetch.Insts                     3591774268                       # Number of instructions fetch has processed
+system.cpu.fetch.SquashCycles                93734364                       # Number of cycles fetch has spent squashing
+system.cpu.fetch.branchRate                  0.088811                       # Number of branch fetches per cycle
+system.cpu.fetch.icacheStallCycles          355860305                       # Number of cycles fetch is stalled on an Icache miss
+system.cpu.fetch.predictedBranches          155497873                       # Number of branches that fetch has predicted taken
+system.cpu.fetch.rate                        1.428271                       # Number of inst fetches per cycle
+system.cpu.fetch.rateDist.start_dist                           # Number of instructions fetched each cycle (Total)
+system.cpu.fetch.rateDist.samples          2514771105                      
+system.cpu.fetch.rateDist.min_value                 0                      
+                               0   1703935491   6775.71%           
+                               1    252157679   1002.71%           
+                               2     75632424    300.75%           
+                               3     38096592    151.49%           
+                               4     76680653    304.92%           
+                               5     30840750    122.64%           
+                               6     33076966    131.53%           
+                               7     20130593     80.05%           
+                               8    284219957   1130.20%           
+system.cpu.fetch.rateDist.max_value                 8                      
+system.cpu.fetch.rateDist.end_dist
+
+system.cpu.icache.ReadReq_accesses          355860305                       # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_avg_miss_latency  5111.111111                       # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency  4198.640483                       # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_hits              355858946                       # number of ReadReq hits
+system.cpu.icache.ReadReq_miss_latency        6946000                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_rate          0.000004                       # miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_misses                 1359                       # number of ReadReq misses
+system.cpu.icache.ReadReq_mshr_hits                35                       # number of ReadReq MSHR hits
+system.cpu.icache.ReadReq_mshr_miss_latency      5559000                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_rate     0.000004                       # mshr miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_mshr_misses            1324                       # number of ReadReq MSHR misses
+system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
+system.cpu.icache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
+system.cpu.icache.avg_refs               268775.638973                       # Average number of references to valid blocks.
+system.cpu.icache.blocked_no_mshrs                  0                       # number of cycles access was blocked
+system.cpu.icache.blocked_no_targets                0                       # number of cycles access was blocked
+system.cpu.icache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
+system.cpu.icache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
+system.cpu.icache.cache_copies                      0                       # number of cache copies performed
+system.cpu.icache.demand_accesses           355860305                       # number of demand (read+write) accesses
+system.cpu.icache.demand_avg_miss_latency  5111.111111                       # average overall miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency  4198.640483                       # average overall mshr miss latency
+system.cpu.icache.demand_hits               355858946                       # number of demand (read+write) hits
+system.cpu.icache.demand_miss_latency         6946000                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_rate           0.000004                       # miss rate for demand accesses
+system.cpu.icache.demand_misses                  1359                       # number of demand (read+write) misses
+system.cpu.icache.demand_mshr_hits                 35                       # number of demand (read+write) MSHR hits
+system.cpu.icache.demand_mshr_miss_latency      5559000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_rate      0.000004                       # mshr miss rate for demand accesses
+system.cpu.icache.demand_mshr_misses             1324                       # number of demand (read+write) MSHR misses
+system.cpu.icache.fast_writes                       0                       # number of fast writes performed
+system.cpu.icache.mshr_cap_events                   0                       # number of times MSHR cap was activated
+system.cpu.icache.no_allocate_misses                0                       # Number of misses that were no-allocate
+system.cpu.icache.overall_accesses          355860305                       # number of overall (read+write) accesses
+system.cpu.icache.overall_avg_miss_latency  5111.111111                       # average overall miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency  4198.640483                       # average overall mshr miss latency
+system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
+system.cpu.icache.overall_hits              355858946                       # number of overall hits
+system.cpu.icache.overall_miss_latency        6946000                       # number of overall miss cycles
+system.cpu.icache.overall_miss_rate          0.000004                       # miss rate for overall accesses
+system.cpu.icache.overall_misses                 1359                       # number of overall misses
+system.cpu.icache.overall_mshr_hits                35                       # number of overall MSHR hits
+system.cpu.icache.overall_mshr_miss_latency      5559000                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_rate     0.000004                       # mshr miss rate for overall accesses
+system.cpu.icache.overall_mshr_misses            1324                       # number of overall MSHR misses
+system.cpu.icache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
+system.cpu.icache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
+system.cpu.icache.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
+system.cpu.icache.prefetcher.num_hwpf_already_in_mshr            0                       # number of hwpf that were already in mshr
+system.cpu.icache.prefetcher.num_hwpf_already_in_prefetcher            0                       # number of hwpf that were already in the prefetch queue
+system.cpu.icache.prefetcher.num_hwpf_evicted            0                       # number of hwpf removed due to no buffer left
+system.cpu.icache.prefetcher.num_hwpf_identified            0                       # number of hwpf identified
+system.cpu.icache.prefetcher.num_hwpf_issued            0                       # number of hwpf issued
+system.cpu.icache.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
+system.cpu.icache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
+system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
+system.cpu.icache.replacements                    198                       # number of replacements
+system.cpu.icache.sampled_refs                   1324                       # Sample count of references to valid blocks.
+system.cpu.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
+system.cpu.icache.tagsinuse               1026.431065                       # Cycle average of tags in use
+system.cpu.icache.total_refs                355858946                       # Total number of references to valid blocks.
+system.cpu.icache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
+system.cpu.icache.writebacks                        0                       # number of writebacks
+system.cpu.idleCycles                            1497                       # Total number of cycles that the CPU has spent unscheduled due to idling
+system.cpu.iew.EXEC:branches                128998684                       # Number of branches executed
+system.cpu.iew.EXEC:nop                             0                       # number of nop insts executed
+system.cpu.iew.EXEC:rate                     0.879999                       # Inst execution rate
+system.cpu.iew.EXEC:refs                    756340485                       # number of memory reference insts executed
+system.cpu.iew.EXEC:stores                  208683785                       # Number of stores executed
+system.cpu.iew.EXEC:swp                             0                       # number of swp insts executed
+system.cpu.iew.WB:consumers                1511846593                       # num instructions consuming a value
+system.cpu.iew.WB:count                    2184193190                       # cumulative count of insts written-back
+system.cpu.iew.WB:fanout                     0.964010                       # average fanout of values written-back
+system.cpu.iew.WB:penalized                         0                       # number of instrctions required to write to 'other' IQ
+system.cpu.iew.WB:penalized_rate                    0                       # fraction of instructions written-back that wrote to 'other' IQ
+system.cpu.iew.WB:producers                1457435157                       # num instructions producing a value
+system.cpu.iew.WB:rate                       0.868546                       # insts written-back per cycle
+system.cpu.iew.WB:sent                     2194556483                       # cumulative count of insts sent to commit
+system.cpu.iew.branchMispredicts             93921260                       # Number of branch mispredicts detected at execute
+system.cpu.iew.iewBlockCycles                  242324                       # Number of cycles IEW is blocking
+system.cpu.iew.iewDispLoadInsts             751805606                       # Number of dispatched load instructions
+system.cpu.iew.iewDispNonSpecInsts           21112863                       # Number of dispatched non-speculative instructions
+system.cpu.iew.iewDispSquashedInsts           6967923                       # Number of squashed instructions skipped by dispatch
+system.cpu.iew.iewDispStoreInsts            305482201                       # Number of dispatched store instructions
+system.cpu.iew.iewDispatchedInsts          2889028359                       # Number of instructions dispatched to IQ
+system.cpu.iew.iewExecLoadInsts             547656700                       # Number of load instructions executed
+system.cpu.iew.iewExecSquashedInsts         155922171                       # Number of squashed instructions skipped in execute
+system.cpu.iew.iewExecutedInsts            2212995141                       # Number of executed instructions
+system.cpu.iew.iewIQFullEvents                      0                       # Number of times the IQ has become full, causing a stall
+system.cpu.iew.iewIdleCycles                        0                       # Number of cycles IEW is idle
+system.cpu.iew.iewLSQFullEvents                     0                       # Number of times the LSQ has become full, causing a stall
+system.cpu.iew.iewSquashCycles              241293837                       # Number of cycles IEW is squashing
+system.cpu.iew.iewUnblockCycles                  1173                       # Number of cycles IEW is unblocking
+system.cpu.iew.lsq.thread.0.blockedLoads            0                       # Number of blocked loads due to partial load-store forwarding
+system.cpu.iew.lsq.thread.0.cacheBlocked            0                       # Number of times an access to memory failed due to the cache being blocked
+system.cpu.iew.lsq.thread.0.forwLoads       116560202                       # Number of loads that had data forwarded from stores
+system.cpu.iew.lsq.thread.0.ignoredResponses       586068                       # Number of memory responses ignored because the instruction is squashed
+system.cpu.iew.lsq.thread.0.invAddrLoads            0                       # Number of loads ignored due to an invalid address
+system.cpu.iew.lsq.thread.0.invAddrSwpfs            0                       # Number of software prefetches ignored due to an invalid address
+system.cpu.iew.lsq.thread.0.memOrderViolation      3827981                       # Number of memory ordering violations
+system.cpu.iew.lsq.thread.0.rescheduledLoads           59                       # Number of loads that were rescheduled
+system.cpu.iew.lsq.thread.0.squashedLoads    349293917                       # Number of loads squashed
+system.cpu.iew.lsq.thread.0.squashedStores    138634233                       # Number of stores squashed
+system.cpu.iew.memOrderViolationEvents        3827981                       # Number of memory order violations
+system.cpu.iew.predictedNotTakenIncorrect      1127857                       # Number of branches that were predicted not taken incorrectly
+system.cpu.iew.predictedTakenIncorrect       92793403                       # Number of branches that were predicted taken incorrectly
+system.cpu.ipc                               0.592306                       # IPC: Instructions Per Cycle
+system.cpu.ipc_total                         0.592306                       # IPC: Total IPC of All Threads
+system.cpu.iq.ISSUE:FU_type_0              2368917312                       # Type of FU issued
+system.cpu.iq.ISSUE:FU_type_0.start_dist
+                      No_OpClass    351375247     14.83%            # Type of FU issued
+                          IntAlu   1188705257     50.18%            # Type of FU issued
+                         IntMult            0      0.00%            # Type of FU issued
+                          IntDiv            0      0.00%            # Type of FU issued
+                        FloatAdd      2951238      0.12%            # Type of FU issued
+                        FloatCmp            0      0.00%            # Type of FU issued
+                        FloatCvt            0      0.00%            # Type of FU issued
+                       FloatMult            0      0.00%            # Type of FU issued
+                        FloatDiv            0      0.00%            # Type of FU issued
+                       FloatSqrt            0      0.00%            # Type of FU issued
+                         MemRead    592531661     25.01%            # Type of FU issued
+                        MemWrite    233353909      9.85%            # Type of FU issued
+                       IprAccess            0      0.00%            # Type of FU issued
+                    InstPrefetch            0      0.00%            # Type of FU issued
+system.cpu.iq.ISSUE:FU_type_0.end_dist
+system.cpu.iq.ISSUE:fu_busy_cnt               6622922                       # FU busy when requested
+system.cpu.iq.ISSUE:fu_busy_rate             0.002796                       # FU busy rate (busy events/executed inst)
+system.cpu.iq.ISSUE:fu_full.start_dist
+                      No_OpClass            0      0.00%            # attempts to use FU when none available
+                          IntAlu      3150287     47.57%            # attempts to use FU when none available
+                         IntMult            0      0.00%            # attempts to use FU when none available
+                          IntDiv            0      0.00%            # attempts to use FU when none available
+                        FloatAdd       202242      3.05%            # attempts to use FU when none available
+                        FloatCmp            0      0.00%            # attempts to use FU when none available
+                        FloatCvt            0      0.00%            # attempts to use FU when none available
+                       FloatMult            0      0.00%            # attempts to use FU when none available
+                        FloatDiv            0      0.00%            # attempts to use FU when none available
+                       FloatSqrt            0      0.00%            # attempts to use FU when none available
+                         MemRead      2975364     44.93%            # attempts to use FU when none available
+                        MemWrite       295029      4.45%            # attempts to use FU when none available
+                       IprAccess            0      0.00%            # attempts to use FU when none available
+                    InstPrefetch            0      0.00%            # attempts to use FU when none available
+system.cpu.iq.ISSUE:fu_full.end_dist
+system.cpu.iq.ISSUE:issued_per_cycle.start_dist                     # Number of insts issued each cycle
+system.cpu.iq.ISSUE:issued_per_cycle.samples   2514771105                      
+system.cpu.iq.ISSUE:issued_per_cycle.min_value            0                      
+                               0   1264571415   5028.57%           
+                               1    618163663   2458.13%           
+                               2    318214573   1265.38%           
+                               3    195947630    779.19%           
+                               4     78232851    311.09%           
+                               5     28085074    111.68%           
+                               6      8167595     32.48%           
+                               7      2987163     11.88%           
+                               8       401141      1.60%           
+system.cpu.iq.ISSUE:issued_per_cycle.max_value            8                      
+system.cpu.iq.ISSUE:issued_per_cycle.end_dist
+
+system.cpu.iq.ISSUE:rate                     0.942001                       # Inst issue rate
+system.cpu.iq.iqInstsAdded                 2867645475                       # Number of instructions added to the IQ (excludes non-spec)
+system.cpu.iq.iqInstsIssued                2368917312                       # Number of instructions issued
+system.cpu.iq.iqNonSpecInstsAdded            21382884                       # Number of non-speculative instructions added to the IQ
+system.cpu.iq.iqSquashedInstsExamined      1368214032                       # Number of squashed instructions iterated over during squash; mainly for profiling
+system.cpu.iq.iqSquashedInstsIssued            461256                       # Number of squashed instructions issued
+system.cpu.iq.iqSquashedNonSpecRemoved       19139385                       # Number of squashed non-spec instructions that were removed
+system.cpu.iq.iqSquashedOperandsExamined   1296493196                       # Number of squashed operands that are examined and possibly removed from graph
+system.cpu.l2cache.ReadReq_accesses            504406                       # number of ReadReq accesses(hits+misses)
+system.cpu.l2cache.ReadReq_avg_miss_latency  4393.799833                       # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_avg_mshr_miss_latency  2267.430007                       # average ReadReq mshr miss latency
+system.cpu.l2cache.ReadReq_hits                476939                       # number of ReadReq hits
+system.cpu.l2cache.ReadReq_miss_latency     120684500                       # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_miss_rate         0.054454                       # miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_misses               27467                       # number of ReadReq misses
+system.cpu.l2cache.ReadReq_mshr_miss_latency     62279500                       # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_rate     0.054454                       # mshr miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_mshr_misses          27467                       # number of ReadReq MSHR misses
+system.cpu.l2cache.Writeback_accesses          335737                       # number of Writeback accesses(hits+misses)
+system.cpu.l2cache.Writeback_hits              335720                       # number of Writeback hits
+system.cpu.l2cache.Writeback_miss_rate       0.000051                       # miss rate for Writeback accesses
+system.cpu.l2cache.Writeback_misses                17                       # number of Writeback misses
+system.cpu.l2cache.Writeback_mshr_miss_rate     0.000051                       # mshr miss rate for Writeback accesses
+system.cpu.l2cache.Writeback_mshr_misses           17                       # number of Writeback MSHR misses
+system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
+system.cpu.l2cache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
+system.cpu.l2cache.avg_refs                 29.586740                       # Average number of references to valid blocks.
+system.cpu.l2cache.blocked_no_mshrs                 0                       # number of cycles access was blocked
+system.cpu.l2cache.blocked_no_targets               0                       # number of cycles access was blocked
+system.cpu.l2cache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
+system.cpu.l2cache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
+system.cpu.l2cache.cache_copies                     0                       # number of cache copies performed
+system.cpu.l2cache.demand_accesses             504406                       # number of demand (read+write) accesses
+system.cpu.l2cache.demand_avg_miss_latency  4393.799833                       # average overall miss latency
+system.cpu.l2cache.demand_avg_mshr_miss_latency  2267.430007                       # average overall mshr miss latency
+system.cpu.l2cache.demand_hits                 476939                       # number of demand (read+write) hits
+system.cpu.l2cache.demand_miss_latency      120684500                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_rate          0.054454                       # miss rate for demand accesses
+system.cpu.l2cache.demand_misses                27467                       # number of demand (read+write) misses
+system.cpu.l2cache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
+system.cpu.l2cache.demand_mshr_miss_latency     62279500                       # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_rate     0.054454                       # mshr miss rate for demand accesses
+system.cpu.l2cache.demand_mshr_misses           27467                       # number of demand (read+write) MSHR misses
+system.cpu.l2cache.fast_writes                      0                       # number of fast writes performed
+system.cpu.l2cache.mshr_cap_events                  0                       # number of times MSHR cap was activated
+system.cpu.l2cache.no_allocate_misses               0                       # Number of misses that were no-allocate
+system.cpu.l2cache.overall_accesses            840143                       # number of overall (read+write) accesses
+system.cpu.l2cache.overall_avg_miss_latency  4391.082084                       # average overall miss latency
+system.cpu.l2cache.overall_avg_mshr_miss_latency  2267.430007                       # average overall mshr miss latency
+system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
+system.cpu.l2cache.overall_hits                812659                       # number of overall hits
+system.cpu.l2cache.overall_miss_latency     120684500                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_rate         0.032713                       # miss rate for overall accesses
+system.cpu.l2cache.overall_misses               27484                       # number of overall misses
+system.cpu.l2cache.overall_mshr_hits                0                       # number of overall MSHR hits
+system.cpu.l2cache.overall_mshr_miss_latency     62279500                       # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_rate     0.032693                       # mshr miss rate for overall accesses
+system.cpu.l2cache.overall_mshr_misses          27467                       # number of overall MSHR misses
+system.cpu.l2cache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
+system.cpu.l2cache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
+system.cpu.l2cache.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
+system.cpu.l2cache.prefetcher.num_hwpf_already_in_mshr            0                       # number of hwpf that were already in mshr
+system.cpu.l2cache.prefetcher.num_hwpf_already_in_prefetcher            0                       # number of hwpf that were already in the prefetch queue
+system.cpu.l2cache.prefetcher.num_hwpf_evicted            0                       # number of hwpf removed due to no buffer left
+system.cpu.l2cache.prefetcher.num_hwpf_identified            0                       # number of hwpf identified
+system.cpu.l2cache.prefetcher.num_hwpf_issued            0                       # number of hwpf issued
+system.cpu.l2cache.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
+system.cpu.l2cache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
+system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
+system.cpu.l2cache.replacements                  2692                       # number of replacements
+system.cpu.l2cache.sampled_refs                 27467                       # Sample count of references to valid blocks.
+system.cpu.l2cache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
+system.cpu.l2cache.tagsinuse             24466.224839                       # Cycle average of tags in use
+system.cpu.l2cache.total_refs                  812659                       # Total number of references to valid blocks.
+system.cpu.l2cache.warmup_cycle                     0                       # Cycle when the warmup percentage was hit.
+system.cpu.l2cache.writebacks                    2555                       # number of writebacks
+system.cpu.numCycles                       2514771105                       # number of cpu cycles simulated
+system.cpu.rename.RENAME:BlockCycles         14153952                       # Number of cycles rename is blocking
+system.cpu.rename.RENAME:CommittedMaps     1244762263                       # Number of HB maps that are committed
+system.cpu.rename.RENAME:IQFullEvents             845                       # Number of times rename has blocked due to IQ full
+system.cpu.rename.RENAME:IdleCycles        1122858502                       # Number of cycles rename is idle
+system.cpu.rename.RENAME:LSQFullEvents       18964355                       # Number of times rename has blocked due to LSQ full
+system.cpu.rename.RENAME:RenameLookups     4974059876                       # Number of register rename lookups that rename has made
+system.cpu.rename.RENAME:RenamedInsts      3105364972                       # Number of instructions processed by rename
+system.cpu.rename.RENAME:RenamedOperands   2435580679                       # Number of destination operands rename has renamed
+system.cpu.rename.RENAME:RunCycles          713636177                       # Number of cycles rename is running
+system.cpu.rename.RENAME:SquashCycles       241293837                       # Number of cycles rename is squashing
+system.cpu.rename.RENAME:UnblockCycles       24303898                       # Number of cycles rename is unblocking
+system.cpu.rename.RENAME:UndoneMaps        1190818416                       # Number of HB maps that are undone due to squashing
+system.cpu.rename.RENAME:serializeStallCycles    398524739                       # count of cycles rename stalled for serializing inst
+system.cpu.rename.RENAME:serializingInsts     21495577                       # count of serializing insts renamed
+system.cpu.rename.RENAME:skidInsts          149561373                       # count of insts added to the skid buffer
+system.cpu.rename.RENAME:tempSerializingInsts     21338548                       # count of temporary serializing insts renamed
+system.cpu.timesIdled                               3                       # Number of times that the entire CPU went into an idle state and unscheduled itself
+system.cpu.workload.PROG:num_syscalls              19                       # Number of system calls
+
+---------- End Simulation Statistics   ----------
diff --git a/tests/long/00.gzip/ref/sparc/linux/o3-timing/stderr b/tests/long/00.gzip/ref/sparc/linux/o3-timing/stderr
new file mode 100644
index 000000000..6fe2fe04f
--- /dev/null
+++ b/tests/long/00.gzip/ref/sparc/linux/o3-timing/stderr
@@ -0,0 +1,6 @@
+warn: More than two loadable segments in ELF object.
+warn: Ignoring segment @ 0xb4000 length 0x10.
+warn: More than two loadable segments in ELF object.
+warn: Ignoring segment @ 0x0 length 0x0.
+warn: Entering event queue @ 0.  Starting simulation...
+warn: Ignoring request to flush register windows.
diff --git a/tests/long/00.gzip/ref/sparc/linux/o3-timing/stdout b/tests/long/00.gzip/ref/sparc/linux/o3-timing/stdout
new file mode 100644
index 000000000..c0d965c7b
--- /dev/null
+++ b/tests/long/00.gzip/ref/sparc/linux/o3-timing/stdout
@@ -0,0 +1,44 @@
+spec_init
+Loading Input Data
+Duplicating 262144 bytes
+Duplicating 524288 bytes
+Input data 1048576 bytes in length
+Compressing Input Data, level 1
+Compressed data 108074 bytes in length
+Uncompressing Data
+Uncompressed data 1048576 bytes in length
+Uncompressed data compared correctly
+Compressing Input Data, level 3
+Compressed data 97831 bytes in length
+Uncompressing Data
+Uncompressed data 1048576 bytes in length
+Uncompressed data compared correctly
+Compressing Input Data, level 5
+Compressed data 83382 bytes in length
+Uncompressing Data
+Uncompressed data 1048576 bytes in length
+Uncompressed data compared correctly
+Compressing Input Data, level 7
+Compressed data 76606 bytes in length
+Uncompressing Data
+Uncompressed data 1048576 bytes in length
+Uncompressed data compared correctly
+Compressing Input Data, level 9
+Compressed data 73189 bytes in length
+Uncompressing Data
+Uncompressed data 1048576 bytes in length
+Uncompressed data compared correctly
+Tested 1MB buffer: OK!
+M5 Simulator System
+
+Copyright (c) 2001-2006
+The Regents of The University of Michigan
+All Rights Reserved
+
+
+M5 compiled Jun 21 2007 21:15:48
+M5 started Fri Jun 22 01:01:27 2007
+M5 executing on zizzer.eecs.umich.edu
+command line: build/SPARC_SE/m5.fast -d build/SPARC_SE/tests/fast/long/00.gzip/sparc/linux/o3-timing tests/run.py long/00.gzip/sparc/linux/o3-timing
+Global frequency set at 1000000000000 ticks per second
+Exiting @ tick 1257385552000 because target called exit()
diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.ini b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.ini
index 9b8d69888..9cdc13914 100644
--- a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.ini
+++ b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.ini
@@ -39,7 +39,7 @@ env=
 euid=100
 executable=/dist/m5/cpu2000/binaries/sparc/linux/mcf
 gid=100
-input=/dist/m5/cpu2000/data/mcf/lgred/input/mcf.in
+input=/dist/m5/cpu2000/data/mcf/smred/input/mcf.in
 output=cout
 pid=100
 ppid=99
@@ -53,7 +53,7 @@ bus_id=0
 clock=1000
 responder_set=false
 width=64
-port=system.physmem.port system.cpu.icache_port system.cpu.dcache_port
+port=system.physmem.port[0] system.cpu.icache_port system.cpu.dcache_port
 
 [system.physmem]
 type=PhysicalMemory
diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.out b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.out
index 8a5c9fd62..b84a9d780 100644
--- a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.out
+++ b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.out
@@ -26,7 +26,7 @@ block_size=64
 type=LiveProcess
 cmd=mcf mcf.in
 executable=/dist/m5/cpu2000/binaries/sparc/linux/mcf
-input=/dist/m5/cpu2000/data/mcf/lgred/input/mcf.in
+input=/dist/m5/cpu2000/data/mcf/smred/input/mcf.in
 output=cout
 env=
 cwd=build/SPARC_SE/tests/fast/long/10.mcf/sparc/linux/simple-atomic
diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/m5stats.txt b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/m5stats.txt
index 530572b5d..ed8482fb4 100644
--- a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/m5stats.txt
+++ b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/m5stats.txt
@@ -1,18 +1,18 @@
 
 ---------- Begin Simulation Statistics ----------
-host_inst_rate                                 686638                       # Simulator instruction rate (inst/s)
-host_mem_usage                                 149820                       # Number of bytes of host memory used
-host_seconds                                  2504.37                       # Real time elapsed on the host
-host_tick_rate                              343319148                       # Simulator tick rate (ticks/s)
+host_inst_rate                                1151751                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 150484                       # Number of bytes of host memory used
+host_seconds                                   211.71                       # Real time elapsed on the host
+host_tick_rate                              575874246                       # Simulator tick rate (ticks/s)
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
-sim_insts                                  1719594534                       # Number of instructions simulated
-sim_seconds                                  0.859797                       # Number of seconds simulated
-sim_ticks                                859797266500                       # Number of ticks simulated
+sim_insts                                   243840172                       # Number of instructions simulated
+sim_seconds                                  0.121920                       # Number of seconds simulated
+sim_ticks                                121920085500                       # Number of ticks simulated
 system.cpu.idle_fraction                            0                       # Percentage of idle cycles
 system.cpu.not_idle_fraction                        1                       # Percentage of non-idle cycles
-system.cpu.numCycles                       1719594534                       # number of cpu cycles simulated
-system.cpu.num_insts                       1719594534                       # Number of instructions executed
-system.cpu.num_refs                         774793634                       # Number of memory references
-system.cpu.workload.PROG:num_syscalls             632                       # Number of system calls
+system.cpu.numCycles                        243840172                       # number of cpu cycles simulated
+system.cpu.num_insts                        243840172                       # Number of instructions executed
+system.cpu.num_refs                         105125191                       # Number of memory references
+system.cpu.workload.PROG:num_syscalls             428                       # Number of system calls
 
 ---------- End Simulation Statistics   ----------
diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/mcf.out b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/mcf.out
index 6bbb02cf0..095132477 100644
--- a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/mcf.out
+++ b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/mcf.out
@@ -1,3092 +1,999 @@
 ()
-1642
-***
-1759
-()
-1641
-***
-1691
+500
 ()
-1640
+499
 ()
-1639
+498
 ()
-1638
+496
 ()
-1637
+495
 ()
-1636
+494
 ()
-1635
+493
 ()
-1634
+492
 ()
-1633
+491
 ()
-1632
+490
 ()
-1631
+489
 ()
-1630
+488
 ()
-1629
+487
 ()
-1628
+486
 ()
-1627
+484
 ()
-1626
+482
 ()
-1625
-***
-1784
+481
 ()
-1624
+480
 ()
-1623
+479
 ()
-1622
-***
-1688
+478
 ()
-1621
+477
 ()
-1618
+476
 ()
-1617
-***
-1796
+475
 ()
-1616
+474
 ()
-1615
-***
-1668
+473
 ()
-1614
+472
 ()
-1613
+471
 ()
-1612
-***
-1700
+469
 ()
-1611
+468
 ()
-1610
+467
 ()
-1608
+466
 ()
-1606
+465
 ()
-1605
+464
 ()
-1604
+463
 ()
-1603
+462
 ()
-1602
+461
 ()
-1601
+460
 ()
-1599
+459
 ()
-1598
-***
-1714
+458
 ()
-1597
+457
 ()
-1595
+455
 ()
-1591
+454
 ()
-1590
-***
-1773
+452
 ()
-1589
+451
 ()
-1588
+450
 ()
-1587
-***
-1710
+449
 ()
-1586
+448
 ()
-1585
+446
 ()
-1584
-***
-1748
+445
 ()
-1583
-***
-1648
+444
 ()
-1582
+443
 ()
-1581
-***
-1757
+442
 ()
-1579
+440
 ()
-1578
-***
-1726
+439
 ()
-1575
-***
-1763
+438
 ()
-1574
+436
 ()
-1573
+435
 ()
-1572
+433
 ()
-1571
+432
 ()
-1568
+431
 ()
-1567
+428
 ()
-1565
-***
-1643
+427
 ()
-1564
+425
 ()
-1563
+424
 ()
-1562
+423
 ()
-1559
+420
 ()
-1557
+419
 ()
-1556
+416
 ()
-1555
+414
 ()
-1554
+413
 ()
-1553
-***
-1684
+412
 ()
-1552
+407
 ()
-1551
-***
-1697
+406
 ()
-1549
+405
 ()
-1546
-***
-1768
+404
 ()
-1544
-***
-1798
+403
 ()
-1542
+402
 ()
-1541
-***
-1650
+401
 ()
-1540
+400
 ()
-1539
+399
 ()
-1538
+398
 ()
-1536
+396
 ()
-1534
+395
 ()
-1533
+393
 ()
-1532
+392
 ()
-1529
+390
 ()
-1528
+389
 ()
-1527
+388
 ()
-1526
+387
 ()
-1525
+386
 ()
-1524
-***
-1736
+385
 ()
-1523
+384
 ()
-1522
-***
-1794
+383
 ()
-1521
+382
 ()
-1519
+381
 ()
-1517
-***
-1687
+380
 ()
-1516
+379
 ()
-1515
+377
 ()
-1514
+375
 ()
-1513
+374
 ()
-1512
+373
 ()
-1511
+372
 ()
-1510
+371
 ()
-1509
+370
 ()
-1508
+369
 ()
-1507
+368
 ()
-1506
+366
 ()
-1505
+365
 ()
-1504
+364
 ()
-1503
+362
 ()
-1502
-***
-1746
+361
 ()
-1501
-***
-1766
+360
 ()
-1498
+359
 ()
-1497
+358
 ()
-1495
+357
 ()
-1494
+356
 ()
-1493
-***
-1673
+355
 ()
-1490
-***
-1774
+354
 ()
-1486
+352
 ()
-1485
+350
 ()
-1482
+347
 ()
-1481
+344
 ()
-1480
+342
 ()
-1479
+341
 ()
-1477
+340
 ()
-1476
+339
 ()
-1475
+338
 ()
-1473
+332
 ()
-1472
+325
 ()
-1471
+320
 ***
-1728
-()
-1470
-()
-1469
-()
-1467
-()
-1466
-()
-1465
-()
-1464
-()
-1463
-()
-1462
-()
-1461
-()
-1460
-()
-1459
-()
-1455
+345
 ()
-1454
+319
 ***
-1782
-()
-1453
-()
-1452
-()
-1451
+497
 ()
-1449
+318
 ***
-1732
-()
-1448
-()
-1445
-()
-1444
-()
-1442
-()
-1441
-()
-1440
-()
-1438
-()
-1437
-()
-1435
-()
-1433
+349
 ()
-1432
+317
 ***
-1665
-()
-1431
-()
-1426
-()
-1425
-()
-1424
-()
-1423
+408
 ()
-1420
+316
 ***
-1499
+324
 ()
-1419
-***
-1457
+315
 ***
-1653
+328
 ()
-1418
-***
-1577
+314
 ***
-1664
+335
 ()
-1417
+313
 ***
-1489
+378
 ()
-1416
+312
 ***
-1545
+426
 ()
-1415
+311
 ***
-1430
+411
 ()
-1414
+304
 ***
-1434
+343
 ()
-1413
-***
-1594
+303
 ***
-1735
+417
 ()
-1412
-***
-1560
+302
 ***
-1724
+485
 ()
-1411
+301
 ***
-1428
+363
 ()
-1404
-***
-1496
+300
 ***
-1780
+376
 ()
-1403
+299
 ***
-1561
+333
 ()
-1402
+292
 ***
-1548
+337
 ()
-1401
-***
-1569
+291
 ***
-1792
+409
 ()
-1400
+290
 ***
-1537
+421
 ()
-1399
+289
 ***
-1429
+437
 ()
-1392
+288
 ***
-1580
+430
 ()
-1391
+287
 ***
-1410
+348
 ()
-1390
+286
 ***
-1500
+326
 ()
-1389
-***
-1483
+284
 ()
-1388
+282
 ***
-1570
+308
 ()
-1387
+279
 ***
-1543
-()
-1386
+297
 ***
-1558
-()
-1385
+305
 ()
-1384
+278
 ()
-1382
+277
 ***
-1439
+307
 ()
-1381
+276
 ***
-1677
+296
 ()
-1380
+273
 ()
-1378
-***
-1397
+271
 ()
-1377
-***
-1787
+265
 ()
-1376
+246
 ***
-1408
-()
-1375
-()
-1374
+267
 ()
-1373
+245
 ***
-1671
-()
-1372
+280
 ()
-1370
+244
 ***
-1793
+391
 ()
-1369
+243
+***
+330
 ()
-1365
+242
 ***
-1762
+456
 ()
-1346
+241
+***
+346
 ()
-1345
+240
 ***
-1566
+483
 ()
-1344
+239
 ***
-1520
+260
 ()
-1343
+238
 ***
-1492
+261
 ()
-1342
+237
 ***
-1576
+262
 ***
-1656
+294
 ()
-1341
+236
 ***
-1447
+253
 ()
-1340
+229
 ***
-1550
-()
-1339
-()
-1338
-()
-1337
+397
 ()
-1329
+228
 ***
-1336
+298
 ()
-1328
+227
 ***
-1446
+415
 ()
-1327
+226
 ***
-1607
-()
-1325
-()
-1324
-()
-1323
-()
-1317
-()
-1315
+264
 ()
-1311
+224
 ***
-1450
+232
+()
+222
 ***
-1720
+233
 ()
-1310
+217
 ***
-1619
+250
 ()
-1309
+211
 ***
-1458
+331
 ()
-1308
+210
+***
+394
 ()
-1307
+209
 ***
-1427
+410
 ()
-1306
+208
 ***
-1364
+321
+()
+207
 ***
-1696
+327
 ()
-1299
+206
+***
+309
 ()
-1297
+199
 ***
-1395
+259
 ()
-1296
+198
+***
+219
 ()
-1295
+197
 ***
-1326
+220
 ()
-1294
+195
 ***
-1371
+429
 ()
-1293
+194
 ***
-1456
+470
 ()
-1292
+193
 ***
-1312
+274
 ()
-1291
+191
+***
+203
 ()
-1290
+190
 ***
-1363
+263
 ()
-1282
+189
+215
 ***
-1592
+230
 ()
-1281
+188
+***
+266
 ***
-1379
+295
 ()
-1280
+182
 ***
-1478
+329
 ()
-1279
+181
 ***
-1436
+351
 ()
-1278
+180
 ***
-1620
+441
 ()
-1277
+179
 ***
-1487
+453
 ()
-1276
+178
 ***
-1288
+418
 ()
-1275
+177
 ***
-1596
+353
 ()
-1274
+176
 ***
-1322
+422
 ()
-1273
+175
 ***
-1305
+225
 ***
-1699
+255
 ()
-1272
+174
+***
+269
 ()
-1271
+173
 ***
-1484
+214
 ()
-1270
+172
 ***
-1518
+186
 ()
-1269
+171
 ***
-1289
+447
 ()
-1268
+170
 ***
-1443
+270
 ***
-1786
+306
 ()
-1265
+169
+***
+336
 ()
-1243
+168
 ***
-1368
+285
 ()
-1242
+165
+***
+249
 ()
-1241
+146
 ***
-1421
+154
+()
+143
 ***
-1749
+334
 ()
-1240
+142
 ***
-1260
+216
 ***
-1678
-()
-1239
-()
-1238
+257
 ()
-1236
+141
 ***
-1263
+167
 ***
-1767
+251
 ()
-1235
+140
+***
+162
+***
+293
 ()
-1234
+139
+***
+158
 ()
-1233
+137
+***
+166
+***
+201
 ()
-1232
+136
 ***
-1752
+160
 ()
-1231
+134
 ***
-1791
+221
 ()
-1230
+132
+***
+213
 ()
-1229
+131
+***
+187
 ()
-1228
+129
 ***
-1702
+235
 ()
-1227
+128
+***
+153
 ()
-1226
+127
+***
+156
 ()
-1225
+126
+***
+159
+***
+218
 ()
-1224
+125
+***
+155
 ()
-1223
+124
+***
+157
 ()
-1216
+123
 ***
-1531
+152
 ()
-1215
+116
 ***
-1530
+135
 ***
-1797
+163
 ()
-1214
+115
 ***
-1474
+133
 ***
-1742
-()
-1213
+204
 ***
-1488
+248
 ()
-1212
+114
 ***
-1298
+192
 ***
-1789
+212
 ()
-1211
+113
 ***
-1491
+268
 ()
-1210
+112
 ***
-1600
+367
 ()
-1209
+111
 ***
-1244
+272
 ()
-1208
-***
-1609
+110
 ***
-1704
+434
 ()
-1207
+109
 ***
-1237
+323
 ()
-1206
+108
 ***
-1468
+281
 ()
-1205
+107
+***
+144
 ***
-1547
+148
 ()
-1204
+106
 ***
-1246
+275
 ()
-1203
+105
 ***
-1593
+196
 ***
-1734
+254
 ()
-1202
+104
 ***
-1535
-()
-1200
-()
-1198
-()
-1196
-()
-1195
+138
+***
+161
 ()
-1194
+103
 ***
-1302
+310
 ()
-1192
+102
+***
+223
+***
+252
 ()
-1191
+80
 ()
-1189
+70
 ()
-1188
+69
 ()
-1187
+68
 ()
-1186
+66
 ()
-1183
+64
 ()
-1181
+62
 ***
-1778
-()
-1179
+256
 ()
-1178
+61
+***
+93
 ()
-1177
+59
 ***
-1645
+120
 ()
-1176
+58
 ()
-1175
-***
-1318
+57
 ***
-1649
+183
 ()
-1173
+55
 ()
-1172
+54
 ()
-1171
-()
-1169
+52
 ***
-1654
+147
 ()
-1168
+51
 ***
-1692
-()
-1167
-()
-1164
-()
-1163
+118
 ()
-1162
+50
 ***
-1716
-()
-1160
+83
 ()
-1159
+49
 ***
-1663
-()
-1157
-()
-1156
-()
-1155
-()
-1154
-()
-1153
-()
-1152
-()
-1150
-()
-1149
-()
-1147
-()
-1145
+98
 ()
-1143
+48
 ***
-1711
-()
-1142
+99
 ()
-1141
+47
 ()
-1140
+46
+***
+184
 ()
-1139
+45
 ***
-1755
+121
 ()
-1138
+44
 ()
-1137
+43
 ***
-1218
+88
 ()
-1136
-***
-1248
+42
 ***
-1670
-()
-1135
-()
-1134
+122
 ()
-1133
+41
 ***
-1662
-()
-1132
-()
-1131
-()
-1129
-()
-1128
-()
-1127
+91
 ()
-1126
+40
 ***
-1301
-()
-1125
-()
-1124
-()
-1123
-()
-1122
+96
 ()
-1120
+38
 ***
-1332
+100
 ()
-1119
+37
 ***
-1737
+149
 ()
-1118
+36
 ***
-1718
+74
 ()
-1117
-***
-1250
+35
 ***
-1658
-()
-1116
-()
-1114
-()
-1113
-()
-1112
+258
 ()
-1111
+34
 ***
-1772
+151
 ()
-1110
+33
 ***
-1359
+85
 ()
-1109
+32
 ()
-1108
+31
 ***
-1251
-()
-1106
+94
 ()
-1105
+30
 ***
-1771
-()
-1104
-()
-1102
-()
-1101
-()
-1100
+97
 ()
-1099
+29
 ***
-1689
-()
-1098
+90
 ()
-1097
+28
 ***
-1785
+89
 ()
-1096
+27
 ***
-1685
-()
-1095
-()
-1094
-()
-1093
-()
-1092
-()
-1091
-()
-1090
-()
-1089
-()
-1088
-()
-1087
-()
-1086
-()
-1085
+92
 ()
-1084
+26
 ***
-1739
-()
-1083
+72
 ***
-1405
-()
-1082
-()
-1081
-()
-1080
-()
-1078
-()
-1077
-()
-1076
-()
-1075
-()
-1074
-()
-1073
-()
-1072
-()
-1071
+247
 ()
-1070
+25
 ***
-1707
+86
 ()
-1069
+24
 ***
-1334
-()
-1068
-()
-1066
-()
-1065
-()
-1064
-()
-1063
-()
-1062
-()
-1061
-()
-1060
-()
-1059
-()
-1058
+82
 ()
-1057
+23
 ***
-1744
-()
-1056
-()
-1055
-()
-1054
+87
 ***
-1335
+117
 ()
-1052
+22
 ***
-1660
-()
-1051
-()
-1050
-()
-1049
-()
-1048
-()
-1047
-()
-1046
-()
-1045
+76
 ***
-1357
+119
 ()
-1044
+21
 ***
-1659
-()
-1043
-()
-1041
-()
-1040
-()
-1039
-()
-1038
-()
-1037
-()
-1036
-()
-1035
-()
-1034
+84
 ()
-1033
+20
 ***
-1690
-()
-1031
-()
-1030
-()
-1029
+78
 ()
-1028
+19
 ***
-1675
-()
-1027
-()
-1026
+73
 ()
-1025
+18
 ***
-1257
-()
-1024
-()
-1023
-()
-1022
-()
-1021
-()
-1020
+81
 ()
-1019
+17
 ***
-1284
+65
 ()
-1018
+16
+***
+63
+***
+101
 ()
-1017
+15
 ***
-1754
+71
 ()
-1016
+14
+***
+75
 ()
-1015
+13
 ***
-1247
+322
 ()
-1014
+12
+***
+77
 ()
-1013
+11
+***
+283
 ()
-1012
+10
 ***
-1319
+79
 ()
-1011
+9
 ***
-1352
+145
 ***
-1651
-()
-1010
+150
 ()
-1009
+8
 ***
-1705
-()
-1008
-()
-1007
-()
-1006
+67
 ()
-1005
+7
 ***
-1679
-()
-1004
-()
-1003
-()
-1002
-()
-1001
-()
-1000
+60
 ***
-1731
-()
-999
-()
-998
-()
-996
-()
-995
-()
-994
-()
-993
+231
 ()
-991
+6
 ***
-1799
-()
-990
-()
-989
-()
-987
-()
-986
-()
-985
-()
-984
-()
-983
+56
 ***
-1745
-()
-982
+234
 ()
-981
+5
 ***
-1644
-()
-980
-()
-979
-()
-978
-()
-977
-()
-976
-()
-975
-()
-974
+164
 ***
-1222
-()
-973
-()
-972
-()
-971
-()
-970
-()
-968
-()
-967
-()
-966
+202
 ()
-965
+4
 ***
-1347
-()
-964
-()
-963
+53
 ()
-962
+3
 ***
-1743
-()
-961
+130
 ***
-1719
-()
-960
+185
 ***
-1758
-()
-959
+200
 ()
-958
+2
 ***
-1733
-()
-957
-***
-1775
-()
-956
-()
-955
-()
-954
-()
-953
-()
-952
-***
-1393
-()
-951
-()
-950
-()
-949
-***
-1669
-()
-948
-()
-947
-()
-946
-***
-1681
-()
-944
-***
-1686
-()
-943
-()
-942
-()
-940
-***
-1783
-()
-939
-()
-938
-()
-937
-()
-936
-()
-934
-()
-933
-()
-932
-()
-931
-()
-930
-()
-929
-***
-1713
-()
-928
-***
-1725
-()
-927
-()
-926
-()
-925
-()
-924
-()
-923
-()
-922
-()
-921
-***
-1394
-()
-920
-***
-1741
-()
-919
-***
-1708
-()
-918
-()
-917
-()
-916
-***
-1723
-()
-915
-()
-914
-()
-913
-()
-912
-()
-911
-()
-910
-()
-909
-***
-1795
-()
-908
-()
-907
-()
-906
-()
-905
-()
-904
-()
-903
-***
-1330
-()
-902
-()
-901
-()
-900
-()
-899
-()
-898
-()
-897
-***
-1790
-()
-896
-***
-1652
-()
-895
-***
-1761
-()
-894
-()
-893
-()
-892
-()
-891
-()
-890
-***
-1253
-()
-889
-***
-1698
-()
-888
-()
-887
-()
-885
-()
-884
-***
-1703
-()
-883
-()
-882
-()
-881
-***
-1747
-()
-880
-()
-879
-***
-1647
-()
-878
-***
-1358
-()
-877
-***
-1407
-()
-876
-()
-875
-()
-874
-***
-1283
-()
-873
-***
-1682
-()
-872
-()
-871
-()
-870
-()
-869
-()
-868
-()
-867
-***
-1751
-()
-866
-()
-865
-()
-864
-()
-863
-()
-862
-***
-1753
-()
-861
-()
-860
-()
-859
-()
-858
-***
-1348
-()
-857
-()
-856
-***
-1350
-()
-855
-***
-1252
-()
-854
-()
-853
-***
-1201
-()
-852
-()
-851
-()
-850
-***
-1361
-()
-849
-()
-848
-()
-847
-()
-846
-()
-845
-()
-844
-()
-843
-()
-842
-()
-841
-()
-840
-()
-839
-***
-1360
-()
-838
-()
-837
-()
-836
-()
-835
-()
-834
-()
-833
-***
-1406
-()
-832
-()
-831
-()
-830
-()
-829
-()
-827
-()
-826
-()
-825
-()
-824
-()
-823
-()
-822
-()
-821
-***
-1683
-()
-820
-***
-1672
-()
-819
-()
-818
-***
-1693
-()
-816
-()
-815
-***
-1313
-()
-814
-()
-813
-()
-812
-***
-1727
-()
-811
-()
-810
-()
-809
-()
-808
-()
-806
-()
-805
-***
-1217
-()
-804
-()
-803
-()
-802
-()
-801
-()
-800
-()
-799
-()
-798
-()
-797
-***
-1220
-()
-796
-***
-1788
-()
-795
-()
-794
-***
-1255
-***
-1674
-()
-793
-***
-1740
-()
-792
-()
-791
-***
-1349
-()
-790
-()
-789
-()
-788
-()
-787
-***
-1800
-()
-786
-()
-785
-()
-784
-()
-783
-()
-782
-()
-781
-()
-780
-()
-779
-()
-778
-()
-777
-()
-776
-()
-775
-()
-774
-***
-1331
-()
-773
-()
-772
-***
-1256
-()
-771
-()
-770
-()
-769
-()
-768
-()
-767
-()
-766
-()
-765
-()
-764
-()
-763
-()
-762
-()
-761
-()
-759
-()
-758
-***
-1655
-()
-757
-()
-756
-()
-755
-***
-1760
-()
-754
-()
-753
-()
-752
-()
-751
-***
-1285
-***
-1680
-()
-750
-***
-1261
-()
-749
-()
-748
-()
-747
-()
-746
-()
-745
-***
-1362
-()
-744
-()
-743
-()
-742
-()
-741
-()
-740
-()
-739
-()
-738
-()
-737
-()
-736
-***
-1729
-()
-735
-***
-1769
-()
-734
-()
-733
-()
-732
-***
-1715
-()
-731
-()
-730
-()
-729
-()
-728
-()
-727
-***
-1721
-()
-726
-()
-725
-()
-724
-()
-723
-()
-722
-()
-721
-()
-720
-()
-719
-***
-1770
-()
-718
-()
-717
-()
-716
-()
-715
-()
-714
-()
-713
-()
-712
-()
-711
-***
-1779
-()
-710
-***
-1221
-()
-709
-()
-708
-()
-707
-()
-706
-()
-705
-***
-1661
-()
-704
-()
-703
-()
-702
-()
-701
-***
-1722
-()
-700
-()
-699
-()
-698
-()
-697
-()
-696
-()
-695
-()
-694
-()
-693
-()
-692
-***
-1776
-()
-690
-***
-1254
-()
-689
-***
-1738
-()
-688
-()
-687
-()
-686
-***
-1287
-()
-685
-()
-684
-()
-683
-()
-682
-()
-681
-***
-1666
-()
-680
-()
-679
-()
-678
-()
-677
-()
-676
-()
-675
-()
-674
-***
-1695
-()
-673
-***
-1709
-()
-672
-()
-671
-()
-670
-()
-669
-()
-667
-()
-666
-()
-665
-()
-664
-()
-663
-()
-662
-()
-661
-***
-1730
-()
-660
-()
-659
-()
-658
-()
-657
-()
-656
-()
-655
-()
-654
-()
-653
-()
-652
-()
-651
-()
-650
-()
-649
-()
-648
-()
-647
-()
-594
-610
-622
-()
-588
-()
-584
-601
-615
-***
-1266
-()
-578
-590
-603
-()
-574
-592
-607
-***
-1646
-()
-568
-()
-564
-582
-598
-()
-558
-570
-***
-1351
-***
-1712
-()
-554
-572
-()
-547
-560
-580
-()
-543
-562
-()
-536
-549
-()
-533
-551
-***
-1356
-()
-527
-539
-()
-524
-541
-()
-518
-530
-()
-514
-531
-()
-508
-521
-***
-1657
-()
-503
-523
-()
-498
-***
-1383
-()
-493
-512
-***
-1422
-()
-487
-501
-()
-484
-515
-***
-1354
-***
-1701
-()
-481
-502
-()
-475
-490
-511
-()
-472
-504
-538
-566
-589
-613
-629
-()
-470
-491
-***
-1303
-()
-464
-()
-461
-494
-526
-556
-579
-605
-623
-639
-()
-450
-***
-1355
-()
-438
-483
-516
-545
-569
-596
-616
-633
-()
-426
-471
-506
-535
-559
-586
-608
-627
-643
-***
-1259
-()
-414
-459
-495
-525
-548
-576
-599
-620
-635
-***
-1765
-()
-402
-449
-500
-()
-401
-446
-482
-***
-1258
-()
-391
-418
-434
-455
-()
-388
-435
-469
-()
-384
-407
-429
-454
-()
-378
-406
-447
-467
-()
-376
-423
-457
-***
-1316
-()
-373
-394
-416
-442
-()
-367
-393
-410
-431
-452
-478
-()
-366
-413
-465
-513
-550
-585
-617
-638
-()
-364
-***
-1146
-***
-1750
-()
-363
-411
-445
-()
-359
-396
-***
-1396
-***
-1756
-()
-357
-381
-405
-430
-458
-479
-***
-1353
-()
-351
-368
-()
-350
-389
-***
-1103
-()
-349
-397
-433
-()
-344
-369
-422
-443
-()
-338
-354
-380
-398
-419
-441
-466
-()
-335
-385
-421
-()
-332
-355
-***
-1320
-()
-327
-375
-428
-505
-540
-575
-609
-632
-***
-1321
-()
-326
-341
-***
-1182
-()
-323
-372
-409
-()
-319
-342
-()
-318
-331
-343
-356
-370
-382
-395
-408
-420
-432
-444
-456
-468
-480
-492
-()
-312
-***
-1161
-()
-309
-346
-383
-***
-1366
-()
-308
-***
-1262
-()
-305
-330
-()
-299
-315
-***
-1333
-***
-1676
-()
-293
-317
-()
-289
-296
-334
-371
-***
-1158
-()
-286
-302
-329
-()
-281
-303
-***
-1219
-()
-280
-292
-304
-316
-***
-1264
-()
-275
-290
-()
-270
-291
-()
-265
-278
-***
-1184
-()
-260
-279
-()
-255
-268
-***
-1367
-()
-250
-269
-***
-1165
-()
-245
-***
-1115
-()
-240
-259
-***
-1067
-()
-235
-248
-***
-1199
-***
-1717
-()
-230
-249
-()
-225
-238
-***
-1197
-()
-220
-239
-()
-215
-***
-935
-()
-210
-229
-258
-***
-1193
-()
-205
-***
-988
-()
-200
-219
-()
-195
-***
-1166
-***
-1667
-()
-190
-209
-***
-1079
-***
-1249
-()
-185
-198
-***
-1180
-()
-131
-161
-192
-221
-252
-282
-320
-()
-118
-151
-182
-211
-242
-271
-306
-***
-1398
-()
-112
-127
-140
-***
-1148
-()
-105
-141
-172
-201
-232
-261
-294
-()
-103
-***
-1144
-()
-92
-130
-162
-191
-222
-251
-283
-321
-358
-()
-91
-***
-886
-()
-80
-136
-174
-216
-254
-301
-348
-404
-473
-520
-555
-591
-619
-()
-79
-117
-152
-181
-212
-241
-272
-307
-345
-***
-1267
-()
-78
-116
-***
-1042
-***
-1764
-()
-74
-87
-100
-114
-126
-()
-73
-95
-111
-128
-149
-165
-178
-***
-997
-()
-70
-119
-166
-204
-246
-285
-339
-386
-439
-485
-532
-557
-583
-606
-625
-640
-646
-()
-66
-104
-142
-171
-202
-231
-262
-295
-333
-***
-1286
-()
-62
-86
-108
-124
-139
-159
-175
-188
-***
-1130
-()
-61
-72
-88
-113
-134
-148
-160
-179
-208
-228
-***
-1245
-()
-57
-106
-157
-193
-236
-273
-328
-374
-427
-474
-519
-552
-()
-56
-***
-969
-()
-55
-109
-153
-197
-233
-277
-325
-377
-424
-476
-517
-553
-577
-602
-621
-637
-645
-()
-54
-110
-154
-196
-234
-276
-324
-379
-425
-477
-522
-561
-595
-624
-642
-()
-53
-90
-129
-***
-1190
-()
-52
-***
-941
-()
-50
-59
-75
-99
-121
-137
-150
-169
-***
-945
-***
-1706
-()
-49
-69
-85
-101
-125
-145
-158
-170
-189
-218
-***
-992
-***
-1781
-()
-48
-68
-122
-163
-207
-244
-288
-336
-390
-436
-489
-529
-()
-45
-96
-143
-187
-223
-267
-310
-360
-***
-1409
-()
-41
-60
-82
-98
-115
-138
-155
-168
-180
-199
-()
-39
-67
-123
-164
-206
-243
-287
-337
-392
-437
-488
-534
-571
-604
-630
-()
-36
-43
-***
-1170
-()
-26
-***
-1107
-()
-24
-40
-***
-817
-()
-20
-46
-97
-144
-186
-224
-266
-311
-365
-412
-463
-507
-542
-567
-593
-614
-631
-()
-19
-33
-***
-1185
-***
-1694
-()
-18
-44
-94
-146
-184
-226
-263
-314
-361
-415
-460
-509
-546
-573
-597
-618
-634
-644
-()
-17
-31
-65
-102
-***
-807
-()
-16
-34
-84
-133
-177
-213
-256
-298
-352
-400
-453
-496
-()
-14
-37
-81
-135
-173
-217
-253
-300
-347
-403
-448
-499
-537
-563
-587
-611
-628
-641
-()
-13
-22
-42
-***
-691
-()
-12
-47
-93
-147
-183
-227
-264
-313
-362
-417
-462
-510
-544
-581
-612
-636
-()
-11
-29
-***
-760
-()
-10
-30
-63
-***
-1121
-()
-9
-35
-83
-132
-176
-214
-257
-297
-353
-399
-451
-497
-***
-1304
-()
-8
-25
-64
-***
-828
-()
-7
-23
-51
-89
-***
-1174
-***
-1300
-()
-6
-28
-71
-120
-167
-203
-247
-284
-340
-387
-440
-486
-528
-565
-600
-626
-()
-5
-***
-668
-()
-4
-32
-77
-***
-1032
-()
-3
-15
-38
-76
-***
-1314
-()
-2
-27
-***
-1053
+205
 ()
 1
-21
-58
-107
-156
-194
-237
-274
-322
 ***
-1151
+39
 ***
-1777
+95
diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/stdout b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/stdout
index bd861b307..448df62f5 100644
--- a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/stdout
+++ b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/stdout
@@ -4,19 +4,15 @@ by  Andreas Loebel
 Copyright (c) 1998,1999   ZIB Berlin
 All Rights Reserved.
 
-nodes                      : 1800
-active arcs                : 8190
-simplex iterations         : 6837
-flow value                 : 12860044181
-new implicit arcs          : 300000
-active arcs                : 308190
-simplex iterations         : 11843
-flow value                 : 9360043604
-new implicit arcs          : 22787
-active arcs                : 330977
-simplex iterations         : 11931
-flow value                 : 9360043512
-checksum                   : 798014
+nodes                      : 500
+active arcs                : 1905
+simplex iterations         : 1502
+flow value                 : 4990014995
+new implicit arcs          : 23867
+active arcs                : 25772
+simplex iterations         : 2663
+flow value                 : 3080014995
+checksum                   : 68389
 optimal
 M5 Simulator System
 
@@ -25,9 +21,9 @@ The Regents of The University of Michigan
 All Rights Reserved
 
 
-M5 compiled May 15 2007 13:02:31
-M5 started Tue May 15 14:23:47 2007
+M5 compiled Jun 21 2007 21:15:48
+M5 started Fri Jun 22 01:58:18 2007
 M5 executing on zizzer.eecs.umich.edu
 command line: build/SPARC_SE/m5.fast -d build/SPARC_SE/tests/fast/long/10.mcf/sparc/linux/simple-atomic tests/run.py long/10.mcf/sparc/linux/simple-atomic
 Global frequency set at 1000000000000 ticks per second
-Exiting @ tick 859797266500 because target called exit()
+Exiting @ tick 121920085500 because target called exit()
diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-timing/config.ini b/tests/long/10.mcf/ref/sparc/linux/simple-timing/config.ini
index 9beb527ea..fe99eeeb9 100644
--- a/tests/long/10.mcf/ref/sparc/linux/simple-timing/config.ini
+++ b/tests/long/10.mcf/ref/sparc/linux/simple-timing/config.ini
@@ -31,6 +31,7 @@ icache_port=system.cpu.icache.cpu_side
 [system.cpu.dcache]
 type=BaseCache
 adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=2
 block_size=64
 compressed_bus=false
@@ -44,7 +45,7 @@ prefetch_access=false
 prefetch_cache_check_push=true
 prefetch_data_accesses_only=false
 prefetch_degree=1
-prefetch_latency=10
+prefetch_latency=10000
 prefetch_miss=false
 prefetch_past_page=false
 prefetch_policy=none
@@ -69,6 +70,7 @@ mem_side=system.cpu.toL2Bus.port[1]
 [system.cpu.icache]
 type=BaseCache
 adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=2
 block_size=64
 compressed_bus=false
@@ -82,7 +84,7 @@ prefetch_access=false
 prefetch_cache_check_push=true
 prefetch_data_accesses_only=false
 prefetch_degree=1
-prefetch_latency=10
+prefetch_latency=10000
 prefetch_miss=false
 prefetch_past_page=false
 prefetch_policy=none
@@ -107,6 +109,7 @@ mem_side=system.cpu.toL2Bus.port[0]
 [system.cpu.l2cache]
 type=BaseCache
 adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=2
 block_size=64
 compressed_bus=false
@@ -120,7 +123,7 @@ prefetch_access=false
 prefetch_cache_check_push=true
 prefetch_data_accesses_only=false
 prefetch_degree=1
-prefetch_latency=10
+prefetch_latency=100000
 prefetch_miss=false
 prefetch_past_page=false
 prefetch_policy=none
@@ -160,7 +163,7 @@ env=
 euid=100
 executable=/dist/m5/cpu2000/binaries/sparc/linux/mcf
 gid=100
-input=/dist/m5/cpu2000/data/mcf/lgred/input/mcf.in
+input=/dist/m5/cpu2000/data/mcf/smred/input/mcf.in
 output=cout
 pid=100
 ppid=99
@@ -174,7 +177,7 @@ bus_id=0
 clock=1000
 responder_set=false
 width=64
-port=system.physmem.port system.cpu.l2cache.mem_side
+port=system.physmem.port[0] system.cpu.l2cache.mem_side
 
 [system.physmem]
 type=PhysicalMemory
diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-timing/config.out b/tests/long/10.mcf/ref/sparc/linux/simple-timing/config.out
index 5d5cc71c1..81e06c995 100644
--- a/tests/long/10.mcf/ref/sparc/linux/simple-timing/config.out
+++ b/tests/long/10.mcf/ref/sparc/linux/simple-timing/config.out
@@ -26,7 +26,7 @@ block_size=64
 type=LiveProcess
 cmd=mcf mcf.in
 executable=/dist/m5/cpu2000/binaries/sparc/linux/mcf
-input=/dist/m5/cpu2000/data/mcf/lgred/input/mcf.in
+input=/dist/m5/cpu2000/data/mcf/smred/input/mcf.in
 output=cout
 env=
 cwd=build/SPARC_SE/tests/fast/long/10.mcf/sparc/linux/simple-timing
@@ -94,7 +94,7 @@ prefetch_access=false
 prefetcher_size=100
 prefetch_past_page=false
 prefetch_serial_squash=false
-prefetch_latency=10
+prefetch_latency=10000
 prefetch_degree=1
 prefetch_policy=none
 prefetch_cache_check_push=true
@@ -131,7 +131,7 @@ prefetch_access=false
 prefetcher_size=100
 prefetch_past_page=false
 prefetch_serial_squash=false
-prefetch_latency=10
+prefetch_latency=10000
 prefetch_degree=1
 prefetch_policy=none
 prefetch_cache_check_push=true
@@ -168,7 +168,7 @@ prefetch_access=false
 prefetcher_size=100
 prefetch_past_page=false
 prefetch_serial_squash=false
-prefetch_latency=10
+prefetch_latency=100000
 prefetch_degree=1
 prefetch_policy=none
 prefetch_cache_check_push=true
diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-timing/m5stats.txt b/tests/long/10.mcf/ref/sparc/linux/simple-timing/m5stats.txt
index c95331047..56d2d33b9 100644
--- a/tests/long/10.mcf/ref/sparc/linux/simple-timing/m5stats.txt
+++ b/tests/long/10.mcf/ref/sparc/linux/simple-timing/m5stats.txt
@@ -1,77 +1,77 @@
 
 ---------- Begin Simulation Statistics ----------
-host_inst_rate                                 480485                       # Simulator instruction rate (inst/s)
-host_mem_usage                                 155316                       # Number of bytes of host memory used
-host_seconds                                  3578.87                       # Real time elapsed on the host
-host_tick_rate                              745845171                       # Simulator tick rate (ticks/s)
+host_inst_rate                                 697152                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 155896                       # Number of bytes of host memory used
+host_seconds                                   349.77                       # Real time elapsed on the host
+host_tick_rate                             1027373651                       # Simulator tick rate (ticks/s)
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
-sim_insts                                  1719594534                       # Number of instructions simulated
-sim_seconds                                  2.669285                       # Number of seconds simulated
-sim_ticks                                2669284585000                       # Number of ticks simulated
-system.cpu.dcache.ReadReq_accesses          607807189                       # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_avg_miss_latency 12893.226605                       # average ReadReq miss latency
-system.cpu.dcache.ReadReq_avg_mshr_miss_latency 11893.226605                       # average ReadReq mshr miss latency
-system.cpu.dcache.ReadReq_hits              594739458                       # number of ReadReq hits
-system.cpu.dcache.ReadReq_miss_latency   168485217000                       # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_rate          0.021500                       # miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_misses             13067731                       # number of ReadReq misses
-system.cpu.dcache.ReadReq_mshr_miss_latency 155417486000                       # number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_rate     0.021500                       # mshr miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_mshr_misses        13067731                       # number of ReadReq MSHR misses
-system.cpu.dcache.SwapReq_accesses              15448                       # number of SwapReq accesses(hits+misses)
-system.cpu.dcache.SwapReq_avg_miss_latency 13090.909091                       # average SwapReq miss latency
-system.cpu.dcache.SwapReq_avg_mshr_miss_latency 12090.909091                       # average SwapReq mshr miss latency
-system.cpu.dcache.SwapReq_hits                  15437                       # number of SwapReq hits
-system.cpu.dcache.SwapReq_miss_latency         144000                       # number of SwapReq miss cycles
-system.cpu.dcache.SwapReq_miss_rate          0.000712                       # miss rate for SwapReq accesses
-system.cpu.dcache.SwapReq_misses                   11                       # number of SwapReq misses
-system.cpu.dcache.SwapReq_mshr_miss_latency       133000                       # number of SwapReq MSHR miss cycles
-system.cpu.dcache.SwapReq_mshr_miss_rate     0.000712                       # mshr miss rate for SwapReq accesses
-system.cpu.dcache.SwapReq_mshr_misses              11                       # number of SwapReq MSHR misses
-system.cpu.dcache.WriteReq_accesses         166970997                       # number of WriteReq accesses(hits+misses)
-system.cpu.dcache.WriteReq_avg_miss_latency 12404.292450                       # average WriteReq miss latency
-system.cpu.dcache.WriteReq_avg_mshr_miss_latency 11404.292450                       # average WriteReq mshr miss latency
-system.cpu.dcache.WriteReq_hits             165264000                       # number of WriteReq hits
-system.cpu.dcache.WriteReq_miss_latency   21174090000                       # number of WriteReq miss cycles
-system.cpu.dcache.WriteReq_miss_rate         0.010223                       # miss rate for WriteReq accesses
-system.cpu.dcache.WriteReq_misses             1706997                       # number of WriteReq misses
-system.cpu.dcache.WriteReq_mshr_miss_latency  19467093000                       # number of WriteReq MSHR miss cycles
-system.cpu.dcache.WriteReq_mshr_miss_rate     0.010223                       # mshr miss rate for WriteReq accesses
-system.cpu.dcache.WriteReq_mshr_misses        1706997                       # number of WriteReq MSHR misses
+sim_insts                                   243840172                       # Number of instructions simulated
+sim_seconds                                  0.359341                       # Number of seconds simulated
+sim_ticks                                359340764000                       # Number of ticks simulated
+system.cpu.dcache.ReadReq_accesses           82219469                       # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_avg_miss_latency 12000.343864                       # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency 11000.343864                       # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_hits               81326673                       # number of ReadReq hits
+system.cpu.dcache.ReadReq_miss_latency    10713859000                       # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_rate          0.010859                       # miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_misses               892796                       # number of ReadReq misses
+system.cpu.dcache.ReadReq_mshr_miss_latency   9821063000                       # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_rate     0.010859                       # mshr miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_mshr_misses          892796                       # number of ReadReq MSHR misses
+system.cpu.dcache.SwapReq_accesses               3886                       # number of SwapReq accesses(hits+misses)
+system.cpu.dcache.SwapReq_avg_miss_latency        12500                       # average SwapReq miss latency
+system.cpu.dcache.SwapReq_avg_mshr_miss_latency        11500                       # average SwapReq mshr miss latency
+system.cpu.dcache.SwapReq_hits                   3882                       # number of SwapReq hits
+system.cpu.dcache.SwapReq_miss_latency          50000                       # number of SwapReq miss cycles
+system.cpu.dcache.SwapReq_miss_rate          0.001029                       # miss rate for SwapReq accesses
+system.cpu.dcache.SwapReq_misses                    4                       # number of SwapReq misses
+system.cpu.dcache.SwapReq_mshr_miss_latency        46000                       # number of SwapReq MSHR miss cycles
+system.cpu.dcache.SwapReq_mshr_miss_rate     0.001029                       # mshr miss rate for SwapReq accesses
+system.cpu.dcache.SwapReq_mshr_misses               4                       # number of SwapReq MSHR misses
+system.cpu.dcache.WriteReq_accesses          22901836                       # number of WriteReq accesses(hits+misses)
+system.cpu.dcache.WriteReq_avg_miss_latency 12623.899964                       # average WriteReq miss latency
+system.cpu.dcache.WriteReq_avg_mshr_miss_latency 11623.899964                       # average WriteReq mshr miss latency
+system.cpu.dcache.WriteReq_hits              22855133                       # number of WriteReq hits
+system.cpu.dcache.WriteReq_miss_latency     589574000                       # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_rate         0.002039                       # miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_misses               46703                       # number of WriteReq misses
+system.cpu.dcache.WriteReq_mshr_miss_latency    542871000                       # number of WriteReq MSHR miss cycles
+system.cpu.dcache.WriteReq_mshr_miss_rate     0.002039                       # mshr miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_mshr_misses          46703                       # number of WriteReq MSHR misses
 system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.dcache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.dcache.avg_refs                  51.440428                       # Average number of references to valid blocks.
+system.cpu.dcache.avg_refs                 110.894471                       # Average number of references to valid blocks.
 system.cpu.dcache.blocked_no_mshrs                  0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_no_targets                0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.dcache.cache_copies                      0                       # number of cache copies performed
-system.cpu.dcache.demand_accesses           774778186                       # number of demand (read+write) accesses
-system.cpu.dcache.demand_avg_miss_latency 12836.737637                       # average overall miss latency
-system.cpu.dcache.demand_avg_mshr_miss_latency 11836.737637                       # average overall mshr miss latency
-system.cpu.dcache.demand_hits               760003458                       # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency    189659307000                       # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_rate           0.019070                       # miss rate for demand accesses
-system.cpu.dcache.demand_misses              14774728                       # number of demand (read+write) misses
+system.cpu.dcache.demand_accesses           105121305                       # number of demand (read+write) accesses
+system.cpu.dcache.demand_avg_miss_latency 12031.341172                       # average overall miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency 11031.341172                       # average overall mshr miss latency
+system.cpu.dcache.demand_hits               104181806                       # number of demand (read+write) hits
+system.cpu.dcache.demand_miss_latency     11303433000                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_rate           0.008937                       # miss rate for demand accesses
+system.cpu.dcache.demand_misses                939499                       # number of demand (read+write) misses
 system.cpu.dcache.demand_mshr_hits                  0                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency 174884579000                       # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_rate      0.019070                       # mshr miss rate for demand accesses
-system.cpu.dcache.demand_mshr_misses         14774728                       # number of demand (read+write) MSHR misses
+system.cpu.dcache.demand_mshr_miss_latency  10363934000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_rate      0.008937                       # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_misses           939499                       # number of demand (read+write) MSHR misses
 system.cpu.dcache.fast_writes                       0                       # number of fast writes performed
 system.cpu.dcache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.dcache.overall_accesses          774778186                       # number of overall (read+write) accesses
-system.cpu.dcache.overall_avg_miss_latency 12836.737637                       # average overall miss latency
-system.cpu.dcache.overall_avg_mshr_miss_latency 11836.737637                       # average overall mshr miss latency
+system.cpu.dcache.overall_accesses          105121305                       # number of overall (read+write) accesses
+system.cpu.dcache.overall_avg_miss_latency 12031.341172                       # average overall miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency 11031.341172                       # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits              760003458                       # number of overall hits
-system.cpu.dcache.overall_miss_latency   189659307000                       # number of overall miss cycles
-system.cpu.dcache.overall_miss_rate          0.019070                       # miss rate for overall accesses
-system.cpu.dcache.overall_misses             14774728                       # number of overall misses
+system.cpu.dcache.overall_hits              104181806                       # number of overall hits
+system.cpu.dcache.overall_miss_latency    11303433000                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_rate          0.008937                       # miss rate for overall accesses
+system.cpu.dcache.overall_misses               939499                       # number of overall misses
 system.cpu.dcache.overall_mshr_hits                 0                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency 174884579000                       # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_rate     0.019070                       # mshr miss rate for overall accesses
-system.cpu.dcache.overall_mshr_misses        14774728                       # number of overall MSHR misses
+system.cpu.dcache.overall_mshr_miss_latency  10363934000                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_rate     0.008937                       # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_misses          939499                       # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.dcache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu.dcache.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
@@ -83,57 +83,57 @@ system.cpu.dcache.prefetcher.num_hwpf_issued            0
 system.cpu.dcache.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
 system.cpu.dcache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
-system.cpu.dcache.replacements               14770643                       # number of replacements
-system.cpu.dcache.sampled_refs               14774739                       # Sample count of references to valid blocks.
+system.cpu.dcache.replacements                 935407                       # number of replacements
+system.cpu.dcache.sampled_refs                 939503                       # Sample count of references to valid blocks.
 system.cpu.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse               4094.628585                       # Cycle average of tags in use
-system.cpu.dcache.total_refs                760018895                       # Total number of references to valid blocks.
-system.cpu.dcache.warmup_cycle             3913237000                       # Cycle when the warmup percentage was hit.
-system.cpu.dcache.writebacks                  4191356                       # number of writebacks
-system.cpu.icache.ReadReq_accesses         1719594535                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_avg_miss_latency 13991.120977                       # average ReadReq miss latency
-system.cpu.icache.ReadReq_avg_mshr_miss_latency 12991.120977                       # average ReadReq mshr miss latency
-system.cpu.icache.ReadReq_hits             1719593634                       # number of ReadReq hits
-system.cpu.icache.ReadReq_miss_latency       12606000                       # number of ReadReq miss cycles
-system.cpu.icache.ReadReq_miss_rate          0.000001                       # miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_misses                  901                       # number of ReadReq misses
-system.cpu.icache.ReadReq_mshr_miss_latency     11705000                       # number of ReadReq MSHR miss cycles
-system.cpu.icache.ReadReq_mshr_miss_rate     0.000001                       # mshr miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_mshr_misses             901                       # number of ReadReq MSHR misses
+system.cpu.dcache.tagsinuse               3560.887601                       # Cycle average of tags in use
+system.cpu.dcache.total_refs                104185688                       # Total number of references to valid blocks.
+system.cpu.dcache.warmup_cycle           134116230000                       # Cycle when the warmup percentage was hit.
+system.cpu.dcache.writebacks                    94807                       # number of writebacks
+system.cpu.icache.ReadReq_accesses          243840173                       # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_avg_miss_latency 13993.174061                       # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency 12993.174061                       # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_hits              243839294                       # number of ReadReq hits
+system.cpu.icache.ReadReq_miss_latency       12300000                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_rate          0.000004                       # miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_misses                  879                       # number of ReadReq misses
+system.cpu.icache.ReadReq_mshr_miss_latency     11421000                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_rate     0.000004                       # mshr miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_mshr_misses             879                       # number of ReadReq MSHR misses
 system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.icache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.icache.avg_refs               1908538.994451                       # Average number of references to valid blocks.
+system.cpu.icache.avg_refs               277405.340159                       # Average number of references to valid blocks.
 system.cpu.icache.blocked_no_mshrs                  0                       # number of cycles access was blocked
 system.cpu.icache.blocked_no_targets                0                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.icache.cache_copies                      0                       # number of cache copies performed
-system.cpu.icache.demand_accesses          1719594535                       # number of demand (read+write) accesses
-system.cpu.icache.demand_avg_miss_latency 13991.120977                       # average overall miss latency
-system.cpu.icache.demand_avg_mshr_miss_latency 12991.120977                       # average overall mshr miss latency
-system.cpu.icache.demand_hits              1719593634                       # number of demand (read+write) hits
-system.cpu.icache.demand_miss_latency        12606000                       # number of demand (read+write) miss cycles
-system.cpu.icache.demand_miss_rate           0.000001                       # miss rate for demand accesses
-system.cpu.icache.demand_misses                   901                       # number of demand (read+write) misses
+system.cpu.icache.demand_accesses           243840173                       # number of demand (read+write) accesses
+system.cpu.icache.demand_avg_miss_latency 13993.174061                       # average overall miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency 12993.174061                       # average overall mshr miss latency
+system.cpu.icache.demand_hits               243839294                       # number of demand (read+write) hits
+system.cpu.icache.demand_miss_latency        12300000                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_rate           0.000004                       # miss rate for demand accesses
+system.cpu.icache.demand_misses                   879                       # number of demand (read+write) misses
 system.cpu.icache.demand_mshr_hits                  0                       # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_miss_latency     11705000                       # number of demand (read+write) MSHR miss cycles
-system.cpu.icache.demand_mshr_miss_rate      0.000001                       # mshr miss rate for demand accesses
-system.cpu.icache.demand_mshr_misses              901                       # number of demand (read+write) MSHR misses
+system.cpu.icache.demand_mshr_miss_latency     11421000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_rate      0.000004                       # mshr miss rate for demand accesses
+system.cpu.icache.demand_mshr_misses              879                       # number of demand (read+write) MSHR misses
 system.cpu.icache.fast_writes                       0                       # number of fast writes performed
 system.cpu.icache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.icache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.icache.overall_accesses         1719594535                       # number of overall (read+write) accesses
-system.cpu.icache.overall_avg_miss_latency 13991.120977                       # average overall miss latency
-system.cpu.icache.overall_avg_mshr_miss_latency 12991.120977                       # average overall mshr miss latency
+system.cpu.icache.overall_accesses          243840173                       # number of overall (read+write) accesses
+system.cpu.icache.overall_avg_miss_latency 13993.174061                       # average overall miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency 12993.174061                       # average overall mshr miss latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.icache.overall_hits             1719593634                       # number of overall hits
-system.cpu.icache.overall_miss_latency       12606000                       # number of overall miss cycles
-system.cpu.icache.overall_miss_rate          0.000001                       # miss rate for overall accesses
-system.cpu.icache.overall_misses                  901                       # number of overall misses
+system.cpu.icache.overall_hits              243839294                       # number of overall hits
+system.cpu.icache.overall_miss_latency       12300000                       # number of overall miss cycles
+system.cpu.icache.overall_miss_rate          0.000004                       # miss rate for overall accesses
+system.cpu.icache.overall_misses                  879                       # number of overall misses
 system.cpu.icache.overall_mshr_hits                 0                       # number of overall MSHR hits
-system.cpu.icache.overall_mshr_miss_latency     11705000                       # number of overall MSHR miss cycles
-system.cpu.icache.overall_mshr_miss_rate     0.000001                       # mshr miss rate for overall accesses
-system.cpu.icache.overall_mshr_misses             901                       # number of overall MSHR misses
+system.cpu.icache.overall_mshr_miss_latency     11421000                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_rate     0.000004                       # mshr miss rate for overall accesses
+system.cpu.icache.overall_mshr_misses             879                       # number of overall MSHR misses
 system.cpu.icache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.icache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu.icache.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
@@ -145,64 +145,60 @@ system.cpu.icache.prefetcher.num_hwpf_issued            0
 system.cpu.icache.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
 system.cpu.icache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
-system.cpu.icache.replacements                     31                       # number of replacements
-system.cpu.icache.sampled_refs                    901                       # Sample count of references to valid blocks.
+system.cpu.icache.replacements                     25                       # number of replacements
+system.cpu.icache.sampled_refs                    879                       # Sample count of references to valid blocks.
 system.cpu.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse                737.715884                       # Cycle average of tags in use
-system.cpu.icache.total_refs               1719593634                       # Total number of references to valid blocks.
+system.cpu.icache.tagsinuse                716.200092                       # Cycle average of tags in use
+system.cpu.icache.total_refs                243839294                       # Total number of references to valid blocks.
 system.cpu.icache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.icache.writebacks                        0                       # number of writebacks
 system.cpu.idle_fraction                            0                       # Percentage of idle cycles
-system.cpu.l2cache.ReadReq_accesses          14775639                       # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_avg_miss_latency 12999.785859                       # average ReadReq miss latency
-system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 10999.785859                       # average ReadReq mshr miss latency
-system.cpu.l2cache.ReadReq_hits               8592784                       # number of ReadReq hits
-system.cpu.l2cache.ReadReq_miss_latency   80375791000                       # number of ReadReq miss cycles
-system.cpu.l2cache.ReadReq_miss_rate         0.418449                       # miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_misses             6182855                       # number of ReadReq misses
-system.cpu.l2cache.ReadReq_mshr_miss_latency  68010081000                       # number of ReadReq MSHR miss cycles
-system.cpu.l2cache.ReadReq_mshr_miss_rate     0.418449                       # mshr miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_mshr_misses        6182855                       # number of ReadReq MSHR misses
-system.cpu.l2cache.Writeback_accesses         4191356                       # number of Writeback accesses(hits+misses)
-system.cpu.l2cache.Writeback_hits             4164131                       # number of Writeback hits
-system.cpu.l2cache.Writeback_miss_rate       0.006496                       # miss rate for Writeback accesses
-system.cpu.l2cache.Writeback_misses             27225                       # number of Writeback misses
-system.cpu.l2cache.Writeback_mshr_miss_rate     0.006496                       # mshr miss rate for Writeback accesses
-system.cpu.l2cache.Writeback_mshr_misses        27225                       # number of Writeback MSHR misses
+system.cpu.l2cache.ReadReq_accesses            940381                       # number of ReadReq accesses(hits+misses)
+system.cpu.l2cache.ReadReq_avg_miss_latency        13000                       # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_avg_mshr_miss_latency        11000                       # average ReadReq mshr miss latency
+system.cpu.l2cache.ReadReq_hits                924777                       # number of ReadReq hits
+system.cpu.l2cache.ReadReq_miss_latency     202852000                       # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_miss_rate         0.016593                       # miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_misses               15604                       # number of ReadReq misses
+system.cpu.l2cache.ReadReq_mshr_miss_latency    171644000                       # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_rate     0.016593                       # mshr miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_mshr_misses          15604                       # number of ReadReq MSHR misses
+system.cpu.l2cache.Writeback_accesses           94807                       # number of Writeback accesses(hits+misses)
+system.cpu.l2cache.Writeback_hits               94807                       # number of Writeback hits
 system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.l2cache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.l2cache.avg_refs                  2.063273                       # Average number of references to valid blocks.
+system.cpu.l2cache.avg_refs                 65.341195                       # Average number of references to valid blocks.
 system.cpu.l2cache.blocked_no_mshrs                 0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_no_targets               0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.l2cache.cache_copies                     0                       # number of cache copies performed
-system.cpu.l2cache.demand_accesses           14775639                       # number of demand (read+write) accesses
-system.cpu.l2cache.demand_avg_miss_latency 12999.785859                       # average overall miss latency
-system.cpu.l2cache.demand_avg_mshr_miss_latency 10999.785859                       # average overall mshr miss latency
-system.cpu.l2cache.demand_hits                8592784                       # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency    80375791000                       # number of demand (read+write) miss cycles
-system.cpu.l2cache.demand_miss_rate          0.418449                       # miss rate for demand accesses
-system.cpu.l2cache.demand_misses              6182855                       # number of demand (read+write) misses
+system.cpu.l2cache.demand_accesses             940381                       # number of demand (read+write) accesses
+system.cpu.l2cache.demand_avg_miss_latency        13000                       # average overall miss latency
+system.cpu.l2cache.demand_avg_mshr_miss_latency        11000                       # average overall mshr miss latency
+system.cpu.l2cache.demand_hits                 924777                       # number of demand (read+write) hits
+system.cpu.l2cache.demand_miss_latency      202852000                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_rate          0.016593                       # miss rate for demand accesses
+system.cpu.l2cache.demand_misses                15604                       # number of demand (read+write) misses
 system.cpu.l2cache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
-system.cpu.l2cache.demand_mshr_miss_latency  68010081000                       # number of demand (read+write) MSHR miss cycles
-system.cpu.l2cache.demand_mshr_miss_rate     0.418449                       # mshr miss rate for demand accesses
-system.cpu.l2cache.demand_mshr_misses         6182855                       # number of demand (read+write) MSHR misses
+system.cpu.l2cache.demand_mshr_miss_latency    171644000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_rate     0.016593                       # mshr miss rate for demand accesses
+system.cpu.l2cache.demand_mshr_misses           15604                       # number of demand (read+write) MSHR misses
 system.cpu.l2cache.fast_writes                      0                       # number of fast writes performed
 system.cpu.l2cache.mshr_cap_events                  0                       # number of times MSHR cap was activated
 system.cpu.l2cache.no_allocate_misses               0                       # Number of misses that were no-allocate
-system.cpu.l2cache.overall_accesses          18966995                       # number of overall (read+write) accesses
-system.cpu.l2cache.overall_avg_miss_latency 12942.794779                       # average overall miss latency
-system.cpu.l2cache.overall_avg_mshr_miss_latency 10999.785859                       # average overall mshr miss latency
+system.cpu.l2cache.overall_accesses           1035188                       # number of overall (read+write) accesses
+system.cpu.l2cache.overall_avg_miss_latency        13000                       # average overall miss latency
+system.cpu.l2cache.overall_avg_mshr_miss_latency        11000                       # average overall mshr miss latency
 system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.l2cache.overall_hits              12756915                       # number of overall hits
-system.cpu.l2cache.overall_miss_latency   80375791000                       # number of overall miss cycles
-system.cpu.l2cache.overall_miss_rate         0.327415                       # miss rate for overall accesses
-system.cpu.l2cache.overall_misses             6210080                       # number of overall misses
+system.cpu.l2cache.overall_hits               1019584                       # number of overall hits
+system.cpu.l2cache.overall_miss_latency     202852000                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_rate         0.015074                       # miss rate for overall accesses
+system.cpu.l2cache.overall_misses               15604                       # number of overall misses
 system.cpu.l2cache.overall_mshr_hits                0                       # number of overall MSHR hits
-system.cpu.l2cache.overall_mshr_miss_latency  68010081000                       # number of overall MSHR miss cycles
-system.cpu.l2cache.overall_mshr_miss_rate     0.325980                       # mshr miss rate for overall accesses
-system.cpu.l2cache.overall_mshr_misses        6182855                       # number of overall MSHR misses
+system.cpu.l2cache.overall_mshr_miss_latency    171644000                       # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_rate     0.015074                       # mshr miss rate for overall accesses
+system.cpu.l2cache.overall_mshr_misses          15604                       # number of overall MSHR misses
 system.cpu.l2cache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.l2cache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu.l2cache.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
@@ -214,17 +210,17 @@ system.cpu.l2cache.prefetcher.num_hwpf_issued            0
 system.cpu.l2cache.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
 system.cpu.l2cache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
-system.cpu.l2cache.replacements               6150087                       # number of replacements
-system.cpu.l2cache.sampled_refs               6182855                       # Sample count of references to valid blocks.
+system.cpu.l2cache.replacements                     0                       # number of replacements
+system.cpu.l2cache.sampled_refs                 15604                       # Sample count of references to valid blocks.
 system.cpu.l2cache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse             26129.060966                       # Cycle average of tags in use
-system.cpu.l2cache.total_refs                12756915                       # Total number of references to valid blocks.
-system.cpu.l2cache.warmup_cycle          806915893000                       # Cycle when the warmup percentage was hit.
-system.cpu.l2cache.writebacks                 1069081                       # number of writebacks
+system.cpu.l2cache.tagsinuse             10833.027960                       # Cycle average of tags in use
+system.cpu.l2cache.total_refs                 1019584                       # Total number of references to valid blocks.
+system.cpu.l2cache.warmup_cycle                     0                       # Cycle when the warmup percentage was hit.
+system.cpu.l2cache.writebacks                       0                       # number of writebacks
 system.cpu.not_idle_fraction                        1                       # Percentage of non-idle cycles
-system.cpu.numCycles                     2669284585000                       # number of cpu cycles simulated
-system.cpu.num_insts                       1719594534                       # Number of instructions executed
-system.cpu.num_refs                         774793634                       # Number of memory references
-system.cpu.workload.PROG:num_syscalls             632                       # Number of system calls
+system.cpu.numCycles                     359340764000                       # number of cpu cycles simulated
+system.cpu.num_insts                        243840172                       # Number of instructions executed
+system.cpu.num_refs                         105125191                       # Number of memory references
+system.cpu.workload.PROG:num_syscalls             428                       # Number of system calls
 
 ---------- End Simulation Statistics   ----------
diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-timing/mcf.out b/tests/long/10.mcf/ref/sparc/linux/simple-timing/mcf.out
index 6bbb02cf0..095132477 100644
--- a/tests/long/10.mcf/ref/sparc/linux/simple-timing/mcf.out
+++ b/tests/long/10.mcf/ref/sparc/linux/simple-timing/mcf.out
@@ -1,3092 +1,999 @@
 ()
-1642
-***
-1759
-()
-1641
-***
-1691
+500
 ()
-1640
+499
 ()
-1639
+498
 ()
-1638
+496
 ()
-1637
+495
 ()
-1636
+494
 ()
-1635
+493
 ()
-1634
+492
 ()
-1633
+491
 ()
-1632
+490
 ()
-1631
+489
 ()
-1630
+488
 ()
-1629
+487
 ()
-1628
+486
 ()
-1627
+484
 ()
-1626
+482
 ()
-1625
-***
-1784
+481
 ()
-1624
+480
 ()
-1623
+479
 ()
-1622
-***
-1688
+478
 ()
-1621
+477
 ()
-1618
+476
 ()
-1617
-***
-1796
+475
 ()
-1616
+474
 ()
-1615
-***
-1668
+473
 ()
-1614
+472
 ()
-1613
+471
 ()
-1612
-***
-1700
+469
 ()
-1611
+468
 ()
-1610
+467
 ()
-1608
+466
 ()
-1606
+465
 ()
-1605
+464
 ()
-1604
+463
 ()
-1603
+462
 ()
-1602
+461
 ()
-1601
+460
 ()
-1599
+459
 ()
-1598
-***
-1714
+458
 ()
-1597
+457
 ()
-1595
+455
 ()
-1591
+454
 ()
-1590
-***
-1773
+452
 ()
-1589
+451
 ()
-1588
+450
 ()
-1587
-***
-1710
+449
 ()
-1586
+448
 ()
-1585
+446
 ()
-1584
-***
-1748
+445
 ()
-1583
-***
-1648
+444
 ()
-1582
+443
 ()
-1581
-***
-1757
+442
 ()
-1579
+440
 ()
-1578
-***
-1726
+439
 ()
-1575
-***
-1763
+438
 ()
-1574
+436
 ()
-1573
+435
 ()
-1572
+433
 ()
-1571
+432
 ()
-1568
+431
 ()
-1567
+428
 ()
-1565
-***
-1643
+427
 ()
-1564
+425
 ()
-1563
+424
 ()
-1562
+423
 ()
-1559
+420
 ()
-1557
+419
 ()
-1556
+416
 ()
-1555
+414
 ()
-1554
+413
 ()
-1553
-***
-1684
+412
 ()
-1552
+407
 ()
-1551
-***
-1697
+406
 ()
-1549
+405
 ()
-1546
-***
-1768
+404
 ()
-1544
-***
-1798
+403
 ()
-1542
+402
 ()
-1541
-***
-1650
+401
 ()
-1540
+400
 ()
-1539
+399
 ()
-1538
+398
 ()
-1536
+396
 ()
-1534
+395
 ()
-1533
+393
 ()
-1532
+392
 ()
-1529
+390
 ()
-1528
+389
 ()
-1527
+388
 ()
-1526
+387
 ()
-1525
+386
 ()
-1524
-***
-1736
+385
 ()
-1523
+384
 ()
-1522
-***
-1794
+383
 ()
-1521
+382
 ()
-1519
+381
 ()
-1517
-***
-1687
+380
 ()
-1516
+379
 ()
-1515
+377
 ()
-1514
+375
 ()
-1513
+374
 ()
-1512
+373
 ()
-1511
+372
 ()
-1510
+371
 ()
-1509
+370
 ()
-1508
+369
 ()
-1507
+368
 ()
-1506
+366
 ()
-1505
+365
 ()
-1504
+364
 ()
-1503
+362
 ()
-1502
-***
-1746
+361
 ()
-1501
-***
-1766
+360
 ()
-1498
+359
 ()
-1497
+358
 ()
-1495
+357
 ()
-1494
+356
 ()
-1493
-***
-1673
+355
 ()
-1490
-***
-1774
+354
 ()
-1486
+352
 ()
-1485
+350
 ()
-1482
+347
 ()
-1481
+344
 ()
-1480
+342
 ()
-1479
+341
 ()
-1477
+340
 ()
-1476
+339
 ()
-1475
+338
 ()
-1473
+332
 ()
-1472
+325
 ()
-1471
+320
 ***
-1728
-()
-1470
-()
-1469
-()
-1467
-()
-1466
-()
-1465
-()
-1464
-()
-1463
-()
-1462
-()
-1461
-()
-1460
-()
-1459
-()
-1455
+345
 ()
-1454
+319
 ***
-1782
-()
-1453
-()
-1452
-()
-1451
+497
 ()
-1449
+318
 ***
-1732
-()
-1448
-()
-1445
-()
-1444
-()
-1442
-()
-1441
-()
-1440
-()
-1438
-()
-1437
-()
-1435
-()
-1433
+349
 ()
-1432
+317
 ***
-1665
-()
-1431
-()
-1426
-()
-1425
-()
-1424
-()
-1423
+408
 ()
-1420
+316
 ***
-1499
+324
 ()
-1419
-***
-1457
+315
 ***
-1653
+328
 ()
-1418
-***
-1577
+314
 ***
-1664
+335
 ()
-1417
+313
 ***
-1489
+378
 ()
-1416
+312
 ***
-1545
+426
 ()
-1415
+311
 ***
-1430
+411
 ()
-1414
+304
 ***
-1434
+343
 ()
-1413
-***
-1594
+303
 ***
-1735
+417
 ()
-1412
-***
-1560
+302
 ***
-1724
+485
 ()
-1411
+301
 ***
-1428
+363
 ()
-1404
-***
-1496
+300
 ***
-1780
+376
 ()
-1403
+299
 ***
-1561
+333
 ()
-1402
+292
 ***
-1548
+337
 ()
-1401
-***
-1569
+291
 ***
-1792
+409
 ()
-1400
+290
 ***
-1537
+421
 ()
-1399
+289
 ***
-1429
+437
 ()
-1392
+288
 ***
-1580
+430
 ()
-1391
+287
 ***
-1410
+348
 ()
-1390
+286
 ***
-1500
+326
 ()
-1389
-***
-1483
+284
 ()
-1388
+282
 ***
-1570
+308
 ()
-1387
+279
 ***
-1543
-()
-1386
+297
 ***
-1558
-()
-1385
+305
 ()
-1384
+278
 ()
-1382
+277
 ***
-1439
+307
 ()
-1381
+276
 ***
-1677
+296
 ()
-1380
+273
 ()
-1378
-***
-1397
+271
 ()
-1377
-***
-1787
+265
 ()
-1376
+246
 ***
-1408
-()
-1375
-()
-1374
+267
 ()
-1373
+245
 ***
-1671
-()
-1372
+280
 ()
-1370
+244
 ***
-1793
+391
 ()
-1369
+243
+***
+330
 ()
-1365
+242
 ***
-1762
+456
 ()
-1346
+241
+***
+346
 ()
-1345
+240
 ***
-1566
+483
 ()
-1344
+239
 ***
-1520
+260
 ()
-1343
+238
 ***
-1492
+261
 ()
-1342
+237
 ***
-1576
+262
 ***
-1656
+294
 ()
-1341
+236
 ***
-1447
+253
 ()
-1340
+229
 ***
-1550
-()
-1339
-()
-1338
-()
-1337
+397
 ()
-1329
+228
 ***
-1336
+298
 ()
-1328
+227
 ***
-1446
+415
 ()
-1327
+226
 ***
-1607
-()
-1325
-()
-1324
-()
-1323
-()
-1317
-()
-1315
+264
 ()
-1311
+224
 ***
-1450
+232
+()
+222
 ***
-1720
+233
 ()
-1310
+217
 ***
-1619
+250
 ()
-1309
+211
 ***
-1458
+331
 ()
-1308
+210
+***
+394
 ()
-1307
+209
 ***
-1427
+410
 ()
-1306
+208
 ***
-1364
+321
+()
+207
 ***
-1696
+327
 ()
-1299
+206
+***
+309
 ()
-1297
+199
 ***
-1395
+259
 ()
-1296
+198
+***
+219
 ()
-1295
+197
 ***
-1326
+220
 ()
-1294
+195
 ***
-1371
+429
 ()
-1293
+194
 ***
-1456
+470
 ()
-1292
+193
 ***
-1312
+274
 ()
-1291
+191
+***
+203
 ()
-1290
+190
 ***
-1363
+263
 ()
-1282
+189
+215
 ***
-1592
+230
 ()
-1281
+188
+***
+266
 ***
-1379
+295
 ()
-1280
+182
 ***
-1478
+329
 ()
-1279
+181
 ***
-1436
+351
 ()
-1278
+180
 ***
-1620
+441
 ()
-1277
+179
 ***
-1487
+453
 ()
-1276
+178
 ***
-1288
+418
 ()
-1275
+177
 ***
-1596
+353
 ()
-1274
+176
 ***
-1322
+422
 ()
-1273
+175
 ***
-1305
+225
 ***
-1699
+255
 ()
-1272
+174
+***
+269
 ()
-1271
+173
 ***
-1484
+214
 ()
-1270
+172
 ***
-1518
+186
 ()
-1269
+171
 ***
-1289
+447
 ()
-1268
+170
 ***
-1443
+270
 ***
-1786
+306
 ()
-1265
+169
+***
+336
 ()
-1243
+168
 ***
-1368
+285
 ()
-1242
+165
+***
+249
 ()
-1241
+146
 ***
-1421
+154
+()
+143
 ***
-1749
+334
 ()
-1240
+142
 ***
-1260
+216
 ***
-1678
-()
-1239
-()
-1238
+257
 ()
-1236
+141
 ***
-1263
+167
 ***
-1767
+251
 ()
-1235
+140
+***
+162
+***
+293
 ()
-1234
+139
+***
+158
 ()
-1233
+137
+***
+166
+***
+201
 ()
-1232
+136
 ***
-1752
+160
 ()
-1231
+134
 ***
-1791
+221
 ()
-1230
+132
+***
+213
 ()
-1229
+131
+***
+187
 ()
-1228
+129
 ***
-1702
+235
 ()
-1227
+128
+***
+153
 ()
-1226
+127
+***
+156
 ()
-1225
+126
+***
+159
+***
+218
 ()
-1224
+125
+***
+155
 ()
-1223
+124
+***
+157
 ()
-1216
+123
 ***
-1531
+152
 ()
-1215
+116
 ***
-1530
+135
 ***
-1797
+163
 ()
-1214
+115
 ***
-1474
+133
 ***
-1742
-()
-1213
+204
 ***
-1488
+248
 ()
-1212
+114
 ***
-1298
+192
 ***
-1789
+212
 ()
-1211
+113
 ***
-1491
+268
 ()
-1210
+112
 ***
-1600
+367
 ()
-1209
+111
 ***
-1244
+272
 ()
-1208
-***
-1609
+110
 ***
-1704
+434
 ()
-1207
+109
 ***
-1237
+323
 ()
-1206
+108
 ***
-1468
+281
 ()
-1205
+107
+***
+144
 ***
-1547
+148
 ()
-1204
+106
 ***
-1246
+275
 ()
-1203
+105
 ***
-1593
+196
 ***
-1734
+254
 ()
-1202
+104
 ***
-1535
-()
-1200
-()
-1198
-()
-1196
-()
-1195
+138
+***
+161
 ()
-1194
+103
 ***
-1302
+310
 ()
-1192
+102
+***
+223
+***
+252
 ()
-1191
+80
 ()
-1189
+70
 ()
-1188
+69
 ()
-1187
+68
 ()
-1186
+66
 ()
-1183
+64
 ()
-1181
+62
 ***
-1778
-()
-1179
+256
 ()
-1178
+61
+***
+93
 ()
-1177
+59
 ***
-1645
+120
 ()
-1176
+58
 ()
-1175
-***
-1318
+57
 ***
-1649
+183
 ()
-1173
+55
 ()
-1172
+54
 ()
-1171
-()
-1169
+52
 ***
-1654
+147
 ()
-1168
+51
 ***
-1692
-()
-1167
-()
-1164
-()
-1163
+118
 ()
-1162
+50
 ***
-1716
-()
-1160
+83
 ()
-1159
+49
 ***
-1663
-()
-1157
-()
-1156
-()
-1155
-()
-1154
-()
-1153
-()
-1152
-()
-1150
-()
-1149
-()
-1147
-()
-1145
+98
 ()
-1143
+48
 ***
-1711
-()
-1142
+99
 ()
-1141
+47
 ()
-1140
+46
+***
+184
 ()
-1139
+45
 ***
-1755
+121
 ()
-1138
+44
 ()
-1137
+43
 ***
-1218
+88
 ()
-1136
-***
-1248
+42
 ***
-1670
-()
-1135
-()
-1134
+122
 ()
-1133
+41
 ***
-1662
-()
-1132
-()
-1131
-()
-1129
-()
-1128
-()
-1127
+91
 ()
-1126
+40
 ***
-1301
-()
-1125
-()
-1124
-()
-1123
-()
-1122
+96
 ()
-1120
+38
 ***
-1332
+100
 ()
-1119
+37
 ***
-1737
+149
 ()
-1118
+36
 ***
-1718
+74
 ()
-1117
-***
-1250
+35
 ***
-1658
-()
-1116
-()
-1114
-()
-1113
-()
-1112
+258
 ()
-1111
+34
 ***
-1772
+151
 ()
-1110
+33
 ***
-1359
+85
 ()
-1109
+32
 ()
-1108
+31
 ***
-1251
-()
-1106
+94
 ()
-1105
+30
 ***
-1771
-()
-1104
-()
-1102
-()
-1101
-()
-1100
+97
 ()
-1099
+29
 ***
-1689
-()
-1098
+90
 ()
-1097
+28
 ***
-1785
+89
 ()
-1096
+27
 ***
-1685
-()
-1095
-()
-1094
-()
-1093
-()
-1092
-()
-1091
-()
-1090
-()
-1089
-()
-1088
-()
-1087
-()
-1086
-()
-1085
+92
 ()
-1084
+26
 ***
-1739
-()
-1083
+72
 ***
-1405
-()
-1082
-()
-1081
-()
-1080
-()
-1078
-()
-1077
-()
-1076
-()
-1075
-()
-1074
-()
-1073
-()
-1072
-()
-1071
+247
 ()
-1070
+25
 ***
-1707
+86
 ()
-1069
+24
 ***
-1334
-()
-1068
-()
-1066
-()
-1065
-()
-1064
-()
-1063
-()
-1062
-()
-1061
-()
-1060
-()
-1059
-()
-1058
+82
 ()
-1057
+23
 ***
-1744
-()
-1056
-()
-1055
-()
-1054
+87
 ***
-1335
+117
 ()
-1052
+22
 ***
-1660
-()
-1051
-()
-1050
-()
-1049
-()
-1048
-()
-1047
-()
-1046
-()
-1045
+76
 ***
-1357
+119
 ()
-1044
+21
 ***
-1659
-()
-1043
-()
-1041
-()
-1040
-()
-1039
-()
-1038
-()
-1037
-()
-1036
-()
-1035
-()
-1034
+84
 ()
-1033
+20
 ***
-1690
-()
-1031
-()
-1030
-()
-1029
+78
 ()
-1028
+19
 ***
-1675
-()
-1027
-()
-1026
+73
 ()
-1025
+18
 ***
-1257
-()
-1024
-()
-1023
-()
-1022
-()
-1021
-()
-1020
+81
 ()
-1019
+17
 ***
-1284
+65
 ()
-1018
+16
+***
+63
+***
+101
 ()
-1017
+15
 ***
-1754
+71
 ()
-1016
+14
+***
+75
 ()
-1015
+13
 ***
-1247
+322
 ()
-1014
+12
+***
+77
 ()
-1013
+11
+***
+283
 ()
-1012
+10
 ***
-1319
+79
 ()
-1011
+9
 ***
-1352
+145
 ***
-1651
-()
-1010
+150
 ()
-1009
+8
 ***
-1705
-()
-1008
-()
-1007
-()
-1006
+67
 ()
-1005
+7
 ***
-1679
-()
-1004
-()
-1003
-()
-1002
-()
-1001
-()
-1000
+60
 ***
-1731
-()
-999
-()
-998
-()
-996
-()
-995
-()
-994
-()
-993
+231
 ()
-991
+6
 ***
-1799
-()
-990
-()
-989
-()
-987
-()
-986
-()
-985
-()
-984
-()
-983
+56
 ***
-1745
-()
-982
+234
 ()
-981
+5
 ***
-1644
-()
-980
-()
-979
-()
-978
-()
-977
-()
-976
-()
-975
-()
-974
+164
 ***
-1222
-()
-973
-()
-972
-()
-971
-()
-970
-()
-968
-()
-967
-()
-966
+202
 ()
-965
+4
 ***
-1347
-()
-964
-()
-963
+53
 ()
-962
+3
 ***
-1743
-()
-961
+130
 ***
-1719
-()
-960
+185
 ***
-1758
-()
-959
+200
 ()
-958
+2
 ***
-1733
-()
-957
-***
-1775
-()
-956
-()
-955
-()
-954
-()
-953
-()
-952
-***
-1393
-()
-951
-()
-950
-()
-949
-***
-1669
-()
-948
-()
-947
-()
-946
-***
-1681
-()
-944
-***
-1686
-()
-943
-()
-942
-()
-940
-***
-1783
-()
-939
-()
-938
-()
-937
-()
-936
-()
-934
-()
-933
-()
-932
-()
-931
-()
-930
-()
-929
-***
-1713
-()
-928
-***
-1725
-()
-927
-()
-926
-()
-925
-()
-924
-()
-923
-()
-922
-()
-921
-***
-1394
-()
-920
-***
-1741
-()
-919
-***
-1708
-()
-918
-()
-917
-()
-916
-***
-1723
-()
-915
-()
-914
-()
-913
-()
-912
-()
-911
-()
-910
-()
-909
-***
-1795
-()
-908
-()
-907
-()
-906
-()
-905
-()
-904
-()
-903
-***
-1330
-()
-902
-()
-901
-()
-900
-()
-899
-()
-898
-()
-897
-***
-1790
-()
-896
-***
-1652
-()
-895
-***
-1761
-()
-894
-()
-893
-()
-892
-()
-891
-()
-890
-***
-1253
-()
-889
-***
-1698
-()
-888
-()
-887
-()
-885
-()
-884
-***
-1703
-()
-883
-()
-882
-()
-881
-***
-1747
-()
-880
-()
-879
-***
-1647
-()
-878
-***
-1358
-()
-877
-***
-1407
-()
-876
-()
-875
-()
-874
-***
-1283
-()
-873
-***
-1682
-()
-872
-()
-871
-()
-870
-()
-869
-()
-868
-()
-867
-***
-1751
-()
-866
-()
-865
-()
-864
-()
-863
-()
-862
-***
-1753
-()
-861
-()
-860
-()
-859
-()
-858
-***
-1348
-()
-857
-()
-856
-***
-1350
-()
-855
-***
-1252
-()
-854
-()
-853
-***
-1201
-()
-852
-()
-851
-()
-850
-***
-1361
-()
-849
-()
-848
-()
-847
-()
-846
-()
-845
-()
-844
-()
-843
-()
-842
-()
-841
-()
-840
-()
-839
-***
-1360
-()
-838
-()
-837
-()
-836
-()
-835
-()
-834
-()
-833
-***
-1406
-()
-832
-()
-831
-()
-830
-()
-829
-()
-827
-()
-826
-()
-825
-()
-824
-()
-823
-()
-822
-()
-821
-***
-1683
-()
-820
-***
-1672
-()
-819
-()
-818
-***
-1693
-()
-816
-()
-815
-***
-1313
-()
-814
-()
-813
-()
-812
-***
-1727
-()
-811
-()
-810
-()
-809
-()
-808
-()
-806
-()
-805
-***
-1217
-()
-804
-()
-803
-()
-802
-()
-801
-()
-800
-()
-799
-()
-798
-()
-797
-***
-1220
-()
-796
-***
-1788
-()
-795
-()
-794
-***
-1255
-***
-1674
-()
-793
-***
-1740
-()
-792
-()
-791
-***
-1349
-()
-790
-()
-789
-()
-788
-()
-787
-***
-1800
-()
-786
-()
-785
-()
-784
-()
-783
-()
-782
-()
-781
-()
-780
-()
-779
-()
-778
-()
-777
-()
-776
-()
-775
-()
-774
-***
-1331
-()
-773
-()
-772
-***
-1256
-()
-771
-()
-770
-()
-769
-()
-768
-()
-767
-()
-766
-()
-765
-()
-764
-()
-763
-()
-762
-()
-761
-()
-759
-()
-758
-***
-1655
-()
-757
-()
-756
-()
-755
-***
-1760
-()
-754
-()
-753
-()
-752
-()
-751
-***
-1285
-***
-1680
-()
-750
-***
-1261
-()
-749
-()
-748
-()
-747
-()
-746
-()
-745
-***
-1362
-()
-744
-()
-743
-()
-742
-()
-741
-()
-740
-()
-739
-()
-738
-()
-737
-()
-736
-***
-1729
-()
-735
-***
-1769
-()
-734
-()
-733
-()
-732
-***
-1715
-()
-731
-()
-730
-()
-729
-()
-728
-()
-727
-***
-1721
-()
-726
-()
-725
-()
-724
-()
-723
-()
-722
-()
-721
-()
-720
-()
-719
-***
-1770
-()
-718
-()
-717
-()
-716
-()
-715
-()
-714
-()
-713
-()
-712
-()
-711
-***
-1779
-()
-710
-***
-1221
-()
-709
-()
-708
-()
-707
-()
-706
-()
-705
-***
-1661
-()
-704
-()
-703
-()
-702
-()
-701
-***
-1722
-()
-700
-()
-699
-()
-698
-()
-697
-()
-696
-()
-695
-()
-694
-()
-693
-()
-692
-***
-1776
-()
-690
-***
-1254
-()
-689
-***
-1738
-()
-688
-()
-687
-()
-686
-***
-1287
-()
-685
-()
-684
-()
-683
-()
-682
-()
-681
-***
-1666
-()
-680
-()
-679
-()
-678
-()
-677
-()
-676
-()
-675
-()
-674
-***
-1695
-()
-673
-***
-1709
-()
-672
-()
-671
-()
-670
-()
-669
-()
-667
-()
-666
-()
-665
-()
-664
-()
-663
-()
-662
-()
-661
-***
-1730
-()
-660
-()
-659
-()
-658
-()
-657
-()
-656
-()
-655
-()
-654
-()
-653
-()
-652
-()
-651
-()
-650
-()
-649
-()
-648
-()
-647
-()
-594
-610
-622
-()
-588
-()
-584
-601
-615
-***
-1266
-()
-578
-590
-603
-()
-574
-592
-607
-***
-1646
-()
-568
-()
-564
-582
-598
-()
-558
-570
-***
-1351
-***
-1712
-()
-554
-572
-()
-547
-560
-580
-()
-543
-562
-()
-536
-549
-()
-533
-551
-***
-1356
-()
-527
-539
-()
-524
-541
-()
-518
-530
-()
-514
-531
-()
-508
-521
-***
-1657
-()
-503
-523
-()
-498
-***
-1383
-()
-493
-512
-***
-1422
-()
-487
-501
-()
-484
-515
-***
-1354
-***
-1701
-()
-481
-502
-()
-475
-490
-511
-()
-472
-504
-538
-566
-589
-613
-629
-()
-470
-491
-***
-1303
-()
-464
-()
-461
-494
-526
-556
-579
-605
-623
-639
-()
-450
-***
-1355
-()
-438
-483
-516
-545
-569
-596
-616
-633
-()
-426
-471
-506
-535
-559
-586
-608
-627
-643
-***
-1259
-()
-414
-459
-495
-525
-548
-576
-599
-620
-635
-***
-1765
-()
-402
-449
-500
-()
-401
-446
-482
-***
-1258
-()
-391
-418
-434
-455
-()
-388
-435
-469
-()
-384
-407
-429
-454
-()
-378
-406
-447
-467
-()
-376
-423
-457
-***
-1316
-()
-373
-394
-416
-442
-()
-367
-393
-410
-431
-452
-478
-()
-366
-413
-465
-513
-550
-585
-617
-638
-()
-364
-***
-1146
-***
-1750
-()
-363
-411
-445
-()
-359
-396
-***
-1396
-***
-1756
-()
-357
-381
-405
-430
-458
-479
-***
-1353
-()
-351
-368
-()
-350
-389
-***
-1103
-()
-349
-397
-433
-()
-344
-369
-422
-443
-()
-338
-354
-380
-398
-419
-441
-466
-()
-335
-385
-421
-()
-332
-355
-***
-1320
-()
-327
-375
-428
-505
-540
-575
-609
-632
-***
-1321
-()
-326
-341
-***
-1182
-()
-323
-372
-409
-()
-319
-342
-()
-318
-331
-343
-356
-370
-382
-395
-408
-420
-432
-444
-456
-468
-480
-492
-()
-312
-***
-1161
-()
-309
-346
-383
-***
-1366
-()
-308
-***
-1262
-()
-305
-330
-()
-299
-315
-***
-1333
-***
-1676
-()
-293
-317
-()
-289
-296
-334
-371
-***
-1158
-()
-286
-302
-329
-()
-281
-303
-***
-1219
-()
-280
-292
-304
-316
-***
-1264
-()
-275
-290
-()
-270
-291
-()
-265
-278
-***
-1184
-()
-260
-279
-()
-255
-268
-***
-1367
-()
-250
-269
-***
-1165
-()
-245
-***
-1115
-()
-240
-259
-***
-1067
-()
-235
-248
-***
-1199
-***
-1717
-()
-230
-249
-()
-225
-238
-***
-1197
-()
-220
-239
-()
-215
-***
-935
-()
-210
-229
-258
-***
-1193
-()
-205
-***
-988
-()
-200
-219
-()
-195
-***
-1166
-***
-1667
-()
-190
-209
-***
-1079
-***
-1249
-()
-185
-198
-***
-1180
-()
-131
-161
-192
-221
-252
-282
-320
-()
-118
-151
-182
-211
-242
-271
-306
-***
-1398
-()
-112
-127
-140
-***
-1148
-()
-105
-141
-172
-201
-232
-261
-294
-()
-103
-***
-1144
-()
-92
-130
-162
-191
-222
-251
-283
-321
-358
-()
-91
-***
-886
-()
-80
-136
-174
-216
-254
-301
-348
-404
-473
-520
-555
-591
-619
-()
-79
-117
-152
-181
-212
-241
-272
-307
-345
-***
-1267
-()
-78
-116
-***
-1042
-***
-1764
-()
-74
-87
-100
-114
-126
-()
-73
-95
-111
-128
-149
-165
-178
-***
-997
-()
-70
-119
-166
-204
-246
-285
-339
-386
-439
-485
-532
-557
-583
-606
-625
-640
-646
-()
-66
-104
-142
-171
-202
-231
-262
-295
-333
-***
-1286
-()
-62
-86
-108
-124
-139
-159
-175
-188
-***
-1130
-()
-61
-72
-88
-113
-134
-148
-160
-179
-208
-228
-***
-1245
-()
-57
-106
-157
-193
-236
-273
-328
-374
-427
-474
-519
-552
-()
-56
-***
-969
-()
-55
-109
-153
-197
-233
-277
-325
-377
-424
-476
-517
-553
-577
-602
-621
-637
-645
-()
-54
-110
-154
-196
-234
-276
-324
-379
-425
-477
-522
-561
-595
-624
-642
-()
-53
-90
-129
-***
-1190
-()
-52
-***
-941
-()
-50
-59
-75
-99
-121
-137
-150
-169
-***
-945
-***
-1706
-()
-49
-69
-85
-101
-125
-145
-158
-170
-189
-218
-***
-992
-***
-1781
-()
-48
-68
-122
-163
-207
-244
-288
-336
-390
-436
-489
-529
-()
-45
-96
-143
-187
-223
-267
-310
-360
-***
-1409
-()
-41
-60
-82
-98
-115
-138
-155
-168
-180
-199
-()
-39
-67
-123
-164
-206
-243
-287
-337
-392
-437
-488
-534
-571
-604
-630
-()
-36
-43
-***
-1170
-()
-26
-***
-1107
-()
-24
-40
-***
-817
-()
-20
-46
-97
-144
-186
-224
-266
-311
-365
-412
-463
-507
-542
-567
-593
-614
-631
-()
-19
-33
-***
-1185
-***
-1694
-()
-18
-44
-94
-146
-184
-226
-263
-314
-361
-415
-460
-509
-546
-573
-597
-618
-634
-644
-()
-17
-31
-65
-102
-***
-807
-()
-16
-34
-84
-133
-177
-213
-256
-298
-352
-400
-453
-496
-()
-14
-37
-81
-135
-173
-217
-253
-300
-347
-403
-448
-499
-537
-563
-587
-611
-628
-641
-()
-13
-22
-42
-***
-691
-()
-12
-47
-93
-147
-183
-227
-264
-313
-362
-417
-462
-510
-544
-581
-612
-636
-()
-11
-29
-***
-760
-()
-10
-30
-63
-***
-1121
-()
-9
-35
-83
-132
-176
-214
-257
-297
-353
-399
-451
-497
-***
-1304
-()
-8
-25
-64
-***
-828
-()
-7
-23
-51
-89
-***
-1174
-***
-1300
-()
-6
-28
-71
-120
-167
-203
-247
-284
-340
-387
-440
-486
-528
-565
-600
-626
-()
-5
-***
-668
-()
-4
-32
-77
-***
-1032
-()
-3
-15
-38
-76
-***
-1314
-()
-2
-27
-***
-1053
+205
 ()
 1
-21
-58
-107
-156
-194
-237
-274
-322
 ***
-1151
+39
 ***
-1777
+95
diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-timing/stdout b/tests/long/10.mcf/ref/sparc/linux/simple-timing/stdout
index 272fc2ce1..51a3ec215 100644
--- a/tests/long/10.mcf/ref/sparc/linux/simple-timing/stdout
+++ b/tests/long/10.mcf/ref/sparc/linux/simple-timing/stdout
@@ -4,19 +4,15 @@ by  Andreas Loebel
 Copyright (c) 1998,1999   ZIB Berlin
 All Rights Reserved.
 
-nodes                      : 1800
-active arcs                : 8190
-simplex iterations         : 6837
-flow value                 : 12860044181
-new implicit arcs          : 300000
-active arcs                : 308190
-simplex iterations         : 11843
-flow value                 : 9360043604
-new implicit arcs          : 22787
-active arcs                : 330977
-simplex iterations         : 11931
-flow value                 : 9360043512
-checksum                   : 798014
+nodes                      : 500
+active arcs                : 1905
+simplex iterations         : 1502
+flow value                 : 4990014995
+new implicit arcs          : 23867
+active arcs                : 25772
+simplex iterations         : 2663
+flow value                 : 3080014995
+checksum                   : 68389
 optimal
 M5 Simulator System
 
@@ -25,9 +21,9 @@ The Regents of The University of Michigan
 All Rights Reserved
 
 
-M5 compiled May 15 2007 13:02:31
-M5 started Tue May 15 15:05:32 2007
+M5 compiled Jun 21 2007 21:15:48
+M5 started Fri Jun 22 02:01:52 2007
 M5 executing on zizzer.eecs.umich.edu
 command line: build/SPARC_SE/m5.fast -d build/SPARC_SE/tests/fast/long/10.mcf/sparc/linux/simple-timing tests/run.py long/10.mcf/sparc/linux/simple-timing
 Global frequency set at 1000000000000 ticks per second
-Exiting @ tick 2669284585000 because target called exit()
+Exiting @ tick 359340764000 because target called exit()
diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini b/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini
index f2617931a..f112ef506 100644
--- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini
+++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini
@@ -21,6 +21,7 @@ SQEntries=32
 SSITSize=1024
 activity=0
 backComSize=5
+cachePorts=200
 choiceCtrBits=2
 choicePredictorSize=8192
 clock=500
@@ -74,6 +75,15 @@ renameToFetchDelay=1
 renameToIEWDelay=2
 renameToROBDelay=1
 renameWidth=8
+smtCommitPolicy=RoundRobin
+smtFetchPolicy=SingleThread
+smtIQPolicy=Partitioned
+smtIQThreshold=100
+smtLSQPolicy=Partitioned
+smtLSQThreshold=100
+smtNumFetchingThreads=1
+smtROBPolicy=Partitioned
+smtROBThreshold=100
 squashWidth=8
 system=system
 trapLatency=13
@@ -86,6 +96,7 @@ icache_port=system.cpu.icache.cpu_side
 [system.cpu.dcache]
 type=BaseCache
 adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=2
 block_size=64
 compressed_bus=false
@@ -261,6 +272,7 @@ opLat=3
 [system.cpu.icache]
 type=BaseCache
 adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=2
 block_size=64
 compressed_bus=false
@@ -299,6 +311,7 @@ mem_side=system.cpu.toL2Bus.port[0]
 [system.cpu.l2cache]
 type=BaseCache
 adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=2
 block_size=64
 compressed_bus=false
diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt b/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt
index e1bed0c51..2ac86dd84 100644
--- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt
+++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt
@@ -1,39 +1,40 @@
 
 ---------- Begin Simulation Statistics ----------
 global.BPredUnit.BTBCorrect                         0                       # Number of correct BTB predictions (this stat may not work properly.
-global.BPredUnit.BTBHits                          524                       # Number of BTB hits
-global.BPredUnit.BTBLookups                      1590                       # Number of BTB lookups
+global.BPredUnit.BTBHits                          522                       # Number of BTB hits
+global.BPredUnit.BTBLookups                      1584                       # Number of BTB lookups
 global.BPredUnit.RASInCorrect                      57                       # Number of incorrect RAS predictions.
 global.BPredUnit.condIncorrect                    422                       # Number of conditional branches incorrect
-global.BPredUnit.condPredicted                   1093                       # Number of conditional branches predicted
-global.BPredUnit.lookups                         1843                       # Number of BP lookups
+global.BPredUnit.condPredicted                   1088                       # Number of conditional branches predicted
+global.BPredUnit.lookups                         1837                       # Number of BP lookups
 global.BPredUnit.usedRAS                          241                       # Number of times the RAS was used to get a target.
-host_inst_rate                                   7145                       # Simulator instruction rate (inst/s)
-host_seconds                                     0.79                       # Real time elapsed on the host
-host_tick_rate                                5828052                       # Simulator tick rate (ticks/s)
+host_inst_rate                                  39303                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 153768                       # Number of bytes of host memory used
+host_seconds                                     0.14                       # Real time elapsed on the host
+host_tick_rate                               32016268                       # Simulator tick rate (ticks/s)
 memdepunit.memDep.conflictingLoads                 17                       # Number of conflicting loads.
 memdepunit.memDep.conflictingStores               127                       # Number of conflicting stores.
-memdepunit.memDep.insertedLoads                  1876                       # Number of loads inserted to the mem dependence unit.
-memdepunit.memDep.insertedStores                 1144                       # Number of stores inserted to the mem dependence unit.
+memdepunit.memDep.insertedLoads                  1874                       # Number of loads inserted to the mem dependence unit.
+memdepunit.memDep.insertedStores                 1142                       # Number of stores inserted to the mem dependence unit.
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
 sim_insts                                        5623                       # Number of instructions simulated
 sim_seconds                                  0.000005                       # Number of seconds simulated
-sim_ticks                                     4588000                       # Number of ticks simulated
+sim_ticks                                     4589500                       # Number of ticks simulated
 system.cpu.commit.COM:branches                    862                       # Number of branches committed
 system.cpu.commit.COM:bw_lim_events               104                       # number cycles where commit BW limit reached
 system.cpu.commit.COM:bw_limited                    0                       # number of insts not committed due to BW limits
 system.cpu.commit.COM:committed_per_cycle.start_dist                     # Number of insts commited each cycle
-system.cpu.commit.COM:committed_per_cycle.samples         8514                      
+system.cpu.commit.COM:committed_per_cycle.samples         8521                      
 system.cpu.commit.COM:committed_per_cycle.min_value            0                      
-                               0         6195   7276.25%           
-                               1         1158   1360.11%           
-                               2          469    550.86%           
-                               3          176    206.72%           
-                               4          131    153.86%           
-                               5           99    116.28%           
-                               6          109    128.02%           
-                               7           73     85.74%           
-                               8          104    122.15%           
+                               0         6200   7276.14%           
+                               1         1160   1361.34%           
+                               2          469    550.40%           
+                               3          177    207.72%           
+                               4          131    153.74%           
+                               5           98    115.01%           
+                               6          109    127.92%           
+                               7           73     85.67%           
+                               8          104    122.05%           
 system.cpu.commit.COM:committed_per_cycle.max_value            8                      
 system.cpu.commit.COM:committed_per_cycle.end_dist
 
@@ -45,27 +46,27 @@ system.cpu.commit.COM:swp_count                     0                       # Nu
 system.cpu.commit.branchMispredicts               350                       # The number of times a branch was mispredicted
 system.cpu.commit.commitCommittedInsts           5640                       # The number of committed instructions
 system.cpu.commit.commitNonSpecStalls              17                       # The number of times commit has been forced to stall to communicate backwards
-system.cpu.commit.commitSquashedInsts            3588                       # The number of squashed insts skipped by commit
+system.cpu.commit.commitSquashedInsts            3571                       # The number of squashed insts skipped by commit
 system.cpu.committedInsts                        5623                       # Number of Instructions Simulated
 system.cpu.committedInsts_total                  5623                       # Number of Instructions Simulated
-system.cpu.cpi                               1.635604                       # CPI: Cycles Per Instruction
-system.cpu.cpi_total                         1.635604                       # CPI: Total CPI of All Threads
-system.cpu.dcache.ReadReq_accesses               1475                       # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_avg_miss_latency  5928.571429                       # average ReadReq miss latency
-system.cpu.dcache.ReadReq_avg_mshr_miss_latency         5385                       # average ReadReq mshr miss latency
-system.cpu.dcache.ReadReq_hits                   1342                       # number of ReadReq hits
-system.cpu.dcache.ReadReq_miss_latency         788500                       # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_rate          0.090169                       # miss rate for ReadReq accesses
+system.cpu.cpi                               1.636315                       # CPI: Cycles Per Instruction
+system.cpu.cpi_total                         1.636315                       # CPI: Total CPI of All Threads
+system.cpu.dcache.ReadReq_accesses               1470                       # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_avg_miss_latency  5932.330827                       # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency         5380                       # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_hits                   1337                       # number of ReadReq hits
+system.cpu.dcache.ReadReq_miss_latency         789000                       # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_rate          0.090476                       # miss rate for ReadReq accesses
 system.cpu.dcache.ReadReq_misses                  133                       # number of ReadReq misses
 system.cpu.dcache.ReadReq_mshr_hits                33                       # number of ReadReq MSHR hits
-system.cpu.dcache.ReadReq_mshr_miss_latency       538500                       # number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_rate     0.067797                       # mshr miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_mshr_miss_latency       538000                       # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_rate     0.068027                       # mshr miss rate for ReadReq accesses
 system.cpu.dcache.ReadReq_mshr_misses             100                       # number of ReadReq MSHR misses
 system.cpu.dcache.WriteReq_accesses               812                       # number of WriteReq accesses(hits+misses)
-system.cpu.dcache.WriteReq_avg_miss_latency  4501.457726                       # average WriteReq miss latency
+system.cpu.dcache.WriteReq_avg_miss_latency  4504.373178                       # average WriteReq miss latency
 system.cpu.dcache.WriteReq_avg_mshr_miss_latency  5116.438356                       # average WriteReq mshr miss latency
 system.cpu.dcache.WriteReq_hits                   469                       # number of WriteReq hits
-system.cpu.dcache.WriteReq_miss_latency       1544000                       # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_latency       1545000                       # number of WriteReq miss cycles
 system.cpu.dcache.WriteReq_miss_rate         0.422414                       # miss rate for WriteReq accesses
 system.cpu.dcache.WriteReq_misses                 343                       # number of WriteReq misses
 system.cpu.dcache.WriteReq_mshr_hits              270                       # number of WriteReq MSHR hits
@@ -74,37 +75,37 @@ system.cpu.dcache.WriteReq_mshr_miss_rate     0.089901                       # m
 system.cpu.dcache.WriteReq_mshr_misses             73                       # number of WriteReq MSHR misses
 system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.dcache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.dcache.avg_refs                  10.468208                       # Average number of references to valid blocks.
+system.cpu.dcache.avg_refs                  10.439306                       # Average number of references to valid blocks.
 system.cpu.dcache.blocked_no_mshrs                  0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_no_targets                0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.dcache.cache_copies                      0                       # number of cache copies performed
-system.cpu.dcache.demand_accesses                2287                       # number of demand (read+write) accesses
-system.cpu.dcache.demand_avg_miss_latency  4900.210084                       # average overall miss latency
-system.cpu.dcache.demand_avg_mshr_miss_latency  5271.676301                       # average overall mshr miss latency
-system.cpu.dcache.demand_hits                    1811                       # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency         2332500                       # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_rate           0.208133                       # miss rate for demand accesses
+system.cpu.dcache.demand_accesses                2282                       # number of demand (read+write) accesses
+system.cpu.dcache.demand_avg_miss_latency  4903.361345                       # average overall miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency  5268.786127                       # average overall mshr miss latency
+system.cpu.dcache.demand_hits                    1806                       # number of demand (read+write) hits
+system.cpu.dcache.demand_miss_latency         2334000                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_rate           0.208589                       # miss rate for demand accesses
 system.cpu.dcache.demand_misses                   476                       # number of demand (read+write) misses
 system.cpu.dcache.demand_mshr_hits                303                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency       912000                       # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_rate      0.075645                       # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_miss_latency       911500                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_rate      0.075811                       # mshr miss rate for demand accesses
 system.cpu.dcache.demand_mshr_misses              173                       # number of demand (read+write) MSHR misses
 system.cpu.dcache.fast_writes                       0                       # number of fast writes performed
 system.cpu.dcache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.dcache.overall_accesses               2287                       # number of overall (read+write) accesses
-system.cpu.dcache.overall_avg_miss_latency  4900.210084                       # average overall miss latency
-system.cpu.dcache.overall_avg_mshr_miss_latency  5271.676301                       # average overall mshr miss latency
+system.cpu.dcache.overall_accesses               2282                       # number of overall (read+write) accesses
+system.cpu.dcache.overall_avg_miss_latency  4903.361345                       # average overall miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency  5268.786127                       # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits                   1811                       # number of overall hits
-system.cpu.dcache.overall_miss_latency        2332500                       # number of overall miss cycles
-system.cpu.dcache.overall_miss_rate          0.208133                       # miss rate for overall accesses
+system.cpu.dcache.overall_hits                   1806                       # number of overall hits
+system.cpu.dcache.overall_miss_latency        2334000                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_rate          0.208589                       # miss rate for overall accesses
 system.cpu.dcache.overall_misses                  476                       # number of overall misses
 system.cpu.dcache.overall_mshr_hits               303                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency       912000                       # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_rate     0.075645                       # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_miss_latency       911500                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_rate     0.075811                       # mshr miss rate for overall accesses
 system.cpu.dcache.overall_mshr_misses             173                       # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.dcache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
@@ -120,88 +121,88 @@ system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.dcache.replacements                      0                       # number of replacements
 system.cpu.dcache.sampled_refs                    173                       # Sample count of references to valid blocks.
 system.cpu.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse                112.670676                       # Cycle average of tags in use
-system.cpu.dcache.total_refs                     1811                       # Total number of references to valid blocks.
+system.cpu.dcache.tagsinuse                112.669258                       # Cycle average of tags in use
+system.cpu.dcache.total_refs                     1806                       # Total number of references to valid blocks.
 system.cpu.dcache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.dcache.writebacks                        0                       # number of writebacks
 system.cpu.decode.DECODE:BlockedCycles            389                       # Number of cycles decode is blocked
 system.cpu.decode.DECODE:BranchMispred             75                       # Number of times decode detected a branch misprediction
-system.cpu.decode.DECODE:BranchResolved           144                       # Number of times decode resolved a branch
-system.cpu.decode.DECODE:DecodedInsts           10499                       # Number of instructions handled by decode
+system.cpu.decode.DECODE:BranchResolved           143                       # Number of times decode resolved a branch
+system.cpu.decode.DECODE:DecodedInsts           10466                       # Number of instructions handled by decode
 system.cpu.decode.DECODE:IdleCycles              6230                       # Number of cycles decode is idle
-system.cpu.decode.DECODE:RunCycles               1848                       # Number of cycles decode is running
-system.cpu.decode.DECODE:SquashCycles             682                       # Number of cycles decode is squashing
+system.cpu.decode.DECODE:RunCycles               1855                       # Number of cycles decode is running
+system.cpu.decode.DECODE:SquashCycles             679                       # Number of cycles decode is squashing
 system.cpu.decode.DECODE:SquashedInsts            228                       # Number of squashed instructions handled by decode
 system.cpu.decode.DECODE:UnblockCycles             48                       # Number of cycles decode is unblocking
-system.cpu.fetch.Branches                        1843                       # Number of branches that fetch encountered
-system.cpu.fetch.CacheLines                      1471                       # Number of cache lines fetched
-system.cpu.fetch.Cycles                          3451                       # Number of cycles fetch has run and was not squashing or blocked
-system.cpu.fetch.IcacheSquashes                   269                       # Number of outstanding Icache misses that were squashed
-system.cpu.fetch.Insts                          11450                       # Number of instructions fetch has processed
+system.cpu.fetch.Branches                        1837                       # Number of branches that fetch encountered
+system.cpu.fetch.CacheLines                      1469                       # Number of cache lines fetched
+system.cpu.fetch.Cycles                          3456                       # Number of cycles fetch has run and was not squashing or blocked
+system.cpu.fetch.IcacheSquashes                   267                       # Number of outstanding Icache misses that were squashed
+system.cpu.fetch.Insts                          11417                       # Number of instructions fetch has processed
 system.cpu.fetch.SquashCycles                     455                       # Number of cycles fetch has spent squashing
-system.cpu.fetch.branchRate                  0.200391                       # Number of branch fetches per cycle
-system.cpu.fetch.icacheStallCycles               1471                       # Number of cycles fetch is stalled on an Icache miss
-system.cpu.fetch.predictedBranches                765                       # Number of branches that fetch has predicted taken
-system.cpu.fetch.rate                        1.244971                       # Number of inst fetches per cycle
+system.cpu.fetch.branchRate                  0.199652                       # Number of branch fetches per cycle
+system.cpu.fetch.icacheStallCycles               1469                       # Number of cycles fetch is stalled on an Icache miss
+system.cpu.fetch.predictedBranches                763                       # Number of branches that fetch has predicted taken
+system.cpu.fetch.rate                        1.240843                       # Number of inst fetches per cycle
 system.cpu.fetch.rateDist.start_dist                           # Number of instructions fetched each cycle (Total)
-system.cpu.fetch.rateDist.samples                9197                      
+system.cpu.fetch.rateDist.samples                9201                      
 system.cpu.fetch.rateDist.min_value                 0                      
-                               0         7219   7849.30%           
-                               1          167    181.58%           
-                               2          147    159.83%           
-                               3          129    140.26%           
-                               4          200    217.46%           
-                               5          139    151.14%           
-                               6          181    196.80%           
-                               7           99    107.64%           
-                               8          916    995.98%           
+                               0         7216   7842.63%           
+                               1          168    182.59%           
+                               2          148    160.85%           
+                               3          136    147.81%           
+                               4          214    232.58%           
+                               5          138    149.98%           
+                               6          177    192.37%           
+                               7           95    103.25%           
+                               8          909    987.94%           
 system.cpu.fetch.rateDist.max_value                 8                      
 system.cpu.fetch.rateDist.end_dist
 
-system.cpu.icache.ReadReq_accesses               1471                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_avg_miss_latency  5375.757576                       # average ReadReq miss latency
-system.cpu.icache.ReadReq_avg_mshr_miss_latency  4524.038462                       # average ReadReq mshr miss latency
-system.cpu.icache.ReadReq_hits                   1141                       # number of ReadReq hits
-system.cpu.icache.ReadReq_miss_latency        1774000                       # number of ReadReq miss cycles
-system.cpu.icache.ReadReq_miss_rate          0.224337                       # miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_accesses               1469                       # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_avg_miss_latency  5381.818182                       # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency  4530.448718                       # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_hits                   1139                       # number of ReadReq hits
+system.cpu.icache.ReadReq_miss_latency        1776000                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_rate          0.224643                       # miss rate for ReadReq accesses
 system.cpu.icache.ReadReq_misses                  330                       # number of ReadReq misses
 system.cpu.icache.ReadReq_mshr_hits                18                       # number of ReadReq MSHR hits
-system.cpu.icache.ReadReq_mshr_miss_latency      1411500                       # number of ReadReq MSHR miss cycles
-system.cpu.icache.ReadReq_mshr_miss_rate     0.212101                       # mshr miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_mshr_miss_latency      1413500                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_rate     0.212389                       # mshr miss rate for ReadReq accesses
 system.cpu.icache.ReadReq_mshr_misses             312                       # number of ReadReq MSHR misses
 system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.icache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.icache.avg_refs                   3.657051                       # Average number of references to valid blocks.
+system.cpu.icache.avg_refs                   3.650641                       # Average number of references to valid blocks.
 system.cpu.icache.blocked_no_mshrs                  0                       # number of cycles access was blocked
 system.cpu.icache.blocked_no_targets                0                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.icache.cache_copies                      0                       # number of cache copies performed
-system.cpu.icache.demand_accesses                1471                       # number of demand (read+write) accesses
-system.cpu.icache.demand_avg_miss_latency  5375.757576                       # average overall miss latency
-system.cpu.icache.demand_avg_mshr_miss_latency  4524.038462                       # average overall mshr miss latency
-system.cpu.icache.demand_hits                    1141                       # number of demand (read+write) hits
-system.cpu.icache.demand_miss_latency         1774000                       # number of demand (read+write) miss cycles
-system.cpu.icache.demand_miss_rate           0.224337                       # miss rate for demand accesses
+system.cpu.icache.demand_accesses                1469                       # number of demand (read+write) accesses
+system.cpu.icache.demand_avg_miss_latency  5381.818182                       # average overall miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency  4530.448718                       # average overall mshr miss latency
+system.cpu.icache.demand_hits                    1139                       # number of demand (read+write) hits
+system.cpu.icache.demand_miss_latency         1776000                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_rate           0.224643                       # miss rate for demand accesses
 system.cpu.icache.demand_misses                   330                       # number of demand (read+write) misses
 system.cpu.icache.demand_mshr_hits                 18                       # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_miss_latency      1411500                       # number of demand (read+write) MSHR miss cycles
-system.cpu.icache.demand_mshr_miss_rate      0.212101                       # mshr miss rate for demand accesses
+system.cpu.icache.demand_mshr_miss_latency      1413500                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_rate      0.212389                       # mshr miss rate for demand accesses
 system.cpu.icache.demand_mshr_misses              312                       # number of demand (read+write) MSHR misses
 system.cpu.icache.fast_writes                       0                       # number of fast writes performed
 system.cpu.icache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.icache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.icache.overall_accesses               1471                       # number of overall (read+write) accesses
-system.cpu.icache.overall_avg_miss_latency  5375.757576                       # average overall miss latency
-system.cpu.icache.overall_avg_mshr_miss_latency  4524.038462                       # average overall mshr miss latency
+system.cpu.icache.overall_accesses               1469                       # number of overall (read+write) accesses
+system.cpu.icache.overall_avg_miss_latency  5381.818182                       # average overall miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency  4530.448718                       # average overall mshr miss latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.icache.overall_hits                   1141                       # number of overall hits
-system.cpu.icache.overall_miss_latency        1774000                       # number of overall miss cycles
-system.cpu.icache.overall_miss_rate          0.224337                       # miss rate for overall accesses
+system.cpu.icache.overall_hits                   1139                       # number of overall hits
+system.cpu.icache.overall_miss_latency        1776000                       # number of overall miss cycles
+system.cpu.icache.overall_miss_rate          0.224643                       # miss rate for overall accesses
 system.cpu.icache.overall_misses                  330                       # number of overall misses
 system.cpu.icache.overall_mshr_hits                18                       # number of overall MSHR hits
-system.cpu.icache.overall_mshr_miss_latency      1411500                       # number of overall MSHR miss cycles
-system.cpu.icache.overall_mshr_miss_rate     0.212101                       # mshr miss rate for overall accesses
+system.cpu.icache.overall_mshr_miss_latency      1413500                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_rate     0.212389                       # mshr miss rate for overall accesses
 system.cpu.icache.overall_mshr_misses             312                       # number of overall MSHR misses
 system.cpu.icache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.icache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
@@ -217,39 +218,39 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.icache.replacements                      0                       # number of replacements
 system.cpu.icache.sampled_refs                    312                       # Sample count of references to valid blocks.
 system.cpu.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse                165.938349                       # Cycle average of tags in use
-system.cpu.icache.total_refs                     1141                       # Total number of references to valid blocks.
+system.cpu.icache.tagsinuse                165.921810                       # Cycle average of tags in use
+system.cpu.icache.total_refs                     1139                       # Total number of references to valid blocks.
 system.cpu.icache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.icache.writebacks                        0                       # number of writebacks
-system.cpu.idleCycles                            2475                       # Total number of cycles that the CPU has spent unscheduled due to idling
-system.cpu.iew.EXEC:branches                     1148                       # Number of branches executed
+system.cpu.idleCycles                            2474                       # Total number of cycles that the CPU has spent unscheduled due to idling
+system.cpu.iew.EXEC:branches                     1144                       # Number of branches executed
 system.cpu.iew.EXEC:nop                            40                       # number of nop insts executed
-system.cpu.iew.EXEC:rate                     0.837338                       # Inst execution rate
-system.cpu.iew.EXEC:refs                         2524                       # number of memory reference insts executed
+system.cpu.iew.EXEC:rate                     0.835018                       # Inst execution rate
+system.cpu.iew.EXEC:refs                         2519                       # number of memory reference insts executed
 system.cpu.iew.EXEC:stores                        977                       # Number of stores executed
 system.cpu.iew.EXEC:swp                             0                       # number of swp insts executed
-system.cpu.iew.WB:consumers                      5205                       # num instructions consuming a value
-system.cpu.iew.WB:count                          7402                       # cumulative count of insts written-back
-system.cpu.iew.WB:fanout                     0.742747                       # average fanout of values written-back
+system.cpu.iew.WB:consumers                      5193                       # num instructions consuming a value
+system.cpu.iew.WB:count                          7387                       # cumulative count of insts written-back
+system.cpu.iew.WB:fanout                     0.742923                       # average fanout of values written-back
 system.cpu.iew.WB:penalized                         0                       # number of instrctions required to write to 'other' IQ
 system.cpu.iew.WB:penalized_rate                    0                       # fraction of instructions written-back that wrote to 'other' IQ
-system.cpu.iew.WB:producers                      3866                       # num instructions producing a value
-system.cpu.iew.WB:rate                       0.804828                       # insts written-back per cycle
-system.cpu.iew.WB:sent                           7467                       # cumulative count of insts sent to commit
-system.cpu.iew.branchMispredicts                  374                       # Number of branch mispredicts detected at execute
+system.cpu.iew.WB:producers                      3858                       # num instructions producing a value
+system.cpu.iew.WB:rate                       0.802848                       # insts written-back per cycle
+system.cpu.iew.WB:sent                           7452                       # cumulative count of insts sent to commit
+system.cpu.iew.branchMispredicts                  373                       # Number of branch mispredicts detected at execute
 system.cpu.iew.iewBlockCycles                       4                       # Number of cycles IEW is blocking
-system.cpu.iew.iewDispLoadInsts                  1876                       # Number of dispatched load instructions
+system.cpu.iew.iewDispLoadInsts                  1874                       # Number of dispatched load instructions
 system.cpu.iew.iewDispNonSpecInsts                 22                       # Number of dispatched non-speculative instructions
-system.cpu.iew.iewDispSquashedInsts               315                       # Number of squashed instructions skipped by dispatch
-system.cpu.iew.iewDispStoreInsts                 1144                       # Number of dispatched store instructions
-system.cpu.iew.iewDispatchedInsts                9245                       # Number of instructions dispatched to IQ
-system.cpu.iew.iewExecLoadInsts                  1547                       # Number of load instructions executed
-system.cpu.iew.iewExecSquashedInsts               280                       # Number of squashed instructions skipped in execute
-system.cpu.iew.iewExecutedInsts                  7701                       # Number of executed instructions
+system.cpu.iew.iewDispSquashedInsts               302                       # Number of squashed instructions skipped by dispatch
+system.cpu.iew.iewDispStoreInsts                 1142                       # Number of dispatched store instructions
+system.cpu.iew.iewDispatchedInsts                9228                       # Number of instructions dispatched to IQ
+system.cpu.iew.iewExecLoadInsts                  1542                       # Number of load instructions executed
+system.cpu.iew.iewExecSquashedInsts               285                       # Number of squashed instructions skipped in execute
+system.cpu.iew.iewExecutedInsts                  7683                       # Number of executed instructions
 system.cpu.iew.iewIQFullEvents                      0                       # Number of times the IQ has become full, causing a stall
 system.cpu.iew.iewIdleCycles                        0                       # Number of cycles IEW is idle
 system.cpu.iew.iewLSQFullEvents                     0                       # Number of times the LSQ has become full, causing a stall
-system.cpu.iew.iewSquashCycles                    682                       # Number of cycles IEW is squashing
+system.cpu.iew.iewSquashCycles                    679                       # Number of cycles IEW is squashing
 system.cpu.iew.iewUnblockCycles                     0                       # Number of cycles IEW is unblocking
 system.cpu.iew.lsq.thread.0.blockedLoads            0                       # Number of blocked loads due to partial load-store forwarding
 system.cpu.iew.lsq.thread.0.cacheBlocked            0                       # Number of times an access to memory failed due to the cache being blocked
@@ -259,17 +260,17 @@ system.cpu.iew.lsq.thread.0.invAddrLoads            0                       # Nu
 system.cpu.iew.lsq.thread.0.invAddrSwpfs            0                       # Number of software prefetches ignored due to an invalid address
 system.cpu.iew.lsq.thread.0.memOrderViolation           63                       # Number of memory ordering violations
 system.cpu.iew.lsq.thread.0.rescheduledLoads            1                       # Number of loads that were rescheduled
-system.cpu.iew.lsq.thread.0.squashedLoads          897                       # Number of loads squashed
-system.cpu.iew.lsq.thread.0.squashedStores          332                       # Number of stores squashed
+system.cpu.iew.lsq.thread.0.squashedLoads          895                       # Number of loads squashed
+system.cpu.iew.lsq.thread.0.squashedStores          330                       # Number of stores squashed
 system.cpu.iew.memOrderViolationEvents             63                       # Number of memory order violations
-system.cpu.iew.predictedNotTakenIncorrect          263                       # Number of branches that were predicted not taken incorrectly
+system.cpu.iew.predictedNotTakenIncorrect          262                       # Number of branches that were predicted not taken incorrectly
 system.cpu.iew.predictedTakenIncorrect            111                       # Number of branches that were predicted taken incorrectly
-system.cpu.ipc                               0.611395                       # IPC: Instructions Per Cycle
-system.cpu.ipc_total                         0.611395                       # IPC: Total IPC of All Threads
-system.cpu.iq.ISSUE:FU_type_0                    7981                       # Type of FU issued
+system.cpu.ipc                               0.611129                       # IPC: Instructions Per Cycle
+system.cpu.ipc_total                         0.611129                       # IPC: Total IPC of All Threads
+system.cpu.iq.ISSUE:FU_type_0                    7968                       # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0.start_dist
                       No_OpClass            2      0.03%            # Type of FU issued
-                          IntAlu         5322     66.68%            # Type of FU issued
+                          IntAlu         5314     66.69%            # Type of FU issued
                          IntMult            1      0.01%            # Type of FU issued
                           IntDiv            0      0.00%            # Type of FU issued
                         FloatAdd            2      0.03%            # Type of FU issued
@@ -278,13 +279,13 @@ system.cpu.iq.ISSUE:FU_type_0.start_dist
                        FloatMult            0      0.00%            # Type of FU issued
                         FloatDiv            0      0.00%            # Type of FU issued
                        FloatSqrt            0      0.00%            # Type of FU issued
-                         MemRead         1662     20.82%            # Type of FU issued
-                        MemWrite          992     12.43%            # Type of FU issued
+                         MemRead         1659     20.82%            # Type of FU issued
+                        MemWrite          990     12.42%            # Type of FU issued
                        IprAccess            0      0.00%            # Type of FU issued
                     InstPrefetch            0      0.00%            # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0.end_dist
-system.cpu.iq.ISSUE:fu_busy_cnt                   106                       # FU busy when requested
-system.cpu.iq.ISSUE:fu_busy_rate             0.013282                       # FU busy rate (busy events/executed inst)
+system.cpu.iq.ISSUE:fu_busy_cnt                   105                       # FU busy when requested
+system.cpu.iq.ISSUE:fu_busy_rate             0.013178                       # FU busy rate (busy events/executed inst)
 system.cpu.iq.ISSUE:fu_full.start_dist
                       No_OpClass            0      0.00%            # attempts to use FU when none available
                           IntAlu            0      0.00%            # attempts to use FU when none available
@@ -296,41 +297,41 @@ system.cpu.iq.ISSUE:fu_full.start_dist
                        FloatMult            0      0.00%            # attempts to use FU when none available
                         FloatDiv            0      0.00%            # attempts to use FU when none available
                        FloatSqrt            0      0.00%            # attempts to use FU when none available
-                         MemRead           71     66.98%            # attempts to use FU when none available
-                        MemWrite           35     33.02%            # attempts to use FU when none available
+                         MemRead           70     66.67%            # attempts to use FU when none available
+                        MemWrite           35     33.33%            # attempts to use FU when none available
                        IprAccess            0      0.00%            # attempts to use FU when none available
                     InstPrefetch            0      0.00%            # attempts to use FU when none available
 system.cpu.iq.ISSUE:fu_full.end_dist
 system.cpu.iq.ISSUE:issued_per_cycle.start_dist                     # Number of insts issued each cycle
-system.cpu.iq.ISSUE:issued_per_cycle.samples         9197                      
+system.cpu.iq.ISSUE:issued_per_cycle.samples         9201                      
 system.cpu.iq.ISSUE:issued_per_cycle.min_value            0                      
-                               0         5952   6471.68%           
-                               1         1107   1203.65%           
-                               2          919    999.24%           
-                               3          442    480.59%           
-                               4          375    407.74%           
-                               5          250    271.83%           
-                               6          115    125.04%           
-                               7           26     28.27%           
-                               8           11     11.96%           
+                               0         5952   6468.86%           
+                               1         1111   1207.48%           
+                               2          928   1008.59%           
+                               3          433    470.60%           
+                               4          378    410.82%           
+                               5          251    272.80%           
+                               6          111    120.64%           
+                               7           27     29.34%           
+                               8           10     10.87%           
 system.cpu.iq.ISSUE:issued_per_cycle.max_value            8                      
 system.cpu.iq.ISSUE:issued_per_cycle.end_dist
 
-system.cpu.iq.ISSUE:rate                     0.867783                       # Inst issue rate
-system.cpu.iq.iqInstsAdded                       9183                       # Number of instructions added to the IQ (excludes non-spec)
-system.cpu.iq.iqInstsIssued                      7981                       # Number of instructions issued
+system.cpu.iq.ISSUE:rate                     0.865993                       # Inst issue rate
+system.cpu.iq.iqInstsAdded                       9166                       # Number of instructions added to the IQ (excludes non-spec)
+system.cpu.iq.iqInstsIssued                      7968                       # Number of instructions issued
 system.cpu.iq.iqNonSpecInstsAdded                  22                       # Number of non-speculative instructions added to the IQ
-system.cpu.iq.iqSquashedInstsExamined            3171                       # Number of squashed instructions iterated over during squash; mainly for profiling
+system.cpu.iq.iqSquashedInstsExamined            3154                       # Number of squashed instructions iterated over during squash; mainly for profiling
 system.cpu.iq.iqSquashedInstsIssued                22                       # Number of squashed instructions issued
 system.cpu.iq.iqSquashedNonSpecRemoved              5                       # Number of squashed non-spec instructions that were removed
-system.cpu.iq.iqSquashedOperandsExamined         2045                       # Number of squashed operands that are examined and possibly removed from graph
+system.cpu.iq.iqSquashedOperandsExamined         2035                       # Number of squashed operands that are examined and possibly removed from graph
 system.cpu.l2cache.ReadReq_accesses               483                       # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_avg_miss_latency  4639.751553                       # average ReadReq miss latency
-system.cpu.l2cache.ReadReq_avg_mshr_miss_latency  2463.768116                       # average ReadReq mshr miss latency
-system.cpu.l2cache.ReadReq_miss_latency       2241000                       # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_avg_miss_latency  4644.927536                       # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_avg_mshr_miss_latency  2467.908903                       # average ReadReq mshr miss latency
+system.cpu.l2cache.ReadReq_miss_latency       2243500                       # number of ReadReq miss cycles
 system.cpu.l2cache.ReadReq_miss_rate                1                       # miss rate for ReadReq accesses
 system.cpu.l2cache.ReadReq_misses                 483                       # number of ReadReq misses
-system.cpu.l2cache.ReadReq_mshr_miss_latency      1190000                       # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_latency      1192000                       # number of ReadReq MSHR miss cycles
 system.cpu.l2cache.ReadReq_mshr_miss_rate            1                       # mshr miss rate for ReadReq accesses
 system.cpu.l2cache.ReadReq_mshr_misses            483                       # number of ReadReq MSHR misses
 system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
@@ -342,29 +343,29 @@ system.cpu.l2cache.blocked_cycles_no_mshrs            0                       #
 system.cpu.l2cache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.l2cache.cache_copies                     0                       # number of cache copies performed
 system.cpu.l2cache.demand_accesses                483                       # number of demand (read+write) accesses
-system.cpu.l2cache.demand_avg_miss_latency  4639.751553                       # average overall miss latency
-system.cpu.l2cache.demand_avg_mshr_miss_latency  2463.768116                       # average overall mshr miss latency
+system.cpu.l2cache.demand_avg_miss_latency  4644.927536                       # average overall miss latency
+system.cpu.l2cache.demand_avg_mshr_miss_latency  2467.908903                       # average overall mshr miss latency
 system.cpu.l2cache.demand_hits                      0                       # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency        2241000                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_latency        2243500                       # number of demand (read+write) miss cycles
 system.cpu.l2cache.demand_miss_rate                 1                       # miss rate for demand accesses
 system.cpu.l2cache.demand_misses                  483                       # number of demand (read+write) misses
 system.cpu.l2cache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
-system.cpu.l2cache.demand_mshr_miss_latency      1190000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_latency      1192000                       # number of demand (read+write) MSHR miss cycles
 system.cpu.l2cache.demand_mshr_miss_rate            1                       # mshr miss rate for demand accesses
 system.cpu.l2cache.demand_mshr_misses             483                       # number of demand (read+write) MSHR misses
 system.cpu.l2cache.fast_writes                      0                       # number of fast writes performed
 system.cpu.l2cache.mshr_cap_events                  0                       # number of times MSHR cap was activated
 system.cpu.l2cache.no_allocate_misses               0                       # Number of misses that were no-allocate
 system.cpu.l2cache.overall_accesses               483                       # number of overall (read+write) accesses
-system.cpu.l2cache.overall_avg_miss_latency  4639.751553                       # average overall miss latency
-system.cpu.l2cache.overall_avg_mshr_miss_latency  2463.768116                       # average overall mshr miss latency
+system.cpu.l2cache.overall_avg_miss_latency  4644.927536                       # average overall miss latency
+system.cpu.l2cache.overall_avg_mshr_miss_latency  2467.908903                       # average overall mshr miss latency
 system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.l2cache.overall_hits                     0                       # number of overall hits
-system.cpu.l2cache.overall_miss_latency       2241000                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_latency       2243500                       # number of overall miss cycles
 system.cpu.l2cache.overall_miss_rate                1                       # miss rate for overall accesses
 system.cpu.l2cache.overall_misses                 483                       # number of overall misses
 system.cpu.l2cache.overall_mshr_hits                0                       # number of overall MSHR hits
-system.cpu.l2cache.overall_mshr_miss_latency      1190000                       # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_latency      1192000                       # number of overall MSHR miss cycles
 system.cpu.l2cache.overall_mshr_miss_rate            1                       # mshr miss rate for overall accesses
 system.cpu.l2cache.overall_mshr_misses            483                       # number of overall MSHR misses
 system.cpu.l2cache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
@@ -381,27 +382,27 @@ system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.l2cache.replacements                     0                       # number of replacements
 system.cpu.l2cache.sampled_refs                   483                       # Sample count of references to valid blocks.
 system.cpu.l2cache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse               278.222582                       # Cycle average of tags in use
+system.cpu.l2cache.tagsinuse               278.204751                       # Cycle average of tags in use
 system.cpu.l2cache.total_refs                       0                       # Total number of references to valid blocks.
 system.cpu.l2cache.warmup_cycle                     0                       # Cycle when the warmup percentage was hit.
 system.cpu.l2cache.writebacks                       0                       # number of writebacks
-system.cpu.numCycles                             9197                       # number of cpu cycles simulated
+system.cpu.numCycles                             9201                       # number of cpu cycles simulated
 system.cpu.rename.RENAME:BlockCycles               15                       # Number of cycles rename is blocking
 system.cpu.rename.RENAME:CommittedMaps           4051                       # Number of HB maps that are committed
-system.cpu.rename.RENAME:IdleCycles              6383                       # Number of cycles rename is idle
+system.cpu.rename.RENAME:IdleCycles              6382                       # Number of cycles rename is idle
 system.cpu.rename.RENAME:LSQFullEvents             70                       # Number of times rename has blocked due to LSQ full
-system.cpu.rename.RENAME:RenameLookups          12854                       # Number of register rename lookups that rename has made
-system.cpu.rename.RENAME:RenamedInsts           10031                       # Number of instructions processed by rename
-system.cpu.rename.RENAME:RenamedOperands         7485                       # Number of destination operands rename has renamed
-system.cpu.rename.RENAME:RunCycles               1746                       # Number of cycles rename is running
-system.cpu.rename.RENAME:SquashCycles             682                       # Number of cycles rename is squashing
+system.cpu.rename.RENAME:RenameLookups          12837                       # Number of register rename lookups that rename has made
+system.cpu.rename.RENAME:RenamedInsts           10018                       # Number of instructions processed by rename
+system.cpu.rename.RENAME:RenamedOperands         7477                       # Number of destination operands rename has renamed
+system.cpu.rename.RENAME:RunCycles               1754                       # Number of cycles rename is running
+system.cpu.rename.RENAME:SquashCycles             679                       # Number of cycles rename is squashing
 system.cpu.rename.RENAME:UnblockCycles            101                       # Number of cycles rename is unblocking
-system.cpu.rename.RENAME:UndoneMaps              3434                       # Number of HB maps that are undone due to squashing
+system.cpu.rename.RENAME:UndoneMaps              3426                       # Number of HB maps that are undone due to squashing
 system.cpu.rename.RENAME:serializeStallCycles          270                       # count of cycles rename stalled for serializing inst
 system.cpu.rename.RENAME:serializingInsts           26                       # count of serializing insts renamed
 system.cpu.rename.RENAME:skidInsts                380                       # count of insts added to the skid buffer
 system.cpu.rename.RENAME:tempSerializingInsts           20                       # count of temporary serializing insts renamed
-system.cpu.timesIdled                              25                       # Number of times that the entire CPU went into an idle state and unscheduled itself
+system.cpu.timesIdled                              26                       # Number of times that the entire CPU went into an idle state and unscheduled itself
 system.cpu.workload.PROG:num_syscalls              17                       # Number of system calls
 
 ---------- End Simulation Statistics   ----------
diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout b/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout
index d935401d2..142cb9695 100644
--- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout
+++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout
@@ -6,9 +6,9 @@ The Regents of The University of Michigan
 All Rights Reserved
 
 
-M5 compiled Jun 10 2007 14:06:20
-M5 started Sun Jun 10 14:22:32 2007
-M5 executing on iceaxe
-command line: /Users/nate/build/outgoing/build/ALPHA_SE/m5.debug -d /Users/nate/build/outgoing/build/ALPHA_SE/tests/debug/quick/00.hello/alpha/linux/o3-timing tests/run.py quick/00.hello/alpha/linux/o3-timing
+M5 compiled Jun 21 2007 21:25:27
+M5 started Fri Jun 22 00:04:38 2007
+M5 executing on zizzer.eecs.umich.edu
+command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/00.hello/alpha/linux/o3-timing tests/run.py quick/00.hello/alpha/linux/o3-timing
 Global frequency set at 1000000000000 ticks per second
-Exiting @ tick 4588000 because target called exit()
+Exiting @ tick 4589500 because target called exit()
diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini
index e3080f9e5..36a50c983 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini
+++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini
@@ -21,6 +21,7 @@ SQEntries=32
 SSITSize=1024
 activity=0
 backComSize=5
+cachePorts=200
 choiceCtrBits=2
 choicePredictorSize=8192
 clock=500
@@ -74,6 +75,15 @@ renameToFetchDelay=1
 renameToIEWDelay=2
 renameToROBDelay=1
 renameWidth=8
+smtCommitPolicy=RoundRobin
+smtFetchPolicy=SingleThread
+smtIQPolicy=Partitioned
+smtIQThreshold=100
+smtLSQPolicy=Partitioned
+smtLSQThreshold=100
+smtNumFetchingThreads=1
+smtROBPolicy=Partitioned
+smtROBThreshold=100
 squashWidth=8
 system=system
 trapLatency=13
@@ -86,6 +96,7 @@ icache_port=system.cpu.icache.cpu_side
 [system.cpu.dcache]
 type=BaseCache
 adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=2
 block_size=64
 compressed_bus=false
@@ -261,6 +272,7 @@ opLat=3
 [system.cpu.icache]
 type=BaseCache
 adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=2
 block_size=64
 compressed_bus=false
@@ -299,6 +311,7 @@ mem_side=system.cpu.toL2Bus.port[0]
 [system.cpu.l2cache]
 type=BaseCache
 adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=2
 block_size=64
 compressed_bus=false
diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt
index 6dd4c291d..d400dcd22 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt
+++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt
@@ -8,9 +8,10 @@ global.BPredUnit.condIncorrect                    208                       # Nu
 global.BPredUnit.condPredicted                    376                       # Number of conditional branches predicted
 global.BPredUnit.lookups                          738                       # Number of BP lookups
 global.BPredUnit.usedRAS                          140                       # Number of times the RAS was used to get a target.
-host_inst_rate                                   8881                       # Simulator instruction rate (inst/s)
-host_seconds                                     0.27                       # Real time elapsed on the host
-host_tick_rate                                7632084                       # Simulator tick rate (ticks/s)
+host_inst_rate                                  39805                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 153128                       # Number of bytes of host memory used
+host_seconds                                     0.06                       # Real time elapsed on the host
+host_tick_rate                               34110715                       # Simulator tick rate (ticks/s)
 memdepunit.memDep.conflictingLoads                  8                       # Number of conflicting loads.
 memdepunit.memDep.conflictingStores                 7                       # Number of conflicting stores.
 memdepunit.memDep.insertedLoads                   608                       # Number of loads inserted to the mem dependence unit.
@@ -18,22 +19,22 @@ memdepunit.memDep.insertedStores                  357                       # Nu
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
 sim_insts                                        2387                       # Number of instructions simulated
 sim_seconds                                  0.000002                       # Number of seconds simulated
-sim_ticks                                     2053000                       # Number of ticks simulated
+sim_ticks                                     2055000                       # Number of ticks simulated
 system.cpu.commit.COM:branches                    396                       # Number of branches committed
 system.cpu.commit.COM:bw_lim_events                41                       # number cycles where commit BW limit reached
 system.cpu.commit.COM:bw_limited                    0                       # number of insts not committed due to BW limits
 system.cpu.commit.COM:committed_per_cycle.start_dist                     # Number of insts commited each cycle
-system.cpu.commit.COM:committed_per_cycle.samples         3906                      
+system.cpu.commit.COM:committed_per_cycle.samples         3910                      
 system.cpu.commit.COM:committed_per_cycle.min_value            0                      
-                               0         2949   7549.92%           
-                               1          266    681.00%           
-                               2          333    852.53%           
-                               3          131    335.38%           
-                               4           74    189.45%           
-                               5           64    163.85%           
-                               6           29     74.24%           
-                               7           19     48.64%           
-                               8           41    104.97%           
+                               0         2950   7544.76%           
+                               1          266    680.31%           
+                               2          336    859.34%           
+                               3          131    335.04%           
+                               4           76    194.37%           
+                               5           65    166.24%           
+                               6           27     69.05%           
+                               7           18     46.04%           
+                               8           41    104.86%           
 system.cpu.commit.COM:committed_per_cycle.max_value            8                      
 system.cpu.commit.COM:committed_per_cycle.end_dist
 
@@ -48,17 +49,17 @@ system.cpu.commit.commitNonSpecStalls               4                       # Th
 system.cpu.commit.commitSquashedInsts             978                       # The number of squashed insts skipped by commit
 system.cpu.committedInsts                        2387                       # Number of Instructions Simulated
 system.cpu.committedInsts_total                  2387                       # Number of Instructions Simulated
-system.cpu.cpi                               1.721408                       # CPI: Cycles Per Instruction
-system.cpu.cpi_total                         1.721408                       # CPI: Total CPI of All Threads
+system.cpu.cpi                               1.723083                       # CPI: Cycles Per Instruction
+system.cpu.cpi_total                         1.723083                       # CPI: Total CPI of All Threads
 system.cpu.dcache.ReadReq_accesses                514                       # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_avg_miss_latency  5456.521739                       # average ReadReq miss latency
-system.cpu.dcache.ReadReq_avg_mshr_miss_latency  4737.288136                       # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_avg_miss_latency  5391.304348                       # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency  4669.491525                       # average ReadReq mshr miss latency
 system.cpu.dcache.ReadReq_hits                    445                       # number of ReadReq hits
-system.cpu.dcache.ReadReq_miss_latency         376500                       # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_latency         372000                       # number of ReadReq miss cycles
 system.cpu.dcache.ReadReq_miss_rate          0.134241                       # miss rate for ReadReq accesses
 system.cpu.dcache.ReadReq_misses                   69                       # number of ReadReq misses
 system.cpu.dcache.ReadReq_mshr_hits                10                       # number of ReadReq MSHR hits
-system.cpu.dcache.ReadReq_mshr_miss_latency       279500                       # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_latency       275500                       # number of ReadReq MSHR miss cycles
 system.cpu.dcache.ReadReq_mshr_miss_rate     0.114786                       # mshr miss rate for ReadReq accesses
 system.cpu.dcache.ReadReq_mshr_misses              59                       # number of ReadReq MSHR misses
 system.cpu.dcache.WriteReq_accesses               294                       # number of WriteReq accesses(hits+misses)
@@ -81,29 +82,29 @@ system.cpu.dcache.blocked_cycles_no_mshrs            0                       # n
 system.cpu.dcache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.dcache.cache_copies                      0                       # number of cache copies performed
 system.cpu.dcache.demand_accesses                 808                       # number of demand (read+write) accesses
-system.cpu.dcache.demand_avg_miss_latency  5564.285714                       # average overall miss latency
-system.cpu.dcache.demand_avg_mshr_miss_latency  4821.428571                       # average overall mshr miss latency
+system.cpu.dcache.demand_avg_miss_latency  5532.142857                       # average overall miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency  4773.809524                       # average overall mshr miss latency
 system.cpu.dcache.demand_hits                     668                       # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency          779000                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_latency          774500                       # number of demand (read+write) miss cycles
 system.cpu.dcache.demand_miss_rate           0.173267                       # miss rate for demand accesses
 system.cpu.dcache.demand_misses                   140                       # number of demand (read+write) misses
 system.cpu.dcache.demand_mshr_hits                 56                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency       405000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_latency       401000                       # number of demand (read+write) MSHR miss cycles
 system.cpu.dcache.demand_mshr_miss_rate      0.103960                       # mshr miss rate for demand accesses
 system.cpu.dcache.demand_mshr_misses               84                       # number of demand (read+write) MSHR misses
 system.cpu.dcache.fast_writes                       0                       # number of fast writes performed
 system.cpu.dcache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
 system.cpu.dcache.overall_accesses                808                       # number of overall (read+write) accesses
-system.cpu.dcache.overall_avg_miss_latency  5564.285714                       # average overall miss latency
-system.cpu.dcache.overall_avg_mshr_miss_latency  4821.428571                       # average overall mshr miss latency
+system.cpu.dcache.overall_avg_miss_latency  5532.142857                       # average overall miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency  4773.809524                       # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.dcache.overall_hits                    668                       # number of overall hits
-system.cpu.dcache.overall_miss_latency         779000                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_latency         774500                       # number of overall miss cycles
 system.cpu.dcache.overall_miss_rate          0.173267                       # miss rate for overall accesses
 system.cpu.dcache.overall_misses                  140                       # number of overall misses
 system.cpu.dcache.overall_mshr_hits                56                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency       405000                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_latency       401000                       # number of overall MSHR miss cycles
 system.cpu.dcache.overall_mshr_miss_rate     0.103960                       # mshr miss rate for overall accesses
 system.cpu.dcache.overall_mshr_misses              84                       # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
@@ -120,7 +121,7 @@ system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.dcache.replacements                      0                       # number of replacements
 system.cpu.dcache.sampled_refs                     84                       # Sample count of references to valid blocks.
 system.cpu.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse                 51.851940                       # Cycle average of tags in use
+system.cpu.dcache.tagsinuse                 51.873008                       # Cycle average of tags in use
 system.cpu.dcache.total_refs                      668                       # Total number of references to valid blocks.
 system.cpu.dcache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.dcache.writebacks                        0                       # number of writebacks
@@ -129,43 +130,43 @@ system.cpu.decode.DECODE:BranchMispred             81                       # Nu
 system.cpu.decode.DECODE:BranchResolved           123                       # Number of times decode resolved a branch
 system.cpu.decode.DECODE:DecodedInsts            4033                       # Number of instructions handled by decode
 system.cpu.decode.DECODE:IdleCycles              3045                       # Number of cycles decode is idle
-system.cpu.decode.DECODE:RunCycles                767                       # Number of cycles decode is running
+system.cpu.decode.DECODE:RunCycles                771                       # Number of cycles decode is running
 system.cpu.decode.DECODE:SquashCycles             202                       # Number of cycles decode is squashing
 system.cpu.decode.DECODE:SquashedInsts            298                       # Number of squashed instructions handled by decode
 system.cpu.fetch.Branches                         738                       # Number of branches that fetch encountered
 system.cpu.fetch.CacheLines                       654                       # Number of cache lines fetched
-system.cpu.fetch.Cycles                          1440                       # Number of cycles fetch has run and was not squashing or blocked
+system.cpu.fetch.Cycles                          1444                       # Number of cycles fetch has run and was not squashing or blocked
 system.cpu.fetch.IcacheSquashes                   120                       # Number of outstanding Icache misses that were squashed
 system.cpu.fetch.Insts                           4685                       # Number of instructions fetch has processed
 system.cpu.fetch.SquashCycles                     218                       # Number of cycles fetch has spent squashing
-system.cpu.fetch.branchRate                  0.179606                       # Number of branch fetches per cycle
+system.cpu.fetch.branchRate                  0.179431                       # Number of branch fetches per cycle
 system.cpu.fetch.icacheStallCycles                654                       # Number of cycles fetch is stalled on an Icache miss
 system.cpu.fetch.predictedBranches                272                       # Number of branches that fetch has predicted taken
-system.cpu.fetch.rate                        1.140180                       # Number of inst fetches per cycle
+system.cpu.fetch.rate                        1.139071                       # Number of inst fetches per cycle
 system.cpu.fetch.rateDist.start_dist                           # Number of instructions fetched each cycle (Total)
-system.cpu.fetch.rateDist.samples                4109                      
+system.cpu.fetch.rateDist.samples                4113                      
 system.cpu.fetch.rateDist.min_value                 0                      
-                               0         3325   8091.99%           
-                               1           32     77.88%           
-                               2           74    180.09%           
-                               3           53    128.99%           
-                               4           99    240.93%           
-                               5           49    119.25%           
-                               6           38     92.48%           
-                               7           35     85.18%           
-                               8          404    983.21%           
+                               0         3325   8084.12%           
+                               1           32     77.80%           
+                               2           80    194.51%           
+                               3           50    121.57%           
+                               4           99    240.70%           
+                               5           52    126.43%           
+                               6           39     94.82%           
+                               7           35     85.10%           
+                               8          401    974.96%           
 system.cpu.fetch.rateDist.max_value                 8                      
 system.cpu.fetch.rateDist.end_dist
 
 system.cpu.icache.ReadReq_accesses                654                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_avg_miss_latency  5296.019900                       # average ReadReq miss latency
-system.cpu.icache.ReadReq_avg_mshr_miss_latency  4553.763441                       # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_avg_miss_latency  5298.507463                       # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency  4556.451613                       # average ReadReq mshr miss latency
 system.cpu.icache.ReadReq_hits                    453                       # number of ReadReq hits
-system.cpu.icache.ReadReq_miss_latency        1064500                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_latency        1065000                       # number of ReadReq miss cycles
 system.cpu.icache.ReadReq_miss_rate          0.307339                       # miss rate for ReadReq accesses
 system.cpu.icache.ReadReq_misses                  201                       # number of ReadReq misses
 system.cpu.icache.ReadReq_mshr_hits                15                       # number of ReadReq MSHR hits
-system.cpu.icache.ReadReq_mshr_miss_latency       847000                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_latency       847500                       # number of ReadReq MSHR miss cycles
 system.cpu.icache.ReadReq_mshr_miss_rate     0.284404                       # mshr miss rate for ReadReq accesses
 system.cpu.icache.ReadReq_mshr_misses             186                       # number of ReadReq MSHR misses
 system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
@@ -177,29 +178,29 @@ system.cpu.icache.blocked_cycles_no_mshrs            0                       # n
 system.cpu.icache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.icache.cache_copies                      0                       # number of cache copies performed
 system.cpu.icache.demand_accesses                 654                       # number of demand (read+write) accesses
-system.cpu.icache.demand_avg_miss_latency  5296.019900                       # average overall miss latency
-system.cpu.icache.demand_avg_mshr_miss_latency  4553.763441                       # average overall mshr miss latency
+system.cpu.icache.demand_avg_miss_latency  5298.507463                       # average overall miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency  4556.451613                       # average overall mshr miss latency
 system.cpu.icache.demand_hits                     453                       # number of demand (read+write) hits
-system.cpu.icache.demand_miss_latency         1064500                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_latency         1065000                       # number of demand (read+write) miss cycles
 system.cpu.icache.demand_miss_rate           0.307339                       # miss rate for demand accesses
 system.cpu.icache.demand_misses                   201                       # number of demand (read+write) misses
 system.cpu.icache.demand_mshr_hits                 15                       # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_miss_latency       847000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_latency       847500                       # number of demand (read+write) MSHR miss cycles
 system.cpu.icache.demand_mshr_miss_rate      0.284404                       # mshr miss rate for demand accesses
 system.cpu.icache.demand_mshr_misses              186                       # number of demand (read+write) MSHR misses
 system.cpu.icache.fast_writes                       0                       # number of fast writes performed
 system.cpu.icache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.icache.no_allocate_misses                0                       # Number of misses that were no-allocate
 system.cpu.icache.overall_accesses                654                       # number of overall (read+write) accesses
-system.cpu.icache.overall_avg_miss_latency  5296.019900                       # average overall miss latency
-system.cpu.icache.overall_avg_mshr_miss_latency  4553.763441                       # average overall mshr miss latency
+system.cpu.icache.overall_avg_miss_latency  5298.507463                       # average overall miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency  4556.451613                       # average overall mshr miss latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.icache.overall_hits                    453                       # number of overall hits
-system.cpu.icache.overall_miss_latency        1064500                       # number of overall miss cycles
+system.cpu.icache.overall_miss_latency        1065000                       # number of overall miss cycles
 system.cpu.icache.overall_miss_rate          0.307339                       # miss rate for overall accesses
 system.cpu.icache.overall_misses                  201                       # number of overall misses
 system.cpu.icache.overall_mshr_hits                15                       # number of overall MSHR hits
-system.cpu.icache.overall_mshr_miss_latency       847000                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_latency       847500                       # number of overall MSHR miss cycles
 system.cpu.icache.overall_mshr_miss_rate     0.284404                       # mshr miss rate for overall accesses
 system.cpu.icache.overall_mshr_misses             186                       # number of overall MSHR misses
 system.cpu.icache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
@@ -216,14 +217,14 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.icache.replacements                      0                       # number of replacements
 system.cpu.icache.sampled_refs                    186                       # Sample count of references to valid blocks.
 system.cpu.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse                106.237740                       # Cycle average of tags in use
+system.cpu.icache.tagsinuse                106.293956                       # Cycle average of tags in use
 system.cpu.icache.total_refs                      453                       # Total number of references to valid blocks.
 system.cpu.icache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.icache.writebacks                        0                       # number of writebacks
 system.cpu.idleCycles                            2992                       # Total number of cycles that the CPU has spent unscheduled due to idling
 system.cpu.iew.EXEC:branches                      501                       # Number of branches executed
 system.cpu.iew.EXEC:nop                           234                       # number of nop insts executed
-system.cpu.iew.EXEC:rate                     0.727184                       # Inst execution rate
+system.cpu.iew.EXEC:rate                     0.726477                       # Inst execution rate
 system.cpu.iew.EXEC:refs                          878                       # number of memory reference insts executed
 system.cpu.iew.EXEC:stores                        333                       # Number of stores executed
 system.cpu.iew.EXEC:swp                             0                       # number of swp insts executed
@@ -233,7 +234,7 @@ system.cpu.iew.WB:fanout                     0.799637                       # av
 system.cpu.iew.WB:penalized                         0                       # number of instrctions required to write to 'other' IQ
 system.cpu.iew.WB:penalized_rate                    0                       # fraction of instructions written-back that wrote to 'other' IQ
 system.cpu.iew.WB:producers                      1321                       # num instructions producing a value
-system.cpu.iew.WB:rate                       0.709175                       # insts written-back per cycle
+system.cpu.iew.WB:rate                       0.708485                       # insts written-back per cycle
 system.cpu.iew.WB:sent                           2931                       # cumulative count of insts sent to commit
 system.cpu.iew.branchMispredicts                  135                       # Number of branch mispredicts detected at execute
 system.cpu.iew.iewBlockCycles                       0                       # Number of cycles IEW is blocking
@@ -263,8 +264,8 @@ system.cpu.iew.lsq.thread.0.squashedStores           63                       #
 system.cpu.iew.memOrderViolationEvents             10                       # Number of memory order violations
 system.cpu.iew.predictedNotTakenIncorrect           98                       # Number of branches that were predicted not taken incorrectly
 system.cpu.iew.predictedTakenIncorrect             37                       # Number of branches that were predicted taken incorrectly
-system.cpu.ipc                               0.580920                       # IPC: Instructions Per Cycle
-system.cpu.ipc_total                         0.580920                       # IPC: Total IPC of All Threads
+system.cpu.ipc                               0.580355                       # IPC: Instructions Per Cycle
+system.cpu.ipc_total                         0.580355                       # IPC: Total IPC of All Threads
 system.cpu.iq.ISSUE:FU_type_0                    3075                       # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0.start_dist
                       No_OpClass            0      0.00%            # Type of FU issued
@@ -301,21 +302,21 @@ system.cpu.iq.ISSUE:fu_full.start_dist
                     InstPrefetch            0      0.00%            # attempts to use FU when none available
 system.cpu.iq.ISSUE:fu_full.end_dist
 system.cpu.iq.ISSUE:issued_per_cycle.start_dist                     # Number of insts issued each cycle
-system.cpu.iq.ISSUE:issued_per_cycle.samples         4109                      
+system.cpu.iq.ISSUE:issued_per_cycle.samples         4113                      
 system.cpu.iq.ISSUE:issued_per_cycle.min_value            0                      
-                               0         2849   6933.56%           
-                               1          475   1156.00%           
-                               2          270    657.09%           
-                               3          217    528.11%           
-                               4          159    386.96%           
-                               5           86    209.30%           
-                               6           34     82.75%           
-                               7           13     31.64%           
-                               8            6     14.60%           
+                               0         2848   6924.39%           
+                               1          479   1164.60%           
+                               2          276    671.04%           
+                               3          213    517.87%           
+                               4          158    384.15%           
+                               5           86    209.09%           
+                               6           34     82.66%           
+                               7           13     31.61%           
+                               8            6     14.59%           
 system.cpu.iq.ISSUE:issued_per_cycle.max_value            8                      
 system.cpu.iq.ISSUE:issued_per_cycle.end_dist
 
-system.cpu.iq.ISSUE:rate                     0.748357                       # Inst issue rate
+system.cpu.iq.ISSUE:rate                     0.747629                       # Inst issue rate
 system.cpu.iq.iqInstsAdded                       3330                       # Number of instructions added to the IQ (excludes non-spec)
 system.cpu.iq.iqInstsIssued                      3075                       # Number of instructions issued
 system.cpu.iq.iqNonSpecInstsAdded                   7                       # Number of non-speculative instructions added to the IQ
@@ -323,9 +324,9 @@ system.cpu.iq.iqSquashedInstsExamined             790                       # Nu
 system.cpu.iq.iqSquashedNonSpecRemoved              3                       # Number of squashed non-spec instructions that were removed
 system.cpu.iq.iqSquashedOperandsExamined          409                       # Number of squashed operands that are examined and possibly removed from graph
 system.cpu.l2cache.ReadReq_accesses               270                       # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_avg_miss_latency  4522.222222                       # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_avg_miss_latency  4509.259259                       # average ReadReq miss latency
 system.cpu.l2cache.ReadReq_avg_mshr_miss_latency  2388.888889                       # average ReadReq mshr miss latency
-system.cpu.l2cache.ReadReq_miss_latency       1221000                       # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_miss_latency       1217500                       # number of ReadReq miss cycles
 system.cpu.l2cache.ReadReq_miss_rate                1                       # miss rate for ReadReq accesses
 system.cpu.l2cache.ReadReq_misses                 270                       # number of ReadReq misses
 system.cpu.l2cache.ReadReq_mshr_miss_latency       645000                       # number of ReadReq MSHR miss cycles
@@ -340,10 +341,10 @@ system.cpu.l2cache.blocked_cycles_no_mshrs            0                       #
 system.cpu.l2cache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.l2cache.cache_copies                     0                       # number of cache copies performed
 system.cpu.l2cache.demand_accesses                270                       # number of demand (read+write) accesses
-system.cpu.l2cache.demand_avg_miss_latency  4522.222222                       # average overall miss latency
+system.cpu.l2cache.demand_avg_miss_latency  4509.259259                       # average overall miss latency
 system.cpu.l2cache.demand_avg_mshr_miss_latency  2388.888889                       # average overall mshr miss latency
 system.cpu.l2cache.demand_hits                      0                       # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency        1221000                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_latency        1217500                       # number of demand (read+write) miss cycles
 system.cpu.l2cache.demand_miss_rate                 1                       # miss rate for demand accesses
 system.cpu.l2cache.demand_misses                  270                       # number of demand (read+write) misses
 system.cpu.l2cache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
@@ -354,11 +355,11 @@ system.cpu.l2cache.fast_writes                      0                       # nu
 system.cpu.l2cache.mshr_cap_events                  0                       # number of times MSHR cap was activated
 system.cpu.l2cache.no_allocate_misses               0                       # Number of misses that were no-allocate
 system.cpu.l2cache.overall_accesses               270                       # number of overall (read+write) accesses
-system.cpu.l2cache.overall_avg_miss_latency  4522.222222                       # average overall miss latency
+system.cpu.l2cache.overall_avg_miss_latency  4509.259259                       # average overall miss latency
 system.cpu.l2cache.overall_avg_mshr_miss_latency  2388.888889                       # average overall mshr miss latency
 system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.l2cache.overall_hits                     0                       # number of overall hits
-system.cpu.l2cache.overall_miss_latency       1221000                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_latency       1217500                       # number of overall miss cycles
 system.cpu.l2cache.overall_miss_rate                1                       # miss rate for overall accesses
 system.cpu.l2cache.overall_misses                 270                       # number of overall misses
 system.cpu.l2cache.overall_mshr_hits                0                       # number of overall MSHR hits
@@ -379,18 +380,18 @@ system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.l2cache.replacements                     0                       # number of replacements
 system.cpu.l2cache.sampled_refs                   270                       # Sample count of references to valid blocks.
 system.cpu.l2cache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse               158.236294                       # Cycle average of tags in use
+system.cpu.l2cache.tagsinuse               158.313436                       # Cycle average of tags in use
 system.cpu.l2cache.total_refs                       0                       # Total number of references to valid blocks.
 system.cpu.l2cache.warmup_cycle                     0                       # Cycle when the warmup percentage was hit.
 system.cpu.l2cache.writebacks                       0                       # number of writebacks
-system.cpu.numCycles                             4109                       # number of cpu cycles simulated
+system.cpu.numCycles                             4113                       # number of cpu cycles simulated
 system.cpu.rename.RENAME:CommittedMaps           1768                       # Number of HB maps that are committed
 system.cpu.rename.RENAME:IdleCycles              3116                       # Number of cycles rename is idle
 system.cpu.rename.RENAME:LSQFullEvents              1                       # Number of times rename has blocked due to LSQ full
 system.cpu.rename.RENAME:RenameLookups           4416                       # Number of register rename lookups that rename has made
 system.cpu.rename.RENAME:RenamedInsts            3886                       # Number of instructions processed by rename
 system.cpu.rename.RENAME:RenamedOperands         2777                       # Number of destination operands rename has renamed
-system.cpu.rename.RENAME:RunCycles                696                       # Number of cycles rename is running
+system.cpu.rename.RENAME:RunCycles                700                       # Number of cycles rename is running
 system.cpu.rename.RENAME:SquashCycles             202                       # Number of cycles rename is squashing
 system.cpu.rename.RENAME:UnblockCycles              6                       # Number of cycles rename is unblocking
 system.cpu.rename.RENAME:UndoneMaps              1009                       # Number of HB maps that are undone due to squashing
diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout
index 60520dc0c..c276fcaea 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout
+++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout
@@ -6,9 +6,9 @@ The Regents of The University of Michigan
 All Rights Reserved
 
 
-M5 compiled Jun 10 2007 14:06:20
-M5 started Sun Jun 10 14:22:36 2007
-M5 executing on iceaxe
-command line: /Users/nate/build/outgoing/build/ALPHA_SE/m5.debug -d /Users/nate/build/outgoing/build/ALPHA_SE/tests/debug/quick/00.hello/alpha/tru64/o3-timing tests/run.py quick/00.hello/alpha/tru64/o3-timing
+M5 compiled Jun 21 2007 21:25:27
+M5 started Fri Jun 22 00:04:44 2007
+M5 executing on zizzer.eecs.umich.edu
+command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/00.hello/alpha/tru64/o3-timing tests/run.py quick/00.hello/alpha/tru64/o3-timing
 Global frequency set at 1000000000000 ticks per second
-Exiting @ tick 2053000 because target called exit()
+Exiting @ tick 2055000 because target called exit()
diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini
index e9dddb505..f03824f95 100644
--- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini
+++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini
@@ -21,6 +21,7 @@ SQEntries=32
 SSITSize=1024
 activity=0
 backComSize=5
+cachePorts=200
 choiceCtrBits=2
 choicePredictorSize=8192
 clock=500
@@ -74,6 +75,15 @@ renameToFetchDelay=1
 renameToIEWDelay=2
 renameToROBDelay=1
 renameWidth=8
+smtCommitPolicy=RoundRobin
+smtFetchPolicy=SingleThread
+smtIQPolicy=Partitioned
+smtIQThreshold=100
+smtLSQPolicy=Partitioned
+smtLSQThreshold=100
+smtNumFetchingThreads=1
+smtROBPolicy=Partitioned
+smtROBThreshold=100
 squashWidth=8
 system=system
 trapLatency=13
@@ -86,6 +96,7 @@ icache_port=system.cpu.icache.cpu_side
 [system.cpu.dcache]
 type=BaseCache
 adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=2
 block_size=64
 compressed_bus=false
@@ -261,6 +272,7 @@ opLat=3
 [system.cpu.icache]
 type=BaseCache
 adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=2
 block_size=64
 compressed_bus=false
@@ -299,6 +311,7 @@ mem_side=system.cpu.toL2Bus.port[0]
 [system.cpu.l2cache]
 type=BaseCache
 adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=2
 block_size=64
 compressed_bus=false
diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt
index dc1fcc248..39a686d6b 100644
--- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt
+++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt
@@ -1,47 +1,48 @@
 
 ---------- Begin Simulation Statistics ----------
 global.BPredUnit.BTBCorrect                         0                       # Number of correct BTB predictions (this stat may not work properly.
-global.BPredUnit.BTBHits                          674                       # Number of BTB hits
-global.BPredUnit.BTBLookups                      3410                       # Number of BTB lookups
-global.BPredUnit.RASInCorrect                     118                       # Number of incorrect RAS predictions.
-global.BPredUnit.condIncorrect                   1115                       # Number of conditional branches incorrect
-global.BPredUnit.condPredicted                   2318                       # Number of conditional branches predicted
-global.BPredUnit.lookups                         3964                       # Number of BP lookups
-global.BPredUnit.usedRAS                          532                       # Number of times the RAS was used to get a target.
-host_inst_rate                                   8215                       # Simulator instruction rate (inst/s)
-host_seconds                                     1.37                       # Real time elapsed on the host
-host_tick_rate                                4009351                       # Simulator tick rate (ticks/s)
-memdepunit.memDep.conflictingLoads                 19                       # Number of conflicting loads.
-memdepunit.memDep.conflictingLoads                 18                       # Number of conflicting loads.
-memdepunit.memDep.conflictingStores                54                       # Number of conflicting stores.
+global.BPredUnit.BTBHits                          696                       # Number of BTB hits
+global.BPredUnit.BTBLookups                      3414                       # Number of BTB lookups
+global.BPredUnit.RASInCorrect                     125                       # Number of incorrect RAS predictions.
+global.BPredUnit.condIncorrect                   1124                       # Number of conditional branches incorrect
+global.BPredUnit.condPredicted                   2315                       # Number of conditional branches predicted
+global.BPredUnit.lookups                         3940                       # Number of BP lookups
+global.BPredUnit.usedRAS                          525                       # Number of times the RAS was used to get a target.
+host_inst_rate                                  52706                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 154396                       # Number of bytes of host memory used
+host_seconds                                     0.21                       # Real time elapsed on the host
+host_tick_rate                               25698682                       # Simulator tick rate (ticks/s)
+memdepunit.memDep.conflictingLoads                 16                       # Number of conflicting loads.
+memdepunit.memDep.conflictingLoads                 16                       # Number of conflicting loads.
+memdepunit.memDep.conflictingStores                53                       # Number of conflicting stores.
 memdepunit.memDep.conflictingStores                59                       # Number of conflicting stores.
-memdepunit.memDep.insertedLoads                  1925                       # Number of loads inserted to the mem dependence unit.
-memdepunit.memDep.insertedLoads                  1898                       # Number of loads inserted to the mem dependence unit.
-memdepunit.memDep.insertedStores                 1088                       # Number of stores inserted to the mem dependence unit.
+memdepunit.memDep.insertedLoads                  1934                       # Number of loads inserted to the mem dependence unit.
+memdepunit.memDep.insertedLoads                  1903                       # Number of loads inserted to the mem dependence unit.
+memdepunit.memDep.insertedStores                 1082                       # Number of stores inserted to the mem dependence unit.
 memdepunit.memDep.insertedStores                 1090                       # Number of stores inserted to the mem dependence unit.
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
 sim_insts                                       11247                       # Number of instructions simulated
 sim_seconds                                  0.000005                       # Number of seconds simulated
-sim_ticks                                     5490000                       # Number of ticks simulated
+sim_ticks                                     5491500                       # Number of ticks simulated
 system.cpu.commit.COM:branches                   1724                       # Number of branches committed
 system.cpu.commit.COM:branches_0                  862                       # Number of branches committed
 system.cpu.commit.COM:branches_1                  862                       # Number of branches committed
-system.cpu.commit.COM:bw_lim_events               165                       # number cycles where commit BW limit reached
+system.cpu.commit.COM:bw_lim_events               168                       # number cycles where commit BW limit reached
 system.cpu.commit.COM:bw_limited                    0                       # number of insts not committed due to BW limits
 system.cpu.commit.COM:bw_limited_0                  0                       # number of insts not committed due to BW limits
 system.cpu.commit.COM:bw_limited_1                  0                       # number of insts not committed due to BW limits
 system.cpu.commit.COM:committed_per_cycle.start_dist                     # Number of insts commited each cycle
-system.cpu.commit.COM:committed_per_cycle.samples        10929                      
+system.cpu.commit.COM:committed_per_cycle.samples        10926                      
 system.cpu.commit.COM:committed_per_cycle.min_value            0                      
-                               0         6410   5865.13%           
-                               1         2019   1847.38%           
-                               2          999    914.08%           
-                               3          454    415.41%           
-                               4          300    274.50%           
-                               5          246    225.09%           
-                               6          200    183.00%           
-                               7          136    124.44%           
-                               8          165    150.97%           
+                               0         6353   5814.57%           
+                               1         2078   1901.89%           
+                               2          996    911.59%           
+                               3          472    432.00%           
+                               4          296    270.91%           
+                               5          241    220.57%           
+                               6          192    175.73%           
+                               7          130    118.98%           
+                               8          168    153.76%           
 system.cpu.commit.COM:committed_per_cycle.max_value            8                      
 system.cpu.commit.COM:committed_per_cycle.end_dist
 
@@ -60,133 +61,133 @@ system.cpu.commit.COM:refs_1                     1791                       # Nu
 system.cpu.commit.COM:swp_count                     0                       # Number of s/w prefetches committed
 system.cpu.commit.COM:swp_count_0                   0                       # Number of s/w prefetches committed
 system.cpu.commit.COM:swp_count_1                   0                       # Number of s/w prefetches committed
-system.cpu.commit.branchMispredicts               874                       # The number of times a branch was mispredicted
+system.cpu.commit.branchMispredicts               885                       # The number of times a branch was mispredicted
 system.cpu.commit.commitCommittedInsts          11281                       # The number of committed instructions
 system.cpu.commit.commitNonSpecStalls              34                       # The number of times commit has been forced to stall to communicate backwards
-system.cpu.commit.commitSquashedInsts            7769                       # The number of squashed insts skipped by commit
+system.cpu.commit.commitSquashedInsts            7777                       # The number of squashed insts skipped by commit
 system.cpu.committedInsts_0                      5623                       # Number of Instructions Simulated
 system.cpu.committedInsts_1                      5624                       # Number of Instructions Simulated
 system.cpu.committedInsts_total                 11247                       # Number of Instructions Simulated
-system.cpu.cpi_0                             1.952516                       # CPI: Cycles Per Instruction
-system.cpu.cpi_1                             1.952169                       # CPI: Cycles Per Instruction
-system.cpu.cpi_total                         0.976171                       # CPI: Total CPI of All Threads
-system.cpu.dcache.ReadReq_accesses               2969                       # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_accesses_0             2969                       # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_avg_miss_latency_0  7072.992701                       # average ReadReq miss latency
-system.cpu.dcache.ReadReq_avg_mshr_miss_latency_0  6972.361809                       # average ReadReq mshr miss latency
-system.cpu.dcache.ReadReq_hits                   2695                       # number of ReadReq hits
-system.cpu.dcache.ReadReq_hits_0                 2695                       # number of ReadReq hits
-system.cpu.dcache.ReadReq_miss_latency        1938000                       # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_latency_0      1938000                       # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_rate_0        0.092287                       # miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_misses                  274                       # number of ReadReq misses
-system.cpu.dcache.ReadReq_misses_0                274                       # number of ReadReq misses
-system.cpu.dcache.ReadReq_mshr_hits                75                       # number of ReadReq MSHR hits
-system.cpu.dcache.ReadReq_mshr_hits_0              75                       # number of ReadReq MSHR hits
-system.cpu.dcache.ReadReq_mshr_miss_latency      1387500                       # number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_latency_0      1387500                       # number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_rate_0     0.067026                       # mshr miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_mshr_misses             199                       # number of ReadReq MSHR misses
-system.cpu.dcache.ReadReq_mshr_misses_0           199                       # number of ReadReq MSHR misses
+system.cpu.cpi_0                             1.952872                       # CPI: Cycles Per Instruction
+system.cpu.cpi_1                             1.952525                       # CPI: Cycles Per Instruction
+system.cpu.cpi_total                         0.976349                       # CPI: Total CPI of All Threads
+system.cpu.dcache.ReadReq_accesses               2981                       # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_accesses_0             2981                       # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_avg_miss_latency_0  7040.892193                       # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency_0  6979.591837                       # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_hits                   2712                       # number of ReadReq hits
+system.cpu.dcache.ReadReq_hits_0                 2712                       # number of ReadReq hits
+system.cpu.dcache.ReadReq_miss_latency        1894000                       # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_latency_0      1894000                       # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_rate_0        0.090238                       # miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_misses                  269                       # number of ReadReq misses
+system.cpu.dcache.ReadReq_misses_0                269                       # number of ReadReq misses
+system.cpu.dcache.ReadReq_mshr_hits                73                       # number of ReadReq MSHR hits
+system.cpu.dcache.ReadReq_mshr_hits_0              73                       # number of ReadReq MSHR hits
+system.cpu.dcache.ReadReq_mshr_miss_latency      1368000                       # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_latency_0      1368000                       # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_rate_0     0.065750                       # mshr miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_mshr_misses             196                       # number of ReadReq MSHR misses
+system.cpu.dcache.ReadReq_mshr_misses_0           196                       # number of ReadReq MSHR misses
 system.cpu.dcache.WriteReq_accesses              1624                       # number of WriteReq accesses(hits+misses)
 system.cpu.dcache.WriteReq_accesses_0            1624                       # number of WriteReq accesses(hits+misses)
-system.cpu.dcache.WriteReq_avg_miss_latency_0  5352.409639                       # average WriteReq miss latency
-system.cpu.dcache.WriteReq_avg_mshr_miss_latency_0  5859.589041                       # average WriteReq mshr miss latency
-system.cpu.dcache.WriteReq_hits                  1126                       # number of WriteReq hits
-system.cpu.dcache.WriteReq_hits_0                1126                       # number of WriteReq hits
-system.cpu.dcache.WriteReq_miss_latency       2665500                       # number of WriteReq miss cycles
-system.cpu.dcache.WriteReq_miss_latency_0      2665500                       # number of WriteReq miss cycles
-system.cpu.dcache.WriteReq_miss_rate_0       0.306650                       # miss rate for WriteReq accesses
-system.cpu.dcache.WriteReq_misses                 498                       # number of WriteReq misses
-system.cpu.dcache.WriteReq_misses_0               498                       # number of WriteReq misses
-system.cpu.dcache.WriteReq_mshr_hits              352                       # number of WriteReq MSHR hits
-system.cpu.dcache.WriteReq_mshr_hits_0            352                       # number of WriteReq MSHR hits
-system.cpu.dcache.WriteReq_mshr_miss_latency       855500                       # number of WriteReq MSHR miss cycles
-system.cpu.dcache.WriteReq_mshr_miss_latency_0       855500                       # number of WriteReq MSHR miss cycles
+system.cpu.dcache.WriteReq_avg_miss_latency_0  5306.613226                       # average WriteReq miss latency
+system.cpu.dcache.WriteReq_avg_mshr_miss_latency_0  5852.739726                       # average WriteReq mshr miss latency
+system.cpu.dcache.WriteReq_hits                  1125                       # number of WriteReq hits
+system.cpu.dcache.WriteReq_hits_0                1125                       # number of WriteReq hits
+system.cpu.dcache.WriteReq_miss_latency       2648000                       # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_latency_0      2648000                       # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_rate_0       0.307266                       # miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_misses                 499                       # number of WriteReq misses
+system.cpu.dcache.WriteReq_misses_0               499                       # number of WriteReq misses
+system.cpu.dcache.WriteReq_mshr_hits              353                       # number of WriteReq MSHR hits
+system.cpu.dcache.WriteReq_mshr_hits_0            353                       # number of WriteReq MSHR hits
+system.cpu.dcache.WriteReq_mshr_miss_latency       854500                       # number of WriteReq MSHR miss cycles
+system.cpu.dcache.WriteReq_mshr_miss_latency_0       854500                       # number of WriteReq MSHR miss cycles
 system.cpu.dcache.WriteReq_mshr_miss_rate_0     0.089901                       # mshr miss rate for WriteReq accesses
 system.cpu.dcache.WriteReq_mshr_misses            146                       # number of WriteReq MSHR misses
 system.cpu.dcache.WriteReq_mshr_misses_0          146                       # number of WriteReq MSHR misses
 system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.dcache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.dcache.avg_refs                  11.075362                       # Average number of references to valid blocks.
+system.cpu.dcache.avg_refs                  11.219298                       # Average number of references to valid blocks.
 system.cpu.dcache.blocked_no_mshrs                  0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_no_targets                0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.dcache.cache_copies                      0                       # number of cache copies performed
-system.cpu.dcache.demand_accesses                4593                       # number of demand (read+write) accesses
-system.cpu.dcache.demand_accesses_0              4593                       # number of demand (read+write) accesses
+system.cpu.dcache.demand_accesses                4605                       # number of demand (read+write) accesses
+system.cpu.dcache.demand_accesses_0              4605                       # number of demand (read+write) accesses
 system.cpu.dcache.demand_accesses_1                 0                       # number of demand (read+write) accesses
 system.cpu.dcache.demand_avg_miss_latency <err: div-0>                       # average overall miss latency
-system.cpu.dcache.demand_avg_miss_latency_0  5963.082902                       # average overall miss latency
+system.cpu.dcache.demand_avg_miss_latency_0  5914.062500                       # average overall miss latency
 system.cpu.dcache.demand_avg_miss_latency_1 <err: div-0>                       # average overall miss latency
 system.cpu.dcache.demand_avg_mshr_miss_latency <err: div-0>                       # average overall mshr miss latency
-system.cpu.dcache.demand_avg_mshr_miss_latency_0  6501.449275                       # average overall mshr miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency_0  6498.538012                       # average overall mshr miss latency
 system.cpu.dcache.demand_avg_mshr_miss_latency_1 <err: div-0>                       # average overall mshr miss latency
-system.cpu.dcache.demand_hits                    3821                       # number of demand (read+write) hits
-system.cpu.dcache.demand_hits_0                  3821                       # number of demand (read+write) hits
+system.cpu.dcache.demand_hits                    3837                       # number of demand (read+write) hits
+system.cpu.dcache.demand_hits_0                  3837                       # number of demand (read+write) hits
 system.cpu.dcache.demand_hits_1                     0                       # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency         4603500                       # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_latency_0       4603500                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_latency         4542000                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_latency_0       4542000                       # number of demand (read+write) miss cycles
 system.cpu.dcache.demand_miss_latency_1             0                       # number of demand (read+write) miss cycles
 system.cpu.dcache.demand_miss_rate       <err: div-0>                       # miss rate for demand accesses
-system.cpu.dcache.demand_miss_rate_0         0.168082                       # miss rate for demand accesses
+system.cpu.dcache.demand_miss_rate_0         0.166775                       # miss rate for demand accesses
 system.cpu.dcache.demand_miss_rate_1     <err: div-0>                       # miss rate for demand accesses
-system.cpu.dcache.demand_misses                   772                       # number of demand (read+write) misses
-system.cpu.dcache.demand_misses_0                 772                       # number of demand (read+write) misses
+system.cpu.dcache.demand_misses                   768                       # number of demand (read+write) misses
+system.cpu.dcache.demand_misses_0                 768                       # number of demand (read+write) misses
 system.cpu.dcache.demand_misses_1                   0                       # number of demand (read+write) misses
-system.cpu.dcache.demand_mshr_hits                427                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_hits_0              427                       # number of demand (read+write) MSHR hits
+system.cpu.dcache.demand_mshr_hits                426                       # number of demand (read+write) MSHR hits
+system.cpu.dcache.demand_mshr_hits_0              426                       # number of demand (read+write) MSHR hits
 system.cpu.dcache.demand_mshr_hits_1                0                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency      2243000                       # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_latency_0      2243000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_latency      2222500                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_latency_0      2222500                       # number of demand (read+write) MSHR miss cycles
 system.cpu.dcache.demand_mshr_miss_latency_1            0                       # number of demand (read+write) MSHR miss cycles
 system.cpu.dcache.demand_mshr_miss_rate  <err: div-0>                       # mshr miss rate for demand accesses
-system.cpu.dcache.demand_mshr_miss_rate_0     0.075114                       # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_miss_rate_0     0.074267                       # mshr miss rate for demand accesses
 system.cpu.dcache.demand_mshr_miss_rate_1 <err: div-0>                       # mshr miss rate for demand accesses
-system.cpu.dcache.demand_mshr_misses              345                       # number of demand (read+write) MSHR misses
-system.cpu.dcache.demand_mshr_misses_0            345                       # number of demand (read+write) MSHR misses
+system.cpu.dcache.demand_mshr_misses              342                       # number of demand (read+write) MSHR misses
+system.cpu.dcache.demand_mshr_misses_0            342                       # number of demand (read+write) MSHR misses
 system.cpu.dcache.demand_mshr_misses_1              0                       # number of demand (read+write) MSHR misses
 system.cpu.dcache.fast_writes                       0                       # number of fast writes performed
 system.cpu.dcache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.dcache.mshr_cap_events_0                 0                       # number of times MSHR cap was activated
 system.cpu.dcache.mshr_cap_events_1                 0                       # number of times MSHR cap was activated
 system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.dcache.overall_accesses               4593                       # number of overall (read+write) accesses
-system.cpu.dcache.overall_accesses_0             4593                       # number of overall (read+write) accesses
+system.cpu.dcache.overall_accesses               4605                       # number of overall (read+write) accesses
+system.cpu.dcache.overall_accesses_0             4605                       # number of overall (read+write) accesses
 system.cpu.dcache.overall_accesses_1                0                       # number of overall (read+write) accesses
 system.cpu.dcache.overall_avg_miss_latency <err: div-0>                       # average overall miss latency
-system.cpu.dcache.overall_avg_miss_latency_0  5963.082902                       # average overall miss latency
+system.cpu.dcache.overall_avg_miss_latency_0  5914.062500                       # average overall miss latency
 system.cpu.dcache.overall_avg_miss_latency_1 <err: div-0>                       # average overall miss latency
 system.cpu.dcache.overall_avg_mshr_miss_latency <err: div-0>                       # average overall mshr miss latency
-system.cpu.dcache.overall_avg_mshr_miss_latency_0  6501.449275                       # average overall mshr miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency_0  6498.538012                       # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_miss_latency_1 <err: div-0>                       # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency_0 <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency_1 <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits                   3821                       # number of overall hits
-system.cpu.dcache.overall_hits_0                 3821                       # number of overall hits
+system.cpu.dcache.overall_hits                   3837                       # number of overall hits
+system.cpu.dcache.overall_hits_0                 3837                       # number of overall hits
 system.cpu.dcache.overall_hits_1                    0                       # number of overall hits
-system.cpu.dcache.overall_miss_latency        4603500                       # number of overall miss cycles
-system.cpu.dcache.overall_miss_latency_0      4603500                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_latency        4542000                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_latency_0      4542000                       # number of overall miss cycles
 system.cpu.dcache.overall_miss_latency_1            0                       # number of overall miss cycles
 system.cpu.dcache.overall_miss_rate      <err: div-0>                       # miss rate for overall accesses
-system.cpu.dcache.overall_miss_rate_0        0.168082                       # miss rate for overall accesses
+system.cpu.dcache.overall_miss_rate_0        0.166775                       # miss rate for overall accesses
 system.cpu.dcache.overall_miss_rate_1    <err: div-0>                       # miss rate for overall accesses
-system.cpu.dcache.overall_misses                  772                       # number of overall misses
-system.cpu.dcache.overall_misses_0                772                       # number of overall misses
+system.cpu.dcache.overall_misses                  768                       # number of overall misses
+system.cpu.dcache.overall_misses_0                768                       # number of overall misses
 system.cpu.dcache.overall_misses_1                  0                       # number of overall misses
-system.cpu.dcache.overall_mshr_hits               427                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_hits_0             427                       # number of overall MSHR hits
+system.cpu.dcache.overall_mshr_hits               426                       # number of overall MSHR hits
+system.cpu.dcache.overall_mshr_hits_0             426                       # number of overall MSHR hits
 system.cpu.dcache.overall_mshr_hits_1               0                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency      2243000                       # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_latency_0      2243000                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_latency      2222500                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_latency_0      2222500                       # number of overall MSHR miss cycles
 system.cpu.dcache.overall_mshr_miss_latency_1            0                       # number of overall MSHR miss cycles
 system.cpu.dcache.overall_mshr_miss_rate <err: div-0>                       # mshr miss rate for overall accesses
-system.cpu.dcache.overall_mshr_miss_rate_0     0.075114                       # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_miss_rate_0     0.074267                       # mshr miss rate for overall accesses
 system.cpu.dcache.overall_mshr_miss_rate_1 <err: div-0>                       # mshr miss rate for overall accesses
-system.cpu.dcache.overall_mshr_misses             345                       # number of overall MSHR misses
-system.cpu.dcache.overall_mshr_misses_0           345                       # number of overall MSHR misses
+system.cpu.dcache.overall_mshr_misses             342                       # number of overall MSHR misses
+system.cpu.dcache.overall_mshr_misses_0           342                       # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_misses_1             0                       # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.dcache.overall_mshr_uncacheable_latency_0            0                       # number of overall MSHR uncacheable cycles
@@ -206,149 +207,149 @@ system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.dcache.replacements                      0                       # number of replacements
 system.cpu.dcache.replacements_0                    0                       # number of replacements
 system.cpu.dcache.replacements_1                    0                       # number of replacements
-system.cpu.dcache.sampled_refs                    345                       # Sample count of references to valid blocks.
+system.cpu.dcache.sampled_refs                    342                       # Sample count of references to valid blocks.
 system.cpu.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
 system.cpu.dcache.soft_prefetch_mshr_full_0            0                       # number of mshr full events for SW prefetching instrutions
 system.cpu.dcache.soft_prefetch_mshr_full_1            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse                221.724795                       # Cycle average of tags in use
-system.cpu.dcache.total_refs                     3821                       # Total number of references to valid blocks.
+system.cpu.dcache.tagsinuse                221.287284                       # Cycle average of tags in use
+system.cpu.dcache.total_refs                     3837                       # Total number of references to valid blocks.
 system.cpu.dcache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.dcache.writebacks                        0                       # number of writebacks
 system.cpu.dcache.writebacks_0                      0                       # number of writebacks
 system.cpu.dcache.writebacks_1                      0                       # number of writebacks
-system.cpu.decode.DECODE:BlockedCycles           1857                       # Number of cycles decode is blocked
-system.cpu.decode.DECODE:BranchMispred            251                       # Number of times decode detected a branch misprediction
-system.cpu.decode.DECODE:BranchResolved           346                       # Number of times decode resolved a branch
-system.cpu.decode.DECODE:DecodedInsts           21806                       # Number of instructions handled by decode
-system.cpu.decode.DECODE:IdleCycles             14535                       # Number of cycles decode is idle
-system.cpu.decode.DECODE:RunCycles               3658                       # Number of cycles decode is running
-system.cpu.decode.DECODE:SquashCycles            1498                       # Number of cycles decode is squashing
-system.cpu.decode.DECODE:SquashedInsts            351                       # Number of squashed instructions handled by decode
+system.cpu.decode.DECODE:BlockedCycles           1876                       # Number of cycles decode is blocked
+system.cpu.decode.DECODE:BranchMispred            246                       # Number of times decode detected a branch misprediction
+system.cpu.decode.DECODE:BranchResolved           345                       # Number of times decode resolved a branch
+system.cpu.decode.DECODE:DecodedInsts           21769                       # Number of instructions handled by decode
+system.cpu.decode.DECODE:IdleCycles             14522                       # Number of cycles decode is idle
+system.cpu.decode.DECODE:RunCycles               3673                       # Number of cycles decode is running
+system.cpu.decode.DECODE:SquashCycles            1511                       # Number of cycles decode is squashing
+system.cpu.decode.DECODE:SquashedInsts            346                       # Number of squashed instructions handled by decode
 system.cpu.decode.DECODE:UnblockCycles            145                       # Number of cycles decode is unblocking
-system.cpu.fetch.Branches                        3964                       # Number of branches that fetch encountered
-system.cpu.fetch.CacheLines                      2983                       # Number of cache lines fetched
-system.cpu.fetch.Cycles                          6940                       # Number of cycles fetch has run and was not squashing or blocked
-system.cpu.fetch.IcacheSquashes                   525                       # Number of outstanding Icache misses that were squashed
-system.cpu.fetch.Insts                          24033                       # Number of instructions fetch has processed
-system.cpu.fetch.SquashCycles                    1178                       # Number of cycles fetch has spent squashing
-system.cpu.fetch.branchRate                  0.361053                       # Number of branch fetches per cycle
-system.cpu.fetch.icacheStallCycles               2983                       # Number of cycles fetch is stalled on an Icache miss
-system.cpu.fetch.predictedBranches               1206                       # Number of branches that fetch has predicted taken
-system.cpu.fetch.rate                        2.188997                       # Number of inst fetches per cycle
+system.cpu.fetch.Branches                        3940                       # Number of branches that fetch encountered
+system.cpu.fetch.CacheLines                      3009                       # Number of cache lines fetched
+system.cpu.fetch.Cycles                          6972                       # Number of cycles fetch has run and was not squashing or blocked
+system.cpu.fetch.IcacheSquashes                   537                       # Number of outstanding Icache misses that were squashed
+system.cpu.fetch.Insts                          23897                       # Number of instructions fetch has processed
+system.cpu.fetch.SquashCycles                    1189                       # Number of cycles fetch has spent squashing
+system.cpu.fetch.branchRate                  0.358802                       # Number of branch fetches per cycle
+system.cpu.fetch.icacheStallCycles               3009                       # Number of cycles fetch is stalled on an Icache miss
+system.cpu.fetch.predictedBranches               1221                       # Number of branches that fetch has predicted taken
+system.cpu.fetch.rate                        2.176213                       # Number of inst fetches per cycle
 system.cpu.fetch.rateDist.start_dist                           # Number of instructions fetched each cycle (Total)
-system.cpu.fetch.rateDist.samples               10979                      
+system.cpu.fetch.rateDist.samples               10981                      
 system.cpu.fetch.rateDist.min_value                 0                      
-                               0         7023   6396.76%           
-                               1          285    259.59%           
-                               2          224    204.03%           
-                               3          248    225.89%           
-                               4          335    305.13%           
-                               5          281    255.94%           
-                               6          301    274.16%           
-                               7          251    228.62%           
-                               8         2031   1849.90%           
+                               0         7019   6391.95%           
+                               1          293    266.82%           
+                               2          225    204.90%           
+                               3          260    236.77%           
+                               4          345    314.18%           
+                               5          288    262.27%           
+                               6          304    276.84%           
+                               7          246    224.02%           
+                               8         2001   1822.24%           
 system.cpu.fetch.rateDist.max_value                 8                      
 system.cpu.fetch.rateDist.end_dist
 
-system.cpu.icache.ReadReq_accesses               2983                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_accesses_0             2983                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_avg_miss_latency_0  5910.313901                       # average ReadReq miss latency
-system.cpu.icache.ReadReq_avg_mshr_miss_latency_0  5152.173913                       # average ReadReq mshr miss latency
-system.cpu.icache.ReadReq_hits                   2314                       # number of ReadReq hits
-system.cpu.icache.ReadReq_hits_0                 2314                       # number of ReadReq hits
-system.cpu.icache.ReadReq_miss_latency        3954000                       # number of ReadReq miss cycles
-system.cpu.icache.ReadReq_miss_latency_0      3954000                       # number of ReadReq miss cycles
-system.cpu.icache.ReadReq_miss_rate_0        0.224271                       # miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_misses                  669                       # number of ReadReq misses
-system.cpu.icache.ReadReq_misses_0                669                       # number of ReadReq misses
-system.cpu.icache.ReadReq_mshr_hits                48                       # number of ReadReq MSHR hits
-system.cpu.icache.ReadReq_mshr_hits_0              48                       # number of ReadReq MSHR hits
-system.cpu.icache.ReadReq_mshr_miss_latency      3199500                       # number of ReadReq MSHR miss cycles
-system.cpu.icache.ReadReq_mshr_miss_latency_0      3199500                       # number of ReadReq MSHR miss cycles
-system.cpu.icache.ReadReq_mshr_miss_rate_0     0.208180                       # mshr miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_mshr_misses             621                       # number of ReadReq MSHR misses
-system.cpu.icache.ReadReq_mshr_misses_0           621                       # number of ReadReq MSHR misses
+system.cpu.icache.ReadReq_accesses               3009                       # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_accesses_0             3009                       # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_avg_miss_latency_0  5911.144578                       # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency_0  5119.774920                       # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_hits                   2345                       # number of ReadReq hits
+system.cpu.icache.ReadReq_hits_0                 2345                       # number of ReadReq hits
+system.cpu.icache.ReadReq_miss_latency        3925000                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_latency_0      3925000                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_rate_0        0.220671                       # miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_misses                  664                       # number of ReadReq misses
+system.cpu.icache.ReadReq_misses_0                664                       # number of ReadReq misses
+system.cpu.icache.ReadReq_mshr_hits                42                       # number of ReadReq MSHR hits
+system.cpu.icache.ReadReq_mshr_hits_0              42                       # number of ReadReq MSHR hits
+system.cpu.icache.ReadReq_mshr_miss_latency      3184500                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_latency_0      3184500                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_rate_0     0.206713                       # mshr miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_mshr_misses             622                       # number of ReadReq MSHR misses
+system.cpu.icache.ReadReq_mshr_misses_0           622                       # number of ReadReq MSHR misses
 system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.icache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.icache.avg_refs                   3.726248                       # Average number of references to valid blocks.
+system.cpu.icache.avg_refs                   3.770096                       # Average number of references to valid blocks.
 system.cpu.icache.blocked_no_mshrs                  0                       # number of cycles access was blocked
 system.cpu.icache.blocked_no_targets                0                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.icache.cache_copies                      0                       # number of cache copies performed
-system.cpu.icache.demand_accesses                2983                       # number of demand (read+write) accesses
-system.cpu.icache.demand_accesses_0              2983                       # number of demand (read+write) accesses
+system.cpu.icache.demand_accesses                3009                       # number of demand (read+write) accesses
+system.cpu.icache.demand_accesses_0              3009                       # number of demand (read+write) accesses
 system.cpu.icache.demand_accesses_1                 0                       # number of demand (read+write) accesses
 system.cpu.icache.demand_avg_miss_latency <err: div-0>                       # average overall miss latency
-system.cpu.icache.demand_avg_miss_latency_0  5910.313901                       # average overall miss latency
+system.cpu.icache.demand_avg_miss_latency_0  5911.144578                       # average overall miss latency
 system.cpu.icache.demand_avg_miss_latency_1 <err: div-0>                       # average overall miss latency
 system.cpu.icache.demand_avg_mshr_miss_latency <err: div-0>                       # average overall mshr miss latency
-system.cpu.icache.demand_avg_mshr_miss_latency_0  5152.173913                       # average overall mshr miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency_0  5119.774920                       # average overall mshr miss latency
 system.cpu.icache.demand_avg_mshr_miss_latency_1 <err: div-0>                       # average overall mshr miss latency
-system.cpu.icache.demand_hits                    2314                       # number of demand (read+write) hits
-system.cpu.icache.demand_hits_0                  2314                       # number of demand (read+write) hits
+system.cpu.icache.demand_hits                    2345                       # number of demand (read+write) hits
+system.cpu.icache.demand_hits_0                  2345                       # number of demand (read+write) hits
 system.cpu.icache.demand_hits_1                     0                       # number of demand (read+write) hits
-system.cpu.icache.demand_miss_latency         3954000                       # number of demand (read+write) miss cycles
-system.cpu.icache.demand_miss_latency_0       3954000                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_latency         3925000                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_latency_0       3925000                       # number of demand (read+write) miss cycles
 system.cpu.icache.demand_miss_latency_1             0                       # number of demand (read+write) miss cycles
 system.cpu.icache.demand_miss_rate       <err: div-0>                       # miss rate for demand accesses
-system.cpu.icache.demand_miss_rate_0         0.224271                       # miss rate for demand accesses
+system.cpu.icache.demand_miss_rate_0         0.220671                       # miss rate for demand accesses
 system.cpu.icache.demand_miss_rate_1     <err: div-0>                       # miss rate for demand accesses
-system.cpu.icache.demand_misses                   669                       # number of demand (read+write) misses
-system.cpu.icache.demand_misses_0                 669                       # number of demand (read+write) misses
+system.cpu.icache.demand_misses                   664                       # number of demand (read+write) misses
+system.cpu.icache.demand_misses_0                 664                       # number of demand (read+write) misses
 system.cpu.icache.demand_misses_1                   0                       # number of demand (read+write) misses
-system.cpu.icache.demand_mshr_hits                 48                       # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_hits_0               48                       # number of demand (read+write) MSHR hits
+system.cpu.icache.demand_mshr_hits                 42                       # number of demand (read+write) MSHR hits
+system.cpu.icache.demand_mshr_hits_0               42                       # number of demand (read+write) MSHR hits
 system.cpu.icache.demand_mshr_hits_1                0                       # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_miss_latency      3199500                       # number of demand (read+write) MSHR miss cycles
-system.cpu.icache.demand_mshr_miss_latency_0      3199500                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_latency      3184500                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_latency_0      3184500                       # number of demand (read+write) MSHR miss cycles
 system.cpu.icache.demand_mshr_miss_latency_1            0                       # number of demand (read+write) MSHR miss cycles
 system.cpu.icache.demand_mshr_miss_rate  <err: div-0>                       # mshr miss rate for demand accesses
-system.cpu.icache.demand_mshr_miss_rate_0     0.208180                       # mshr miss rate for demand accesses
+system.cpu.icache.demand_mshr_miss_rate_0     0.206713                       # mshr miss rate for demand accesses
 system.cpu.icache.demand_mshr_miss_rate_1 <err: div-0>                       # mshr miss rate for demand accesses
-system.cpu.icache.demand_mshr_misses              621                       # number of demand (read+write) MSHR misses
-system.cpu.icache.demand_mshr_misses_0            621                       # number of demand (read+write) MSHR misses
+system.cpu.icache.demand_mshr_misses              622                       # number of demand (read+write) MSHR misses
+system.cpu.icache.demand_mshr_misses_0            622                       # number of demand (read+write) MSHR misses
 system.cpu.icache.demand_mshr_misses_1              0                       # number of demand (read+write) MSHR misses
 system.cpu.icache.fast_writes                       0                       # number of fast writes performed
 system.cpu.icache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.icache.mshr_cap_events_0                 0                       # number of times MSHR cap was activated
 system.cpu.icache.mshr_cap_events_1                 0                       # number of times MSHR cap was activated
 system.cpu.icache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.icache.overall_accesses               2983                       # number of overall (read+write) accesses
-system.cpu.icache.overall_accesses_0             2983                       # number of overall (read+write) accesses
+system.cpu.icache.overall_accesses               3009                       # number of overall (read+write) accesses
+system.cpu.icache.overall_accesses_0             3009                       # number of overall (read+write) accesses
 system.cpu.icache.overall_accesses_1                0                       # number of overall (read+write) accesses
 system.cpu.icache.overall_avg_miss_latency <err: div-0>                       # average overall miss latency
-system.cpu.icache.overall_avg_miss_latency_0  5910.313901                       # average overall miss latency
+system.cpu.icache.overall_avg_miss_latency_0  5911.144578                       # average overall miss latency
 system.cpu.icache.overall_avg_miss_latency_1 <err: div-0>                       # average overall miss latency
 system.cpu.icache.overall_avg_mshr_miss_latency <err: div-0>                       # average overall mshr miss latency
-system.cpu.icache.overall_avg_mshr_miss_latency_0  5152.173913                       # average overall mshr miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency_0  5119.774920                       # average overall mshr miss latency
 system.cpu.icache.overall_avg_mshr_miss_latency_1 <err: div-0>                       # average overall mshr miss latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency_0 <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency_1 <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.icache.overall_hits                   2314                       # number of overall hits
-system.cpu.icache.overall_hits_0                 2314                       # number of overall hits
+system.cpu.icache.overall_hits                   2345                       # number of overall hits
+system.cpu.icache.overall_hits_0                 2345                       # number of overall hits
 system.cpu.icache.overall_hits_1                    0                       # number of overall hits
-system.cpu.icache.overall_miss_latency        3954000                       # number of overall miss cycles
-system.cpu.icache.overall_miss_latency_0      3954000                       # number of overall miss cycles
+system.cpu.icache.overall_miss_latency        3925000                       # number of overall miss cycles
+system.cpu.icache.overall_miss_latency_0      3925000                       # number of overall miss cycles
 system.cpu.icache.overall_miss_latency_1            0                       # number of overall miss cycles
 system.cpu.icache.overall_miss_rate      <err: div-0>                       # miss rate for overall accesses
-system.cpu.icache.overall_miss_rate_0        0.224271                       # miss rate for overall accesses
+system.cpu.icache.overall_miss_rate_0        0.220671                       # miss rate for overall accesses
 system.cpu.icache.overall_miss_rate_1    <err: div-0>                       # miss rate for overall accesses
-system.cpu.icache.overall_misses                  669                       # number of overall misses
-system.cpu.icache.overall_misses_0                669                       # number of overall misses
+system.cpu.icache.overall_misses                  664                       # number of overall misses
+system.cpu.icache.overall_misses_0                664                       # number of overall misses
 system.cpu.icache.overall_misses_1                  0                       # number of overall misses
-system.cpu.icache.overall_mshr_hits                48                       # number of overall MSHR hits
-system.cpu.icache.overall_mshr_hits_0              48                       # number of overall MSHR hits
+system.cpu.icache.overall_mshr_hits                42                       # number of overall MSHR hits
+system.cpu.icache.overall_mshr_hits_0              42                       # number of overall MSHR hits
 system.cpu.icache.overall_mshr_hits_1               0                       # number of overall MSHR hits
-system.cpu.icache.overall_mshr_miss_latency      3199500                       # number of overall MSHR miss cycles
-system.cpu.icache.overall_mshr_miss_latency_0      3199500                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_latency      3184500                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_latency_0      3184500                       # number of overall MSHR miss cycles
 system.cpu.icache.overall_mshr_miss_latency_1            0                       # number of overall MSHR miss cycles
 system.cpu.icache.overall_mshr_miss_rate <err: div-0>                       # mshr miss rate for overall accesses
-system.cpu.icache.overall_mshr_miss_rate_0     0.208180                       # mshr miss rate for overall accesses
+system.cpu.icache.overall_mshr_miss_rate_0     0.206713                       # mshr miss rate for overall accesses
 system.cpu.icache.overall_mshr_miss_rate_1 <err: div-0>                       # mshr miss rate for overall accesses
-system.cpu.icache.overall_mshr_misses             621                       # number of overall MSHR misses
-system.cpu.icache.overall_mshr_misses_0           621                       # number of overall MSHR misses
+system.cpu.icache.overall_mshr_misses             622                       # number of overall MSHR misses
+system.cpu.icache.overall_mshr_misses_0           622                       # number of overall MSHR misses
 system.cpu.icache.overall_mshr_misses_1             0                       # number of overall MSHR misses
 system.cpu.icache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.icache.overall_mshr_uncacheable_latency_0            0                       # number of overall MSHR uncacheable cycles
@@ -368,104 +369,104 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.icache.replacements                      9                       # number of replacements
 system.cpu.icache.replacements_0                    9                       # number of replacements
 system.cpu.icache.replacements_1                    0                       # number of replacements
-system.cpu.icache.sampled_refs                    621                       # Sample count of references to valid blocks.
+system.cpu.icache.sampled_refs                    622                       # Sample count of references to valid blocks.
 system.cpu.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
 system.cpu.icache.soft_prefetch_mshr_full_0            0                       # number of mshr full events for SW prefetching instrutions
 system.cpu.icache.soft_prefetch_mshr_full_1            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse                322.894952                       # Cycle average of tags in use
-system.cpu.icache.total_refs                     2314                       # Total number of references to valid blocks.
+system.cpu.icache.tagsinuse                323.196356                       # Cycle average of tags in use
+system.cpu.icache.total_refs                     2345                       # Total number of references to valid blocks.
 system.cpu.icache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.icache.writebacks                        0                       # number of writebacks
 system.cpu.icache.writebacks_0                      0                       # number of writebacks
 system.cpu.icache.writebacks_1                      0                       # number of writebacks
-system.cpu.idleCycles                            1998                       # Total number of cycles that the CPU has spent unscheduled due to idling
-system.cpu.iew.EXEC:branches                     2367                       # Number of branches executed
-system.cpu.iew.EXEC:branches_0                   1185                       # Number of branches executed
-system.cpu.iew.EXEC:branches_1                   1182                       # Number of branches executed
-system.cpu.iew.EXEC:nop                            73                       # number of nop insts executed
+system.cpu.idleCycles                            2997                       # Total number of cycles that the CPU has spent unscheduled due to idling
+system.cpu.iew.EXEC:branches                     2377                       # Number of branches executed
+system.cpu.iew.EXEC:branches_0                   1192                       # Number of branches executed
+system.cpu.iew.EXEC:branches_1                   1185                       # Number of branches executed
+system.cpu.iew.EXEC:nop                            72                       # number of nop insts executed
 system.cpu.iew.EXEC:nop_0                          37                       # number of nop insts executed
-system.cpu.iew.EXEC:nop_1                          36                       # number of nop insts executed
-system.cpu.iew.EXEC:rate                     1.416158                       # Inst execution rate
-system.cpu.iew.EXEC:refs                         4978                       # number of memory reference insts executed
-system.cpu.iew.EXEC:refs_0                       2514                       # number of memory reference insts executed
-system.cpu.iew.EXEC:refs_1                       2464                       # number of memory reference insts executed
-system.cpu.iew.EXEC:stores                       1867                       # Number of stores executed
-system.cpu.iew.EXEC:stores_0                      938                       # Number of stores executed
-system.cpu.iew.EXEC:stores_1                      929                       # Number of stores executed
+system.cpu.iew.EXEC:nop_1                          35                       # number of nop insts executed
+system.cpu.iew.EXEC:rate                     1.419725                       # Inst execution rate
+system.cpu.iew.EXEC:refs                         5002                       # number of memory reference insts executed
+system.cpu.iew.EXEC:refs_0                       2507                       # number of memory reference insts executed
+system.cpu.iew.EXEC:refs_1                       2495                       # number of memory reference insts executed
+system.cpu.iew.EXEC:stores                       1874                       # Number of stores executed
+system.cpu.iew.EXEC:stores_0                      933                       # Number of stores executed
+system.cpu.iew.EXEC:stores_1                      941                       # Number of stores executed
 system.cpu.iew.EXEC:swp                             0                       # number of swp insts executed
 system.cpu.iew.EXEC:swp_0                           0                       # number of swp insts executed
 system.cpu.iew.EXEC:swp_1                           0                       # number of swp insts executed
-system.cpu.iew.WB:consumers                     10219                       # num instructions consuming a value
-system.cpu.iew.WB:consumers_0                    5113                       # num instructions consuming a value
-system.cpu.iew.WB:consumers_1                    5106                       # num instructions consuming a value
-system.cpu.iew.WB:count                         14974                       # cumulative count of insts written-back
-system.cpu.iew.WB:count_0                        7532                       # cumulative count of insts written-back
-system.cpu.iew.WB:count_1                        7442                       # cumulative count of insts written-back
-system.cpu.iew.WB:fanout                     1.526960                       # average fanout of values written-back
-system.cpu.iew.WB:fanout_0                   0.762957                       # average fanout of values written-back
-system.cpu.iew.WB:fanout_1                   0.764003                       # average fanout of values written-back
+system.cpu.iew.WB:consumers                     10260                       # num instructions consuming a value
+system.cpu.iew.WB:consumers_0                    5135                       # num instructions consuming a value
+system.cpu.iew.WB:consumers_1                    5125                       # num instructions consuming a value
+system.cpu.iew.WB:count                         14994                       # cumulative count of insts written-back
+system.cpu.iew.WB:count_0                        7526                       # cumulative count of insts written-back
+system.cpu.iew.WB:count_1                        7468                       # cumulative count of insts written-back
+system.cpu.iew.WB:fanout                     1.530607                       # average fanout of values written-back
+system.cpu.iew.WB:fanout_0                   0.763778                       # average fanout of values written-back
+system.cpu.iew.WB:fanout_1                   0.766829                       # average fanout of values written-back
 system.cpu.iew.WB:penalized                         0                       # number of instrctions required to write to 'other' IQ
 system.cpu.iew.WB:penalized_0                       0                       # number of instrctions required to write to 'other' IQ
 system.cpu.iew.WB:penalized_1                       0                       # number of instrctions required to write to 'other' IQ
 system.cpu.iew.WB:penalized_rate                    0                       # fraction of instructions written-back that wrote to 'other' IQ
 system.cpu.iew.WB:penalized_rate_0                  0                       # fraction of instructions written-back that wrote to 'other' IQ
 system.cpu.iew.WB:penalized_rate_1                  0                       # fraction of instructions written-back that wrote to 'other' IQ
-system.cpu.iew.WB:producers                      7802                       # num instructions producing a value
-system.cpu.iew.WB:producers_0                    3901                       # num instructions producing a value
-system.cpu.iew.WB:producers_1                    3901                       # num instructions producing a value
-system.cpu.iew.WB:rate                       1.363876                       # insts written-back per cycle
-system.cpu.iew.WB:rate_0                     0.686037                       # insts written-back per cycle
-system.cpu.iew.WB:rate_1                     0.677840                       # insts written-back per cycle
-system.cpu.iew.WB:sent                          15105                       # cumulative count of insts sent to commit
-system.cpu.iew.WB:sent_0                         7590                       # cumulative count of insts sent to commit
-system.cpu.iew.WB:sent_1                         7515                       # cumulative count of insts sent to commit
-system.cpu.iew.branchMispredicts                  941                       # Number of branch mispredicts detected at execute
-system.cpu.iew.iewBlockCycles                       7                       # Number of cycles IEW is blocking
-system.cpu.iew.iewDispLoadInsts                  3823                       # Number of dispatched load instructions
+system.cpu.iew.WB:producers                      7852                       # num instructions producing a value
+system.cpu.iew.WB:producers_0                    3922                       # num instructions producing a value
+system.cpu.iew.WB:producers_1                    3930                       # num instructions producing a value
+system.cpu.iew.WB:rate                       1.365449                       # insts written-back per cycle
+system.cpu.iew.WB:rate_0                     0.685366                       # insts written-back per cycle
+system.cpu.iew.WB:rate_1                     0.680084                       # insts written-back per cycle
+system.cpu.iew.WB:sent                          15132                       # cumulative count of insts sent to commit
+system.cpu.iew.WB:sent_0                         7582                       # cumulative count of insts sent to commit
+system.cpu.iew.WB:sent_1                         7550                       # cumulative count of insts sent to commit
+system.cpu.iew.branchMispredicts                  958                       # Number of branch mispredicts detected at execute
+system.cpu.iew.iewBlockCycles                       6                       # Number of cycles IEW is blocking
+system.cpu.iew.iewDispLoadInsts                  3837                       # Number of dispatched load instructions
 system.cpu.iew.iewDispNonSpecInsts                 42                       # Number of dispatched non-speculative instructions
-system.cpu.iew.iewDispSquashedInsts               501                       # Number of squashed instructions skipped by dispatch
-system.cpu.iew.iewDispStoreInsts                 2178                       # Number of dispatched store instructions
-system.cpu.iew.iewDispatchedInsts               19078                       # Number of instructions dispatched to IQ
-system.cpu.iew.iewExecLoadInsts                  3111                       # Number of load instructions executed
-system.cpu.iew.iewExecLoadInsts_0                1576                       # Number of load instructions executed
-system.cpu.iew.iewExecLoadInsts_1                1535                       # Number of load instructions executed
-system.cpu.iew.iewExecSquashedInsts               864                       # Number of squashed instructions skipped in execute
-system.cpu.iew.iewExecutedInsts                 15548                       # Number of executed instructions
+system.cpu.iew.iewDispSquashedInsts               445                       # Number of squashed instructions skipped by dispatch
+system.cpu.iew.iewDispStoreInsts                 2172                       # Number of dispatched store instructions
+system.cpu.iew.iewDispatchedInsts               19086                       # Number of instructions dispatched to IQ
+system.cpu.iew.iewExecLoadInsts                  3128                       # Number of load instructions executed
+system.cpu.iew.iewExecLoadInsts_0                1574                       # Number of load instructions executed
+system.cpu.iew.iewExecLoadInsts_1                1554                       # Number of load instructions executed
+system.cpu.iew.iewExecSquashedInsts               852                       # Number of squashed instructions skipped in execute
+system.cpu.iew.iewExecutedInsts                 15590                       # Number of executed instructions
 system.cpu.iew.iewIQFullEvents                      0                       # Number of times the IQ has become full, causing a stall
 system.cpu.iew.iewIdleCycles                        0                       # Number of cycles IEW is idle
 system.cpu.iew.iewLSQFullEvents                     0                       # Number of times the LSQ has become full, causing a stall
-system.cpu.iew.iewSquashCycles                   1498                       # Number of cycles IEW is squashing
+system.cpu.iew.iewSquashCycles                   1511                       # Number of cycles IEW is squashing
 system.cpu.iew.iewUnblockCycles                     0                       # Number of cycles IEW is unblocking
 system.cpu.iew.lsq.thread.0.blockedLoads            0                       # Number of blocked loads due to partial load-store forwarding
 system.cpu.iew.lsq.thread.0.cacheBlocked            0                       # Number of times an access to memory failed due to the cache being blocked
-system.cpu.iew.lsq.thread.0.forwLoads              42                       # Number of loads that had data forwarded from stores
+system.cpu.iew.lsq.thread.0.forwLoads              43                       # Number of loads that had data forwarded from stores
 system.cpu.iew.lsq.thread.0.ignoredResponses            4                       # Number of memory responses ignored because the instruction is squashed
 system.cpu.iew.lsq.thread.0.invAddrLoads            0                       # Number of loads ignored due to an invalid address
 system.cpu.iew.lsq.thread.0.invAddrSwpfs            0                       # Number of software prefetches ignored due to an invalid address
-system.cpu.iew.lsq.thread.0.memOrderViolation           63                       # Number of memory ordering violations
+system.cpu.iew.lsq.thread.0.memOrderViolation           64                       # Number of memory ordering violations
 system.cpu.iew.lsq.thread.0.rescheduledLoads            1                       # Number of loads that were rescheduled
-system.cpu.iew.lsq.thread.0.squashedLoads          946                       # Number of loads squashed
-system.cpu.iew.lsq.thread.0.squashedStores          276                       # Number of stores squashed
+system.cpu.iew.lsq.thread.0.squashedLoads          955                       # Number of loads squashed
+system.cpu.iew.lsq.thread.0.squashedStores          270                       # Number of stores squashed
 system.cpu.iew.lsq.thread.1.blockedLoads            0                       # Number of blocked loads due to partial load-store forwarding
 system.cpu.iew.lsq.thread.1.cacheBlocked            0                       # Number of times an access to memory failed due to the cache being blocked
-system.cpu.iew.lsq.thread.1.forwLoads              38                       # Number of loads that had data forwarded from stores
-system.cpu.iew.lsq.thread.1.ignoredResponses            0                       # Number of memory responses ignored because the instruction is squashed
+system.cpu.iew.lsq.thread.1.forwLoads              42                       # Number of loads that had data forwarded from stores
+system.cpu.iew.lsq.thread.1.ignoredResponses            2                       # Number of memory responses ignored because the instruction is squashed
 system.cpu.iew.lsq.thread.1.invAddrLoads            0                       # Number of loads ignored due to an invalid address
 system.cpu.iew.lsq.thread.1.invAddrSwpfs            0                       # Number of software prefetches ignored due to an invalid address
-system.cpu.iew.lsq.thread.1.memOrderViolation           54                       # Number of memory ordering violations
+system.cpu.iew.lsq.thread.1.memOrderViolation           58                       # Number of memory ordering violations
 system.cpu.iew.lsq.thread.1.rescheduledLoads            1                       # Number of loads that were rescheduled
-system.cpu.iew.lsq.thread.1.squashedLoads          919                       # Number of loads squashed
+system.cpu.iew.lsq.thread.1.squashedLoads          924                       # Number of loads squashed
 system.cpu.iew.lsq.thread.1.squashedStores          278                       # Number of stores squashed
-system.cpu.iew.memOrderViolationEvents            117                       # Number of memory order violations
-system.cpu.iew.predictedNotTakenIncorrect          761                       # Number of branches that were predicted not taken incorrectly
-system.cpu.iew.predictedTakenIncorrect            180                       # Number of branches that were predicted taken incorrectly
-system.cpu.ipc_0                             0.512160                       # IPC: Instructions Per Cycle
-system.cpu.ipc_1                             0.512251                       # IPC: Instructions Per Cycle
-system.cpu.ipc_total                         1.024410                       # IPC: Total IPC of All Threads
-system.cpu.iq.ISSUE:FU_type_0                    8232                       # Type of FU issued
+system.cpu.iew.memOrderViolationEvents            122                       # Number of memory order violations
+system.cpu.iew.predictedNotTakenIncorrect          767                       # Number of branches that were predicted not taken incorrectly
+system.cpu.iew.predictedTakenIncorrect            191                       # Number of branches that were predicted taken incorrectly
+system.cpu.ipc_0                             0.512066                       # IPC: Instructions Per Cycle
+system.cpu.ipc_1                             0.512157                       # IPC: Instructions Per Cycle
+system.cpu.ipc_total                         1.024224                       # IPC: Total IPC of All Threads
+system.cpu.iq.ISSUE:FU_type_0                    8235                       # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0.start_dist
                       No_OpClass            2      0.02%            # Type of FU issued
-                          IntAlu         5551     67.43%            # Type of FU issued
+                          IntAlu         5567     67.60%            # Type of FU issued
                          IntMult            1      0.01%            # Type of FU issued
                           IntDiv            0      0.00%            # Type of FU issued
                         FloatAdd            2      0.02%            # Type of FU issued
@@ -474,15 +475,15 @@ system.cpu.iq.ISSUE:FU_type_0.start_dist
                        FloatMult            0      0.00%            # Type of FU issued
                         FloatDiv            0      0.00%            # Type of FU issued
                        FloatSqrt            0      0.00%            # Type of FU issued
-                         MemRead         1704     20.70%            # Type of FU issued
-                        MemWrite          972     11.81%            # Type of FU issued
+                         MemRead         1702     20.67%            # Type of FU issued
+                        MemWrite          961     11.67%            # Type of FU issued
                        IprAccess            0      0.00%            # Type of FU issued
                     InstPrefetch            0      0.00%            # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0.end_dist
-system.cpu.iq.ISSUE:FU_type_1                    8180                       # Type of FU issued
+system.cpu.iq.ISSUE:FU_type_1                    8207                       # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_1.start_dist
                       No_OpClass            2      0.02%            # Type of FU issued
-                          IntAlu         5536     67.68%            # Type of FU issued
+                          IntAlu         5547     67.59%            # Type of FU issued
                          IntMult            1      0.01%            # Type of FU issued
                           IntDiv            0      0.00%            # Type of FU issued
                         FloatAdd            2      0.02%            # Type of FU issued
@@ -491,15 +492,15 @@ system.cpu.iq.ISSUE:FU_type_1.start_dist
                        FloatMult            0      0.00%            # Type of FU issued
                         FloatDiv            0      0.00%            # Type of FU issued
                        FloatSqrt            0      0.00%            # Type of FU issued
-                         MemRead         1681     20.55%            # Type of FU issued
-                        MemWrite          958     11.71%            # Type of FU issued
+                         MemRead         1690     20.59%            # Type of FU issued
+                        MemWrite          965     11.76%            # Type of FU issued
                        IprAccess            0      0.00%            # Type of FU issued
                     InstPrefetch            0      0.00%            # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_1.end_dist
-system.cpu.iq.ISSUE:FU_type                     16412                       # Type of FU issued
+system.cpu.iq.ISSUE:FU_type                     16442                       # Type of FU issued
 system.cpu.iq.ISSUE:FU_type.start_dist
                       No_OpClass            4      0.02%            # Type of FU issued
-                          IntAlu        11087     67.55%            # Type of FU issued
+                          IntAlu        11114     67.60%            # Type of FU issued
                          IntMult            2      0.01%            # Type of FU issued
                           IntDiv            0      0.00%            # Type of FU issued
                         FloatAdd            4      0.02%            # Type of FU issued
@@ -508,20 +509,20 @@ system.cpu.iq.ISSUE:FU_type.start_dist
                        FloatMult            0      0.00%            # Type of FU issued
                         FloatDiv            0      0.00%            # Type of FU issued
                        FloatSqrt            0      0.00%            # Type of FU issued
-                         MemRead         3385     20.63%            # Type of FU issued
-                        MemWrite         1930     11.76%            # Type of FU issued
+                         MemRead         3392     20.63%            # Type of FU issued
+                        MemWrite         1926     11.71%            # Type of FU issued
                        IprAccess            0      0.00%            # Type of FU issued
                     InstPrefetch            0      0.00%            # Type of FU issued
 system.cpu.iq.ISSUE:FU_type.end_dist
-system.cpu.iq.ISSUE:fu_busy_cnt                   180                       # FU busy when requested
-system.cpu.iq.ISSUE:fu_busy_cnt_0                  92                       # FU busy when requested
-system.cpu.iq.ISSUE:fu_busy_cnt_1                  88                       # FU busy when requested
-system.cpu.iq.ISSUE:fu_busy_rate             0.010968                       # FU busy rate (busy events/executed inst)
-system.cpu.iq.ISSUE:fu_busy_rate_0           0.005606                       # FU busy rate (busy events/executed inst)
-system.cpu.iq.ISSUE:fu_busy_rate_1           0.005362                       # FU busy rate (busy events/executed inst)
+system.cpu.iq.ISSUE:fu_busy_cnt                   189                       # FU busy when requested
+system.cpu.iq.ISSUE:fu_busy_cnt_0                  98                       # FU busy when requested
+system.cpu.iq.ISSUE:fu_busy_cnt_1                  91                       # FU busy when requested
+system.cpu.iq.ISSUE:fu_busy_rate             0.011495                       # FU busy rate (busy events/executed inst)
+system.cpu.iq.ISSUE:fu_busy_rate_0           0.005960                       # FU busy rate (busy events/executed inst)
+system.cpu.iq.ISSUE:fu_busy_rate_1           0.005535                       # FU busy rate (busy events/executed inst)
 system.cpu.iq.ISSUE:fu_full.start_dist
                       No_OpClass            0      0.00%            # attempts to use FU when none available
-                          IntAlu           16      8.89%            # attempts to use FU when none available
+                          IntAlu           14      7.41%            # attempts to use FU when none available
                          IntMult            0      0.00%            # attempts to use FU when none available
                           IntDiv            0      0.00%            # attempts to use FU when none available
                         FloatAdd            0      0.00%            # attempts to use FU when none available
@@ -530,104 +531,104 @@ system.cpu.iq.ISSUE:fu_full.start_dist
                        FloatMult            0      0.00%            # attempts to use FU when none available
                         FloatDiv            0      0.00%            # attempts to use FU when none available
                        FloatSqrt            0      0.00%            # attempts to use FU when none available
-                         MemRead           97     53.89%            # attempts to use FU when none available
-                        MemWrite           67     37.22%            # attempts to use FU when none available
+                         MemRead          107     56.61%            # attempts to use FU when none available
+                        MemWrite           68     35.98%            # attempts to use FU when none available
                        IprAccess            0      0.00%            # attempts to use FU when none available
                     InstPrefetch            0      0.00%            # attempts to use FU when none available
 system.cpu.iq.ISSUE:fu_full.end_dist
 system.cpu.iq.ISSUE:issued_per_cycle.start_dist                     # Number of insts issued each cycle
-system.cpu.iq.ISSUE:issued_per_cycle.samples        10979                      
+system.cpu.iq.ISSUE:issued_per_cycle.samples        10981                      
 system.cpu.iq.ISSUE:issued_per_cycle.min_value            0                      
-                               0         4788   4361.05%           
-                               1         1816   1654.07%           
-                               2         1657   1509.24%           
-                               3         1039    946.35%           
-                               4          774    704.98%           
-                               5          501    456.33%           
-                               6          289    263.23%           
-                               7           90     81.97%           
-                               8           25     22.77%           
+                               0         4775   4348.42%           
+                               1         1817   1654.68%           
+                               2         1638   1491.67%           
+                               3         1107   1008.10%           
+                               4          745    678.44%           
+                               5          490    446.23%           
+                               6          287    261.36%           
+                               7          100     91.07%           
+                               8           22     20.03%           
 system.cpu.iq.ISSUE:issued_per_cycle.max_value            8                      
 system.cpu.iq.ISSUE:issued_per_cycle.end_dist
 
-system.cpu.iq.ISSUE:rate                     1.494854                       # Inst issue rate
-system.cpu.iq.iqInstsAdded                      18963                       # Number of instructions added to the IQ (excludes non-spec)
-system.cpu.iq.iqInstsIssued                     16412                       # Number of instructions issued
+system.cpu.iq.ISSUE:rate                     1.497314                       # Inst issue rate
+system.cpu.iq.iqInstsAdded                      18972                       # Number of instructions added to the IQ (excludes non-spec)
+system.cpu.iq.iqInstsIssued                     16442                       # Number of instructions issued
 system.cpu.iq.iqNonSpecInstsAdded                  42                       # Number of non-speculative instructions added to the IQ
-system.cpu.iq.iqSquashedInstsExamined            6896                       # Number of squashed instructions iterated over during squash; mainly for profiling
-system.cpu.iq.iqSquashedInstsIssued                34                       # Number of squashed instructions issued
+system.cpu.iq.iqSquashedInstsExamined            6918                       # Number of squashed instructions iterated over during squash; mainly for profiling
+system.cpu.iq.iqSquashedInstsIssued                63                       # Number of squashed instructions issued
 system.cpu.iq.iqSquashedNonSpecRemoved              8                       # Number of squashed non-spec instructions that were removed
-system.cpu.iq.iqSquashedOperandsExamined         4313                       # Number of squashed operands that are examined and possibly removed from graph
-system.cpu.l2cache.ReadReq_accesses               963                       # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_accesses_0             963                       # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_avg_miss_latency_0  5220.374220                       # average ReadReq miss latency
-system.cpu.l2cache.ReadReq_avg_mshr_miss_latency_0  2725.051975                       # average ReadReq mshr miss latency
+system.cpu.iq.iqSquashedOperandsExamined         4274                       # Number of squashed operands that are examined and possibly removed from graph
+system.cpu.l2cache.ReadReq_accesses               962                       # number of ReadReq accesses(hits+misses)
+system.cpu.l2cache.ReadReq_accesses_0             962                       # number of ReadReq accesses(hits+misses)
+system.cpu.l2cache.ReadReq_avg_miss_latency_0  5208.636837                       # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_avg_mshr_miss_latency_0  2724.765869                       # average ReadReq mshr miss latency
 system.cpu.l2cache.ReadReq_hits                     1                       # number of ReadReq hits
 system.cpu.l2cache.ReadReq_hits_0                   1                       # number of ReadReq hits
-system.cpu.l2cache.ReadReq_miss_latency       5022000                       # number of ReadReq miss cycles
-system.cpu.l2cache.ReadReq_miss_latency_0      5022000                       # number of ReadReq miss cycles
-system.cpu.l2cache.ReadReq_miss_rate_0       0.998962                       # miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_misses                 962                       # number of ReadReq misses
-system.cpu.l2cache.ReadReq_misses_0               962                       # number of ReadReq misses
-system.cpu.l2cache.ReadReq_mshr_miss_latency      2621500                       # number of ReadReq MSHR miss cycles
-system.cpu.l2cache.ReadReq_mshr_miss_latency_0      2621500                       # number of ReadReq MSHR miss cycles
-system.cpu.l2cache.ReadReq_mshr_miss_rate_0     0.998962                       # mshr miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_mshr_misses            962                       # number of ReadReq MSHR misses
-system.cpu.l2cache.ReadReq_mshr_misses_0          962                       # number of ReadReq MSHR misses
+system.cpu.l2cache.ReadReq_miss_latency       5005500                       # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_miss_latency_0      5005500                       # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_miss_rate_0       0.998960                       # miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_misses                 961                       # number of ReadReq misses
+system.cpu.l2cache.ReadReq_misses_0               961                       # number of ReadReq misses
+system.cpu.l2cache.ReadReq_mshr_miss_latency      2618500                       # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_latency_0      2618500                       # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_rate_0     0.998960                       # mshr miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_mshr_misses            961                       # number of ReadReq MSHR misses
+system.cpu.l2cache.ReadReq_mshr_misses_0          961                       # number of ReadReq MSHR misses
 system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.l2cache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.l2cache.avg_refs                  0.001040                       # Average number of references to valid blocks.
+system.cpu.l2cache.avg_refs                  0.001041                       # Average number of references to valid blocks.
 system.cpu.l2cache.blocked_no_mshrs                 0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_no_targets               0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.l2cache.cache_copies                     0                       # number of cache copies performed
-system.cpu.l2cache.demand_accesses                963                       # number of demand (read+write) accesses
-system.cpu.l2cache.demand_accesses_0              963                       # number of demand (read+write) accesses
+system.cpu.l2cache.demand_accesses                962                       # number of demand (read+write) accesses
+system.cpu.l2cache.demand_accesses_0              962                       # number of demand (read+write) accesses
 system.cpu.l2cache.demand_accesses_1                0                       # number of demand (read+write) accesses
 system.cpu.l2cache.demand_avg_miss_latency <err: div-0>                       # average overall miss latency
-system.cpu.l2cache.demand_avg_miss_latency_0  5220.374220                       # average overall miss latency
+system.cpu.l2cache.demand_avg_miss_latency_0  5208.636837                       # average overall miss latency
 system.cpu.l2cache.demand_avg_miss_latency_1 <err: div-0>                       # average overall miss latency
 system.cpu.l2cache.demand_avg_mshr_miss_latency <err: div-0>                       # average overall mshr miss latency
-system.cpu.l2cache.demand_avg_mshr_miss_latency_0  2725.051975                       # average overall mshr miss latency
+system.cpu.l2cache.demand_avg_mshr_miss_latency_0  2724.765869                       # average overall mshr miss latency
 system.cpu.l2cache.demand_avg_mshr_miss_latency_1 <err: div-0>                       # average overall mshr miss latency
 system.cpu.l2cache.demand_hits                      1                       # number of demand (read+write) hits
 system.cpu.l2cache.demand_hits_0                    1                       # number of demand (read+write) hits
 system.cpu.l2cache.demand_hits_1                    0                       # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency        5022000                       # number of demand (read+write) miss cycles
-system.cpu.l2cache.demand_miss_latency_0      5022000                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_latency        5005500                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_latency_0      5005500                       # number of demand (read+write) miss cycles
 system.cpu.l2cache.demand_miss_latency_1            0                       # number of demand (read+write) miss cycles
 system.cpu.l2cache.demand_miss_rate      <err: div-0>                       # miss rate for demand accesses
-system.cpu.l2cache.demand_miss_rate_0        0.998962                       # miss rate for demand accesses
+system.cpu.l2cache.demand_miss_rate_0        0.998960                       # miss rate for demand accesses
 system.cpu.l2cache.demand_miss_rate_1    <err: div-0>                       # miss rate for demand accesses
-system.cpu.l2cache.demand_misses                  962                       # number of demand (read+write) misses
-system.cpu.l2cache.demand_misses_0                962                       # number of demand (read+write) misses
+system.cpu.l2cache.demand_misses                  961                       # number of demand (read+write) misses
+system.cpu.l2cache.demand_misses_0                961                       # number of demand (read+write) misses
 system.cpu.l2cache.demand_misses_1                  0                       # number of demand (read+write) misses
 system.cpu.l2cache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
 system.cpu.l2cache.demand_mshr_hits_0               0                       # number of demand (read+write) MSHR hits
 system.cpu.l2cache.demand_mshr_hits_1               0                       # number of demand (read+write) MSHR hits
-system.cpu.l2cache.demand_mshr_miss_latency      2621500                       # number of demand (read+write) MSHR miss cycles
-system.cpu.l2cache.demand_mshr_miss_latency_0      2621500                       # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_latency      2618500                       # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_latency_0      2618500                       # number of demand (read+write) MSHR miss cycles
 system.cpu.l2cache.demand_mshr_miss_latency_1            0                       # number of demand (read+write) MSHR miss cycles
 system.cpu.l2cache.demand_mshr_miss_rate <err: div-0>                       # mshr miss rate for demand accesses
-system.cpu.l2cache.demand_mshr_miss_rate_0     0.998962                       # mshr miss rate for demand accesses
+system.cpu.l2cache.demand_mshr_miss_rate_0     0.998960                       # mshr miss rate for demand accesses
 system.cpu.l2cache.demand_mshr_miss_rate_1 <err: div-0>                       # mshr miss rate for demand accesses
-system.cpu.l2cache.demand_mshr_misses             962                       # number of demand (read+write) MSHR misses
-system.cpu.l2cache.demand_mshr_misses_0           962                       # number of demand (read+write) MSHR misses
+system.cpu.l2cache.demand_mshr_misses             961                       # number of demand (read+write) MSHR misses
+system.cpu.l2cache.demand_mshr_misses_0           961                       # number of demand (read+write) MSHR misses
 system.cpu.l2cache.demand_mshr_misses_1             0                       # number of demand (read+write) MSHR misses
 system.cpu.l2cache.fast_writes                      0                       # number of fast writes performed
 system.cpu.l2cache.mshr_cap_events                  0                       # number of times MSHR cap was activated
 system.cpu.l2cache.mshr_cap_events_0                0                       # number of times MSHR cap was activated
 system.cpu.l2cache.mshr_cap_events_1                0                       # number of times MSHR cap was activated
 system.cpu.l2cache.no_allocate_misses               0                       # Number of misses that were no-allocate
-system.cpu.l2cache.overall_accesses               963                       # number of overall (read+write) accesses
-system.cpu.l2cache.overall_accesses_0             963                       # number of overall (read+write) accesses
+system.cpu.l2cache.overall_accesses               962                       # number of overall (read+write) accesses
+system.cpu.l2cache.overall_accesses_0             962                       # number of overall (read+write) accesses
 system.cpu.l2cache.overall_accesses_1               0                       # number of overall (read+write) accesses
 system.cpu.l2cache.overall_avg_miss_latency <err: div-0>                       # average overall miss latency
-system.cpu.l2cache.overall_avg_miss_latency_0  5220.374220                       # average overall miss latency
+system.cpu.l2cache.overall_avg_miss_latency_0  5208.636837                       # average overall miss latency
 system.cpu.l2cache.overall_avg_miss_latency_1 <err: div-0>                       # average overall miss latency
 system.cpu.l2cache.overall_avg_mshr_miss_latency <err: div-0>                       # average overall mshr miss latency
-system.cpu.l2cache.overall_avg_mshr_miss_latency_0  2725.051975                       # average overall mshr miss latency
+system.cpu.l2cache.overall_avg_mshr_miss_latency_0  2724.765869                       # average overall mshr miss latency
 system.cpu.l2cache.overall_avg_mshr_miss_latency_1 <err: div-0>                       # average overall mshr miss latency
 system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.l2cache.overall_avg_mshr_uncacheable_latency_0 <err: div-0>                       # average overall mshr uncacheable latency
@@ -635,26 +636,26 @@ system.cpu.l2cache.overall_avg_mshr_uncacheable_latency_1 <err: div-0>
 system.cpu.l2cache.overall_hits                     1                       # number of overall hits
 system.cpu.l2cache.overall_hits_0                   1                       # number of overall hits
 system.cpu.l2cache.overall_hits_1                   0                       # number of overall hits
-system.cpu.l2cache.overall_miss_latency       5022000                       # number of overall miss cycles
-system.cpu.l2cache.overall_miss_latency_0      5022000                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_latency       5005500                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_latency_0      5005500                       # number of overall miss cycles
 system.cpu.l2cache.overall_miss_latency_1            0                       # number of overall miss cycles
 system.cpu.l2cache.overall_miss_rate     <err: div-0>                       # miss rate for overall accesses
-system.cpu.l2cache.overall_miss_rate_0       0.998962                       # miss rate for overall accesses
+system.cpu.l2cache.overall_miss_rate_0       0.998960                       # miss rate for overall accesses
 system.cpu.l2cache.overall_miss_rate_1   <err: div-0>                       # miss rate for overall accesses
-system.cpu.l2cache.overall_misses                 962                       # number of overall misses
-system.cpu.l2cache.overall_misses_0               962                       # number of overall misses
+system.cpu.l2cache.overall_misses                 961                       # number of overall misses
+system.cpu.l2cache.overall_misses_0               961                       # number of overall misses
 system.cpu.l2cache.overall_misses_1                 0                       # number of overall misses
 system.cpu.l2cache.overall_mshr_hits                0                       # number of overall MSHR hits
 system.cpu.l2cache.overall_mshr_hits_0              0                       # number of overall MSHR hits
 system.cpu.l2cache.overall_mshr_hits_1              0                       # number of overall MSHR hits
-system.cpu.l2cache.overall_mshr_miss_latency      2621500                       # number of overall MSHR miss cycles
-system.cpu.l2cache.overall_mshr_miss_latency_0      2621500                       # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_latency      2618500                       # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_latency_0      2618500                       # number of overall MSHR miss cycles
 system.cpu.l2cache.overall_mshr_miss_latency_1            0                       # number of overall MSHR miss cycles
 system.cpu.l2cache.overall_mshr_miss_rate <err: div-0>                       # mshr miss rate for overall accesses
-system.cpu.l2cache.overall_mshr_miss_rate_0     0.998962                       # mshr miss rate for overall accesses
+system.cpu.l2cache.overall_mshr_miss_rate_0     0.998960                       # mshr miss rate for overall accesses
 system.cpu.l2cache.overall_mshr_miss_rate_1 <err: div-0>                       # mshr miss rate for overall accesses
-system.cpu.l2cache.overall_mshr_misses            962                       # number of overall MSHR misses
-system.cpu.l2cache.overall_mshr_misses_0          962                       # number of overall MSHR misses
+system.cpu.l2cache.overall_mshr_misses            961                       # number of overall MSHR misses
+system.cpu.l2cache.overall_mshr_misses_0          961                       # number of overall MSHR misses
 system.cpu.l2cache.overall_mshr_misses_1            0                       # number of overall MSHR misses
 system.cpu.l2cache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.l2cache.overall_mshr_uncacheable_latency_0            0                       # number of overall MSHR uncacheable cycles
@@ -674,33 +675,33 @@ system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.l2cache.replacements                     0                       # number of replacements
 system.cpu.l2cache.replacements_0                   0                       # number of replacements
 system.cpu.l2cache.replacements_1                   0                       # number of replacements
-system.cpu.l2cache.sampled_refs                   962                       # Sample count of references to valid blocks.
+system.cpu.l2cache.sampled_refs                   961                       # Sample count of references to valid blocks.
 system.cpu.l2cache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
 system.cpu.l2cache.soft_prefetch_mshr_full_0            0                       # number of mshr full events for SW prefetching instrutions
 system.cpu.l2cache.soft_prefetch_mshr_full_1            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse               545.133409                       # Cycle average of tags in use
+system.cpu.l2cache.tagsinuse               545.318204                       # Cycle average of tags in use
 system.cpu.l2cache.total_refs                       1                       # Total number of references to valid blocks.
 system.cpu.l2cache.warmup_cycle                     0                       # Cycle when the warmup percentage was hit.
 system.cpu.l2cache.writebacks                       0                       # number of writebacks
 system.cpu.l2cache.writebacks_0                     0                       # number of writebacks
 system.cpu.l2cache.writebacks_1                     0                       # number of writebacks
-system.cpu.numCycles                            10979                       # number of cpu cycles simulated
-system.cpu.rename.RENAME:BlockCycles              614                       # Number of cycles rename is blocking
+system.cpu.numCycles                            10981                       # number of cpu cycles simulated
+system.cpu.rename.RENAME:BlockCycles              612                       # Number of cycles rename is blocking
 system.cpu.rename.RENAME:CommittedMaps           8102                       # Number of HB maps that are committed
-system.cpu.rename.RENAME:IdleCycles             14840                       # Number of cycles rename is idle
-system.cpu.rename.RENAME:LSQFullEvents            684                       # Number of times rename has blocked due to LSQ full
-system.cpu.rename.RENAME:RenameLookups          26359                       # Number of register rename lookups that rename has made
-system.cpu.rename.RENAME:RenamedInsts           20748                       # Number of instructions processed by rename
-system.cpu.rename.RENAME:RenamedOperands        15612                       # Number of destination operands rename has renamed
-system.cpu.rename.RENAME:RunCycles               3480                       # Number of cycles rename is running
-system.cpu.rename.RENAME:SquashCycles            1498                       # Number of cycles rename is squashing
-system.cpu.rename.RENAME:UnblockCycles            744                       # Number of cycles rename is unblocking
-system.cpu.rename.RENAME:UndoneMaps              7510                       # Number of HB maps that are undone due to squashing
-system.cpu.rename.RENAME:serializeStallCycles          517                       # count of cycles rename stalled for serializing inst
+system.cpu.rename.RENAME:IdleCycles             14828                       # Number of cycles rename is idle
+system.cpu.rename.RENAME:LSQFullEvents            692                       # Number of times rename has blocked due to LSQ full
+system.cpu.rename.RENAME:RenameLookups          26356                       # Number of register rename lookups that rename has made
+system.cpu.rename.RENAME:RenamedInsts           20731                       # Number of instructions processed by rename
+system.cpu.rename.RENAME:RenamedOperands        15606                       # Number of destination operands rename has renamed
+system.cpu.rename.RENAME:RunCycles               3494                       # Number of cycles rename is running
+system.cpu.rename.RENAME:SquashCycles            1511                       # Number of cycles rename is squashing
+system.cpu.rename.RENAME:UnblockCycles            761                       # Number of cycles rename is unblocking
+system.cpu.rename.RENAME:UndoneMaps              7504                       # Number of HB maps that are undone due to squashing
+system.cpu.rename.RENAME:serializeStallCycles          521                       # count of cycles rename stalled for serializing inst
 system.cpu.rename.RENAME:serializingInsts           48                       # count of serializing insts renamed
-system.cpu.rename.RENAME:skidInsts               2147                       # count of insts added to the skid buffer
+system.cpu.rename.RENAME:skidInsts               2159                       # count of insts added to the skid buffer
 system.cpu.rename.RENAME:tempSerializingInsts           37                       # count of temporary serializing insts renamed
-system.cpu.timesIdled                               2                       # Number of times that the entire CPU went into an idle state and unscheduled itself
+system.cpu.timesIdled                               3                       # Number of times that the entire CPU went into an idle state and unscheduled itself
 system.cpu.workload0.PROG:num_syscalls             17                       # Number of system calls
 system.cpu.workload1.PROG:num_syscalls             17                       # Number of system calls
 
diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout
index 6f3d2a7c5..76288ac1d 100644
--- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout
+++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout
@@ -7,9 +7,9 @@ The Regents of The University of Michigan
 All Rights Reserved
 
 
-M5 compiled Jun 10 2007 14:06:20
-M5 started Sun Jun 10 14:22:38 2007
-M5 executing on iceaxe
-command line: /Users/nate/build/outgoing/build/ALPHA_SE/m5.debug -d /Users/nate/build/outgoing/build/ALPHA_SE/tests/debug/quick/01.hello-2T-smt/alpha/linux/o3-timing tests/run.py quick/01.hello-2T-smt/alpha/linux/o3-timing
+M5 compiled Jun 21 2007 21:25:27
+M5 started Fri Jun 22 00:04:51 2007
+M5 executing on zizzer.eecs.umich.edu
+command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/01.hello-2T-smt/alpha/linux/o3-timing tests/run.py quick/01.hello-2T-smt/alpha/linux/o3-timing
 Global frequency set at 1000000000000 ticks per second
-Exiting @ tick 5490000 because target called exit()
+Exiting @ tick 5491500 because target called exit()
diff --git a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.ini b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.ini
index 61102139c..0ef239ef4 100644
--- a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.ini
+++ b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.ini
@@ -21,6 +21,7 @@ SQEntries=32
 SSITSize=1024
 activity=0
 backComSize=5
+cachePorts=200
 choiceCtrBits=2
 choicePredictorSize=8192
 clock=500
@@ -74,6 +75,15 @@ renameToFetchDelay=1
 renameToIEWDelay=2
 renameToROBDelay=1
 renameWidth=8
+smtCommitPolicy=RoundRobin
+smtFetchPolicy=SingleThread
+smtIQPolicy=Partitioned
+smtIQThreshold=100
+smtLSQPolicy=Partitioned
+smtLSQThreshold=100
+smtNumFetchingThreads=1
+smtROBPolicy=Partitioned
+smtROBThreshold=100
 squashWidth=8
 system=system
 trapLatency=13
@@ -86,6 +96,7 @@ icache_port=system.cpu.icache.cpu_side
 [system.cpu.dcache]
 type=BaseCache
 adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=2
 block_size=64
 compressed_bus=false
@@ -99,7 +110,7 @@ prefetch_access=false
 prefetch_cache_check_push=true
 prefetch_data_accesses_only=false
 prefetch_degree=1
-prefetch_latency=10
+prefetch_latency=10000
 prefetch_miss=false
 prefetch_past_page=false
 prefetch_policy=none
@@ -261,6 +272,7 @@ opLat=3
 [system.cpu.icache]
 type=BaseCache
 adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=2
 block_size=64
 compressed_bus=false
@@ -274,7 +286,7 @@ prefetch_access=false
 prefetch_cache_check_push=true
 prefetch_data_accesses_only=false
 prefetch_degree=1
-prefetch_latency=10
+prefetch_latency=10000
 prefetch_miss=false
 prefetch_past_page=false
 prefetch_policy=none
@@ -299,6 +311,7 @@ mem_side=system.cpu.toL2Bus.port[0]
 [system.cpu.l2cache]
 type=BaseCache
 adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=2
 block_size=64
 compressed_bus=false
@@ -312,7 +325,7 @@ prefetch_access=false
 prefetch_cache_check_push=true
 prefetch_data_accesses_only=false
 prefetch_degree=1
-prefetch_latency=10
+prefetch_latency=10000
 prefetch_miss=false
 prefetch_past_page=false
 prefetch_policy=none
@@ -366,7 +379,7 @@ bus_id=0
 clock=1000
 responder_set=false
 width=64
-port=system.physmem.port system.cpu.l2cache.mem_side
+port=system.physmem.port[0] system.cpu.l2cache.mem_side
 
 [system.physmem]
 type=PhysicalMemory
diff --git a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.out b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.out
index 70564f749..bdf29a72a 100644
--- a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.out
+++ b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.out
@@ -275,7 +275,7 @@ prefetch_access=false
 prefetcher_size=100
 prefetch_past_page=false
 prefetch_serial_squash=false
-prefetch_latency=10
+prefetch_latency=10000
 prefetch_degree=1
 prefetch_policy=none
 prefetch_cache_check_push=true
@@ -312,7 +312,7 @@ prefetch_access=false
 prefetcher_size=100
 prefetch_past_page=false
 prefetch_serial_squash=false
-prefetch_latency=10
+prefetch_latency=10000
 prefetch_degree=1
 prefetch_policy=none
 prefetch_cache_check_push=true
@@ -349,7 +349,7 @@ prefetch_access=false
 prefetcher_size=100
 prefetch_past_page=false
 prefetch_serial_squash=false
-prefetch_latency=10
+prefetch_latency=10000
 prefetch_degree=1
 prefetch_policy=none
 prefetch_cache_check_push=true
diff --git a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/m5stats.txt b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/m5stats.txt
index 7859d5c2b..ca9f1caa8 100644
--- a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/m5stats.txt
+++ b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/m5stats.txt
@@ -1,40 +1,40 @@
 
 ---------- Begin Simulation Statistics ----------
 global.BPredUnit.BTBCorrect                         0                       # Number of correct BTB predictions (this stat may not work properly.
-global.BPredUnit.BTBHits                         2726                       # Number of BTB hits
-global.BPredUnit.BTBLookups                      7230                       # Number of BTB lookups
+global.BPredUnit.BTBHits                         2589                       # Number of BTB hits
+global.BPredUnit.BTBLookups                      6396                       # Number of BTB lookups
 global.BPredUnit.RASInCorrect                       0                       # Number of incorrect RAS predictions.
-global.BPredUnit.condIncorrect                   2062                       # Number of conditional branches incorrect
-global.BPredUnit.condPredicted                   7954                       # Number of conditional branches predicted
-global.BPredUnit.lookups                         7954                       # Number of BP lookups
+global.BPredUnit.condIncorrect                   2002                       # Number of conditional branches incorrect
+global.BPredUnit.condPredicted                   6955                       # Number of conditional branches predicted
+global.BPredUnit.lookups                         6955                       # Number of BP lookups
 global.BPredUnit.usedRAS                            0                       # Number of times the RAS was used to get a target.
-host_inst_rate                                  37089                       # Simulator instruction rate (inst/s)
-host_mem_usage                                 154932                       # Number of bytes of host memory used
-host_seconds                                     0.30                       # Real time elapsed on the host
-host_tick_rate                               53780846                       # Simulator tick rate (ticks/s)
+host_inst_rate                                  33806                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 154936                       # Number of bytes of host memory used
+host_seconds                                     0.32                       # Real time elapsed on the host
+host_tick_rate                               48256964                       # Simulator tick rate (ticks/s)
 memdepunit.memDep.conflictingLoads                 10                       # Number of conflicting loads.
 memdepunit.memDep.conflictingStores                 0                       # Number of conflicting stores.
-memdepunit.memDep.insertedLoads                  3198                       # Number of loads inserted to the mem dependence unit.
-memdepunit.memDep.insertedStores                 2970                       # Number of stores inserted to the mem dependence unit.
+memdepunit.memDep.insertedLoads                  2999                       # Number of loads inserted to the mem dependence unit.
+memdepunit.memDep.insertedStores                 2872                       # Number of stores inserted to the mem dependence unit.
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
 sim_insts                                       10976                       # Number of instructions simulated
 sim_seconds                                  0.000016                       # Number of seconds simulated
-sim_ticks                                    15931500                       # Number of ticks simulated
+sim_ticks                                    15682500                       # Number of ticks simulated
 system.cpu.commit.COM:branches                   2152                       # Number of branches committed
-system.cpu.commit.COM:bw_lim_events               146                       # number cycles where commit BW limit reached
+system.cpu.commit.COM:bw_lim_events               199                       # number cycles where commit BW limit reached
 system.cpu.commit.COM:bw_limited                    0                       # number of insts not committed due to BW limits
 system.cpu.commit.COM:committed_per_cycle.start_dist                     # Number of insts commited each cycle
-system.cpu.commit.COM:committed_per_cycle.samples        28801                      
+system.cpu.commit.COM:committed_per_cycle.samples        28561                      
 system.cpu.commit.COM:committed_per_cycle.min_value            0                      
-                               0        23411   8128.54%           
-                               1         2862    993.72%           
-                               2         1174    407.62%           
-                               3          608    211.10%           
-                               4          359    124.65%           
-                               5          123     42.71%           
-                               6          103     35.76%           
-                               7           15      5.21%           
-                               8          146     50.69%           
+                               0        23237   8135.92%           
+                               1         2855    999.61%           
+                               2         1132    396.34%           
+                               3          638    223.38%           
+                               4          273     95.58%           
+                               5          119     41.67%           
+                               6           92     32.21%           
+                               7           16      5.60%           
+                               8          199     69.68%           
 system.cpu.commit.COM:committed_per_cycle.max_value            8                      
 system.cpu.commit.COM:committed_per_cycle.end_dist
 
@@ -43,71 +43,71 @@ system.cpu.commit.COM:loads                      1462                       # Nu
 system.cpu.commit.COM:membars                       0                       # Number of memory barriers committed
 system.cpu.commit.COM:refs                       2760                       # Number of memory references committed
 system.cpu.commit.COM:swp_count                     0                       # Number of s/w prefetches committed
-system.cpu.commit.branchMispredicts              2062                       # The number of times a branch was mispredicted
+system.cpu.commit.branchMispredicts              2002                       # The number of times a branch was mispredicted
 system.cpu.commit.commitCommittedInsts          10976                       # The number of committed instructions
-system.cpu.commit.commitNonSpecStalls             327                       # The number of times commit has been forced to stall to communicate backwards
-system.cpu.commit.commitSquashedInsts           14297                       # The number of squashed insts skipped by commit
+system.cpu.commit.commitNonSpecStalls             329                       # The number of times commit has been forced to stall to communicate backwards
+system.cpu.commit.commitSquashedInsts           12659                       # The number of squashed insts skipped by commit
 system.cpu.committedInsts                       10976                       # Number of Instructions Simulated
 system.cpu.committedInsts_total                 10976                       # Number of Instructions Simulated
-system.cpu.cpi                               2.903061                       # CPI: Cycles Per Instruction
-system.cpu.cpi_total                         2.903061                       # CPI: Total CPI of All Threads
-system.cpu.dcache.ReadReq_accesses               2743                       # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_avg_miss_latency  5392.857143                       # average ReadReq miss latency
-system.cpu.dcache.ReadReq_avg_mshr_miss_latency  4696.969697                       # average ReadReq mshr miss latency
-system.cpu.dcache.ReadReq_hits                   2659                       # number of ReadReq hits
-system.cpu.dcache.ReadReq_miss_latency         453000                       # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_rate          0.030623                       # miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_misses                   84                       # number of ReadReq misses
-system.cpu.dcache.ReadReq_mshr_hits                18                       # number of ReadReq MSHR hits
-system.cpu.dcache.ReadReq_mshr_miss_latency       310000                       # number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_rate     0.024061                       # mshr miss rate for ReadReq accesses
+system.cpu.cpi                               2.857598                       # CPI: Cycles Per Instruction
+system.cpu.cpi_total                         2.857598                       # CPI: Total CPI of All Threads
+system.cpu.dcache.ReadReq_accesses               2313                       # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_avg_miss_latency  5451.807229                       # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency  4719.696970                       # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_hits                   2230                       # number of ReadReq hits
+system.cpu.dcache.ReadReq_miss_latency         452500                       # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_rate          0.035884                       # miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_misses                   83                       # number of ReadReq misses
+system.cpu.dcache.ReadReq_mshr_hits                17                       # number of ReadReq MSHR hits
+system.cpu.dcache.ReadReq_mshr_miss_latency       311500                       # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_rate     0.028534                       # mshr miss rate for ReadReq accesses
 system.cpu.dcache.ReadReq_mshr_misses              66                       # number of ReadReq MSHR misses
 system.cpu.dcache.SwapReq_accesses                  6                       # number of SwapReq accesses(hits+misses)
 system.cpu.dcache.SwapReq_hits                      6                       # number of SwapReq hits
 system.cpu.dcache.WriteReq_accesses              1292                       # number of WriteReq accesses(hits+misses)
-system.cpu.dcache.WriteReq_avg_miss_latency         5505                       # average WriteReq miss latency
+system.cpu.dcache.WriteReq_avg_miss_latency  5522.613065                       # average WriteReq miss latency
 system.cpu.dcache.WriteReq_avg_mshr_miss_latency  4802.325581                       # average WriteReq mshr miss latency
-system.cpu.dcache.WriteReq_hits                  1092                       # number of WriteReq hits
-system.cpu.dcache.WriteReq_miss_latency       1101000                       # number of WriteReq miss cycles
-system.cpu.dcache.WriteReq_miss_rate         0.154799                       # miss rate for WriteReq accesses
-system.cpu.dcache.WriteReq_misses                 200                       # number of WriteReq misses
-system.cpu.dcache.WriteReq_mshr_hits              114                       # number of WriteReq MSHR hits
+system.cpu.dcache.WriteReq_hits                  1093                       # number of WriteReq hits
+system.cpu.dcache.WriteReq_miss_latency       1099000                       # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_rate         0.154025                       # miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_misses                 199                       # number of WriteReq misses
+system.cpu.dcache.WriteReq_mshr_hits              113                       # number of WriteReq MSHR hits
 system.cpu.dcache.WriteReq_mshr_miss_latency       413000                       # number of WriteReq MSHR miss cycles
 system.cpu.dcache.WriteReq_mshr_miss_rate     0.066563                       # mshr miss rate for WriteReq accesses
 system.cpu.dcache.WriteReq_mshr_misses             86                       # number of WriteReq MSHR misses
 system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.dcache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.dcache.avg_refs                  24.717105                       # Average number of references to valid blocks.
+system.cpu.dcache.avg_refs                  21.901316                       # Average number of references to valid blocks.
 system.cpu.dcache.blocked_no_mshrs                  0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_no_targets                0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.dcache.cache_copies                      0                       # number of cache copies performed
-system.cpu.dcache.demand_accesses                4035                       # number of demand (read+write) accesses
-system.cpu.dcache.demand_avg_miss_latency  5471.830986                       # average overall miss latency
-system.cpu.dcache.demand_avg_mshr_miss_latency  4756.578947                       # average overall mshr miss latency
-system.cpu.dcache.demand_hits                    3751                       # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency         1554000                       # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_rate           0.070384                       # miss rate for demand accesses
-system.cpu.dcache.demand_misses                   284                       # number of demand (read+write) misses
-system.cpu.dcache.demand_mshr_hits                132                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency       723000                       # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_rate      0.037670                       # mshr miss rate for demand accesses
+system.cpu.dcache.demand_accesses                3605                       # number of demand (read+write) accesses
+system.cpu.dcache.demand_avg_miss_latency  5501.773050                       # average overall miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency  4766.447368                       # average overall mshr miss latency
+system.cpu.dcache.demand_hits                    3323                       # number of demand (read+write) hits
+system.cpu.dcache.demand_miss_latency         1551500                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_rate           0.078225                       # miss rate for demand accesses
+system.cpu.dcache.demand_misses                   282                       # number of demand (read+write) misses
+system.cpu.dcache.demand_mshr_hits                130                       # number of demand (read+write) MSHR hits
+system.cpu.dcache.demand_mshr_miss_latency       724500                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_rate      0.042164                       # mshr miss rate for demand accesses
 system.cpu.dcache.demand_mshr_misses              152                       # number of demand (read+write) MSHR misses
 system.cpu.dcache.fast_writes                       0                       # number of fast writes performed
 system.cpu.dcache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.dcache.overall_accesses               4035                       # number of overall (read+write) accesses
-system.cpu.dcache.overall_avg_miss_latency  5471.830986                       # average overall miss latency
-system.cpu.dcache.overall_avg_mshr_miss_latency  4756.578947                       # average overall mshr miss latency
+system.cpu.dcache.overall_accesses               3605                       # number of overall (read+write) accesses
+system.cpu.dcache.overall_avg_miss_latency  5501.773050                       # average overall miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency  4766.447368                       # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits                   3751                       # number of overall hits
-system.cpu.dcache.overall_miss_latency        1554000                       # number of overall miss cycles
-system.cpu.dcache.overall_miss_rate          0.070384                       # miss rate for overall accesses
-system.cpu.dcache.overall_misses                  284                       # number of overall misses
-system.cpu.dcache.overall_mshr_hits               132                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency       723000                       # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_rate     0.037670                       # mshr miss rate for overall accesses
+system.cpu.dcache.overall_hits                   3323                       # number of overall hits
+system.cpu.dcache.overall_miss_latency        1551500                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_rate          0.078225                       # miss rate for overall accesses
+system.cpu.dcache.overall_misses                  282                       # number of overall misses
+system.cpu.dcache.overall_mshr_hits               130                       # number of overall MSHR hits
+system.cpu.dcache.overall_mshr_miss_latency       724500                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_rate     0.042164                       # mshr miss rate for overall accesses
 system.cpu.dcache.overall_mshr_misses             152                       # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.dcache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
@@ -123,85 +123,85 @@ system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.dcache.replacements                      0                       # number of replacements
 system.cpu.dcache.sampled_refs                    152                       # Sample count of references to valid blocks.
 system.cpu.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse                113.439038                       # Cycle average of tags in use
-system.cpu.dcache.total_refs                     3757                       # Total number of references to valid blocks.
+system.cpu.dcache.tagsinuse                113.060803                       # Cycle average of tags in use
+system.cpu.dcache.total_refs                     3329                       # Total number of references to valid blocks.
 system.cpu.dcache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.dcache.writebacks                        0                       # number of writebacks
-system.cpu.decode.DECODE:BlockedCycles           4602                       # Number of cycles decode is blocked
-system.cpu.decode.DECODE:DecodedInsts           38937                       # Number of instructions handled by decode
-system.cpu.decode.DECODE:IdleCycles             16098                       # Number of cycles decode is idle
-system.cpu.decode.DECODE:RunCycles               7883                       # Number of cycles decode is running
-system.cpu.decode.DECODE:SquashCycles            3063                       # Number of cycles decode is squashing
-system.cpu.decode.DECODE:UnblockCycles            218                       # Number of cycles decode is unblocking
-system.cpu.fetch.Branches                        7954                       # Number of branches that fetch encountered
-system.cpu.fetch.CacheLines                      4933                       # Number of cache lines fetched
-system.cpu.fetch.Cycles                         14166                       # Number of cycles fetch has run and was not squashing or blocked
-system.cpu.fetch.IcacheSquashes                   565                       # Number of outstanding Icache misses that were squashed
-system.cpu.fetch.Insts                          44421                       # Number of instructions fetch has processed
-system.cpu.fetch.SquashCycles                    2121                       # Number of cycles fetch has spent squashing
-system.cpu.fetch.branchRate                  0.249623                       # Number of branch fetches per cycle
-system.cpu.fetch.icacheStallCycles               4933                       # Number of cycles fetch is stalled on an Icache miss
-system.cpu.fetch.predictedBranches               2726                       # Number of branches that fetch has predicted taken
-system.cpu.fetch.rate                        1.394081                       # Number of inst fetches per cycle
+system.cpu.decode.DECODE:BlockedCycles           3802                       # Number of cycles decode is blocked
+system.cpu.decode.DECODE:DecodedInsts           34098                       # Number of instructions handled by decode
+system.cpu.decode.DECODE:IdleCycles             15413                       # Number of cycles decode is idle
+system.cpu.decode.DECODE:RunCycles               9282                       # Number of cycles decode is running
+system.cpu.decode.DECODE:SquashCycles            2804                       # Number of cycles decode is squashing
+system.cpu.decode.DECODE:UnblockCycles             64                       # Number of cycles decode is unblocking
+system.cpu.fetch.Branches                        6955                       # Number of branches that fetch encountered
+system.cpu.fetch.CacheLines                      4655                       # Number of cache lines fetched
+system.cpu.fetch.Cycles                         15062                       # Number of cycles fetch has run and was not squashing or blocked
+system.cpu.fetch.IcacheSquashes                   489                       # Number of outstanding Icache misses that were squashed
+system.cpu.fetch.Insts                          38520                       # Number of instructions fetch has processed
+system.cpu.fetch.SquashCycles                    2061                       # Number of cycles fetch has spent squashing
+system.cpu.fetch.branchRate                  0.221744                       # Number of branch fetches per cycle
+system.cpu.fetch.icacheStallCycles               4655                       # Number of cycles fetch is stalled on an Icache miss
+system.cpu.fetch.predictedBranches               2589                       # Number of branches that fetch has predicted taken
+system.cpu.fetch.rate                        1.228121                       # Number of inst fetches per cycle
 system.cpu.fetch.rateDist.start_dist                           # Number of instructions fetched each cycle (Total)
-system.cpu.fetch.rateDist.samples               31864                      
+system.cpu.fetch.rateDist.samples               31365                      
 system.cpu.fetch.rateDist.min_value                 0                      
-                               0        22632   7102.69%           
-                               1         2187    686.35%           
-                               2          562    176.37%           
-                               3          869    272.72%           
-                               4          521    163.51%           
-                               5          770    241.65%           
-                               6          886    278.06%           
-                               7          243     76.26%           
-                               8         3194   1002.39%           
+                               0        20959   6682.29%           
+                               1         4502   1435.36%           
+                               2          577    183.96%           
+                               3          682    217.44%           
+                               4          776    247.41%           
+                               5          629    200.54%           
+                               6          581    185.24%           
+                               7          189     60.26%           
+                               8         2470    787.50%           
 system.cpu.fetch.rateDist.max_value                 8                      
 system.cpu.fetch.rateDist.end_dist
 
-system.cpu.icache.ReadReq_accesses               4933                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_avg_miss_latency  5310.666667                       # average ReadReq miss latency
-system.cpu.icache.ReadReq_avg_mshr_miss_latency  4396.174863                       # average ReadReq mshr miss latency
-system.cpu.icache.ReadReq_hits                   4558                       # number of ReadReq hits
-system.cpu.icache.ReadReq_miss_latency        1991500                       # number of ReadReq miss cycles
-system.cpu.icache.ReadReq_miss_rate          0.076019                       # miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_misses                  375                       # number of ReadReq misses
-system.cpu.icache.ReadReq_mshr_hits                 9                       # number of ReadReq MSHR hits
-system.cpu.icache.ReadReq_mshr_miss_latency      1609000                       # number of ReadReq MSHR miss cycles
-system.cpu.icache.ReadReq_mshr_miss_rate     0.074194                       # mshr miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_accesses               4655                       # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_avg_miss_latency  5308.823529                       # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency  4382.513661                       # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_hits                   4281                       # number of ReadReq hits
+system.cpu.icache.ReadReq_miss_latency        1985500                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_rate          0.080344                       # miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_misses                  374                       # number of ReadReq misses
+system.cpu.icache.ReadReq_mshr_hits                 8                       # number of ReadReq MSHR hits
+system.cpu.icache.ReadReq_mshr_miss_latency      1604000                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_rate     0.078625                       # mshr miss rate for ReadReq accesses
 system.cpu.icache.ReadReq_mshr_misses             366                       # number of ReadReq MSHR misses
 system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.icache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.icache.avg_refs                  12.453552                       # Average number of references to valid blocks.
+system.cpu.icache.avg_refs                  11.696721                       # Average number of references to valid blocks.
 system.cpu.icache.blocked_no_mshrs                  0                       # number of cycles access was blocked
 system.cpu.icache.blocked_no_targets                0                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.icache.cache_copies                      0                       # number of cache copies performed
-system.cpu.icache.demand_accesses                4933                       # number of demand (read+write) accesses
-system.cpu.icache.demand_avg_miss_latency  5310.666667                       # average overall miss latency
-system.cpu.icache.demand_avg_mshr_miss_latency  4396.174863                       # average overall mshr miss latency
-system.cpu.icache.demand_hits                    4558                       # number of demand (read+write) hits
-system.cpu.icache.demand_miss_latency         1991500                       # number of demand (read+write) miss cycles
-system.cpu.icache.demand_miss_rate           0.076019                       # miss rate for demand accesses
-system.cpu.icache.demand_misses                   375                       # number of demand (read+write) misses
-system.cpu.icache.demand_mshr_hits                  9                       # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_miss_latency      1609000                       # number of demand (read+write) MSHR miss cycles
-system.cpu.icache.demand_mshr_miss_rate      0.074194                       # mshr miss rate for demand accesses
+system.cpu.icache.demand_accesses                4655                       # number of demand (read+write) accesses
+system.cpu.icache.demand_avg_miss_latency  5308.823529                       # average overall miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency  4382.513661                       # average overall mshr miss latency
+system.cpu.icache.demand_hits                    4281                       # number of demand (read+write) hits
+system.cpu.icache.demand_miss_latency         1985500                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_rate           0.080344                       # miss rate for demand accesses
+system.cpu.icache.demand_misses                   374                       # number of demand (read+write) misses
+system.cpu.icache.demand_mshr_hits                  8                       # number of demand (read+write) MSHR hits
+system.cpu.icache.demand_mshr_miss_latency      1604000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_rate      0.078625                       # mshr miss rate for demand accesses
 system.cpu.icache.demand_mshr_misses              366                       # number of demand (read+write) MSHR misses
 system.cpu.icache.fast_writes                       0                       # number of fast writes performed
 system.cpu.icache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.icache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.icache.overall_accesses               4933                       # number of overall (read+write) accesses
-system.cpu.icache.overall_avg_miss_latency  5310.666667                       # average overall miss latency
-system.cpu.icache.overall_avg_mshr_miss_latency  4396.174863                       # average overall mshr miss latency
+system.cpu.icache.overall_accesses               4655                       # number of overall (read+write) accesses
+system.cpu.icache.overall_avg_miss_latency  5308.823529                       # average overall miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency  4382.513661                       # average overall mshr miss latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.icache.overall_hits                   4558                       # number of overall hits
-system.cpu.icache.overall_miss_latency        1991500                       # number of overall miss cycles
-system.cpu.icache.overall_miss_rate          0.076019                       # miss rate for overall accesses
-system.cpu.icache.overall_misses                  375                       # number of overall misses
-system.cpu.icache.overall_mshr_hits                 9                       # number of overall MSHR hits
-system.cpu.icache.overall_mshr_miss_latency      1609000                       # number of overall MSHR miss cycles
-system.cpu.icache.overall_mshr_miss_rate     0.074194                       # mshr miss rate for overall accesses
+system.cpu.icache.overall_hits                   4281                       # number of overall hits
+system.cpu.icache.overall_miss_latency        1985500                       # number of overall miss cycles
+system.cpu.icache.overall_miss_rate          0.080344                       # miss rate for overall accesses
+system.cpu.icache.overall_misses                  374                       # number of overall misses
+system.cpu.icache.overall_mshr_hits                 8                       # number of overall MSHR hits
+system.cpu.icache.overall_mshr_miss_latency      1604000                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_rate     0.078625                       # mshr miss rate for overall accesses
 system.cpu.icache.overall_mshr_misses             366                       # number of overall MSHR misses
 system.cpu.icache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.icache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
@@ -217,59 +217,59 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.icache.replacements                      1                       # number of replacements
 system.cpu.icache.sampled_refs                    366                       # Sample count of references to valid blocks.
 system.cpu.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse                233.760012                       # Cycle average of tags in use
-system.cpu.icache.total_refs                     4558                       # Total number of references to valid blocks.
+system.cpu.icache.tagsinuse                232.692086                       # Cycle average of tags in use
+system.cpu.icache.total_refs                     4281                       # Total number of references to valid blocks.
 system.cpu.icache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.icache.writebacks                        0                       # number of writebacks
-system.cpu.idleCycles                             499                       # Total number of cycles that the CPU has spent unscheduled due to idling
-system.cpu.iew.EXEC:branches                     3548                       # Number of branches executed
+system.cpu.idleCycles                            1997                       # Total number of cycles that the CPU has spent unscheduled due to idling
+system.cpu.iew.EXEC:branches                     3040                       # Number of branches executed
 system.cpu.iew.EXEC:nop                             0                       # number of nop insts executed
-system.cpu.iew.EXEC:rate                     0.670318                       # Inst execution rate
-system.cpu.iew.EXEC:refs                         5385                       # number of memory reference insts executed
-system.cpu.iew.EXEC:stores                       2502                       # Number of stores executed
+system.cpu.iew.EXEC:rate                     0.582082                       # Inst execution rate
+system.cpu.iew.EXEC:refs                         4490                       # number of memory reference insts executed
+system.cpu.iew.EXEC:stores                       2077                       # Number of stores executed
 system.cpu.iew.EXEC:swp                             0                       # number of swp insts executed
-system.cpu.iew.WB:consumers                     10159                       # num instructions consuming a value
-system.cpu.iew.WB:count                         20199                       # cumulative count of insts written-back
-system.cpu.iew.WB:fanout                     0.790629                       # average fanout of values written-back
+system.cpu.iew.WB:consumers                      8997                       # num instructions consuming a value
+system.cpu.iew.WB:count                         17565                       # cumulative count of insts written-back
+system.cpu.iew.WB:fanout                     0.831833                       # average fanout of values written-back
 system.cpu.iew.WB:penalized                         0                       # number of instrctions required to write to 'other' IQ
 system.cpu.iew.WB:penalized_rate                    0                       # fraction of instructions written-back that wrote to 'other' IQ
-system.cpu.iew.WB:producers                      8032                       # num instructions producing a value
-system.cpu.iew.WB:rate                       0.633913                       # insts written-back per cycle
-system.cpu.iew.WB:sent                          20448                       # cumulative count of insts sent to commit
-system.cpu.iew.branchMispredicts                 2568                       # Number of branch mispredicts detected at execute
+system.cpu.iew.WB:producers                      7484                       # num instructions producing a value
+system.cpu.iew.WB:rate                       0.560019                       # insts written-back per cycle
+system.cpu.iew.WB:sent                          17724                       # cumulative count of insts sent to commit
+system.cpu.iew.branchMispredicts                 2199                       # Number of branch mispredicts detected at execute
 system.cpu.iew.iewBlockCycles                       0                       # Number of cycles IEW is blocking
-system.cpu.iew.iewDispLoadInsts                  3198                       # Number of dispatched load instructions
-system.cpu.iew.iewDispNonSpecInsts                610                       # Number of dispatched non-speculative instructions
-system.cpu.iew.iewDispSquashedInsts              2750                       # Number of squashed instructions skipped by dispatch
-system.cpu.iew.iewDispStoreInsts                 2970                       # Number of dispatched store instructions
-system.cpu.iew.iewDispatchedInsts               25274                       # Number of instructions dispatched to IQ
-system.cpu.iew.iewExecLoadInsts                  2883                       # Number of load instructions executed
-system.cpu.iew.iewExecSquashedInsts              1463                       # Number of squashed instructions skipped in execute
-system.cpu.iew.iewExecutedInsts                 21359                       # Number of executed instructions
+system.cpu.iew.iewDispLoadInsts                  2999                       # Number of dispatched load instructions
+system.cpu.iew.iewDispNonSpecInsts                609                       # Number of dispatched non-speculative instructions
+system.cpu.iew.iewDispSquashedInsts              1287                       # Number of squashed instructions skipped by dispatch
+system.cpu.iew.iewDispStoreInsts                 2872                       # Number of dispatched store instructions
+system.cpu.iew.iewDispatchedInsts               23636                       # Number of instructions dispatched to IQ
+system.cpu.iew.iewExecLoadInsts                  2413                       # Number of load instructions executed
+system.cpu.iew.iewExecSquashedInsts              3118                       # Number of squashed instructions skipped in execute
+system.cpu.iew.iewExecutedInsts                 18257                       # Number of executed instructions
 system.cpu.iew.iewIQFullEvents                      0                       # Number of times the IQ has become full, causing a stall
 system.cpu.iew.iewIdleCycles                        0                       # Number of cycles IEW is idle
 system.cpu.iew.iewLSQFullEvents                     0                       # Number of times the LSQ has become full, causing a stall
-system.cpu.iew.iewSquashCycles                   3063                       # Number of cycles IEW is squashing
+system.cpu.iew.iewSquashCycles                   2804                       # Number of cycles IEW is squashing
 system.cpu.iew.iewUnblockCycles                     0                       # Number of cycles IEW is unblocking
 system.cpu.iew.lsq.thread.0.blockedLoads            0                       # Number of blocked loads due to partial load-store forwarding
 system.cpu.iew.lsq.thread.0.cacheBlocked            0                       # Number of times an access to memory failed due to the cache being blocked
-system.cpu.iew.lsq.thread.0.forwLoads              48                       # Number of loads that had data forwarded from stores
+system.cpu.iew.lsq.thread.0.forwLoads              43                       # Number of loads that had data forwarded from stores
 system.cpu.iew.lsq.thread.0.ignoredResponses            8                       # Number of memory responses ignored because the instruction is squashed
 system.cpu.iew.lsq.thread.0.invAddrLoads            0                       # Number of loads ignored due to an invalid address
 system.cpu.iew.lsq.thread.0.invAddrSwpfs            0                       # Number of software prefetches ignored due to an invalid address
 system.cpu.iew.lsq.thread.0.memOrderViolation           52                       # Number of memory ordering violations
 system.cpu.iew.lsq.thread.0.rescheduledLoads            0                       # Number of loads that were rescheduled
-system.cpu.iew.lsq.thread.0.squashedLoads         1736                       # Number of loads squashed
-system.cpu.iew.lsq.thread.0.squashedStores         1672                       # Number of stores squashed
+system.cpu.iew.lsq.thread.0.squashedLoads         1537                       # Number of loads squashed
+system.cpu.iew.lsq.thread.0.squashedStores         1574                       # Number of stores squashed
 system.cpu.iew.memOrderViolationEvents             52                       # Number of memory order violations
-system.cpu.iew.predictedNotTakenIncorrect          958                       # Number of branches that were predicted not taken incorrectly
-system.cpu.iew.predictedTakenIncorrect           1610                       # Number of branches that were predicted taken incorrectly
-system.cpu.ipc                               0.344464                       # IPC: Instructions Per Cycle
-system.cpu.ipc_total                         0.344464                       # IPC: Total IPC of All Threads
-system.cpu.iq.ISSUE:FU_type_0                   22822                       # Type of FU issued
+system.cpu.iew.predictedNotTakenIncorrect          682                       # Number of branches that were predicted not taken incorrectly
+system.cpu.iew.predictedTakenIncorrect           1517                       # Number of branches that were predicted taken incorrectly
+system.cpu.ipc                               0.349944                       # IPC: Instructions Per Cycle
+system.cpu.ipc_total                         0.349944                       # IPC: Total IPC of All Threads
+system.cpu.iq.ISSUE:FU_type_0                   21375                       # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0.start_dist
-                          (null)         1826      8.00%            # Type of FU issued
-                          IntAlu        15247     66.81%            # Type of FU issued
+                      No_OpClass         1750      8.19%            # Type of FU issued
+                          IntAlu        14209     66.47%            # Type of FU issued
                          IntMult            0      0.00%            # Type of FU issued
                           IntDiv            0      0.00%            # Type of FU issued
                         FloatAdd            0      0.00%            # Type of FU issued
@@ -278,16 +278,16 @@ system.cpu.iq.ISSUE:FU_type_0.start_dist
                        FloatMult            0      0.00%            # Type of FU issued
                         FloatDiv            0      0.00%            # Type of FU issued
                        FloatSqrt            0      0.00%            # Type of FU issued
-                         MemRead         3042     13.33%            # Type of FU issued
-                        MemWrite         2707     11.86%            # Type of FU issued
+                         MemRead         2832     13.25%            # Type of FU issued
+                        MemWrite         2584     12.09%            # Type of FU issued
                        IprAccess            0      0.00%            # Type of FU issued
                     InstPrefetch            0      0.00%            # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0.end_dist
-system.cpu.iq.ISSUE:fu_busy_cnt                   190                       # FU busy when requested
-system.cpu.iq.ISSUE:fu_busy_rate             0.008325                       # FU busy rate (busy events/executed inst)
+system.cpu.iq.ISSUE:fu_busy_cnt                   160                       # FU busy when requested
+system.cpu.iq.ISSUE:fu_busy_rate             0.007485                       # FU busy rate (busy events/executed inst)
 system.cpu.iq.ISSUE:fu_full.start_dist
-                          (null)            0      0.00%            # attempts to use FU when none available
-                          IntAlu           50     26.32%            # attempts to use FU when none available
+                      No_OpClass            0      0.00%            # attempts to use FU when none available
+                          IntAlu           27     16.88%            # attempts to use FU when none available
                          IntMult            0      0.00%            # attempts to use FU when none available
                           IntDiv            0      0.00%            # attempts to use FU when none available
                         FloatAdd            0      0.00%            # attempts to use FU when none available
@@ -296,41 +296,41 @@ system.cpu.iq.ISSUE:fu_full.start_dist
                        FloatMult            0      0.00%            # attempts to use FU when none available
                         FloatDiv            0      0.00%            # attempts to use FU when none available
                        FloatSqrt            0      0.00%            # attempts to use FU when none available
-                         MemRead           25     13.16%            # attempts to use FU when none available
-                        MemWrite          115     60.53%            # attempts to use FU when none available
+                         MemRead           23     14.37%            # attempts to use FU when none available
+                        MemWrite          110     68.75%            # attempts to use FU when none available
                        IprAccess            0      0.00%            # attempts to use FU when none available
                     InstPrefetch            0      0.00%            # attempts to use FU when none available
 system.cpu.iq.ISSUE:fu_full.end_dist
 system.cpu.iq.ISSUE:issued_per_cycle.start_dist                     # Number of insts issued each cycle
-system.cpu.iq.ISSUE:issued_per_cycle.samples        31864                      
+system.cpu.iq.ISSUE:issued_per_cycle.samples        31365                      
 system.cpu.iq.ISSUE:issued_per_cycle.min_value            0                      
-                               0        22879   7180.20%           
-                               1         3824   1200.10%           
-                               2         1304    409.24%           
-                               3         1251    392.61%           
-                               4         1252    392.92%           
-                               5          751    235.69%           
-                               6          414    129.93%           
-                               7          122     38.29%           
-                               8           67     21.03%           
+                               0        21827   6959.03%           
+                               1         4212   1342.90%           
+                               2         2084    664.43%           
+                               3         1568    499.92%           
+                               4          766    244.22%           
+                               5          454    144.75%           
+                               6          283     90.23%           
+                               7          109     34.75%           
+                               8           62     19.77%           
 system.cpu.iq.ISSUE:issued_per_cycle.max_value            8                      
 system.cpu.iq.ISSUE:issued_per_cycle.end_dist
 
-system.cpu.iq.ISSUE:rate                     0.716231                       # Inst issue rate
-system.cpu.iq.iqInstsAdded                      24664                       # Number of instructions added to the IQ (excludes non-spec)
-system.cpu.iq.iqInstsIssued                     22822                       # Number of instructions issued
-system.cpu.iq.iqNonSpecInstsAdded                 610                       # Number of non-speculative instructions added to the IQ
-system.cpu.iq.iqSquashedInstsExamined           11119                       # Number of squashed instructions iterated over during squash; mainly for profiling
-system.cpu.iq.iqSquashedInstsIssued                83                       # Number of squashed instructions issued
-system.cpu.iq.iqSquashedNonSpecRemoved            283                       # Number of squashed non-spec instructions that were removed
-system.cpu.iq.iqSquashedOperandsExamined         5685                       # Number of squashed operands that are examined and possibly removed from graph
+system.cpu.iq.ISSUE:rate                     0.681492                       # Inst issue rate
+system.cpu.iq.iqInstsAdded                      23027                       # Number of instructions added to the IQ (excludes non-spec)
+system.cpu.iq.iqInstsIssued                     21375                       # Number of instructions issued
+system.cpu.iq.iqNonSpecInstsAdded                 609                       # Number of non-speculative instructions added to the IQ
+system.cpu.iq.iqSquashedInstsExamined           10843                       # Number of squashed instructions iterated over during squash; mainly for profiling
+system.cpu.iq.iqSquashedInstsIssued                99                       # Number of squashed instructions issued
+system.cpu.iq.iqSquashedNonSpecRemoved            280                       # Number of squashed non-spec instructions that were removed
+system.cpu.iq.iqSquashedOperandsExamined         7823                       # Number of squashed operands that are examined and possibly removed from graph
 system.cpu.l2cache.ReadReq_accesses               514                       # number of ReadReq accesses(hits+misses)
 system.cpu.l2cache.ReadReq_avg_miss_latency  4458.171206                       # average ReadReq miss latency
-system.cpu.l2cache.ReadReq_avg_mshr_miss_latency  2373.540856                       # average ReadReq mshr miss latency
+system.cpu.l2cache.ReadReq_avg_mshr_miss_latency  2375.486381                       # average ReadReq mshr miss latency
 system.cpu.l2cache.ReadReq_miss_latency       2291500                       # number of ReadReq miss cycles
 system.cpu.l2cache.ReadReq_miss_rate                1                       # miss rate for ReadReq accesses
 system.cpu.l2cache.ReadReq_misses                 514                       # number of ReadReq misses
-system.cpu.l2cache.ReadReq_mshr_miss_latency      1220000                       # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_latency      1221000                       # number of ReadReq MSHR miss cycles
 system.cpu.l2cache.ReadReq_mshr_miss_rate            1                       # mshr miss rate for ReadReq accesses
 system.cpu.l2cache.ReadReq_mshr_misses            514                       # number of ReadReq MSHR misses
 system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
@@ -343,13 +343,13 @@ system.cpu.l2cache.blocked_cycles_no_targets            0
 system.cpu.l2cache.cache_copies                     0                       # number of cache copies performed
 system.cpu.l2cache.demand_accesses                514                       # number of demand (read+write) accesses
 system.cpu.l2cache.demand_avg_miss_latency  4458.171206                       # average overall miss latency
-system.cpu.l2cache.demand_avg_mshr_miss_latency  2373.540856                       # average overall mshr miss latency
+system.cpu.l2cache.demand_avg_mshr_miss_latency  2375.486381                       # average overall mshr miss latency
 system.cpu.l2cache.demand_hits                      0                       # number of demand (read+write) hits
 system.cpu.l2cache.demand_miss_latency        2291500                       # number of demand (read+write) miss cycles
 system.cpu.l2cache.demand_miss_rate                 1                       # miss rate for demand accesses
 system.cpu.l2cache.demand_misses                  514                       # number of demand (read+write) misses
 system.cpu.l2cache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
-system.cpu.l2cache.demand_mshr_miss_latency      1220000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_latency      1221000                       # number of demand (read+write) MSHR miss cycles
 system.cpu.l2cache.demand_mshr_miss_rate            1                       # mshr miss rate for demand accesses
 system.cpu.l2cache.demand_mshr_misses             514                       # number of demand (read+write) MSHR misses
 system.cpu.l2cache.fast_writes                      0                       # number of fast writes performed
@@ -357,14 +357,14 @@ system.cpu.l2cache.mshr_cap_events                  0                       # nu
 system.cpu.l2cache.no_allocate_misses               0                       # Number of misses that were no-allocate
 system.cpu.l2cache.overall_accesses               514                       # number of overall (read+write) accesses
 system.cpu.l2cache.overall_avg_miss_latency  4458.171206                       # average overall miss latency
-system.cpu.l2cache.overall_avg_mshr_miss_latency  2373.540856                       # average overall mshr miss latency
+system.cpu.l2cache.overall_avg_mshr_miss_latency  2375.486381                       # average overall mshr miss latency
 system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.l2cache.overall_hits                     0                       # number of overall hits
 system.cpu.l2cache.overall_miss_latency       2291500                       # number of overall miss cycles
 system.cpu.l2cache.overall_miss_rate                1                       # miss rate for overall accesses
 system.cpu.l2cache.overall_misses                 514                       # number of overall misses
 system.cpu.l2cache.overall_mshr_hits                0                       # number of overall MSHR hits
-system.cpu.l2cache.overall_mshr_miss_latency      1220000                       # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_latency      1221000                       # number of overall MSHR miss cycles
 system.cpu.l2cache.overall_mshr_miss_rate            1                       # mshr miss rate for overall accesses
 system.cpu.l2cache.overall_mshr_misses            514                       # number of overall MSHR misses
 system.cpu.l2cache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
@@ -381,26 +381,25 @@ system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.l2cache.replacements                     0                       # number of replacements
 system.cpu.l2cache.sampled_refs                   514                       # Sample count of references to valid blocks.
 system.cpu.l2cache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse               345.564898                       # Cycle average of tags in use
+system.cpu.l2cache.tagsinuse               344.125692                       # Cycle average of tags in use
 system.cpu.l2cache.total_refs                       0                       # Total number of references to valid blocks.
 system.cpu.l2cache.warmup_cycle                     0                       # Cycle when the warmup percentage was hit.
 system.cpu.l2cache.writebacks                       0                       # number of writebacks
-system.cpu.numCycles                            31864                       # number of cpu cycles simulated
+system.cpu.numCycles                            31365                       # number of cpu cycles simulated
 system.cpu.rename.RENAME:CommittedMaps           9868                       # Number of HB maps that are committed
-system.cpu.rename.RENAME:IdleCycles             16082                       # Number of cycles rename is idle
-system.cpu.rename.RENAME:RenameLookups          44650                       # Number of register rename lookups that rename has made
-system.cpu.rename.RENAME:RenamedInsts           29655                       # Number of instructions processed by rename
-system.cpu.rename.RENAME:RenamedOperands        24195                       # Number of destination operands rename has renamed
-system.cpu.rename.RENAME:RunCycles               7618                       # Number of cycles rename is running
-system.cpu.rename.RENAME:SquashCycles            3063                       # Number of cycles rename is squashing
-system.cpu.rename.RENAME:SquashedInsts           8815                       # Number of squashed instructions processed by rename
-system.cpu.rename.RENAME:UnblockCycles            684                       # Number of cycles rename is unblocking
-system.cpu.rename.RENAME:UndoneMaps             14327                       # Number of HB maps that are undone due to squashing
-system.cpu.rename.RENAME:serializeStallCycles         3915                       # count of cycles rename stalled for serializing inst
-system.cpu.rename.RENAME:serializingInsts          631                       # count of serializing insts renamed
-system.cpu.rename.RENAME:skidInsts               4702                       # count of insts added to the skid buffer
-system.cpu.rename.RENAME:tempSerializingInsts          623                       # count of temporary serializing insts renamed
-system.cpu.timesIdled                               1                       # Number of times that the entire CPU went into an idle state and unscheduled itself
+system.cpu.rename.RENAME:IdleCycles             16585                       # Number of cycles rename is idle
+system.cpu.rename.RENAME:RenameLookups          46161                       # Number of register rename lookups that rename has made
+system.cpu.rename.RENAME:RenamedInsts           26550                       # Number of instructions processed by rename
+system.cpu.rename.RENAME:RenamedOperands        21893                       # Number of destination operands rename has renamed
+system.cpu.rename.RENAME:RunCycles               8196                       # Number of cycles rename is running
+system.cpu.rename.RENAME:SquashCycles            2804                       # Number of cycles rename is squashing
+system.cpu.rename.RENAME:UnblockCycles            229                       # Number of cycles rename is unblocking
+system.cpu.rename.RENAME:UndoneMaps             12025                       # Number of HB maps that are undone due to squashing
+system.cpu.rename.RENAME:serializeStallCycles         3551                       # count of cycles rename stalled for serializing inst
+system.cpu.rename.RENAME:serializingInsts          628                       # count of serializing insts renamed
+system.cpu.rename.RENAME:skidInsts               4297                       # count of insts added to the skid buffer
+system.cpu.rename.RENAME:tempSerializingInsts          640                       # count of temporary serializing insts renamed
+system.cpu.timesIdled                               3                       # Number of times that the entire CPU went into an idle state and unscheduled itself
 system.cpu.workload.PROG:num_syscalls               8                       # Number of system calls
 
 ---------- End Simulation Statistics   ----------
diff --git a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stdout b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stdout
index 0b6e54449..692223ccd 100644
--- a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stdout
+++ b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stdout
@@ -16,9 +16,9 @@ The Regents of The University of Michigan
 All Rights Reserved
 
 
-M5 compiled May 15 2007 13:02:31
-M5 started Tue May 15 17:00:06 2007
+M5 compiled Jun 21 2007 21:15:48
+M5 started Fri Jun 22 00:32:08 2007
 M5 executing on zizzer.eecs.umich.edu
 command line: build/SPARC_SE/m5.fast -d build/SPARC_SE/tests/fast/quick/02.insttest/sparc/linux/o3-timing tests/run.py quick/02.insttest/sparc/linux/o3-timing
 Global frequency set at 1000000000000 ticks per second
-Exiting @ tick 15931500 because target called exit()
+Exiting @ tick 15682500 because target called exit()
-- 
cgit v1.2.3