33 files changed, 1377 insertions, 1017 deletions
diff --git a/src/arch/sparc/miscregfile.cc b/src/arch/sparc/miscregfile.cc
index 5bd572d38..f511ef454 100644
--- a/src/arch/sparc/miscregfile.cc
+++ b/src/arch/sparc/miscregfile.cc
@@ -647,11 +647,9 @@ void MiscRegFile::setReg(int miscReg,
         return;
       case MISCREG_CWP:
         new_val = val >= NWindows ? NWindows - 1 : val;
-        if (val >= NWindows) {
+        if (val >= NWindows)
             new_val = NWindows - 1;
-            warn("Attempted to set the CWP to %d with NWindows = %d\n",
-                    val, NWindows);
-        }
+
         tc->changeRegFileContext(CONTEXT_CWP, new_val);
         break;
       case MISCREG_GL:
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index 3fd85595f..65e36d99a 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -96,7 +96,7 @@ DefaultCommit<Impl>::DefaultCommit(Params *params)
     if (policy == "aggressive"){
         commitPolicy = Aggressive;
 
-        DPRINTF(Commit,"Commit Policy set to Aggressive.");
+//        DPRINTF(Commit,"Commit Policy set to Aggressive.");
     } else if (policy == "roundrobin"){
         commitPolicy = RoundRobin;
 
@@ -105,11 +105,11 @@ DefaultCommit<Impl>::DefaultCommit(Params *params)
             priority_list.push_back(tid);
         }
 
-        DPRINTF(Commit,"Commit Policy set to Round Robin.");
+//        DPRINTF(Commit,"Commit Policy set to Round Robin.");
     } else if (policy == "oldestready"){
         commitPolicy = OldestReady;
 
-        DPRINTF(Commit,"Commit Policy set to Oldest Ready.");
+//        DPRINTF(Commit,"Commit Policy set to Oldest Ready.");
     } else {
         assert(0 && "Invalid SMT Commit Policy. Options Are: {Aggressive,"
                "RoundRobin,OldestReady}");
@@ -229,8 +229,8 @@ template <class Impl>
 void
 DefaultCommit<Impl>::setCPU(O3CPU *cpu_ptr)
 {
-    DPRINTF(Commit, "Commit: Setting CPU pointer.\n");
     cpu = cpu_ptr;
+    DPRINTF(Commit, "Commit: Setting CPU pointer.\n");
 
     // Commit must broadcast the number of free entries it has at the start of
     // the simulation, so it starts as active.
@@ -250,7 +250,6 @@ template <class Impl>
 void
 DefaultCommit<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
 {
-    DPRINTF(Commit, "Commit: Setting time buffer pointer.\n");
     timeBuffer = tb_ptr;
 
     // Setup wire to send information back to IEW.
@@ -264,7 +263,6 @@ template <class Impl>
 void
 DefaultCommit<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
 {
-    DPRINTF(Commit, "Commit: Setting fetch queue pointer.\n");
     fetchQueue = fq_ptr;
 
     // Setup wire to get instructions from rename (for the ROB).
@@ -275,7 +273,6 @@ template <class Impl>
 void
 DefaultCommit<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
 {
-    DPRINTF(Commit, "Commit: Setting rename queue pointer.\n");
     renameQueue = rq_ptr;
 
     // Setup wire to get instructions from rename (for the ROB).
@@ -286,7 +283,6 @@ template <class Impl>
 void
 DefaultCommit<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr)
 {
-    DPRINTF(Commit, "Commit: Setting IEW queue pointer.\n");
     iewQueue = iq_ptr;
 
     // Setup wire to get instructions from IEW.
@@ -304,7 +300,6 @@ template<class Impl>
 void
 DefaultCommit<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
 {
-    DPRINTF(Commit, "Commit: Setting active threads list pointer.\n");
     activeThreads = at_ptr;
 }
 
@@ -312,8 +307,6 @@ template <class Impl>
 void
 DefaultCommit<Impl>::setRenameMap(RenameMap rm_ptr[])
 {
-    DPRINTF(Commit, "Setting rename map pointers.\n");
-
     for (int i=0; i < numThreads; i++) {
         renameMap[i] = &rm_ptr[i];
     }
@@ -323,7 +316,6 @@ template <class Impl>
 void
 DefaultCommit<Impl>::setROB(ROB *rob_ptr)
 {
-    DPRINTF(Commit, "Commit: Setting ROB pointer.\n");
     rob = rob_ptr;
 }
 
diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh
index 79a0bfdbf..93d02bfcd 100644
--- a/src/cpu/o3/decode_impl.hh
+++ b/src/cpu/o3/decode_impl.hh
@@ -114,15 +114,14 @@ template<class Impl>
 void
 DefaultDecode<Impl>::setCPU(O3CPU *cpu_ptr)
 {
-    DPRINTF(Decode, "Setting CPU pointer.\n");
     cpu = cpu_ptr;
+    DPRINTF(Decode, "Setting CPU pointer.\n");
 }
 
 template<class Impl>
 void
 DefaultDecode<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
 {
-    DPRINTF(Decode, "Setting time buffer pointer.\n");
     timeBuffer = tb_ptr;
 
     // Setup wire to write information back to fetch.
@@ -138,7 +137,6 @@ template<class Impl>
 void
 DefaultDecode<Impl>::setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr)
 {
-    DPRINTF(Decode, "Setting decode queue pointer.\n");
     decodeQueue = dq_ptr;
 
     // Setup wire to write information to proper place in decode queue.
@@ -149,7 +147,6 @@ template<class Impl>
 void
 DefaultDecode<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
 {
-    DPRINTF(Decode, "Setting fetch queue pointer.\n");
     fetchQueue = fq_ptr;
 
     // Setup wire to read information from fetch queue.
@@ -160,7 +157,6 @@ template<class Impl>
 void
 DefaultDecode<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
 {
-    DPRINTF(Decode, "Setting active threads list pointer.\n");
     activeThreads = at_ptr;
 }
 
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index 34b06420d..85885906d 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -266,8 +266,8 @@ template<class Impl>
 void
 DefaultFetch<Impl>::setCPU(O3CPU *cpu_ptr)
 {
-    DPRINTF(Fetch, "Setting the CPU pointer.\n");
     cpu = cpu_ptr;
+    DPRINTF(Fetch, "Setting the CPU pointer.\n");
 
     // Name is finally available, so create the port.
     icachePort = new IcachePort(this);
@@ -292,7 +292,6 @@ template<class Impl>
 void
 DefaultFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
 {
-    DPRINTF(Fetch, "Setting the time buffer pointer.\n");
     timeBuffer = time_buffer;
 
     // Create wires to get information from proper places in time buffer.
@@ -306,7 +305,6 @@ template<class Impl>
 void
 DefaultFetch<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
 {
-    DPRINTF(Fetch, "Setting active threads list pointer.\n");
     activeThreads = at_ptr;
 }
 
@@ -314,7 +312,6 @@ template<class Impl>
 void
 DefaultFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
 {
-    DPRINTF(Fetch, "Setting the fetch queue pointer.\n");
     fetchQueue = fq_ptr;
 
     // Create wire to write information to proper place in fetch queue.
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index 4883e5a5c..d2948a525 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -282,8 +282,8 @@ template<class Impl>
 void
 DefaultIEW<Impl>::setCPU(O3CPU *cpu_ptr)
 {
-    DPRINTF(IEW, "Setting CPU pointer.\n");
     cpu = cpu_ptr;
+    DPRINTF(IEW, "Setting CPU pointer.\n");
 
     instQueue.setCPU(cpu_ptr);
     ldstQueue.setCPU(cpu_ptr);
@@ -295,7 +295,6 @@ template<class Impl>
 void
 DefaultIEW<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
 {
-    DPRINTF(IEW, "Setting time buffer pointer.\n");
     timeBuffer = tb_ptr;
 
     // Setup wire to read information from time buffer, from commit.
@@ -314,7 +313,6 @@ template<class Impl>
 void
 DefaultIEW<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
 {
-    DPRINTF(IEW, "Setting rename queue pointer.\n");
     renameQueue = rq_ptr;
 
     // Setup wire to read information from rename queue.
@@ -325,7 +323,6 @@ template<class Impl>
 void
 DefaultIEW<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr)
 {
-    DPRINTF(IEW, "Setting IEW queue pointer.\n");
     iewQueue = iq_ptr;
 
     // Setup wire to write instructions to commit.
@@ -336,7 +333,6 @@ template<class Impl>
 void
 DefaultIEW<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
 {
-    DPRINTF(IEW, "Setting active threads list pointer.\n");
     activeThreads = at_ptr;
 
     ldstQueue.setActiveThreads(at_ptr);
@@ -347,7 +343,6 @@ template<class Impl>
 void
 DefaultIEW<Impl>::setScoreboard(Scoreboard *sb_ptr)
 {
-    DPRINTF(IEW, "Setting scoreboard pointer.\n");
     scoreboard = sb_ptr;
 }
 
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh
index 79e03d4bf..4d99fb520 100644
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -81,8 +81,6 @@ InstructionQueue<Impl>::InstructionQueue(Params *params)
     // Set the number of physical registers as the number of int + float
     numPhysRegs = numPhysIntRegs + numPhysFloatRegs;
 
-    DPRINTF(IQ, "There are %i physical registers.\n", numPhysRegs);
-
     //Create an entry for each physical register within the
     //dependency graph.
     dependGraph.resize(numPhysRegs);
@@ -124,8 +122,10 @@ InstructionQueue<Impl>::InstructionQueue(Params *params)
             maxEntries[i] = part_amt;
         }
 
+/*
         DPRINTF(IQ, "IQ sharing policy set to Partitioned:"
                 "%i entries per thread.\n",part_amt);
+*/
 
     } else if (policy == "threshold") {
         iqPolicy = Threshold;
@@ -139,8 +139,10 @@ InstructionQueue<Impl>::InstructionQueue(Params *params)
             maxEntries[i] = thresholdIQ;
         }
 
+/*
         DPRINTF(IQ, "IQ sharing policy set to Threshold:"
                 "%i entries per thread.\n",thresholdIQ);
+*/
    } else {
        assert(0 && "Invalid IQ Sharing Policy.Options Are:{Dynamic,"
               "Partitioned, Threshold}");
@@ -360,7 +362,6 @@ template <class Impl>
 void
 InstructionQueue<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
 {
-    DPRINTF(IQ, "Setting active threads list pointer.\n");
     activeThreads = at_ptr;
 }
 
@@ -368,15 +369,13 @@ template <class Impl>
 void
 InstructionQueue<Impl>::setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2e_ptr)
 {
-    DPRINTF(IQ, "Set the issue to execute queue.\n");
-    issueToExecuteQueue = i2e_ptr;
+      issueToExecuteQueue = i2e_ptr;
 }
 
 template <class Impl>
 void
 InstructionQueue<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
 {
-    DPRINTF(IQ, "Set the time buffer.\n");
     timeBuffer = tb_ptr;
 
     fromCommit = timeBuffer->getWire(-commitToIEWDelay);
diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh
index d4994fcb7..02cc5784c 100644
--- a/src/cpu/o3/lsq_impl.hh
+++ b/src/cpu/o3/lsq_impl.hh
@@ -112,8 +112,6 @@ LSQ<Impl>::LSQ(Params *params)
       SQEntries(params->SQEntries), numThreads(params->numberOfThreads),
       retryTid(-1)
 {
-    DPRINTF(LSQ, "Creating LSQ object.\n");
-
     dcachePort.snoopRangeSent = false;
 
     //**********************************************/
@@ -131,20 +129,20 @@ LSQ<Impl>::LSQ(Params *params)
 
         maxLQEntries = LQEntries;
         maxSQEntries = SQEntries;
-
+/*
         DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n");
-
+*/
     } else if (policy == "partitioned") {
         lsqPolicy = Partitioned;
 
         //@todo:make work if part_amt doesnt divide evenly.
         maxLQEntries = LQEntries / numThreads;
         maxSQEntries = SQEntries / numThreads;
-
+/*
         DPRINTF(Fetch, "LSQ sharing policy set to Partitioned: "
                 "%i entries per LQ | %i entries per SQ",
                 maxLQEntries,maxSQEntries);
-
+*/
     } else if (policy == "threshold") {
         lsqPolicy = Threshold;
 
@@ -156,10 +154,11 @@ LSQ<Impl>::LSQ(Params *params)
         //amount of the LSQ
         maxLQEntries  = params->smtLSQThreshold;
         maxSQEntries  = params->smtLSQThreshold;
-
+/*
         DPRINTF(LSQ, "LSQ sharing policy set to Threshold: "
                 "%i entries per LQ | %i entries per SQ",
                 maxLQEntries,maxSQEntries);
+*/
 
     } else {
         assert(0 && "Invalid LSQ Sharing Policy.Options Are:{Dynamic,"
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index e70c960b3..0a3021046 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -112,7 +112,7 @@ void
 LSQUnit<Impl>::init(Params *params, LSQ *lsq_ptr, unsigned maxLQEntries,
                     unsigned maxSQEntries, unsigned id)
 {
-    DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id);
+//    DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id);
 
     switchedOut = false;
 
diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh
index e303f1cee..eb04ca733 100644
--- a/src/cpu/o3/rename_impl.hh
+++ b/src/cpu/o3/rename_impl.hh
@@ -168,15 +168,14 @@ template <class Impl>
 void
 DefaultRename<Impl>::setCPU(O3CPU *cpu_ptr)
 {
-    DPRINTF(Rename, "Setting CPU pointer.\n");
     cpu = cpu_ptr;
+    DPRINTF(Rename, "Setting CPU pointer.\n");
 }
 
 template <class Impl>
 void
 DefaultRename<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
 {
-    DPRINTF(Rename, "Setting time buffer pointer.\n");
     timeBuffer = tb_ptr;
 
     // Setup wire to read information from time buffer, from IEW stage.
@@ -193,7 +192,6 @@ template <class Impl>
 void
 DefaultRename<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
 {
-    DPRINTF(Rename, "Setting rename queue pointer.\n");
     renameQueue = rq_ptr;
 
     // Setup wire to write information to future stages.
@@ -204,7 +202,6 @@ template <class Impl>
 void
 DefaultRename<Impl>::setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr)
 {
-    DPRINTF(Rename, "Setting decode queue pointer.\n");
     decodeQueue = dq_ptr;
 
     // Setup wire to get information from decode.
@@ -228,7 +225,6 @@ template<class Impl>
 void
 DefaultRename<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
 {
-    DPRINTF(Rename, "Setting active threads list pointer.\n");
     activeThreads = at_ptr;
 }
 
@@ -237,8 +233,6 @@ template <class Impl>
 void
 DefaultRename<Impl>::setRenameMap(RenameMap rm_ptr[])
 {
-    DPRINTF(Rename, "Setting rename map pointers.\n");
-
     for (int i=0; i<numThreads; i++) {
         renameMap[i] = &rm_ptr[i];
     }
@@ -248,7 +242,6 @@ template <class Impl>
 void
 DefaultRename<Impl>::setFreeList(FreeList *fl_ptr)
 {
-    DPRINTF(Rename, "Setting free list pointer.\n");
     freeList = fl_ptr;
 }
 
@@ -256,7 +249,6 @@ template<class Impl>
 void
 DefaultRename<Impl>::setScoreboard(Scoreboard *_scoreboard)
 {
-    DPRINTF(Rename, "Setting scoreboard pointer.\n");
     scoreboard = _scoreboard;
 }
 
diff --git a/src/cpu/o3/rob_impl.hh b/src/cpu/o3/rob_impl.hh
index fde636754..975aba379 100644
--- a/src/cpu/o3/rob_impl.hh
+++ b/src/cpu/o3/rob_impl.hh
@@ -66,7 +66,7 @@ ROB<Impl>::ROB(unsigned _numEntries, unsigned _squashWidth,
 
     } else if (policy == "partitioned") {
         robPolicy = Partitioned;
-        DPRINTF(Fetch, "ROB sharing policy set to Partitioned\n");
+//	DPRINTF(Fetch, "ROB sharing policy set to Partitioned\n");
 
         //@todo:make work if part_amt doesnt divide evenly.
         int part_amt = numEntries / numThreads;
@@ -78,7 +78,7 @@ ROB<Impl>::ROB(unsigned _numEntries, unsigned _squashWidth,
 
     } else if (policy == "threshold") {
         robPolicy = Threshold;
-        DPRINTF(Fetch, "ROB sharing policy set to Threshold\n");
+//	DPRINTF(Fetch, "ROB sharing policy set to Threshold\n");
 
         int threshold =  _smtROBThreshold;;
 
diff --git a/src/cpu/o3/sparc/cpu_builder.cc b/src/cpu/o3/sparc/cpu_builder.cc
index 3cac89bad..35badce2c 100644
--- a/src/cpu/o3/sparc/cpu_builder.cc
+++ b/src/cpu/o3/sparc/cpu_builder.cc
@@ -50,11 +50,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU)
     Param<int> clock;
     Param<int> phase;
     Param<int> numThreads;
+    Param<int> cpu_id;
     Param<int> activity;
 
 #if FULL_SYSTEM
     SimObjectParam<System *> system;
-    Param<int> cpu_id;
     SimObjectParam<SparcISA::ITB *> itb;
     SimObjectParam<SparcISA::DTB *> dtb;
     Param<Tick> profile;
@@ -161,11 +161,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU)
     INIT_PARAM(clock, "clock speed"),
     INIT_PARAM_DFLT(phase, "clock phase", 0),
     INIT_PARAM(numThreads, "number of HW thread contexts"),
+    INIT_PARAM(cpu_id, "processor ID"),
     INIT_PARAM_DFLT(activity, "Initial activity count", 0),
 
 #if FULL_SYSTEM
     INIT_PARAM(system, "System object"),
-    INIT_PARAM(cpu_id, "processor ID"),
     INIT_PARAM(itb, "Instruction translation buffer"),
     INIT_PARAM(dtb, "Data translation buffer"),
     INIT_PARAM(profile, ""),
@@ -305,14 +305,15 @@ CREATE_SIM_OBJECT(DerivO3CPU)
     SparcSimpleParams *params = new SparcSimpleParams;
 
     params->clock = clock;
+    params->phase = phase;
 
     params->name = getInstanceName();
     params->numberOfThreads = actual_num_threads;
+    params->cpu_id = cpu_id;
     params->activity = activity;
 
 #if FULL_SYSTEM
     params->system = system;
-    params->cpu_id = cpu_id;
     params->itb = itb;
     params->dtb = dtb;
     params->profile = profile;
diff --git a/src/dev/i8254xGBe.cc b/src/dev/i8254xGBe.cc
index c38a9e873..3d08bca1e 100644
--- a/src/dev/i8254xGBe.cc
+++ b/src/dev/i8254xGBe.cc
@@ -55,10 +55,10 @@ using namespace iGbReg;
 using namespace Net;
 
 IGbE::IGbE(Params *p)
-    : PciDev(p), etherInt(NULL),  useFlowControl(p->use_flow_control),
+    : PciDev(p), etherInt(NULL),  drainEvent(NULL), useFlowControl(p->use_flow_control),
       rxFifo(p->rx_fifo_size), txFifo(p->tx_fifo_size), rxTick(false),
-      txTick(false), rdtrEvent(this), radvEvent(this), tadvEvent(this),
-      tidvEvent(this), tickEvent(this), interEvent(this),
+      txTick(false), txFifoTick(false), rdtrEvent(this), radvEvent(this),
+      tadvEvent(this), tidvEvent(this), tickEvent(this), interEvent(this),
       rxDescCache(this, name()+".RxDesc", p->rx_desc_cache_size),
       txDescCache(this, name()+".TxDesc", p->tx_desc_cache_size), clock(p->clock)
 {
@@ -223,6 +223,7 @@ IGbE::read(PacketPtr pkt)
         pkt->set<uint32_t>(regs.rdtr());
         if (regs.rdtr.fpd()) {
             rxDescCache.writeback(0);
+            DPRINTF(EthernetIntr, "Posting interrupt because of RDTR.FPD write\n");
             postInterrupt(IT_RXT);
             regs.rdtr.fpd(0);
         }
@@ -411,6 +412,7 @@ IGbE::write(PacketPtr pkt)
         regs.itr = val;
         break;
       case REG_ICS:
+        DPRINTF(EthernetIntr, "Posting interrupt because of ICS write\n");
         postInterrupt((IntTypes)val);
         break;
        case REG_IMS:
@@ -429,6 +431,7 @@ IGbE::write(PacketPtr pkt)
         regs.rctl = val;
         if (regs.rctl.rst()) {
             rxDescCache.reset();
+            DPRINTF(EthernetSM, "RXS: Got RESET!\n");
             rxFifo.clear();
             regs.rctl.rst(0);
         }
@@ -568,8 +571,8 @@ IGbE::postInterrupt(IntTypes t, bool now)
         } else {
            DPRINTF(EthernetIntr, "EINT: Scheduling timer interrupt for %d ticks\n",
                     Clock::Int::ns * 256 * regs.itr.interval());
-           assert(!interEvent.scheduled());
-           interEvent.schedule(curTick + Clock::Int::ns * 256 * regs.itr.interval());
+           if (!interEvent.scheduled())
+               interEvent.schedule(curTick + Clock::Int::ns * 256 * regs.itr.interval());
         }
     }
 }
@@ -676,39 +679,39 @@ IGbE::RxDescCache::pktComplete()
     // no support for anything but starting at 0
     assert(igbe->regs.rxcsum.pcss() == 0);
 
-    DPRINTF(EthernetDesc, "RxDesc: Packet written to memory updating Descriptor\n");
+    DPRINTF(EthernetDesc, "Packet written to memory updating Descriptor\n");
 
     uint8_t status = RXDS_DD | RXDS_EOP;
     uint8_t err = 0;
     IpPtr ip(pktPtr);
     if (ip) {
         if (igbe->regs.rxcsum.ipofld()) {
-            DPRINTF(EthernetDesc, "RxDesc: Checking IP checksum\n");
+            DPRINTF(EthernetDesc, "Checking IP checksum\n");
             status |= RXDS_IPCS;
             desc->csum = htole(cksum(ip));
             if (cksum(ip) != 0) {
                 err |= RXDE_IPE;
-                DPRINTF(EthernetDesc, "RxDesc: Checksum is bad!!\n");
+                DPRINTF(EthernetDesc, "Checksum is bad!!\n");
             }
         }
         TcpPtr tcp(ip);
         if (tcp && igbe->regs.rxcsum.tuofld()) {
-            DPRINTF(EthernetDesc, "RxDesc: Checking TCP checksum\n");
+            DPRINTF(EthernetDesc, "Checking TCP checksum\n");
             status |= RXDS_TCPCS;
             desc->csum = htole(cksum(tcp));
             if (cksum(tcp) != 0) {
-                DPRINTF(EthernetDesc, "RxDesc: Checksum is bad!!\n");
+                DPRINTF(EthernetDesc, "Checksum is bad!!\n");
                 err |= RXDE_TCPE;
             }
         }
 
         UdpPtr udp(ip);
         if (udp && igbe->regs.rxcsum.tuofld()) {
-            DPRINTF(EthernetDesc, "RxDesc: Checking UDP checksum\n");
+            DPRINTF(EthernetDesc, "Checking UDP checksum\n");
             status |= RXDS_UDPCS;
             desc->csum = htole(cksum(udp));
             if (cksum(tcp) != 0) {
-                DPRINTF(EthernetDesc, "RxDesc: Checksum is bad!!\n");
+                DPRINTF(EthernetDesc, "Checksum is bad!!\n");
                 err |= RXDE_TCPE;
             }
         }
@@ -748,15 +751,18 @@ IGbE::RxDescCache::pktComplete()
 
     // If the packet is small enough, interrupt appropriately
     // I wonder if this is delayed or not?!
-    if (pktPtr->length <= igbe->regs.rsrpd.idv())
+    if (pktPtr->length <= igbe->regs.rsrpd.idv()) {
+        DPRINTF(EthernetSM, "RXS: Posting IT_SRPD beacuse small packet received\n");
         igbe->postInterrupt(IT_SRPD);
+    }
 
-    DPRINTF(EthernetDesc, "RxDesc: Processing of this descriptor complete\n");
+    DPRINTF(EthernetDesc, "Processing of this descriptor complete\n");
     unusedCache.pop_front();
     usedCache.push_back(desc);
     pktPtr = NULL;
     enableSm();
     pktDone = true;
+    igbe->checkDrain();
 }
 
 void
@@ -776,11 +782,33 @@ IGbE::RxDescCache::packetDone()
     return false;
 }
 
+bool
+IGbE::RxDescCache::hasOutstandingEvents()
+{
+    return pktEvent.scheduled() || wbEvent.scheduled() ||
+        fetchEvent.scheduled();
+}
+
+void
+IGbE::RxDescCache::serialize(std::ostream &os)
+{
+    DescCache<RxDesc>::serialize(os);
+    SERIALIZE_SCALAR(pktDone);
+}
+
+void
+IGbE::RxDescCache::unserialize(Checkpoint *cp, const std::string &section)
+{
+    DescCache<RxDesc>::unserialize(cp, section);
+    UNSERIALIZE_SCALAR(pktDone);
+}
+
+
 ///////////////////////////////////// IGbE::TxDesc /////////////////////////////////
 
 IGbE::TxDescCache::TxDescCache(IGbE *i, const std::string n, int s)
     : DescCache<TxDesc>(i,n, s), pktDone(false), isTcp(false), pktWaiting(false),
-      hLen(0), pktEvent(this)
+       pktEvent(this)
 
 {
 }
@@ -792,10 +820,10 @@ IGbE::TxDescCache::getPacketSize()
 
     TxDesc *desc;
 
-    DPRINTF(EthernetDesc, "TxDesc: Starting processing of descriptor\n");
+    DPRINTF(EthernetDesc, "Starting processing of descriptor\n");
 
     while (unusedCache.size() && TxdOp::isContext(unusedCache.front())) {
-        DPRINTF(EthernetDesc, "TxDesc: Got context descriptor type... skipping\n");
+        DPRINTF(EthernetDesc, "Got context descriptor type... skipping\n");
 
         // I think we can just ignore these for now?
         desc = unusedCache.front();
@@ -813,7 +841,7 @@ IGbE::TxDescCache::getPacketSize()
     if (!unusedCache.size())
         return -1;
 
-    DPRINTF(EthernetDesc, "TxDesc: Next TX packet is %d bytes\n",
+    DPRINTF(EthernetDesc, "Next TX packet is %d bytes\n",
             TxdOp::getLen(unusedCache.front()));
 
     return TxdOp::getLen(unusedCache.front());
@@ -833,9 +861,9 @@ IGbE::TxDescCache::getPacketData(EthPacketPtr p)
 
     pktWaiting = true;
 
-    DPRINTF(EthernetDesc, "TxDesc: Starting DMA of packet\n");
+    DPRINTF(EthernetDesc, "Starting DMA of packet\n");
     igbe->dmaRead(igbe->platform->pciToDma(TxdOp::getBuf(desc)),
-            TxdOp::getLen(desc), &pktEvent, p->data + hLen);
+            TxdOp::getLen(desc), &pktEvent, p->data + p->length);
 
 
 }
@@ -848,7 +876,7 @@ IGbE::TxDescCache::pktComplete()
     assert(unusedCache.size());
     assert(pktPtr);
 
-    DPRINTF(EthernetDesc, "TxDesc: DMA of packet complete\n");
+    DPRINTF(EthernetDesc, "DMA of packet complete\n");
 
 
     desc = unusedCache.front();
@@ -857,20 +885,21 @@ IGbE::TxDescCache::pktComplete()
     DPRINTF(EthernetDesc, "TxDescriptor data d1: %#llx d2: %#llx\n", desc->d1, desc->d2);
 
     if (!TxdOp::eop(desc)) {
-        assert(hLen == 0);
-        hLen = TxdOp::getLen(desc);
+        // This only supports two descriptors per tx packet
+        assert(pktPtr->length == 0);
+        pktPtr->length = TxdOp::getLen(desc);
         unusedCache.pop_front();
         usedCache.push_back(desc);
         pktDone = true;
         pktWaiting = false;
         pktPtr = NULL;
 
-        DPRINTF(EthernetDesc, "TxDesc: Partial Packet Descriptor Done\n");
+        DPRINTF(EthernetDesc, "Partial Packet Descriptor Done\n");
         return;
     }
 
     // Set the length of the data in the EtherPacket
-    pktPtr->length = TxdOp::getLen(desc) + hLen;
+    pktPtr->length += TxdOp::getLen(desc);
 
     // no support for vlans
     assert(!TxdOp::vle(desc));
@@ -888,33 +917,33 @@ IGbE::TxDescCache::pktComplete()
 
     // Checksums are only ofloaded for new descriptor types
     if (TxdOp::isData(desc) && ( TxdOp::ixsm(desc) || TxdOp::txsm(desc)) ) {
-        DPRINTF(EthernetDesc, "TxDesc: Calculating checksums for packet\n");
+        DPRINTF(EthernetDesc, "Calculating checksums for packet\n");
         IpPtr ip(pktPtr);
         if (TxdOp::ixsm(desc)) {
             ip->sum(0);
             ip->sum(cksum(ip));
-            DPRINTF(EthernetDesc, "TxDesc: Calculated IP checksum\n");
+            DPRINTF(EthernetDesc, "Calculated IP checksum\n");
         }
        if (TxdOp::txsm(desc)) {
            if (isTcp) {
                 TcpPtr tcp(ip);
                 tcp->sum(0);
                 tcp->sum(cksum(tcp));
-                DPRINTF(EthernetDesc, "TxDesc: Calculated TCP checksum\n");
+                DPRINTF(EthernetDesc, "Calculated TCP checksum\n");
            } else {
                 UdpPtr udp(ip);
                 udp->sum(0);
                 udp->sum(cksum(udp));
-                DPRINTF(EthernetDesc, "TxDesc: Calculated UDP checksum\n");
+                DPRINTF(EthernetDesc, "Calculated UDP checksum\n");
            }
         }
     }
 
     if (TxdOp::ide(desc)) {
         // Deal with the rx timer interrupts
-        DPRINTF(EthernetDesc, "TxDesc: Descriptor had IDE set\n");
+        DPRINTF(EthernetDesc, "Descriptor had IDE set\n");
         if (igbe->regs.tidv.idv()) {
-            DPRINTF(EthernetDesc, "TxDesc: setting tidv\n");
+            DPRINTF(EthernetDesc, "setting tidv\n");
             if (igbe->tidvEvent.scheduled())
                 igbe->tidvEvent.reschedule(curTick + igbe->regs.tidv.idv() *
                         igbe->intClock());
@@ -924,7 +953,7 @@ IGbE::TxDescCache::pktComplete()
         }
 
         if (igbe->regs.tadv.idv() && igbe->regs.tidv.idv()) {
-            DPRINTF(EthernetDesc, "TxDesc: setting tadv\n");
+            DPRINTF(EthernetDesc, "setting tadv\n");
             if (!igbe->tadvEvent.scheduled())
                 igbe->tadvEvent.schedule(curTick + igbe->regs.tadv.idv() *
                         igbe->intClock());
@@ -939,17 +968,34 @@ IGbE::TxDescCache::pktComplete()
     pktWaiting = false;
     pktPtr = NULL;
 
-    hLen = 0;
-    DPRINTF(EthernetDesc, "TxDesc: Descriptor Done\n");
+    DPRINTF(EthernetDesc, "Descriptor Done\n");
 
     if (igbe->regs.txdctl.wthresh() == 0) {
-        DPRINTF(EthernetDesc, "TxDesc: WTHRESH == 0, writing back descriptor\n");
+        DPRINTF(EthernetDesc, "WTHRESH == 0, writing back descriptor\n");
         writeback(0);
     } else if (igbe->regs.txdctl.wthresh() >= usedCache.size()) {
-        DPRINTF(EthernetDesc, "TxDesc: used > WTHRESH, writing back descriptor\n");
+        DPRINTF(EthernetDesc, "used > WTHRESH, writing back descriptor\n");
         writeback((igbe->cacheBlockSize()-1)>>4);
     }
+    igbe->checkDrain();
+}
 
+void
+IGbE::TxDescCache::serialize(std::ostream &os)
+{
+    DescCache<TxDesc>::serialize(os);
+    SERIALIZE_SCALAR(pktDone);
+    SERIALIZE_SCALAR(isTcp);
+    SERIALIZE_SCALAR(pktWaiting);
+}
+
+void
+IGbE::TxDescCache::unserialize(Checkpoint *cp, const std::string &section)
+{
+    DescCache<TxDesc>::unserialize(cp, section);
+    UNSERIALIZE_SCALAR(pktDone);
+    UNSERIALIZE_SCALAR(isTcp);
+    UNSERIALIZE_SCALAR(pktWaiting);
 }
 
 bool
@@ -969,7 +1015,12 @@ IGbE::TxDescCache::enableSm()
     igbe->restartClock();
 }
 
-
+bool
+IGbE::TxDescCache::hasOutstandingEvents()
+{
+    return pktEvent.scheduled() || wbEvent.scheduled() ||
+        fetchEvent.scheduled();
+}
 
 
 ///////////////////////////////////// IGbE /////////////////////////////////
@@ -977,10 +1028,61 @@ IGbE::TxDescCache::enableSm()
 void
 IGbE::restartClock()
 {
-    if (!tickEvent.scheduled() && (rxTick || txTick))
+    if (!tickEvent.scheduled() && (rxTick || txTick) && getState() ==
+            SimObject::Running)
         tickEvent.schedule((curTick/cycles(1)) * cycles(1) + cycles(1));
 }
 
+unsigned int
+IGbE::drain(Event *de)
+{
+    unsigned int count;
+    count = pioPort->drain(de) + dmaPort->drain(de);
+    if (rxDescCache.hasOutstandingEvents() ||
+            txDescCache.hasOutstandingEvents()) {
+        count++;
+        drainEvent = de;
+    }
+    // Clear the tick flags so tick() runs none of the state machines.
+    txFifoTick = false;
+    txTick = false;
+    rxTick = false;
+    // Cancel a tick that has already been scheduled.
+    if (tickEvent.scheduled())
+        tickEvent.deschedule();
+    // Draining if anything was counted above, otherwise already Drained.
+    if (count)
+        changeState(Draining);
+    else
+        changeState(Drained);
+
+    return count;
+}
+
+void
+IGbE::resume()
+{
+    SimObject::resume();
+    // Turn all three tick flags back on; drain() cleared them.
+    txFifoTick = true;
+    txTick = true;
+    rxTick = true;
+    // Reschedule the tick event if it is not already scheduled.
+    restartClock();
+}
+
+void
+IGbE::checkDrain()
+{
+    if (!drainEvent)    // no drain is waiting on the descriptor caches
+        return;
+    // Signal the drain only once no descriptor cache events are pending.
+    if (!rxDescCache.hasOutstandingEvents() &&
+            !txDescCache.hasOutstandingEvents()) {
+        drainEvent->process();
+        drainEvent = NULL;  // make sure the drain is signalled only once
+    }
+}
 
 void
 IGbE::txStateMachine()
@@ -998,8 +1100,10 @@ IGbE::txStateMachine()
         bool success;
         DPRINTF(EthernetSM, "TXS: packet placed in TX FIFO\n");
         success = txFifo.push(txPacket);
+        txFifoTick = true;
         assert(success);
         txPacket = NULL;
+        txDescCache.writeback((cacheBlockSize()-1)>>4);
         return;
     }
 
@@ -1021,6 +1125,7 @@ IGbE::txStateMachine()
             txDescCache.writeback(0);
             txTick = false;
             postInterrupt(IT_TXQE, true);
+            return;
         }
 
 
@@ -1038,11 +1143,17 @@ IGbE::txStateMachine()
                     "DMA of next packet\n", size);
             txFifo.reserve(size);
             txDescCache.getPacketData(txPacket);
-        } else {
+        } else if (size <= 0) {
             DPRINTF(EthernetSM, "TXS: No packets to get, writing back used descriptors\n");
             txDescCache.writeback(0);
+        } else {
+            DPRINTF(EthernetSM, "TXS: FIFO full, stopping ticking until space "
+                    "available in FIFO\n");
+            txDescCache.writeback((cacheBlockSize()-1)>>4);
+            txTick = false;
         }
 
+
         return;
     }
 }
@@ -1095,9 +1206,9 @@ IGbE::rxStateMachine()
         }
 
         if (descLeft == 0) {
-            DPRINTF(EthernetSM, "RXS: No descriptors left in ring, forcing writeback\n");
+            DPRINTF(EthernetSM, "RXS: No descriptors left in ring, forcing"
+                    " writeback and stopping ticking\n");
             rxDescCache.writeback(0);
-            DPRINTF(EthernetSM, "RXS: No descriptors left, stopping ticking\n");
             rxTick = false;
         }
 
@@ -1119,9 +1230,9 @@ IGbE::rxStateMachine()
         }
 
         if (rxDescCache.descUnused() == 0) {
-            DPRINTF(EthernetSM, "RXS: No descriptors available in cache, stopping ticking\n");
+            DPRINTF(EthernetSM, "RXS: No descriptors available in cache, "
+                    "fetching descriptors and stopping ticking\n");
             rxTick = false;
-            DPRINTF(EthernetSM, "RXS: Fetching descriptors because none available\n");
             rxDescCache.fetchDescriptors();
         }
         return;
@@ -1159,15 +1270,18 @@ void
 IGbE::txWire()
 {
     if (txFifo.empty()) {
+        txFifoTick = false;
         return;
     }
 
-    txTick = true;
 
     if (etherInt->sendPacket(txFifo.front())) {
-        DPRINTF(Ethernet, "TxFIFO: Successful transmit, bytes in fifo: %d\n",
+        DPRINTF(EthernetSM, "TxFIFO: Successful transmit, bytes available in fifo: %d\n",
                 txFifo.avail());
         txFifo.pop();
+    } else {
+        // We'll get woken up when ethTxDone() gets called
+        txFifoTick = false;
     }
 
 }
@@ -1180,34 +1294,133 @@ IGbE::tick()
     if (rxTick)
         rxStateMachine();
 
-    if (txTick) {
+    if (txTick)
         txStateMachine();
+
+    if (txFifoTick)
         txWire();
-    }
 
-    if (rxTick || txTick)
+
+    if (rxTick || txTick || txFifoTick)
         tickEvent.schedule(curTick + cycles(1));
 }
 
 void
 IGbE::ethTxDone()
 {
-    // restart the state machines if they are stopped
+    // restart the tx state machines if they are stopped
+    // fifo to send another packet
+    // tx sm to put more data into the fifo
+    txFifoTick = true;
     txTick = true;
+
     restartClock();
-    DPRINTF(Ethernet, "TxFIFO: Transmission complete\n");
+    DPRINTF(EthernetSM, "TxFIFO: Transmission complete\n");
 }
 
 void
 IGbE::serialize(std::ostream &os)
 {
-    panic("Need to implemenet\n");
+    PciDev::serialize(os);
+
+    regs.serialize(os);
+    SERIALIZE_SCALAR(eeOpBits);
+    SERIALIZE_SCALAR(eeAddrBits);
+    SERIALIZE_SCALAR(eeDataBits);
+    SERIALIZE_SCALAR(eeOpcode);
+    SERIALIZE_SCALAR(eeAddr);
+    SERIALIZE_ARRAY(flash,iGbReg::EEPROM_SIZE);
+
+    rxFifo.serialize("rxfifo", os);
+    txFifo.serialize("txfifo", os);
+
+    bool txPktExists = txPacket;
+    SERIALIZE_SCALAR(txPktExists);
+    if (txPktExists)
+        txPacket->serialize("txpacket", os);
+
+    Tick rdtr_time = 0, radv_time = 0, tidv_time = 0, tadv_time = 0,
+         inter_time = 0;
+    // Save each timer event's when(); a value stays 0 if it is not scheduled.
+    if (rdtrEvent.scheduled())
+        rdtr_time = rdtrEvent.when();
+    SERIALIZE_SCALAR(rdtr_time);
+
+    if (radvEvent.scheduled())
+        radv_time = radvEvent.when();
+    SERIALIZE_SCALAR(radv_time);
+
+    if (tidvEvent.scheduled())
+        tidv_time = tidvEvent.when();
+    SERIALIZE_SCALAR(tidv_time);
+
+    if (tadvEvent.scheduled())
+        tadv_time = tadvEvent.when();
+    SERIALIZE_SCALAR(tadv_time);
+
+    if (interEvent.scheduled())
+        inter_time = interEvent.when();
+    SERIALIZE_SCALAR(inter_time);
+    // Descriptor caches get their own "<name>.{Tx,Rx}DescCache" sections.
+    nameOut(os, csprintf("%s.TxDescCache", name()));
+    txDescCache.serialize(os);
+
+    nameOut(os, csprintf("%s.RxDescCache", name()));
+    rxDescCache.serialize(os);
 }
 
 void
 IGbE::unserialize(Checkpoint *cp, const std::string &section)
 {
-    panic("Need to implemenet\n");
+    PciDev::unserialize(cp, section);
+
+    regs.unserialize(cp, section);
+    UNSERIALIZE_SCALAR(eeOpBits);
+    UNSERIALIZE_SCALAR(eeAddrBits);
+    UNSERIALIZE_SCALAR(eeDataBits);
+    UNSERIALIZE_SCALAR(eeOpcode);
+    UNSERIALIZE_SCALAR(eeAddr);
+    UNSERIALIZE_ARRAY(flash,iGbReg::EEPROM_SIZE);
+
+    rxFifo.unserialize("rxfifo", cp, section);
+    txFifo.unserialize("txfifo", cp, section);
+    // Recreate the pending tx packet only if the checkpoint recorded one.
+    bool txPktExists;
+    UNSERIALIZE_SCALAR(txPktExists);
+    if (txPktExists) {
+        txPacket = new EthPacketData(16384);
+        txPacket->unserialize("txpacket", cp, section);
+    }
+    // The tick flags are not checkpointed; all three are simply set to true.
+    rxTick = true;
+    txTick = true;
+    txFifoTick = true;
+
+    Tick rdtr_time, radv_time, tidv_time, tadv_time, inter_time;
+    UNSERIALIZE_SCALAR(rdtr_time);
+    UNSERIALIZE_SCALAR(radv_time);
+    UNSERIALIZE_SCALAR(tidv_time);
+    UNSERIALIZE_SCALAR(tadv_time);
+    UNSERIALIZE_SCALAR(inter_time);
+    // A saved time of 0 means the event was not scheduled, so it is skipped.
+    if (rdtr_time)
+        rdtrEvent.schedule(rdtr_time);
+
+    if (radv_time)
+        radvEvent.schedule(radv_time);
+
+    if (tidv_time)
+        tidvEvent.schedule(tidv_time);
+
+    if (tadv_time)
+        tadvEvent.schedule(tadv_time);
+
+    if (inter_time)
+        interEvent.schedule(inter_time);
+    // Each descriptor cache is read back from its own "<section>.XxDescCache".
+    txDescCache.unserialize(cp, csprintf("%s.TxDescCache", section));
+
+    rxDescCache.unserialize(cp, csprintf("%s.RxDescCache", section));
 }
 
 
diff --git a/src/dev/i8254xGBe.hh b/src/dev/i8254xGBe.hh
index a2b9f38d5..2dec3b08c 100644
--- a/src/dev/i8254xGBe.hh
+++ b/src/dev/i8254xGBe.hh
@@ -62,8 +62,10 @@ class IGbE : public PciDev
     uint8_t eeOpcode, eeAddr;
     uint16_t flash[iGbReg::EEPROM_SIZE];
 
+    // The drain event if we have one
+    Event *drainEvent;
+
     // cached parameters from params struct
-    Tick tickRate;
     bool useFlowControl;
 
     // packet fifos
@@ -76,24 +78,44 @@ class IGbE : public PciDev
     // Should to Rx/Tx State machine tick?
     bool rxTick;
     bool txTick;
+    bool txFifoTick;
 
     // Event and function to deal with RDTR timer expiring
-    void rdtrProcess() { rxDescCache.writeback(0); postInterrupt(iGbReg::IT_RXT, true); }
+    void rdtrProcess() {
+        rxDescCache.writeback(0);
+        DPRINTF(EthernetIntr, "Posting RXT interrupt because RDTR timer expired\n");
+        postInterrupt(iGbReg::IT_RXT, true);
+    }
+
     //friend class EventWrapper<IGbE, &IGbE::rdtrProcess>;
     EventWrapper<IGbE, &IGbE::rdtrProcess> rdtrEvent;
 
     // Event and function to deal with RADV timer expiring
-    void radvProcess() { rxDescCache.writeback(0); postInterrupt(iGbReg::IT_RXT, true); }
+    void radvProcess() {
+        rxDescCache.writeback(0);
+        DPRINTF(EthernetIntr, "Posting RXT interrupt because RADV timer expired\n");
+        postInterrupt(iGbReg::IT_RXT, true);
+    }
+
     //friend class EventWrapper<IGbE, &IGbE::radvProcess>;
     EventWrapper<IGbE, &IGbE::radvProcess> radvEvent;
 
     // Event and function to deal with TADV timer expiring
-    void tadvProcess() { postInterrupt(iGbReg::IT_TXDW, true); }
+    void tadvProcess() {
+        txDescCache.writeback(0);
+        DPRINTF(EthernetIntr, "Posting TXDW interrupt because TADV timer expired\n");
+        postInterrupt(iGbReg::IT_TXDW, true);
+    }
+
     //friend class EventWrapper<IGbE, &IGbE::tadvProcess>;
     EventWrapper<IGbE, &IGbE::tadvProcess> tadvEvent;
 
     // Event and function to deal with TIDV timer expiring
-    void tidvProcess() { postInterrupt(iGbReg::IT_TXDW, true); };
+    void tidvProcess() {
+        txDescCache.writeback(0);
+        DPRINTF(EthernetIntr, "Posting TXDW interrupt because TIDV timer expired\n");
+        postInterrupt(iGbReg::IT_TXDW, true);
+    }
     //friend class EventWrapper<IGbE, &IGbE::tidvProcess>;
     EventWrapper<IGbE, &IGbE::tidvProcess> tidvEvent;
 
@@ -131,8 +153,15 @@ class IGbE : public PciDev
 
     Tick intClock() { return Clock::Int::ns * 1024; }
 
+    /** This function is used to restart the clock so it can handle things like
+     * draining and resume in one place. */
     void restartClock();
 
+    /** Check if all the draining things that need to occur have occurred and
+     * handle the drain event if so.
+     */
+    void checkDrain();
+
     template<class T>
     class DescCache
     {
@@ -202,8 +231,10 @@ class IGbE : public PciDev
          */
         void areaChanged()
         {
-            if (usedCache.size() > 0 || unusedCache.size() > 0)
+            if (usedCache.size() > 0 || curFetching || wbOut)
                 panic("Descriptor Address, Length or Head changed. Bad\n");
+            reset();
+
         }
 
         void writeback(Addr aMask)
@@ -229,7 +260,7 @@ class IGbE : public PciDev
             moreToWb = false;
             wbAlignment = aMask;
 
-            if (max_to_wb + curHead > descLen()) {
+            if (max_to_wb + curHead >= descLen()) {
                 max_to_wb = descLen() - curHead;
                 moreToWb = true;
                 // this is by definition aligned correctly
@@ -265,10 +296,14 @@ class IGbE : public PciDev
          */
         void fetchDescriptors()
         {
-            size_t max_to_fetch = descTail() - cachePnt;
-            if (max_to_fetch < 0)
+            size_t max_to_fetch;
+
+            if (descTail() >= cachePnt)
+                max_to_fetch = descTail() - cachePnt;
+            else
                 max_to_fetch = descLen() - cachePnt;
 
+
             max_to_fetch = std::min(max_to_fetch, (size - usedCache.size() -
                         unusedCache.size()));
 
@@ -311,8 +346,9 @@ class IGbE : public PciDev
 #endif
 
             cachePnt += curFetching;
-            if (cachePnt > descLen())
-                cachePnt -= descLen();
+            assert(cachePnt <= descLen());
+            if (cachePnt == descLen())
+                cachePnt = 0;
 
             curFetching = 0;
 
@@ -320,7 +356,7 @@ class IGbE : public PciDev
                     oldCp, cachePnt);
 
             enableSm();
-
+            igbe->checkDrain();
         }
 
         EventWrapper<DescCache, &DescCache::fetchComplete> fetchEvent;
@@ -337,8 +373,8 @@ class IGbE : public PciDev
             curHead += wbOut;
             wbOut = 0;
 
-            if (curHead > descLen())
-                curHead = 0;
+            if (curHead >= descLen())
+                curHead -= descLen();
 
             // Update the head
             updateHead(curHead);
@@ -352,6 +388,7 @@ class IGbE : public PciDev
                 writeback(wbAlignment);
             }
             intAfterWb();
+            igbe->checkDrain();
         }
 
 
@@ -390,6 +427,63 @@ class IGbE : public PciDev
 
             usedCache.clear();
             unusedCache.clear();
+
+            cachePnt = 0;
+
+        }
+
+        virtual void serialize(std::ostream &os)
+        {
+            SERIALIZE_SCALAR(cachePnt);
+            SERIALIZE_SCALAR(curFetching);
+            SERIALIZE_SCALAR(wbOut);
+            SERIALIZE_SCALAR(moreToWb);
+            SERIALIZE_SCALAR(wbAlignment);
+            // Used descriptors: a count, then one raw byte array per entry.
+            int usedCacheSize = usedCache.size();
+            SERIALIZE_SCALAR(usedCacheSize);
+            for (int i = 0; i < usedCacheSize; ++i) {
+                uint8_t *p = (uint8_t*)usedCache[i];
+                arrayParamOut(os, csprintf("usedCache_%d", i), p, sizeof(T));
+            }
+            // Unused descriptors are written the same way under their own keys.
+            int unusedCacheSize = unusedCache.size();
+            SERIALIZE_SCALAR(unusedCacheSize);
+            for (int i = 0; i < unusedCacheSize; ++i) {
+                uint8_t *p = (uint8_t*)unusedCache[i];
+                arrayParamOut(os, csprintf("unusedCache_%d", i), p, sizeof(T));
+            }
+        }
+
+        virtual void unserialize(Checkpoint *cp, const std::string &section)
+        {
+            UNSERIALIZE_SCALAR(cachePnt);
+            UNSERIALIZE_SCALAR(curFetching);
+            UNSERIALIZE_SCALAR(wbOut);
+            UNSERIALIZE_SCALAR(moreToWb);
+            UNSERIALIZE_SCALAR(wbAlignment);
+            // Read the saved count, then rebuild that many used descriptors.
+            int usedCacheSize;
+            UNSERIALIZE_SCALAR(usedCacheSize);
+            // Every entry is a freshly allocated T filled from its byte array.
+            for (int i = 0; i < usedCacheSize; ++i) {
+                T *desc = new T;
+                arrayParamIn(cp, section, csprintf("usedCache_%d", i),
+                        (uint8_t*)desc, sizeof(T));
+                usedCache.push_back(desc);
+            }
+            // Same procedure for the descriptors that were not yet used.
+            int unusedCacheSize;
+            UNSERIALIZE_SCALAR(unusedCacheSize);
+            for (int i = 0; i < unusedCacheSize; ++i) {
+                T *desc = new T;
+                arrayParamIn(cp, section, csprintf("unusedCache_%d", i),
+                        (uint8_t*)desc, sizeof(T));
+                unusedCache.push_back(desc);
+            }
+        }
+        virtual bool hasOutstandingEvents() {
+            return wbEvent.scheduled() || fetchEvent.scheduled();
         }
 
      };
@@ -428,6 +522,10 @@ class IGbE : public PciDev
 
         EventWrapper<RxDescCache, &RxDescCache::pktComplete> pktEvent;
 
+        virtual bool hasOutstandingEvents();
+
+        virtual void serialize(std::ostream &os);
+        virtual void unserialize(Checkpoint *cp, const std::string &section);
     };
     friend class RxDescCache;
 
@@ -447,7 +545,6 @@ class IGbE : public PciDev
         bool pktDone;
         bool isTcp;
         bool pktWaiting;
-        int hLen;
 
       public:
         TxDescCache(IGbE *i, std::string n, int s);
@@ -475,6 +572,11 @@ class IGbE : public PciDev
         void pktComplete();
         EventWrapper<TxDescCache, &TxDescCache::pktComplete> pktEvent;
 
+        virtual bool hasOutstandingEvents();
+
+        virtual void serialize(std::ostream &os);
+        virtual void unserialize(Checkpoint *cp, const std::string &section);
+
     };
     friend class TxDescCache;
 
@@ -513,7 +615,8 @@ class IGbE : public PciDev
 
     virtual void serialize(std::ostream &os);
     virtual void unserialize(Checkpoint *cp, const std::string &section);
-
+    virtual unsigned int drain(Event *de);
+    virtual void resume();
 
 };
 
diff --git a/src/dev/i8254xGBe_defs.hh b/src/dev/i8254xGBe_defs.hh
index 8538c155b..91b3eacc9 100644
--- a/src/dev/i8254xGBe_defs.hh
+++ b/src/dev/i8254xGBe_defs.hh
@@ -162,7 +162,7 @@ struct TxDesc {
 
 namespace TxdOp {
 const uint8_t TXD_CNXT = 0x0;
-const uint8_t TXD_DATA = 0x0;
+const uint8_t TXD_DATA = 0x1;
 
 bool isLegacy(TxDesc *d) { return !bits(d->d2,29,29); }
 uint8_t getType(TxDesc *d) { return bits(d->d2, 23,20); }
@@ -220,6 +220,14 @@ struct Regs {
         bool operator==(T d) { return d == _data; }
         void operator()(T d) { _data = d; }
         Reg() { _data = 0; }
+        void serialize(std::ostream &os)
+        {
+            SERIALIZE_SCALAR(_data);
+        }
+        void unserialize(Checkpoint *cp, const std::string &section)
+        {
+            UNSERIALIZE_SCALAR(_data);
+        }
     };
 
     struct CTRL : public Reg<uint32_t> { // 0x0000 CTRL Register
@@ -595,6 +603,79 @@ struct Regs {
         ADD_FIELD32(smbclkout,30,1); // smb clock out
     };
     MANC manc;
-};
 
-}; // iGbReg namespace
+    void serialize(std::ostream &os)
+    {
+        paramOut(os, "ctrl", ctrl._data);
+        paramOut(os, "sts", sts._data);
+        paramOut(os, "eecd", eecd._data);
+        paramOut(os, "eerd", eerd._data);
+        paramOut(os, "ctrl_ext", ctrl_ext._data);
+        paramOut(os, "mdic", mdic._data);
+        paramOut(os, "icr", icr._data);
+        SERIALIZE_SCALAR(imr);
+        paramOut(os, "itr", itr._data);
+        SERIALIZE_SCALAR(iam);
+        paramOut(os, "rctl", rctl._data);
+        paramOut(os, "fcttv", fcttv._data);
+        paramOut(os, "tctl", tctl._data);
+        paramOut(os, "pba", pba._data);
+        paramOut(os, "fcrtl", fcrtl._data);
+        paramOut(os, "fcrth", fcrth._data);
+        paramOut(os, "rdba", rdba._data);
+        paramOut(os, "rdlen", rdlen._data);
+        paramOut(os, "rdh", rdh._data);
+        paramOut(os, "rdt", rdt._data);
+        paramOut(os, "rdtr", rdtr._data);
+        paramOut(os, "rxdctl", rxdctl._data);
+        paramOut(os, "radv", radv._data);
+        paramOut(os, "rsrpd", rsrpd._data);
+        paramOut(os, "tdba", tdba._data);
+        paramOut(os, "tdlen", tdlen._data);
+        paramOut(os, "tdh", tdh._data);
+        paramOut(os, "tdt", tdt._data);
+        paramOut(os, "tidv", tidv._data);
+        paramOut(os, "txdctl", txdctl._data);
+        paramOut(os, "tadv", tadv._data);
+        paramOut(os, "rxcsum", rxcsum._data);
+        paramOut(os, "manc", manc._data);
+    }
+
+    void unserialize(Checkpoint *cp, const std::string &section)
+    {
+        paramIn(cp, section, "ctrl", ctrl._data);
+        paramIn(cp, section, "sts", sts._data);
+        paramIn(cp, section, "eecd", eecd._data);
+        paramIn(cp, section, "eerd", eerd._data);
+        paramIn(cp, section, "ctrl_ext", ctrl_ext._data);
+        paramIn(cp, section, "mdic", mdic._data);
+        paramIn(cp, section, "icr", icr._data);
+        UNSERIALIZE_SCALAR(imr);
+        paramIn(cp, section, "itr", itr._data);
+        UNSERIALIZE_SCALAR(iam);
+        paramIn(cp, section, "rctl", rctl._data);
+        paramIn(cp, section, "fcttv", fcttv._data);
+        paramIn(cp, section, "tctl", tctl._data);
+        paramIn(cp, section, "pba", pba._data);
+        paramIn(cp, section, "fcrtl", fcrtl._data);
+        paramIn(cp, section, "fcrth", fcrth._data);
+        paramIn(cp, section, "rdba", rdba._data);
+        paramIn(cp, section, "rdlen", rdlen._data);
+        paramIn(cp, section, "rdh", rdh._data);
+        paramIn(cp, section, "rdt", rdt._data);
+        paramIn(cp, section, "rdtr", rdtr._data);
+        paramIn(cp, section, "rxdctl", rxdctl._data);
+        paramIn(cp, section, "radv", radv._data);
+        paramIn(cp, section, "rsrpd", rsrpd._data);
+        paramIn(cp, section, "tdba", tdba._data);
+        paramIn(cp, section, "tdlen", tdlen._data);
+        paramIn(cp, section, "tdh", tdh._data);
+        paramIn(cp, section, "tdt", tdt._data);
+        paramIn(cp, section, "tidv", tidv._data);
+        paramIn(cp, section, "txdctl", txdctl._data);
+        paramIn(cp, section, "tadv", tadv._data);
+        paramIn(cp, section, "rxcsum", rxcsum._data);
+        paramIn(cp, section, "manc", manc._data);
+    }
+};
+} // iGbReg namespace
diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini b/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini
index cc4477d68..4d44e14fe 100644
--- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini
+++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini
@@ -115,7 +115,7 @@ split=false
 split_size=0
 store_compressed=false
 subblock_size=0
-tgts_per_mshr=5
+tgts_per_mshr=20
 trace_addr=0
 two_queue=false
 write_buffers=8
@@ -291,7 +291,7 @@ split=false
 split_size=0
 store_compressed=false
 subblock_size=0
-tgts_per_mshr=5
+tgts_per_mshr=20
 trace_addr=0
 two_queue=false
 write_buffers=8
diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.out b/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.out
index f50559125..686c3b2f6 100644
--- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.out
+++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.out
@@ -251,7 +251,7 @@ assoc=2
 block_size=64
 latency=1
 mshrs=10
-tgts_per_mshr=5
+tgts_per_mshr=20
 write_buffers=8
 prioritizeRequests=false
 protocol=null
@@ -289,7 +289,7 @@ assoc=2
 block_size=64
 latency=1
 mshrs=10
-tgts_per_mshr=5
+tgts_per_mshr=20
 write_buffers=8
 prioritizeRequests=false
 protocol=null
diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt b/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt
index 4b323618c..988584966 100644
--- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt
+++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt
@@ -1,40 +1,40 @@
 
 ---------- Begin Simulation Statistics ----------
 global.BPredUnit.BTBCorrect                         0                       # Number of correct BTB predictions (this stat may not work properly.
-global.BPredUnit.BTBHits                          669                       # Number of BTB hits
-global.BPredUnit.BTBLookups                      2338                       # Number of BTB lookups
-global.BPredUnit.RASInCorrect                      76                       # Number of incorrect RAS predictions.
-global.BPredUnit.condIncorrect                    437                       # Number of conditional branches incorrect
-global.BPredUnit.condPredicted                   1559                       # Number of conditional branches predicted
-global.BPredUnit.lookups                         5224                       # Number of BP lookups
-global.BPredUnit.usedRAS                         2821                       # Number of times the RAS was used to get a target.
-host_inst_rate                                  12539                       # Simulator instruction rate (inst/s)
-host_mem_usage                                 156028                       # Number of bytes of host memory used
-host_seconds                                     0.45                       # Real time elapsed on the host
-host_tick_rate                                3120138                       # Simulator tick rate (ticks/s)
+global.BPredUnit.BTBHits                          615                       # Number of BTB hits
+global.BPredUnit.BTBLookups                      1663                       # Number of BTB lookups
+global.BPredUnit.RASInCorrect                      78                       # Number of incorrect RAS predictions.
+global.BPredUnit.condIncorrect                    439                       # Number of conditional branches incorrect
+global.BPredUnit.condPredicted                   1180                       # Number of conditional branches predicted
+global.BPredUnit.lookups                         2032                       # Number of BP lookups
+global.BPredUnit.usedRAS                          304                       # Number of times the RAS was used to get a target.
+host_inst_rate                                  15105                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 154056                       # Number of bytes of host memory used
+host_seconds                                     0.37                       # Real time elapsed on the host
+host_tick_rate                                3572881                       # Simulator tick rate (ticks/s)
 memdepunit.memDep.conflictingLoads                 24                       # Number of conflicting loads.
-memdepunit.memDep.conflictingStores                12                       # Number of conflicting stores.
-memdepunit.memDep.insertedLoads                  3770                       # Number of loads inserted to the mem dependence unit.
-memdepunit.memDep.insertedStores                 3723                       # Number of stores inserted to the mem dependence unit.
+memdepunit.memDep.conflictingStores                13                       # Number of conflicting stores.
+memdepunit.memDep.insertedLoads                  2144                       # Number of loads inserted to the mem dependence unit.
+memdepunit.memDep.insertedStores                 1221                       # Number of stores inserted to the mem dependence unit.
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
 sim_insts                                        5623                       # Number of instructions simulated
 sim_seconds                                  0.000001                       # Number of seconds simulated
-sim_ticks                                     1400134                       # Number of ticks simulated
+sim_ticks                                     1331134                       # Number of ticks simulated
 system.cpu.commit.COM:branches                    862                       # Number of branches committed
 system.cpu.commit.COM:bw_lim_events               101                       # number cycles where commit BW limit reached
 system.cpu.commit.COM:bw_limited                    0                       # number of insts not committed due to BW limits
 system.cpu.commit.COM:committed_per_cycle.start_dist                     # Number of insts commited each cycle
-system.cpu.commit.COM:committed_per_cycle.samples        52214                      
+system.cpu.commit.COM:committed_per_cycle.samples        30311                      
 system.cpu.commit.COM:committed_per_cycle.min_value            0                      
-                               0        49499   9480.02%           
-                               1         1576    301.83%           
-                               2          483     92.50%           
-                               3          233     44.62%           
-                               4          133     25.47%           
-                               5          102     19.53%           
-                               6           60     11.49%           
-                               7           27      5.17%           
-                               8          101     19.34%           
+                               0        27595   9103.96%           
+                               1         1579    520.93%           
+                               2          482    159.02%           
+                               3          232     76.54%           
+                               4          131     43.22%           
+                               5          104     34.31%           
+                               6           60     19.79%           
+                               7           27      8.91%           
+                               8          101     33.32%           
 system.cpu.commit.COM:committed_per_cycle.max_value            8                      
 system.cpu.commit.COM:committed_per_cycle.end_dist
 
@@ -43,70 +43,70 @@ system.cpu.commit.COM:loads                       979                       # Nu
 system.cpu.commit.COM:membars                       0                       # Number of memory barriers committed
 system.cpu.commit.COM:refs                       1791                       # Number of memory references committed
 system.cpu.commit.COM:swp_count                     0                       # Number of s/w prefetches committed
-system.cpu.commit.branchMispredicts               368                       # The number of times a branch was mispredicted
+system.cpu.commit.branchMispredicts               370                       # The number of times a branch was mispredicted
 system.cpu.commit.commitCommittedInsts           5640                       # The number of committed instructions
 system.cpu.commit.commitNonSpecStalls              17                       # The number of times commit has been forced to stall to communicate backwards
-system.cpu.commit.commitSquashedInsts           13804                       # The number of squashed insts skipped by commit
+system.cpu.commit.commitSquashedInsts            4834                       # The number of squashed insts skipped by commit
 system.cpu.committedInsts                        5623                       # Number of Instructions Simulated
 system.cpu.committedInsts_total                  5623                       # Number of Instructions Simulated
-system.cpu.cpi                             249.001245                       # CPI: Cycles Per Instruction
-system.cpu.cpi_total                       249.001245                       # CPI: Total CPI of All Threads
-system.cpu.dcache.ReadReq_accesses               1596                       # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_avg_miss_latency  6986.684848                       # average ReadReq miss latency
-system.cpu.dcache.ReadReq_avg_mshr_miss_latency  6882.626263                       # average ReadReq mshr miss latency
-system.cpu.dcache.ReadReq_hits                   1431                       # number of ReadReq hits
-system.cpu.dcache.ReadReq_miss_latency        1152803                       # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_rate          0.103383                       # miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_misses                  165                       # number of ReadReq misses
-system.cpu.dcache.ReadReq_mshr_hits                66                       # number of ReadReq MSHR hits
-system.cpu.dcache.ReadReq_mshr_miss_latency       681380                       # number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_rate     0.062030                       # mshr miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_mshr_misses              99                       # number of ReadReq MSHR misses
+system.cpu.cpi                             236.730215                       # CPI: Cycles Per Instruction
+system.cpu.cpi_total                       236.730215                       # CPI: Total CPI of All Threads
+system.cpu.dcache.ReadReq_accesses               1606                       # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_avg_miss_latency  7256.076023                       # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency  7095.200000                       # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_hits                   1435                       # number of ReadReq hits
+system.cpu.dcache.ReadReq_miss_latency        1240789                       # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_rate          0.106476                       # miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_misses                  171                       # number of ReadReq misses
+system.cpu.dcache.ReadReq_mshr_hits                71                       # number of ReadReq MSHR hits
+system.cpu.dcache.ReadReq_mshr_miss_latency       709520                       # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_rate     0.062267                       # mshr miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_mshr_misses             100                       # number of ReadReq MSHR misses
 system.cpu.dcache.WriteReq_accesses               812                       # number of WriteReq accesses(hits+misses)
-system.cpu.dcache.WriteReq_avg_miss_latency  5293.200787                       # average WriteReq miss latency
-system.cpu.dcache.WriteReq_avg_mshr_miss_latency  5141.095890                       # average WriteReq mshr miss latency
-system.cpu.dcache.WriteReq_hits                   558                       # number of WriteReq hits
-system.cpu.dcache.WriteReq_miss_latency       1344473                       # number of WriteReq miss cycles
-system.cpu.dcache.WriteReq_miss_rate         0.312808                       # miss rate for WriteReq accesses
-system.cpu.dcache.WriteReq_misses                 254                       # number of WriteReq misses
-system.cpu.dcache.WriteReq_mshr_hits              181                       # number of WriteReq MSHR hits
-system.cpu.dcache.WriteReq_mshr_miss_latency       375300                       # number of WriteReq MSHR miss cycles
+system.cpu.dcache.WriteReq_avg_miss_latency  8026.070225                       # average WriteReq miss latency
+system.cpu.dcache.WriteReq_avg_mshr_miss_latency  7200.452055                       # average WriteReq mshr miss latency
+system.cpu.dcache.WriteReq_hits                   456                       # number of WriteReq hits
+system.cpu.dcache.WriteReq_miss_latency       2857281                       # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_rate         0.438424                       # miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_misses                 356                       # number of WriteReq misses
+system.cpu.dcache.WriteReq_mshr_hits              283                       # number of WriteReq MSHR hits
+system.cpu.dcache.WriteReq_mshr_miss_latency       525633                       # number of WriteReq MSHR miss cycles
 system.cpu.dcache.WriteReq_mshr_miss_rate     0.089901                       # mshr miss rate for WriteReq accesses
 system.cpu.dcache.WriteReq_mshr_misses             73                       # number of WriteReq MSHR misses
 system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.dcache.avg_blocked_cycles_no_targets  3366.930233                       # average number of cycles each access was blocked
-system.cpu.dcache.avg_refs                  11.563953                       # Average number of references to valid blocks.
+system.cpu.dcache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
+system.cpu.dcache.avg_refs                  10.930636                       # Average number of references to valid blocks.
 system.cpu.dcache.blocked_no_mshrs                  0                       # number of cycles access was blocked
-system.cpu.dcache.blocked_no_targets               43                       # number of cycles access was blocked
+system.cpu.dcache.blocked_no_targets                0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
-system.cpu.dcache.blocked_cycles_no_targets       144778                       # number of cycles access was blocked
+system.cpu.dcache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.dcache.cache_copies                      0                       # number of cache copies performed
-system.cpu.dcache.demand_accesses                2408                       # number of demand (read+write) accesses
-system.cpu.dcache.demand_avg_miss_latency  5960.085919                       # average overall miss latency
-system.cpu.dcache.demand_avg_mshr_miss_latency  6143.488372                       # average overall mshr miss latency
-system.cpu.dcache.demand_hits                    1989                       # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency         2497276                       # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_rate           0.174003                       # miss rate for demand accesses
-system.cpu.dcache.demand_misses                   419                       # number of demand (read+write) misses
-system.cpu.dcache.demand_mshr_hits                247                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency      1056680                       # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_rate      0.071429                       # mshr miss rate for demand accesses
-system.cpu.dcache.demand_mshr_misses              172                       # number of demand (read+write) MSHR misses
+system.cpu.dcache.demand_accesses                2418                       # number of demand (read+write) accesses
+system.cpu.dcache.demand_avg_miss_latency  7776.223909                       # average overall miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency  7139.612717                       # average overall mshr miss latency
+system.cpu.dcache.demand_hits                    1891                       # number of demand (read+write) hits
+system.cpu.dcache.demand_miss_latency         4098070                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_rate           0.217949                       # miss rate for demand accesses
+system.cpu.dcache.demand_misses                   527                       # number of demand (read+write) misses
+system.cpu.dcache.demand_mshr_hits                354                       # number of demand (read+write) MSHR hits
+system.cpu.dcache.demand_mshr_miss_latency      1235153                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_rate      0.071547                       # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_misses              173                       # number of demand (read+write) MSHR misses
 system.cpu.dcache.fast_writes                       0                       # number of fast writes performed
 system.cpu.dcache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.dcache.overall_accesses               2408                       # number of overall (read+write) accesses
-system.cpu.dcache.overall_avg_miss_latency  5960.085919                       # average overall miss latency
-system.cpu.dcache.overall_avg_mshr_miss_latency  6143.488372                       # average overall mshr miss latency
+system.cpu.dcache.overall_accesses               2418                       # number of overall (read+write) accesses
+system.cpu.dcache.overall_avg_miss_latency  7776.223909                       # average overall miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency  7139.612717                       # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits                   1989                       # number of overall hits
-system.cpu.dcache.overall_miss_latency        2497276                       # number of overall miss cycles
-system.cpu.dcache.overall_miss_rate          0.174003                       # miss rate for overall accesses
-system.cpu.dcache.overall_misses                  419                       # number of overall misses
-system.cpu.dcache.overall_mshr_hits               247                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency      1056680                       # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_rate     0.071429                       # mshr miss rate for overall accesses
-system.cpu.dcache.overall_mshr_misses             172                       # number of overall MSHR misses
+system.cpu.dcache.overall_hits                   1891                       # number of overall hits
+system.cpu.dcache.overall_miss_latency        4098070                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_rate          0.217949                       # miss rate for overall accesses
+system.cpu.dcache.overall_misses                  527                       # number of overall misses
+system.cpu.dcache.overall_mshr_hits               354                       # number of overall MSHR hits
+system.cpu.dcache.overall_mshr_miss_latency      1235153                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_rate     0.071547                       # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_misses             173                       # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.dcache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu.dcache.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
@@ -119,90 +119,90 @@ system.cpu.dcache.prefetcher.num_hwpf_removed_MSHR_hit            0
 system.cpu.dcache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
 system.cpu.dcache.replacements                      0                       # number of replacements
-system.cpu.dcache.sampled_refs                    172                       # Sample count of references to valid blocks.
+system.cpu.dcache.sampled_refs                    173                       # Sample count of references to valid blocks.
 system.cpu.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse                101.349670                       # Cycle average of tags in use
-system.cpu.dcache.total_refs                     1989                       # Total number of references to valid blocks.
+system.cpu.dcache.tagsinuse                102.478227                       # Cycle average of tags in use
+system.cpu.dcache.total_refs                     1891                       # Total number of references to valid blocks.
 system.cpu.dcache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.dcache.writebacks                        0                       # number of writebacks
-system.cpu.decode.DECODE:BlockedCycles          17501                       # Number of cycles decode is blocked
+system.cpu.decode.DECODE:BlockedCycles          17469                       # Number of cycles decode is blocked
 system.cpu.decode.DECODE:BranchMispred             70                       # Number of times decode detected a branch misprediction
-system.cpu.decode.DECODE:BranchResolved           167                       # Number of times decode resolved a branch
-system.cpu.decode.DECODE:DecodedInsts           29609                       # Number of instructions handled by decode
-system.cpu.decode.DECODE:IdleCycles             29114                       # Number of cycles decode is idle
-system.cpu.decode.DECODE:RunCycles               5540                       # Number of cycles decode is running
-system.cpu.decode.DECODE:SquashCycles            2527                       # Number of cycles decode is squashing
+system.cpu.decode.DECODE:BranchResolved           169                       # Number of times decode resolved a branch
+system.cpu.decode.DECODE:DecodedInsts           11765                       # Number of instructions handled by decode
+system.cpu.decode.DECODE:IdleCycles             10684                       # Number of cycles decode is idle
+system.cpu.decode.DECODE:RunCycles               2098                       # Number of cycles decode is running
+system.cpu.decode.DECODE:SquashCycles             907                       # Number of cycles decode is squashing
 system.cpu.decode.DECODE:SquashedInsts            200                       # Number of squashed instructions handled by decode
-system.cpu.decode.DECODE:UnblockCycles             60                       # Number of cycles decode is unblocking
-system.cpu.fetch.Branches                        5224                       # Number of branches that fetch encountered
-system.cpu.fetch.CacheLines                      6367                       # Number of cache lines fetched
-system.cpu.fetch.Cycles                         13308                       # Number of cycles fetch has run and was not squashing or blocked
-system.cpu.fetch.IcacheSquashes                   295                       # Number of outstanding Icache misses that were squashed
-system.cpu.fetch.Insts                          35526                       # Number of instructions fetch has processed
-system.cpu.fetch.SquashCycles                    2057                       # Number of cycles fetch has spent squashing
-system.cpu.fetch.branchRate                  0.095429                       # Number of branch fetches per cycle
-system.cpu.fetch.icacheStallCycles               7360                       # Number of cycles fetch is stalled on an Icache miss
-system.cpu.fetch.predictedBranches               3490                       # Number of branches that fetch has predicted taken
-system.cpu.fetch.rate                        0.648972                       # Number of inst fetches per cycle
+system.cpu.decode.DECODE:UnblockCycles             61                       # Number of cycles decode is unblocking
+system.cpu.fetch.Branches                        2032                       # Number of branches that fetch encountered
+system.cpu.fetch.CacheLines                      1710                       # Number of cache lines fetched
+system.cpu.fetch.Cycles                          3962                       # Number of cycles fetch has run and was not squashing or blocked
+system.cpu.fetch.IcacheSquashes                   268                       # Number of outstanding Icache misses that were squashed
+system.cpu.fetch.Insts                          12603                       # Number of instructions fetch has processed
+system.cpu.fetch.SquashCycles                     472                       # Number of cycles fetch has spent squashing
+system.cpu.fetch.branchRate                  0.065089                       # Number of branch fetches per cycle
+system.cpu.fetch.icacheStallCycles               1710                       # Number of cycles fetch is stalled on an Icache miss
+system.cpu.fetch.predictedBranches                919                       # Number of branches that fetch has predicted taken
+system.cpu.fetch.rate                        0.403696                       # Number of inst fetches per cycle
 system.cpu.fetch.rateDist.start_dist                           # Number of instructions fetched each cycle (Total)
-system.cpu.fetch.rateDist.samples               54742                      
+system.cpu.fetch.rateDist.samples               31219                      
 system.cpu.fetch.rateDist.min_value                 0                      
-                               0        47805   8732.78%           
-                               1          199     36.35%           
-                               2          500     91.34%           
-                               3         1426    260.49%           
-                               4         1459    266.52%           
-                               5          244     44.57%           
-                               6          327     59.73%           
-                               7         1225    223.78%           
-                               8         1557    284.43%           
+                               0        28979   9282.49%           
+                               1          197     63.10%           
+                               2          198     63.42%           
+                               3          167     53.49%           
+                               4          197     63.10%           
+                               5          187     59.90%           
+                               6          222     71.11%           
+                               7          122     39.08%           
+                               8          950    304.30%           
 system.cpu.fetch.rateDist.max_value                 8                      
 system.cpu.fetch.rateDist.end_dist
 
-system.cpu.icache.ReadReq_accesses               6366                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_avg_miss_latency  5085.923937                       # average ReadReq miss latency
-system.cpu.icache.ReadReq_avg_mshr_miss_latency  4278.032258                       # average ReadReq mshr miss latency
-system.cpu.icache.ReadReq_hits                   5919                       # number of ReadReq hits
-system.cpu.icache.ReadReq_miss_latency        2273408                       # number of ReadReq miss cycles
-system.cpu.icache.ReadReq_miss_rate          0.070217                       # miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_misses                  447                       # number of ReadReq misses
-system.cpu.icache.ReadReq_mshr_hits               137                       # number of ReadReq MSHR hits
-system.cpu.icache.ReadReq_mshr_miss_latency      1326190                       # number of ReadReq MSHR miss cycles
-system.cpu.icache.ReadReq_mshr_miss_rate     0.048696                       # mshr miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_accesses               1710                       # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_avg_miss_latency  5139.251163                       # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency  4349.151613                       # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_hits                   1280                       # number of ReadReq hits
+system.cpu.icache.ReadReq_miss_latency        2209878                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_rate          0.251462                       # miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_misses                  430                       # number of ReadReq misses
+system.cpu.icache.ReadReq_mshr_hits               120                       # number of ReadReq MSHR hits
+system.cpu.icache.ReadReq_mshr_miss_latency      1348237                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_rate     0.181287                       # mshr miss rate for ReadReq accesses
 system.cpu.icache.ReadReq_mshr_misses             310                       # number of ReadReq MSHR misses
 system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.icache.avg_blocked_cycles_no_targets  3443.500000                       # average number of cycles each access was blocked
-system.cpu.icache.avg_refs                  19.093548                       # Average number of references to valid blocks.
+system.cpu.icache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
+system.cpu.icache.avg_refs                   4.129032                       # Average number of references to valid blocks.
 system.cpu.icache.blocked_no_mshrs                  0                       # number of cycles access was blocked
-system.cpu.icache.blocked_no_targets                8                       # number of cycles access was blocked
+system.cpu.icache.blocked_no_targets                0                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
-system.cpu.icache.blocked_cycles_no_targets        27548                       # number of cycles access was blocked
+system.cpu.icache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.icache.cache_copies                      0                       # number of cache copies performed
-system.cpu.icache.demand_accesses                6366                       # number of demand (read+write) accesses
-system.cpu.icache.demand_avg_miss_latency  5085.923937                       # average overall miss latency
-system.cpu.icache.demand_avg_mshr_miss_latency  4278.032258                       # average overall mshr miss latency
-system.cpu.icache.demand_hits                    5919                       # number of demand (read+write) hits
-system.cpu.icache.demand_miss_latency         2273408                       # number of demand (read+write) miss cycles
-system.cpu.icache.demand_miss_rate           0.070217                       # miss rate for demand accesses
-system.cpu.icache.demand_misses                   447                       # number of demand (read+write) misses
-system.cpu.icache.demand_mshr_hits                137                       # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_miss_latency      1326190                       # number of demand (read+write) MSHR miss cycles
-system.cpu.icache.demand_mshr_miss_rate      0.048696                       # mshr miss rate for demand accesses
+system.cpu.icache.demand_accesses                1710                       # number of demand (read+write) accesses
+system.cpu.icache.demand_avg_miss_latency  5139.251163                       # average overall miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency  4349.151613                       # average overall mshr miss latency
+system.cpu.icache.demand_hits                    1280                       # number of demand (read+write) hits
+system.cpu.icache.demand_miss_latency         2209878                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_rate           0.251462                       # miss rate for demand accesses
+system.cpu.icache.demand_misses                   430                       # number of demand (read+write) misses
+system.cpu.icache.demand_mshr_hits                120                       # number of demand (read+write) MSHR hits
+system.cpu.icache.demand_mshr_miss_latency      1348237                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_rate      0.181287                       # mshr miss rate for demand accesses
 system.cpu.icache.demand_mshr_misses              310                       # number of demand (read+write) MSHR misses
 system.cpu.icache.fast_writes                       0                       # number of fast writes performed
 system.cpu.icache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.icache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.icache.overall_accesses               6366                       # number of overall (read+write) accesses
-system.cpu.icache.overall_avg_miss_latency  5085.923937                       # average overall miss latency
-system.cpu.icache.overall_avg_mshr_miss_latency  4278.032258                       # average overall mshr miss latency
+system.cpu.icache.overall_accesses               1710                       # number of overall (read+write) accesses
+system.cpu.icache.overall_avg_miss_latency  5139.251163                       # average overall miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency  4349.151613                       # average overall mshr miss latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.icache.overall_hits                   5919                       # number of overall hits
-system.cpu.icache.overall_miss_latency        2273408                       # number of overall miss cycles
-system.cpu.icache.overall_miss_rate          0.070217                       # miss rate for overall accesses
-system.cpu.icache.overall_misses                  447                       # number of overall misses
-system.cpu.icache.overall_mshr_hits               137                       # number of overall MSHR hits
-system.cpu.icache.overall_mshr_miss_latency      1326190                       # number of overall MSHR miss cycles
-system.cpu.icache.overall_mshr_miss_rate     0.048696                       # mshr miss rate for overall accesses
+system.cpu.icache.overall_hits                   1280                       # number of overall hits
+system.cpu.icache.overall_miss_latency        2209878                       # number of overall miss cycles
+system.cpu.icache.overall_miss_rate          0.251462                       # miss rate for overall accesses
+system.cpu.icache.overall_misses                  430                       # number of overall misses
+system.cpu.icache.overall_mshr_hits               120                       # number of overall MSHR hits
+system.cpu.icache.overall_mshr_miss_latency      1348237                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_rate     0.181287                       # mshr miss rate for overall accesses
 system.cpu.icache.overall_mshr_misses             310                       # number of overall MSHR misses
 system.cpu.icache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.icache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
@@ -218,74 +218,74 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.icache.replacements                      0                       # number of replacements
 system.cpu.icache.sampled_refs                    310                       # Sample count of references to valid blocks.
 system.cpu.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse                147.070711                       # Cycle average of tags in use
-system.cpu.icache.total_refs                     5919                       # Total number of references to valid blocks.
+system.cpu.icache.tagsinuse                148.421347                       # Cycle average of tags in use
+system.cpu.icache.total_refs                     1280                       # Total number of references to valid blocks.
 system.cpu.icache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.icache.writebacks                        0                       # number of writebacks
-system.cpu.idleCycles                         1345393                       # Total number of cycles that the CPU has spent unscheduled due to idling
-system.cpu.iew.EXEC:branches                     2362                       # Number of branches executed
+system.cpu.idleCycles                         1299916                       # Total number of cycles that the CPU has spent unscheduled due to idling
+system.cpu.iew.EXEC:branches                     1267                       # Number of branches executed
 system.cpu.iew.EXEC:nop                            48                       # number of nop insts executed
-system.cpu.iew.EXEC:rate                     0.247123                       # Inst execution rate
-system.cpu.iew.EXEC:refs                         5464                       # number of memory reference insts executed
-system.cpu.iew.EXEC:stores                       2131                       # Number of stores executed
+system.cpu.iew.EXEC:rate                     0.270476                       # Inst execution rate
+system.cpu.iew.EXEC:refs                         2748                       # number of memory reference insts executed
+system.cpu.iew.EXEC:stores                       1031                       # Number of stores executed
 system.cpu.iew.EXEC:swp                             0                       # number of swp insts executed
-system.cpu.iew.WB:consumers                      6466                       # num instructions consuming a value
-system.cpu.iew.WB:count                         11625                       # cumulative count of insts written-back
-system.cpu.iew.WB:fanout                     0.798948                       # average fanout of values written-back
+system.cpu.iew.WB:consumers                      5354                       # num instructions consuming a value
+system.cpu.iew.WB:count                          8160                       # cumulative count of insts written-back
+system.cpu.iew.WB:fanout                     0.757378                       # average fanout of values written-back
 system.cpu.iew.WB:penalized                         0                       # number of instrctions required to write to 'other' IQ
 system.cpu.iew.WB:penalized_rate                    0                       # fraction of instructions written-back that wrote to 'other' IQ
-system.cpu.iew.WB:producers                      5166                       # num instructions producing a value
-system.cpu.iew.WB:rate                       0.212360                       # insts written-back per cycle
-system.cpu.iew.WB:sent                          11698                       # cumulative count of insts sent to commit
-system.cpu.iew.branchMispredicts                  401                       # Number of branch mispredicts detected at execute
+system.cpu.iew.WB:producers                      4055                       # num instructions producing a value
+system.cpu.iew.WB:rate                       0.261379                       # insts written-back per cycle
+system.cpu.iew.WB:sent                           8228                       # cumulative count of insts sent to commit
+system.cpu.iew.branchMispredicts                  404                       # Number of branch mispredicts detected at execute
 system.cpu.iew.iewBlockCycles                    7230                       # Number of cycles IEW is blocking
-system.cpu.iew.iewDispLoadInsts                  3770                       # Number of dispatched load instructions
+system.cpu.iew.iewDispLoadInsts                  2144                       # Number of dispatched load instructions
 system.cpu.iew.iewDispNonSpecInsts                 24                       # Number of dispatched non-speculative instructions
-system.cpu.iew.iewDispSquashedInsts              2547                       # Number of squashed instructions skipped by dispatch
-system.cpu.iew.iewDispStoreInsts                 3723                       # Number of dispatched store instructions
-system.cpu.iew.iewDispatchedInsts               19439                       # Number of instructions dispatched to IQ
-system.cpu.iew.iewExecLoadInsts                  3333                       # Number of load instructions executed
-system.cpu.iew.iewExecSquashedInsts               305                       # Number of squashed instructions skipped in execute
-system.cpu.iew.iewExecutedInsts                 13528                       # Number of executed instructions
+system.cpu.iew.iewDispSquashedInsts               179                       # Number of squashed instructions skipped by dispatch
+system.cpu.iew.iewDispStoreInsts                 1221                       # Number of dispatched store instructions
+system.cpu.iew.iewDispatchedInsts               10469                       # Number of instructions dispatched to IQ
+system.cpu.iew.iewExecLoadInsts                  1717                       # Number of load instructions executed
+system.cpu.iew.iewExecSquashedInsts               299                       # Number of squashed instructions skipped in execute
+system.cpu.iew.iewExecutedInsts                  8444                       # Number of executed instructions
 system.cpu.iew.iewIQFullEvents                     10                       # Number of times the IQ has become full, causing a stall
 system.cpu.iew.iewIdleCycles                        0                       # Number of cycles IEW is idle
 system.cpu.iew.iewLSQFullEvents                     1                       # Number of times the LSQ has become full, causing a stall
-system.cpu.iew.iewSquashCycles                   2527                       # Number of cycles IEW is squashing
+system.cpu.iew.iewSquashCycles                    907                       # Number of cycles IEW is squashing
 system.cpu.iew.iewUnblockCycles                    39                       # Number of cycles IEW is unblocking
 system.cpu.iew.lsq.thread.0.blockedLoads            0                       # Number of blocked loads due to partial load-store forwarding
-system.cpu.iew.lsq.thread.0.cacheBlocked         1656                       # Number of times an access to memory failed due to the cache being blocked
+system.cpu.iew.lsq.thread.0.cacheBlocked            0                       # Number of times an access to memory failed due to the cache being blocked
 system.cpu.iew.lsq.thread.0.forwLoads              81                       # Number of loads that had data forwarded from stores
 system.cpu.iew.lsq.thread.0.ignoredResponses            3                       # Number of memory responses ignored because the instruction is squashed
 system.cpu.iew.lsq.thread.0.invAddrLoads            0                       # Number of loads ignored due to an invalid address
 system.cpu.iew.lsq.thread.0.invAddrSwpfs            0                       # Number of software prefetches ignored due to an invalid address
-system.cpu.iew.lsq.thread.0.memOrderViolation           61                       # Number of memory ordering violations
+system.cpu.iew.lsq.thread.0.memOrderViolation           60                       # Number of memory ordering violations
 system.cpu.iew.lsq.thread.0.rescheduledLoads            1                       # Number of loads that were rescheduled
-system.cpu.iew.lsq.thread.0.squashedLoads         2791                       # Number of loads squashed
-system.cpu.iew.lsq.thread.0.squashedStores         2911                       # Number of stores squashed
-system.cpu.iew.memOrderViolationEvents             61                       # Number of memory order violations
+system.cpu.iew.lsq.thread.0.squashedLoads         1165                       # Number of loads squashed
+system.cpu.iew.lsq.thread.0.squashedStores          409                       # Number of stores squashed
+system.cpu.iew.memOrderViolationEvents             60                       # Number of memory order violations
 system.cpu.iew.predictedNotTakenIncorrect          279                       # Number of branches that were predicted not taken incorrectly
-system.cpu.iew.predictedTakenIncorrect            122                       # Number of branches that were predicted taken incorrectly
-system.cpu.ipc                               0.004016                       # IPC: Instructions Per Cycle
-system.cpu.ipc_total                         0.004016                       # IPC: Total IPC of All Threads
-system.cpu.iq.ISSUE:FU_type_0                   13833                       # Type of FU issued
+system.cpu.iew.predictedTakenIncorrect            125                       # Number of branches that were predicted taken incorrectly
+system.cpu.ipc                               0.004224                       # IPC: Instructions Per Cycle
+system.cpu.ipc_total                         0.004224                       # IPC: Total IPC of All Threads
+system.cpu.iq.ISSUE:FU_type_0                    8743                       # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0.start_dist
-                          (null)            2      0.01%            # Type of FU issued
-                          IntAlu         8240     59.57%            # Type of FU issued
+                          (null)            2      0.02%            # Type of FU issued
+                          IntAlu         5868     67.12%            # Type of FU issued
                          IntMult            1      0.01%            # Type of FU issued
                           IntDiv            0      0.00%            # Type of FU issued
-                        FloatAdd            2      0.01%            # Type of FU issued
+                        FloatAdd            2      0.02%            # Type of FU issued
                         FloatCmp            0      0.00%            # Type of FU issued
                         FloatCvt            0      0.00%            # Type of FU issued
                        FloatMult            0      0.00%            # Type of FU issued
                         FloatDiv            0      0.00%            # Type of FU issued
                        FloatSqrt            0      0.00%            # Type of FU issued
-                         MemRead         3428     24.78%            # Type of FU issued
-                        MemWrite         2160     15.61%            # Type of FU issued
+                         MemRead         1809     20.69%            # Type of FU issued
+                        MemWrite         1061     12.14%            # Type of FU issued
                        IprAccess            0      0.00%            # Type of FU issued
                     InstPrefetch            0      0.00%            # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0.end_dist
 system.cpu.iq.ISSUE:fu_busy_cnt                    87                       # FU busy when requested
-system.cpu.iq.ISSUE:fu_busy_rate             0.006289                       # FU busy rate (busy events/executed inst)
+system.cpu.iq.ISSUE:fu_busy_rate             0.009951                       # FU busy rate (busy events/executed inst)
 system.cpu.iq.ISSUE:fu_full.start_dist
                           (null)            0      0.00%            # attempts to use FU when none available
                           IntAlu            1      1.15%            # attempts to use FU when none available
@@ -303,37 +303,37 @@ system.cpu.iq.ISSUE:fu_full.start_dist
                     InstPrefetch            0      0.00%            # attempts to use FU when none available
 system.cpu.iq.ISSUE:fu_full.end_dist
 system.cpu.iq.ISSUE:issued_per_cycle.start_dist                     # Number of insts issued each cycle
-system.cpu.iq.ISSUE:issued_per_cycle.samples        54742                      
+system.cpu.iq.ISSUE:issued_per_cycle.samples        31219                      
 system.cpu.iq.ISSUE:issued_per_cycle.min_value            0                      
-                               0        47874   8745.39%           
-                               1         3270    597.35%           
-                               2         1302    237.84%           
-                               3         1673    305.62%           
-                               4          327     59.73%           
-                               5          188     34.34%           
-                               6           75     13.70%           
-                               7           22      4.02%           
-                               8           11      2.01%           
+                               0        27042   8662.03%           
+                               1         1845    590.99%           
+                               2         1151    368.69%           
+                               3          572    183.22%           
+                               4          318    101.86%           
+                               5          182     58.30%           
+                               6           76     24.34%           
+                               7           22      7.05%           
+                               8           11      3.52%           
 system.cpu.iq.ISSUE:issued_per_cycle.max_value            8                      
 system.cpu.iq.ISSUE:issued_per_cycle.end_dist
 
-system.cpu.iq.ISSUE:rate                     0.252694                       # Inst issue rate
-system.cpu.iq.iqInstsAdded                      19367                       # Number of instructions added to the IQ (excludes non-spec)
-system.cpu.iq.iqInstsIssued                     13833                       # Number of instructions issued
+system.cpu.iq.ISSUE:rate                     0.280054                       # Inst issue rate
+system.cpu.iq.iqInstsAdded                      10397                       # Number of instructions added to the IQ (excludes non-spec)
+system.cpu.iq.iqInstsIssued                      8743                       # Number of instructions issued
 system.cpu.iq.iqNonSpecInstsAdded                  24                       # Number of non-speculative instructions added to the IQ
-system.cpu.iq.iqSquashedInstsExamined           13339                       # Number of squashed instructions iterated over during squash; mainly for profiling
-system.cpu.iq.iqSquashedInstsIssued                73                       # Number of squashed instructions issued
+system.cpu.iq.iqSquashedInstsExamined            4378                       # Number of squashed instructions iterated over during squash; mainly for profiling
+system.cpu.iq.iqSquashedInstsIssued                68                       # Number of squashed instructions issued
 system.cpu.iq.iqSquashedNonSpecRemoved              7                       # Number of squashed non-spec instructions that were removed
-system.cpu.iq.iqSquashedOperandsExamined         9527                       # Number of squashed operands that are examined and possibly removed from graph
-system.cpu.l2cache.ReadReq_accesses               480                       # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_avg_miss_latency  4520.693750                       # average ReadReq miss latency
-system.cpu.l2cache.ReadReq_avg_mshr_miss_latency  2303.372917                       # average ReadReq mshr miss latency
-system.cpu.l2cache.ReadReq_miss_latency       2169933                       # number of ReadReq miss cycles
+system.cpu.iq.iqSquashedOperandsExamined         2580                       # Number of squashed operands that are examined and possibly removed from graph
+system.cpu.l2cache.ReadReq_accesses               481                       # number of ReadReq accesses(hits+misses)
+system.cpu.l2cache.ReadReq_avg_miss_latency  4807.594595                       # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_avg_mshr_miss_latency  2390.114345                       # average ReadReq mshr miss latency
+system.cpu.l2cache.ReadReq_miss_latency       2312453                       # number of ReadReq miss cycles
 system.cpu.l2cache.ReadReq_miss_rate                1                       # miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_misses                 480                       # number of ReadReq misses
-system.cpu.l2cache.ReadReq_mshr_miss_latency      1105619                       # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_misses                 481                       # number of ReadReq misses
+system.cpu.l2cache.ReadReq_mshr_miss_latency      1149645                       # number of ReadReq MSHR miss cycles
 system.cpu.l2cache.ReadReq_mshr_miss_rate            1                       # mshr miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_mshr_misses            480                       # number of ReadReq MSHR misses
+system.cpu.l2cache.ReadReq_mshr_misses            481                       # number of ReadReq MSHR misses
 system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.l2cache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.l2cache.avg_refs                         0                       # Average number of references to valid blocks.
@@ -342,32 +342,32 @@ system.cpu.l2cache.blocked_no_targets               0                       # nu
 system.cpu.l2cache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.l2cache.cache_copies                     0                       # number of cache copies performed
-system.cpu.l2cache.demand_accesses                480                       # number of demand (read+write) accesses
-system.cpu.l2cache.demand_avg_miss_latency  4520.693750                       # average overall miss latency
-system.cpu.l2cache.demand_avg_mshr_miss_latency  2303.372917                       # average overall mshr miss latency
+system.cpu.l2cache.demand_accesses                481                       # number of demand (read+write) accesses
+system.cpu.l2cache.demand_avg_miss_latency  4807.594595                       # average overall miss latency
+system.cpu.l2cache.demand_avg_mshr_miss_latency  2390.114345                       # average overall mshr miss latency
 system.cpu.l2cache.demand_hits                      0                       # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency        2169933                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_latency        2312453                       # number of demand (read+write) miss cycles
 system.cpu.l2cache.demand_miss_rate                 1                       # miss rate for demand accesses
-system.cpu.l2cache.demand_misses                  480                       # number of demand (read+write) misses
+system.cpu.l2cache.demand_misses                  481                       # number of demand (read+write) misses
 system.cpu.l2cache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
-system.cpu.l2cache.demand_mshr_miss_latency      1105619                       # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_latency      1149645                       # number of demand (read+write) MSHR miss cycles
 system.cpu.l2cache.demand_mshr_miss_rate            1                       # mshr miss rate for demand accesses
-system.cpu.l2cache.demand_mshr_misses             480                       # number of demand (read+write) MSHR misses
+system.cpu.l2cache.demand_mshr_misses             481                       # number of demand (read+write) MSHR misses
 system.cpu.l2cache.fast_writes                      0                       # number of fast writes performed
 system.cpu.l2cache.mshr_cap_events                  0                       # number of times MSHR cap was activated
 system.cpu.l2cache.no_allocate_misses               0                       # Number of misses that were no-allocate
-system.cpu.l2cache.overall_accesses               480                       # number of overall (read+write) accesses
-system.cpu.l2cache.overall_avg_miss_latency  4520.693750                       # average overall miss latency
-system.cpu.l2cache.overall_avg_mshr_miss_latency  2303.372917                       # average overall mshr miss latency
+system.cpu.l2cache.overall_accesses               481                       # number of overall (read+write) accesses
+system.cpu.l2cache.overall_avg_miss_latency  4807.594595                       # average overall miss latency
+system.cpu.l2cache.overall_avg_mshr_miss_latency  2390.114345                       # average overall mshr miss latency
 system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.l2cache.overall_hits                     0                       # number of overall hits
-system.cpu.l2cache.overall_miss_latency       2169933                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_latency       2312453                       # number of overall miss cycles
 system.cpu.l2cache.overall_miss_rate                1                       # miss rate for overall accesses
-system.cpu.l2cache.overall_misses                 480                       # number of overall misses
+system.cpu.l2cache.overall_misses                 481                       # number of overall misses
 system.cpu.l2cache.overall_mshr_hits                0                       # number of overall MSHR hits
-system.cpu.l2cache.overall_mshr_miss_latency      1105619                       # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_latency      1149645                       # number of overall MSHR miss cycles
 system.cpu.l2cache.overall_mshr_miss_rate            1                       # mshr miss rate for overall accesses
-system.cpu.l2cache.overall_mshr_misses            480                       # number of overall MSHR misses
+system.cpu.l2cache.overall_mshr_misses            481                       # number of overall MSHR misses
 system.cpu.l2cache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.l2cache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu.l2cache.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
@@ -380,31 +380,31 @@ system.cpu.l2cache.prefetcher.num_hwpf_removed_MSHR_hit            0
 system.cpu.l2cache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
 system.cpu.l2cache.replacements                     0                       # number of replacements
-system.cpu.l2cache.sampled_refs                   480                       # Sample count of references to valid blocks.
+system.cpu.l2cache.sampled_refs                   481                       # Sample count of references to valid blocks.
 system.cpu.l2cache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse               248.469469                       # Cycle average of tags in use
+system.cpu.l2cache.tagsinuse               250.999286                       # Cycle average of tags in use
 system.cpu.l2cache.total_refs                       0                       # Total number of references to valid blocks.
 system.cpu.l2cache.warmup_cycle                     0                       # Cycle when the warmup percentage was hit.
 system.cpu.l2cache.writebacks                       0                       # number of writebacks
-system.cpu.numCycles                            54742                       # number of cpu cycles simulated
-system.cpu.rename.RENAME:BlockCycles             7851                       # Number of cycles rename is blocking
+system.cpu.numCycles                            31219                       # number of cpu cycles simulated
+system.cpu.rename.RENAME:BlockCycles             7810                       # Number of cycles rename is blocking
 system.cpu.rename.RENAME:CommittedMaps           4051                       # Number of HB maps that are committed
 system.cpu.rename.RENAME:IQFullEvents               2                       # Number of times rename has blocked due to IQ full
-system.cpu.rename.RENAME:IdleCycles             29263                       # Number of cycles rename is idle
-system.cpu.rename.RENAME:LSQFullEvents            458                       # Number of times rename has blocked due to LSQ full
-system.cpu.rename.RENAME:ROBFullEvents              8                       # Number of times rename has blocked due to ROB full
-system.cpu.rename.RENAME:RenameLookups          35953                       # Number of register rename lookups that rename has made
-system.cpu.rename.RENAME:RenamedInsts           29156                       # Number of instructions processed by rename
-system.cpu.rename.RENAME:RenamedOperands        20115                       # Number of destination operands rename has renamed
-system.cpu.rename.RENAME:RunCycles               5451                       # Number of cycles rename is running
-system.cpu.rename.RENAME:SquashCycles            2527                       # Number of cycles rename is squashing
-system.cpu.rename.RENAME:UnblockCycles            486                       # Number of cycles rename is unblocking
-system.cpu.rename.RENAME:UndoneMaps             16064                       # Number of HB maps that are undone due to squashing
+system.cpu.rename.RENAME:IdleCycles             10837                       # Number of cycles rename is idle
+system.cpu.rename.RENAME:LSQFullEvents            465                       # Number of times rename has blocked due to LSQ full
+system.cpu.rename.RENAME:ROBFullEvents              6                       # Number of times rename has blocked due to ROB full
+system.cpu.rename.RENAME:RenameLookups          14384                       # Number of register rename lookups that rename has made
+system.cpu.rename.RENAME:RenamedInsts           11306                       # Number of instructions processed by rename
+system.cpu.rename.RENAME:RenamedOperands         8499                       # Number of destination operands rename has renamed
+system.cpu.rename.RENAME:RunCycles               2010                       # Number of cycles rename is running
+system.cpu.rename.RENAME:SquashCycles             907                       # Number of cycles rename is squashing
+system.cpu.rename.RENAME:UnblockCycles            491                       # Number of cycles rename is unblocking
+system.cpu.rename.RENAME:UndoneMaps              4448                       # Number of HB maps that are undone due to squashing
 system.cpu.rename.RENAME:serializeStallCycles         9164                       # count of cycles rename stalled for serializing inst
 system.cpu.rename.RENAME:serializingInsts           27                       # count of serializing insts renamed
-system.cpu.rename.RENAME:skidInsts                831                       # count of insts added to the skid buffer
+system.cpu.rename.RENAME:skidInsts                825                       # count of insts added to the skid buffer
 system.cpu.rename.RENAME:tempSerializingInsts           21                       # count of temporary serializing insts renamed
-system.cpu.timesIdled                             369                       # Number of times that the entire CPU went into an idle state and unscheduled itself
+system.cpu.timesIdled                             365                       # Number of times that the entire CPU went into an idle state and unscheduled itself
 system.cpu.workload.PROG:num_syscalls              17                       # Number of system calls
 
 ---------- End Simulation Statistics   ----------
diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/stderr b/tests/quick/00.hello/ref/alpha/linux/o3-timing/stderr
index 684350ff9..8053728f7 100644
--- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/stderr
+++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/stderr
@@ -1,3 +1,3 @@
-0: system.remote_gdb.listener: listening for remote gdb #0 on port 7000
+0: system.remote_gdb.listener: listening for remote gdb on port 7001
 warn: Entering event queue @ 0.  Starting simulation...
 warn: Increasing stack size by one page.
diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout b/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout
index cbdc4ee25..ef47b0265 100644
--- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout
+++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout
@@ -6,9 +6,9 @@ The Regents of The University of Michigan
 All Rights Reserved
 
 
-M5 compiled Mar 24 2007 13:51:02
-M5 started Sat Mar 24 13:51:12 2007
-M5 executing on zizzer.eecs.umich.edu
-command line: build/ALPHA_SE/m5.opt -d build/ALPHA_SE/tests/opt/quick/00.hello/alpha/linux/o3-timing tests/run.py quick/00.hello/alpha/linux/o3-timing
+M5 compiled Mar 30 2007 13:12:55
+M5 started Fri Mar 30 13:13:02 2007
+M5 executing on zamp.eecs.umich.edu
+command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/00.hello/alpha/linux/o3-timing tests/run.py quick/00.hello/alpha/linux/o3-timing
 Global frequency set at 1000000000000 ticks per second
-Exiting @ tick 1400134 because target called exit()
+Exiting @ tick 1331134 because target called exit()
diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini
index ea499f4f1..bd6b9bcdc 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini
+++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini
@@ -115,7 +115,7 @@ split=false
 split_size=0
 store_compressed=false
 subblock_size=0
-tgts_per_mshr=5
+tgts_per_mshr=20
 trace_addr=0
 two_queue=false
 write_buffers=8
@@ -291,7 +291,7 @@ split=false
 split_size=0
 store_compressed=false
 subblock_size=0
-tgts_per_mshr=5
+tgts_per_mshr=20
 trace_addr=0
 two_queue=false
 write_buffers=8
diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.out b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.out
index 6672039dd..58df46dcb 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.out
+++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.out
@@ -251,7 +251,7 @@ assoc=2
 block_size=64
 latency=1
 mshrs=10
-tgts_per_mshr=5
+tgts_per_mshr=20
 write_buffers=8
 prioritizeRequests=false
 protocol=null
@@ -289,7 +289,7 @@ assoc=2
 block_size=64
 latency=1
 mshrs=10
-tgts_per_mshr=5
+tgts_per_mshr=20
 write_buffers=8
 prioritizeRequests=false
 protocol=null
diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt
index f855ff850..1919ca3fe 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt
+++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt
@@ -1,40 +1,40 @@
 
 ---------- Begin Simulation Statistics ----------
 global.BPredUnit.BTBCorrect                         0                       # Number of correct BTB predictions (this stat may not work properly.
-global.BPredUnit.BTBHits                          200                       # Number of BTB hits
-global.BPredUnit.BTBLookups                       718                       # Number of BTB lookups
-global.BPredUnit.RASInCorrect                      42                       # Number of incorrect RAS predictions.
-global.BPredUnit.condIncorrect                    218                       # Number of conditional branches incorrect
-global.BPredUnit.condPredicted                    459                       # Number of conditional branches predicted
-global.BPredUnit.lookups                          898                       # Number of BP lookups
-global.BPredUnit.usedRAS                          171                       # Number of times the RAS was used to get a target.
-host_inst_rate                                  12517                       # Simulator instruction rate (inst/s)
-host_mem_usage                                 155528                       # Number of bytes of host memory used
-host_seconds                                     0.19                       # Real time elapsed on the host
-host_tick_rate                                3937113                       # Simulator tick rate (ticks/s)
-memdepunit.memDep.conflictingLoads                 10                       # Number of conflicting loads.
+global.BPredUnit.BTBHits                          187                       # Number of BTB hits
+global.BPredUnit.BTBLookups                       653                       # Number of BTB lookups
+global.BPredUnit.RASInCorrect                      41                       # Number of incorrect RAS predictions.
+global.BPredUnit.condIncorrect                    217                       # Number of conditional branches incorrect
+global.BPredUnit.condPredicted                    426                       # Number of conditional branches predicted
+global.BPredUnit.lookups                          832                       # Number of BP lookups
+global.BPredUnit.usedRAS                          170                       # Number of times the RAS was used to get a target.
+host_inst_rate                                  19984                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 153584                       # Number of bytes of host memory used
+host_seconds                                     0.12                       # Real time elapsed on the host
+host_tick_rate                                6228839                       # Simulator tick rate (ticks/s)
+memdepunit.memDep.conflictingLoads                  9                       # Number of conflicting loads.
 memdepunit.memDep.conflictingStores                 8                       # Number of conflicting stores.
-memdepunit.memDep.insertedLoads                   783                       # Number of loads inserted to the mem dependence unit.
-memdepunit.memDep.insertedStores                  381                       # Number of stores inserted to the mem dependence unit.
+memdepunit.memDep.insertedLoads                   701                       # Number of loads inserted to the mem dependence unit.
+memdepunit.memDep.insertedStores                  382                       # Number of stores inserted to the mem dependence unit.
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
 sim_insts                                        2387                       # Number of instructions simulated
 sim_seconds                                  0.000001                       # Number of seconds simulated
-sim_ticks                                      752028                       # Number of ticks simulated
+sim_ticks                                      746028                       # Number of ticks simulated
 system.cpu.commit.COM:branches                    396                       # Number of branches committed
-system.cpu.commit.COM:bw_lim_events                51                       # number cycles where commit BW limit reached
+system.cpu.commit.COM:bw_lim_events                52                       # number cycles where commit BW limit reached
 system.cpu.commit.COM:bw_limited                    0                       # number of insts not committed due to BW limits
 system.cpu.commit.COM:committed_per_cycle.start_dist                     # Number of insts commited each cycle
-system.cpu.commit.COM:committed_per_cycle.samples        28200                      
+system.cpu.commit.COM:committed_per_cycle.samples        29809                      
 system.cpu.commit.COM:committed_per_cycle.min_value            0                      
-                               0        27273   9671.28%           
-                               1          240     85.11%           
-                               2          328    116.31%           
-                               3          127     45.04%           
-                               4           80     28.37%           
-                               5           54     19.15%           
-                               6           28      9.93%           
-                               7           19      6.74%           
-                               8           51     18.09%           
+                               0        28885   9690.03%           
+                               1          239     80.18%           
+                               2          325    109.03%           
+                               3          129     43.28%           
+                               4           78     26.17%           
+                               5           53     17.78%           
+                               6           29      9.73%           
+                               7           19      6.37%           
+                               8           52     17.44%           
 system.cpu.commit.COM:committed_per_cycle.max_value            8                      
 system.cpu.commit.COM:committed_per_cycle.end_dist
 
@@ -43,69 +43,69 @@ system.cpu.commit.COM:loads                       415                       # Nu
 system.cpu.commit.COM:membars                       0                       # Number of memory barriers committed
 system.cpu.commit.COM:refs                        709                       # Number of memory references committed
 system.cpu.commit.COM:swp_count                     0                       # Number of s/w prefetches committed
-system.cpu.commit.branchMispredicts               141                       # The number of times a branch was mispredicted
+system.cpu.commit.branchMispredicts               140                       # The number of times a branch was mispredicted
 system.cpu.commit.commitCommittedInsts           2576                       # The number of committed instructions
 system.cpu.commit.commitNonSpecStalls               4                       # The number of times commit has been forced to stall to communicate backwards
-system.cpu.commit.commitSquashedInsts            1703                       # The number of squashed insts skipped by commit
+system.cpu.commit.commitSquashedInsts            1536                       # The number of squashed insts skipped by commit
 system.cpu.committedInsts                        2387                       # Number of Instructions Simulated
 system.cpu.committedInsts_total                  2387                       # Number of Instructions Simulated
-system.cpu.cpi                             315.051529                       # CPI: Cycles Per Instruction
-system.cpu.cpi_total                       315.051529                       # CPI: Total CPI of All Threads
-system.cpu.dcache.ReadReq_accesses                560                       # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_avg_miss_latency  7232.163043                       # average ReadReq miss latency
-system.cpu.dcache.ReadReq_avg_mshr_miss_latency  7288.491803                       # average ReadReq mshr miss latency
-system.cpu.dcache.ReadReq_hits                    468                       # number of ReadReq hits
-system.cpu.dcache.ReadReq_miss_latency         665359                       # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_rate          0.164286                       # miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_misses                   92                       # number of ReadReq misses
-system.cpu.dcache.ReadReq_mshr_hits                31                       # number of ReadReq MSHR hits
-system.cpu.dcache.ReadReq_mshr_miss_latency       444598                       # number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_rate     0.108929                       # mshr miss rate for ReadReq accesses
+system.cpu.cpi                             312.537914                       # CPI: Cycles Per Instruction
+system.cpu.cpi_total                       312.537914                       # CPI: Total CPI of All Threads
+system.cpu.dcache.ReadReq_accesses                565                       # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_avg_miss_latency  7055.843750                       # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency  7158.016393                       # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_hits                    469                       # number of ReadReq hits
+system.cpu.dcache.ReadReq_miss_latency         677361                       # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_rate          0.169912                       # miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_misses                   96                       # number of ReadReq misses
+system.cpu.dcache.ReadReq_mshr_hits                35                       # number of ReadReq MSHR hits
+system.cpu.dcache.ReadReq_mshr_miss_latency       436639                       # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_rate     0.107965                       # mshr miss rate for ReadReq accesses
 system.cpu.dcache.ReadReq_mshr_misses              61                       # number of ReadReq MSHR misses
 system.cpu.dcache.WriteReq_accesses               294                       # number of WriteReq accesses(hits+misses)
-system.cpu.dcache.WriteReq_avg_miss_latency  6647.685714                       # average WriteReq miss latency
-system.cpu.dcache.WriteReq_avg_mshr_miss_latency  6571.666667                       # average WriteReq mshr miss latency
-system.cpu.dcache.WriteReq_hits                   224                       # number of WriteReq hits
-system.cpu.dcache.WriteReq_miss_latency        465338                       # number of WriteReq miss cycles
-system.cpu.dcache.WriteReq_miss_rate         0.238095                       # miss rate for WriteReq accesses
-system.cpu.dcache.WriteReq_misses                  70                       # number of WriteReq misses
-system.cpu.dcache.WriteReq_mshr_hits               46                       # number of WriteReq MSHR hits
-system.cpu.dcache.WriteReq_mshr_miss_latency       157720                       # number of WriteReq MSHR miss cycles
+system.cpu.dcache.WriteReq_avg_miss_latency  7089.086420                       # average WriteReq miss latency
+system.cpu.dcache.WriteReq_avg_mshr_miss_latency  6946.208333                       # average WriteReq mshr miss latency
+system.cpu.dcache.WriteReq_hits                   213                       # number of WriteReq hits
+system.cpu.dcache.WriteReq_miss_latency        574216                       # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_rate         0.275510                       # miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_misses                  81                       # number of WriteReq misses
+system.cpu.dcache.WriteReq_mshr_hits               57                       # number of WriteReq MSHR hits
+system.cpu.dcache.WriteReq_mshr_miss_latency       166709                       # number of WriteReq MSHR miss cycles
 system.cpu.dcache.WriteReq_mshr_miss_rate     0.081633                       # mshr miss rate for WriteReq accesses
 system.cpu.dcache.WriteReq_mshr_misses             24                       # number of WriteReq MSHR misses
 system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.dcache.avg_blocked_cycles_no_targets  2980.375000                       # average number of cycles each access was blocked
-system.cpu.dcache.avg_refs                   8.141176                       # Average number of references to valid blocks.
+system.cpu.dcache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
+system.cpu.dcache.avg_refs                   8.023529                       # Average number of references to valid blocks.
 system.cpu.dcache.blocked_no_mshrs                  0                       # number of cycles access was blocked
-system.cpu.dcache.blocked_no_targets                8                       # number of cycles access was blocked
+system.cpu.dcache.blocked_no_targets                0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
-system.cpu.dcache.blocked_cycles_no_targets        23843                       # number of cycles access was blocked
+system.cpu.dcache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.dcache.cache_copies                      0                       # number of cache copies performed
-system.cpu.dcache.demand_accesses                 854                       # number of demand (read+write) accesses
-system.cpu.dcache.demand_avg_miss_latency  6979.611111                       # average overall miss latency
-system.cpu.dcache.demand_avg_mshr_miss_latency  7086.094118                       # average overall mshr miss latency
-system.cpu.dcache.demand_hits                     692                       # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency         1130697                       # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_rate           0.189696                       # miss rate for demand accesses
-system.cpu.dcache.demand_misses                   162                       # number of demand (read+write) misses
-system.cpu.dcache.demand_mshr_hits                 77                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency       602318                       # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_rate      0.099532                       # mshr miss rate for demand accesses
+system.cpu.dcache.demand_accesses                 859                       # number of demand (read+write) accesses
+system.cpu.dcache.demand_avg_miss_latency  7071.056497                       # average overall miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency  7098.211765                       # average overall mshr miss latency
+system.cpu.dcache.demand_hits                     682                       # number of demand (read+write) hits
+system.cpu.dcache.demand_miss_latency         1251577                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_rate           0.206054                       # miss rate for demand accesses
+system.cpu.dcache.demand_misses                   177                       # number of demand (read+write) misses
+system.cpu.dcache.demand_mshr_hits                 92                       # number of demand (read+write) MSHR hits
+system.cpu.dcache.demand_mshr_miss_latency       603348                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_rate      0.098952                       # mshr miss rate for demand accesses
 system.cpu.dcache.demand_mshr_misses               85                       # number of demand (read+write) MSHR misses
 system.cpu.dcache.fast_writes                       0                       # number of fast writes performed
 system.cpu.dcache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.dcache.overall_accesses                854                       # number of overall (read+write) accesses
-system.cpu.dcache.overall_avg_miss_latency  6979.611111                       # average overall miss latency
-system.cpu.dcache.overall_avg_mshr_miss_latency  7086.094118                       # average overall mshr miss latency
+system.cpu.dcache.overall_accesses                859                       # number of overall (read+write) accesses
+system.cpu.dcache.overall_avg_miss_latency  7071.056497                       # average overall miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency  7098.211765                       # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits                    692                       # number of overall hits
-system.cpu.dcache.overall_miss_latency        1130697                       # number of overall miss cycles
-system.cpu.dcache.overall_miss_rate          0.189696                       # miss rate for overall accesses
-system.cpu.dcache.overall_misses                  162                       # number of overall misses
-system.cpu.dcache.overall_mshr_hits                77                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency       602318                       # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_rate     0.099532                       # mshr miss rate for overall accesses
+system.cpu.dcache.overall_hits                    682                       # number of overall hits
+system.cpu.dcache.overall_miss_latency        1251577                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_rate          0.206054                       # miss rate for overall accesses
+system.cpu.dcache.overall_misses                  177                       # number of overall misses
+system.cpu.dcache.overall_mshr_hits                92                       # number of overall MSHR hits
+system.cpu.dcache.overall_mshr_miss_latency       603348                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_rate     0.098952                       # mshr miss rate for overall accesses
 system.cpu.dcache.overall_mshr_misses              85                       # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.dcache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
@@ -121,88 +121,88 @@ system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.dcache.replacements                      0                       # number of replacements
 system.cpu.dcache.sampled_refs                     85                       # Sample count of references to valid blocks.
 system.cpu.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse                 46.684988                       # Cycle average of tags in use
-system.cpu.dcache.total_refs                      692                       # Total number of references to valid blocks.
+system.cpu.dcache.tagsinuse                 46.650284                       # Cycle average of tags in use
+system.cpu.dcache.total_refs                      682                       # Total number of references to valid blocks.
 system.cpu.dcache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.dcache.writebacks                        0                       # number of writebacks
-system.cpu.decode.DECODE:BlockedCycles          21870                       # Number of cycles decode is blocked
+system.cpu.decode.DECODE:BlockedCycles          23701                       # Number of cycles decode is blocked
 system.cpu.decode.DECODE:BranchMispred             79                       # Number of times decode detected a branch misprediction
-system.cpu.decode.DECODE:BranchResolved           150                       # Number of times decode resolved a branch
-system.cpu.decode.DECODE:DecodedInsts            4900                       # Number of instructions handled by decode
-system.cpu.decode.DECODE:IdleCycles              5406                       # Number of cycles decode is idle
-system.cpu.decode.DECODE:RunCycles                923                       # Number of cycles decode is running
-system.cpu.decode.DECODE:SquashCycles             336                       # Number of cycles decode is squashing
+system.cpu.decode.DECODE:BranchResolved           129                       # Number of times decode resolved a branch
+system.cpu.decode.DECODE:DecodedInsts            4617                       # Number of instructions handled by decode
+system.cpu.decode.DECODE:IdleCycles              5228                       # Number of cycles decode is idle
+system.cpu.decode.DECODE:RunCycles                877                       # Number of cycles decode is running
+system.cpu.decode.DECODE:SquashCycles             297                       # Number of cycles decode is squashing
 system.cpu.decode.DECODE:SquashedInsts            286                       # Number of squashed instructions handled by decode
-system.cpu.decode.DECODE:UnblockCycles              2                       # Number of cycles decode is unblocking
-system.cpu.fetch.Branches                         898                       # Number of branches that fetch encountered
-system.cpu.fetch.CacheLines                       813                       # Number of cache lines fetched
-system.cpu.fetch.Cycles                          1769                       # Number of cycles fetch has run and was not squashing or blocked
-system.cpu.fetch.IcacheSquashes                   146                       # Number of outstanding Icache misses that were squashed
-system.cpu.fetch.Insts                           5593                       # Number of instructions fetch has processed
-system.cpu.fetch.SquashCycles                     258                       # Number of cycles fetch has spent squashing
-system.cpu.fetch.branchRate                  0.031468                       # Number of branch fetches per cycle
-system.cpu.fetch.icacheStallCycles                813                       # Number of cycles fetch is stalled on an Icache miss
-system.cpu.fetch.predictedBranches                371                       # Number of branches that fetch has predicted taken
-system.cpu.fetch.rate                        0.195991                       # Number of inst fetches per cycle
+system.cpu.decode.DECODE:UnblockCycles              4                       # Number of cycles decode is unblocking
+system.cpu.fetch.Branches                         832                       # Number of branches that fetch encountered
+system.cpu.fetch.CacheLines                       760                       # Number of cache lines fetched
+system.cpu.fetch.Cycles                          1674                       # Number of cycles fetch has run and was not squashing or blocked
+system.cpu.fetch.IcacheSquashes                   131                       # Number of outstanding Icache misses that were squashed
+system.cpu.fetch.Insts                           5310                       # Number of instructions fetch has processed
+system.cpu.fetch.SquashCycles                     230                       # Number of cycles fetch has spent squashing
+system.cpu.fetch.branchRate                  0.027635                       # Number of branch fetches per cycle
+system.cpu.fetch.icacheStallCycles                760                       # Number of cycles fetch is stalled on an Icache miss
+system.cpu.fetch.predictedBranches                357                       # Number of branches that fetch has predicted taken
+system.cpu.fetch.rate                        0.176371                       # Number of inst fetches per cycle
 system.cpu.fetch.rateDist.start_dist                           # Number of instructions fetched each cycle (Total)
-system.cpu.fetch.rateDist.samples               28537                      
+system.cpu.fetch.rateDist.samples               30107                      
 system.cpu.fetch.rateDist.min_value                 0                      
-                               0        27581   9665.00%           
-                               1           50     17.52%           
-                               2           84     29.44%           
-                               3           78     27.33%           
-                               4          118     41.35%           
-                               5           67     23.48%           
-                               6           41     14.37%           
-                               7           56     19.62%           
-                               8          462    161.90%           
+                               0        29196   9697.41%           
+                               1           37     12.29%           
+                               2           87     28.90%           
+                               3           73     24.25%           
+                               4          125     41.52%           
+                               5           66     21.92%           
+                               6           42     13.95%           
+                               7           50     16.61%           
+                               8          431    143.16%           
 system.cpu.fetch.rateDist.max_value                 8                      
 system.cpu.fetch.rateDist.end_dist
 
-system.cpu.icache.ReadReq_accesses                813                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_avg_miss_latency  4955.454183                       # average ReadReq miss latency
-system.cpu.icache.ReadReq_avg_mshr_miss_latency  4151.815217                       # average ReadReq mshr miss latency
-system.cpu.icache.ReadReq_hits                    562                       # number of ReadReq hits
-system.cpu.icache.ReadReq_miss_latency        1243819                       # number of ReadReq miss cycles
-system.cpu.icache.ReadReq_miss_rate          0.308733                       # miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_misses                  251                       # number of ReadReq misses
-system.cpu.icache.ReadReq_mshr_hits                67                       # number of ReadReq MSHR hits
-system.cpu.icache.ReadReq_mshr_miss_latency       763934                       # number of ReadReq MSHR miss cycles
-system.cpu.icache.ReadReq_mshr_miss_rate     0.226322                       # mshr miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_accesses                760                       # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_avg_miss_latency  4979.783333                       # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency  4157.255435                       # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_hits                    520                       # number of ReadReq hits
+system.cpu.icache.ReadReq_miss_latency        1195148                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_rate          0.315789                       # miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_misses                  240                       # number of ReadReq misses
+system.cpu.icache.ReadReq_mshr_hits                56                       # number of ReadReq MSHR hits
+system.cpu.icache.ReadReq_mshr_miss_latency       764935                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_rate     0.242105                       # mshr miss rate for ReadReq accesses
 system.cpu.icache.ReadReq_mshr_misses             184                       # number of ReadReq MSHR misses
 system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.icache.avg_blocked_cycles_no_targets         3445                       # average number of cycles each access was blocked
-system.cpu.icache.avg_refs                   3.054348                       # Average number of references to valid blocks.
+system.cpu.icache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
+system.cpu.icache.avg_refs                   2.826087                       # Average number of references to valid blocks.
 system.cpu.icache.blocked_no_mshrs                  0                       # number of cycles access was blocked
-system.cpu.icache.blocked_no_targets                4                       # number of cycles access was blocked
+system.cpu.icache.blocked_no_targets                0                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
-system.cpu.icache.blocked_cycles_no_targets        13780                       # number of cycles access was blocked
+system.cpu.icache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.icache.cache_copies                      0                       # number of cache copies performed
-system.cpu.icache.demand_accesses                 813                       # number of demand (read+write) accesses
-system.cpu.icache.demand_avg_miss_latency  4955.454183                       # average overall miss latency
-system.cpu.icache.demand_avg_mshr_miss_latency  4151.815217                       # average overall mshr miss latency
-system.cpu.icache.demand_hits                     562                       # number of demand (read+write) hits
-system.cpu.icache.demand_miss_latency         1243819                       # number of demand (read+write) miss cycles
-system.cpu.icache.demand_miss_rate           0.308733                       # miss rate for demand accesses
-system.cpu.icache.demand_misses                   251                       # number of demand (read+write) misses
-system.cpu.icache.demand_mshr_hits                 67                       # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_miss_latency       763934                       # number of demand (read+write) MSHR miss cycles
-system.cpu.icache.demand_mshr_miss_rate      0.226322                       # mshr miss rate for demand accesses
+system.cpu.icache.demand_accesses                 760                       # number of demand (read+write) accesses
+system.cpu.icache.demand_avg_miss_latency  4979.783333                       # average overall miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency  4157.255435                       # average overall mshr miss latency
+system.cpu.icache.demand_hits                     520                       # number of demand (read+write) hits
+system.cpu.icache.demand_miss_latency         1195148                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_rate           0.315789                       # miss rate for demand accesses
+system.cpu.icache.demand_misses                   240                       # number of demand (read+write) misses
+system.cpu.icache.demand_mshr_hits                 56                       # number of demand (read+write) MSHR hits
+system.cpu.icache.demand_mshr_miss_latency       764935                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_rate      0.242105                       # mshr miss rate for demand accesses
 system.cpu.icache.demand_mshr_misses              184                       # number of demand (read+write) MSHR misses
 system.cpu.icache.fast_writes                       0                       # number of fast writes performed
 system.cpu.icache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.icache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.icache.overall_accesses                813                       # number of overall (read+write) accesses
-system.cpu.icache.overall_avg_miss_latency  4955.454183                       # average overall miss latency
-system.cpu.icache.overall_avg_mshr_miss_latency  4151.815217                       # average overall mshr miss latency
+system.cpu.icache.overall_accesses                760                       # number of overall (read+write) accesses
+system.cpu.icache.overall_avg_miss_latency  4979.783333                       # average overall miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency  4157.255435                       # average overall mshr miss latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.icache.overall_hits                    562                       # number of overall hits
-system.cpu.icache.overall_miss_latency        1243819                       # number of overall miss cycles
-system.cpu.icache.overall_miss_rate          0.308733                       # miss rate for overall accesses
-system.cpu.icache.overall_misses                  251                       # number of overall misses
-system.cpu.icache.overall_mshr_hits                67                       # number of overall MSHR hits
-system.cpu.icache.overall_mshr_miss_latency       763934                       # number of overall MSHR miss cycles
-system.cpu.icache.overall_mshr_miss_rate     0.226322                       # mshr miss rate for overall accesses
+system.cpu.icache.overall_hits                    520                       # number of overall hits
+system.cpu.icache.overall_miss_latency        1195148                       # number of overall miss cycles
+system.cpu.icache.overall_miss_rate          0.315789                       # miss rate for overall accesses
+system.cpu.icache.overall_misses                  240                       # number of overall misses
+system.cpu.icache.overall_mshr_hits                56                       # number of overall MSHR hits
+system.cpu.icache.overall_mshr_miss_latency       764935                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_rate     0.242105                       # mshr miss rate for overall accesses
 system.cpu.icache.overall_mshr_misses             184                       # number of overall MSHR misses
 system.cpu.icache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.icache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
@@ -218,59 +218,59 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.icache.replacements                      0                       # number of replacements
 system.cpu.icache.sampled_refs                    184                       # Sample count of references to valid blocks.
 system.cpu.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse                 91.596649                       # Cycle average of tags in use
-system.cpu.icache.total_refs                      562                       # Total number of references to valid blocks.
+system.cpu.icache.tagsinuse                 91.559894                       # Cycle average of tags in use
+system.cpu.icache.total_refs                      520                       # Total number of references to valid blocks.
 system.cpu.icache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.icache.writebacks                        0                       # number of writebacks
-system.cpu.idleCycles                          723492                       # Total number of cycles that the CPU has spent unscheduled due to idling
-system.cpu.iew.EXEC:branches                      566                       # Number of branches executed
-system.cpu.iew.EXEC:nop                           267                       # number of nop insts executed
-system.cpu.iew.EXEC:rate                     0.118022                       # Inst execution rate
-system.cpu.iew.EXEC:refs                         1013                       # number of memory reference insts executed
-system.cpu.iew.EXEC:stores                        341                       # Number of stores executed
+system.cpu.idleCycles                          715922                       # Total number of cycles that the CPU has spent unscheduled due to idling
+system.cpu.iew.EXEC:branches                      547                       # Number of branches executed
+system.cpu.iew.EXEC:nop                           269                       # number of nop insts executed
+system.cpu.iew.EXEC:rate                     0.108081                       # Inst execution rate
+system.cpu.iew.EXEC:refs                          940                       # number of memory reference insts executed
+system.cpu.iew.EXEC:stores                        340                       # Number of stores executed
 system.cpu.iew.EXEC:swp                             0                       # number of swp insts executed
-system.cpu.iew.WB:consumers                      1860                       # num instructions consuming a value
-system.cpu.iew.WB:count                          3219                       # cumulative count of insts written-back
-system.cpu.iew.WB:fanout                     0.786022                       # average fanout of values written-back
+system.cpu.iew.WB:consumers                      1841                       # num instructions consuming a value
+system.cpu.iew.WB:count                          3178                       # cumulative count of insts written-back
+system.cpu.iew.WB:fanout                     0.788702                       # average fanout of values written-back
 system.cpu.iew.WB:penalized                         0                       # number of instrctions required to write to 'other' IQ
 system.cpu.iew.WB:penalized_rate                    0                       # fraction of instructions written-back that wrote to 'other' IQ
-system.cpu.iew.WB:producers                      1462                       # num instructions producing a value
-system.cpu.iew.WB:rate                       0.112801                       # insts written-back per cycle
-system.cpu.iew.WB:sent                           3234                       # cumulative count of insts sent to commit
-system.cpu.iew.branchMispredicts                  152                       # Number of branch mispredicts detected at execute
-system.cpu.iew.iewBlockCycles                   14743                       # Number of cycles IEW is blocking
-system.cpu.iew.iewDispLoadInsts                   783                       # Number of dispatched load instructions
+system.cpu.iew.WB:producers                      1452                       # num instructions producing a value
+system.cpu.iew.WB:rate                       0.105557                       # insts written-back per cycle
+system.cpu.iew.WB:sent                           3194                       # cumulative count of insts sent to commit
+system.cpu.iew.branchMispredicts                  151                       # Number of branch mispredicts detected at execute
+system.cpu.iew.iewBlockCycles                   16588                       # Number of cycles IEW is blocking
+system.cpu.iew.iewDispLoadInsts                   701                       # Number of dispatched load instructions
 system.cpu.iew.iewDispNonSpecInsts                  6                       # Number of dispatched non-speculative instructions
-system.cpu.iew.iewDispSquashedInsts                79                       # Number of squashed instructions skipped by dispatch
-system.cpu.iew.iewDispStoreInsts                  381                       # Number of dispatched store instructions
-system.cpu.iew.iewDispatchedInsts                4280                       # Number of instructions dispatched to IQ
-system.cpu.iew.iewExecLoadInsts                   672                       # Number of load instructions executed
-system.cpu.iew.iewExecSquashedInsts               123                       # Number of squashed instructions skipped in execute
-system.cpu.iew.iewExecutedInsts                  3368                       # Number of executed instructions
-system.cpu.iew.iewIQFullEvents                      8                       # Number of times the IQ has become full, causing a stall
+system.cpu.iew.iewDispSquashedInsts                62                       # Number of squashed instructions skipped by dispatch
+system.cpu.iew.iewDispStoreInsts                  382                       # Number of dispatched store instructions
+system.cpu.iew.iewDispatchedInsts                4113                       # Number of instructions dispatched to IQ
+system.cpu.iew.iewExecLoadInsts                   600                       # Number of load instructions executed
+system.cpu.iew.iewExecSquashedInsts               110                       # Number of squashed instructions skipped in execute
+system.cpu.iew.iewExecutedInsts                  3254                       # Number of executed instructions
+system.cpu.iew.iewIQFullEvents                      9                       # Number of times the IQ has become full, causing a stall
 system.cpu.iew.iewIdleCycles                        0                       # Number of cycles IEW is idle
 system.cpu.iew.iewLSQFullEvents                     0                       # Number of times the LSQ has become full, causing a stall
-system.cpu.iew.iewSquashCycles                    336                       # Number of cycles IEW is squashing
+system.cpu.iew.iewSquashCycles                    297                       # Number of cycles IEW is squashing
 system.cpu.iew.iewUnblockCycles                    12                       # Number of cycles IEW is unblocking
 system.cpu.iew.lsq.thread.0.blockedLoads            0                       # Number of blocked loads due to partial load-store forwarding
-system.cpu.iew.lsq.thread.0.cacheBlocked           82                       # Number of times an access to memory failed due to the cache being blocked
+system.cpu.iew.lsq.thread.0.cacheBlocked            0                       # Number of times an access to memory failed due to the cache being blocked
 system.cpu.iew.lsq.thread.0.forwLoads              29                       # Number of loads that had data forwarded from stores
 system.cpu.iew.lsq.thread.0.ignoredResponses            0                       # Number of memory responses ignored because the instruction is squashed
 system.cpu.iew.lsq.thread.0.invAddrLoads            0                       # Number of loads ignored due to an invalid address
 system.cpu.iew.lsq.thread.0.invAddrSwpfs            0                       # Number of software prefetches ignored due to an invalid address
 system.cpu.iew.lsq.thread.0.memOrderViolation           15                       # Number of memory ordering violations
 system.cpu.iew.lsq.thread.0.rescheduledLoads            0                       # Number of loads that were rescheduled
-system.cpu.iew.lsq.thread.0.squashedLoads          368                       # Number of loads squashed
-system.cpu.iew.lsq.thread.0.squashedStores           87                       # Number of stores squashed
+system.cpu.iew.lsq.thread.0.squashedLoads          286                       # Number of loads squashed
+system.cpu.iew.lsq.thread.0.squashedStores           88                       # Number of stores squashed
 system.cpu.iew.memOrderViolationEvents             15                       # Number of memory order violations
-system.cpu.iew.predictedNotTakenIncorrect           95                       # Number of branches that were predicted not taken incorrectly
-system.cpu.iew.predictedTakenIncorrect             57                       # Number of branches that were predicted taken incorrectly
-system.cpu.ipc                               0.003174                       # IPC: Instructions Per Cycle
-system.cpu.ipc_total                         0.003174                       # IPC: Total IPC of All Threads
-system.cpu.iq.ISSUE:FU_type_0                    3491                       # Type of FU issued
+system.cpu.iew.predictedNotTakenIncorrect           96                       # Number of branches that were predicted not taken incorrectly
+system.cpu.iew.predictedTakenIncorrect             55                       # Number of branches that were predicted taken incorrectly
+system.cpu.ipc                               0.003200                       # IPC: Instructions Per Cycle
+system.cpu.ipc_total                         0.003200                       # IPC: Total IPC of All Threads
+system.cpu.iq.ISSUE:FU_type_0                    3364                       # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0.start_dist
                           (null)            0      0.00%            # Type of FU issued
-                          IntAlu         2447     70.09%            # Type of FU issued
+                          IntAlu         2398     71.28%            # Type of FU issued
                          IntMult            1      0.03%            # Type of FU issued
                           IntDiv            0      0.00%            # Type of FU issued
                         FloatAdd            0      0.00%            # Type of FU issued
@@ -279,13 +279,13 @@ system.cpu.iq.ISSUE:FU_type_0.start_dist
                        FloatMult            0      0.00%            # Type of FU issued
                         FloatDiv            0      0.00%            # Type of FU issued
                        FloatSqrt            0      0.00%            # Type of FU issued
-                         MemRead          694     19.88%            # Type of FU issued
-                        MemWrite          349     10.00%            # Type of FU issued
+                         MemRead          618     18.37%            # Type of FU issued
+                        MemWrite          347     10.32%            # Type of FU issued
                        IprAccess            0      0.00%            # Type of FU issued
                     InstPrefetch            0      0.00%            # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0.end_dist
 system.cpu.iq.ISSUE:fu_busy_cnt                    34                       # FU busy when requested
-system.cpu.iq.ISSUE:fu_busy_rate             0.009739                       # FU busy rate (busy events/executed inst)
+system.cpu.iq.ISSUE:fu_busy_rate             0.010107                       # FU busy rate (busy events/executed inst)
 system.cpu.iq.ISSUE:fu_full.start_dist
                           (null)            0      0.00%            # attempts to use FU when none available
                           IntAlu            1      2.94%            # attempts to use FU when none available
@@ -303,35 +303,35 @@ system.cpu.iq.ISSUE:fu_full.start_dist
                     InstPrefetch            0      0.00%            # attempts to use FU when none available
 system.cpu.iq.ISSUE:fu_full.end_dist
 system.cpu.iq.ISSUE:issued_per_cycle.start_dist                     # Number of insts issued each cycle
-system.cpu.iq.ISSUE:issued_per_cycle.samples        28537                      
+system.cpu.iq.ISSUE:issued_per_cycle.samples        30107                      
 system.cpu.iq.ISSUE:issued_per_cycle.min_value            0                      
-                               0        27014   9466.31%           
-                               1          617    216.21%           
-                               2          351    123.00%           
-                               3          247     86.55%           
-                               4          178     62.38%           
-                               5           82     28.73%           
-                               6           32     11.21%           
-                               7           11      3.85%           
-                               8            5      1.75%           
+                               0        28628   9508.75%           
+                               1          616    204.60%           
+                               2          335    111.27%           
+                               3          225     74.73%           
+                               4          177     58.79%           
+                               5           80     26.57%           
+                               6           31     10.30%           
+                               7           11      3.65%           
+                               8            4      1.33%           
 system.cpu.iq.ISSUE:issued_per_cycle.max_value            8                      
 system.cpu.iq.ISSUE:issued_per_cycle.end_dist
 
-system.cpu.iq.ISSUE:rate                     0.122332                       # Inst issue rate
-system.cpu.iq.iqInstsAdded                       4007                       # Number of instructions added to the IQ (excludes non-spec)
-system.cpu.iq.iqInstsIssued                      3491                       # Number of instructions issued
+system.cpu.iq.ISSUE:rate                     0.111735                       # Inst issue rate
+system.cpu.iq.iqInstsAdded                       3838                       # Number of instructions added to the IQ (excludes non-spec)
+system.cpu.iq.iqInstsIssued                      3364                       # Number of instructions issued
 system.cpu.iq.iqNonSpecInstsAdded                   6                       # Number of non-speculative instructions added to the IQ
-system.cpu.iq.iqSquashedInstsExamined            1470                       # Number of squashed instructions iterated over during squash; mainly for profiling
-system.cpu.iq.iqSquashedInstsIssued                25                       # Number of squashed instructions issued
+system.cpu.iq.iqSquashedInstsExamined            1301                       # Number of squashed instructions iterated over during squash; mainly for profiling
+system.cpu.iq.iqSquashedInstsIssued                35                       # Number of squashed instructions issued
 system.cpu.iq.iqSquashedNonSpecRemoved              2                       # Number of squashed non-spec instructions that were removed
-system.cpu.iq.iqSquashedOperandsExamined          801                       # Number of squashed operands that are examined and possibly removed from graph
+system.cpu.iq.iqSquashedOperandsExamined          682                       # Number of squashed operands that are examined and possibly removed from graph
 system.cpu.l2cache.ReadReq_accesses               269                       # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_avg_miss_latency  4621.754647                       # average ReadReq miss latency
-system.cpu.l2cache.ReadReq_avg_mshr_miss_latency  2296.408922                       # average ReadReq mshr miss latency
-system.cpu.l2cache.ReadReq_miss_latency       1243252                       # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_avg_miss_latency  4610.717472                       # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_avg_mshr_miss_latency  2315.289963                       # average ReadReq mshr miss latency
+system.cpu.l2cache.ReadReq_miss_latency       1240283                       # number of ReadReq miss cycles
 system.cpu.l2cache.ReadReq_miss_rate                1                       # miss rate for ReadReq accesses
 system.cpu.l2cache.ReadReq_misses                 269                       # number of ReadReq misses
-system.cpu.l2cache.ReadReq_mshr_miss_latency       617734                       # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_latency       622813                       # number of ReadReq MSHR miss cycles
 system.cpu.l2cache.ReadReq_mshr_miss_rate            1                       # mshr miss rate for ReadReq accesses
 system.cpu.l2cache.ReadReq_mshr_misses            269                       # number of ReadReq MSHR misses
 system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
@@ -343,29 +343,29 @@ system.cpu.l2cache.blocked_cycles_no_mshrs            0                       #
 system.cpu.l2cache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.l2cache.cache_copies                     0                       # number of cache copies performed
 system.cpu.l2cache.demand_accesses                269                       # number of demand (read+write) accesses
-system.cpu.l2cache.demand_avg_miss_latency  4621.754647                       # average overall miss latency
-system.cpu.l2cache.demand_avg_mshr_miss_latency  2296.408922                       # average overall mshr miss latency
+system.cpu.l2cache.demand_avg_miss_latency  4610.717472                       # average overall miss latency
+system.cpu.l2cache.demand_avg_mshr_miss_latency  2315.289963                       # average overall mshr miss latency
 system.cpu.l2cache.demand_hits                      0                       # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency        1243252                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_latency        1240283                       # number of demand (read+write) miss cycles
 system.cpu.l2cache.demand_miss_rate                 1                       # miss rate for demand accesses
 system.cpu.l2cache.demand_misses                  269                       # number of demand (read+write) misses
 system.cpu.l2cache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
-system.cpu.l2cache.demand_mshr_miss_latency       617734                       # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_latency       622813                       # number of demand (read+write) MSHR miss cycles
 system.cpu.l2cache.demand_mshr_miss_rate            1                       # mshr miss rate for demand accesses
 system.cpu.l2cache.demand_mshr_misses             269                       # number of demand (read+write) MSHR misses
 system.cpu.l2cache.fast_writes                      0                       # number of fast writes performed
 system.cpu.l2cache.mshr_cap_events                  0                       # number of times MSHR cap was activated
 system.cpu.l2cache.no_allocate_misses               0                       # Number of misses that were no-allocate
 system.cpu.l2cache.overall_accesses               269                       # number of overall (read+write) accesses
-system.cpu.l2cache.overall_avg_miss_latency  4621.754647                       # average overall miss latency
-system.cpu.l2cache.overall_avg_mshr_miss_latency  2296.408922                       # average overall mshr miss latency
+system.cpu.l2cache.overall_avg_miss_latency  4610.717472                       # average overall miss latency
+system.cpu.l2cache.overall_avg_mshr_miss_latency  2315.289963                       # average overall mshr miss latency
 system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.l2cache.overall_hits                     0                       # number of overall hits
-system.cpu.l2cache.overall_miss_latency       1243252                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_latency       1240283                       # number of overall miss cycles
 system.cpu.l2cache.overall_miss_rate                1                       # miss rate for overall accesses
 system.cpu.l2cache.overall_misses                 269                       # number of overall misses
 system.cpu.l2cache.overall_mshr_hits                0                       # number of overall MSHR hits
-system.cpu.l2cache.overall_mshr_miss_latency       617734                       # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_latency       622813                       # number of overall MSHR miss cycles
 system.cpu.l2cache.overall_mshr_miss_rate            1                       # mshr miss rate for overall accesses
 system.cpu.l2cache.overall_mshr_misses            269                       # number of overall MSHR misses
 system.cpu.l2cache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
@@ -382,29 +382,28 @@ system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.l2cache.replacements                     0                       # number of replacements
 system.cpu.l2cache.sampled_refs                   269                       # Sample count of references to valid blocks.
 system.cpu.l2cache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse               138.802893                       # Cycle average of tags in use
+system.cpu.l2cache.tagsinuse               138.742329                       # Cycle average of tags in use
 system.cpu.l2cache.total_refs                       0                       # Total number of references to valid blocks.
 system.cpu.l2cache.warmup_cycle                     0                       # Cycle when the warmup percentage was hit.
 system.cpu.l2cache.writebacks                       0                       # number of writebacks
-system.cpu.numCycles                            28537                       # number of cpu cycles simulated
-system.cpu.rename.RENAME:BlockCycles            14784                       # Number of cycles rename is blocking
+system.cpu.numCycles                            30107                       # number of cpu cycles simulated
+system.cpu.rename.RENAME:BlockCycles            16613                       # Number of cycles rename is blocking
 system.cpu.rename.RENAME:CommittedMaps           1768                       # Number of HB maps that are committed
-system.cpu.rename.RENAME:IQFullEvents              18                       # Number of times rename has blocked due to IQ full
-system.cpu.rename.RENAME:IdleCycles              5489                       # Number of cycles rename is idle
+system.cpu.rename.RENAME:IQFullEvents              14                       # Number of times rename has blocked due to IQ full
+system.cpu.rename.RENAME:IdleCycles              5311                       # Number of cycles rename is idle
 system.cpu.rename.RENAME:LSQFullEvents              1                       # Number of times rename has blocked due to LSQ full
-system.cpu.rename.RENAME:ROBFullEvents              2                       # Number of times rename has blocked due to ROB full
-system.cpu.rename.RENAME:RenameLookups           5285                       # Number of register rename lookups that rename has made
-system.cpu.rename.RENAME:RenamedInsts            4708                       # Number of instructions processed by rename
-system.cpu.rename.RENAME:RenamedOperands         3399                       # Number of destination operands rename has renamed
-system.cpu.rename.RENAME:RunCycles                847                       # Number of cycles rename is running
-system.cpu.rename.RENAME:SquashCycles             336                       # Number of cycles rename is squashing
-system.cpu.rename.RENAME:UnblockCycles             25                       # Number of cycles rename is unblocking
-system.cpu.rename.RENAME:UndoneMaps              1631                       # Number of HB maps that are undone due to squashing
-system.cpu.rename.RENAME:serializeStallCycles         7056                       # count of cycles rename stalled for serializing inst
+system.cpu.rename.RENAME:RenameLookups           5020                       # Number of register rename lookups that rename has made
+system.cpu.rename.RENAME:RenamedInsts            4436                       # Number of instructions processed by rename
+system.cpu.rename.RENAME:RenamedOperands         3192                       # Number of destination operands rename has renamed
+system.cpu.rename.RENAME:RunCycles                802                       # Number of cycles rename is running
+system.cpu.rename.RENAME:SquashCycles             297                       # Number of cycles rename is squashing
+system.cpu.rename.RENAME:UnblockCycles             23                       # Number of cycles rename is unblocking
+system.cpu.rename.RENAME:UndoneMaps              1424                       # Number of HB maps that are undone due to squashing
+system.cpu.rename.RENAME:serializeStallCycles         7061                       # count of cycles rename stalled for serializing inst
 system.cpu.rename.RENAME:serializingInsts            8                       # count of serializing insts renamed
-system.cpu.rename.RENAME:skidInsts                 88                       # count of insts added to the skid buffer
+system.cpu.rename.RENAME:skidInsts                 78                       # count of insts added to the skid buffer
 system.cpu.rename.RENAME:tempSerializingInsts            6                       # count of temporary serializing insts renamed
-system.cpu.timesIdled                             211                       # Number of times that the entire CPU went into an idle state and unscheduled itself
+system.cpu.timesIdled                             207                       # Number of times that the entire CPU went into an idle state and unscheduled itself
 system.cpu.workload.PROG:num_syscalls               4                       # Number of system calls
 
 ---------- End Simulation Statistics   ----------
diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stderr b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stderr
index 313de3c46..e582c15a8 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stderr
+++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stderr
@@ -1,4 +1,4 @@
-0: system.remote_gdb.listener: listening for remote gdb #0 on port 7000
+0: system.remote_gdb.listener: listening for remote gdb on port 7001
 warn: Entering event queue @ 0.  Starting simulation...
 warn: Increasing stack size by one page.
 warn: ignoring syscall sigprocmask(1, 18446744073709547831, ...)
diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout
index 233834343..25e5ec43b 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout
+++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout
@@ -6,9 +6,9 @@ The Regents of The University of Michigan
 All Rights Reserved
 
 
-M5 compiled Mar 24 2007 13:51:02
-M5 started Sat Mar 24 13:51:14 2007
-M5 executing on zizzer.eecs.umich.edu
-command line: build/ALPHA_SE/m5.opt -d build/ALPHA_SE/tests/opt/quick/00.hello/alpha/tru64/o3-timing tests/run.py quick/00.hello/alpha/tru64/o3-timing
+M5 compiled Mar 30 2007 13:12:55
+M5 started Fri Mar 30 13:13:05 2007
+M5 executing on zamp.eecs.umich.edu
+command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/00.hello/alpha/tru64/o3-timing tests/run.py quick/00.hello/alpha/tru64/o3-timing
 Global frequency set at 1000000000000 ticks per second
-Exiting @ tick 752028 because target called exit()
+Exiting @ tick 746028 because target called exit()
diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini
index e75a10c54..e11ca74dd 100644
--- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini
+++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini
@@ -115,7 +115,7 @@ split=false
 split_size=0
 store_compressed=false
 subblock_size=0
-tgts_per_mshr=5
+tgts_per_mshr=20
 trace_addr=0
 two_queue=false
 write_buffers=8
@@ -291,7 +291,7 @@ split=false
 split_size=0
 store_compressed=false
 subblock_size=0
-tgts_per_mshr=5
+tgts_per_mshr=20
 trace_addr=0
 two_queue=false
 write_buffers=8
diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.out b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.out
index 9489e27c0..0d9c5215b 100644
--- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.out
+++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.out
@@ -267,7 +267,7 @@ assoc=2
 block_size=64
 latency=1
 mshrs=10
-tgts_per_mshr=5
+tgts_per_mshr=20
 write_buffers=8
 prioritizeRequests=false
 protocol=null
@@ -305,7 +305,7 @@ assoc=2
 block_size=64
 latency=1
 mshrs=10
-tgts_per_mshr=5
+tgts_per_mshr=20
 write_buffers=8
 prioritizeRequests=false
 protocol=null
diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt
index 74e8f8d83..684314d31 100644
--- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt
+++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt
@@ -1,29 +1,29 @@
 
 ---------- Begin Simulation Statistics ----------
 global.BPredUnit.BTBCorrect                         0                       # Number of correct BTB predictions (this stat may not work properly.
-global.BPredUnit.BTBHits                         1320                       # Number of BTB hits
-global.BPredUnit.BTBLookups                      6181                       # Number of BTB lookups
-global.BPredUnit.RASInCorrect                     173                       # Number of incorrect RAS predictions.
-global.BPredUnit.condIncorrect                   1181                       # Number of conditional branches incorrect
-global.BPredUnit.condPredicted                   4228                       # Number of conditional branches predicted
-global.BPredUnit.lookups                        12535                       # Number of BP lookups
-global.BPredUnit.usedRAS                         6333                       # Number of times the RAS was used to get a target.
-host_inst_rate                                   6990                       # Simulator instruction rate (inst/s)
-host_mem_usage                                 156628                       # Number of bytes of host memory used
-host_seconds                                     1.61                       # Real time elapsed on the host
-host_tick_rate                                1386962                       # Simulator tick rate (ticks/s)
+global.BPredUnit.BTBHits                          827                       # Number of BTB hits
+global.BPredUnit.BTBLookups                      3697                       # Number of BTB lookups
+global.BPredUnit.RASInCorrect                     179                       # Number of incorrect RAS predictions.
+global.BPredUnit.condIncorrect                   1207                       # Number of conditional branches incorrect
+global.BPredUnit.condPredicted                   2534                       # Number of conditional branches predicted
+global.BPredUnit.lookups                         4455                       # Number of BP lookups
+global.BPredUnit.usedRAS                          640                       # Number of times the RAS was used to get a target.
+host_inst_rate                                  15344                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 154676                       # Number of bytes of host memory used
+host_seconds                                     0.73                       # Real time elapsed on the host
+host_tick_rate                                2857242                       # Simulator tick rate (ticks/s)
+memdepunit.memDep.conflictingLoads                 24                       # Number of conflicting loads.
 memdepunit.memDep.conflictingLoads                 26                       # Number of conflicting loads.
-memdepunit.memDep.conflictingLoads                 23                       # Number of conflicting loads.
 memdepunit.memDep.conflictingStores                 4                       # Number of conflicting stores.
-memdepunit.memDep.conflictingStores                 1                       # Number of conflicting stores.
-memdepunit.memDep.insertedLoads                  3657                       # Number of loads inserted to the mem dependence unit.
-memdepunit.memDep.insertedLoads                  5285                       # Number of loads inserted to the mem dependence unit.
-memdepunit.memDep.insertedStores                 1780                       # Number of stores inserted to the mem dependence unit.
-memdepunit.memDep.insertedStores                 4439                       # Number of stores inserted to the mem dependence unit.
+memdepunit.memDep.conflictingStores                 5                       # Number of conflicting stores.
+memdepunit.memDep.insertedLoads                  2132                       # Number of loads inserted to the mem dependence unit.
+memdepunit.memDep.insertedLoads                  2142                       # Number of loads inserted to the mem dependence unit.
+memdepunit.memDep.insertedStores                 1150                       # Number of stores inserted to the mem dependence unit.
+memdepunit.memDep.insertedStores                 1138                       # Number of stores inserted to the mem dependence unit.
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
 sim_insts                                       11247                       # Number of instructions simulated
 sim_seconds                                  0.000002                       # Number of seconds simulated
-sim_ticks                                     2232164                       # Number of ticks simulated
+sim_ticks                                     2095164                       # Number of ticks simulated
 system.cpu.commit.COM:branches                   1724                       # Number of branches committed
 system.cpu.commit.COM:branches_0                  862                       # Number of branches committed
 system.cpu.commit.COM:branches_1                  862                       # Number of branches committed
@@ -32,23 +32,23 @@ system.cpu.commit.COM:bw_limited                    0                       # nu
 system.cpu.commit.COM:bw_limited_0                  0                       # number of insts not committed due to BW limits
 system.cpu.commit.COM:bw_limited_1                  0                       # number of insts not committed due to BW limits
 system.cpu.commit.COM:committed_per_cycle.start_dist                     # Number of insts commited each cycle
-system.cpu.commit.COM:committed_per_cycle.samples       189138                      
+system.cpu.commit.COM:committed_per_cycle.samples       165684                      
 system.cpu.commit.COM:committed_per_cycle.min_value            0                      
-                               0       183476   9700.64%           
-                               1         3161    167.13%           
-                               2         1212     64.08%           
-                               3          544     28.76%           
-                               4          279     14.75%           
-                               5          155      8.20%           
-                               6          127      6.71%           
-                               7           61      3.23%           
-                               8          123      6.50%           
+                               0       159919   9652.05%           
+                               1         3333    201.17%           
+                               2         1165     70.31%           
+                               3          515     31.08%           
+                               4          270     16.30%           
+                               5          201     12.13%           
+                               6          102      6.16%           
+                               7           56      3.38%           
+                               8          123      7.42%           
 system.cpu.commit.COM:committed_per_cycle.max_value            8                      
 system.cpu.commit.COM:committed_per_cycle.end_dist
 
 system.cpu.commit.COM:count                     11281                       # Number of instructions committed
-system.cpu.commit.COM:count_0                    5641                       # Number of instructions committed
-system.cpu.commit.COM:count_1                    5640                       # Number of instructions committed
+system.cpu.commit.COM:count_0                    5640                       # Number of instructions committed
+system.cpu.commit.COM:count_1                    5641                       # Number of instructions committed
 system.cpu.commit.COM:loads                      1958                       # Number of loads committed
 system.cpu.commit.COM:loads_0                     979                       # Number of loads committed
 system.cpu.commit.COM:loads_1                     979                       # Number of loads committed
@@ -61,141 +61,141 @@ system.cpu.commit.COM:refs_1                     1791                       # Nu
 system.cpu.commit.COM:swp_count                     0                       # Number of s/w prefetches committed
 system.cpu.commit.COM:swp_count_0                   0                       # Number of s/w prefetches committed
 system.cpu.commit.COM:swp_count_1                   0                       # Number of s/w prefetches committed
-system.cpu.commit.branchMispredicts               938                       # The number of times a branch was mispredicted
+system.cpu.commit.branchMispredicts               947                       # The number of times a branch was mispredicted
 system.cpu.commit.commitCommittedInsts          11281                       # The number of committed instructions
 system.cpu.commit.commitNonSpecStalls              34                       # The number of times commit has been forced to stall to communicate backwards
-system.cpu.commit.commitSquashedInsts           29588                       # The number of squashed insts skipped by commit
-system.cpu.committedInsts_0                      5624                       # Number of Instructions Simulated
-system.cpu.committedInsts_1                      5623                       # Number of Instructions Simulated
+system.cpu.commit.commitSquashedInsts            9432                       # The number of squashed insts skipped by commit
+system.cpu.committedInsts_0                      5623                       # Number of Instructions Simulated
+system.cpu.committedInsts_1                      5624                       # Number of Instructions Simulated
 system.cpu.committedInsts_total                 11247                       # Number of Instructions Simulated
-system.cpu.cpi_0                           396.899716                       # CPI: Cycles Per Instruction
-system.cpu.cpi_1                           396.970301                       # CPI: Cycles Per Instruction
-system.cpu.cpi_total                       198.467502                       # CPI: Total CPI of All Threads
-system.cpu.dcache.ReadReq_accesses               3176                       # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_accesses_0             3176                       # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_avg_miss_latency  9976.257143                       # average ReadReq miss latency
-system.cpu.dcache.ReadReq_avg_miss_latency_0  9976.257143                       # average ReadReq miss latency
-system.cpu.dcache.ReadReq_avg_mshr_miss_latency 10425.356784                       # average ReadReq mshr miss latency
-system.cpu.dcache.ReadReq_avg_mshr_miss_latency_0 10425.356784                       # average ReadReq mshr miss latency
-system.cpu.dcache.ReadReq_hits                   2861                       # number of ReadReq hits
-system.cpu.dcache.ReadReq_hits_0                 2861                       # number of ReadReq hits
-system.cpu.dcache.ReadReq_miss_latency        3142521                       # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_latency_0      3142521                       # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_rate          0.099181                       # miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_miss_rate_0        0.099181                       # miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_misses                  315                       # number of ReadReq misses
-system.cpu.dcache.ReadReq_misses_0                315                       # number of ReadReq misses
-system.cpu.dcache.ReadReq_mshr_hits               116                       # number of ReadReq MSHR hits
-system.cpu.dcache.ReadReq_mshr_hits_0             116                       # number of ReadReq MSHR hits
-system.cpu.dcache.ReadReq_mshr_miss_latency      2074646                       # number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_latency_0      2074646                       # number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_rate     0.062657                       # mshr miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_mshr_miss_rate_0     0.062657                       # mshr miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_mshr_misses             199                       # number of ReadReq MSHR misses
-system.cpu.dcache.ReadReq_mshr_misses_0           199                       # number of ReadReq MSHR misses
+system.cpu.cpi_0                           372.606082                       # CPI: Cycles Per Instruction
+system.cpu.cpi_1                           372.539829                       # CPI: Cycles Per Instruction
+system.cpu.cpi_total                       186.286476                       # CPI: Total CPI of All Threads
+system.cpu.dcache.ReadReq_accesses               3234                       # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_accesses_0             3234                       # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_avg_miss_latency 10308.511696                       # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_miss_latency_0 10308.511696                       # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency 10789.975000                       # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency_0 10789.975000                       # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_hits                   2892                       # number of ReadReq hits
+system.cpu.dcache.ReadReq_hits_0                 2892                       # number of ReadReq hits
+system.cpu.dcache.ReadReq_miss_latency        3525511                       # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_latency_0      3525511                       # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_rate          0.105751                       # miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_miss_rate_0        0.105751                       # miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_misses                  342                       # number of ReadReq misses
+system.cpu.dcache.ReadReq_misses_0                342                       # number of ReadReq misses
+system.cpu.dcache.ReadReq_mshr_hits               142                       # number of ReadReq MSHR hits
+system.cpu.dcache.ReadReq_mshr_hits_0             142                       # number of ReadReq MSHR hits
+system.cpu.dcache.ReadReq_mshr_miss_latency      2157995                       # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_latency_0      2157995                       # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_rate     0.061843                       # mshr miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_mshr_miss_rate_0     0.061843                       # mshr miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_mshr_misses             200                       # number of ReadReq MSHR misses
+system.cpu.dcache.ReadReq_mshr_misses_0           200                       # number of ReadReq MSHR misses
 system.cpu.dcache.WriteReq_accesses              1624                       # number of WriteReq accesses(hits+misses)
 system.cpu.dcache.WriteReq_accesses_0            1624                       # number of WriteReq accesses(hits+misses)
-system.cpu.dcache.WriteReq_avg_miss_latency  6512.846154                       # average WriteReq miss latency
-system.cpu.dcache.WriteReq_avg_miss_latency_0  6512.846154                       # average WriteReq miss latency
-system.cpu.dcache.WriteReq_avg_mshr_miss_latency  7776.006849                       # average WriteReq mshr miss latency
-system.cpu.dcache.WriteReq_avg_mshr_miss_latency_0  7776.006849                       # average WriteReq mshr miss latency
-system.cpu.dcache.WriteReq_hits                  1117                       # number of WriteReq hits
-system.cpu.dcache.WriteReq_hits_0                1117                       # number of WriteReq hits
-system.cpu.dcache.WriteReq_miss_latency       3302013                       # number of WriteReq miss cycles
-system.cpu.dcache.WriteReq_miss_latency_0      3302013                       # number of WriteReq miss cycles
-system.cpu.dcache.WriteReq_miss_rate         0.312192                       # miss rate for WriteReq accesses
-system.cpu.dcache.WriteReq_miss_rate_0       0.312192                       # miss rate for WriteReq accesses
-system.cpu.dcache.WriteReq_misses                 507                       # number of WriteReq misses
-system.cpu.dcache.WriteReq_misses_0               507                       # number of WriteReq misses
-system.cpu.dcache.WriteReq_mshr_hits              361                       # number of WriteReq MSHR hits
-system.cpu.dcache.WriteReq_mshr_hits_0            361                       # number of WriteReq MSHR hits
-system.cpu.dcache.WriteReq_mshr_miss_latency      1135297                       # number of WriteReq MSHR miss cycles
-system.cpu.dcache.WriteReq_mshr_miss_latency_0      1135297                       # number of WriteReq MSHR miss cycles
+system.cpu.dcache.WriteReq_avg_miss_latency  8945.050491                       # average WriteReq miss latency
+system.cpu.dcache.WriteReq_avg_miss_latency_0  8945.050491                       # average WriteReq miss latency
+system.cpu.dcache.WriteReq_avg_mshr_miss_latency  9931.897260                       # average WriteReq mshr miss latency
+system.cpu.dcache.WriteReq_avg_mshr_miss_latency_0  9931.897260                       # average WriteReq mshr miss latency
+system.cpu.dcache.WriteReq_hits                   911                       # number of WriteReq hits
+system.cpu.dcache.WriteReq_hits_0                 911                       # number of WriteReq hits
+system.cpu.dcache.WriteReq_miss_latency       6377821                       # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_latency_0      6377821                       # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_rate         0.439039                       # miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_miss_rate_0       0.439039                       # miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_misses                 713                       # number of WriteReq misses
+system.cpu.dcache.WriteReq_misses_0               713                       # number of WriteReq misses
+system.cpu.dcache.WriteReq_mshr_hits              567                       # number of WriteReq MSHR hits
+system.cpu.dcache.WriteReq_mshr_hits_0            567                       # number of WriteReq MSHR hits
+system.cpu.dcache.WriteReq_mshr_miss_latency      1450057                       # number of WriteReq MSHR miss cycles
+system.cpu.dcache.WriteReq_mshr_miss_latency_0      1450057                       # number of WriteReq MSHR miss cycles
 system.cpu.dcache.WriteReq_mshr_miss_rate     0.089901                       # mshr miss rate for WriteReq accesses
 system.cpu.dcache.WriteReq_mshr_miss_rate_0     0.089901                       # mshr miss rate for WriteReq accesses
 system.cpu.dcache.WriteReq_mshr_misses            146                       # number of WriteReq MSHR misses
 system.cpu.dcache.WriteReq_mshr_misses_0          146                       # number of WriteReq MSHR misses
-system.cpu.dcache.avg_blocked_cycles_no_mshrs         3973                       # average number of cycles each access was blocked
-system.cpu.dcache.avg_blocked_cycles_no_targets  3613.488095                       # average number of cycles each access was blocked
-system.cpu.dcache.avg_refs                  11.563953                       # Average number of references to valid blocks.
+system.cpu.dcache.avg_blocked_cycles_no_mshrs          994                       # average number of cycles each access was blocked
+system.cpu.dcache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
+system.cpu.dcache.avg_refs                  10.991329                       # Average number of references to valid blocks.
 system.cpu.dcache.blocked_no_mshrs                  1                       # number of cycles access was blocked
-system.cpu.dcache.blocked_no_targets               84                       # number of cycles access was blocked
-system.cpu.dcache.blocked_cycles_no_mshrs         3973                       # number of cycles access was blocked
-system.cpu.dcache.blocked_cycles_no_targets       303533                       # number of cycles access was blocked
+system.cpu.dcache.blocked_no_targets                0                       # number of cycles access was blocked
+system.cpu.dcache.blocked_cycles_no_mshrs          994                       # number of cycles access was blocked
+system.cpu.dcache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.dcache.cache_copies                      0                       # number of cache copies performed
-system.cpu.dcache.demand_accesses                4800                       # number of demand (read+write) accesses
-system.cpu.dcache.demand_accesses_0              4800                       # number of demand (read+write) accesses
+system.cpu.dcache.demand_accesses                4858                       # number of demand (read+write) accesses
+system.cpu.dcache.demand_accesses_0              4858                       # number of demand (read+write) accesses
 system.cpu.dcache.demand_accesses_1                 0                       # number of demand (read+write) accesses
-system.cpu.dcache.demand_avg_miss_latency  7840.065693                       # average overall miss latency
-system.cpu.dcache.demand_avg_miss_latency_0  7840.065693                       # average overall miss latency
+system.cpu.dcache.demand_avg_miss_latency  9387.044550                       # average overall miss latency
+system.cpu.dcache.demand_avg_miss_latency_0  9387.044550                       # average overall miss latency
 system.cpu.dcache.demand_avg_miss_latency_1 <err: div-0>                       # average overall miss latency
-system.cpu.dcache.demand_avg_mshr_miss_latency  9304.182609                       # average overall mshr miss latency
-system.cpu.dcache.demand_avg_mshr_miss_latency_0  9304.182609                       # average overall mshr miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency 10427.895954                       # average overall mshr miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency_0 10427.895954                       # average overall mshr miss latency
 system.cpu.dcache.demand_avg_mshr_miss_latency_1 <err: div-0>                       # average overall mshr miss latency
-system.cpu.dcache.demand_hits                    3978                       # number of demand (read+write) hits
-system.cpu.dcache.demand_hits_0                  3978                       # number of demand (read+write) hits
+system.cpu.dcache.demand_hits                    3803                       # number of demand (read+write) hits
+system.cpu.dcache.demand_hits_0                  3803                       # number of demand (read+write) hits
 system.cpu.dcache.demand_hits_1                     0                       # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency         6444534                       # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_latency_0       6444534                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_latency         9903332                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_latency_0       9903332                       # number of demand (read+write) miss cycles
 system.cpu.dcache.demand_miss_latency_1             0                       # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_rate           0.171250                       # miss rate for demand accesses
-system.cpu.dcache.demand_miss_rate_0         0.171250                       # miss rate for demand accesses
+system.cpu.dcache.demand_miss_rate           0.217168                       # miss rate for demand accesses
+system.cpu.dcache.demand_miss_rate_0         0.217168                       # miss rate for demand accesses
 system.cpu.dcache.demand_miss_rate_1     <err: div-0>                       # miss rate for demand accesses
-system.cpu.dcache.demand_misses                   822                       # number of demand (read+write) misses
-system.cpu.dcache.demand_misses_0                 822                       # number of demand (read+write) misses
+system.cpu.dcache.demand_misses                  1055                       # number of demand (read+write) misses
+system.cpu.dcache.demand_misses_0                1055                       # number of demand (read+write) misses
 system.cpu.dcache.demand_misses_1                   0                       # number of demand (read+write) misses
-system.cpu.dcache.demand_mshr_hits                477                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_hits_0              477                       # number of demand (read+write) MSHR hits
+system.cpu.dcache.demand_mshr_hits                709                       # number of demand (read+write) MSHR hits
+system.cpu.dcache.demand_mshr_hits_0              709                       # number of demand (read+write) MSHR hits
 system.cpu.dcache.demand_mshr_hits_1                0                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency      3209943                       # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_latency_0      3209943                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_latency      3608052                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_latency_0      3608052                       # number of demand (read+write) MSHR miss cycles
 system.cpu.dcache.demand_mshr_miss_latency_1            0                       # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_rate      0.071875                       # mshr miss rate for demand accesses
-system.cpu.dcache.demand_mshr_miss_rate_0     0.071875                       # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_miss_rate      0.071223                       # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_miss_rate_0     0.071223                       # mshr miss rate for demand accesses
 system.cpu.dcache.demand_mshr_miss_rate_1 <err: div-0>                       # mshr miss rate for demand accesses
-system.cpu.dcache.demand_mshr_misses              345                       # number of demand (read+write) MSHR misses
-system.cpu.dcache.demand_mshr_misses_0            345                       # number of demand (read+write) MSHR misses
+system.cpu.dcache.demand_mshr_misses              346                       # number of demand (read+write) MSHR misses
+system.cpu.dcache.demand_mshr_misses_0            346                       # number of demand (read+write) MSHR misses
 system.cpu.dcache.demand_mshr_misses_1              0                       # number of demand (read+write) MSHR misses
 system.cpu.dcache.fast_writes                       0                       # number of fast writes performed
 system.cpu.dcache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.dcache.mshr_cap_events_0                 0                       # number of times MSHR cap was activated
 system.cpu.dcache.mshr_cap_events_1                 0                       # number of times MSHR cap was activated
 system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.dcache.overall_accesses               4800                       # number of overall (read+write) accesses
-system.cpu.dcache.overall_accesses_0             4800                       # number of overall (read+write) accesses
+system.cpu.dcache.overall_accesses               4858                       # number of overall (read+write) accesses
+system.cpu.dcache.overall_accesses_0             4858                       # number of overall (read+write) accesses
 system.cpu.dcache.overall_accesses_1                0                       # number of overall (read+write) accesses
-system.cpu.dcache.overall_avg_miss_latency  7840.065693                       # average overall miss latency
-system.cpu.dcache.overall_avg_miss_latency_0  7840.065693                       # average overall miss latency
+system.cpu.dcache.overall_avg_miss_latency  9387.044550                       # average overall miss latency
+system.cpu.dcache.overall_avg_miss_latency_0  9387.044550                       # average overall miss latency
 system.cpu.dcache.overall_avg_miss_latency_1 <err: div-0>                       # average overall miss latency
-system.cpu.dcache.overall_avg_mshr_miss_latency  9304.182609                       # average overall mshr miss latency
-system.cpu.dcache.overall_avg_mshr_miss_latency_0  9304.182609                       # average overall mshr miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency 10427.895954                       # average overall mshr miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency_0 10427.895954                       # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_miss_latency_1 <err: div-0>                       # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency_0 <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency_1 <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits                   3978                       # number of overall hits
-system.cpu.dcache.overall_hits_0                 3978                       # number of overall hits
+system.cpu.dcache.overall_hits                   3803                       # number of overall hits
+system.cpu.dcache.overall_hits_0                 3803                       # number of overall hits
 system.cpu.dcache.overall_hits_1                    0                       # number of overall hits
-system.cpu.dcache.overall_miss_latency        6444534                       # number of overall miss cycles
-system.cpu.dcache.overall_miss_latency_0      6444534                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_latency        9903332                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_latency_0      9903332                       # number of overall miss cycles
 system.cpu.dcache.overall_miss_latency_1            0                       # number of overall miss cycles
-system.cpu.dcache.overall_miss_rate          0.171250                       # miss rate for overall accesses
-system.cpu.dcache.overall_miss_rate_0        0.171250                       # miss rate for overall accesses
+system.cpu.dcache.overall_miss_rate          0.217168                       # miss rate for overall accesses
+system.cpu.dcache.overall_miss_rate_0        0.217168                       # miss rate for overall accesses
 system.cpu.dcache.overall_miss_rate_1    <err: div-0>                       # miss rate for overall accesses
-system.cpu.dcache.overall_misses                  822                       # number of overall misses
-system.cpu.dcache.overall_misses_0                822                       # number of overall misses
+system.cpu.dcache.overall_misses                 1055                       # number of overall misses
+system.cpu.dcache.overall_misses_0               1055                       # number of overall misses
 system.cpu.dcache.overall_misses_1                  0                       # number of overall misses
-system.cpu.dcache.overall_mshr_hits               477                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_hits_0             477                       # number of overall MSHR hits
+system.cpu.dcache.overall_mshr_hits               709                       # number of overall MSHR hits
+system.cpu.dcache.overall_mshr_hits_0             709                       # number of overall MSHR hits
 system.cpu.dcache.overall_mshr_hits_1               0                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency      3209943                       # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_latency_0      3209943                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_latency      3608052                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_latency_0      3608052                       # number of overall MSHR miss cycles
 system.cpu.dcache.overall_mshr_miss_latency_1            0                       # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_rate     0.071875                       # mshr miss rate for overall accesses
-system.cpu.dcache.overall_mshr_miss_rate_0     0.071875                       # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_miss_rate     0.071223                       # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_miss_rate_0     0.071223                       # mshr miss rate for overall accesses
 system.cpu.dcache.overall_mshr_miss_rate_1 <err: div-0>                       # mshr miss rate for overall accesses
-system.cpu.dcache.overall_mshr_misses             345                       # number of overall MSHR misses
-system.cpu.dcache.overall_mshr_misses_0           345                       # number of overall MSHR misses
+system.cpu.dcache.overall_mshr_misses             346                       # number of overall MSHR misses
+system.cpu.dcache.overall_mshr_misses_0           346                       # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_misses_1             0                       # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.dcache.overall_mshr_uncacheable_latency_0            0                       # number of overall MSHR uncacheable cycles
@@ -215,153 +215,153 @@ system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.dcache.replacements                      0                       # number of replacements
 system.cpu.dcache.replacements_0                    0                       # number of replacements
 system.cpu.dcache.replacements_1                    0                       # number of replacements
-system.cpu.dcache.sampled_refs                    344                       # Sample count of references to valid blocks.
+system.cpu.dcache.sampled_refs                    346                       # Sample count of references to valid blocks.
 system.cpu.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
 system.cpu.dcache.soft_prefetch_mshr_full_0            0                       # number of mshr full events for SW prefetching instrutions
 system.cpu.dcache.soft_prefetch_mshr_full_1            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse                198.340517                       # Cycle average of tags in use
-system.cpu.dcache.total_refs                     3978                       # Total number of references to valid blocks.
+system.cpu.dcache.tagsinuse                200.098842                       # Cycle average of tags in use
+system.cpu.dcache.total_refs                     3803                       # Total number of references to valid blocks.
 system.cpu.dcache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.dcache.writebacks                        0                       # number of writebacks
 system.cpu.dcache.writebacks_0                      0                       # number of writebacks
 system.cpu.dcache.writebacks_1                      0                       # number of writebacks
-system.cpu.decode.DECODE:BlockedCycles          95932                       # Number of cycles decode is blocked
-system.cpu.decode.DECODE:BranchMispred            257                       # Number of times decode detected a branch misprediction
-system.cpu.decode.DECODE:BranchResolved           378                       # Number of times decode resolved a branch
-system.cpu.decode.DECODE:DecodedInsts           68233                       # Number of instructions handled by decode
-system.cpu.decode.DECODE:IdleCycles            264032                       # Number of cycles decode is idle
-system.cpu.decode.DECODE:RunCycles              12255                       # Number of cycles decode is running
-system.cpu.decode.DECODE:SquashCycles            5733                       # Number of cycles decode is squashing
-system.cpu.decode.DECODE:SquashedInsts            618                       # Number of squashed instructions handled by decode
-system.cpu.decode.DECODE:UnblockCycles            167                       # Number of cycles decode is unblocking
-system.cpu.fetch.Branches                       12535                       # Number of branches that fetch encountered
-system.cpu.fetch.CacheLines                     13184                       # Number of cache lines fetched
-system.cpu.fetch.Cycles                         28123                       # Number of cycles fetch has run and was not squashing or blocked
-system.cpu.fetch.IcacheSquashes                   886                       # Number of outstanding Icache misses that were squashed
-system.cpu.fetch.Insts                          80687                       # Number of instructions fetch has processed
-system.cpu.fetch.SquashCycles                    4911                       # Number of cycles fetch has spent squashing
-system.cpu.fetch.branchRate                  0.066271                       # Number of branch fetches per cycle
-system.cpu.fetch.icacheStallCycles              53960                       # Number of cycles fetch is stalled on an Icache miss
-system.cpu.fetch.predictedBranches               7653                       # Number of branches that fetch has predicted taken
-system.cpu.fetch.rate                        0.426584                       # Number of inst fetches per cycle
+system.cpu.decode.DECODE:BlockedCycles         112235                       # Number of cycles decode is blocked
+system.cpu.decode.DECODE:BranchMispred            273                       # Number of times decode detected a branch misprediction
+system.cpu.decode.DECODE:BranchResolved           396                       # Number of times decode resolved a branch
+system.cpu.decode.DECODE:DecodedInsts           24032                       # Number of instructions handled by decode
+system.cpu.decode.DECODE:IdleCycles            212833                       # Number of cycles decode is idle
+system.cpu.decode.DECODE:RunCycles               4096                       # Number of cycles decode is running
+system.cpu.decode.DECODE:SquashCycles            1856                       # Number of cycles decode is squashing
+system.cpu.decode.DECODE:SquashedInsts            672                       # Number of squashed instructions handled by decode
+system.cpu.decode.DECODE:UnblockCycles            181                       # Number of cycles decode is unblocking
+system.cpu.fetch.Branches                        4455                       # Number of branches that fetch encountered
+system.cpu.fetch.CacheLines                      3542                       # Number of cache lines fetched
+system.cpu.fetch.Cycles                          8000                       # Number of cycles fetch has run and was not squashing or blocked
+system.cpu.fetch.IcacheSquashes                   608                       # Number of outstanding Icache misses that were squashed
+system.cpu.fetch.Insts                          26459                       # Number of instructions fetch has processed
+system.cpu.fetch.SquashCycles                    1268                       # Number of cycles fetch has spent squashing
+system.cpu.fetch.branchRate                  0.026888                       # Number of branch fetches per cycle
+system.cpu.fetch.icacheStallCycles               3542                       # Number of cycles fetch is stalled on an Icache miss
+system.cpu.fetch.predictedBranches               1467                       # Number of branches that fetch has predicted taken
+system.cpu.fetch.rate                        0.159692                       # Number of inst fetches per cycle
 system.cpu.fetch.rateDist.start_dist                           # Number of instructions fetched each cycle (Total)
-system.cpu.fetch.rateDist.samples              189147                      
+system.cpu.fetch.rateDist.samples              165688                      
 system.cpu.fetch.rateDist.min_value                 0                      
-                               0       174193   9209.40%           
-                               1          369     19.51%           
-                               2          281     14.86%           
-                               3         3638    192.34%           
-                               4         2283    120.70%           
-                               5         1005     53.13%           
-                               6          984     52.02%           
-                               7         2371    125.35%           
-                               8         4023    212.69%           
+                               0       161234   9731.18%           
+                               1          342     20.64%           
+                               2          283     17.08%           
+                               3          285     17.20%           
+                               4          390     23.54%           
+                               5          369     22.27%           
+                               6          367     22.15%           
+                               7          255     15.39%           
+                               8         2163    130.55%           
 system.cpu.fetch.rateDist.max_value                 8                      
 system.cpu.fetch.rateDist.end_dist
 
-system.cpu.icache.ReadReq_accesses              13182                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_accesses_0            13182                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_avg_miss_latency  7732.322368                       # average ReadReq miss latency
-system.cpu.icache.ReadReq_avg_miss_latency_0  7732.322368                       # average ReadReq miss latency
-system.cpu.icache.ReadReq_avg_mshr_miss_latency  7128.205742                       # average ReadReq mshr miss latency
-system.cpu.icache.ReadReq_avg_mshr_miss_latency_0  7128.205742                       # average ReadReq mshr miss latency
-system.cpu.icache.ReadReq_hits                  12270                       # number of ReadReq hits
-system.cpu.icache.ReadReq_hits_0                12270                       # number of ReadReq hits
-system.cpu.icache.ReadReq_miss_latency        7051878                       # number of ReadReq miss cycles
-system.cpu.icache.ReadReq_miss_latency_0      7051878                       # number of ReadReq miss cycles
-system.cpu.icache.ReadReq_miss_rate          0.069185                       # miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_miss_rate_0        0.069185                       # miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_misses                  912                       # number of ReadReq misses
-system.cpu.icache.ReadReq_misses_0                912                       # number of ReadReq misses
-system.cpu.icache.ReadReq_mshr_hits               285                       # number of ReadReq MSHR hits
-system.cpu.icache.ReadReq_mshr_hits_0             285                       # number of ReadReq MSHR hits
-system.cpu.icache.ReadReq_mshr_miss_latency      4469385                       # number of ReadReq MSHR miss cycles
-system.cpu.icache.ReadReq_mshr_miss_latency_0      4469385                       # number of ReadReq MSHR miss cycles
-system.cpu.icache.ReadReq_mshr_miss_rate     0.047565                       # mshr miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_mshr_miss_rate_0     0.047565                       # mshr miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_mshr_misses             627                       # number of ReadReq MSHR misses
-system.cpu.icache.ReadReq_mshr_misses_0           627                       # number of ReadReq MSHR misses
+system.cpu.icache.ReadReq_accesses               3542                       # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_accesses_0             3542                       # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_avg_miss_latency  7880.839306                       # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_miss_latency_0  7880.839306                       # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency  7272.060897                       # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency_0  7272.060897                       # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_hits                   2677                       # number of ReadReq hits
+system.cpu.icache.ReadReq_hits_0                 2677                       # number of ReadReq hits
+system.cpu.icache.ReadReq_miss_latency        6816926                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_latency_0      6816926                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_rate          0.244212                       # miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_miss_rate_0        0.244212                       # miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_misses                  865                       # number of ReadReq misses
+system.cpu.icache.ReadReq_misses_0                865                       # number of ReadReq misses
+system.cpu.icache.ReadReq_mshr_hits               241                       # number of ReadReq MSHR hits
+system.cpu.icache.ReadReq_mshr_hits_0             241                       # number of ReadReq MSHR hits
+system.cpu.icache.ReadReq_mshr_miss_latency      4537766                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_latency_0      4537766                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_rate     0.176172                       # mshr miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_mshr_miss_rate_0     0.176172                       # mshr miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_mshr_misses             624                       # number of ReadReq MSHR misses
+system.cpu.icache.ReadReq_mshr_misses_0           624                       # number of ReadReq MSHR misses
 system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.icache.avg_blocked_cycles_no_targets  5603.944444                       # average number of cycles each access was blocked
-system.cpu.icache.avg_refs                  19.569378                       # Average number of references to valid blocks.
+system.cpu.icache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
+system.cpu.icache.avg_refs                   4.290064                       # Average number of references to valid blocks.
 system.cpu.icache.blocked_no_mshrs                  0                       # number of cycles access was blocked
-system.cpu.icache.blocked_no_targets               18                       # number of cycles access was blocked
+system.cpu.icache.blocked_no_targets                0                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
-system.cpu.icache.blocked_cycles_no_targets       100871                       # number of cycles access was blocked
+system.cpu.icache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.icache.cache_copies                      0                       # number of cache copies performed
-system.cpu.icache.demand_accesses               13182                       # number of demand (read+write) accesses
-system.cpu.icache.demand_accesses_0             13182                       # number of demand (read+write) accesses
+system.cpu.icache.demand_accesses                3542                       # number of demand (read+write) accesses
+system.cpu.icache.demand_accesses_0              3542                       # number of demand (read+write) accesses
 system.cpu.icache.demand_accesses_1                 0                       # number of demand (read+write) accesses
-system.cpu.icache.demand_avg_miss_latency  7732.322368                       # average overall miss latency
-system.cpu.icache.demand_avg_miss_latency_0  7732.322368                       # average overall miss latency
+system.cpu.icache.demand_avg_miss_latency  7880.839306                       # average overall miss latency
+system.cpu.icache.demand_avg_miss_latency_0  7880.839306                       # average overall miss latency
 system.cpu.icache.demand_avg_miss_latency_1 <err: div-0>                       # average overall miss latency
-system.cpu.icache.demand_avg_mshr_miss_latency  7128.205742                       # average overall mshr miss latency
-system.cpu.icache.demand_avg_mshr_miss_latency_0  7128.205742                       # average overall mshr miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency  7272.060897                       # average overall mshr miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency_0  7272.060897                       # average overall mshr miss latency
 system.cpu.icache.demand_avg_mshr_miss_latency_1 <err: div-0>                       # average overall mshr miss latency
-system.cpu.icache.demand_hits                   12270                       # number of demand (read+write) hits
-system.cpu.icache.demand_hits_0                 12270                       # number of demand (read+write) hits
+system.cpu.icache.demand_hits                    2677                       # number of demand (read+write) hits
+system.cpu.icache.demand_hits_0                  2677                       # number of demand (read+write) hits
 system.cpu.icache.demand_hits_1                     0                       # number of demand (read+write) hits
-system.cpu.icache.demand_miss_latency         7051878                       # number of demand (read+write) miss cycles
-system.cpu.icache.demand_miss_latency_0       7051878                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_latency         6816926                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_latency_0       6816926                       # number of demand (read+write) miss cycles
 system.cpu.icache.demand_miss_latency_1             0                       # number of demand (read+write) miss cycles
-system.cpu.icache.demand_miss_rate           0.069185                       # miss rate for demand accesses
-system.cpu.icache.demand_miss_rate_0         0.069185                       # miss rate for demand accesses
+system.cpu.icache.demand_miss_rate           0.244212                       # miss rate for demand accesses
+system.cpu.icache.demand_miss_rate_0         0.244212                       # miss rate for demand accesses
 system.cpu.icache.demand_miss_rate_1     <err: div-0>                       # miss rate for demand accesses
-system.cpu.icache.demand_misses                   912                       # number of demand (read+write) misses
-system.cpu.icache.demand_misses_0                 912                       # number of demand (read+write) misses
+system.cpu.icache.demand_misses                   865                       # number of demand (read+write) misses
+system.cpu.icache.demand_misses_0                 865                       # number of demand (read+write) misses
 system.cpu.icache.demand_misses_1                   0                       # number of demand (read+write) misses
-system.cpu.icache.demand_mshr_hits                285                       # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_hits_0              285                       # number of demand (read+write) MSHR hits
+system.cpu.icache.demand_mshr_hits                241                       # number of demand (read+write) MSHR hits
+system.cpu.icache.demand_mshr_hits_0              241                       # number of demand (read+write) MSHR hits
 system.cpu.icache.demand_mshr_hits_1                0                       # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_miss_latency      4469385                       # number of demand (read+write) MSHR miss cycles
-system.cpu.icache.demand_mshr_miss_latency_0      4469385                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_latency      4537766                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_latency_0      4537766                       # number of demand (read+write) MSHR miss cycles
 system.cpu.icache.demand_mshr_miss_latency_1            0                       # number of demand (read+write) MSHR miss cycles
-system.cpu.icache.demand_mshr_miss_rate      0.047565                       # mshr miss rate for demand accesses
-system.cpu.icache.demand_mshr_miss_rate_0     0.047565                       # mshr miss rate for demand accesses
+system.cpu.icache.demand_mshr_miss_rate      0.176172                       # mshr miss rate for demand accesses
+system.cpu.icache.demand_mshr_miss_rate_0     0.176172                       # mshr miss rate for demand accesses
 system.cpu.icache.demand_mshr_miss_rate_1 <err: div-0>                       # mshr miss rate for demand accesses
-system.cpu.icache.demand_mshr_misses              627                       # number of demand (read+write) MSHR misses
-system.cpu.icache.demand_mshr_misses_0            627                       # number of demand (read+write) MSHR misses
+system.cpu.icache.demand_mshr_misses              624                       # number of demand (read+write) MSHR misses
+system.cpu.icache.demand_mshr_misses_0            624                       # number of demand (read+write) MSHR misses
 system.cpu.icache.demand_mshr_misses_1              0                       # number of demand (read+write) MSHR misses
 system.cpu.icache.fast_writes                       0                       # number of fast writes performed
 system.cpu.icache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.icache.mshr_cap_events_0                 0                       # number of times MSHR cap was activated
 system.cpu.icache.mshr_cap_events_1                 0                       # number of times MSHR cap was activated
 system.cpu.icache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.icache.overall_accesses              13182                       # number of overall (read+write) accesses
-system.cpu.icache.overall_accesses_0            13182                       # number of overall (read+write) accesses
+system.cpu.icache.overall_accesses               3542                       # number of overall (read+write) accesses
+system.cpu.icache.overall_accesses_0             3542                       # number of overall (read+write) accesses
 system.cpu.icache.overall_accesses_1                0                       # number of overall (read+write) accesses
-system.cpu.icache.overall_avg_miss_latency  7732.322368                       # average overall miss latency
-system.cpu.icache.overall_avg_miss_latency_0  7732.322368                       # average overall miss latency
+system.cpu.icache.overall_avg_miss_latency  7880.839306                       # average overall miss latency
+system.cpu.icache.overall_avg_miss_latency_0  7880.839306                       # average overall miss latency
 system.cpu.icache.overall_avg_miss_latency_1 <err: div-0>                       # average overall miss latency
-system.cpu.icache.overall_avg_mshr_miss_latency  7128.205742                       # average overall mshr miss latency
-system.cpu.icache.overall_avg_mshr_miss_latency_0  7128.205742                       # average overall mshr miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency  7272.060897                       # average overall mshr miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency_0  7272.060897                       # average overall mshr miss latency
 system.cpu.icache.overall_avg_mshr_miss_latency_1 <err: div-0>                       # average overall mshr miss latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency_0 <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency_1 <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.icache.overall_hits                  12270                       # number of overall hits
-system.cpu.icache.overall_hits_0                12270                       # number of overall hits
+system.cpu.icache.overall_hits                   2677                       # number of overall hits
+system.cpu.icache.overall_hits_0                 2677                       # number of overall hits
 system.cpu.icache.overall_hits_1                    0                       # number of overall hits
-system.cpu.icache.overall_miss_latency        7051878                       # number of overall miss cycles
-system.cpu.icache.overall_miss_latency_0      7051878                       # number of overall miss cycles
+system.cpu.icache.overall_miss_latency        6816926                       # number of overall miss cycles
+system.cpu.icache.overall_miss_latency_0      6816926                       # number of overall miss cycles
 system.cpu.icache.overall_miss_latency_1            0                       # number of overall miss cycles
-system.cpu.icache.overall_miss_rate          0.069185                       # miss rate for overall accesses
-system.cpu.icache.overall_miss_rate_0        0.069185                       # miss rate for overall accesses
+system.cpu.icache.overall_miss_rate          0.244212                       # miss rate for overall accesses
+system.cpu.icache.overall_miss_rate_0        0.244212                       # miss rate for overall accesses
 system.cpu.icache.overall_miss_rate_1    <err: div-0>                       # miss rate for overall accesses
-system.cpu.icache.overall_misses                  912                       # number of overall misses
-system.cpu.icache.overall_misses_0                912                       # number of overall misses
+system.cpu.icache.overall_misses                  865                       # number of overall misses
+system.cpu.icache.overall_misses_0                865                       # number of overall misses
 system.cpu.icache.overall_misses_1                  0                       # number of overall misses
-system.cpu.icache.overall_mshr_hits               285                       # number of overall MSHR hits
-system.cpu.icache.overall_mshr_hits_0             285                       # number of overall MSHR hits
+system.cpu.icache.overall_mshr_hits               241                       # number of overall MSHR hits
+system.cpu.icache.overall_mshr_hits_0             241                       # number of overall MSHR hits
 system.cpu.icache.overall_mshr_hits_1               0                       # number of overall MSHR hits
-system.cpu.icache.overall_mshr_miss_latency      4469385                       # number of overall MSHR miss cycles
-system.cpu.icache.overall_mshr_miss_latency_0      4469385                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_latency      4537766                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_latency_0      4537766                       # number of overall MSHR miss cycles
 system.cpu.icache.overall_mshr_miss_latency_1            0                       # number of overall MSHR miss cycles
-system.cpu.icache.overall_mshr_miss_rate     0.047565                       # mshr miss rate for overall accesses
-system.cpu.icache.overall_mshr_miss_rate_0     0.047565                       # mshr miss rate for overall accesses
+system.cpu.icache.overall_mshr_miss_rate     0.176172                       # mshr miss rate for overall accesses
+system.cpu.icache.overall_mshr_miss_rate_0     0.176172                       # mshr miss rate for overall accesses
 system.cpu.icache.overall_mshr_miss_rate_1 <err: div-0>                       # mshr miss rate for overall accesses
-system.cpu.icache.overall_mshr_misses             627                       # number of overall MSHR misses
-system.cpu.icache.overall_mshr_misses_0           627                       # number of overall MSHR misses
+system.cpu.icache.overall_mshr_misses             624                       # number of overall MSHR misses
+system.cpu.icache.overall_mshr_misses_0           624                       # number of overall MSHR misses
 system.cpu.icache.overall_mshr_misses_1             0                       # number of overall MSHR misses
 system.cpu.icache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.icache.overall_mshr_uncacheable_latency_0            0                       # number of overall MSHR uncacheable cycles
@@ -381,104 +381,104 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.icache.replacements                      6                       # number of replacements
 system.cpu.icache.replacements_0                    6                       # number of replacements
 system.cpu.icache.replacements_1                    0                       # number of replacements
-system.cpu.icache.sampled_refs                    627                       # Sample count of references to valid blocks.
+system.cpu.icache.sampled_refs                    624                       # Sample count of references to valid blocks.
 system.cpu.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
 system.cpu.icache.soft_prefetch_mshr_full_0            0                       # number of mshr full events for SW prefetching instrutions
 system.cpu.icache.soft_prefetch_mshr_full_1            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse                288.361956                       # Cycle average of tags in use
-system.cpu.icache.total_refs                    12270                       # Total number of references to valid blocks.
+system.cpu.icache.tagsinuse                289.929418                       # Cycle average of tags in use
+system.cpu.icache.total_refs                     2677                       # Total number of references to valid blocks.
 system.cpu.icache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.icache.writebacks                        0                       # number of writebacks
 system.cpu.icache.writebacks_0                      0                       # number of writebacks
 system.cpu.icache.writebacks_1                      0                       # number of writebacks
-system.cpu.idleCycles                         2043018                       # Total number of cycles that the CPU has spent unscheduled due to idling
-system.cpu.iew.EXEC:branches                     4024                       # Number of branches executed
-system.cpu.iew.EXEC:branches_0                   1569                       # Number of branches executed
-system.cpu.iew.EXEC:branches_1                   2455                       # Number of branches executed
+system.cpu.idleCycles                         1929477                       # Total number of cycles that the CPU has spent unscheduled due to idling
+system.cpu.iew.EXEC:branches                     2535                       # Number of branches executed
+system.cpu.iew.EXEC:branches_0                   1269                       # Number of branches executed
+system.cpu.iew.EXEC:branches_1                   1266                       # Number of branches executed
 system.cpu.iew.EXEC:nop                            84                       # number of nop insts executed
 system.cpu.iew.EXEC:nop_0                          42                       # number of nop insts executed
 system.cpu.iew.EXEC:nop_1                          42                       # number of nop insts executed
-system.cpu.iew.EXEC:rate                     0.144523                       # Inst execution rate
-system.cpu.iew.EXEC:refs                        11361                       # number of memory reference insts executed
-system.cpu.iew.EXEC:refs_0                       4575                       # number of memory reference insts executed
-system.cpu.iew.EXEC:refs_1                       6786                       # number of memory reference insts executed
-system.cpu.iew.EXEC:stores                       3833                       # Number of stores executed
-system.cpu.iew.EXEC:stores_0                     1337                       # Number of stores executed
-system.cpu.iew.EXEC:stores_1                     2496                       # Number of stores executed
+system.cpu.iew.EXEC:rate                     0.100864                       # Inst execution rate
+system.cpu.iew.EXEC:refs                         5422                       # number of memory reference insts executed
+system.cpu.iew.EXEC:refs_0                       2727                       # number of memory reference insts executed
+system.cpu.iew.EXEC:refs_1                       2695                       # number of memory reference insts executed
+system.cpu.iew.EXEC:stores                       1997                       # Number of stores executed
+system.cpu.iew.EXEC:stores_0                     1003                       # Number of stores executed
+system.cpu.iew.EXEC:stores_1                      994                       # Number of stores executed
 system.cpu.iew.EXEC:swp                             0                       # number of swp insts executed
 system.cpu.iew.EXEC:swp_0                           0                       # number of swp insts executed
 system.cpu.iew.EXEC:swp_1                           0                       # number of swp insts executed
-system.cpu.iew.WB:consumers                     12385                       # num instructions consuming a value
-system.cpu.iew.WB:consumers_0                    5750                       # num instructions consuming a value
-system.cpu.iew.WB:consumers_1                    6635                       # num instructions consuming a value
-system.cpu.iew.WB:count                         22604                       # cumulative count of insts written-back
-system.cpu.iew.WB:count_0                       10240                       # cumulative count of insts written-back
-system.cpu.iew.WB:count_1                       12364                       # cumulative count of insts written-back
-system.cpu.iew.WB:fanout                     0.811385                       # average fanout of values written-back
-system.cpu.iew.WB:fanout_0                   0.800522                       # average fanout of values written-back
-system.cpu.iew.WB:fanout_1                   0.820799                       # average fanout of values written-back
+system.cpu.iew.WB:consumers                     10258                       # num instructions consuming a value
+system.cpu.iew.WB:consumers_0                    5162                       # num instructions consuming a value
+system.cpu.iew.WB:consumers_1                    5096                       # num instructions consuming a value
+system.cpu.iew.WB:count                         16101                       # cumulative count of insts written-back
+system.cpu.iew.WB:count_0                        8089                       # cumulative count of insts written-back
+system.cpu.iew.WB:count_1                        8012                       # cumulative count of insts written-back
+system.cpu.iew.WB:fanout                     0.770326                       # average fanout of values written-back
+system.cpu.iew.WB:fanout_0                   0.768888                       # average fanout of values written-back
+system.cpu.iew.WB:fanout_1                   0.771782                       # average fanout of values written-back
 system.cpu.iew.WB:penalized                         0                       # number of instrctions required to write to 'other' IQ
 system.cpu.iew.WB:penalized_0                       0                       # number of instrctions required to write to 'other' IQ
 system.cpu.iew.WB:penalized_1                       0                       # number of instrctions required to write to 'other' IQ
 system.cpu.iew.WB:penalized_rate                    0                       # fraction of instructions written-back that wrote to 'other' IQ
 system.cpu.iew.WB:penalized_rate_0                  0                       # fraction of instructions written-back that wrote to 'other' IQ
 system.cpu.iew.WB:penalized_rate_1                  0                       # fraction of instructions written-back that wrote to 'other' IQ
-system.cpu.iew.WB:producers                     10049                       # num instructions producing a value
-system.cpu.iew.WB:producers_0                    4603                       # num instructions producing a value
-system.cpu.iew.WB:producers_1                    5446                       # num instructions producing a value
-system.cpu.iew.WB:rate                       0.119505                       # insts written-back per cycle
-system.cpu.iew.WB:rate_0                     0.054138                       # insts written-back per cycle
-system.cpu.iew.WB:rate_1                     0.065367                       # insts written-back per cycle
-system.cpu.iew.WB:sent                          22763                       # cumulative count of insts sent to commit
-system.cpu.iew.WB:sent_0                        10322                       # cumulative count of insts sent to commit
-system.cpu.iew.WB:sent_1                        12441                       # cumulative count of insts sent to commit
-system.cpu.iew.branchMispredicts                 1027                       # Number of branch mispredicts detected at execute
-system.cpu.iew.iewBlockCycles                   60103                       # Number of cycles IEW is blocking
-system.cpu.iew.iewDispLoadInsts                  8942                       # Number of dispatched load instructions
+system.cpu.iew.WB:producers                      7902                       # num instructions producing a value
+system.cpu.iew.WB:producers_0                    3969                       # num instructions producing a value
+system.cpu.iew.WB:producers_1                    3933                       # num instructions producing a value
+system.cpu.iew.WB:rate                       0.097177                       # insts written-back per cycle
+system.cpu.iew.WB:rate_0                     0.048821                       # insts written-back per cycle
+system.cpu.iew.WB:rate_1                     0.048356                       # insts written-back per cycle
+system.cpu.iew.WB:sent                          16249                       # cumulative count of insts sent to commit
+system.cpu.iew.WB:sent_0                         8166                       # cumulative count of insts sent to commit
+system.cpu.iew.WB:sent_1                         8083                       # cumulative count of insts sent to commit
+system.cpu.iew.branchMispredicts                 1031                       # Number of branch mispredicts detected at execute
+system.cpu.iew.iewBlockCycles                   84087                       # Number of cycles IEW is blocking
+system.cpu.iew.iewDispLoadInsts                  4274                       # Number of dispatched load instructions
 system.cpu.iew.iewDispNonSpecInsts                 41                       # Number of dispatched non-speculative instructions
-system.cpu.iew.iewDispSquashedInsts              5344                       # Number of squashed instructions skipped by dispatch
-system.cpu.iew.iewDispStoreInsts                 6219                       # Number of dispatched store instructions
-system.cpu.iew.iewDispatchedInsts               40858                       # Number of instructions dispatched to IQ
-system.cpu.iew.iewExecLoadInsts                  7528                       # Number of load instructions executed
-system.cpu.iew.iewExecLoadInsts_0                3238                       # Number of load instructions executed
-system.cpu.iew.iewExecLoadInsts_1                4290                       # Number of load instructions executed
-system.cpu.iew.iewExecSquashedInsts               872                       # Number of squashed instructions skipped in execute
-system.cpu.iew.iewExecutedInsts                 27336                       # Number of executed instructions
-system.cpu.iew.iewIQFullEvents                     45                       # Number of times the IQ has become full, causing a stall
+system.cpu.iew.iewDispSquashedInsts               468                       # Number of squashed instructions skipped by dispatch
+system.cpu.iew.iewDispStoreInsts                 2288                       # Number of dispatched store instructions
+system.cpu.iew.iewDispatchedInsts               20693                       # Number of instructions dispatched to IQ
+system.cpu.iew.iewExecLoadInsts                  3425                       # Number of load instructions executed
+system.cpu.iew.iewExecLoadInsts_0                1724                       # Number of load instructions executed
+system.cpu.iew.iewExecLoadInsts_1                1701                       # Number of load instructions executed
+system.cpu.iew.iewExecSquashedInsts               741                       # Number of squashed instructions skipped in execute
+system.cpu.iew.iewExecutedInsts                 16712                       # Number of executed instructions
+system.cpu.iew.iewIQFullEvents                     57                       # Number of times the IQ has become full, causing a stall
 system.cpu.iew.iewIdleCycles                        0                       # Number of cycles IEW is idle
 system.cpu.iew.iewLSQFullEvents                     4                       # Number of times the LSQ has become full, causing a stall
-system.cpu.iew.iewSquashCycles                   5733                       # Number of cycles IEW is squashing
-system.cpu.iew.iewUnblockCycles                   122                       # Number of cycles IEW is unblocking
+system.cpu.iew.iewSquashCycles                   1856                       # Number of cycles IEW is squashing
+system.cpu.iew.iewUnblockCycles                   131                       # Number of cycles IEW is unblocking
 system.cpu.iew.lsq.thread.0.blockedLoads            0                       # Number of blocked loads due to partial load-store forwarding
-system.cpu.iew.lsq.thread.0.cacheBlocked         1584                       # Number of times an access to memory failed due to the cache being blocked
-system.cpu.iew.lsq.thread.0.forwLoads              65                       # Number of loads that had data forwarded from stores
+system.cpu.iew.lsq.thread.0.cacheBlocked            0                       # Number of times an access to memory failed due to the cache being blocked
+system.cpu.iew.lsq.thread.0.forwLoads              70                       # Number of loads that had data forwarded from stores
 system.cpu.iew.lsq.thread.0.ignoredResponses           10                       # Number of memory responses ignored because the instruction is squashed
 system.cpu.iew.lsq.thread.0.invAddrLoads            0                       # Number of loads ignored due to an invalid address
 system.cpu.iew.lsq.thread.0.invAddrSwpfs            0                       # Number of software prefetches ignored due to an invalid address
-system.cpu.iew.lsq.thread.0.memOrderViolation           56                       # Number of memory ordering violations
+system.cpu.iew.lsq.thread.0.memOrderViolation           60                       # Number of memory ordering violations
 system.cpu.iew.lsq.thread.0.rescheduledLoads            1                       # Number of loads that were rescheduled
-system.cpu.iew.lsq.thread.0.squashedLoads         2678                       # Number of loads squashed
-system.cpu.iew.lsq.thread.0.squashedStores          968                       # Number of stores squashed
+system.cpu.iew.lsq.thread.0.squashedLoads         1153                       # Number of loads squashed
+system.cpu.iew.lsq.thread.0.squashedStores          338                       # Number of stores squashed
 system.cpu.iew.lsq.thread.1.blockedLoads            0                       # Number of blocked loads due to partial load-store forwarding
-system.cpu.iew.lsq.thread.1.cacheBlocked         2643                       # Number of times an access to memory failed due to the cache being blocked
-system.cpu.iew.lsq.thread.1.forwLoads              67                       # Number of loads that had data forwarded from stores
-system.cpu.iew.lsq.thread.1.ignoredResponses            7                       # Number of memory responses ignored because the instruction is squashed
+system.cpu.iew.lsq.thread.1.cacheBlocked            0                       # Number of times an access to memory failed due to the cache being blocked
+system.cpu.iew.lsq.thread.1.forwLoads              65                       # Number of loads that had data forwarded from stores
+system.cpu.iew.lsq.thread.1.ignoredResponses           12                       # Number of memory responses ignored because the instruction is squashed
 system.cpu.iew.lsq.thread.1.invAddrLoads            0                       # Number of loads ignored due to an invalid address
 system.cpu.iew.lsq.thread.1.invAddrSwpfs            0                       # Number of software prefetches ignored due to an invalid address
-system.cpu.iew.lsq.thread.1.memOrderViolation           54                       # Number of memory ordering violations
+system.cpu.iew.lsq.thread.1.memOrderViolation           59                       # Number of memory ordering violations
 system.cpu.iew.lsq.thread.1.rescheduledLoads            1                       # Number of loads that were rescheduled
-system.cpu.iew.lsq.thread.1.squashedLoads         4306                       # Number of loads squashed
-system.cpu.iew.lsq.thread.1.squashedStores         3627                       # Number of stores squashed
-system.cpu.iew.memOrderViolationEvents            110                       # Number of memory order violations
-system.cpu.iew.predictedNotTakenIncorrect          796                       # Number of branches that were predicted not taken incorrectly
-system.cpu.iew.predictedTakenIncorrect            231                       # Number of branches that were predicted taken incorrectly
-system.cpu.ipc_0                             0.002520                       # IPC: Instructions Per Cycle
-system.cpu.ipc_1                             0.002519                       # IPC: Instructions Per Cycle
-system.cpu.ipc_total                         0.005039                       # IPC: Total IPC of All Threads
-system.cpu.iq.ISSUE:FU_type_0                   12578                       # Type of FU issued
+system.cpu.iew.lsq.thread.1.squashedLoads         1163                       # Number of loads squashed
+system.cpu.iew.lsq.thread.1.squashedStores          326                       # Number of stores squashed
+system.cpu.iew.memOrderViolationEvents            119                       # Number of memory order violations
+system.cpu.iew.predictedNotTakenIncorrect          791                       # Number of branches that were predicted not taken incorrectly
+system.cpu.iew.predictedTakenIncorrect            240                       # Number of branches that were predicted taken incorrectly
+system.cpu.ipc_0                             0.002684                       # IPC: Instructions Per Cycle
+system.cpu.ipc_1                             0.002684                       # IPC: Instructions Per Cycle
+system.cpu.ipc_total                         0.005368                       # IPC: Total IPC of All Threads
+system.cpu.iq.ISSUE:FU_type_0                    8768                       # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0.start_dist
                           (null)            2      0.02%            # Type of FU issued
-                          IntAlu         7865     62.53%            # Type of FU issued
+                          IntAlu         5895     67.23%            # Type of FU issued
                          IntMult            1      0.01%            # Type of FU issued
                           IntDiv            0      0.00%            # Type of FU issued
                         FloatAdd            2      0.02%            # Type of FU issued
@@ -487,54 +487,54 @@ system.cpu.iq.ISSUE:FU_type_0.start_dist
                        FloatMult            0      0.00%            # Type of FU issued
                         FloatDiv            0      0.00%            # Type of FU issued
                        FloatSqrt            0      0.00%            # Type of FU issued
-                         MemRead         3344     26.59%            # Type of FU issued
-                        MemWrite         1364     10.84%            # Type of FU issued
+                         MemRead         1838     20.96%            # Type of FU issued
+                        MemWrite         1030     11.75%            # Type of FU issued
                        IprAccess            0      0.00%            # Type of FU issued
                     InstPrefetch            0      0.00%            # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0.end_dist
-system.cpu.iq.ISSUE:FU_type_1                   15630                       # Type of FU issued
+system.cpu.iq.ISSUE:FU_type_1                    8685                       # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_1.start_dist
-                          (null)            2      0.01%            # Type of FU issued
-                          IntAlu         8707     55.71%            # Type of FU issued
+                          (null)            2      0.02%            # Type of FU issued
+                          IntAlu         5859     67.46%            # Type of FU issued
                          IntMult            1      0.01%            # Type of FU issued
                           IntDiv            0      0.00%            # Type of FU issued
-                        FloatAdd            2      0.01%            # Type of FU issued
+                        FloatAdd            2      0.02%            # Type of FU issued
                         FloatCmp            0      0.00%            # Type of FU issued
                         FloatCvt            0      0.00%            # Type of FU issued
                        FloatMult            0      0.00%            # Type of FU issued
                         FloatDiv            0      0.00%            # Type of FU issued
                        FloatSqrt            0      0.00%            # Type of FU issued
-                         MemRead         4394     28.11%            # Type of FU issued
-                        MemWrite         2524     16.15%            # Type of FU issued
+                         MemRead         1800     20.73%            # Type of FU issued
+                        MemWrite         1021     11.76%            # Type of FU issued
                        IprAccess            0      0.00%            # Type of FU issued
                     InstPrefetch            0      0.00%            # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_1.end_dist
-system.cpu.iq.ISSUE:FU_type                     28208                       # Type of FU issued
+system.cpu.iq.ISSUE:FU_type                     17453                       # Type of FU issued
 system.cpu.iq.ISSUE:FU_type.start_dist
-                          (null)            4      0.01%            # Type of FU issued
-                          IntAlu        16572     58.75%            # Type of FU issued
+                          (null)            4      0.02%            # Type of FU issued
+                          IntAlu        11754     67.35%            # Type of FU issued
                          IntMult            2      0.01%            # Type of FU issued
                           IntDiv            0      0.00%            # Type of FU issued
-                        FloatAdd            4      0.01%            # Type of FU issued
+                        FloatAdd            4      0.02%            # Type of FU issued
                         FloatCmp            0      0.00%            # Type of FU issued
                         FloatCvt            0      0.00%            # Type of FU issued
                        FloatMult            0      0.00%            # Type of FU issued
                         FloatDiv            0      0.00%            # Type of FU issued
                        FloatSqrt            0      0.00%            # Type of FU issued
-                         MemRead         7738     27.43%            # Type of FU issued
-                        MemWrite         3888     13.78%            # Type of FU issued
+                         MemRead         3638     20.84%            # Type of FU issued
+                        MemWrite         2051     11.75%            # Type of FU issued
                        IprAccess            0      0.00%            # Type of FU issued
                     InstPrefetch            0      0.00%            # Type of FU issued
 system.cpu.iq.ISSUE:FU_type.end_dist
-system.cpu.iq.ISSUE:fu_busy_cnt                   149                       # FU busy when requested
-system.cpu.iq.ISSUE:fu_busy_cnt_0                  72                       # FU busy when requested
-system.cpu.iq.ISSUE:fu_busy_cnt_1                  77                       # FU busy when requested
-system.cpu.iq.ISSUE:fu_busy_rate             0.005282                       # FU busy rate (busy events/executed inst)
-system.cpu.iq.ISSUE:fu_busy_rate_0           0.002552                       # FU busy rate (busy events/executed inst)
-system.cpu.iq.ISSUE:fu_busy_rate_1           0.002730                       # FU busy rate (busy events/executed inst)
+system.cpu.iq.ISSUE:fu_busy_cnt                   133                       # FU busy when requested
+system.cpu.iq.ISSUE:fu_busy_cnt_0                  69                       # FU busy when requested
+system.cpu.iq.ISSUE:fu_busy_cnt_1                  64                       # FU busy when requested
+system.cpu.iq.ISSUE:fu_busy_rate             0.007620                       # FU busy rate (busy events/executed inst)
+system.cpu.iq.ISSUE:fu_busy_rate_0           0.003953                       # FU busy rate (busy events/executed inst)
+system.cpu.iq.ISSUE:fu_busy_rate_1           0.003667                       # FU busy rate (busy events/executed inst)
 system.cpu.iq.ISSUE:fu_full.start_dist
                           (null)            0      0.00%            # attempts to use FU when none available
-                          IntAlu            1      0.67%            # attempts to use FU when none available
+                          IntAlu            0      0.00%            # attempts to use FU when none available
                          IntMult            0      0.00%            # attempts to use FU when none available
                           IntDiv            0      0.00%            # attempts to use FU when none available
                         FloatAdd            0      0.00%            # attempts to use FU when none available
@@ -543,52 +543,52 @@ system.cpu.iq.ISSUE:fu_full.start_dist
                        FloatMult            0      0.00%            # attempts to use FU when none available
                         FloatDiv            0      0.00%            # attempts to use FU when none available
                        FloatSqrt            0      0.00%            # attempts to use FU when none available
-                         MemRead           83     55.70%            # attempts to use FU when none available
-                        MemWrite           65     43.62%            # attempts to use FU when none available
+                         MemRead           79     59.40%            # attempts to use FU when none available
+                        MemWrite           54     40.60%            # attempts to use FU when none available
                        IprAccess            0      0.00%            # attempts to use FU when none available
                     InstPrefetch            0      0.00%            # attempts to use FU when none available
 system.cpu.iq.ISSUE:fu_full.end_dist
 system.cpu.iq.ISSUE:issued_per_cycle.start_dist                     # Number of insts issued each cycle
-system.cpu.iq.ISSUE:issued_per_cycle.samples       189147                      
+system.cpu.iq.ISSUE:issued_per_cycle.samples       165688                      
 system.cpu.iq.ISSUE:issued_per_cycle.min_value            0                      
-                               0       174626   9232.29%           
-                               1         7072    373.89%           
-                               2         3403    179.91%           
-                               3         2709    143.22%           
-                               4          713     37.70%           
-                               5          443     23.42%           
-                               6          143      7.56%           
-                               7           26      1.37%           
-                               8           12      0.63%           
+                               0       156701   9457.59%           
+                               1         4387    264.77%           
+                               2         2473    149.26%           
+                               3         1076     64.94%           
+                               4          569     34.34%           
+                               5          325     19.62%           
+                               6          120      7.24%           
+                               7           25      1.51%           
+                               8           12      0.72%           
 system.cpu.iq.ISSUE:issued_per_cycle.max_value            8                      
 system.cpu.iq.ISSUE:issued_per_cycle.end_dist
 
-system.cpu.iq.ISSUE:rate                     0.149133                       # Inst issue rate
-system.cpu.iq.iqInstsAdded                      40733                       # Number of instructions added to the IQ (excludes non-spec)
-system.cpu.iq.iqInstsIssued                     28208                       # Number of instructions issued
+system.cpu.iq.ISSUE:rate                     0.105337                       # Inst issue rate
+system.cpu.iq.iqInstsAdded                      20568                       # Number of instructions added to the IQ (excludes non-spec)
+system.cpu.iq.iqInstsIssued                     17453                       # Number of instructions issued
 system.cpu.iq.iqNonSpecInstsAdded                  41                       # Number of non-speculative instructions added to the IQ
-system.cpu.iq.iqSquashedInstsExamined           28495                       # Number of squashed instructions iterated over during squash; mainly for profiling
-system.cpu.iq.iqSquashedInstsIssued               192                       # Number of squashed instructions issued
+system.cpu.iq.iqSquashedInstsExamined            8303                       # Number of squashed instructions iterated over during squash; mainly for profiling
+system.cpu.iq.iqSquashedInstsIssued               214                       # Number of squashed instructions issued
 system.cpu.iq.iqSquashedNonSpecRemoved              7                       # Number of squashed non-spec instructions that were removed
-system.cpu.iq.iqSquashedOperandsExamined        21369                       # Number of squashed operands that are examined and possibly removed from graph
-system.cpu.l2cache.ReadReq_accesses               970                       # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_accesses_0             970                       # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_avg_miss_latency  6748.795876                       # average ReadReq miss latency
-system.cpu.l2cache.ReadReq_avg_miss_latency_0  6748.795876                       # average ReadReq miss latency
-system.cpu.l2cache.ReadReq_avg_mshr_miss_latency  3604.818557                       # average ReadReq mshr miss latency
-system.cpu.l2cache.ReadReq_avg_mshr_miss_latency_0  3604.818557                       # average ReadReq mshr miss latency
-system.cpu.l2cache.ReadReq_miss_latency       6546332                       # number of ReadReq miss cycles
-system.cpu.l2cache.ReadReq_miss_latency_0      6546332                       # number of ReadReq miss cycles
+system.cpu.iq.iqSquashedOperandsExamined         4870                       # Number of squashed operands that are examined and possibly removed from graph
+system.cpu.l2cache.ReadReq_accesses               968                       # number of ReadReq accesses(hits+misses)
+system.cpu.l2cache.ReadReq_accesses_0             968                       # number of ReadReq accesses(hits+misses)
+system.cpu.l2cache.ReadReq_avg_miss_latency  7151.675620                       # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_avg_miss_latency_0  7151.675620                       # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_avg_mshr_miss_latency  3855.918388                       # average ReadReq mshr miss latency
+system.cpu.l2cache.ReadReq_avg_mshr_miss_latency_0  3855.918388                       # average ReadReq mshr miss latency
+system.cpu.l2cache.ReadReq_miss_latency       6922822                       # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_miss_latency_0      6922822                       # number of ReadReq miss cycles
 system.cpu.l2cache.ReadReq_miss_rate                1                       # miss rate for ReadReq accesses
 system.cpu.l2cache.ReadReq_miss_rate_0              1                       # miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_misses                 970                       # number of ReadReq misses
-system.cpu.l2cache.ReadReq_misses_0               970                       # number of ReadReq misses
-system.cpu.l2cache.ReadReq_mshr_miss_latency      3496674                       # number of ReadReq MSHR miss cycles
-system.cpu.l2cache.ReadReq_mshr_miss_latency_0      3496674                       # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_misses                 968                       # number of ReadReq misses
+system.cpu.l2cache.ReadReq_misses_0               968                       # number of ReadReq misses
+system.cpu.l2cache.ReadReq_mshr_miss_latency      3732529                       # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_latency_0      3732529                       # number of ReadReq MSHR miss cycles
 system.cpu.l2cache.ReadReq_mshr_miss_rate            1                       # mshr miss rate for ReadReq accesses
 system.cpu.l2cache.ReadReq_mshr_miss_rate_0            1                       # mshr miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_mshr_misses            970                       # number of ReadReq MSHR misses
-system.cpu.l2cache.ReadReq_mshr_misses_0          970                       # number of ReadReq MSHR misses
+system.cpu.l2cache.ReadReq_mshr_misses            968                       # number of ReadReq MSHR misses
+system.cpu.l2cache.ReadReq_mshr_misses_0          968                       # number of ReadReq MSHR misses
 system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.l2cache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.l2cache.avg_refs                         0                       # Average number of references to valid blocks.
@@ -597,52 +597,52 @@ system.cpu.l2cache.blocked_no_targets               0                       # nu
 system.cpu.l2cache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.l2cache.cache_copies                     0                       # number of cache copies performed
-system.cpu.l2cache.demand_accesses                970                       # number of demand (read+write) accesses
-system.cpu.l2cache.demand_accesses_0              970                       # number of demand (read+write) accesses
+system.cpu.l2cache.demand_accesses                968                       # number of demand (read+write) accesses
+system.cpu.l2cache.demand_accesses_0              968                       # number of demand (read+write) accesses
 system.cpu.l2cache.demand_accesses_1                0                       # number of demand (read+write) accesses
-system.cpu.l2cache.demand_avg_miss_latency  6748.795876                       # average overall miss latency
-system.cpu.l2cache.demand_avg_miss_latency_0  6748.795876                       # average overall miss latency
+system.cpu.l2cache.demand_avg_miss_latency  7151.675620                       # average overall miss latency
+system.cpu.l2cache.demand_avg_miss_latency_0  7151.675620                       # average overall miss latency
 system.cpu.l2cache.demand_avg_miss_latency_1 <err: div-0>                       # average overall miss latency
-system.cpu.l2cache.demand_avg_mshr_miss_latency  3604.818557                       # average overall mshr miss latency
-system.cpu.l2cache.demand_avg_mshr_miss_latency_0  3604.818557                       # average overall mshr miss latency
+system.cpu.l2cache.demand_avg_mshr_miss_latency  3855.918388                       # average overall mshr miss latency
+system.cpu.l2cache.demand_avg_mshr_miss_latency_0  3855.918388                       # average overall mshr miss latency
 system.cpu.l2cache.demand_avg_mshr_miss_latency_1 <err: div-0>                       # average overall mshr miss latency
 system.cpu.l2cache.demand_hits                      0                       # number of demand (read+write) hits
 system.cpu.l2cache.demand_hits_0                    0                       # number of demand (read+write) hits
 system.cpu.l2cache.demand_hits_1                    0                       # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency        6546332                       # number of demand (read+write) miss cycles
-system.cpu.l2cache.demand_miss_latency_0      6546332                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_latency        6922822                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_latency_0      6922822                       # number of demand (read+write) miss cycles
 system.cpu.l2cache.demand_miss_latency_1            0                       # number of demand (read+write) miss cycles
 system.cpu.l2cache.demand_miss_rate                 1                       # miss rate for demand accesses
 system.cpu.l2cache.demand_miss_rate_0               1                       # miss rate for demand accesses
 system.cpu.l2cache.demand_miss_rate_1    <err: div-0>                       # miss rate for demand accesses
-system.cpu.l2cache.demand_misses                  970                       # number of demand (read+write) misses
-system.cpu.l2cache.demand_misses_0                970                       # number of demand (read+write) misses
+system.cpu.l2cache.demand_misses                  968                       # number of demand (read+write) misses
+system.cpu.l2cache.demand_misses_0                968                       # number of demand (read+write) misses
 system.cpu.l2cache.demand_misses_1                  0                       # number of demand (read+write) misses
 system.cpu.l2cache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
 system.cpu.l2cache.demand_mshr_hits_0               0                       # number of demand (read+write) MSHR hits
 system.cpu.l2cache.demand_mshr_hits_1               0                       # number of demand (read+write) MSHR hits
-system.cpu.l2cache.demand_mshr_miss_latency      3496674                       # number of demand (read+write) MSHR miss cycles
-system.cpu.l2cache.demand_mshr_miss_latency_0      3496674                       # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_latency      3732529                       # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_latency_0      3732529                       # number of demand (read+write) MSHR miss cycles
 system.cpu.l2cache.demand_mshr_miss_latency_1            0                       # number of demand (read+write) MSHR miss cycles
 system.cpu.l2cache.demand_mshr_miss_rate            1                       # mshr miss rate for demand accesses
 system.cpu.l2cache.demand_mshr_miss_rate_0            1                       # mshr miss rate for demand accesses
 system.cpu.l2cache.demand_mshr_miss_rate_1 <err: div-0>                       # mshr miss rate for demand accesses
-system.cpu.l2cache.demand_mshr_misses             970                       # number of demand (read+write) MSHR misses
-system.cpu.l2cache.demand_mshr_misses_0           970                       # number of demand (read+write) MSHR misses
+system.cpu.l2cache.demand_mshr_misses             968                       # number of demand (read+write) MSHR misses
+system.cpu.l2cache.demand_mshr_misses_0           968                       # number of demand (read+write) MSHR misses
 system.cpu.l2cache.demand_mshr_misses_1             0                       # number of demand (read+write) MSHR misses
 system.cpu.l2cache.fast_writes                      0                       # number of fast writes performed
 system.cpu.l2cache.mshr_cap_events                  0                       # number of times MSHR cap was activated
 system.cpu.l2cache.mshr_cap_events_0                0                       # number of times MSHR cap was activated
 system.cpu.l2cache.mshr_cap_events_1                0                       # number of times MSHR cap was activated
 system.cpu.l2cache.no_allocate_misses               0                       # Number of misses that were no-allocate
-system.cpu.l2cache.overall_accesses               970                       # number of overall (read+write) accesses
-system.cpu.l2cache.overall_accesses_0             970                       # number of overall (read+write) accesses
+system.cpu.l2cache.overall_accesses               968                       # number of overall (read+write) accesses
+system.cpu.l2cache.overall_accesses_0             968                       # number of overall (read+write) accesses
 system.cpu.l2cache.overall_accesses_1               0                       # number of overall (read+write) accesses
-system.cpu.l2cache.overall_avg_miss_latency  6748.795876                       # average overall miss latency
-system.cpu.l2cache.overall_avg_miss_latency_0  6748.795876                       # average overall miss latency
+system.cpu.l2cache.overall_avg_miss_latency  7151.675620                       # average overall miss latency
+system.cpu.l2cache.overall_avg_miss_latency_0  7151.675620                       # average overall miss latency
 system.cpu.l2cache.overall_avg_miss_latency_1 <err: div-0>                       # average overall miss latency
-system.cpu.l2cache.overall_avg_mshr_miss_latency  3604.818557                       # average overall mshr miss latency
-system.cpu.l2cache.overall_avg_mshr_miss_latency_0  3604.818557                       # average overall mshr miss latency
+system.cpu.l2cache.overall_avg_mshr_miss_latency  3855.918388                       # average overall mshr miss latency
+system.cpu.l2cache.overall_avg_mshr_miss_latency_0  3855.918388                       # average overall mshr miss latency
 system.cpu.l2cache.overall_avg_mshr_miss_latency_1 <err: div-0>                       # average overall mshr miss latency
 system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.l2cache.overall_avg_mshr_uncacheable_latency_0 <err: div-0>                       # average overall mshr uncacheable latency
@@ -650,26 +650,26 @@ system.cpu.l2cache.overall_avg_mshr_uncacheable_latency_1 <err: div-0>
 system.cpu.l2cache.overall_hits                     0                       # number of overall hits
 system.cpu.l2cache.overall_hits_0                   0                       # number of overall hits
 system.cpu.l2cache.overall_hits_1                   0                       # number of overall hits
-system.cpu.l2cache.overall_miss_latency       6546332                       # number of overall miss cycles
-system.cpu.l2cache.overall_miss_latency_0      6546332                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_latency       6922822                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_latency_0      6922822                       # number of overall miss cycles
 system.cpu.l2cache.overall_miss_latency_1            0                       # number of overall miss cycles
 system.cpu.l2cache.overall_miss_rate                1                       # miss rate for overall accesses
 system.cpu.l2cache.overall_miss_rate_0              1                       # miss rate for overall accesses
 system.cpu.l2cache.overall_miss_rate_1   <err: div-0>                       # miss rate for overall accesses
-system.cpu.l2cache.overall_misses                 970                       # number of overall misses
-system.cpu.l2cache.overall_misses_0               970                       # number of overall misses
+system.cpu.l2cache.overall_misses                 968                       # number of overall misses
+system.cpu.l2cache.overall_misses_0               968                       # number of overall misses
 system.cpu.l2cache.overall_misses_1                 0                       # number of overall misses
 system.cpu.l2cache.overall_mshr_hits                0                       # number of overall MSHR hits
 system.cpu.l2cache.overall_mshr_hits_0              0                       # number of overall MSHR hits
 system.cpu.l2cache.overall_mshr_hits_1              0                       # number of overall MSHR hits
-system.cpu.l2cache.overall_mshr_miss_latency      3496674                       # number of overall MSHR miss cycles
-system.cpu.l2cache.overall_mshr_miss_latency_0      3496674                       # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_latency      3732529                       # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_latency_0      3732529                       # number of overall MSHR miss cycles
 system.cpu.l2cache.overall_mshr_miss_latency_1            0                       # number of overall MSHR miss cycles
 system.cpu.l2cache.overall_mshr_miss_rate            1                       # mshr miss rate for overall accesses
 system.cpu.l2cache.overall_mshr_miss_rate_0            1                       # mshr miss rate for overall accesses
 system.cpu.l2cache.overall_mshr_miss_rate_1 <err: div-0>                       # mshr miss rate for overall accesses
-system.cpu.l2cache.overall_mshr_misses            970                       # number of overall MSHR misses
-system.cpu.l2cache.overall_mshr_misses_0          970                       # number of overall MSHR misses
+system.cpu.l2cache.overall_mshr_misses            968                       # number of overall MSHR misses
+system.cpu.l2cache.overall_mshr_misses_0          968                       # number of overall MSHR misses
 system.cpu.l2cache.overall_mshr_misses_1            0                       # number of overall MSHR misses
 system.cpu.l2cache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.l2cache.overall_mshr_uncacheable_latency_0            0                       # number of overall MSHR uncacheable cycles
@@ -689,35 +689,35 @@ system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.l2cache.replacements                     0                       # number of replacements
 system.cpu.l2cache.replacements_0                   0                       # number of replacements
 system.cpu.l2cache.replacements_1                   0                       # number of replacements
-system.cpu.l2cache.sampled_refs                   969                       # Sample count of references to valid blocks.
+system.cpu.l2cache.sampled_refs                   968                       # Sample count of references to valid blocks.
 system.cpu.l2cache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
 system.cpu.l2cache.soft_prefetch_mshr_full_0            0                       # number of mshr full events for SW prefetching instrutions
 system.cpu.l2cache.soft_prefetch_mshr_full_1            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse               487.752870                       # Cycle average of tags in use
+system.cpu.l2cache.tagsinuse               491.189820                       # Cycle average of tags in use
 system.cpu.l2cache.total_refs                       0                       # Total number of references to valid blocks.
 system.cpu.l2cache.warmup_cycle                     0                       # Cycle when the warmup percentage was hit.
 system.cpu.l2cache.writebacks                       0                       # number of writebacks
 system.cpu.l2cache.writebacks_0                     0                       # number of writebacks
 system.cpu.l2cache.writebacks_1                     0                       # number of writebacks
-system.cpu.numCycles                           189147                       # number of cpu cycles simulated
-system.cpu.rename.RENAME:BlockCycles            73147                       # Number of cycles rename is blocking
+system.cpu.numCycles                           165688                       # number of cpu cycles simulated
+system.cpu.rename.RENAME:BlockCycles            87802                       # Number of cycles rename is blocking
 system.cpu.rename.RENAME:CommittedMaps           8102                       # Number of HB maps that are committed
 system.cpu.rename.RENAME:IQFullEvents              24                       # Number of times rename has blocked due to IQ full
-system.cpu.rename.RENAME:IdleCycles            265134                       # Number of cycles rename is idle
-system.cpu.rename.RENAME:LSQFullEvents           2520                       # Number of times rename has blocked due to LSQ full
-system.cpu.rename.RENAME:ROBFullEvents             31                       # Number of times rename has blocked due to ROB full
-system.cpu.rename.RENAME:RenameLookups          74254                       # Number of register rename lookups that rename has made
-system.cpu.rename.RENAME:RenamedInsts           61970                       # Number of instructions processed by rename
-system.cpu.rename.RENAME:RenamedOperands        45003                       # Number of destination operands rename has renamed
-system.cpu.rename.RENAME:RunCycles              11202                       # Number of cycles rename is running
-system.cpu.rename.RENAME:SquashCycles            5733                       # Number of cycles rename is squashing
-system.cpu.rename.RENAME:UnblockCycles           2584                       # Number of cycles rename is unblocking
-system.cpu.rename.RENAME:UndoneMaps             36901                       # Number of HB maps that are undone due to squashing
-system.cpu.rename.RENAME:serializeStallCycles        20319                       # count of cycles rename stalled for serializing inst
-system.cpu.rename.RENAME:serializingInsts           49                       # count of serializing insts renamed
-system.cpu.rename.RENAME:skidInsts               5114                       # count of insts added to the skid buffer
+system.cpu.rename.RENAME:IdleCycles            213369                       # Number of cycles rename is idle
+system.cpu.rename.RENAME:LSQFullEvents           2127                       # Number of times rename has blocked due to LSQ full
+system.cpu.rename.RENAME:ROBFullEvents             18                       # Number of times rename has blocked due to ROB full
+system.cpu.rename.RENAME:RenameLookups          28570                       # Number of register rename lookups that rename has made
+system.cpu.rename.RENAME:RenamedInsts           22635                       # Number of instructions processed by rename
+system.cpu.rename.RENAME:RenamedOperands        17117                       # Number of destination operands rename has renamed
+system.cpu.rename.RENAME:RunCycles               3694                       # Number of cycles rename is running
+system.cpu.rename.RENAME:SquashCycles            1856                       # Number of cycles rename is squashing
+system.cpu.rename.RENAME:UnblockCycles           2143                       # Number of cycles rename is unblocking
+system.cpu.rename.RENAME:UndoneMaps              9015                       # Number of HB maps that are undone due to squashing
+system.cpu.rename.RENAME:serializeStallCycles        22337                       # count of cycles rename stalled for serializing inst
+system.cpu.rename.RENAME:serializingInsts           51                       # count of serializing insts renamed
+system.cpu.rename.RENAME:skidInsts               4330                       # count of insts added to the skid buffer
 system.cpu.rename.RENAME:tempSerializingInsts           37                       # count of temporary serializing insts renamed
-system.cpu.timesIdled                             691                       # Number of times that the entire CPU went into an idle state and unscheduled itself
+system.cpu.timesIdled                             688                       # Number of times that the entire CPU went into an idle state and unscheduled itself
 system.cpu.workload0.PROG:num_syscalls             17                       # Number of system calls
 system.cpu.workload1.PROG:num_syscalls             17                       # Number of system calls
 
diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stderr b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stderr
index d8ccd6207..54505c240 100644
--- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stderr
+++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stderr
@@ -1,5 +1,5 @@
-0: system.remote_gdb.listener: listening for remote gdb #0 on port 7000
-0: system.remote_gdb.listener: listening for remote gdb #1 on port 7001
+0: system.remote_gdb.listener: listening for remote gdb on port 7001
+0: system.remote_gdb.listener: listening for remote gdb on port 7002
 warn: Entering event queue @ 0.  Starting simulation...
 warn: Increasing stack size by one page.
 warn: Increasing stack size by one page.
diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout
index 30a45522d..b4ae56cae 100644
--- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout
+++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout
@@ -7,9 +7,9 @@ The Regents of The University of Michigan
 All Rights Reserved
 
 
-M5 compiled Mar 24 2007 13:51:02
-M5 started Sat Mar 24 13:51:16 2007
-M5 executing on zizzer.eecs.umich.edu
-command line: build/ALPHA_SE/m5.opt -d build/ALPHA_SE/tests/opt/quick/01.hello-2T-smt/alpha/linux/o3-timing tests/run.py quick/01.hello-2T-smt/alpha/linux/o3-timing
+M5 compiled Mar 30 2007 13:12:55
+M5 started Fri Mar 30 13:13:07 2007
+M5 executing on zamp.eecs.umich.edu
+command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/01.hello-2T-smt/alpha/linux/o3-timing tests/run.py quick/01.hello-2T-smt/alpha/linux/o3-timing
 Global frequency set at 1000000000000 ticks per second
-Exiting @ tick 2232164 because target called exit()
+Exiting @ tick 2095164 because target called exit()
diff --git a/tests/quick/02.insttest/ref/sparc/linux/simple-atomic/config.ini b/tests/quick/02.insttest/ref/sparc/linux/simple-atomic/config.ini
index ccb504cd3..06059c3eb 100644
--- a/tests/quick/02.insttest/ref/sparc/linux/simple-atomic/config.ini
+++ b/tests/quick/02.insttest/ref/sparc/linux/simple-atomic/config.ini
@@ -1,11 +1,7 @@
 [root]
 type=Root
 children=system
-checkpoint=
-clock=1000000000000
-max_tick=0
-output_file=cout
-progress_interval=0
+dummy=0
 
 [system]
 type=System
diff --git a/tests/quick/02.insttest/ref/sparc/linux/simple-atomic/config.out b/tests/quick/02.insttest/ref/sparc/linux/simple-atomic/config.out
index 392fec336..7f9a83d25 100644
--- a/tests/quick/02.insttest/ref/sparc/linux/simple-atomic/config.out
+++ b/tests/quick/02.insttest/ref/sparc/linux/simple-atomic/config.out
@@ -1,9 +1,6 @@
 [root]
 type=Root
-clock=1000000000000
-max_tick=0
-progress_interval=0
-output_file=cout
+dummy=0
 
 [system.physmem]
 type=PhysicalMemory
diff --git a/tests/quick/02.insttest/ref/sparc/linux/simple-atomic/m5stats.txt b/tests/quick/02.insttest/ref/sparc/linux/simple-atomic/m5stats.txt
index 4fe3d3732..1ed7d50eb 100644
--- a/tests/quick/02.insttest/ref/sparc/linux/simple-atomic/m5stats.txt
+++ b/tests/quick/02.insttest/ref/sparc/linux/simple-atomic/m5stats.txt
@@ -1,18 +1,18 @@
 
 ---------- Begin Simulation Statistics ----------
-host_inst_rate                                 104057                       # Simulator instruction rate (inst/s)
-host_mem_usage                                 179368                       # Number of bytes of host memory used
-host_seconds                                     0.10                       # Real time elapsed on the host
-host_tick_rate                                 103746                       # Simulator tick rate (ticks/s)
+host_inst_rate                                  65718                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 179556                       # Number of bytes of host memory used
+host_seconds                                     0.17                       # Real time elapsed on the host
+host_tick_rate                                  65601                       # Simulator tick rate (ticks/s)
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
-sim_insts                                       10367                       # Number of instructions simulated
+sim_insts                                       11001                       # Number of instructions simulated
 sim_seconds                                  0.000000                       # Number of seconds simulated
-sim_ticks                                       10366                       # Number of ticks simulated
+sim_ticks                                       11000                       # Number of ticks simulated
 system.cpu.idle_fraction                            0                       # Percentage of idle cycles
 system.cpu.not_idle_fraction                        1                       # Percentage of non-idle cycles
-system.cpu.numCycles                            10367                       # number of cpu cycles simulated
-system.cpu.num_insts                            10367                       # Number of instructions executed
-system.cpu.num_refs                              2607                       # Number of memory references
+system.cpu.numCycles                            11001                       # number of cpu cycles simulated
+system.cpu.num_insts                            11001                       # Number of instructions executed
+system.cpu.num_refs                              2760                       # Number of memory references
 system.cpu.workload.PROG:num_syscalls               8                       # Number of system calls
 
 ---------- End Simulation Statistics   ----------
diff --git a/tests/quick/02.insttest/ref/sparc/linux/simple-atomic/stdout b/tests/quick/02.insttest/ref/sparc/linux/simple-atomic/stdout
index 567033922..c89235e64 100644
--- a/tests/quick/02.insttest/ref/sparc/linux/simple-atomic/stdout
+++ b/tests/quick/02.insttest/ref/sparc/linux/simple-atomic/stdout
@@ -7,6 +7,7 @@ CASX FAIL:	Passed
 CASX WORK:	Passed
 LDTX:		Passed
 LDTW:		Passed
+STTW:		Passed
 Done
 M5 Simulator System
 
@@ -15,8 +16,9 @@ The Regents of The University of Michigan
 All Rights Reserved
 
 
-M5 compiled Mar  6 2007 15:43:35
-M5 started Tue Mar  6 15:52:39 2007
+M5 compiled Mar 29 2007 15:29:35
+M5 started Thu Mar 29 15:39:35 2007
 M5 executing on zeep
 command line: build/SPARC_SE/m5.debug -d build/SPARC_SE/tests/debug/quick/02.insttest/sparc/linux/simple-atomic tests/run.py quick/02.insttest/sparc/linux/simple-atomic
-Exiting @ tick 10366 because target called exit()
+Global frequency set at 1000000000000 ticks per second
+Exiting @ tick 11000 because target called exit()