75 files changed, 1478 insertions, 998 deletions
diff --git a/SConstruct b/SConstruct
index dac317fe8..6ca3d6a14 100644
--- a/SConstruct
+++ b/SConstruct
@@ -324,11 +324,11 @@ Usage: scons [scons options] [build options] [target(s)]
 Global sticky options:
 '''
 
-help_text += global_sticky_vars.GenerateHelpText(main)
-
 # Update main environment with values from ARGUMENTS & global_sticky_vars_file
 global_sticky_vars.Update(main)
 
+help_text += global_sticky_vars.GenerateHelpText(main)
+
 # Save sticky variable settings back to current variables file
 global_sticky_vars.Save(global_sticky_vars_file, main)
 
diff --git a/configs/common/FSConfig.py b/configs/common/FSConfig.py
index 7ab7319cd..cf4c9b6f5 100644
--- a/configs/common/FSConfig.py
+++ b/configs/common/FSConfig.py
@@ -216,6 +216,8 @@ def makeX86System(mem_mode, numCPUs = 1, mdesc = None, self = None):
     mdesc.diskname = 'x86root.img'
     self.readfile = mdesc.script()
 
+    self.mem_mode = mem_mode
+
     # Physical memory
     self.membus = MemBus(bus_id=1)
     self.physmem = PhysicalMemory(range = AddrRange(mdesc.mem()))
diff --git a/src/arch/alpha/isa/decoder.isa b/src/arch/alpha/isa/decoder.isa
index 52e124ad5..fe70e4d16 100644
--- a/src/arch/alpha/isa/decoder.isa
+++ b/src/arch/alpha/isa/decoder.isa
@@ -338,6 +338,31 @@ decode OPCODE default Unknown::unknown() {
         0x1c: decode INTFUNC {
             0x00: decode RA { 31: sextb({{ Rc.sb = Rb_or_imm< 7:0>; }}); }
             0x01: decode RA { 31: sextw({{ Rc.sw = Rb_or_imm<15:0>; }}); }
+
+            0x30: ctpop({{
+                             uint64_t count = 0;
+                             for (int i = 0; Rb<63:i>; ++i) {
+                                 if (Rb<i:i> == 0x1)
+                                     ++count;
+                             }
+                             Rc = count;
+                           }}, IntAluOp);
+
+            0x31: perr({{
+                             uint64_t temp = 0;
+                             int hi = 7;
+                             int lo = 0;
+                             for (int i = 0; i < 8; ++i) {
+                                 uint8_t ra_ub = Ra.uq<hi:lo>;
+                                 uint8_t rb_ub = Rb.uq<hi:lo>;
+                                 temp += (ra_ub >= rb_ub) ? 
+                                         (ra_ub - rb_ub) : (rb_ub - ra_ub);
+                                 hi += 8;
+                                 lo += 8;
+                             }
+                             Rc = temp;
+                           }});
+
             0x32: ctlz({{
                              uint64_t count = 0;
                              uint64_t temp = Rb;
@@ -359,26 +384,163 @@ decode OPCODE default Unknown::unknown() {
                              if (!(temp<7:0>)) { temp >>= 8; count += 8; }
                              if (!(temp<3:0>)) { temp >>= 4; count += 4; }
                              if (!(temp<1:0>)) { temp >>= 2; count += 2; }
+                             if (!(temp<0:0> & ULL(0x1))) { 
+                                 temp >>= 1; count += 1; 
+                             }
                              if (!(temp<0:0> & ULL(0x1))) count += 1;
                              Rc = count;
                            }}, IntAluOp);
 
-            format FailUnimpl {
-                0x30: ctpop();
-                0x31: perr();
-                0x34: unpkbw();
-                0x35: unpkbl();
-                0x36: pkwb();
-                0x37: pklb();
-                0x38: minsb8();
-                0x39: minsw4();
-                0x3a: minub8();
-                0x3b: minuw4();
-                0x3c: maxub8();
-                0x3d: maxuw4();
-                0x3e: maxsb8();
-                0x3f: maxsw4();
-            }
+
+            0x34: unpkbw({{ 
+                             Rc = (Rb.uq<7:0> 
+                                   | (Rb.uq<15:8> << 16) 
+                                   | (Rb.uq<23:16> << 32) 
+                                   | (Rb.uq<31:24> << 48)); 
+                           }}, IntAluOp);
+
+            0x35: unpkbl({{
+                             Rc = (Rb.uq<7:0> | (Rb.uq<15:8> << 32)); 
+                           }}, IntAluOp);
+
+            0x36: pkwb({{
+                             Rc = (Rb.uq<7:0> 
+                                   | (Rb.uq<23:16> << 8) 
+                                   | (Rb.uq<39:32> << 16) 
+                                   | (Rb.uq<55:48> << 24)); 
+                           }}, IntAluOp);
+
+            0x37: pklb({{
+                             Rc = (Rb.uq<7:0> | (Rb.uq<39:32> << 8)); 
+                           }}, IntAluOp);
+
+            0x38: minsb8({{
+                             uint64_t temp = 0;
+                             int hi = 63;
+                             int lo = 56;
+                             for (int i = 7; i >= 0; --i) {
+                                 int8_t ra_sb = Ra.uq<hi:lo>;
+                                 int8_t rb_sb = Rb.uq<hi:lo>;
+                                 temp = ((temp << 8) 
+                                         | ((ra_sb < rb_sb) ? Ra.uq<hi:lo>
+                                                          : Rb.uq<hi:lo>));
+                                 hi -= 8;
+                                 lo -= 8;
+                             }
+                             Rc = temp;
+                          }});
+
+            0x39: minsw4({{
+                             uint64_t temp = 0;
+                             int hi = 63;
+                             int lo = 48;
+                             for (int i = 3; i >= 0; --i) {
+                                 int16_t ra_sw = Ra.uq<hi:lo>;
+                                 int16_t rb_sw = Rb.uq<hi:lo>;
+                                 temp = ((temp << 16) 
+                                         | ((ra_sw < rb_sw) ? Ra.uq<hi:lo>
+                                                          : Rb.uq<hi:lo>));
+                                 hi -= 16;
+                                 lo -= 16;
+                             }
+                             Rc = temp;
+                          }});
+
+            0x3a: minub8({{
+                             uint64_t temp = 0;
+                             int hi = 63;
+                             int lo = 56;
+                             for (int i = 7; i >= 0; --i) {
+                                 uint8_t ra_ub = Ra.uq<hi:lo>;
+                                 uint8_t rb_ub = Rb.uq<hi:lo>;
+                                 temp = ((temp << 8) 
+                                         | ((ra_ub < rb_ub) ? Ra.uq<hi:lo>
+                                                          : Rb.uq<hi:lo>));
+                                 hi -= 8;
+                                 lo -= 8;
+                             }
+                             Rc = temp;
+                          }});
+
+            0x3b: minuw4({{
+                             uint64_t temp = 0;
+                             int hi = 63;
+                             int lo = 48;
+                             for (int i = 3; i >= 0; --i) {
+                                 uint16_t ra_sw = Ra.uq<hi:lo>;
+                                 uint16_t rb_sw = Rb.uq<hi:lo>;
+                                 temp = ((temp << 16) 
+                                         | ((ra_sw < rb_sw) ? Ra.uq<hi:lo>
+                                                          : Rb.uq<hi:lo>));
+                                 hi -= 16;
+                                 lo -= 16;
+                             }
+                             Rc = temp;
+                          }});
+
+            0x3c: maxub8({{
+                             uint64_t temp = 0;
+                             int hi = 63;
+                             int lo = 56;
+                             for (int i = 7; i >= 0; --i) {
+                                 uint8_t ra_ub = Ra.uq<hi:lo>;
+                                 uint8_t rb_ub = Rb.uq<hi:lo>;
+                                 temp = ((temp << 8) 
+                                         | ((ra_ub > rb_ub) ? Ra.uq<hi:lo>
+                                                          : Rb.uq<hi:lo>));
+                                 hi -= 8;
+                                 lo -= 8;
+                             }
+                             Rc = temp;
+                          }});
+
+            0x3d: maxuw4({{
+                             uint64_t temp = 0;
+                             int hi = 63;
+                             int lo = 48;
+                             for (int i = 3; i >= 0; --i) {
+                                 uint16_t ra_uw = Ra.uq<hi:lo>;
+                                 uint16_t rb_uw = Rb.uq<hi:lo>;
+                                 temp = ((temp << 16) 
+                                         | ((ra_uw > rb_uw) ? Ra.uq<hi:lo>
+                                                          : Rb.uq<hi:lo>));
+                                 hi -= 16;
+                                 lo -= 16;
+                             }
+                             Rc = temp;
+                          }});
+
+            0x3e: maxsb8({{
+                             uint64_t temp = 0;
+                             int hi = 63;
+                             int lo = 56;
+                             for (int i = 7; i >= 0; --i) {
+                                 int8_t ra_sb = Ra.uq<hi:lo>;
+                                 int8_t rb_sb = Rb.uq<hi:lo>;
+                                 temp = ((temp << 8) 
+                                         | ((ra_sb > rb_sb) ? Ra.uq<hi:lo>
+                                                          : Rb.uq<hi:lo>));
+                                 hi -= 8;
+                                 lo -= 8;
+                             }
+                             Rc = temp;
+                          }});
+
+            0x3f: maxsw4({{
+                             uint64_t temp = 0;
+                             int hi = 63;
+                             int lo = 48;
+                             for (int i = 3; i >= 0; --i) {
+                                 int16_t ra_sw = Ra.uq<hi:lo>;
+                                 int16_t rb_sw = Rb.uq<hi:lo>;
+                                 temp = ((temp << 16) 
+                                         | ((ra_sw > rb_sw) ? Ra.uq<hi:lo>
+                                                          : Rb.uq<hi:lo>));
+                                 hi -= 16;
+                                 lo -= 16;
+                             }
+                             Rc = temp;
+                          }});
 
             format BasicOperateWithNopCheck {
                 0x70: decode RB {
diff --git a/src/arch/alpha/process.cc b/src/arch/alpha/process.cc
index 9d75d5fa1..1c83f64b2 100644
--- a/src/arch/alpha/process.cc
+++ b/src/arch/alpha/process.cc
@@ -175,21 +175,22 @@ AlphaLiveProcess::argsInit(int intSize, int pageSize)
 void
 AlphaLiveProcess::startup()
 {
-    if (checkpointRestored)
+    ThreadContext *tc = system->getThreadContext(contextIds[0]);
+    tc->setMiscRegNoEffect(IPR_DTB_ASN, M5_pid << 57);
+
+    if (checkpointRestored) {
         return;
+    }
 
     Process::startup();
 
     argsInit(MachineBytes, VMPageSize);
 
-    ThreadContext *tc = system->getThreadContext(contextIds[0]);
     tc->setIntReg(GlobalPointerReg, objFile->globalPointer());
     //Operate in user mode
     tc->setMiscRegNoEffect(IPR_ICM, 0x18);
     //No super page mapping
     tc->setMiscRegNoEffect(IPR_MCSR, 0);
-    //Set this to 0 for now, but it should be unique for each process
-    tc->setMiscRegNoEffect(IPR_DTB_ASN, M5_pid << 57);
 }
 
 AlphaISA::IntReg
diff --git a/src/arch/mips/isa.cc b/src/arch/mips/isa.cc
index 3c8c9a986..1cad7e4be 100644
--- a/src/arch/mips/isa.cc
+++ b/src/arch/mips/isa.cc
@@ -91,12 +91,6 @@ ISA::ISA()
     init();
 }
 
-ISA::ISA(BaseCPU *_cpu)
-{
-    cpu = _cpu;
-    init();
-}
-
 void
 ISA::init()
 {
@@ -173,11 +167,10 @@ ISA::expandForMultithreading(ThreadID num_threads, unsigned num_vpes)
 //@TODO: Use MIPS STYLE CONSTANTS (e.g. TCHALT_H instead of TCH_H)
 void
 ISA::reset(std::string core_name, ThreadID num_threads,
-                   unsigned num_vpes, BaseCPU *_cpu)
+                   unsigned num_vpes, BaseCPU *cpu)
 {
     DPRINTF(MipsPRA, "Resetting CP0 State with %i TCs and %i VPEs\n",
             num_threads, num_vpes);
-    cpu = _cpu;
 
     MipsISA::CoreSpecific &cp = cpu->coreParams;
 
@@ -499,7 +492,7 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val,
 
     miscRegFile[misc_reg][reg_sel] = cp0_val;
 
-    scheduleCP0Update(1);
+    scheduleCP0Update(tc->getCpuPtr(), 1);
 }
 
 /**
@@ -528,7 +521,7 @@ ISA::filterCP0Write(int misc_reg, int reg_sel, const MiscReg &val)
 }
 
 void
-ISA::scheduleCP0Update(int delay)
+ISA::scheduleCP0Update(BaseCPU *cpu, int delay)
 {
     if (!cp0Updated) {
         cp0Updated = true;
@@ -540,7 +533,7 @@ ISA::scheduleCP0Update(int delay)
 }
 
 void
-ISA::updateCPU()
+ISA::updateCPU(BaseCPU *cpu)
 {
     ///////////////////////////////////////////////////////////////////
     //
@@ -578,7 +571,7 @@ ISA::CP0Event::process()
     switch (cp0EventType)
     {
       case UpdateCP0:
-        cp0->updateCPU();
+        cp0->updateCPU(cpu);
         break;
     }
 }
diff --git a/src/arch/mips/isa.hh b/src/arch/mips/isa.hh
index 165adff83..3f7afcdd0 100644
--- a/src/arch/mips/isa.hh
+++ b/src/arch/mips/isa.hh
@@ -64,18 +64,15 @@ namespace MipsISA
         std::vector<std::vector<MiscReg> > miscRegFile_WriteMask;
         std::vector<BankType> bankType;
 
-        BaseCPU *cpu;
-
       public:
         ISA();
-        ISA(BaseCPU *_cpu);
 
         void init();
 
         void clear(unsigned tid_or_vpn = 0);
 
         void reset(std::string core_name, ThreadID num_threads,
-                   unsigned num_vpes, BaseCPU *_cpu);
+                   unsigned num_vpes, BaseCPU *cpu);
 
         void expandForMultithreading(ThreadID num_threads, unsigned num_vpes);
 
@@ -147,11 +144,11 @@ namespace MipsISA
         };
 
         // Schedule a CP0 Update Event
-        void scheduleCP0Update(int delay = 0);
+        void scheduleCP0Update(BaseCPU *cpu, int delay = 0);
 
         // If any changes have been made, then check the state for changes
         // and if necessary alert the CPU
-        void updateCPU();
+        void updateCPU(BaseCPU *cpu);
 
         // Keep a List of CPU Events that need to be deallocated
         std::queue<CP0Event*> cp0EventRemoveList;
diff --git a/src/arch/mips/isa/decoder.isa b/src/arch/mips/isa/decoder.isa
index c531347d2..36533e076 100644
--- a/src/arch/mips/isa/decoder.isa
+++ b/src/arch/mips/isa/decoder.isa
@@ -2476,10 +2476,14 @@ decode OPCODE_HI default Unknown::unknown() {
                         }
                     }
                 }
-                0x3: decode OP_HI {
-                    0x2: decode OP_LO {
-                        0x3: FailUnimpl::rdhwr();
+                0x3: decode OP {
+#if FULL_SYSTEM
+                    0x0: FailUnimpl::rdhwr();
+#else
+                    0x0: decode RD {
+                        29: BasicOp::rdhwr({{ Rt = TpValue; }});
                     }
+#endif
                 }
             }
         }
diff --git a/src/arch/mips/isa/operands.isa b/src/arch/mips/isa/operands.isa
index 50726cd30..27cb4357a 100644
--- a/src/arch/mips/isa/operands.isa
+++ b/src/arch/mips/isa/operands.isa
@@ -109,8 +109,11 @@ def operands {{
     #LL Flag
     'LLFlag': ('ControlReg', 'uw', 'MISCREG_LLFLAG', None, 1),
 
+    #Thread pointer value for SE mode
+    'TpValue': ('ControlReg', 'ud', 'MISCREG_TP_VALUE', None, 1),
+
     # Index Register
-    'Index':('ControlReg','uw','MISCREG_INDEX',None,1),
+    'Index': ('ControlReg','uw','MISCREG_INDEX',None,1),
 
 
     'CP0_RD_SEL': ('ControlReg', 'uw', '(RD << 3 | SEL)', None, 1),
diff --git a/src/arch/mips/linux/process.cc b/src/arch/mips/linux/process.cc
index c2a05b73b..4c3581ecb 100644
--- a/src/arch/mips/linux/process.cc
+++ b/src/arch/mips/linux/process.cc
@@ -126,6 +126,16 @@ sys_setsysinfoFunc(SyscallDesc *desc, int callnum, LiveProcess *process,
     return 1;
 }
 
+static SyscallReturn
+setThreadAreaFunc(SyscallDesc *desc, int callnum, LiveProcess *process,
+                  ThreadContext *tc)
+{
+    int index = 0;
+    Addr addr = process->getSyscallArg(tc, index);
+    tc->setMiscRegNoEffect(MISCREG_TP_VALUE, addr);
+    return 0;
+}
+
 SyscallDesc MipsLinuxProcess::syscallDescs[] = {
     /*  0 */ SyscallDesc("syscall", unimplementedFunc),
     /*  1 */ SyscallDesc("exit", exitFunc),
@@ -409,7 +419,44 @@ SyscallDesc MipsLinuxProcess::syscallDescs[] = {
     /* 279 */ SyscallDesc("unknown #279", unimplementedFunc),
     /* 280 */ SyscallDesc("add_key", unimplementedFunc),
     /* 281 */ SyscallDesc("request_key", unimplementedFunc),
-    /* 282 */ SyscallDesc("keyctl", unimplementedFunc)
+    /* 282 */ SyscallDesc("keyctl", unimplementedFunc),
+    /* 283 */ SyscallDesc("set_thread_area", setThreadAreaFunc),
+    /* 284 */ SyscallDesc("inotify_init", unimplementedFunc),
+    /* 285 */ SyscallDesc("inotify_add_watch", unimplementedFunc),
+    /* 286 */ SyscallDesc("inotify_rm_watch", unimplementedFunc),
+    /* 287 */ SyscallDesc("migrate_pages", unimplementedFunc),
+    /* 288 */ SyscallDesc("openat", unimplementedFunc),
+    /* 289 */ SyscallDesc("mkdirat", unimplementedFunc),
+    /* 290 */ SyscallDesc("mknodat", unimplementedFunc),
+    /* 291 */ SyscallDesc("fchownat", unimplementedFunc),
+    /* 292 */ SyscallDesc("futimesat", unimplementedFunc),
+    /* 293 */ SyscallDesc("fstatat64", unimplementedFunc),
+    /* 294 */ SyscallDesc("unlinkat", unimplementedFunc),
+    /* 295 */ SyscallDesc("renameat", unimplementedFunc),
+    /* 296 */ SyscallDesc("linkat", unimplementedFunc),
+    /* 297 */ SyscallDesc("symlinkat", unimplementedFunc),
+    /* 298 */ SyscallDesc("readlinkat", unimplementedFunc),
+    /* 299 */ SyscallDesc("fchmodat", unimplementedFunc),
+    /* 300 */ SyscallDesc("faccessat", unimplementedFunc),
+    /* 301 */ SyscallDesc("pselect6", unimplementedFunc),
+    /* 302 */ SyscallDesc("ppoll", unimplementedFunc),
+    /* 303 */ SyscallDesc("unshare", unimplementedFunc),
+    /* 304 */ SyscallDesc("splice", unimplementedFunc),
+    /* 305 */ SyscallDesc("sync_file_range", unimplementedFunc),
+    /* 306 */ SyscallDesc("tee", unimplementedFunc),
+    /* 307 */ SyscallDesc("vmsplice", unimplementedFunc),
+    /* 308 */ SyscallDesc("move_pages", unimplementedFunc),
+    /* 309 */ SyscallDesc("set_robust_list", unimplementedFunc),
+    /* 310 */ SyscallDesc("get_robust_list", unimplementedFunc),
+    /* 311 */ SyscallDesc("kexec_load", unimplementedFunc),
+    /* 312 */ SyscallDesc("getcpu", unimplementedFunc),
+    /* 313 */ SyscallDesc("epoll_pwait", unimplementedFunc),
+    /* 314 */ SyscallDesc("ioprio_set", unimplementedFunc),
+    /* 315 */ SyscallDesc("ioprio_get", unimplementedFunc),
+    /* 316 */ SyscallDesc("utimensat", unimplementedFunc),
+    /* 317 */ SyscallDesc("signalfd", unimplementedFunc),
+    /* 318 */ SyscallDesc("timerfd", unimplementedFunc),
+    /* 319 */ SyscallDesc("eventfd", unimplementedFunc)
 };
 
 MipsLinuxProcess::MipsLinuxProcess(LiveProcessParams * params,
diff --git a/src/arch/mips/process.cc b/src/arch/mips/process.cc
index d96b0c81c..2fd9114e9 100644
--- a/src/arch/mips/process.cc
+++ b/src/arch/mips/process.cc
@@ -34,6 +34,7 @@
 #include "arch/mips/process.hh"
 
 #include "base/loader/object_file.hh"
+#include "base/loader/elf_object.hh"
 #include "base/misc.hh"
 #include "cpu/thread_context.hh"
 
@@ -61,8 +62,8 @@ MipsLiveProcess::MipsLiveProcess(LiveProcessParams * params,
     brk_point = objFile->dataBase() + objFile->dataSize() + objFile->bssSize();
     brk_point = roundUp(brk_point, VMPageSize);
 
-    // Set up region for mmaps. For now, start at bottom of kuseg space.
-    mmap_start = mmap_end = 0x10000;
+    // Set up region for mmaps.  Start it 1GB above the top of the heap.
+    mmap_start = mmap_end = brk_point + 0x40000000L;
 }
 
 void
@@ -70,18 +71,52 @@ MipsLiveProcess::startup()
 {
     Process::startup();
 
-    argsInit(MachineBytes, VMPageSize);
+    argsInit<uint32_t>(VMPageSize);
 }
 
+template<class IntType>
 void
-MipsLiveProcess::argsInit(int intSize, int pageSize)
+MipsLiveProcess::argsInit(int pageSize)
 {
+    int intSize = sizeof(IntType);
+    Process::startup();
+
     // load object file into target memory
     objFile->loadSections(initVirtMem);
 
-    // Calculate how much space we need for arg & env arrays.
+    typedef AuxVector<IntType> auxv_t;
+    std::vector<auxv_t> auxv;
+
+    ElfObject * elfObject = dynamic_cast<ElfObject *>(objFile);
+    if (elfObject)
+    {
+        // Set the system page size
+        auxv.push_back(auxv_t(M5_AT_PAGESZ, MipsISA::VMPageSize));
+        // Set the frequency at which time() increments
+        auxv.push_back(auxv_t(M5_AT_CLKTCK, 100));
+        // For statically linked executables, this is the virtual
+        // address of the program header tables if they appear in the
+        // executable image.
+        auxv.push_back(auxv_t(M5_AT_PHDR, elfObject->programHeaderTable()));
+        DPRINTF(Loader, "auxv at PHDR %08p\n", elfObject->programHeaderTable());
+        // This is the size of a program header entry from the elf file.
+        auxv.push_back(auxv_t(M5_AT_PHENT, elfObject->programHeaderSize()));
+        // This is the number of program headers from the original elf file.
+        auxv.push_back(auxv_t(M5_AT_PHNUM, elfObject->programHeaderCount()));
+        //The entry point to the program
+        auxv.push_back(auxv_t(M5_AT_ENTRY, objFile->entryPoint()));
+        //Different user and group IDs
+        auxv.push_back(auxv_t(M5_AT_UID, uid()));
+        auxv.push_back(auxv_t(M5_AT_EUID, euid()));
+        auxv.push_back(auxv_t(M5_AT_GID, gid()));
+        auxv.push_back(auxv_t(M5_AT_EGID, egid()));
+    }
+
+    // Calculate how much space we need for arg & env & auxv arrays.
     int argv_array_size = intSize * (argv.size() + 1);
     int envp_array_size = intSize * (envp.size() + 1);
+    int auxv_array_size = intSize * 2 * (auxv.size() + 1);
+
     int arg_data_size = 0;
     for (vector<string>::size_type i = 0; i < argv.size(); ++i) {
         arg_data_size += argv[i].size() + 1;
@@ -92,9 +127,11 @@ MipsLiveProcess::argsInit(int intSize, int pageSize)
     }
 
     int space_needed =
-         argv_array_size + envp_array_size + arg_data_size + env_data_size;
-    if (space_needed < 32*1024)
-        space_needed = 32*1024;
+        argv_array_size +
+        envp_array_size +
+        auxv_array_size +
+        arg_data_size +
+        env_data_size;
 
     // set bottom of stack
     stack_min = stack_base - space_needed;
@@ -105,27 +142,16 @@ MipsLiveProcess::argsInit(int intSize, int pageSize)
     pTable->allocate(stack_min, roundUp(stack_size, pageSize));
 
     // map out initial stack contents
-    // ========
-    // NOTE: Using uint32_t hardcodes MIPS32 and not MIPS64
-    // even if MIPS64 was intended. This is because the
-    // copyStringArray function templates on the parameters.
-    // Elegant way to check intSize and vary between 32/64?
-    // ========
-    uint32_t argv_array_base = stack_min + intSize; // room for argc
-    uint32_t envp_array_base = argv_array_base + argv_array_size;
-    uint32_t arg_data_base = envp_array_base + envp_array_size;
-    uint32_t env_data_base = arg_data_base + arg_data_size;
+    IntType argv_array_base = stack_min + intSize; // room for argc
+    IntType envp_array_base = argv_array_base + argv_array_size;
+    IntType auxv_array_base = envp_array_base + envp_array_size;
+    IntType arg_data_base = auxv_array_base + auxv_array_size;
+    IntType env_data_base = arg_data_base + arg_data_size;
 
     // write contents to stack
-    uint32_t argc = argv.size();
-
-    if (intSize == 8)
-        argc = htog((uint64_t)argc);
-    else if (intSize == 4)
-        argc = htog((uint32_t)argc);
-    else
-        panic("Unknown int size");
+    IntType argc = argv.size();
 
+    argc = htog((IntType)argc);
 
     initVirtMem->writeBlob(stack_min, (uint8_t*)&argc, intSize);
 
@@ -133,6 +159,21 @@ MipsLiveProcess::argsInit(int intSize, int pageSize)
 
     copyStringArray(envp, envp_array_base, env_data_base, initVirtMem);
 
+    // Copy the aux vector
+    for (typename vector<auxv_t>::size_type x = 0; x < auxv.size(); x++) {
+        initVirtMem->writeBlob(auxv_array_base + x * 2 * intSize,
+                (uint8_t*)&(auxv[x].a_type), intSize);
+        initVirtMem->writeBlob(auxv_array_base + (x * 2 + 1) * intSize,
+                (uint8_t*)&(auxv[x].a_val), intSize);
+    }
+
+    // Write out the terminating zeroed auxiliary vector
+    for (unsigned i = 0; i < 2; i++) {
+        const IntType zero = 0;
+        const Addr addr = auxv_array_base + 2 * intSize * (auxv.size() + i);
+        initVirtMem->writeBlob(addr, (uint8_t*)&zero, intSize);
+    }
+
     ThreadContext *tc = system->getThreadContext(contextIds[0]);
 
     setSyscallArg(tc, 0, argc);
diff --git a/src/arch/mips/process.hh b/src/arch/mips/process.hh
index f35ec8554..f1238b41f 100644
--- a/src/arch/mips/process.hh
+++ b/src/arch/mips/process.hh
@@ -47,7 +47,8 @@ class MipsLiveProcess : public LiveProcess
 
     void startup();
 
-    void argsInit(int intSize, int pageSize);
+    template<class IntType>
+    void argsInit(int pageSize);
 
   public:
     MipsISA::IntReg getSyscallArg(ThreadContext *tc, int &i);
diff --git a/src/arch/mips/registers.hh b/src/arch/mips/registers.hh
index fdb04b131..5cf76634d 100644
--- a/src/arch/mips/registers.hh
+++ b/src/arch/mips/registers.hh
@@ -275,6 +275,7 @@ enum MiscRegIndex{
     MISCREG_DESAVE = 248,       //Bank 31: 248-256
 
     MISCREG_LLFLAG = 257,
+    MISCREG_TP_VALUE,
 
     MISCREG_NUMREGS
 };
diff --git a/src/arch/x86/insts/micromediaop.hh b/src/arch/x86/insts/micromediaop.hh
index 508ef4e26..854d4de09 100644
--- a/src/arch/x86/insts/micromediaop.hh
+++ b/src/arch/x86/insts/micromediaop.hh
@@ -35,6 +35,12 @@
 
 namespace X86ISA
 {
+    enum MediaFlag {
+        MediaMultHiOp = 1,
+        MediaSignedOp = 64,
+        MediaScalarOp = 128
+    };
+
     class MediaOpBase : public X86MicroopBase
     {
       protected:
@@ -59,6 +65,30 @@ namespace X86ISA
             src1(_src1.idx), dest(_dest.idx),
             srcSize(_srcSize), destSize(_destSize), ext(_ext)
         {}
+
+        bool
+        scalarOp() const
+        {
+            return ext & MediaScalarOp;
+        }
+        
+        int
+        numItems(int size) const
+        {
+            return scalarOp() ? 1 : (sizeof(FloatRegBits) / size);
+        }
+
+        bool
+        multHi() const
+        {
+            return ext & MediaMultHiOp;
+        }
+
+        bool
+        signedOp() const
+        {
+            return ext & MediaSignedOp;
+        }
     };
 
     class MediaOpReg : public MediaOpBase
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/addition.py b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/addition.py
index 083d8775d..e4c90b8d9 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/addition.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/addition.py
@@ -55,33 +55,33 @@
 
 microcode = '''
 def macroop ADDSS_XMM_XMM {
-    maddf xmml, xmml, xmmlm, size=4, ext=1
+    maddf xmml, xmml, xmmlm, size=4, ext=Scalar
 };
 
 def macroop ADDSS_XMM_M {
     ldfp ufp1, seg, sib, disp, dataSize=8
-    maddf xmml, xmml, ufp1, size=4, ext=1
+    maddf xmml, xmml, ufp1, size=4, ext=Scalar
 };
 
 def macroop ADDSS_XMM_P {
     rdip t7
     ldfp ufp1, seg, riprel, disp, dataSize=8
-    maddf xmml, xmml, ufp1, size=4, ext=1
+    maddf xmml, xmml, ufp1, size=4, ext=Scalar
 };
 
 def macroop ADDSD_XMM_XMM {
-    maddf xmml, xmml, xmmlm, size=8, ext=1
+    maddf xmml, xmml, xmmlm, size=8, ext=Scalar
 };
 
 def macroop ADDSD_XMM_M {
     ldfp ufp1, seg, sib, disp, dataSize=8
-    maddf xmml, xmml, ufp1, size=8, ext=1
+    maddf xmml, xmml, ufp1, size=8, ext=Scalar
 };
 
 def macroop ADDSD_XMM_P {
     rdip t7
     ldfp ufp1, seg, riprel, disp, dataSize=8
-    maddf xmml, xmml, ufp1, size=8, ext=1
+    maddf xmml, xmml, ufp1, size=8, ext=Scalar
 };
 
 def macroop ADDPS_XMM_XMM {
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/division.py b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/division.py
index 3e565278c..e8f596463 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/division.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/division.py
@@ -55,33 +55,33 @@
 
 microcode = '''
 def macroop DIVSS_XMM_XMM {
-    mdivf xmml, xmml, xmmlm, size=4, ext=1
+    mdivf xmml, xmml, xmmlm, size=4, ext=Scalar
 };
 
 def macroop DIVSS_XMM_M {
     ldfp ufp1, seg, sib, disp, dataSize=8
-    mdivf xmml, xmml, ufp1, size=4, ext=1
+    mdivf xmml, xmml, ufp1, size=4, ext=Scalar
 };
 
 def macroop DIVSS_XMM_P {
     rdip t7
     ldfp ufp1, seg, riprel, disp, dataSize=8
-    mdivf xmml, xmml, ufp1, size=4, ext=1
+    mdivf xmml, xmml, ufp1, size=4, ext=Scalar
 };
 
 def macroop DIVSD_XMM_XMM {
-    mdivf xmml, xmml, xmmlm, size=8, ext=1
+    mdivf xmml, xmml, xmmlm, size=8, ext=Scalar
 };
 
 def macroop DIVSD_XMM_M {
     ldfp ufp1, seg, sib, disp, dataSize=8
-    mdivf xmml, xmml, ufp1, size=8, ext=1
+    mdivf xmml, xmml, ufp1, size=8, ext=Scalar
 };
 
 def macroop DIVSD_XMM_P {
     rdip t7
     ldfp ufp1, seg, riprel, disp, dataSize=8
-    mdivf xmml, xmml, ufp1, size=8, ext=1
+    mdivf xmml, xmml, ufp1, size=8, ext=Scalar
 };
 
 def macroop DIVPS_XMM_XMM {
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/horizontal_addition.py b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/horizontal_addition.py
index adf7650b9..41c5f719c 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/horizontal_addition.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/horizontal_addition.py
@@ -57,23 +57,23 @@ microcode = '''
 # HADDPS
 
 def macroop HADDPD_XMM_XMM {
-    maddf ufp1, xmmh , xmml, size=8, ext=1
-    maddf xmmh, xmmlm, xmmhm, size=8, ext=1
+    maddf ufp1, xmmh , xmml, size=8, ext=Scalar
+    maddf xmmh, xmmlm, xmmhm, size=8, ext=Scalar
     movfp xmml, ufp1
 };
 
 def macroop HADDPD_XMM_M {
     ldfp ufp1, seg, sib, disp, dataSize=8
     ldfp ufp2, seg, sib, "DISPLACEMENT+8", dataSize=8
-    maddf xmml, xmmh, xmml, size=8, ext=1
-    maddf xmmh, ufp1, ufp2, size=8, ext=1
+    maddf xmml, xmmh, xmml, size=8, ext=Scalar
+    maddf xmmh, ufp1, ufp2, size=8, ext=Scalar
 };
 
 def macroop HADDPD_XMM_P {
     rdip t7
     ldfp ufp1, seg, riprel, disp, dataSize=8
     ldfp ufp2, seg, riprel, "DISPLACEMENT+8", dataSize=8
-    maddf xmml, xmmh, xmml, size=8, ext=1
-    maddf xmmh, ufp1, ufp2, size=8, ext=1
+    maddf xmml, xmmh, xmml, size=8, ext=Scalar
+    maddf xmmh, ufp1, ufp2, size=8, ext=Scalar
 };
 '''
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/multiplication.py b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/multiplication.py
index fc28fbda4..c00aa6048 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/multiplication.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/multiplication.py
@@ -55,33 +55,33 @@
 
 microcode = '''
 def macroop MULSS_XMM_XMM {
-    mmulf xmml, xmml, xmmlm, size=4, ext=1
+    mmulf xmml, xmml, xmmlm, size=4, ext=Scalar
 };
 
 def macroop MULSS_XMM_M {
     ldfp ufp1, seg, sib, disp, dataSize=8
-    mmulf xmml, xmml, ufp1, size=4, ext=1
+    mmulf xmml, xmml, ufp1, size=4, ext=Scalar
 };
 
 def macroop MULSS_XMM_P {
     rdip t7
     ldfp ufp1, seg, riprel, disp, dataSize=8
-    mmulf xmml, xmml, ufp1, size=4, ext=1
+    mmulf xmml, xmml, ufp1, size=4, ext=Scalar
 };
 
 def macroop MULSD_XMM_XMM {
-    mmulf xmml, xmml, xmmlm, size=8, ext=1
+    mmulf xmml, xmml, xmmlm, size=8, ext=Scalar
 };
 
 def macroop MULSD_XMM_M {
     ldfp ufp1, seg, sib, disp, dataSize=8
-    mmulf xmml, xmml, ufp1, size=8, ext=1
+    mmulf xmml, xmml, ufp1, size=8, ext=Scalar
 };
 
 def macroop MULSD_XMM_P {
     rdip t7
     ldfp ufp1, seg, riprel, disp, dataSize=8
-    mmulf xmml, xmml, ufp1, size=8, ext=1
+    mmulf xmml, xmml, ufp1, size=8, ext=Scalar
 };
 
 def macroop MULPS_XMM_XMM {
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/square_root.py b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/square_root.py
index fdeb30ddc..dc52a63c3 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/square_root.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/square_root.py
@@ -55,18 +55,18 @@
 
 microcode = '''
 def macroop SQRTSS_XMM_XMM {
-    msqrt xmml, xmmlm, size=4, ext=1
+    msqrt xmml, xmmlm, size=4, ext=Scalar
 };
 
 def macroop SQRTSS_XMM_M {
     ldfp ufp1, seg, sib, disp, dataSize=8
-    msqrt xmml, ufp1, size=4, ext=1
+    msqrt xmml, ufp1, size=4, ext=Scalar
 };
 
 def macroop SQRTSS_XMM_P {
     rdip t7
     ldfp ufp1, seg, riprel, disp, dataSize=8
-    msqrt xmml, ufp1, size=4, ext=1
+    msqrt xmml, ufp1, size=4, ext=Scalar
 };
 
 def macroop SQRTPS_XMM_XMM {
@@ -90,18 +90,18 @@ def macroop SQRTPS_XMM_P {
 };
 
 def macroop SQRTSD_XMM_XMM {
-    msqrt xmml, xmmlm, size=8, ext=1
+    msqrt xmml, xmmlm, size=8, ext=Scalar
 };
 
 def macroop SQRTSD_XMM_M {
     ldfp ufp1, seg, sib, disp, dataSize=8
-    msqrt xmml, ufp1, size=8, ext=1
+    msqrt xmml, ufp1, size=8, ext=Scalar
 };
 
 def macroop SQRTSD_XMM_P {
     rdip t7
     ldfp ufp1, seg, riprel, disp, dataSize=8
-    msqrt xmml, ufp1, size=8, ext=1
+    msqrt xmml, ufp1, size=8, ext=Scalar
 };
 
 def macroop SQRTPD_XMM_XMM {
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/subtraction.py b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/subtraction.py
index 378abc070..d69ce3831 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/subtraction.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/subtraction.py
@@ -55,33 +55,33 @@
 
 microcode = '''
 def macroop SUBSS_XMM_XMM {
-    msubf xmml, xmml, xmmlm, size=4, ext=1
+    msubf xmml, xmml, xmmlm, size=4, ext=Scalar
 };
 
 def macroop SUBSS_XMM_M {
     ldfp ufp1, seg, sib, disp, dataSize=8
-    msubf xmml, xmml, ufp1, size=4, ext=1
+    msubf xmml, xmml, ufp1, size=4, ext=Scalar
 };
 
 def macroop SUBSS_XMM_P {
     rdip t7
     ldfp ufp1, seg, riprel, disp, dataSize=8
-    msubf xmml, xmml, ufp1, size=4, ext=1
+    msubf xmml, xmml, ufp1, size=4, ext=Scalar
 };
 
 def macroop SUBSD_XMM_XMM {
-    msubf xmml, xmml, xmmlm, size=8, ext=1
+    msubf xmml, xmml, xmmlm, size=8, ext=Scalar
 };
 
 def macroop SUBSD_XMM_M {
     ldfp ufp1, seg, sib, disp, dataSize=8
-    msubf xmml, xmml, ufp1, size=8, ext=1
+    msubf xmml, xmml, ufp1, size=8, ext=Scalar
 };
 
 def macroop SUBSD_XMM_P {
     rdip t7
     ldfp ufp1, seg, riprel, disp, dataSize=8
-    msubf xmml, xmml, ufp1, size=8, ext=1
+    msubf xmml, xmml, ufp1, size=8, ext=Scalar
 };
 
 def macroop SUBPS_XMM_XMM {
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_mask.py b/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_mask.py
index 09c34600b..e4449be10 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_mask.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_mask.py
@@ -95,32 +95,32 @@ def macroop CMPPD_XMM_P_I {
 };
 
 def macroop CMPSS_XMM_XMM_I {
-    mcmpf2r xmml, xmml, xmmlm, size=4, ext="IMMEDIATE | 0x8"
+    mcmpf2r xmml, xmml, xmmlm, size=4, ext="IMMEDIATE |" + Scalar
 };
 
 def macroop CMPSS_XMM_M_I {
     ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
-    mcmpf2r xmml, xmml, ufp1, size=4, ext="IMMEDIATE | 0x8"
+    mcmpf2r xmml, xmml, ufp1, size=4, ext="IMMEDIATE |" + Scalar
 };
 
 def macroop CMPSS_XMM_P_I {
     rdip t7
     ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
-    mcmpf2r xmml, xmml, ufp1, size=4, ext="IMMEDIATE | 0x8"
+    mcmpf2r xmml, xmml, ufp1, size=4, ext="IMMEDIATE |" + Scalar
 };
 
 def macroop CMPSD_XMM_XMM_I {
-    mcmpf2r xmml, xmml, xmmlm, size=8, ext="IMMEDIATE | 0x8"
+    mcmpf2r xmml, xmml, xmmlm, size=8, ext="IMMEDIATE |" + Scalar
 };
 
 def macroop CMPSD_XMM_M_I {
     ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
-    mcmpf2r xmml, xmml, ufp1, size=8, ext="IMMEDIATE | 0x8"
+    mcmpf2r xmml, xmml, ufp1, size=8, ext="IMMEDIATE |" + Scalar
 };
 
 def macroop CMPSD_XMM_P_I {
     rdip t7
     ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
-    mcmpf2r xmml, xmml, ufp1, size=8, ext="IMMEDIATE | 0x8"
+    mcmpf2r xmml, xmml, ufp1, size=8, ext="IMMEDIATE |" + Scalar
 };
 '''
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_minimum_or_maximum.py b/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_minimum_or_maximum.py
index 17c97662c..0a62ce343 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_minimum_or_maximum.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_minimum_or_maximum.py
@@ -95,33 +95,33 @@ def macroop MINPD_XMM_P {
 };
 
 def macroop MINSS_XMM_XMM {
-    mminf xmml, xmml, xmmlm, ext=1, size=4
+    mminf xmml, xmml, xmmlm, ext=Scalar, size=4
 };
 
 def macroop MINSS_XMM_M {
     ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
-    mminf xmml, xmml, ufp1, ext=1, size=4
+    mminf xmml, xmml, ufp1, ext=Scalar, size=4
 };
 
 def macroop MINSS_XMM_P {
     rdip t7
     ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
-    mminf xmml, xmml, ufp1, ext=1, size=4
+    mminf xmml, xmml, ufp1, ext=Scalar, size=4
 };
 
 def macroop MINSD_XMM_XMM {
-    mminf xmml, xmml, xmmlm, ext=1, size=8
+    mminf xmml, xmml, xmmlm, ext=Scalar, size=8
 };
 
 def macroop MINSD_XMM_M {
     ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
-    mminf xmml, xmml, ufp1, ext=1, size=8
+    mminf xmml, xmml, ufp1, ext=Scalar, size=8
 };
 
 def macroop MINSD_XMM_P {
     rdip t7
     ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
-    mminf xmml, xmml, ufp1, ext=1, size=8
+    mminf xmml, xmml, ufp1, ext=Scalar, size=8
 };
 
 def macroop MAXPS_XMM_XMM {
@@ -165,32 +165,32 @@ def macroop MAXPD_XMM_P {
 };
 
 def macroop MAXSS_XMM_XMM {
-    mmaxf xmml, xmml, xmmlm, ext=1, size=4
+    mmaxf xmml, xmml, xmmlm, ext=Scalar, size=4
 };
 
 def macroop MAXSS_XMM_M {
     ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
-    mmaxf xmml, xmml, ufp1, ext=1, size=4
+    mmaxf xmml, xmml, ufp1, ext=Scalar, size=4
 };
 
 def macroop MAXSS_XMM_P {
     rdip t7
     ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
-    mmaxf xmml, xmml, ufp1, ext=1, size=4
+    mmaxf xmml, xmml, ufp1, ext=Scalar, size=4
 };
 
 def macroop MAXSD_XMM_XMM {
-    mmaxf xmml, xmml, xmmlm, ext=1, size=8
+    mmaxf xmml, xmml, xmmlm, ext=Scalar, size=8
 };
 
 def macroop MAXSD_XMM_M {
     ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
-    mmaxf xmml, xmml, ufp1, ext=1, size=8
+    mmaxf xmml, xmml, ufp1, ext=Scalar, size=8
 };
 
 def macroop MAXSD_XMM_P {
     rdip t7
     ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
-    mmaxf xmml, xmml, ufp1, ext=1, size=8
+    mmaxf xmml, xmml, ufp1, ext=Scalar, size=8
 };
 '''
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_floating_point.py b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_floating_point.py
index 1c36f7e45..5988c77ba 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_floating_point.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_floating_point.py
@@ -55,33 +55,33 @@
 
 microcode = '''
 def macroop CVTSS2SD_XMM_XMM {
-    cvtf2f xmml, xmmlm, destSize=8, srcSize=4, ext=1
+    cvtf2f xmml, xmmlm, destSize=8, srcSize=4, ext=Scalar
 };
 
 def macroop CVTSS2SD_XMM_M {
     ldfp ufp1, seg, sib, disp, dataSize=8
-    cvtf2f xmml, ufp1, destSize=8, srcSize=4, ext=1
+    cvtf2f xmml, ufp1, destSize=8, srcSize=4, ext=Scalar
 };
 
 def macroop CVTSS2SD_XMM_P {
     rdip t7
     ldfp ufp1, seg, riprel, disp, dataSize=8
-    cvtf2f xmml, ufp1, destSize=8, srcSize=4, ext=1
+    cvtf2f xmml, ufp1, destSize=8, srcSize=4, ext=Scalar
 };
 
 def macroop CVTSD2SS_XMM_XMM {
-    cvtf2f xmml, xmmlm, destSize=4, srcSize=8, ext=1
+    cvtf2f xmml, xmmlm, destSize=4, srcSize=8, ext=Scalar
 };
 
 def macroop CVTSD2SS_XMM_M {
     ldfp ufp1, seg, sib, disp, dataSize=8
-    cvtf2f xmml, ufp1, destSize=4, srcSize=8, ext=1
+    cvtf2f xmml, ufp1, destSize=4, srcSize=8, ext=Scalar
 };
 
 def macroop CVTSD2SS_XMM_P {
     rdip t7
     ldfp ufp1, seg, riprel, disp, dataSize=8
-    cvtf2f xmml, ufp1, destSize=4, srcSize=8, ext=1
+    cvtf2f xmml, ufp1, destSize=4, srcSize=8, ext=Scalar
 };
 
 def macroop CVTPS2PD_XMM_XMM {
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_gpr_integer.py b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_gpr_integer.py
index 16abd96f4..0b7ca5c5b 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_gpr_integer.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_gpr_integer.py
@@ -55,74 +55,74 @@
 
 microcode = '''
 def macroop CVTSS2SI_R_XMM {
-    cvtf2i ufp1, xmmlm, srcSize=4, destSize=dsz, ext=(1 | 4)
+    cvtf2i ufp1, xmmlm, srcSize=4, destSize=dsz, ext = Scalar + "| 4"
     mov2int reg, ufp1, size=dsz
 };
 
 def macroop CVTSS2SI_R_M {
     ldfp ufp1, seg, sib, disp, dataSize=8
-    cvtf2i ufp1, ufp1, srcSize=4, destSize=dsz, ext=(1 | 4)
+    cvtf2i ufp1, ufp1, srcSize=4, destSize=dsz, ext = Scalar + "| 4"
     mov2int reg, ufp1, size=dsz
 };
 
 def macroop CVTSS2SI_R_P {
     rdip t7
     ldfp ufp1, seg, riprel, disp, dataSize=8
-    cvtf2i ufp1, ufp1, srcSize=4, destSize=dsz, ext=(1 | 4)
+    cvtf2i ufp1, ufp1, srcSize=4, destSize=dsz, ext = Scalar + "| 4"
     mov2int reg, ufp1, size=dsz
 };
 
 def macroop CVTSD2SI_R_XMM {
-    cvtf2i ufp1, xmmlm, srcSize=8, destSize=dsz, ext=(1 | 4)
+    cvtf2i ufp1, xmmlm, srcSize=8, destSize=dsz, ext = Scalar + "| 4"
     mov2int reg, ufp1, size=dsz
 };
 
 def macroop CVTSD2SI_R_M {
     ldfp ufp1, seg, sib, disp, dataSize=8
-    cvtf2i ufp1, ufp1, srcSize=8, destSize=dsz, ext=(1 | 4)
+    cvtf2i ufp1, ufp1, srcSize=8, destSize=dsz, ext = Scalar + "| 4"
     mov2int reg, ufp1, size=dsz
 };
 
 def macroop CVTSD2SI_R_P {
     rdip t7
     ldfp ufp1, seg, riprel, disp, dataSize=8
-    cvtf2i ufp1, ufp1, srcSize=8, destSize=dsz, ext=(1 | 4)
+    cvtf2i ufp1, ufp1, srcSize=8, destSize=dsz, ext = Scalar + "| 4"
     mov2int reg, ufp1, size=dsz
 };
 
 def macroop CVTTSS2SI_R_XMM {
-    cvtf2i ufp1, xmmlm, srcSize=4, destSize=dsz, ext=1
+    cvtf2i ufp1, xmmlm, srcSize=4, destSize=dsz, ext=Scalar
     mov2int reg, ufp1, size=dsz
 };
 
 def macroop CVTTSS2SI_R_M {
     ldfp ufp1, seg, sib, disp, dataSize=8
-    cvtf2i ufp1, ufp1, srcSize=4, destSize=dsz, ext=1
+    cvtf2i ufp1, ufp1, srcSize=4, destSize=dsz, ext=Scalar
     mov2int reg, ufp1, size=dsz
 };
 
 def macroop CVTTSS2SI_R_P {
     rdip t7
     ldfp ufp1, seg, riprel, disp, dataSize=8
-    cvtf2i ufp1, ufp1, srcSize=4, destSize=dsz, ext=1
+    cvtf2i ufp1, ufp1, srcSize=4, destSize=dsz, ext=Scalar
     mov2int reg, ufp1, size=dsz
 };
 
 def macroop CVTTSD2SI_R_XMM {
-    cvtf2i ufp1, xmmlm, srcSize=8, destSize=dsz, ext=1
+    cvtf2i ufp1, xmmlm, srcSize=8, destSize=dsz, ext=Scalar
     mov2int reg, ufp1, size=dsz
 };
 
 def macroop CVTTSD2SI_R_M {
     ldfp ufp1, seg, sib, disp, dataSize=8
-    cvtf2i ufp1, ufp1, srcSize=8, destSize=dsz, ext=1
+    cvtf2i ufp1, ufp1, srcSize=8, destSize=dsz, ext=Scalar
     mov2int reg, ufp1, size=dsz
 };
 
 def macroop CVTTSD2SI_R_P {
     rdip t7
     ldfp ufp1, seg, riprel, disp, dataSize=8
-    cvtf2i ufp1, ufp1, srcSize=8, destSize=dsz, ext=1
+    cvtf2i ufp1, ufp1, srcSize=8, destSize=dsz, ext=Scalar
     mov2int reg, ufp1, size=dsz
 };
 '''
diff --git a/src/arch/x86/isa/insts/simd128/integer/arithmetic/addition.py b/src/arch/x86/isa/insts/simd128/integer/arithmetic/addition.py
index 05e2b80d5..1e9856562 100644
--- a/src/arch/x86/isa/insts/simd128/integer/arithmetic/addition.py
+++ b/src/arch/x86/isa/insts/simd128/integer/arithmetic/addition.py
@@ -135,43 +135,43 @@ def macroop PADDQ_XMM_P {
 };
 
 def macroop PADDSB_XMM_XMM {
-    maddi xmml, xmml, xmmlm, size=1, ext=4
-    maddi xmmh, xmmh, xmmhm, size=1, ext=4
+    maddi xmml, xmml, xmmlm, size=1, ext = "2 |" + Signed
+    maddi xmmh, xmmh, xmmhm, size=1, ext = "2 |" + Signed
 };
 
 def macroop PADDSB_XMM_M {
     ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
     ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
-    maddi xmml, xmml, ufp1, size=1, ext=4
-    maddi xmmh, xmmh, ufp2, size=1, ext=4
+    maddi xmml, xmml, ufp1, size=1, ext = "2 |" + Signed
+    maddi xmmh, xmmh, ufp2, size=1, ext = "2 |" + Signed
 };
 
 def macroop PADDSB_XMM_P {
     rdip t7
     ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
     ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
-    maddi xmml, xmml, ufp1, size=1, ext=4
-    maddi xmmh, xmmh, ufp2, size=1, ext=4
+    maddi xmml, xmml, ufp1, size=1, ext = "2 |" + Signed
+    maddi xmmh, xmmh, ufp2, size=1, ext = "2 |" + Signed
 };
 
 def macroop PADDSW_XMM_XMM {
-    maddi xmml, xmml, xmmlm, size=2, ext=4
-    maddi xmmh, xmmh, xmmhm, size=2, ext=4
+    maddi xmml, xmml, xmmlm, size=2, ext = "2 |" + Signed
+    maddi xmmh, xmmh, xmmhm, size=2, ext = "2 |" + Signed
 };
 
 def macroop PADDSW_XMM_M {
     ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
     ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
-    maddi xmml, xmml, ufp1, size=2, ext=4
-    maddi xmmh, xmmh, ufp2, size=2, ext=4
+    maddi xmml, xmml, ufp1, size=2, ext = "2 |" + Signed
+    maddi xmmh, xmmh, ufp2, size=2, ext = "2 |" + Signed
 };
 
 def macroop PADDSW_XMM_P {
     rdip t7
     ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
     ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
-    maddi xmml, xmml, ufp1, size=2, ext=4
-    maddi xmmh, xmmh, ufp2, size=2, ext=4
+    maddi xmml, xmml, ufp1, size=2, ext = "2 |" + Signed
+    maddi xmmh, xmmh, ufp2, size=2, ext = "2 |" + Signed
 };
 
 def macroop PADDUSB_XMM_XMM {
diff --git a/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiplication.py b/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiplication.py
index a5d90c6b2..904bf69f8 100644
--- a/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiplication.py
+++ b/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiplication.py
@@ -55,82 +55,82 @@
 
 microcode = '''
 def macroop PMULHW_XMM_XMM {
-    mmuli xmml, xmml, xmmlm, size=2, ext=(0x2 | 0x8)
-    mmuli xmmh, xmmh, xmmhm, size=2, ext=(0x2 | 0x8)
+    mmuli xmml, xmml, xmmlm, size=2, ext = Signed + "|" + MultHi
+    mmuli xmmh, xmmh, xmmhm, size=2, ext = Signed + "|" + MultHi
 };
 
 def macroop PMULHW_XMM_M {
     ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
     ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
-    mmuli xmml, xmml, ufp1, size=2, ext=(0x2 | 0x8)
-    mmuli xmmh, xmmh, ufp2, size=2, ext=(0x2 | 0x8)
+    mmuli xmml, xmml, ufp1, size=2, ext = Signed + "|" + MultHi
+    mmuli xmmh, xmmh, ufp2, size=2, ext = Signed + "|" + MultHi
 };
 
 def macroop PMULHW_XMM_P {
     rdip t7
     ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
     ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
-    mmuli xmml, xmml, ufp1, size=2, ext=(0x2 | 0x8)
-    mmuli xmmh, xmmh, ufp2, size=2, ext=(0x2 | 0x8)
+    mmuli xmml, xmml, ufp1, size=2, ext = Signed + "|" + MultHi
+    mmuli xmmh, xmmh, ufp2, size=2, ext = Signed + "|" + MultHi
 };
 
 def macroop PMULLW_XMM_XMM {
-    mmuli xmml, xmml, xmmlm, size=2, ext=2
-    mmuli xmmh, xmmh, xmmhm, size=2, ext=2
+    mmuli xmml, xmml, xmmlm, size=2, ext=Signed
+    mmuli xmmh, xmmh, xmmhm, size=2, ext=Signed
 };
 
 def macroop PMULLW_XMM_M {
     ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
     ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
-    mmuli xmml, xmml, ufp1, size=2, ext=2
-    mmuli xmmh, xmmh, ufp2, size=2, ext=2
+    mmuli xmml, xmml, ufp1, size=2, ext=Signed
+    mmuli xmmh, xmmh, ufp2, size=2, ext=Signed
 };
 
 def macroop PMULLW_XMM_P {
     rdip t7
     ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
     ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
-    mmuli xmml, xmml, ufp1, size=2, ext=2
-    mmuli xmmh, xmmh, ufp2, size=2, ext=2
+    mmuli xmml, xmml, ufp1, size=2, ext=Signed
+    mmuli xmmh, xmmh, ufp2, size=2, ext=Signed
 };
 
 def macroop PMULHUW_XMM_XMM {
-    mmuli xmml, xmml, xmmlm, size=2, ext=8
-    mmuli xmmh, xmmh, xmmhm, size=2, ext=8
+    mmuli xmml, xmml, xmmlm, size=2, ext = MultHi
+    mmuli xmmh, xmmh, xmmhm, size=2, ext = MultHi
 };
 
 def macroop PMULHUW_XMM_M {
     ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
     ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
-    mmuli xmml, xmml, ufp1, size=2, ext=8
-    mmuli xmmh, xmmh, ufp2, size=2, ext=8
+    mmuli xmml, xmml, ufp1, size=2, ext = MultHi
+    mmuli xmmh, xmmh, ufp2, size=2, ext = MultHi
 };
 
 def macroop PMULHUW_XMM_P {
     rdip t7
     ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
     ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
-    mmuli xmml, xmml, ufp1, size=2, ext=8
-    mmuli xmmh, xmmh, ufp2, size=2, ext=8
+    mmuli xmml, xmml, ufp1, size=2, ext = MultHi
+    mmuli xmmh, xmmh, ufp2, size=2, ext = MultHi
 };
 
 def macroop PMULUDQ_XMM_XMM {
-    mmuli xmml, xmml, xmmlm, srcSize=4, destSize=8, ext=1
-    mmuli xmmh, xmmh, xmmhm, srcSize=4, destSize=8, ext=1
+    mmuli xmml, xmml, xmmlm, srcSize=4, destSize=8, ext=Scalar
+    mmuli xmmh, xmmh, xmmhm, srcSize=4, destSize=8, ext=Scalar
 };
 
 def macroop PMULUDQ_XMM_M {
     ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
     ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
-    mmuli xmml, xmml, ufp1, srcSize=4, destSize=8, ext=1
-    mmuli xmmh, xmmh, ufp2, srcSize=4, destSize=8, ext=1
+    mmuli xmml, xmml, ufp1, srcSize=4, destSize=8, ext=Scalar
+    mmuli xmmh, xmmh, ufp2, srcSize=4, destSize=8, ext=Scalar
 };
 
 def macroop PMULUDQ_XMM_P {
     rdip t7
     ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
     ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
-    mmuli xmml, xmml, ufp1, srcSize=4, destSize=8, ext=1
-    mmuli xmmh, xmmh, ufp2, srcSize=4, destSize=8, ext=1
+    mmuli xmml, xmml, ufp1, srcSize=4, destSize=8, ext=Scalar
+    mmuli xmmh, xmmh, ufp2, srcSize=4, destSize=8, ext=Scalar
 };
 '''
diff --git a/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiply_add.py b/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiply_add.py
index f157d165f..64ae05190 100644
--- a/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiply_add.py
+++ b/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiply_add.py
@@ -55,22 +55,22 @@
 
 microcode = '''
 def macroop PMADDWD_XMM_XMM {
-    mmuli ufp3, xmml, xmmlm, srcSize=2, destSize=4, ext=(0x2 | 0x10 | 0x20)
-    mmuli ufp4, xmml, xmmlm, srcSize=2, destSize=4, ext=(0x2 | 0x10)
+    mmuli ufp3, xmml, xmmlm, srcSize=2, destSize=4, ext = Signed + "| 0x10 | 0x20"
+    mmuli ufp4, xmml, xmmlm, srcSize=2, destSize=4, ext = Signed + "| 0x10"
     maddi xmml, ufp3, ufp4, size=4, ext=0
-    mmuli ufp3, xmmh, xmmhm, srcSize=2, destSize=4, ext=(0x2 | 0x10 | 0x20)
-    mmuli ufp4, xmmh, xmmhm, srcSize=2, destSize=4, ext=(0x2 | 0x10)
+    mmuli ufp3, xmmh, xmmhm, srcSize=2, destSize=4, ext = Signed + "| 0x10 | 0x20"
+    mmuli ufp4, xmmh, xmmhm, srcSize=2, destSize=4, ext = Signed + "| 0x10"
     maddi xmmh, ufp3, ufp4, size=4, ext=0
 };
 
 def macroop PMADDWD_XMM_M {
     ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
     ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
-    mmuli ufp3, xmml, ufp1, srcSize=2, destSize=4, ext=(0x2 | 0x10 | 0x20)
-    mmuli ufp4, xmml, ufp1, srcSize=2, destSize=4, ext=(0x2 | 0x10)
+    mmuli ufp3, xmml, ufp1, srcSize=2, destSize=4, ext = Signed + "| 0x10 | 0x20"
+    mmuli ufp4, xmml, ufp1, srcSize=2, destSize=4, ext = Signed + "| 0x10"
     maddi xmml, ufp3, ufp4, size=4, ext=0
-    mmuli ufp3, xmmh, ufp2, srcSize=2, destSize=4, ext=(0x2 | 0x10 | 0x20)
-    mmuli ufp4, xmmh, ufp2, srcSize=2, destSize=4, ext=(0x2 | 0x10)
+    mmuli ufp3, xmmh, ufp2, srcSize=2, destSize=4, ext = Signed + "| 0x10 | 0x20"
+    mmuli ufp4, xmmh, ufp2, srcSize=2, destSize=4, ext = Signed + "| 0x10"
     maddi xmmh, ufp3, ufp4, size=4, ext=0
 };
 
@@ -78,11 +78,11 @@ def macroop PMADDWD_XMM_P {
     rdip t7
     ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
     ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
-    mmuli ufp3, xmml, ufp1, srcSize=2, destSize=4, ext=(0x2 | 0x10 | 0x20)
-    mmuli ufp4, xmml, ufp1, srcSize=2, destSize=4, ext=(0x2 | 0x10)
+    mmuli ufp3, xmml, ufp1, srcSize=2, destSize=4, ext = Signed + "| 0x10 | 0x20"
+    mmuli ufp4, xmml, ufp1, srcSize=2, destSize=4, ext = Signed + "| 0x10"
     maddi xmml, ufp3, ufp4, size=4, ext=0
-    mmuli ufp3, xmmh, ufp2, srcSize=2, destSize=4, ext=(0x2 | 0x10 | 0x20)
-    mmuli ufp4, xmmh, ufp2, srcSize=2, destSize=4, ext=(0x2 | 0x10)
+    mmuli ufp3, xmmh, ufp2, srcSize=2, destSize=4, ext = Signed + "| 0x10 | 0x20"
+    mmuli ufp4, xmmh, ufp2, srcSize=2, destSize=4, ext = Signed + "| 0x10"
     maddi xmmh, ufp3, ufp4, size=4, ext=0
 };
 '''
diff --git a/src/arch/x86/isa/insts/simd128/integer/arithmetic/subtraction.py b/src/arch/x86/isa/insts/simd128/integer/arithmetic/subtraction.py
index fdfb08667..d73434832 100644
--- a/src/arch/x86/isa/insts/simd128/integer/arithmetic/subtraction.py
+++ b/src/arch/x86/isa/insts/simd128/integer/arithmetic/subtraction.py
@@ -135,43 +135,43 @@ def macroop PSUBQ_XMM_P {
 };
 
 def macroop PSUBSB_XMM_XMM {
-    msubi xmml, xmml, xmmlm, size=1, ext=4
-    msubi xmmh, xmmh, xmmhm, size=1, ext=4
+    msubi xmml, xmml, xmmlm, size=1, ext = "2 |" + Signed
+    msubi xmmh, xmmh, xmmhm, size=1, ext = "2 |" + Signed
 };
 
 def macroop PSUBSB_XMM_M {
     ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
     ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
-    msubi xmml, xmml, ufp1, size=1, ext=4
-    msubi xmmh, xmmh, ufp2, size=1, ext=4
+    msubi xmml, xmml, ufp1, size=1, ext = "2 |" + Signed
+    msubi xmmh, xmmh, ufp2, size=1, ext = "2 |" + Signed
 };
 
 def macroop PSUBSB_XMM_P {
     rdip t7
     ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
     ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
-    msubi xmml, xmml, ufp1, size=1, ext=4
-    msubi xmmh, xmmh, ufp2, size=1, ext=4
+    msubi xmml, xmml, ufp1, size=1, ext = "2 |" + Signed
+    msubi xmmh, xmmh, ufp2, size=1, ext = "2 |" + Signed
 };
 
 def macroop PSUBSW_XMM_XMM {
-    msubi xmml, xmml, xmmlm, size=2, ext=4
-    msubi xmmh, xmmh, xmmhm, size=2, ext=4
+    msubi xmml, xmml, xmmlm, size=2, ext = "2 |" + Signed
+    msubi xmmh, xmmh, xmmhm, size=2, ext = "2 |" + Signed
 };
 
 def macroop PSUBSW_XMM_M {
     ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
     ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
-    msubi xmml, xmml, ufp1, size=2, ext=4
-    msubi xmmh, xmmh, ufp2, size=2, ext=4
+    msubi xmml, xmml, ufp1, size=2, ext = "2 |" + Signed
+    msubi xmmh, xmmh, ufp2, size=2, ext = "2 |" + Signed
 };
 
 def macroop PSUBSW_XMM_P {
     rdip t7
     ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
     ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
-    msubi xmml, xmml, ufp1, size=2, ext=4
-    msubi xmmh, xmmh, ufp2, size=2, ext=4
+    msubi xmml, xmml, ufp1, size=2, ext = "2 |" + Signed
+    msubi xmmh, xmmh, ufp2, size=2, ext = "2 |" + Signed
 };
 
 def macroop PSUBUSB_XMM_XMM {
diff --git a/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_minimum_or_maximum.py b/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_minimum_or_maximum.py
index d3bfbb529..6610e0690 100644
--- a/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_minimum_or_maximum.py
+++ b/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_minimum_or_maximum.py
@@ -75,23 +75,23 @@ def macroop PMINUB_XMM_P {
 };
 
 def macroop PMINSW_XMM_XMM {
-    mmini xmml, xmml, xmmlm, size=2, ext=2
-    mmini xmmh, xmmh, xmmhm, size=2, ext=2
+    mmini xmml, xmml, xmmlm, size=2, ext=Signed
+    mmini xmmh, xmmh, xmmhm, size=2, ext=Signed
 };
 
 def macroop PMINSW_XMM_M {
     ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
     ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
-    mmini xmml, xmml, ufp1, size=2, ext=2
-    mmini xmmh, xmmh, ufp2, size=2, ext=2
+    mmini xmml, xmml, ufp1, size=2, ext=Signed
+    mmini xmmh, xmmh, ufp2, size=2, ext=Signed
 };
 
 def macroop PMINSW_XMM_P {
     rdip t7
     ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
     ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
-    mmini xmml, xmml, ufp1, size=2, ext=2
-    mmini xmmh, xmmh, ufp2, size=2, ext=2
+    mmini xmml, xmml, ufp1, size=2, ext=Signed
+    mmini xmmh, xmmh, ufp2, size=2, ext=Signed
 };
 
 def macroop PMAXUB_XMM_XMM {
@@ -115,22 +115,22 @@ def macroop PMAXUB_XMM_P {
 };
 
 def macroop PMAXSW_XMM_XMM {
-    mmaxi xmml, xmml, xmmlm, size=2, ext=2
-    mmaxi xmmh, xmmh, xmmhm, size=2, ext=2
+    mmaxi xmml, xmml, xmmlm, size=2, ext=Signed
+    mmaxi xmmh, xmmh, xmmhm, size=2, ext=Signed
 };
 
 def macroop PMAXSW_XMM_M {
     ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
     ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
-    mmaxi xmml, xmml, ufp1, size=2, ext=2
-    mmaxi xmmh, xmmh, ufp2, size=2, ext=2
+    mmaxi xmml, xmml, ufp1, size=2, ext=Signed
+    mmaxi xmmh, xmmh, ufp2, size=2, ext=Signed
 };
 
 def macroop PMAXSW_XMM_P {
     rdip t7
     ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
     ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
-    mmaxi xmml, xmml, ufp1, size=2, ext=2
-    mmaxi xmmh, xmmh, ufp2, size=2, ext=2
+    mmaxi xmml, xmml, ufp1, size=2, ext=Signed
+    mmaxi xmmh, xmmh, ufp2, size=2, ext=Signed
 };
 '''
diff --git a/src/arch/x86/isa/insts/simd128/integer/data_conversion/convert_gpr_integer_to_floating_point.py b/src/arch/x86/isa/insts/simd128/integer/data_conversion/convert_gpr_integer_to_floating_point.py
index 8d632a0ac..080be66f6 100644
--- a/src/arch/x86/isa/insts/simd128/integer/data_conversion/convert_gpr_integer_to_floating_point.py
+++ b/src/arch/x86/isa/insts/simd128/integer/data_conversion/convert_gpr_integer_to_floating_point.py
@@ -56,33 +56,33 @@
 microcode = '''
 def macroop CVTSI2SS_XMM_R {
     mov2fp ufp1, regm, destSize=dsz, srcSize=dsz
-    cvti2f xmml, ufp1, srcSize=dsz, destSize=4, ext=1
+    cvti2f xmml, ufp1, srcSize=dsz, destSize=4, ext=Scalar
 };
 
 def macroop CVTSI2SS_XMM_M {
     ldfp ufp1, seg, sib, disp, dataSize=8
-    cvti2f xmml, ufp1, srcSize=dsz, destSize=4, ext=1
+    cvti2f xmml, ufp1, srcSize=dsz, destSize=4, ext=Scalar
 };
 
 def macroop CVTSI2SS_XMM_P {
     rdip t7
     ldfp ufp1, seg, riprel, disp, dataSize=8
-    cvti2f xmml, ufp1, srcSize=dsz, destSize=4, ext=1
+    cvti2f xmml, ufp1, srcSize=dsz, destSize=4, ext=Scalar
 };
 
 def macroop CVTSI2SD_XMM_R {
     mov2fp ufp1, regm, destSize=dsz, srcSize=dsz
-    cvti2f xmml, ufp1, srcSize=dsz, destSize=8, ext=1
+    cvti2f xmml, ufp1, srcSize=dsz, destSize=8, ext=Scalar
 };
 
 def macroop CVTSI2SD_XMM_M {
     ldfp ufp1, seg, sib, disp, dataSize=8
-    cvti2f xmml, ufp1, srcSize=dsz, destSize=8, ext=1
+    cvti2f xmml, ufp1, srcSize=dsz, destSize=8, ext=Scalar
 };
 
 def macroop CVTSI2SD_XMM_P {
     rdip t7
     ldfp ufp1, seg, riprel, disp, dataSize=8
-    cvti2f xmml, ufp1, srcSize=dsz, destSize=8, ext=1
+    cvti2f xmml, ufp1, srcSize=dsz, destSize=8, ext=Scalar
 };
 '''
diff --git a/src/arch/x86/isa/insts/simd128/integer/data_reordering/pack_with_saturation.py b/src/arch/x86/isa/insts/simd128/integer/data_reordering/pack_with_saturation.py
index 9112a7382..7afee6cbf 100644
--- a/src/arch/x86/isa/insts/simd128/integer/data_reordering/pack_with_saturation.py
+++ b/src/arch/x86/isa/insts/simd128/integer/data_reordering/pack_with_saturation.py
@@ -55,45 +55,45 @@
 
 microcode = '''
 def macroop PACKSSDW_XMM_XMM {
-    pack ufp1, xmml, xmmh, ext=1, srcSize=4, destSize=2
-    pack xmmh, xmmlm, xmmhm, ext=1, srcSize=4, destSize=2
+    pack ufp1, xmml, xmmh, ext=Signed, srcSize=4, destSize=2
+    pack xmmh, xmmlm, xmmhm, ext=Signed, srcSize=4, destSize=2
     movfp xmml, ufp1, dataSize=8
 };
 
 def macroop PACKSSDW_XMM_M {
     ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
     ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
-    pack xmml, xmml, xmmh, ext=1, srcSize=4, destSize=2
-    pack xmmh, ufp1, ufp2, ext=1, srcSize=4, destSize=2
+    pack xmml, xmml, xmmh, ext=Signed, srcSize=4, destSize=2
+    pack xmmh, ufp1, ufp2, ext=Signed, srcSize=4, destSize=2
 };
 
 def macroop PACKSSDW_XMM_P {
     rdip t7
     ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
     ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
-    pack xmml, xmml, xmmh, ext=1, srcSize=4, destSize=2
-    pack xmmh, ufp1, ufp2, ext=1, srcSize=4, destSize=2
+    pack xmml, xmml, xmmh, ext=Signed, srcSize=4, destSize=2
+    pack xmmh, ufp1, ufp2, ext=Signed, srcSize=4, destSize=2
 };
 
 def macroop PACKSSWB_XMM_XMM {
-    pack ufp1, xmml, xmmh, ext=1, srcSize=2, destSize=1
-    pack xmmh, xmmlm, xmmhm, ext=1, srcSize=2, destSize=1
+    pack ufp1, xmml, xmmh, ext=Signed, srcSize=2, destSize=1
+    pack xmmh, xmmlm, xmmhm, ext=Signed, srcSize=2, destSize=1
     movfp xmml, ufp1, dataSize=8
 };
 
 def macroop PACKSSWB_XMM_M {
     ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
     ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
-    pack xmml, xmml, xmmh, ext=1, srcSize=2, destSize=1
-    pack xmmh, ufp1, ufp2, ext=1, srcSize=2, destSize=1
+    pack xmml, xmml, xmmh, ext=Signed, srcSize=2, destSize=1
+    pack xmmh, ufp1, ufp2, ext=Signed, srcSize=2, destSize=1
 };
 
 def macroop PACKSSWB_XMM_P {
     rdip t7
     ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
     ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
-    pack xmml, xmml, xmmh, ext=1, srcSize=2, destSize=1
-    pack xmmh, ufp1, ufp2, ext=1, srcSize=2, destSize=1
+    pack xmml, xmml, xmmh, ext=Signed, srcSize=2, destSize=1
+    pack xmmh, ufp1, ufp2, ext=Signed, srcSize=2, destSize=1
 };
 
 def macroop PACKUSWB_XMM_XMM {
@@ -105,8 +105,8 @@ def macroop PACKUSWB_XMM_XMM {
 def macroop PACKUSWB_XMM_M {
     ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
     ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
-    pack xmml, xmml, xmmh, ext=0, srcSize=2, destSize=1
-    pack xmmh, ufp1, ufp2, ext=0, srcSize=2, destSize=1
+    pack xmml, xmml, xmmh, ext=0, srcSize=2, destSize=1
+    pack xmmh, ufp1, ufp2, ext=0, srcSize=2, destSize=1
 };
 
 def macroop PACKUSWB_XMM_P {
diff --git a/src/arch/x86/isa/insts/simd64/integer/arithmetic/addition.py b/src/arch/x86/isa/insts/simd64/integer/arithmetic/addition.py
index b663d15b7..d376dccce 100644
--- a/src/arch/x86/isa/insts/simd64/integer/arithmetic/addition.py
+++ b/src/arch/x86/isa/insts/simd64/integer/arithmetic/addition.py
@@ -115,33 +115,33 @@ def macroop PADDQ_MMX_P {
 };
 
 def macroop PADDSB_MMX_MMX {
-    maddi mmx, mmx, mmxm, size=1, ext=4
+    maddi mmx, mmx, mmxm, size=1, ext = "2 |" + Signed
 };
 
 def macroop PADDSB_MMX_M {
     ldfp ufp1, seg, sib, disp, dataSize=8
-    maddi mmx, mmx, ufp1, size=1, ext=4
+    maddi mmx, mmx, ufp1, size=1, ext = "2 |" + Signed
 };
 
 def macroop PADDSB_MMX_P {
     rdip t7
     ldfp ufp1, seg, riprel, disp, dataSize=8
-    maddi mmx, mmx, ufp1, size=1, ext=4
+    maddi mmx, mmx, ufp1, size=1, ext = "2 |" + Signed
 };
 
 def macroop PADDSW_MMX_MMX {
-    maddi mmx, mmx, mmxm, size=2, ext=4
+    maddi mmx, mmx, mmxm, size=2, ext = "2 |" + Signed
 };
 
 def macroop PADDSW_MMX_M {
     ldfp ufp1, seg, sib, disp, dataSize=8
-    maddi mmx, mmx, ufp1, size=2, ext=4
+    maddi mmx, mmx, ufp1, size=2, ext = "2 |" + Signed
 };
 
 def macroop PADDSW_MMX_P {
     rdip t7
     ldfp ufp1, seg, riprel, disp, dataSize=8
-    maddi mmx, mmx, ufp1, size=2, ext=4
+    maddi mmx, mmx, ufp1, size=2, ext = "2 |" + Signed
 };
 
 def macroop PADDUSB_MMX_MMX {
diff --git a/src/arch/x86/isa/insts/simd64/integer/arithmetic/multiplication.py b/src/arch/x86/isa/insts/simd64/integer/arithmetic/multiplication.py
index 7383a744f..526162e32 100644
--- a/src/arch/x86/isa/insts/simd64/integer/arithmetic/multiplication.py
+++ b/src/arch/x86/isa/insts/simd64/integer/arithmetic/multiplication.py
@@ -55,77 +55,77 @@
 
 microcode = '''
 def macroop PMULHW_MMX_MMX {
-    mmuli mmx, mmx, mmxm, size=2, ext=(0x2 | 0x8)
+    mmuli mmx, mmx, mmxm, size=2, ext = Signed + "|" + MultHi
 };
 
 def macroop PMULHW_MMX_M {
     ldfp ufp1, seg, sib, disp, dataSize=8
-    mmuli mmx, mmx, ufp1, size=2, ext=(0x2 | 0x8)
+    mmuli mmx, mmx, ufp1, size=2, ext = Signed + "|" + MultHi
 };
 
 def macroop PMULHW_MMX_P {
     rdip t7
     ldfp ufp1, seg, riprel, disp, dataSize=8
-    mmuli mmx, mmx, ufp1, size=2, ext=(0x2 | 0x8)
+    mmuli mmx, mmx, ufp1, size=2, ext = Signed + "|" + MultHi
 };
 
 def macroop PMULLW_MMX_MMX {
-    mmuli mmx, mmx, mmxm, size=2, ext=2
+    mmuli mmx, mmx, mmxm, size=2, ext = Signed
 };
 
 def macroop PMULLW_MMX_M {
     ldfp ufp1, seg, sib, disp, dataSize=8
-    mmuli mmx, mmx, ufp1, size=2, ext=2
+    mmuli mmx, mmx, ufp1, size=2, ext = Signed
 };
 
 def macroop PMULLW_MMX_P {
     rdip t7
     ldfp ufp1, seg, riprel, disp, dataSize=8
-    mmuli mmx, mmx, ufp1, size=2, ext=2
+    mmuli mmx, mmx, ufp1, size=2, ext = Signed
 };
 
 def macroop PMULHRW_MMX_MMX {
-    mmuli mmx, mmx, mmxm, size=2, ext=(0x2 | 0x4 | 0x8)
+    mmuli mmx, mmx, mmxm, size=2, ext = Signed + "| 0x4 |" + MultHi
 };
 
 def macroop PMULHRW_MMX_M {
     ldfp ufp1, seg, sib, disp, dataSize=8
-    mmuli mmx, mmx, ufp1, size=2, ext=(0x2 | 0x4 | 0x8)
+    mmuli mmx, mmx, ufp1, size=2, ext = Signed + "| 0x4 |" + MultHi
 };
 
 def macroop PMULHRW_MMX_P {
     rdip t7
     ldfp ufp1, seg, riprel, disp, dataSize=8
-    mmuli mmx, mmx, ufp1, size=2, ext=(0x2 | 0x4 | 0x8)
+    mmuli mmx, mmx, ufp1, size=2, ext = Signed + "| 0x4 |" + MultHi
 };
 
 def macroop PMULHUW_MMX_MMX {
-    mmuli mmx, mmx, mmxm, size=2, ext=8
+    mmuli mmx, mmx, mmxm, size=2, ext = MultHi
 };
 
 def macroop PMULHUW_MMX_M {
     ldfp ufp1, seg, sib, disp, dataSize=8
-    mmuli mmx, mmx, ufp1, size=2, ext=8
+    mmuli mmx, mmx, ufp1, size=2, ext = MultHi
 };
 
 def macroop PMULHUW_MMX_P {
     rdip t7
     ldfp ufp1, seg, riprel, disp, dataSize=8
-    mmuli mmx, mmx, ufp1, size=2, ext=8
+    mmuli mmx, mmx, ufp1, size=2, ext = MultHi
 };
 
 def macroop PMULUDQ_MMX_MMX {
-    mmuli mmx, mmx, mmxm, srcSize=4, destSize=8, ext=1
+    mmuli mmx, mmx, mmxm, srcSize=4, destSize=8, ext=Scalar
 };
 
 def macroop PMULUDQ_MMX_M {
     ldfp ufp1, seg, sib, disp, dataSize=8
-    mmuli mmx, mmx, ufp1, srcSize=4, destSize=8, ext=1
+    mmuli mmx, mmx, ufp1, srcSize=4, destSize=8, ext=Scalar
 };
 
 def macroop PMULUDQ_MMX_P {
     rdip t7
     ldfp ufp1, seg, riprel, disp, dataSize=8
-    mmuli mmx, mmx, ufp1, srcSize=4, destSize=8, ext=1
+    mmuli mmx, mmx, ufp1, srcSize=4, destSize=8, ext=Scalar
 };
 '''
diff --git a/src/arch/x86/isa/insts/simd64/integer/arithmetic/multiply_add.py b/src/arch/x86/isa/insts/simd64/integer/arithmetic/multiply_add.py
index f6940d159..354cf8722 100644
--- a/src/arch/x86/isa/insts/simd64/integer/arithmetic/multiply_add.py
+++ b/src/arch/x86/isa/insts/simd64/integer/arithmetic/multiply_add.py
@@ -55,23 +55,23 @@
 
 microcode = '''
 def macroop PMADDWD_MMX_MMX {
-    mmuli ufp3, mmx, mmxm, srcSize=2, destSize=4, ext=(0x2 | 0x10 | 0x20)
-    mmuli ufp4, mmx, mmxm, srcSize=2, destSize=4, ext=(0x2 | 0x10)
+    mmuli ufp3, mmx, mmxm, srcSize=2, destSize=4, ext = Signed + "| 0x10 | 0x20"
+    mmuli ufp4, mmx, mmxm, srcSize=2, destSize=4, ext = Signed + "| 0x10"
     maddi mmx, ufp3, ufp4, size=4, ext=0
 };
 
 def macroop PMADDWD_MMX_M {
     ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
-    mmuli ufp3, mmx, ufp1, srcSize=2, destSize=4, ext=(0x2 | 0x10 | 0x20)
-    mmuli ufp4, mmx, ufp1, srcSize=2, destSize=4, ext=(0x2 | 0x10)
+    mmuli ufp3, mmx, ufp1, srcSize=2, destSize=4, ext = Signed + "| 0x10 | 0x20"
+    mmuli ufp4, mmx, ufp1, srcSize=2, destSize=4, ext = Signed + "| 0x10"
     maddi mmx, ufp3, ufp4, size=4, ext=0
 };
 
 def macroop PMADDWD_MMX_P {
     rdip t7
     ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
-    mmuli ufp3, mmx, ufp1, srcSize=2, destSize=4, ext=(0x2 | 0x10 | 0x20)
-    mmuli ufp4, mmx, ufp1, srcSize=2, destSize=4, ext=(0x2 | 0x10)
+    mmuli ufp3, mmx, ufp1, srcSize=2, destSize=4, ext = Signed + "| 0x10 | 0x20"
+    mmuli ufp4, mmx, ufp1, srcSize=2, destSize=4, ext = Signed + "| 0x10"
     maddi mmx, ufp3, ufp4, size=4, ext=0
 };
 '''
diff --git a/src/arch/x86/isa/insts/simd64/integer/arithmetic/subtraction.py b/src/arch/x86/isa/insts/simd64/integer/arithmetic/subtraction.py
index a60c0b1a8..4ee87e0f8 100644
--- a/src/arch/x86/isa/insts/simd64/integer/arithmetic/subtraction.py
+++ b/src/arch/x86/isa/insts/simd64/integer/arithmetic/subtraction.py
@@ -115,33 +115,33 @@ def macroop PSUBQ_MMX_P {
 };
 
 def macroop PSUBSB_MMX_MMX {
-    msubi mmx, mmx, mmxm, size=1, ext=4
+    msubi mmx, mmx, mmxm, size=1, ext = "2 |" + Signed
 };
 
 def macroop PSUBSB_MMX_M {
     ldfp ufp1, seg, sib, disp, dataSize=8
-    msubi mmx, mmx, ufp1, size=1, ext=4
+    msubi mmx, mmx, ufp1, size=1, ext = "2 |" + Signed
 };
 
 def macroop PSUBSB_MMX_P {
     rdip t7
     ldfp ufp1, seg, riprel, disp, dataSize=8
-    msubi mmx, mmx, ufp1, size=1, ext=4
+    msubi mmx, mmx, ufp1, size=1, ext = "2 |" + Signed
 };
 
 def macroop PSUBSW_MMX_MMX {
-    msubi mmx, mmx, mmxm, size=2, ext=4
+    msubi mmx, mmx, mmxm, size=2, ext = "2 |" + Signed
 };
 
 def macroop PSUBSW_MMX_M {
     ldfp ufp1, seg, sib, disp, dataSize=8
-    msubi mmx, mmx, ufp1, size=2, ext=4
+    msubi mmx, mmx, ufp1, size=2, ext = "2 |" + Signed
 };
 
 def macroop PSUBSW_MMX_P {
     rdip t7
     ldfp ufp1, seg, riprel, disp, dataSize=8
-    msubi mmx, mmx, ufp1, size=2, ext=4
+    msubi mmx, mmx, ufp1, size=2, ext = "2 |" + Signed
 };
 
 def macroop PSUBUSB_MMX_MMX {
diff --git a/src/arch/x86/isa/insts/simd64/integer/compare/compare_and_write_minimum_or_maximum.py b/src/arch/x86/isa/insts/simd64/integer/compare/compare_and_write_minimum_or_maximum.py
index 8d8247300..c2eedbb0e 100644
--- a/src/arch/x86/isa/insts/simd64/integer/compare/compare_and_write_minimum_or_maximum.py
+++ b/src/arch/x86/isa/insts/simd64/integer/compare/compare_and_write_minimum_or_maximum.py
@@ -70,18 +70,18 @@ def macroop PMINUB_MMX_P {
 };
 
 def macroop PMINSW_MMX_MMX {
-    mmini mmx, mmx, mmxm, size=2, ext=2
+    mmini mmx, mmx, mmxm, size=2, ext=Signed
 };
 
 def macroop PMINSW_MMX_M {
     ldfp ufp1, seg, sib, disp, dataSize=8
-    mmini mmx, mmx, ufp1, size=2, ext=2
+    mmini mmx, mmx, ufp1, size=2, ext=Signed
 };
 
 def macroop PMINSW_MMX_P {
     rdip t7
     ldfp ufp1, seg, riprel, disp, dataSize=8
-    mmini mmx, mmx, ufp1, size=2, ext=2
+    mmini mmx, mmx, ufp1, size=2, ext=Signed
 };
 
 def macroop PMAXUB_MMX_MMX {
@@ -100,17 +100,17 @@ def macroop PMAXUB_MMX_P {
 };
 
 def macroop PMAXSW_MMX_MMX {
-    mmaxi mmx, mmx, mmxm, size=2, ext=2
+    mmaxi mmx, mmx, mmxm, size=2, ext=Signed
 };
 
 def macroop PMAXSW_MMX_M {
     ldfp ufp1, seg, sib, disp, dataSize=8
-    mmaxi mmx, mmx, ufp1, size=2, ext=2
+    mmaxi mmx, mmx, ufp1, size=2, ext=Signed
 };
 
 def macroop PMAXSW_MMX_P {
     rdip t7
     ldfp ufp1, seg, riprel, disp, dataSize=8
-    mmaxi mmx, mmx, ufp1, size=2, ext=2
+    mmaxi mmx, mmx, ufp1, size=2, ext=Signed
 };
 '''
diff --git a/src/arch/x86/isa/insts/simd64/integer/data_reordering/pack_with_saturation.py b/src/arch/x86/isa/insts/simd64/integer/data_reordering/pack_with_saturation.py
index 4235d7f26..cb8b4eaa7 100644
--- a/src/arch/x86/isa/insts/simd64/integer/data_reordering/pack_with_saturation.py
+++ b/src/arch/x86/isa/insts/simd64/integer/data_reordering/pack_with_saturation.py
@@ -55,33 +55,33 @@
 
 microcode = '''
 def macroop PACKSSDW_MMX_MMX {
-    pack mmx, mmx, mmxm, ext=1, srcSize=4, destSize=2
+    pack mmx, mmx, mmxm, ext=Signed, srcSize=4, destSize=2
 };
 
 def macroop PACKSSDW_MMX_M {
     ldfp ufp1, seg, sib, disp, dataSize=8
-    pack mmx, mmx, ufp1, ext=1, srcSize=4, destSize=2
+    pack mmx, mmx, ufp1, ext=Signed, srcSize=4, destSize=2
 };
 
 def macroop PACKSSDW_MMX_P {
     rdip t7
     ldfp ufp1, seg, riprel, disp, dataSize=8
-    pack mmx, mmx, ufp1, ext=1, srcSize=4, destSize=2
+    pack mmx, mmx, ufp1, ext=Signed, srcSize=4, destSize=2
 };
 
 def macroop PACKSSWB_MMX_MMX {
-    pack mmx, mmx, mmxm, ext=1, srcSize=2, destSize=1
+    pack mmx, mmx, mmxm, ext=Signed, srcSize=2, destSize=1
 };
 
 def macroop PACKSSWB_MMX_M {
     ldfp ufp1, seg, sib, disp, dataSize=8
-    pack mmx, mmx, ufp1, ext=1, srcSize=2, destSize=1
+    pack mmx, mmx, ufp1, ext=Signed, srcSize=2, destSize=1
 };
 
 def macroop PACKSSWB_MMX_P {
     rdip t7
     ldfp ufp1, seg, riprel, disp, dataSize=8
-    pack mmx, mmx, ufp1, ext=1, srcSize=2, destSize=1
+    pack mmx, mmx, ufp1, ext=Signed, srcSize=2, destSize=1
 };
 
 def macroop PACKUSWB_MMX_MMX {
diff --git a/src/arch/x86/isa/microasm.isa b/src/arch/x86/isa/microasm.isa
index 25b58dfb7..b0b557521 100644
--- a/src/arch/x86/isa/microasm.isa
+++ b/src/arch/x86/isa/microasm.isa
@@ -181,6 +181,9 @@ let {{
                 'kernel_gs_base'):
         assembler.symbols[reg] = regIdx("MISCREG_%s" % reg.upper())
 
+    for flag in ('Scalar', 'MultHi', 'Signed'):
+        assembler.symbols[flag] = 'Media%sOp' % flag
+
     # Code literal which forces a default 64 bit operand size in 64 bit mode.
     assembler.symbols["oszIn64Override"] = '''
     if (machInst.mode.submode == SixtyFourBitMode &&
diff --git a/src/arch/x86/isa/microops/mediaop.isa b/src/arch/x86/isa/microops/mediaop.isa
index 4052f254d..900c166f8 100644
--- a/src/arch/x86/isa/microops/mediaop.isa
+++ b/src/arch/x86/isa/microops/mediaop.isa
@@ -352,7 +352,7 @@ let {{
             assert(srcSize == destSize);
             int size = srcSize;
             int sizeBits = size * 8;
-            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+            int items = numItems(size);
             uint64_t result = FpDestReg.uqw;
 
             for (int i = 0; i < items; i++) {
@@ -451,14 +451,14 @@ let {{
                 // Handle saturation.
                 if (signBit) {
                     if (overflow != mask(destBits - srcBits + 1)) {
-                        if (ext & 0x1)
+                        if (signedOp())
                             picked = (ULL(1) << (destBits - 1));
                         else
                             picked = 0;
                     }
                 } else {
                     if (overflow != 0) {
-                        if (ext & 0x1)
+                        if (signedOp())
                             picked = mask(destBits - 1);
                         else
                             picked = mask(destBits);
@@ -479,14 +479,14 @@ let {{
                 // Handle saturation.
                 if (signBit) {
                     if (overflow != mask(destBits - srcBits + 1)) {
-                        if (ext & 0x1)
+                        if (signedOp())
                             picked = (ULL(1) << (destBits - 1));
                         else
                             picked = 0;
                     }
                 } else {
                     if (overflow != 0) {
-                        if (ext & 0x1)
+                        if (signedOp())
                             picked = mask(destBits - 1);
                         else
                             picked = mask(destBits);
@@ -545,7 +545,7 @@ let {{
             int size = srcSize;
             int sizeBits = size * 8;
             assert(srcSize == 4 || srcSize == 8);
-            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+            int items = numItems(size);
             uint64_t result = FpDestReg.uqw;
 
             for (int i = 0; i < items; i++) {
@@ -595,7 +595,7 @@ let {{
             int size = srcSize;
             int sizeBits = size * 8;
             assert(srcSize == 4 || srcSize == 8);
-            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+            int items = numItems(size);
             uint64_t result = FpDestReg.uqw;
 
             for (int i = 0; i < items; i++) {
@@ -634,7 +634,7 @@ let {{
             assert(srcSize == destSize);
             int size = srcSize;
             int sizeBits = size * 8;
-            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+            int items = numItems(size);
             uint64_t result = FpDestReg.uqw;
 
             for (int i = 0; i < items; i++) {
@@ -648,7 +648,7 @@ let {{
                     (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
                 uint64_t resBits;
 
-                if (ext & 0x2) {
+                if (signedOp()) {
                     if (arg1 < arg2) {
                         resBits = arg1Bits;
                     } else {
@@ -672,7 +672,7 @@ let {{
             assert(srcSize == destSize);
             int size = srcSize;
             int sizeBits = size * 8;
-            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+            int items = numItems(size);
             uint64_t result = FpDestReg.uqw;
 
             for (int i = 0; i < items; i++) {
@@ -686,7 +686,7 @@ let {{
                     (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
                 uint64_t resBits;
 
-                if (ext & 0x2) {
+                if (signedOp()) {
                     if (arg1 > arg2) {
                         resBits = arg1Bits;
                     } else {
@@ -725,7 +725,7 @@ let {{
             int size = srcSize;
             int sizeBits = size * 8;
             assert(srcSize == 4 || srcSize == 8);
-            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+            int items = numItems(size);
             uint64_t result = FpDestReg.uqw;
 
             for (int i = 0; i < items; i++) {
@@ -766,7 +766,7 @@ let {{
             int size = srcSize;
             int sizeBits = size * 8;
             assert(srcSize == 4 || srcSize == 8);
-            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+            int items = numItems(size);
             uint64_t result = FpDestReg.uqw;
 
             for (int i = 0; i < items; i++) {
@@ -812,7 +812,7 @@ let {{
             int size = srcSize;
             int sizeBits = size * 8;
             assert(srcSize == 4 || srcSize == 8);
-            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+            int items = numItems(size);
             uint64_t result = FpDestReg.uqw;
 
             for (int i = 0; i < items; i++) {
@@ -858,7 +858,7 @@ let {{
             int size = srcSize;
             int sizeBits = size * 8;
             assert(srcSize == 4 || srcSize == 8);
-            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+            int items = numItems(size);
             uint64_t result = FpDestReg.uqw;
 
             for (int i = 0; i < items; i++) {
@@ -904,7 +904,7 @@ let {{
             int size = srcSize;
             int sizeBits = size * 8;
             assert(srcSize == 4 || srcSize == 8);
-            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+            int items = numItems(size);
             uint64_t result = FpDestReg.uqw;
 
             for (int i = 0; i < items; i++) {
@@ -938,7 +938,7 @@ let {{
             assert(srcSize == destSize);
             int size = srcSize;
             int sizeBits = size * 8;
-            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+            int items = numItems(size);
             uint64_t result = FpDestReg.uqw;
 
             for (int i = 0; i < items; i++) {
@@ -949,17 +949,19 @@ let {{
                 uint64_t resBits = arg1Bits + arg2Bits;
                 
                 if (ext & 0x2) {
-                    if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits))
-                        resBits = mask(sizeBits);
-                } else if (ext & 0x4) {
-                    int arg1Sign = bits(arg1Bits, sizeBits - 1);
-                    int arg2Sign = bits(arg2Bits, sizeBits - 1);
-                    int resSign = bits(resBits, sizeBits - 1);
-                    if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
-                        if (resSign == 0)
-                            resBits = (ULL(1) << (sizeBits - 1));
-                        else
-                            resBits = mask(sizeBits - 1);
+                    if (signedOp()) {
+                        int arg1Sign = bits(arg1Bits, sizeBits - 1);
+                        int arg2Sign = bits(arg2Bits, sizeBits - 1);
+                        int resSign = bits(resBits, sizeBits - 1);
+                        if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
+                            if (resSign == 0)
+                                resBits = (ULL(1) << (sizeBits - 1));
+                            else
+                                resBits = mask(sizeBits - 1);
+                        }
+                    } else {
+                        if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits))
+                            resBits = mask(sizeBits);
                     }
                 }
 
@@ -973,7 +975,7 @@ let {{
             assert(srcSize == destSize);
             int size = srcSize;
             int sizeBits = size * 8;
-            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+            int items = numItems(size);
             uint64_t result = FpDestReg.uqw;
 
             for (int i = 0; i < items; i++) {
@@ -984,21 +986,23 @@ let {{
                 uint64_t resBits = arg1Bits - arg2Bits;
                 
                 if (ext & 0x2) {
-                    if (arg2Bits > arg1Bits) {
-                        resBits = 0;
-                    } else if (!findCarry(sizeBits, resBits,
-                                         arg1Bits, ~arg2Bits)) {
-                        resBits = mask(sizeBits);
-                    }
-                } else if (ext & 0x4) {
-                    int arg1Sign = bits(arg1Bits, sizeBits - 1);
-                    int arg2Sign = !bits(arg2Bits, sizeBits - 1);
-                    int resSign = bits(resBits, sizeBits - 1);
-                    if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
-                        if (resSign == 0)
-                            resBits = (ULL(1) << (sizeBits - 1));
-                        else
-                            resBits = mask(sizeBits - 1);
+                    if (signedOp()) {
+                        int arg1Sign = bits(arg1Bits, sizeBits - 1);
+                        int arg2Sign = !bits(arg2Bits, sizeBits - 1);
+                        int resSign = bits(resBits, sizeBits - 1);
+                        if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
+                            if (resSign == 0)
+                                resBits = (ULL(1) << (sizeBits - 1));
+                            else
+                                resBits = mask(sizeBits - 1);
+                        }
+                    } else {
+                        if (arg2Bits > arg1Bits) {
+                            resBits = 0;
+                        } else if (!findCarry(sizeBits, resBits,
+                                             arg1Bits, ~arg2Bits)) {
+                            resBits = mask(sizeBits);
+                        }
                     }
                 }
 
@@ -1013,7 +1017,7 @@ let {{
             int destBits = destSize * 8;
             assert(destBits <= 64);
             assert(destSize >= srcSize);
-            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / destSize);
+            int items = numItems(destSize);
             uint64_t result = FpDestReg.uqw;
 
             for (int i = 0; i < items; i++) {
@@ -1030,7 +1034,7 @@ let {{
                 uint64_t arg2Bits = bits(FpSrcReg2.uqw, srcHiIndex, srcLoIndex);
                 uint64_t resBits;
 
-                if (ext & 0x2) {
+                if (signedOp()) {
                     int64_t arg1 = arg1Bits |
                         (0 - (arg1Bits & (ULL(1) << (srcBits - 1))));
                     int64_t arg2 = arg2Bits |
@@ -1043,7 +1047,7 @@ let {{
                 if (ext & 0x4)
                     resBits += (ULL(1) << (destBits - 1));
                 
-                if (ext & 0x8)
+                if (multHi())
                     resBits >>= destBits;
 
                 int destHiIndex = (i + 1) * destBits - 1;
@@ -1058,7 +1062,7 @@ let {{
             assert(srcSize == destSize);
             int size = srcSize;
             int sizeBits = size * 8;
-            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+            int items = numItems(size);
             uint64_t result = FpDestReg.uqw;
 
             for (int i = 0; i < items; i++) {
@@ -1098,7 +1102,7 @@ let {{
             assert(srcSize == destSize);
             int size = srcSize;
             int sizeBits = size * 8;
-            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+            int items = numItems(size);
             uint64_t shiftAmt = op2.uqw;
             uint64_t result = FpDestReg.uqw;
 
@@ -1125,7 +1129,7 @@ let {{
             assert(srcSize == destSize);
             int size = srcSize;
             int sizeBits = size * 8;
-            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+            int items = numItems(size);
             uint64_t shiftAmt = op2.uqw;
             uint64_t result = FpDestReg.uqw;
 
@@ -1156,7 +1160,7 @@ let {{
             assert(srcSize == destSize);
             int size = srcSize;
             int sizeBits = size * 8;
-            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+            int items = numItems(size);
             uint64_t shiftAmt = op2.uqw;
             uint64_t result = FpDestReg.uqw;
 
@@ -1201,15 +1205,15 @@ let {{
             int srcStart = 0;
             int destStart = 0;
             if (srcSize == 2 * destSize) {
-                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
+                items = numItems(srcSize);
                 if (ext & 0x2)
                     destStart = destSizeBits * items;
             } else if (destSize == 2 * srcSize) {
-                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
+                items = numItems(destSize);
                 if (ext & 0x2)
                     srcStart = srcSizeBits * items;
             } else {
-                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
+                items = numItems(destSize);
             }
             uint64_t result = FpDestReg.uqw;
 
@@ -1273,15 +1277,15 @@ let {{
             int srcStart = 0;
             int destStart = 0;
             if (srcSize == 2 * destSize) {
-                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
+                items = numItems(srcSize);
                 if (ext & 0x2)
                     destStart = destSizeBits * items;
             } else if (destSize == 2 * srcSize) {
-                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
+                items = numItems(destSize);
                 if (ext & 0x2)
                     srcStart = srcSizeBits * items;
             } else {
-                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
+                items = numItems(destSize);
             }
             uint64_t result = FpDestReg.uqw;
 
@@ -1334,15 +1338,15 @@ let {{
             int srcStart = 0;
             int destStart = 0;
             if (srcSize == 2 * destSize) {
-                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
+                items = numItems(srcSize);
                 if (ext & 0x2)
                     destStart = destSizeBits * items;
             } else if (destSize == 2 * srcSize) {
-                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
+                items = numItems(destSize);
                 if (ext & 0x2)
                     srcStart = srcSizeBits * items;
             } else {
-                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
+                items = numItems(destSize);
             }
             uint64_t result = FpDestReg.uqw;
 
@@ -1393,7 +1397,7 @@ let {{
             assert(srcSize == destSize);
             int size = srcSize;
             int sizeBits = size * 8;
-            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+            int items = numItems(size);
             uint64_t result = FpDestReg.uqw;
 
             for (int i = 0; i < items; i++) {
@@ -1432,7 +1436,7 @@ let {{
             assert(srcSize == destSize);
             int size = srcSize;
             int sizeBits = size * 8;
-            int items = (ext & 0x8) ? 1: (sizeof(FloatRegBits) / size);
+            int items = numItems(size);
             uint64_t result = FpDestReg.uqw;
 
             for (int i = 0; i < items; i++) {
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index bfeec0870..b229ddd38 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -274,7 +274,7 @@ class BaseCPU : public MemObject
      */
     virtual BranchPred *getBranchPred() { return NULL; };
 
-    virtual Counter totalInstructions() const { return 0; }
+    virtual Counter totalInstructions() const = 0;
 
     // Function tracing
   private:
diff --git a/src/dev/x86/I82094AA.py b/src/dev/x86/I82094AA.py
index 5476becc6..d4ab2cb17 100644
--- a/src/dev/x86/I82094AA.py
+++ b/src/dev/x86/I82094AA.py
@@ -38,6 +38,8 @@ class I82094AA(BasicPioDevice):
     pio_latency = Param.Latency('1ns', "Programmed IO latency in simticks")
     pio_addr = Param.Addr("Device address")
     int_port = Port("Port for sending and receiving interrupt messages")
+    int_latency = Param.Latency('1ns', \
+            "Latency for an interrupt to propagate through this device.")
     external_int_pic = Param.I8259(NULL, "External PIC, if any")
 
     def pin(self, line):
diff --git a/src/dev/x86/i82094aa.cc b/src/dev/x86/i82094aa.cc
index 591fee6a4..65b3ee732 100644
--- a/src/dev/x86/i82094aa.cc
+++ b/src/dev/x86/i82094aa.cc
@@ -36,7 +36,8 @@
 #include "mem/packet_access.hh"
 #include "sim/system.hh"
 
-X86ISA::I82094AA::I82094AA(Params *p) : PioDevice(p), IntDev(this),
+X86ISA::I82094AA::I82094AA(Params *p) : PioDevice(p),
+    IntDev(this, p->int_latency),
     latency(p->pio_latency), pioAddr(p->pio_addr),
     extIntPic(p->external_int_pic), lowestPriorityOffset(0)
 {
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 429928c79..2397a17c5 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -266,7 +266,8 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk,
         return false;
     }
 
-    blk = tags->accessBlock(pkt->getAddr(), lat);
+    int id = pkt->req->hasContextId() ? pkt->req->contextId() : -1;
+    blk = tags->accessBlock(pkt->getAddr(), lat, id);
 
     DPRINTF(Cache, "%s%s %x %s\n", pkt->cmdString(),
             pkt->req->isInstFetch() ? " (ifetch)" : "",
@@ -299,7 +300,8 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk,
                 incMissCount(pkt);
                 return false;
             }
-            tags->insertBlock(pkt->getAddr(), blk);
+            int id = pkt->req->hasContextId() ? pkt->req->contextId() : -1;
+            tags->insertBlock(pkt->getAddr(), blk, id);
             blk->status = BlkValid | BlkReadable;
         }
         std::memcpy(blk->data, pkt->getPtr<uint8_t>(), blkSize);
@@ -976,7 +978,8 @@ Cache<TagStore>::handleFill(PacketPtr pkt, BlkType *blk,
             tempBlock->tag = tags->extractTag(addr);
             DPRINTF(Cache, "using temp block for %x\n", addr);
         } else {
-            tags->insertBlock(addr, blk);
+            int id = pkt->req->hasContextId() ? pkt->req->contextId() : -1;
+            tags->insertBlock(pkt->getAddr(), blk, id);
         }
     } else {
         // existing block... probably an upgrade
diff --git a/src/mem/cache/tags/fa_lru.cc b/src/mem/cache/tags/fa_lru.cc
index 122e6e14b..808f9e25a 100644
--- a/src/mem/cache/tags/fa_lru.cc
+++ b/src/mem/cache/tags/fa_lru.cc
@@ -154,7 +154,7 @@ FALRU::invalidateBlk(FALRU::BlkType *blk)
 }
 
 FALRUBlk*
-FALRU::accessBlock(Addr addr, int &lat, int *inCache)
+FALRU::accessBlock(Addr addr, int &lat, int context_src, int *inCache)
 {
     accesses++;
     int tmp_in_cache = 0;
@@ -228,7 +228,7 @@ FALRU::findVictim(Addr addr, PacketList &writebacks)
 }
 
 void
-FALRU::insertBlock(Addr addr, FALRU::BlkType *blk)
+FALRU::insertBlock(Addr addr, FALRU::BlkType *blk, int context_src)
 {
 }
 
diff --git a/src/mem/cache/tags/fa_lru.hh b/src/mem/cache/tags/fa_lru.hh
index 4e6bccc1d..b20d25d2b 100644
--- a/src/mem/cache/tags/fa_lru.hh
+++ b/src/mem/cache/tags/fa_lru.hh
@@ -182,7 +182,7 @@ public:
      * @param inCache The FALRUBlk::inCache flags.
      * @return Pointer to the cache block.
      */
-    FALRUBlk* accessBlock(Addr addr, int &lat, int *inCache = 0);
+    FALRUBlk* accessBlock(Addr addr, int &lat, int context_src, int *inCache = 0);
 
     /**
      * Find the block in the cache, do not update the replacement data.
@@ -200,7 +200,7 @@ public:
      */
     FALRUBlk* findVictim(Addr addr, PacketList & writebacks);
 
-    void insertBlock(Addr addr, BlkType *blk);
+    void insertBlock(Addr addr, BlkType *blk, int context_src);
 
     /**
      * Return the hit latency of this cache.
diff --git a/src/mem/cache/tags/iic.cc b/src/mem/cache/tags/iic.cc
index b9ba5256b..a8ef4e6fb 100644
--- a/src/mem/cache/tags/iic.cc
+++ b/src/mem/cache/tags/iic.cc
@@ -219,7 +219,7 @@ IIC::regStats(const string &name)
 
 
 IICTag*
-IIC::accessBlock(Addr addr, int &lat)
+IIC::accessBlock(Addr addr, int &lat, int context_src)
 {
     Addr tag = extractTag(addr);
     unsigned set = hash(addr);
@@ -338,7 +338,7 @@ IIC::findVictim(Addr addr, PacketList &writebacks)
 }
 
 void
-IIC::insertBlock(Addr addr, BlkType* blk)
+IIC::insertBlock(Addr addr, BlkType* blk, int context_src)
 {
 }
 
diff --git a/src/mem/cache/tags/iic.hh b/src/mem/cache/tags/iic.hh
index 994f7b8f7..c96cdaf3e 100644
--- a/src/mem/cache/tags/iic.hh
+++ b/src/mem/cache/tags/iic.hh
@@ -422,7 +422,7 @@ class IIC : public BaseTags
      * @param lat The access latency.
      * @return A pointer to the block found, if any.
      */
-    IICTag* accessBlock(Addr addr, int &lat);
+    IICTag* accessBlock(Addr addr, int &lat, int context_src);
 
     /**
      * Find the block, do not update the replacement data.
@@ -440,7 +440,7 @@ class IIC : public BaseTags
      */
     IICTag* findVictim(Addr addr, PacketList &writebacks);
 
-    void insertBlock(Addr addr, BlkType *blk);
+    void insertBlock(Addr addr, BlkType *blk, int context_src);
 
     /**
      * Called at end of simulation to complete average block reference stats.
diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc
index 9371f193a..81d82c231 100644
--- a/src/mem/cache/tags/lru.cc
+++ b/src/mem/cache/tags/lru.cc
@@ -150,7 +150,7 @@ LRU::~LRU()
 }
 
 LRUBlk*
-LRU::accessBlock(Addr addr, int &lat)
+LRU::accessBlock(Addr addr, int &lat, int context_src)
 {
     Addr tag = extractTag(addr);
     unsigned set = extractSet(addr);
@@ -200,7 +200,7 @@ LRU::findVictim(Addr addr, PacketList &writebacks)
 }
 
 void
-LRU::insertBlock(Addr addr, LRU::BlkType *blk)
+LRU::insertBlock(Addr addr, LRU::BlkType *blk, int context_src)
 {
     if (!blk->isTouched) {
         tagsInUse++;
diff --git a/src/mem/cache/tags/lru.hh b/src/mem/cache/tags/lru.hh
index 2874d8f1f..ecd6e861f 100644
--- a/src/mem/cache/tags/lru.hh
+++ b/src/mem/cache/tags/lru.hh
@@ -172,7 +172,7 @@ public:
      * @param lat The access latency.
      * @return Pointer to the cache block if found.
      */
-    LRUBlk* accessBlock(Addr addr, int &lat);
+    LRUBlk* accessBlock(Addr addr, int &lat, int context_src);
 
     /**
      * Finds the given address in the cache, do not update replacement data.
@@ -197,7 +197,7 @@ public:
      * @param addr The address to update.
      * @param blk The block to update.
      */
-     void insertBlock(Addr addr, BlkType *blk);
+     void insertBlock(Addr addr, BlkType *blk, int context_src);
 
     /**
      * Generate the tag from the given address.
diff --git a/src/mem/page_table.cc b/src/mem/page_table.cc
index 4bc3a4434..bcaf5582a 100644
--- a/src/mem/page_table.cc
+++ b/src/mem/page_table.cc
@@ -222,6 +222,6 @@ PageTable::unserialize(Checkpoint *cp, const std::string &section)
         entry->unserialize(cp, csprintf("%s.Entry%d", process->name(), i));
         pTable[vaddr] = *entry;
         ++i;
-   }
+    }
 }
 
diff --git a/src/mem/physical.cc b/src/mem/physical.cc
index 121a6e447..081fbb4cb 100644
--- a/src/mem/physical.cc
+++ b/src/mem/physical.cc
@@ -540,12 +540,8 @@ PhysicalMemory::unserialize(Checkpoint *cp, const string &section)
     /* Only copy bytes that are non-zero, so we don't give the VM system hell */
     while (curSize < params()->range.size()) {
         bytesRead = gzread(compressedMem, tempPage, chunkSize);
-        if (bytesRead != chunkSize &&
-            bytesRead != params()->range.size() - curSize)
-            fatal("Read failed on physical memory checkpoint file '%s'"
-                  " got %d bytes, expected %d or %d bytes\n",
-                  filename, bytesRead, chunkSize,
-                  params()->range.size() - curSize);
+        if (bytesRead == 0)
+            break;
 
         assert(bytesRead % sizeof(long) == 0);
 
diff --git a/src/sim/faults.cc b/src/sim/faults.cc
index 0fe853785..6149a8335 100644
--- a/src/sim/faults.cc
+++ b/src/sim/faults.cc
@@ -40,7 +40,7 @@
 #if !FULL_SYSTEM
 void FaultBase::invoke(ThreadContext * tc)
 {
-    fatal("fault (%s) detected @ PC %p", name(), tc->readPC());
+    panic("fault (%s) detected @ PC %p", name(), tc->readPC());
 }
 #else
 void FaultBase::invoke(ThreadContext * tc)
@@ -54,7 +54,7 @@ void FaultBase::invoke(ThreadContext * tc)
 
 void UnimpFault::invoke(ThreadContext * tc)
 {
-    fatal("Unimpfault: %s\n", panicStr.c_str());
+    panic("Unimpfault: %s\n", panicStr.c_str());
 }
 
 #if !FULL_SYSTEM
diff --git a/src/sim/process.cc b/src/sim/process.cc
index 343d2ad5a..957c3cc3e 100644
--- a/src/sim/process.cc
+++ b/src/sim/process.cc
@@ -507,6 +507,7 @@ Process::serialize(std::ostream &os)
         nameOut(os, csprintf("%s.FdMap%d", name(), x));
         fd_map[x].serialize(os);
     }
+    SERIALIZE_SCALAR(M5_pid);
 
 }
 
@@ -528,6 +529,11 @@ Process::unserialize(Checkpoint *cp, const std::string &section)
         fd_map[x].unserialize(cp, csprintf("%s.FdMap%d", section, x));
      }
     fix_file_offsets();
+    UNSERIALIZE_OPT_SCALAR(M5_pid);
+    // optParamIn() returns a bool, so a caller could react when the param is
+    // missing from the checkpoint (e.g. by setting a default). Here we ignore
+    // the result and simply keep the instantiated value of M5_pid if it is
+    // not found.
 
     checkpointRestored = true;
 
diff --git a/src/sim/serialize.cc b/src/sim/serialize.cc
index 5ae9128e5..0e6d9b254 100644
--- a/src/sim/serialize.cc
+++ b/src/sim/serialize.cc
@@ -204,6 +204,18 @@ paramIn(Checkpoint *cp, const string &section, const string &name, T &param)
     }
 }
 
+template <class T>  // Try to read `name` from `section` of checkpoint cp into param.
+bool  // true on success; false (after a warning) if lookup or parsing fails
+optParamIn(Checkpoint *cp, const string &section, const string &name, T &param)
+{
+    string str;  // text handed from cp->find() to parseParam()
+    if (!cp->find(section, name, str) || !parseParam(str, param)) {
+        warn("optional parameter %s:%s not present\n", section, name);
+        return false;  // also taken on a parse failure, not only a missing entry
+    } else {
+        return true;  // cp->find() and parseParam() both succeeded
+    }
+}
 
 template <class T>
 void
@@ -322,6 +334,9 @@ paramOut(ostream &os, const string &name, type const &param);           \
 template void                                                           \
 paramIn(Checkpoint *cp, const string &section,                          \
         const string &name, type & param);                              \
+template bool                                                           \
+optParamIn(Checkpoint *cp, const string &section,                       \
+        const string &name, type & param);                              \
 template void                                                           \
 arrayParamOut(ostream &os, const string &name,                          \
               type const *param, unsigned size);                        \
@@ -422,7 +437,7 @@ Serializable::serializeAll(const string &cpt_dir)
     time_t t = time(NULL);
     if (!outstream.is_open())
         fatal("Unable to open file %s for writing\n", cpt_file.c_str());
-    outstream << "// checkpoint generated: " << ctime(&t);
+    outstream << "## checkpoint generated: " << ctime(&t);
 
     globals.serialize(outstream);
     SimObject::serializeAll(outstream);
diff --git a/src/sim/serialize.hh b/src/sim/serialize.hh
index 08240c0c0..cf1a672be 100644
--- a/src/sim/serialize.hh
+++ b/src/sim/serialize.hh
@@ -58,6 +58,10 @@ void paramIn(Checkpoint *cp, const std::string &section,
              const std::string &name, T &param);
 
 template <class T>
+bool optParamIn(Checkpoint *cp, const std::string &section,
+             const std::string &name, T &param);
+
+template <class T>
 void arrayParamOut(std::ostream &os, const std::string &name,
                    const T *param, unsigned size);
 
@@ -85,6 +89,7 @@ objParamIn(Checkpoint *cp, const std::string &section,
 #define SERIALIZE_SCALAR(scalar)        paramOut(os, #scalar, scalar)
 
 #define UNSERIALIZE_SCALAR(scalar)      paramIn(cp, section, #scalar, scalar)
+#define UNSERIALIZE_OPT_SCALAR(scalar)      optParamIn(cp, section, #scalar, scalar)
 
 // ENUMs are like SCALARs, but we cast them to ints on the way out
 #define SERIALIZE_ENUM(scalar)          paramOut(os, #scalar, (int)scalar)
diff --git a/tests/quick/00.hello/ref/mips/linux/inorder-timing/config.ini b/tests/quick/00.hello/ref/mips/linux/inorder-timing/config.ini
index cf8b99da8..78a86bf82 100644
--- a/tests/quick/00.hello/ref/mips/linux/inorder-timing/config.ini
+++ b/tests/quick/00.hello/ref/mips/linux/inorder-timing/config.ini
@@ -132,7 +132,6 @@ hash_delay=1
 latency=1000
 max_miss_count=0
 mshrs=10
-prefetch_cache_check_push=true
 prefetch_data_accesses_only=false
 prefetch_degree=1
 prefetch_latency=10000
@@ -167,7 +166,6 @@ hash_delay=1
 latency=1000
 max_miss_count=0
 mshrs=10
-prefetch_cache_check_push=true
 prefetch_data_accesses_only=false
 prefetch_degree=1
 prefetch_latency=10000
@@ -202,7 +200,6 @@ hash_delay=1
 latency=10000
 max_miss_count=0
 mshrs=10
-prefetch_cache_check_push=true
 prefetch_data_accesses_only=false
 prefetch_degree=1
 prefetch_latency=100000
@@ -244,7 +241,7 @@ egid=100
 env=
 errout=cerr
 euid=100
-executable=tests/test-progs/hello/bin/mips/linux/hello
+executable=/dist/m5/regression/test-progs/hello/bin/mips/linux/hello
 gid=100
 input=cin
 max_stack_size=67108864
diff --git a/tests/quick/00.hello/ref/mips/linux/inorder-timing/simout b/tests/quick/00.hello/ref/mips/linux/inorder-timing/simout
index f04692a1f..581c531f6 100755
--- a/tests/quick/00.hello/ref/mips/linux/inorder-timing/simout
+++ b/tests/quick/00.hello/ref/mips/linux/inorder-timing/simout
@@ -5,13 +5,13 @@ The Regents of The University of Michigan
 All Rights Reserved
 
 
-M5 compiled Sep 24 2009 12:19:09
-M5 revision 9bc3e4611009+ 6661+ default tip
-M5 started Sep 24 2009 12:19:46
-M5 executing on zooks
-command line: build/MIPS_SE/m5.fast -d build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/inorder-timing -re tests/run.py build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/inorder-timing
+M5 compiled Jan  2 2010 07:01:31
+M5 revision a538feb8a617 6813 default qtip tip qbase fixhelp.patch
+M5 started Jan  2 2010 07:03:09
+M5 executing on fajita
+command line: build/MIPS_SE/m5.opt -d build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/inorder-timing -re tests/run.py build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/inorder-timing
 Global frequency set at 1000000000000 ticks per second
 info: Entering event queue @ 0.  Starting simulation...
 info: Increasing stack size by one page.
 Hello World!
-Exiting @ tick 29521500 because target called exit()
+Exiting @ tick 29940500 because target called exit()
diff --git a/tests/quick/00.hello/ref/mips/linux/inorder-timing/stats.txt b/tests/quick/00.hello/ref/mips/linux/inorder-timing/stats.txt
index a47f185bc..d55c721ca 100644
--- a/tests/quick/00.hello/ref/mips/linux/inorder-timing/stats.txt
+++ b/tests/quick/00.hello/ref/mips/linux/inorder-timing/stats.txt
@@ -1,99 +1,99 @@
 
 ---------- Begin Simulation Statistics ----------
-host_inst_rate                                  29581                       # Simulator instruction rate (inst/s)
-host_mem_usage                                 155804                       # Number of bytes of host memory used
-host_seconds                                     0.19                       # Real time elapsed on the host
-host_tick_rate                              153369596                       # Simulator tick rate (ticks/s)
+host_inst_rate                                  10400                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 205896                       # Number of bytes of host memory used
+host_seconds                                     0.56                       # Real time elapsed on the host
+host_tick_rate                               53415864                       # Simulator tick rate (ticks/s)
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
-sim_insts                                        5685                       # Number of instructions simulated
+sim_insts                                        5827                       # Number of instructions simulated
 sim_seconds                                  0.000030                       # Number of seconds simulated
-sim_ticks                                    29521500                       # Number of ticks simulated
-system.cpu.AGEN-Unit.instReqsProcessed           2058                       # Number of Instructions Requests that completed in this resource.
-system.cpu.Branch-Predictor.instReqsProcessed         5686                       # Number of Instructions Requests that completed in this resource.
-system.cpu.Branch-Predictor.predictedNotTaken          789                       # Number of Branches Predicted As Not Taken (False).
-system.cpu.Branch-Predictor.predictedTaken           96                       # Number of Branches Predicted As Taken (True).
-system.cpu.Decode-Unit.instReqsProcessed         5686                       # Number of Instructions Requests that completed in this resource.
-system.cpu.Execution-Unit.instReqsProcessed         3624                       # Number of Instructions Requests that completed in this resource.
-system.cpu.Execution-Unit.predictedNotTakenIncorrect          516                       # Number of Branches Incorrectly Predicted As Not Taken).
-system.cpu.Execution-Unit.predictedTakenIncorrect           34                       # Number of Branches Incorrectly Predicted As Taken.
+sim_ticks                                    29940500                       # Number of ticks simulated
+system.cpu.AGEN-Unit.instReqsProcessed           2090                       # Number of Instructions Requests that completed in this resource.
+system.cpu.Branch-Predictor.instReqsProcessed         5828                       # Number of Instructions Requests that completed in this resource.
+system.cpu.Branch-Predictor.predictedNotTaken          826                       # Number of Branches Predicted As Not Taken (False).
+system.cpu.Branch-Predictor.predictedTaken           90                       # Number of Branches Predicted As Taken (True).
+system.cpu.Decode-Unit.instReqsProcessed         5828                       # Number of Instructions Requests that completed in this resource.
+system.cpu.Execution-Unit.instReqsProcessed         3734                       # Number of Instructions Requests that completed in this resource.
+system.cpu.Execution-Unit.predictedNotTakenIncorrect          541                       # Number of Branches Incorrectly Predicted As Not Taken).
+system.cpu.Execution-Unit.predictedTakenIncorrect           35                       # Number of Branches Incorrectly Predicted As Taken.
 system.cpu.Fetch-Buffer-T0.instReqsProcessed            0                       # Number of Instructions Requests that completed in this resource.
 system.cpu.Fetch-Buffer-T0.instsBypassed            0                       # Number of Instructions Bypassed.
 system.cpu.Fetch-Buffer-T1.instReqsProcessed            0                       # Number of Instructions Requests that completed in this resource.
 system.cpu.Fetch-Buffer-T1.instsBypassed            0                       # Number of Instructions Bypassed.
-system.cpu.Fetch-Seq-Unit.instReqsProcessed        11373                       # Number of Instructions Requests that completed in this resource.
-system.cpu.Graduation-Unit.instReqsProcessed         5685                       # Number of Instructions Requests that completed in this resource.
+system.cpu.Fetch-Seq-Unit.instReqsProcessed        11657                       # Number of Instructions Requests that completed in this resource.
+system.cpu.Graduation-Unit.instReqsProcessed         5827                       # Number of Instructions Requests that completed in this resource.
 system.cpu.Mult-Div-Unit.divInstReqsProcessed            1                       # Number of Divide Requests Processed.
 system.cpu.Mult-Div-Unit.instReqsProcessed            8                       # Number of Instructions Requests that completed in this resource.
 system.cpu.Mult-Div-Unit.multInstReqsProcessed            3                       # Number of Multiply Requests Processed.
-system.cpu.RegFile-Manager.instReqsProcessed        10479                       # Number of Instructions Requests that completed in this resource.
-system.cpu.committedInsts                        5685                       # Number of Instructions Simulated (Per-Thread)
-system.cpu.committedInsts_total                  5685                       # Number of Instructions Simulated (Total)
-system.cpu.cpi                              10.385928                       # CPI: Cycles Per Instruction (Per-Thread)
-system.cpu.cpi_total                        10.385928                       # CPI: Total CPI of All Threads
-system.cpu.dcache.ReadReq_accesses               1134                       # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_avg_miss_latency 56207.317073                       # average ReadReq miss latency
-system.cpu.dcache.ReadReq_avg_mshr_miss_latency 53207.317073                       # average ReadReq mshr miss latency
-system.cpu.dcache.ReadReq_hits                   1052                       # number of ReadReq hits
-system.cpu.dcache.ReadReq_miss_latency        4609000                       # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_rate          0.072310                       # miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_misses                   82                       # number of ReadReq misses
-system.cpu.dcache.ReadReq_mshr_miss_latency      4363000                       # number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_rate     0.072310                       # mshr miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_mshr_misses              82                       # number of ReadReq MSHR misses
-system.cpu.dcache.WriteReq_accesses               924                       # number of WriteReq accesses(hits+misses)
+system.cpu.RegFile-Manager.instReqsProcessed        10713                       # Number of Instructions Requests that completed in this resource.
+system.cpu.committedInsts                        5827                       # Number of Instructions Simulated (Per-Thread)
+system.cpu.committedInsts_total                  5827                       # Number of Instructions Simulated (Total)
+system.cpu.cpi                              10.276643                       # CPI: Cycles Per Instruction (Per-Thread)
+system.cpu.cpi_total                        10.276643                       # CPI: Total CPI of All Threads
+system.cpu.dcache.ReadReq_accesses               1165                       # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_avg_miss_latency 56201.149425                       # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency 53201.149425                       # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_hits                   1078                       # number of ReadReq hits
+system.cpu.dcache.ReadReq_miss_latency        4889500                       # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_rate          0.074678                       # miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_misses                   87                       # number of ReadReq misses
+system.cpu.dcache.ReadReq_mshr_miss_latency      4628500                       # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_rate     0.074678                       # mshr miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_mshr_misses              87                       # number of ReadReq MSHR misses
+system.cpu.dcache.WriteReq_accesses               925                       # number of WriteReq accesses(hits+misses)
 system.cpu.dcache.WriteReq_avg_miss_latency 56554.687500                       # average WriteReq miss latency
 system.cpu.dcache.WriteReq_avg_mshr_miss_latency 53554.687500                       # average WriteReq mshr miss latency
-system.cpu.dcache.WriteReq_hits                   860                       # number of WriteReq hits
+system.cpu.dcache.WriteReq_hits                   861                       # number of WriteReq hits
 system.cpu.dcache.WriteReq_miss_latency       3619500                       # number of WriteReq miss cycles
-system.cpu.dcache.WriteReq_miss_rate         0.069264                       # miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_miss_rate         0.069189                       # miss rate for WriteReq accesses
 system.cpu.dcache.WriteReq_misses                  64                       # number of WriteReq misses
 system.cpu.dcache.WriteReq_mshr_miss_latency      3427500                       # number of WriteReq MSHR miss cycles
-system.cpu.dcache.WriteReq_mshr_miss_rate     0.069264                       # mshr miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_mshr_miss_rate     0.069189                       # mshr miss rate for WriteReq accesses
 system.cpu.dcache.WriteReq_mshr_misses             64                       # number of WriteReq MSHR misses
 system.cpu.dcache.avg_blocked_cycles::no_mshrs     no_value                       # average number of cycles each access was blocked
 system.cpu.dcache.avg_blocked_cycles::no_targets     no_value                       # average number of cycles each access was blocked
-system.cpu.dcache.avg_refs                  14.590909                       # Average number of references to valid blocks.
+system.cpu.dcache.avg_refs                  14.144928                       # Average number of references to valid blocks.
 system.cpu.dcache.blocked::no_mshrs                 0                       # number of cycles access was blocked
 system.cpu.dcache.blocked::no_targets               0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles::no_mshrs            0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles::no_targets            0                       # number of cycles access was blocked
 system.cpu.dcache.cache_copies                      0                       # number of cache copies performed
-system.cpu.dcache.demand_accesses                2058                       # number of demand (read+write) accesses
-system.cpu.dcache.demand_avg_miss_latency 56359.589041                       # average overall miss latency
-system.cpu.dcache.demand_avg_mshr_miss_latency 53359.589041                       # average overall mshr miss latency
-system.cpu.dcache.demand_hits                    1912                       # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency         8228500                       # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_rate           0.070943                       # miss rate for demand accesses
-system.cpu.dcache.demand_misses                   146                       # number of demand (read+write) misses
+system.cpu.dcache.demand_accesses                2090                       # number of demand (read+write) accesses
+system.cpu.dcache.demand_avg_miss_latency 56350.993377                       # average overall miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency 53350.993377                       # average overall mshr miss latency
+system.cpu.dcache.demand_hits                    1939                       # number of demand (read+write) hits
+system.cpu.dcache.demand_miss_latency         8509000                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_rate           0.072249                       # miss rate for demand accesses
+system.cpu.dcache.demand_misses                   151                       # number of demand (read+write) misses
 system.cpu.dcache.demand_mshr_hits                  0                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency      7790500                       # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_rate      0.070943                       # mshr miss rate for demand accesses
-system.cpu.dcache.demand_mshr_misses              146                       # number of demand (read+write) MSHR misses
+system.cpu.dcache.demand_mshr_miss_latency      8056000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_rate      0.072249                       # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_misses              151                       # number of demand (read+write) MSHR misses
 system.cpu.dcache.fast_writes                       0                       # number of fast writes performed
 system.cpu.dcache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.dcache.overall_accesses               2058                       # number of overall (read+write) accesses
-system.cpu.dcache.overall_avg_miss_latency 56359.589041                       # average overall miss latency
-system.cpu.dcache.overall_avg_mshr_miss_latency 53359.589041                       # average overall mshr miss latency
+system.cpu.dcache.overall_accesses               2090                       # number of overall (read+write) accesses
+system.cpu.dcache.overall_avg_miss_latency 56350.993377                       # average overall miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency 53350.993377                       # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency     no_value                       # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits                   1912                       # number of overall hits
-system.cpu.dcache.overall_miss_latency        8228500                       # number of overall miss cycles
-system.cpu.dcache.overall_miss_rate          0.070943                       # miss rate for overall accesses
-system.cpu.dcache.overall_misses                  146                       # number of overall misses
+system.cpu.dcache.overall_hits                   1939                       # number of overall hits
+system.cpu.dcache.overall_miss_latency        8509000                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_rate          0.072249                       # miss rate for overall accesses
+system.cpu.dcache.overall_misses                  151                       # number of overall misses
 system.cpu.dcache.overall_mshr_hits                 0                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency      7790500                       # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_rate     0.070943                       # mshr miss rate for overall accesses
-system.cpu.dcache.overall_mshr_misses             146                       # number of overall MSHR misses
+system.cpu.dcache.overall_mshr_miss_latency      8056000                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_rate     0.072249                       # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_misses             151                       # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.dcache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu.dcache.replacements                      0                       # number of replacements
-system.cpu.dcache.sampled_refs                    132                       # Sample count of references to valid blocks.
+system.cpu.dcache.sampled_refs                    138                       # Sample count of references to valid blocks.
 system.cpu.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse                 84.209307                       # Cycle average of tags in use
-system.cpu.dcache.total_refs                     1926                       # Total number of references to valid blocks.
+system.cpu.dcache.tagsinuse                 88.212490                       # Cycle average of tags in use
+system.cpu.dcache.total_refs                     1952                       # Total number of references to valid blocks.
 system.cpu.dcache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.dcache.writebacks                        0                       # number of writebacks
-system.cpu.dcache_port.instReqsProcessed         2057                       # Number of Instructions Requests that completed in this resource.
+system.cpu.dcache_port.instReqsProcessed         2089                       # Number of Instructions Requests that completed in this resource.
 system.cpu.dtb.accesses                             0                       # DTB accesses
 system.cpu.dtb.hits                                 0                       # DTB hits
 system.cpu.dtb.misses                               0                       # DTB misses
@@ -103,62 +103,62 @@ system.cpu.dtb.read_misses                          0                       # DT
 system.cpu.dtb.write_accesses                       0                       # DTB write accesses
 system.cpu.dtb.write_hits                           0                       # DTB write hits
 system.cpu.dtb.write_misses                         0                       # DTB write misses
-system.cpu.icache.ReadReq_accesses               5687                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_avg_miss_latency 55773.026316                       # average ReadReq miss latency
-system.cpu.icache.ReadReq_avg_mshr_miss_latency 52773.026316                       # average ReadReq mshr miss latency
-system.cpu.icache.ReadReq_hits                   5383                       # number of ReadReq hits
-system.cpu.icache.ReadReq_miss_latency       16955000                       # number of ReadReq miss cycles
-system.cpu.icache.ReadReq_miss_rate          0.053455                       # miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_misses                  304                       # number of ReadReq misses
-system.cpu.icache.ReadReq_mshr_miss_latency     16043000                       # number of ReadReq MSHR miss cycles
-system.cpu.icache.ReadReq_mshr_miss_rate     0.053455                       # mshr miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_mshr_misses             304                       # number of ReadReq MSHR misses
+system.cpu.icache.ReadReq_accesses               5829                       # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_avg_miss_latency 55765.676568                       # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency 52765.676568                       # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_hits                   5526                       # number of ReadReq hits
+system.cpu.icache.ReadReq_miss_latency       16897000                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_rate          0.051981                       # miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_misses                  303                       # number of ReadReq misses
+system.cpu.icache.ReadReq_mshr_miss_latency     15988000                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_rate     0.051981                       # mshr miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_mshr_misses             303                       # number of ReadReq MSHR misses
 system.cpu.icache.avg_blocked_cycles::no_mshrs     no_value                       # average number of cycles each access was blocked
 system.cpu.icache.avg_blocked_cycles::no_targets     no_value                       # average number of cycles each access was blocked
-system.cpu.icache.avg_refs                  17.707237                       # Average number of references to valid blocks.
+system.cpu.icache.avg_refs                  18.237624                       # Average number of references to valid blocks.
 system.cpu.icache.blocked::no_mshrs                 0                       # number of cycles access was blocked
 system.cpu.icache.blocked::no_targets               0                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles::no_mshrs            0                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles::no_targets            0                       # number of cycles access was blocked
 system.cpu.icache.cache_copies                      0                       # number of cache copies performed
-system.cpu.icache.demand_accesses                5687                       # number of demand (read+write) accesses
-system.cpu.icache.demand_avg_miss_latency 55773.026316                       # average overall miss latency
-system.cpu.icache.demand_avg_mshr_miss_latency 52773.026316                       # average overall mshr miss latency
-system.cpu.icache.demand_hits                    5383                       # number of demand (read+write) hits
-system.cpu.icache.demand_miss_latency        16955000                       # number of demand (read+write) miss cycles
-system.cpu.icache.demand_miss_rate           0.053455                       # miss rate for demand accesses
-system.cpu.icache.demand_misses                   304                       # number of demand (read+write) misses
+system.cpu.icache.demand_accesses                5829                       # number of demand (read+write) accesses
+system.cpu.icache.demand_avg_miss_latency 55765.676568                       # average overall miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency 52765.676568                       # average overall mshr miss latency
+system.cpu.icache.demand_hits                    5526                       # number of demand (read+write) hits
+system.cpu.icache.demand_miss_latency        16897000                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_rate           0.051981                       # miss rate for demand accesses
+system.cpu.icache.demand_misses                   303                       # number of demand (read+write) misses
 system.cpu.icache.demand_mshr_hits                  0                       # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_miss_latency     16043000                       # number of demand (read+write) MSHR miss cycles
-system.cpu.icache.demand_mshr_miss_rate      0.053455                       # mshr miss rate for demand accesses
-system.cpu.icache.demand_mshr_misses              304                       # number of demand (read+write) MSHR misses
+system.cpu.icache.demand_mshr_miss_latency     15988000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_rate      0.051981                       # mshr miss rate for demand accesses
+system.cpu.icache.demand_mshr_misses              303                       # number of demand (read+write) MSHR misses
 system.cpu.icache.fast_writes                       0                       # number of fast writes performed
 system.cpu.icache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.icache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.icache.overall_accesses               5687                       # number of overall (read+write) accesses
-system.cpu.icache.overall_avg_miss_latency 55773.026316                       # average overall miss latency
-system.cpu.icache.overall_avg_mshr_miss_latency 52773.026316                       # average overall mshr miss latency
+system.cpu.icache.overall_accesses               5829                       # number of overall (read+write) accesses
+system.cpu.icache.overall_avg_miss_latency 55765.676568                       # average overall miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency 52765.676568                       # average overall mshr miss latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency     no_value                       # average overall mshr uncacheable latency
-system.cpu.icache.overall_hits                   5383                       # number of overall hits
-system.cpu.icache.overall_miss_latency       16955000                       # number of overall miss cycles
-system.cpu.icache.overall_miss_rate          0.053455                       # miss rate for overall accesses
-system.cpu.icache.overall_misses                  304                       # number of overall misses
+system.cpu.icache.overall_hits                   5526                       # number of overall hits
+system.cpu.icache.overall_miss_latency       16897000                       # number of overall miss cycles
+system.cpu.icache.overall_miss_rate          0.051981                       # miss rate for overall accesses
+system.cpu.icache.overall_misses                  303                       # number of overall misses
 system.cpu.icache.overall_mshr_hits                 0                       # number of overall MSHR hits
-system.cpu.icache.overall_mshr_miss_latency     16043000                       # number of overall MSHR miss cycles
-system.cpu.icache.overall_mshr_miss_rate     0.053455                       # mshr miss rate for overall accesses
-system.cpu.icache.overall_mshr_misses             304                       # number of overall MSHR misses
+system.cpu.icache.overall_mshr_miss_latency     15988000                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_rate     0.051981                       # mshr miss rate for overall accesses
+system.cpu.icache.overall_mshr_misses             303                       # number of overall MSHR misses
 system.cpu.icache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.icache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu.icache.replacements                     13                       # number of replacements
-system.cpu.icache.sampled_refs                    304                       # Sample count of references to valid blocks.
+system.cpu.icache.sampled_refs                    303                       # Sample count of references to valid blocks.
 system.cpu.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse                136.385131                       # Cycle average of tags in use
-system.cpu.icache.total_refs                     5383                       # Total number of references to valid blocks.
+system.cpu.icache.tagsinuse                134.267603                       # Cycle average of tags in use
+system.cpu.icache.total_refs                     5526                       # Total number of references to valid blocks.
 system.cpu.icache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.icache.writebacks                        0                       # number of writebacks
-system.cpu.icache_port.instReqsProcessed         5686                       # Number of Instructions Requests that completed in this resource.
-system.cpu.ipc                               0.096284                       # IPC: Instructions Per Cycle (Per-Thread)
-system.cpu.ipc_total                         0.096284                       # IPC: Total IPC of All Threads
+system.cpu.icache_port.instReqsProcessed         5828                       # Number of Instructions Requests that completed in this resource.
+system.cpu.ipc                               0.097308                       # IPC: Instructions Per Cycle (Per-Thread)
+system.cpu.ipc_total                         0.097308                       # IPC: Total IPC of All Threads
 system.cpu.itb.accesses                             0                       # DTB accesses
 system.cpu.itb.hits                                 0                       # DTB hits
 system.cpu.itb.misses                               0                       # DTB misses
@@ -168,83 +168,83 @@ system.cpu.itb.read_misses                          0                       # DT
 system.cpu.itb.write_accesses                       0                       # DTB write accesses
 system.cpu.itb.write_hits                           0                       # DTB write hits
 system.cpu.itb.write_misses                         0                       # DTB write misses
-system.cpu.l2cache.ReadExReq_accesses              50                       # number of ReadExReq accesses(hits+misses)
+system.cpu.l2cache.ReadExReq_accesses              51                       # number of ReadExReq accesses(hits+misses)
 system.cpu.l2cache.ReadExReq_avg_miss_latency        52500                       # average ReadExReq miss latency
-system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency        40080                       # average ReadExReq mshr miss latency
-system.cpu.l2cache.ReadExReq_miss_latency      2625000                       # number of ReadExReq miss cycles
+system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 40098.039216                       # average ReadExReq mshr miss latency
+system.cpu.l2cache.ReadExReq_miss_latency      2677500                       # number of ReadExReq miss cycles
 system.cpu.l2cache.ReadExReq_miss_rate              1                       # miss rate for ReadExReq accesses
-system.cpu.l2cache.ReadExReq_misses                50                       # number of ReadExReq misses
-system.cpu.l2cache.ReadExReq_mshr_miss_latency      2004000                       # number of ReadExReq MSHR miss cycles
+system.cpu.l2cache.ReadExReq_misses                51                       # number of ReadExReq misses
+system.cpu.l2cache.ReadExReq_mshr_miss_latency      2045000                       # number of ReadExReq MSHR miss cycles
 system.cpu.l2cache.ReadExReq_mshr_miss_rate            1                       # mshr miss rate for ReadExReq accesses
-system.cpu.l2cache.ReadExReq_mshr_misses           50                       # number of ReadExReq MSHR misses
-system.cpu.l2cache.ReadReq_accesses               386                       # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_avg_miss_latency 52052.083333                       # average ReadReq miss latency
-system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40026.041667                       # average ReadReq mshr miss latency
+system.cpu.l2cache.ReadExReq_mshr_misses           51                       # number of ReadExReq MSHR misses
+system.cpu.l2cache.ReadReq_accesses               390                       # number of ReadReq accesses(hits+misses)
+system.cpu.l2cache.ReadReq_avg_miss_latency 52052.835052                       # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40023.195876                       # average ReadReq mshr miss latency
 system.cpu.l2cache.ReadReq_hits                     2                       # number of ReadReq hits
-system.cpu.l2cache.ReadReq_miss_latency      19988000                       # number of ReadReq miss cycles
-system.cpu.l2cache.ReadReq_miss_rate         0.994819                       # miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_misses                 384                       # number of ReadReq misses
-system.cpu.l2cache.ReadReq_mshr_miss_latency     15370000                       # number of ReadReq MSHR miss cycles
-system.cpu.l2cache.ReadReq_mshr_miss_rate     0.994819                       # mshr miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_mshr_misses            384                       # number of ReadReq MSHR misses
-system.cpu.l2cache.UpgradeReq_accesses             14                       # number of UpgradeReq accesses(hits+misses)
-system.cpu.l2cache.UpgradeReq_avg_miss_latency 52535.714286                       # average UpgradeReq miss latency
-system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 40071.428571                       # average UpgradeReq mshr miss latency
-system.cpu.l2cache.UpgradeReq_miss_latency       735500                       # number of UpgradeReq miss cycles
+system.cpu.l2cache.ReadReq_miss_latency      20196500                       # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_miss_rate         0.994872                       # miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_misses                 388                       # number of ReadReq misses
+system.cpu.l2cache.ReadReq_mshr_miss_latency     15529000                       # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_rate     0.994872                       # mshr miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_mshr_misses            388                       # number of ReadReq MSHR misses
+system.cpu.l2cache.UpgradeReq_accesses             13                       # number of UpgradeReq accesses(hits+misses)
+system.cpu.l2cache.UpgradeReq_avg_miss_latency 52538.461538                       # average UpgradeReq miss latency
+system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 40076.923077                       # average UpgradeReq mshr miss latency
+system.cpu.l2cache.UpgradeReq_miss_latency       683000                       # number of UpgradeReq miss cycles
 system.cpu.l2cache.UpgradeReq_miss_rate             1                       # miss rate for UpgradeReq accesses
-system.cpu.l2cache.UpgradeReq_misses               14                       # number of UpgradeReq misses
-system.cpu.l2cache.UpgradeReq_mshr_miss_latency       561000                       # number of UpgradeReq MSHR miss cycles
+system.cpu.l2cache.UpgradeReq_misses               13                       # number of UpgradeReq misses
+system.cpu.l2cache.UpgradeReq_mshr_miss_latency       521000                       # number of UpgradeReq MSHR miss cycles
 system.cpu.l2cache.UpgradeReq_mshr_miss_rate            1                       # mshr miss rate for UpgradeReq accesses
-system.cpu.l2cache.UpgradeReq_mshr_misses           14                       # number of UpgradeReq MSHR misses
+system.cpu.l2cache.UpgradeReq_mshr_misses           13                       # number of UpgradeReq MSHR misses
 system.cpu.l2cache.avg_blocked_cycles::no_mshrs     no_value                       # average number of cycles each access was blocked
 system.cpu.l2cache.avg_blocked_cycles::no_targets     no_value                       # average number of cycles each access was blocked
-system.cpu.l2cache.avg_refs                  0.005405                       # Average number of references to valid blocks.
+system.cpu.l2cache.avg_refs                  0.005333                       # Average number of references to valid blocks.
 system.cpu.l2cache.blocked::no_mshrs                0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked::no_targets              0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles::no_mshrs            0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles::no_targets            0                       # number of cycles access was blocked
 system.cpu.l2cache.cache_copies                     0                       # number of cache copies performed
-system.cpu.l2cache.demand_accesses                436                       # number of demand (read+write) accesses
-system.cpu.l2cache.demand_avg_miss_latency 52103.686636                       # average overall miss latency
-system.cpu.l2cache.demand_avg_mshr_miss_latency 40032.258065                       # average overall mshr miss latency
+system.cpu.l2cache.demand_accesses                441                       # number of demand (read+write) accesses
+system.cpu.l2cache.demand_avg_miss_latency 52104.783599                       # average overall miss latency
+system.cpu.l2cache.demand_avg_mshr_miss_latency 40031.890661                       # average overall mshr miss latency
 system.cpu.l2cache.demand_hits                      2                       # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency       22613000                       # number of demand (read+write) miss cycles
-system.cpu.l2cache.demand_miss_rate          0.995413                       # miss rate for demand accesses
-system.cpu.l2cache.demand_misses                  434                       # number of demand (read+write) misses
+system.cpu.l2cache.demand_miss_latency       22874000                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_rate          0.995465                       # miss rate for demand accesses
+system.cpu.l2cache.demand_misses                  439                       # number of demand (read+write) misses
 system.cpu.l2cache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
-system.cpu.l2cache.demand_mshr_miss_latency     17374000                       # number of demand (read+write) MSHR miss cycles
-system.cpu.l2cache.demand_mshr_miss_rate     0.995413                       # mshr miss rate for demand accesses
-system.cpu.l2cache.demand_mshr_misses             434                       # number of demand (read+write) MSHR misses
+system.cpu.l2cache.demand_mshr_miss_latency     17574000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_rate     0.995465                       # mshr miss rate for demand accesses
+system.cpu.l2cache.demand_mshr_misses             439                       # number of demand (read+write) MSHR misses
 system.cpu.l2cache.fast_writes                      0                       # number of fast writes performed
 system.cpu.l2cache.mshr_cap_events                  0                       # number of times MSHR cap was activated
 system.cpu.l2cache.no_allocate_misses               0                       # Number of misses that were no-allocate
-system.cpu.l2cache.overall_accesses               436                       # number of overall (read+write) accesses
-system.cpu.l2cache.overall_avg_miss_latency 52103.686636                       # average overall miss latency
-system.cpu.l2cache.overall_avg_mshr_miss_latency 40032.258065                       # average overall mshr miss latency
+system.cpu.l2cache.overall_accesses               441                       # number of overall (read+write) accesses
+system.cpu.l2cache.overall_avg_miss_latency 52104.783599                       # average overall miss latency
+system.cpu.l2cache.overall_avg_mshr_miss_latency 40031.890661                       # average overall mshr miss latency
 system.cpu.l2cache.overall_avg_mshr_uncacheable_latency     no_value                       # average overall mshr uncacheable latency
 system.cpu.l2cache.overall_hits                     2                       # number of overall hits
-system.cpu.l2cache.overall_miss_latency      22613000                       # number of overall miss cycles
-system.cpu.l2cache.overall_miss_rate         0.995413                       # miss rate for overall accesses
-system.cpu.l2cache.overall_misses                 434                       # number of overall misses
+system.cpu.l2cache.overall_miss_latency      22874000                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_rate         0.995465                       # miss rate for overall accesses
+system.cpu.l2cache.overall_misses                 439                       # number of overall misses
 system.cpu.l2cache.overall_mshr_hits                0                       # number of overall MSHR hits
-system.cpu.l2cache.overall_mshr_miss_latency     17374000                       # number of overall MSHR miss cycles
-system.cpu.l2cache.overall_mshr_miss_rate     0.995413                       # mshr miss rate for overall accesses
-system.cpu.l2cache.overall_mshr_misses            434                       # number of overall MSHR misses
+system.cpu.l2cache.overall_mshr_miss_latency     17574000                       # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_rate     0.995465                       # mshr miss rate for overall accesses
+system.cpu.l2cache.overall_mshr_misses            439                       # number of overall MSHR misses
 system.cpu.l2cache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.l2cache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu.l2cache.replacements                     0                       # number of replacements
-system.cpu.l2cache.sampled_refs                   370                       # Sample count of references to valid blocks.
+system.cpu.l2cache.sampled_refs                   375                       # Sample count of references to valid blocks.
 system.cpu.l2cache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse               183.672228                       # Cycle average of tags in use
+system.cpu.l2cache.tagsinuse               185.807591                       # Cycle average of tags in use
 system.cpu.l2cache.total_refs                       2                       # Total number of references to valid blocks.
 system.cpu.l2cache.warmup_cycle                     0                       # Cycle when the warmup percentage was hit.
 system.cpu.l2cache.writebacks                       0                       # number of writebacks
-system.cpu.numCycles                            59044                       # number of cpu cycles simulated
+system.cpu.numCycles                            59882                       # number of cpu cycles simulated
 system.cpu.smtCommittedInsts                        0                       # Number of SMT Instructions Simulated (Per-Thread)
 system.cpu.smtCycles                                0                       # Total number of cycles that the CPU was simultaneous multithreading.(SMT)
 system.cpu.smt_cpi                           no_value                       # CPI: Total SMT-CPI
 system.cpu.smt_ipc                           no_value                       # IPC: Total SMT-IPC
-system.cpu.threadCycles                         59044                       # Total Number of Cycles A Thread Was Active in CPU (Per-Thread)
-system.cpu.workload.PROG:num_syscalls              13                       # Number of system calls
+system.cpu.threadCycles                         59882                       # Total Number of Cycles A Thread Was Active in CPU (Per-Thread)
+system.cpu.workload.PROG:num_syscalls               8                       # Number of system calls
 
 ---------- End Simulation Statistics   ----------
diff --git a/tests/quick/00.hello/ref/mips/linux/o3-timing/config.ini b/tests/quick/00.hello/ref/mips/linux/o3-timing/config.ini
index b3bdddcfe..962f6ed05 100644
--- a/tests/quick/00.hello/ref/mips/linux/o3-timing/config.ini
+++ b/tests/quick/00.hello/ref/mips/linux/o3-timing/config.ini
@@ -163,7 +163,6 @@ hash_delay=1
 latency=1000
 max_miss_count=0
 mshrs=10
-prefetch_cache_check_push=true
 prefetch_data_accesses_only=false
 prefetch_degree=1
 prefetch_latency=10000
@@ -335,7 +334,6 @@ hash_delay=1
 latency=1000
 max_miss_count=0
 mshrs=10
-prefetch_cache_check_push=true
 prefetch_data_accesses_only=false
 prefetch_degree=1
 prefetch_latency=10000
@@ -370,7 +368,6 @@ hash_delay=1
 latency=1000
 max_miss_count=0
 mshrs=10
-prefetch_cache_check_push=true
 prefetch_data_accesses_only=false
 prefetch_degree=1
 prefetch_latency=10000
@@ -412,7 +409,7 @@ egid=100
 env=
 errout=cerr
 euid=100
-executable=tests/test-progs/hello/bin/mips/linux/hello
+executable=/dist/m5/regression/test-progs/hello/bin/mips/linux/hello
 gid=100
 input=cin
 max_stack_size=67108864
diff --git a/tests/quick/00.hello/ref/mips/linux/o3-timing/simout b/tests/quick/00.hello/ref/mips/linux/o3-timing/simout
index 9562c954f..74dedc1d0 100755
--- a/tests/quick/00.hello/ref/mips/linux/o3-timing/simout
+++ b/tests/quick/00.hello/ref/mips/linux/o3-timing/simout
@@ -5,13 +5,13 @@ The Regents of The University of Michigan
 All Rights Reserved
 
 
-M5 compiled Sep 24 2009 12:19:09
-M5 revision 9bc3e4611009+ 6661+ default tip
-M5 started Sep 24 2009 12:19:46
-M5 executing on zooks
-command line: build/MIPS_SE/m5.fast -d build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/o3-timing -re tests/run.py build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/o3-timing
+M5 compiled Jan  2 2010 07:01:31
+M5 revision a538feb8a617 6813 default qtip tip qbase fixhelp.patch
+M5 started Jan  2 2010 07:03:10
+M5 executing on fajita
+command line: build/MIPS_SE/m5.opt -d build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/o3-timing -re tests/run.py build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/o3-timing
 Global frequency set at 1000000000000 ticks per second
 info: Entering event queue @ 0.  Starting simulation...
 info: Increasing stack size by one page.
 Hello World!
-Exiting @ tick 13914500 because target called exit()
+Exiting @ tick 14060500 because target called exit()
diff --git a/tests/quick/00.hello/ref/mips/linux/o3-timing/stats.txt b/tests/quick/00.hello/ref/mips/linux/o3-timing/stats.txt
index bdce7b5d3..85a5a75dd 100644
--- a/tests/quick/00.hello/ref/mips/linux/o3-timing/stats.txt
+++ b/tests/quick/00.hello/ref/mips/linux/o3-timing/stats.txt
@@ -1,127 +1,127 @@
 
 ---------- Begin Simulation Statistics ----------
-host_inst_rate                                  59567                       # Simulator instruction rate (inst/s)
-host_mem_usage                                 155776                       # Number of bytes of host memory used
-host_seconds                                     0.09                       # Real time elapsed on the host
-host_tick_rate                              163592222                       # Simulator tick rate (ticks/s)
+host_inst_rate                                  48407                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 206048                       # Number of bytes of host memory used
+host_seconds                                     0.11                       # Real time elapsed on the host
+host_tick_rate                              131379529                       # Simulator tick rate (ticks/s)
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
-sim_insts                                        5049                       # Number of instructions simulated
+sim_insts                                        5169                       # Number of instructions simulated
 sim_seconds                                  0.000014                       # Number of seconds simulated
-sim_ticks                                    13914500                       # Number of ticks simulated
+sim_ticks                                    14060500                       # Number of ticks simulated
 system.cpu.BPredUnit.BTBCorrect                     0                       # Number of correct BTB predictions (this stat may not work properly.
-system.cpu.BPredUnit.BTBHits                      552                       # Number of BTB hits
-system.cpu.BPredUnit.BTBLookups                  1939                       # Number of BTB lookups
-system.cpu.BPredUnit.RASInCorrect                  53                       # Number of incorrect RAS predictions.
-system.cpu.BPredUnit.condIncorrect                722                       # Number of conditional branches incorrect
-system.cpu.BPredUnit.condPredicted               1555                       # Number of conditional branches predicted
-system.cpu.BPredUnit.lookups                     2357                       # Number of BP lookups
-system.cpu.BPredUnit.usedRAS                      387                       # Number of times the RAS was used to get a target.
-system.cpu.commit.COM:branches                    885                       # Number of branches committed
-system.cpu.commit.COM:bw_lim_events                63                       # number cycles where commit BW limit reached
+system.cpu.BPredUnit.BTBHits                      572                       # Number of BTB hits
+system.cpu.BPredUnit.BTBLookups                  1960                       # Number of BTB lookups
+system.cpu.BPredUnit.RASInCorrect                  66                       # Number of incorrect RAS predictions.
+system.cpu.BPredUnit.condIncorrect                751                       # Number of conditional branches incorrect
+system.cpu.BPredUnit.condPredicted               1593                       # Number of conditional branches predicted
+system.cpu.BPredUnit.lookups                     2416                       # Number of BP lookups
+system.cpu.BPredUnit.usedRAS                      404                       # Number of times the RAS was used to get a target.
+system.cpu.commit.COM:branches                    916                       # Number of branches committed
+system.cpu.commit.COM:bw_lim_events                65                       # number cycles where commit BW limit reached
 system.cpu.commit.COM:bw_limited                    0                       # number of insts not committed due to BW limits
-system.cpu.commit.COM:committed_per_cycle::samples        14230                       # Number of insts commited each cycle
-system.cpu.commit.COM:committed_per_cycle::mean     0.399438                       # Number of insts commited each cycle
-system.cpu.commit.COM:committed_per_cycle::stdev     1.125719                       # Number of insts commited each cycle
+system.cpu.commit.COM:committed_per_cycle::samples        14561                       # Number of insts commited each cycle
+system.cpu.commit.COM:committed_per_cycle::mean     0.400110                       # Number of insts commited each cycle
+system.cpu.commit.COM:committed_per_cycle::stdev     1.121131                       # Number of insts commited each cycle
 system.cpu.commit.COM:committed_per_cycle::underflows            0      0.00%      0.00% # Number of insts commited each cycle
-system.cpu.commit.COM:committed_per_cycle::0-1        11753     82.59%     82.59% # Number of insts commited each cycle
-system.cpu.commit.COM:committed_per_cycle::1-2         1168      8.21%     90.80% # Number of insts commited each cycle
-system.cpu.commit.COM:committed_per_cycle::2-3          499      3.51%     94.31% # Number of insts commited each cycle
-system.cpu.commit.COM:committed_per_cycle::3-4          284      2.00%     96.30% # Number of insts commited each cycle
-system.cpu.commit.COM:committed_per_cycle::4-5          291      2.04%     98.35% # Number of insts commited each cycle
-system.cpu.commit.COM:committed_per_cycle::5-6           72      0.51%     98.85% # Number of insts commited each cycle
-system.cpu.commit.COM:committed_per_cycle::6-7           62      0.44%     99.29% # Number of insts commited each cycle
-system.cpu.commit.COM:committed_per_cycle::7-8           38      0.27%     99.56% # Number of insts commited each cycle
-system.cpu.commit.COM:committed_per_cycle::8           63      0.44%    100.00% # Number of insts commited each cycle
+system.cpu.commit.COM:committed_per_cycle::0-1        11999     82.41%     82.41% # Number of insts commited each cycle
+system.cpu.commit.COM:committed_per_cycle::1-2         1213      8.33%     90.74% # Number of insts commited each cycle
+system.cpu.commit.COM:committed_per_cycle::2-3          529      3.63%     94.37% # Number of insts commited each cycle
+system.cpu.commit.COM:committed_per_cycle::3-4          291      2.00%     96.37% # Number of insts commited each cycle
+system.cpu.commit.COM:committed_per_cycle::4-5          294      2.02%     98.39% # Number of insts commited each cycle
+system.cpu.commit.COM:committed_per_cycle::5-6           71      0.49%     98.87% # Number of insts commited each cycle
+system.cpu.commit.COM:committed_per_cycle::6-7           62      0.43%     99.30% # Number of insts commited each cycle
+system.cpu.commit.COM:committed_per_cycle::7-8           37      0.25%     99.55% # Number of insts commited each cycle
+system.cpu.commit.COM:committed_per_cycle::8           65      0.45%    100.00% # Number of insts commited each cycle
 system.cpu.commit.COM:committed_per_cycle::overflows            0      0.00%    100.00% # Number of insts commited each cycle
 system.cpu.commit.COM:committed_per_cycle::min_value            0                       # Number of insts commited each cycle
 system.cpu.commit.COM:committed_per_cycle::max_value            8                       # Number of insts commited each cycle
-system.cpu.commit.COM:committed_per_cycle::total        14230                       # Number of insts commited each cycle
-system.cpu.commit.COM:count                      5684                       # Number of instructions committed
-system.cpu.commit.COM:loads                      1133                       # Number of loads committed
+system.cpu.commit.COM:committed_per_cycle::total        14561                       # Number of insts commited each cycle
+system.cpu.commit.COM:count                      5826                       # Number of instructions committed
+system.cpu.commit.COM:loads                      1164                       # Number of loads committed
 system.cpu.commit.COM:membars                       0                       # Number of memory barriers committed
-system.cpu.commit.COM:refs                       2057                       # Number of memory references committed
+system.cpu.commit.COM:refs                       2089                       # Number of memory references committed
 system.cpu.commit.COM:swp_count                     0                       # Number of s/w prefetches committed
-system.cpu.commit.branchMispredicts               605                       # The number of times a branch was mispredicted
-system.cpu.commit.commitCommittedInsts           5684                       # The number of committed instructions
-system.cpu.commit.commitNonSpecStalls              15                       # The number of times commit has been forced to stall to communicate backwards
-system.cpu.commit.commitSquashedInsts            5973                       # The number of squashed insts skipped by commit
-system.cpu.committedInsts                        5049                       # Number of Instructions Simulated
-system.cpu.committedInsts_total                  5049                       # Number of Instructions Simulated
-system.cpu.cpi                               5.511983                       # CPI: Cycles Per Instruction
-system.cpu.cpi_total                         5.511983                       # CPI: Total CPI of All Threads
-system.cpu.dcache.ReadReq_accesses               2297                       # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_avg_miss_latency 34007.812500                       # average ReadReq miss latency
-system.cpu.dcache.ReadReq_avg_mshr_miss_latency 36022.988506                       # average ReadReq mshr miss latency
-system.cpu.dcache.ReadReq_hits                   2169                       # number of ReadReq hits
-system.cpu.dcache.ReadReq_miss_latency        4353000                       # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_rate          0.055725                       # miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_misses                  128                       # number of ReadReq misses
-system.cpu.dcache.ReadReq_mshr_hits                41                       # number of ReadReq MSHR hits
-system.cpu.dcache.ReadReq_mshr_miss_latency      3134000                       # number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_rate     0.037875                       # mshr miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_mshr_misses              87                       # number of ReadReq MSHR misses
-system.cpu.dcache.WriteReq_accesses               924                       # number of WriteReq accesses(hits+misses)
-system.cpu.dcache.WriteReq_avg_miss_latency 27701.724138                       # average WriteReq miss latency
-system.cpu.dcache.WriteReq_avg_mshr_miss_latency 36093.750000                       # average WriteReq mshr miss latency
-system.cpu.dcache.WriteReq_hits                   634                       # number of WriteReq hits
-system.cpu.dcache.WriteReq_miss_latency       8033500                       # number of WriteReq miss cycles
-system.cpu.dcache.WriteReq_miss_rate         0.313853                       # miss rate for WriteReq accesses
-system.cpu.dcache.WriteReq_misses                 290                       # number of WriteReq misses
-system.cpu.dcache.WriteReq_mshr_hits              226                       # number of WriteReq MSHR hits
-system.cpu.dcache.WriteReq_mshr_miss_latency      2310000                       # number of WriteReq MSHR miss cycles
-system.cpu.dcache.WriteReq_mshr_miss_rate     0.069264                       # mshr miss rate for WriteReq accesses
+system.cpu.commit.branchMispredicts               620                       # The number of times a branch was mispredicted
+system.cpu.commit.commitCommittedInsts           5826                       # The number of committed instructions
+system.cpu.commit.commitNonSpecStalls              10                       # The number of times commit has been forced to stall to communicate backwards
+system.cpu.commit.commitSquashedInsts            6017                       # The number of squashed insts skipped by commit
+system.cpu.committedInsts                        5169                       # Number of Instructions Simulated
+system.cpu.committedInsts_total                  5169                       # Number of Instructions Simulated
+system.cpu.cpi                               5.440511                       # CPI: Cycles Per Instruction
+system.cpu.cpi_total                         5.440511                       # CPI: Total CPI of All Threads
+system.cpu.dcache.ReadReq_accesses               2321                       # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_avg_miss_latency 34074.626866                       # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency 36043.956044                       # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_hits                   2187                       # number of ReadReq hits
+system.cpu.dcache.ReadReq_miss_latency        4566000                       # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_rate          0.057734                       # miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_misses                  134                       # number of ReadReq misses
+system.cpu.dcache.ReadReq_mshr_hits                43                       # number of ReadReq MSHR hits
+system.cpu.dcache.ReadReq_mshr_miss_latency      3280000                       # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_rate     0.039207                       # mshr miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_mshr_misses              91                       # number of ReadReq MSHR misses
+system.cpu.dcache.WriteReq_accesses               925                       # number of WriteReq accesses(hits+misses)
+system.cpu.dcache.WriteReq_avg_miss_latency 27570.707071                       # average WriteReq miss latency
+system.cpu.dcache.WriteReq_avg_mshr_miss_latency 36046.875000                       # average WriteReq mshr miss latency
+system.cpu.dcache.WriteReq_hits                   628                       # number of WriteReq hits
+system.cpu.dcache.WriteReq_miss_latency       8188500                       # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_rate         0.321081                       # miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_misses                 297                       # number of WriteReq misses
+system.cpu.dcache.WriteReq_mshr_hits              233                       # number of WriteReq MSHR hits
+system.cpu.dcache.WriteReq_mshr_miss_latency      2307000                       # number of WriteReq MSHR miss cycles
+system.cpu.dcache.WriteReq_mshr_miss_rate     0.069189                       # mshr miss rate for WriteReq accesses
 system.cpu.dcache.WriteReq_mshr_misses             64                       # number of WriteReq MSHR misses
 system.cpu.dcache.avg_blocked_cycles::no_mshrs     no_value                       # average number of cycles each access was blocked
 system.cpu.dcache.avg_blocked_cycles::no_targets     no_value                       # average number of cycles each access was blocked
-system.cpu.dcache.avg_refs                  20.889706                       # Average number of references to valid blocks.
+system.cpu.dcache.avg_refs                  20.226950                       # Average number of references to valid blocks.
 system.cpu.dcache.blocked::no_mshrs                 0                       # number of cycles access was blocked
 system.cpu.dcache.blocked::no_targets               0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles::no_mshrs            0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles::no_targets            0                       # number of cycles access was blocked
 system.cpu.dcache.cache_copies                      0                       # number of cache copies performed
-system.cpu.dcache.demand_accesses                3221                       # number of demand (read+write) accesses
-system.cpu.dcache.demand_avg_miss_latency 29632.775120                       # average overall miss latency
-system.cpu.dcache.demand_avg_mshr_miss_latency 36052.980132                       # average overall mshr miss latency
-system.cpu.dcache.demand_hits                    2803                       # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency        12386500                       # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_rate           0.129773                       # miss rate for demand accesses
-system.cpu.dcache.demand_misses                   418                       # number of demand (read+write) misses
-system.cpu.dcache.demand_mshr_hits                267                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency      5444000                       # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_rate      0.046880                       # mshr miss rate for demand accesses
-system.cpu.dcache.demand_mshr_misses              151                       # number of demand (read+write) MSHR misses
+system.cpu.dcache.demand_accesses                3246                       # number of demand (read+write) accesses
+system.cpu.dcache.demand_avg_miss_latency 29592.807425                       # average overall miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency 36045.161290                       # average overall mshr miss latency
+system.cpu.dcache.demand_hits                    2815                       # number of demand (read+write) hits
+system.cpu.dcache.demand_miss_latency        12754500                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_rate           0.132779                       # miss rate for demand accesses
+system.cpu.dcache.demand_misses                   431                       # number of demand (read+write) misses
+system.cpu.dcache.demand_mshr_hits                276                       # number of demand (read+write) MSHR hits
+system.cpu.dcache.demand_mshr_miss_latency      5587000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_rate      0.047751                       # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_misses              155                       # number of demand (read+write) MSHR misses
 system.cpu.dcache.fast_writes                       0                       # number of fast writes performed
 system.cpu.dcache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.dcache.overall_accesses               3221                       # number of overall (read+write) accesses
-system.cpu.dcache.overall_avg_miss_latency 29632.775120                       # average overall miss latency
-system.cpu.dcache.overall_avg_mshr_miss_latency 36052.980132                       # average overall mshr miss latency
+system.cpu.dcache.overall_accesses               3246                       # number of overall (read+write) accesses
+system.cpu.dcache.overall_avg_miss_latency 29592.807425                       # average overall miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency 36045.161290                       # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency     no_value                       # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits                   2803                       # number of overall hits
-system.cpu.dcache.overall_miss_latency       12386500                       # number of overall miss cycles
-system.cpu.dcache.overall_miss_rate          0.129773                       # miss rate for overall accesses
-system.cpu.dcache.overall_misses                  418                       # number of overall misses
-system.cpu.dcache.overall_mshr_hits               267                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency      5444000                       # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_rate     0.046880                       # mshr miss rate for overall accesses
-system.cpu.dcache.overall_mshr_misses             151                       # number of overall MSHR misses
+system.cpu.dcache.overall_hits                   2815                       # number of overall hits
+system.cpu.dcache.overall_miss_latency       12754500                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_rate          0.132779                       # miss rate for overall accesses
+system.cpu.dcache.overall_misses                  431                       # number of overall misses
+system.cpu.dcache.overall_mshr_hits               276                       # number of overall MSHR hits
+system.cpu.dcache.overall_mshr_miss_latency      5587000                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_rate     0.047751                       # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_misses             155                       # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.dcache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu.dcache.replacements                      0                       # number of replacements
-system.cpu.dcache.sampled_refs                    136                       # Sample count of references to valid blocks.
+system.cpu.dcache.sampled_refs                    141                       # Sample count of references to valid blocks.
 system.cpu.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse                 87.690614                       # Cycle average of tags in use
-system.cpu.dcache.total_refs                     2841                       # Total number of references to valid blocks.
+system.cpu.dcache.tagsinuse                 91.308954                       # Cycle average of tags in use
+system.cpu.dcache.total_refs                     2852                       # Total number of references to valid blocks.
 system.cpu.dcache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.dcache.writebacks                        0                       # number of writebacks
-system.cpu.decode.DECODE:BlockedCycles            479                       # Number of cycles decode is blocked
-system.cpu.decode.DECODE:BranchMispred            128                       # Number of times decode detected a branch misprediction
-system.cpu.decode.DECODE:BranchResolved           128                       # Number of times decode resolved a branch
-system.cpu.decode.DECODE:DecodedInsts           14211                       # Number of instructions handled by decode
-system.cpu.decode.DECODE:IdleCycles              9912                       # Number of cycles decode is idle
-system.cpu.decode.DECODE:RunCycles               3839                       # Number of cycles decode is running
-system.cpu.decode.DECODE:SquashCycles            1056                       # Number of cycles decode is squashing
-system.cpu.decode.DECODE:SquashedInsts            251                       # Number of squashed instructions handled by decode
+system.cpu.decode.DECODE:BlockedCycles            519                       # Number of cycles decode is blocked
+system.cpu.decode.DECODE:BranchMispred            139                       # Number of times decode detected a branch misprediction
+system.cpu.decode.DECODE:BranchResolved           139                       # Number of times decode resolved a branch
+system.cpu.decode.DECODE:DecodedInsts           14436                       # Number of instructions handled by decode
+system.cpu.decode.DECODE:IdleCycles             10077                       # Number of cycles decode is idle
+system.cpu.decode.DECODE:RunCycles               3965                       # Number of cycles decode is running
+system.cpu.decode.DECODE:SquashCycles            1080                       # Number of cycles decode is squashing
+system.cpu.decode.DECODE:SquashedInsts            267                       # Number of squashed instructions handled by decode
 system.cpu.dtb.accesses                             0                       # DTB accesses
 system.cpu.dtb.hits                                 0                       # DTB hits
 system.cpu.dtb.misses                               0                       # DTB misses
@@ -131,116 +131,116 @@ system.cpu.dtb.read_misses                          0                       # DT
 system.cpu.dtb.write_accesses                       0                       # DTB write accesses
 system.cpu.dtb.write_hits                           0                       # DTB write hits
 system.cpu.dtb.write_misses                         0                       # DTB write misses
-system.cpu.fetch.Branches                        2357                       # Number of branches that fetch encountered
-system.cpu.fetch.CacheLines                      2171                       # Number of cache lines fetched
-system.cpu.fetch.Cycles                          6187                       # Number of cycles fetch has run and was not squashing or blocked
-system.cpu.fetch.IcacheSquashes                   360                       # Number of outstanding Icache misses that were squashed
-system.cpu.fetch.Insts                          15337                       # Number of instructions fetch has processed
-system.cpu.fetch.SquashCycles                     738                       # Number of cycles fetch has spent squashing
-system.cpu.fetch.branchRate                  0.084693                       # Number of branch fetches per cycle
-system.cpu.fetch.icacheStallCycles               2171                       # Number of cycles fetch is stalled on an Icache miss
-system.cpu.fetch.predictedBranches                939                       # Number of branches that fetch has predicted taken
-system.cpu.fetch.rate                        0.551096                       # Number of inst fetches per cycle
-system.cpu.fetch.rateDist::samples              15286                       # Number of instructions fetched each cycle (Total)
-system.cpu.fetch.rateDist::mean              1.003336                       # Number of instructions fetched each cycle (Total)
-system.cpu.fetch.rateDist::stdev             2.263199                       # Number of instructions fetched each cycle (Total)
+system.cpu.fetch.Branches                        2416                       # Number of branches that fetch encountered
+system.cpu.fetch.CacheLines                      2220                       # Number of cache lines fetched
+system.cpu.fetch.Cycles                          6371                       # Number of cycles fetch has run and was not squashing or blocked
+system.cpu.fetch.IcacheSquashes                   355                       # Number of outstanding Icache misses that were squashed
+system.cpu.fetch.Insts                          15622                       # Number of instructions fetch has processed
+system.cpu.fetch.SquashCycles                     767                       # Number of cycles fetch has spent squashing
+system.cpu.fetch.branchRate                  0.085911                       # Number of branch fetches per cycle
+system.cpu.fetch.icacheStallCycles               2220                       # Number of cycles fetch is stalled on an Icache miss
+system.cpu.fetch.predictedBranches                976                       # Number of branches that fetch has predicted taken
+system.cpu.fetch.rate                        0.555508                       # Number of inst fetches per cycle
+system.cpu.fetch.rateDist::samples              15641                       # Number of instructions fetched each cycle (Total)
+system.cpu.fetch.rateDist::mean              0.998785                       # Number of instructions fetched each cycle (Total)
+system.cpu.fetch.rateDist::stdev             2.252974                       # Number of instructions fetched each cycle (Total)
 system.cpu.fetch.rateDist::underflows               0      0.00%      0.00% # Number of instructions fetched each cycle (Total)
-system.cpu.fetch.rateDist::0-1                  11277     73.77%     73.77% # Number of instructions fetched each cycle (Total)
-system.cpu.fetch.rateDist::1-2                   1770     11.58%     85.35% # Number of instructions fetched each cycle (Total)
-system.cpu.fetch.rateDist::2-3                    198      1.30%     86.65% # Number of instructions fetched each cycle (Total)
-system.cpu.fetch.rateDist::3-4                    138      0.90%     87.55% # Number of instructions fetched each cycle (Total)
-system.cpu.fetch.rateDist::4-5                    316      2.07%     89.62% # Number of instructions fetched each cycle (Total)
-system.cpu.fetch.rateDist::5-6                    114      0.75%     90.36% # Number of instructions fetched each cycle (Total)
-system.cpu.fetch.rateDist::6-7                    306      2.00%     92.37% # Number of instructions fetched each cycle (Total)
-system.cpu.fetch.rateDist::7-8                    249      1.63%     93.99% # Number of instructions fetched each cycle (Total)
-system.cpu.fetch.rateDist::8                      918      6.01%    100.00% # Number of instructions fetched each cycle (Total)
+system.cpu.fetch.rateDist::0-1                  11507     73.57%     73.57% # Number of instructions fetched each cycle (Total)
+system.cpu.fetch.rateDist::1-2                   1847     11.81%     85.38% # Number of instructions fetched each cycle (Total)
+system.cpu.fetch.rateDist::2-3                    223      1.43%     86.80% # Number of instructions fetched each cycle (Total)
+system.cpu.fetch.rateDist::3-4                    141      0.90%     87.71% # Number of instructions fetched each cycle (Total)
+system.cpu.fetch.rateDist::4-5                    312      1.99%     89.70% # Number of instructions fetched each cycle (Total)
+system.cpu.fetch.rateDist::5-6                    120      0.77%     90.47% # Number of instructions fetched each cycle (Total)
+system.cpu.fetch.rateDist::6-7                    308      1.97%     92.44% # Number of instructions fetched each cycle (Total)
+system.cpu.fetch.rateDist::7-8                    254      1.62%     94.06% # Number of instructions fetched each cycle (Total)
+system.cpu.fetch.rateDist::8                      929      5.94%    100.00% # Number of instructions fetched each cycle (Total)
 system.cpu.fetch.rateDist::overflows                0      0.00%    100.00% # Number of instructions fetched each cycle (Total)
 system.cpu.fetch.rateDist::min_value                0                       # Number of instructions fetched each cycle (Total)
 system.cpu.fetch.rateDist::max_value                8                       # Number of instructions fetched each cycle (Total)
-system.cpu.fetch.rateDist::total                15286                       # Number of instructions fetched each cycle (Total)
-system.cpu.icache.ReadReq_accesses               2171                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_avg_miss_latency 35436.489607                       # average ReadReq miss latency
-system.cpu.icache.ReadReq_avg_mshr_miss_latency 34915.151515                       # average ReadReq mshr miss latency
-system.cpu.icache.ReadReq_hits                   1738                       # number of ReadReq hits
-system.cpu.icache.ReadReq_miss_latency       15344000                       # number of ReadReq miss cycles
-system.cpu.icache.ReadReq_miss_rate          0.199447                       # miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_misses                  433                       # number of ReadReq misses
-system.cpu.icache.ReadReq_mshr_hits               103                       # number of ReadReq MSHR hits
-system.cpu.icache.ReadReq_mshr_miss_latency     11522000                       # number of ReadReq MSHR miss cycles
-system.cpu.icache.ReadReq_mshr_miss_rate     0.152004                       # mshr miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_mshr_misses             330                       # number of ReadReq MSHR misses
+system.cpu.fetch.rateDist::total                15641                       # Number of instructions fetched each cycle (Total)
+system.cpu.icache.ReadReq_accesses               2220                       # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_avg_miss_latency 35681.279621                       # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency 34902.735562                       # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_hits                   1798                       # number of ReadReq hits
+system.cpu.icache.ReadReq_miss_latency       15057500                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_rate          0.190090                       # miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_misses                  422                       # number of ReadReq misses
+system.cpu.icache.ReadReq_mshr_hits                93                       # number of ReadReq MSHR hits
+system.cpu.icache.ReadReq_mshr_miss_latency     11483000                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_rate     0.148198                       # mshr miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_mshr_misses             329                       # number of ReadReq MSHR misses
 system.cpu.icache.avg_blocked_cycles::no_mshrs     no_value                       # average number of cycles each access was blocked
 system.cpu.icache.avg_blocked_cycles::no_targets     no_value                       # average number of cycles each access was blocked
-system.cpu.icache.avg_refs                   5.266667                       # Average number of references to valid blocks.
+system.cpu.icache.avg_refs                   5.465046                       # Average number of references to valid blocks.
 system.cpu.icache.blocked::no_mshrs                 0                       # number of cycles access was blocked
 system.cpu.icache.blocked::no_targets               0                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles::no_mshrs            0                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles::no_targets            0                       # number of cycles access was blocked
 system.cpu.icache.cache_copies                      0                       # number of cache copies performed
-system.cpu.icache.demand_accesses                2171                       # number of demand (read+write) accesses
-system.cpu.icache.demand_avg_miss_latency 35436.489607                       # average overall miss latency
-system.cpu.icache.demand_avg_mshr_miss_latency 34915.151515                       # average overall mshr miss latency
-system.cpu.icache.demand_hits                    1738                       # number of demand (read+write) hits
-system.cpu.icache.demand_miss_latency        15344000                       # number of demand (read+write) miss cycles
-system.cpu.icache.demand_miss_rate           0.199447                       # miss rate for demand accesses
-system.cpu.icache.demand_misses                   433                       # number of demand (read+write) misses
-system.cpu.icache.demand_mshr_hits                103                       # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_miss_latency     11522000                       # number of demand (read+write) MSHR miss cycles
-system.cpu.icache.demand_mshr_miss_rate      0.152004                       # mshr miss rate for demand accesses
-system.cpu.icache.demand_mshr_misses              330                       # number of demand (read+write) MSHR misses
+system.cpu.icache.demand_accesses                2220                       # number of demand (read+write) accesses
+system.cpu.icache.demand_avg_miss_latency 35681.279621                       # average overall miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency 34902.735562                       # average overall mshr miss latency
+system.cpu.icache.demand_hits                    1798                       # number of demand (read+write) hits
+system.cpu.icache.demand_miss_latency        15057500                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_rate           0.190090                       # miss rate for demand accesses
+system.cpu.icache.demand_misses                   422                       # number of demand (read+write) misses
+system.cpu.icache.demand_mshr_hits                 93                       # number of demand (read+write) MSHR hits
+system.cpu.icache.demand_mshr_miss_latency     11483000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_rate      0.148198                       # mshr miss rate for demand accesses
+system.cpu.icache.demand_mshr_misses              329                       # number of demand (read+write) MSHR misses
 system.cpu.icache.fast_writes                       0                       # number of fast writes performed
 system.cpu.icache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.icache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.icache.overall_accesses               2171                       # number of overall (read+write) accesses
-system.cpu.icache.overall_avg_miss_latency 35436.489607                       # average overall miss latency
-system.cpu.icache.overall_avg_mshr_miss_latency 34915.151515                       # average overall mshr miss latency
+system.cpu.icache.overall_accesses               2220                       # number of overall (read+write) accesses
+system.cpu.icache.overall_avg_miss_latency 35681.279621                       # average overall miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency 34902.735562                       # average overall mshr miss latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency     no_value                       # average overall mshr uncacheable latency
-system.cpu.icache.overall_hits                   1738                       # number of overall hits
-system.cpu.icache.overall_miss_latency       15344000                       # number of overall miss cycles
-system.cpu.icache.overall_miss_rate          0.199447                       # miss rate for overall accesses
-system.cpu.icache.overall_misses                  433                       # number of overall misses
-system.cpu.icache.overall_mshr_hits               103                       # number of overall MSHR hits
-system.cpu.icache.overall_mshr_miss_latency     11522000                       # number of overall MSHR miss cycles
-system.cpu.icache.overall_mshr_miss_rate     0.152004                       # mshr miss rate for overall accesses
-system.cpu.icache.overall_mshr_misses             330                       # number of overall MSHR misses
+system.cpu.icache.overall_hits                   1798                       # number of overall hits
+system.cpu.icache.overall_miss_latency       15057500                       # number of overall miss cycles
+system.cpu.icache.overall_miss_rate          0.190090                       # miss rate for overall accesses
+system.cpu.icache.overall_misses                  422                       # number of overall misses
+system.cpu.icache.overall_mshr_hits                93                       # number of overall MSHR hits
+system.cpu.icache.overall_mshr_miss_latency     11483000                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_rate     0.148198                       # mshr miss rate for overall accesses
+system.cpu.icache.overall_mshr_misses             329                       # number of overall MSHR misses
 system.cpu.icache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.icache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu.icache.replacements                     16                       # number of replacements
-system.cpu.icache.sampled_refs                    330                       # Sample count of references to valid blocks.
+system.cpu.icache.sampled_refs                    329                       # Sample count of references to valid blocks.
 system.cpu.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse                159.086288                       # Cycle average of tags in use
-system.cpu.icache.total_refs                     1738                       # Total number of references to valid blocks.
+system.cpu.icache.tagsinuse                156.015053                       # Cycle average of tags in use
+system.cpu.icache.total_refs                     1798                       # Total number of references to valid blocks.
 system.cpu.icache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.icache.writebacks                        0                       # number of writebacks
-system.cpu.idleCycles                           12544                       # Total number of cycles that the CPU has spent unscheduled due to idling
-system.cpu.iew.EXEC:branches                     1216                       # Number of branches executed
-system.cpu.iew.EXEC:nop                          1820                       # number of nop insts executed
-system.cpu.iew.EXEC:rate                     0.292239                       # Inst execution rate
-system.cpu.iew.EXEC:refs                         3432                       # number of memory reference insts executed
-system.cpu.iew.EXEC:stores                       1048                       # Number of stores executed
+system.cpu.idleCycles                           12481                       # Total number of cycles that the CPU has spent unscheduled due to idling
+system.cpu.iew.EXEC:branches                     1253                       # Number of branches executed
+system.cpu.iew.EXEC:nop                          1830                       # number of nop insts executed
+system.cpu.iew.EXEC:rate                     0.295249                       # Inst execution rate
+system.cpu.iew.EXEC:refs                         3456                       # number of memory reference insts executed
+system.cpu.iew.EXEC:stores                       1049                       # Number of stores executed
 system.cpu.iew.EXEC:swp                             0                       # number of swp insts executed
-system.cpu.iew.WB:consumers                      4040                       # num instructions consuming a value
-system.cpu.iew.WB:count                          7355                       # cumulative count of insts written-back
-system.cpu.iew.WB:fanout                     0.694802                       # average fanout of values written-back
+system.cpu.iew.WB:consumers                      4132                       # num instructions consuming a value
+system.cpu.iew.WB:count                          7536                       # cumulative count of insts written-back
+system.cpu.iew.WB:fanout                     0.703291                       # average fanout of values written-back
 system.cpu.iew.WB:penalized                         0                       # number of instrctions required to write to 'other' IQ
 system.cpu.iew.WB:penalized_rate                    0                       # fraction of instructions written-back that wrote to 'other' IQ
-system.cpu.iew.WB:producers                      2807                       # num instructions producing a value
-system.cpu.iew.WB:rate                       0.264283                       # insts written-back per cycle
-system.cpu.iew.WB:sent                           7444                       # cumulative count of insts sent to commit
-system.cpu.iew.branchMispredicts                  663                       # Number of branch mispredicts detected at execute
-system.cpu.iew.iewBlockCycles                       8                       # Number of cycles IEW is blocking
-system.cpu.iew.iewDispLoadInsts                  2795                       # Number of dispatched load instructions
-system.cpu.iew.iewDispNonSpecInsts                 15                       # Number of dispatched non-speculative instructions
-system.cpu.iew.iewDispSquashedInsts               968                       # Number of squashed instructions skipped by dispatch
-system.cpu.iew.iewDispStoreInsts                 1158                       # Number of dispatched store instructions
-system.cpu.iew.iewDispatchedInsts               11660                       # Number of instructions dispatched to IQ
-system.cpu.iew.iewExecLoadInsts                  2384                       # Number of load instructions executed
-system.cpu.iew.iewExecSquashedInsts               531                       # Number of squashed instructions skipped in execute
-system.cpu.iew.iewExecutedInsts                  8133                       # Number of executed instructions
+system.cpu.iew.WB:producers                      2906                       # num instructions producing a value
+system.cpu.iew.WB:rate                       0.267975                       # insts written-back per cycle
+system.cpu.iew.WB:sent                           7618                       # cumulative count of insts sent to commit
+system.cpu.iew.branchMispredicts                  681                       # Number of branch mispredicts detected at execute
+system.cpu.iew.iewBlockCycles                       0                       # Number of cycles IEW is blocking
+system.cpu.iew.iewDispLoadInsts                  2806                       # Number of dispatched load instructions
+system.cpu.iew.iewDispNonSpecInsts                 12                       # Number of dispatched non-speculative instructions
+system.cpu.iew.iewDispSquashedInsts               963                       # Number of squashed instructions skipped by dispatch
+system.cpu.iew.iewDispStoreInsts                 1159                       # Number of dispatched store instructions
+system.cpu.iew.iewDispatchedInsts               11847                       # Number of instructions dispatched to IQ
+system.cpu.iew.iewExecLoadInsts                  2407                       # Number of load instructions executed
+system.cpu.iew.iewExecSquashedInsts               549                       # Number of squashed instructions skipped in execute
+system.cpu.iew.iewExecutedInsts                  8303                       # Number of executed instructions
 system.cpu.iew.iewIQFullEvents                      0                       # Number of times the IQ has become full, causing a stall
 system.cpu.iew.iewIdleCycles                        0                       # Number of cycles IEW is idle
-system.cpu.iew.iewLSQFullEvents                     1                       # Number of times the LSQ has become full, causing a stall
-system.cpu.iew.iewSquashCycles                   1056                       # Number of cycles IEW is squashing
+system.cpu.iew.iewLSQFullEvents                     0                       # Number of times the LSQ has become full, causing a stall
+system.cpu.iew.iewSquashCycles                   1080                       # Number of cycles IEW is squashing
 system.cpu.iew.iewUnblockCycles                     0                       # Number of cycles IEW is unblocking
 system.cpu.iew.lsq.thread.0.blockedLoads            0                       # Number of blocked loads due to partial load-store forwarding
 system.cpu.iew.lsq.thread.0.cacheBlocked            0                       # Number of times an access to memory failed due to the cache being blocked
@@ -250,68 +250,69 @@ system.cpu.iew.lsq.thread.0.invAddrLoads            0                       # Nu
 system.cpu.iew.lsq.thread.0.invAddrSwpfs            0                       # Number of software prefetches ignored due to an invalid address
 system.cpu.iew.lsq.thread.0.memOrderViolation           22                       # Number of memory ordering violations
 system.cpu.iew.lsq.thread.0.rescheduledLoads            0                       # Number of loads that were rescheduled
-system.cpu.iew.lsq.thread.0.squashedLoads         1662                       # Number of loads squashed
+system.cpu.iew.lsq.thread.0.squashedLoads         1642                       # Number of loads squashed
 system.cpu.iew.lsq.thread.0.squashedStores          234                       # Number of stores squashed
 system.cpu.iew.memOrderViolationEvents             22                       # Number of memory order violations
-system.cpu.iew.predictedNotTakenIncorrect          277                       # Number of branches that were predicted not taken incorrectly
-system.cpu.iew.predictedTakenIncorrect            386                       # Number of branches that were predicted taken incorrectly
-system.cpu.ipc                               0.181423                       # IPC: Instructions Per Cycle
-system.cpu.ipc_total                         0.181423                       # IPC: Total IPC of All Threads
+system.cpu.iew.predictedNotTakenIncorrect          272                       # Number of branches that were predicted not taken incorrectly
+system.cpu.iew.predictedTakenIncorrect            409                       # Number of branches that were predicted taken incorrectly
+system.cpu.ipc                               0.183806                       # IPC: Instructions Per Cycle
+system.cpu.ipc_total                         0.183806                       # IPC: Total IPC of All Threads
 system.cpu.iq.ISSUE:FU_type_0::No_OpClass            0      0.00%      0.00% # Type of FU issued
-system.cpu.iq.ISSUE:FU_type_0::IntAlu            5020     57.94%     57.94% # Type of FU issued
-system.cpu.iq.ISSUE:FU_type_0::IntMult              5      0.06%     58.00% # Type of FU issued
-system.cpu.iq.ISSUE:FU_type_0::IntDiv               2      0.02%     58.02% # Type of FU issued
-system.cpu.iq.ISSUE:FU_type_0::FloatAdd             2      0.02%     58.04% # Type of FU issued
-system.cpu.iq.ISSUE:FU_type_0::FloatCmp             0      0.00%     58.04% # Type of FU issued
-system.cpu.iq.ISSUE:FU_type_0::FloatCvt             0      0.00%     58.04% # Type of FU issued
-system.cpu.iq.ISSUE:FU_type_0::FloatMult            0      0.00%     58.04% # Type of FU issued
-system.cpu.iq.ISSUE:FU_type_0::FloatDiv             0      0.00%     58.04% # Type of FU issued
-system.cpu.iq.ISSUE:FU_type_0::FloatSqrt            0      0.00%     58.04% # Type of FU issued
-system.cpu.iq.ISSUE:FU_type_0::MemRead           2572     29.69%     87.73% # Type of FU issued
-system.cpu.iq.ISSUE:FU_type_0::MemWrite          1063     12.27%    100.00% # Type of FU issued
+system.cpu.iq.ISSUE:FU_type_0::IntAlu            5184     58.56%     58.56% # Type of FU issued
+system.cpu.iq.ISSUE:FU_type_0::IntMult              5      0.06%     58.62% # Type of FU issued
+system.cpu.iq.ISSUE:FU_type_0::IntDiv               2      0.02%     58.64% # Type of FU issued
+system.cpu.iq.ISSUE:FU_type_0::FloatAdd             2      0.02%     58.66% # Type of FU issued
+system.cpu.iq.ISSUE:FU_type_0::FloatCmp             0      0.00%     58.66% # Type of FU issued
+system.cpu.iq.ISSUE:FU_type_0::FloatCvt             0      0.00%     58.66% # Type of FU issued
+system.cpu.iq.ISSUE:FU_type_0::FloatMult            0      0.00%     58.66% # Type of FU issued
+system.cpu.iq.ISSUE:FU_type_0::FloatDiv             0      0.00%     58.66% # Type of FU issued
+system.cpu.iq.ISSUE:FU_type_0::FloatSqrt            0      0.00%     58.66% # Type of FU issued
+system.cpu.iq.ISSUE:FU_type_0::MemRead           2595     29.32%     87.98% # Type of FU issued
+system.cpu.iq.ISSUE:FU_type_0::MemWrite          1064     12.02%    100.00% # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0::IprAccess            0      0.00%    100.00% # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0::InstPrefetch            0      0.00%    100.00% # Type of FU issued
-system.cpu.iq.ISSUE:FU_type_0::total             8664                       # Type of FU issued
+system.cpu.iq.ISSUE:FU_type_0::total             8852                       # Type of FU issued
 system.cpu.iq.ISSUE:fu_busy_cnt                   162                       # FU busy when requested
-system.cpu.iq.ISSUE:fu_busy_rate             0.018698                       # FU busy rate (busy events/executed inst)
+system.cpu.iq.ISSUE:fu_busy_rate             0.018301                       # FU busy rate (busy events/executed inst)
 system.cpu.iq.ISSUE:fu_full::No_OpClass             0      0.00%      0.00% # attempts to use FU when none available
-system.cpu.iq.ISSUE:fu_full::IntAlu                10      6.17%      6.17% # attempts to use FU when none available
-system.cpu.iq.ISSUE:fu_full::IntMult                0      0.00%      6.17% # attempts to use FU when none available
-system.cpu.iq.ISSUE:fu_full::IntDiv                 0      0.00%      6.17% # attempts to use FU when none available
-system.cpu.iq.ISSUE:fu_full::FloatAdd               0      0.00%      6.17% # attempts to use FU when none available
-system.cpu.iq.ISSUE:fu_full::FloatCmp               0      0.00%      6.17% # attempts to use FU when none available
-system.cpu.iq.ISSUE:fu_full::FloatCvt               0      0.00%      6.17% # attempts to use FU when none available
-system.cpu.iq.ISSUE:fu_full::FloatMult              0      0.00%      6.17% # attempts to use FU when none available
-system.cpu.iq.ISSUE:fu_full::FloatDiv               0      0.00%      6.17% # attempts to use FU when none available
-system.cpu.iq.ISSUE:fu_full::FloatSqrt              0      0.00%      6.17% # attempts to use FU when none available
-system.cpu.iq.ISSUE:fu_full::MemRead               98     60.49%     66.67% # attempts to use FU when none available
+system.cpu.iq.ISSUE:fu_full::IntAlu                 8      4.94%      4.94% # attempts to use FU when none available
+system.cpu.iq.ISSUE:fu_full::IntMult                0      0.00%      4.94% # attempts to use FU when none available
+system.cpu.iq.ISSUE:fu_full::IntDiv                 0      0.00%      4.94% # attempts to use FU when none available
+system.cpu.iq.ISSUE:fu_full::FloatAdd               0      0.00%      4.94% # attempts to use FU when none available
+system.cpu.iq.ISSUE:fu_full::FloatCmp               0      0.00%      4.94% # attempts to use FU when none available
+system.cpu.iq.ISSUE:fu_full::FloatCvt               0      0.00%      4.94% # attempts to use FU when none available
+system.cpu.iq.ISSUE:fu_full::FloatMult              0      0.00%      4.94% # attempts to use FU when none available
+system.cpu.iq.ISSUE:fu_full::FloatDiv               0      0.00%      4.94% # attempts to use FU when none available
+system.cpu.iq.ISSUE:fu_full::FloatSqrt              0      0.00%      4.94% # attempts to use FU when none available
+system.cpu.iq.ISSUE:fu_full::MemRead              100     61.73%     66.67% # attempts to use FU when none available
 system.cpu.iq.ISSUE:fu_full::MemWrite              54     33.33%    100.00% # attempts to use FU when none available
 system.cpu.iq.ISSUE:fu_full::IprAccess              0      0.00%    100.00% # attempts to use FU when none available
 system.cpu.iq.ISSUE:fu_full::InstPrefetch            0      0.00%    100.00% # attempts to use FU when none available
-system.cpu.iq.ISSUE:issued_per_cycle::samples        15286                       # Number of insts issued each cycle
-system.cpu.iq.ISSUE:issued_per_cycle::mean     0.566793                       # Number of insts issued each cycle
-system.cpu.iq.ISSUE:issued_per_cycle::stdev     1.217668                       # Number of insts issued each cycle
+system.cpu.iq.ISSUE:issued_per_cycle::samples        15641                       # Number of insts issued each cycle
+system.cpu.iq.ISSUE:issued_per_cycle::mean     0.565948                       # Number of insts issued each cycle
+system.cpu.iq.ISSUE:issued_per_cycle::stdev     1.209939                       # Number of insts issued each cycle
 system.cpu.iq.ISSUE:issued_per_cycle::underflows            0      0.00%      0.00% # Number of insts issued each cycle
-system.cpu.iq.ISSUE:issued_per_cycle::0-1        11421     74.72%     74.72% # Number of insts issued each cycle
-system.cpu.iq.ISSUE:issued_per_cycle::1-2         1678     10.98%     85.69% # Number of insts issued each cycle
-system.cpu.iq.ISSUE:issued_per_cycle::2-3          792      5.18%     90.87% # Number of insts issued each cycle
-system.cpu.iq.ISSUE:issued_per_cycle::3-4          722      4.72%     95.60% # Number of insts issued each cycle
-system.cpu.iq.ISSUE:issued_per_cycle::4-5          333      2.18%     97.78% # Number of insts issued each cycle
-system.cpu.iq.ISSUE:issued_per_cycle::5-6          200      1.31%     99.08% # Number of insts issued each cycle
-system.cpu.iq.ISSUE:issued_per_cycle::6-7           91      0.60%     99.68% # Number of insts issued each cycle
-system.cpu.iq.ISSUE:issued_per_cycle::7-8           34      0.22%     99.90% # Number of insts issued each cycle
+system.cpu.iq.ISSUE:issued_per_cycle::0-1        11653     74.50%     74.50% # Number of insts issued each cycle
+system.cpu.iq.ISSUE:issued_per_cycle::1-2         1757     11.23%     85.74% # Number of insts issued each cycle
+system.cpu.iq.ISSUE:issued_per_cycle::2-3          814      5.20%     90.94% # Number of insts issued each cycle
+system.cpu.iq.ISSUE:issued_per_cycle::3-4          738      4.72%     95.66% # Number of insts issued each cycle
+system.cpu.iq.ISSUE:issued_per_cycle::4-5          342      2.19%     97.85% # Number of insts issued each cycle
+system.cpu.iq.ISSUE:issued_per_cycle::5-6          199      1.27%     99.12% # Number of insts issued each cycle
+system.cpu.iq.ISSUE:issued_per_cycle::6-7           91      0.58%     99.70% # Number of insts issued each cycle
+system.cpu.iq.ISSUE:issued_per_cycle::7-8           32      0.20%     99.90% # Number of insts issued each cycle
 system.cpu.iq.ISSUE:issued_per_cycle::8            15      0.10%    100.00% # Number of insts issued each cycle
 system.cpu.iq.ISSUE:issued_per_cycle::overflows            0      0.00%    100.00% # Number of insts issued each cycle
 system.cpu.iq.ISSUE:issued_per_cycle::min_value            0                       # Number of insts issued each cycle
 system.cpu.iq.ISSUE:issued_per_cycle::max_value            8                       # Number of insts issued each cycle
-system.cpu.iq.ISSUE:issued_per_cycle::total        15286                       # Number of insts issued each cycle
-system.cpu.iq.ISSUE:rate                     0.311319                       # Inst issue rate
-system.cpu.iq.iqInstsAdded                       9825                       # Number of instructions added to the IQ (excludes non-spec)
-system.cpu.iq.iqInstsIssued                      8664                       # Number of instructions issued
-system.cpu.iq.iqNonSpecInstsAdded                  15                       # Number of non-speculative instructions added to the IQ
-system.cpu.iq.iqSquashedInstsExamined            4207                       # Number of squashed instructions iterated over during squash; mainly for profiling
-system.cpu.iq.iqSquashedInstsIssued                30                       # Number of squashed instructions issued
-system.cpu.iq.iqSquashedOperandsExamined         2761                       # Number of squashed operands that are examined and possibly removed from graph
+system.cpu.iq.ISSUE:issued_per_cycle::total        15641                       # Number of insts issued each cycle
+system.cpu.iq.ISSUE:rate                     0.314771                       # Inst issue rate
+system.cpu.iq.iqInstsAdded                      10005                       # Number of instructions added to the IQ (excludes non-spec)
+system.cpu.iq.iqInstsIssued                      8852                       # Number of instructions issued
+system.cpu.iq.iqNonSpecInstsAdded                  12                       # Number of non-speculative instructions added to the IQ
+system.cpu.iq.iqSquashedInstsExamined            4214                       # Number of squashed instructions iterated over during squash; mainly for profiling
+system.cpu.iq.iqSquashedInstsIssued                36                       # Number of squashed instructions issued
+system.cpu.iq.iqSquashedNonSpecRemoved              2                       # Number of squashed non-spec instructions that were removed
+system.cpu.iq.iqSquashedOperandsExamined         2725                       # Number of squashed operands that are examined and possibly removed from graph
 system.cpu.itb.accesses                             0                       # DTB accesses
 system.cpu.itb.hits                                 0                       # DTB hits
 system.cpu.itb.misses                               0                       # DTB misses
@@ -321,98 +322,98 @@ system.cpu.itb.read_misses                          0                       # DT
 system.cpu.itb.write_accesses                       0                       # DTB write accesses
 system.cpu.itb.write_hits                           0                       # DTB write hits
 system.cpu.itb.write_misses                         0                       # DTB write misses
-system.cpu.l2cache.ReadExReq_accesses              49                       # number of ReadExReq accesses(hits+misses)
-system.cpu.l2cache.ReadExReq_avg_miss_latency 34704.081633                       # average ReadExReq miss latency
-system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 31408.163265                       # average ReadExReq mshr miss latency
-system.cpu.l2cache.ReadExReq_miss_latency      1700500                       # number of ReadExReq miss cycles
+system.cpu.l2cache.ReadExReq_accesses              50                       # number of ReadExReq accesses(hits+misses)
+system.cpu.l2cache.ReadExReq_avg_miss_latency        34680                       # average ReadExReq miss latency
+system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency        31360                       # average ReadExReq mshr miss latency
+system.cpu.l2cache.ReadExReq_miss_latency      1734000                       # number of ReadExReq miss cycles
 system.cpu.l2cache.ReadExReq_miss_rate              1                       # miss rate for ReadExReq accesses
-system.cpu.l2cache.ReadExReq_misses                49                       # number of ReadExReq misses
-system.cpu.l2cache.ReadExReq_mshr_miss_latency      1539000                       # number of ReadExReq MSHR miss cycles
+system.cpu.l2cache.ReadExReq_misses                50                       # number of ReadExReq misses
+system.cpu.l2cache.ReadExReq_mshr_miss_latency      1568000                       # number of ReadExReq MSHR miss cycles
 system.cpu.l2cache.ReadExReq_mshr_miss_rate            1                       # mshr miss rate for ReadExReq accesses
-system.cpu.l2cache.ReadExReq_mshr_misses           49                       # number of ReadExReq MSHR misses
-system.cpu.l2cache.ReadReq_accesses               417                       # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_avg_miss_latency 34307.506053                       # average ReadReq miss latency
-system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 31130.750605                       # average ReadReq mshr miss latency
+system.cpu.l2cache.ReadExReq_mshr_misses           50                       # number of ReadExReq MSHR misses
+system.cpu.l2cache.ReadReq_accesses               420                       # number of ReadReq accesses(hits+misses)
+system.cpu.l2cache.ReadReq_avg_miss_latency 34317.307692                       # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 31138.221154                       # average ReadReq mshr miss latency
 system.cpu.l2cache.ReadReq_hits                     4                       # number of ReadReq hits
-system.cpu.l2cache.ReadReq_miss_latency      14169000                       # number of ReadReq miss cycles
-system.cpu.l2cache.ReadReq_miss_rate         0.990408                       # miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_misses                 413                       # number of ReadReq misses
-system.cpu.l2cache.ReadReq_mshr_miss_latency     12857000                       # number of ReadReq MSHR miss cycles
-system.cpu.l2cache.ReadReq_mshr_miss_rate     0.990408                       # mshr miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_mshr_misses            413                       # number of ReadReq MSHR misses
-system.cpu.l2cache.UpgradeReq_accesses             15                       # number of UpgradeReq accesses(hits+misses)
-system.cpu.l2cache.UpgradeReq_avg_miss_latency        34400                       # average UpgradeReq miss latency
-system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 31166.666667                       # average UpgradeReq mshr miss latency
-system.cpu.l2cache.UpgradeReq_miss_latency       516000                       # number of UpgradeReq miss cycles
+system.cpu.l2cache.ReadReq_miss_latency      14276000                       # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_miss_rate         0.990476                       # miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_misses                 416                       # number of ReadReq misses
+system.cpu.l2cache.ReadReq_mshr_miss_latency     12953500                       # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_rate     0.990476                       # mshr miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_mshr_misses            416                       # number of ReadReq MSHR misses
+system.cpu.l2cache.UpgradeReq_accesses             14                       # number of UpgradeReq accesses(hits+misses)
+system.cpu.l2cache.UpgradeReq_avg_miss_latency 34428.571429                       # average UpgradeReq miss latency
+system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 31178.571429                       # average UpgradeReq mshr miss latency
+system.cpu.l2cache.UpgradeReq_miss_latency       482000                       # number of UpgradeReq miss cycles
 system.cpu.l2cache.UpgradeReq_miss_rate             1                       # miss rate for UpgradeReq accesses
-system.cpu.l2cache.UpgradeReq_misses               15                       # number of UpgradeReq misses
-system.cpu.l2cache.UpgradeReq_mshr_miss_latency       467500                       # number of UpgradeReq MSHR miss cycles
+system.cpu.l2cache.UpgradeReq_misses               14                       # number of UpgradeReq misses
+system.cpu.l2cache.UpgradeReq_mshr_miss_latency       436500                       # number of UpgradeReq MSHR miss cycles
 system.cpu.l2cache.UpgradeReq_mshr_miss_rate            1                       # mshr miss rate for UpgradeReq accesses
-system.cpu.l2cache.UpgradeReq_mshr_misses           15                       # number of UpgradeReq MSHR misses
+system.cpu.l2cache.UpgradeReq_mshr_misses           14                       # number of UpgradeReq MSHR misses
 system.cpu.l2cache.avg_blocked_cycles::no_mshrs     no_value                       # average number of cycles each access was blocked
 system.cpu.l2cache.avg_blocked_cycles::no_targets     no_value                       # average number of cycles each access was blocked
-system.cpu.l2cache.avg_refs                  0.010050                       # Average number of references to valid blocks.
+system.cpu.l2cache.avg_refs                  0.009950                       # Average number of references to valid blocks.
 system.cpu.l2cache.blocked::no_mshrs                0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked::no_targets              0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles::no_mshrs            0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles::no_targets            0                       # number of cycles access was blocked
 system.cpu.l2cache.cache_copies                     0                       # number of cache copies performed
-system.cpu.l2cache.demand_accesses                466                       # number of demand (read+write) accesses
-system.cpu.l2cache.demand_avg_miss_latency 34349.567100                       # average overall miss latency
-system.cpu.l2cache.demand_avg_mshr_miss_latency 31160.173160                       # average overall mshr miss latency
+system.cpu.l2cache.demand_accesses                470                       # number of demand (read+write) accesses
+system.cpu.l2cache.demand_avg_miss_latency 34356.223176                       # average overall miss latency
+system.cpu.l2cache.demand_avg_mshr_miss_latency 31162.017167                       # average overall mshr miss latency
 system.cpu.l2cache.demand_hits                      4                       # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency       15869500                       # number of demand (read+write) miss cycles
-system.cpu.l2cache.demand_miss_rate          0.991416                       # miss rate for demand accesses
-system.cpu.l2cache.demand_misses                  462                       # number of demand (read+write) misses
+system.cpu.l2cache.demand_miss_latency       16010000                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_rate          0.991489                       # miss rate for demand accesses
+system.cpu.l2cache.demand_misses                  466                       # number of demand (read+write) misses
 system.cpu.l2cache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
-system.cpu.l2cache.demand_mshr_miss_latency     14396000                       # number of demand (read+write) MSHR miss cycles
-system.cpu.l2cache.demand_mshr_miss_rate     0.991416                       # mshr miss rate for demand accesses
-system.cpu.l2cache.demand_mshr_misses             462                       # number of demand (read+write) MSHR misses
+system.cpu.l2cache.demand_mshr_miss_latency     14521500                       # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_rate     0.991489                       # mshr miss rate for demand accesses
+system.cpu.l2cache.demand_mshr_misses             466                       # number of demand (read+write) MSHR misses
 system.cpu.l2cache.fast_writes                      0                       # number of fast writes performed
 system.cpu.l2cache.mshr_cap_events                  0                       # number of times MSHR cap was activated
 system.cpu.l2cache.no_allocate_misses               0                       # Number of misses that were no-allocate
-system.cpu.l2cache.overall_accesses               466                       # number of overall (read+write) accesses
-system.cpu.l2cache.overall_avg_miss_latency 34349.567100                       # average overall miss latency
-system.cpu.l2cache.overall_avg_mshr_miss_latency 31160.173160                       # average overall mshr miss latency
+system.cpu.l2cache.overall_accesses               470                       # number of overall (read+write) accesses
+system.cpu.l2cache.overall_avg_miss_latency 34356.223176                       # average overall miss latency
+system.cpu.l2cache.overall_avg_mshr_miss_latency 31162.017167                       # average overall mshr miss latency
 system.cpu.l2cache.overall_avg_mshr_uncacheable_latency     no_value                       # average overall mshr uncacheable latency
 system.cpu.l2cache.overall_hits                     4                       # number of overall hits
-system.cpu.l2cache.overall_miss_latency      15869500                       # number of overall miss cycles
-system.cpu.l2cache.overall_miss_rate         0.991416                       # miss rate for overall accesses
-system.cpu.l2cache.overall_misses                 462                       # number of overall misses
+system.cpu.l2cache.overall_miss_latency      16010000                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_rate         0.991489                       # miss rate for overall accesses
+system.cpu.l2cache.overall_misses                 466                       # number of overall misses
 system.cpu.l2cache.overall_mshr_hits                0                       # number of overall MSHR hits
-system.cpu.l2cache.overall_mshr_miss_latency     14396000                       # number of overall MSHR miss cycles
-system.cpu.l2cache.overall_mshr_miss_rate     0.991416                       # mshr miss rate for overall accesses
-system.cpu.l2cache.overall_mshr_misses            462                       # number of overall MSHR misses
+system.cpu.l2cache.overall_mshr_miss_latency     14521500                       # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_rate     0.991489                       # mshr miss rate for overall accesses
+system.cpu.l2cache.overall_mshr_misses            466                       # number of overall MSHR misses
 system.cpu.l2cache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.l2cache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu.l2cache.replacements                     0                       # number of replacements
-system.cpu.l2cache.sampled_refs                   398                       # Sample count of references to valid blocks.
+system.cpu.l2cache.sampled_refs                   402                       # Sample count of references to valid blocks.
 system.cpu.l2cache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse               209.158769                       # Cycle average of tags in use
+system.cpu.l2cache.tagsinuse               210.151573                       # Cycle average of tags in use
 system.cpu.l2cache.total_refs                       4                       # Total number of references to valid blocks.
 system.cpu.l2cache.warmup_cycle                     0                       # Cycle when the warmup percentage was hit.
 system.cpu.l2cache.writebacks                       0                       # number of writebacks
 system.cpu.memDep0.conflictingLoads                 5                       # Number of conflicting loads.
 system.cpu.memDep0.conflictingStores                2                       # Number of conflicting stores.
-system.cpu.memDep0.insertedLoads                 2795                       # Number of loads inserted to the mem dependence unit.
-system.cpu.memDep0.insertedStores                1158                       # Number of stores inserted to the mem dependence unit.
-system.cpu.numCycles                            27830                       # number of cpu cycles simulated
-system.cpu.rename.RENAME:BlockCycles               20                       # Number of cycles rename is blocking
-system.cpu.rename.RENAME:CommittedMaps           3323                       # Number of HB maps that are committed
-system.cpu.rename.RENAME:IdleCycles             10291                       # Number of cycles rename is idle
-system.cpu.rename.RENAME:LSQFullEvents             16                       # Number of times rename has blocked due to LSQ full
-system.cpu.rename.RENAME:RenameLookups          15666                       # Number of register rename lookups that rename has made
-system.cpu.rename.RENAME:RenamedInsts           13454                       # Number of instructions processed by rename
-system.cpu.rename.RENAME:RenamedOperands         8251                       # Number of destination operands rename has renamed
-system.cpu.rename.RENAME:RunCycles               3462                       # Number of cycles rename is running
-system.cpu.rename.RENAME:SquashCycles            1056                       # Number of cycles rename is squashing
-system.cpu.rename.RENAME:UnblockCycles             29                       # Number of cycles rename is unblocking
-system.cpu.rename.RENAME:UndoneMaps              4928                       # Number of HB maps that are undone due to squashing
-system.cpu.rename.RENAME:serializeStallCycles          428                       # count of cycles rename stalled for serializing inst
-system.cpu.rename.RENAME:serializingInsts           20                       # count of serializing insts renamed
-system.cpu.rename.RENAME:skidInsts                125                       # count of insts added to the skid buffer
-system.cpu.rename.RENAME:tempSerializingInsts           14                       # count of temporary serializing insts renamed
-system.cpu.timesIdled                             250                       # Number of times that the entire CPU went into an idle state and unscheduled itself
-system.cpu.workload.PROG:num_syscalls              13                       # Number of system calls
+system.cpu.memDep0.insertedLoads                 2806                       # Number of loads inserted to the mem dependence unit.
+system.cpu.memDep0.insertedStores                1159                       # Number of stores inserted to the mem dependence unit.
+system.cpu.numCycles                            28122                       # number of cpu cycles simulated
+system.cpu.rename.RENAME:BlockCycles                5                       # Number of cycles rename is blocking
+system.cpu.rename.RENAME:CommittedMaps           3410                       # Number of HB maps that are committed
+system.cpu.rename.RENAME:IdleCycles             10468                       # Number of cycles rename is idle
+system.cpu.rename.RENAME:LSQFullEvents              9                       # Number of times rename has blocked due to LSQ full
+system.cpu.rename.RENAME:RenameLookups          15900                       # Number of register rename lookups that rename has made
+system.cpu.rename.RENAME:RenamedInsts           13681                       # Number of instructions processed by rename
+system.cpu.rename.RENAME:RenamedOperands         8420                       # Number of destination operands rename has renamed
+system.cpu.rename.RENAME:RunCycles               3575                       # Number of cycles rename is running
+system.cpu.rename.RENAME:SquashCycles            1080                       # Number of cycles rename is squashing
+system.cpu.rename.RENAME:UnblockCycles             19                       # Number of cycles rename is unblocking
+system.cpu.rename.RENAME:UndoneMaps              5010                       # Number of HB maps that are undone due to squashing
+system.cpu.rename.RENAME:serializeStallCycles          494                       # count of cycles rename stalled for serializing inst
+system.cpu.rename.RENAME:serializingInsts           17                       # count of serializing insts renamed
+system.cpu.rename.RENAME:skidInsts                111                       # count of insts added to the skid buffer
+system.cpu.rename.RENAME:tempSerializingInsts           11                       # count of temporary serializing insts renamed
+system.cpu.timesIdled                             249                       # Number of times that the entire CPU went into an idle state and unscheduled itself
+system.cpu.workload.PROG:num_syscalls               8                       # Number of system calls
 
 ---------- End Simulation Statistics   ----------
diff --git a/tests/quick/00.hello/ref/mips/linux/simple-atomic-ruby/config.ini b/tests/quick/00.hello/ref/mips/linux/simple-atomic-ruby/config.ini
index cae17207c..eb0e10d29 100644
--- a/tests/quick/00.hello/ref/mips/linux/simple-atomic-ruby/config.ini
+++ b/tests/quick/00.hello/ref/mips/linux/simple-atomic-ruby/config.ini
@@ -135,7 +135,7 @@ port=system.physmem.port[0] system.cpu.icache_port system.cpu.dcache_port
 [system.physmem]
 type=RubyMemory
 clock=1
-config_file=build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/simple-atomic-ruby/ruby.config
+config_file=build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/simple-atomic-ruby/ruby.config
 debug=false
 debug_file=ruby.debug
 file=
@@ -143,8 +143,10 @@ latency=30000
 latency_var=0
 null=false
 num_cpus=1
+num_dmas=1
 phase=0
-range=0:134217727
+ports_per_core=2
+range=0:1073741823
 stats_file=ruby.stats
 zero=false
 port=system.membus.port[0]
diff --git a/tests/quick/00.hello/ref/mips/linux/simple-atomic-ruby/simerr b/tests/quick/00.hello/ref/mips/linux/simple-atomic-ruby/simerr
index aece78b32..9a6ce1210 100755
--- a/tests/quick/00.hello/ref/mips/linux/simple-atomic-ruby/simerr
+++ b/tests/quick/00.hello/ref/mips/linux/simple-atomic-ruby/simerr
@@ -1,4 +1,4 @@
-["-r", "tests/configs/../../src/mem/ruby/config/MI_example-homogeneous.rb", "-p", "1", "-m", "1", "-s", "1024"]
+["-r", "tests/configs/../../src/mem/ruby/config/MI_example-homogeneous.rb", "-p", "1", "-m", "1", "-s", "1024", "-C", "32768", "-A", "8", "-D", "1"]
 print config: 1
 warn: Sockets disabled, not accepting gdb connections
 For more information see: http://www.m5sim.org/warn/d946bea6
diff --git a/tests/quick/00.hello/ref/mips/linux/simple-atomic-ruby/simout b/tests/quick/00.hello/ref/mips/linux/simple-atomic-ruby/simout
index 7408d6fc9..7ac0ea8eb 100755
--- a/tests/quick/00.hello/ref/mips/linux/simple-atomic-ruby/simout
+++ b/tests/quick/00.hello/ref/mips/linux/simple-atomic-ruby/simout
@@ -5,13 +5,13 @@ The Regents of The University of Michigan
 All Rights Reserved
 
 
-M5 compiled Oct  6 2009 20:51:47
-M5 revision 300266bf68ec+ 6674+ default tip
-M5 started Oct  6 2009 20:51:48
-M5 executing on zooks
-command line: build/MIPS_SE/m5.fast -d build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/simple-atomic-ruby -re tests/run.py build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/simple-atomic-ruby
+M5 compiled Jan  2 2010 07:01:31
+M5 revision a538feb8a617 6813 default qtip tip qbase fixhelp.patch
+M5 started Jan  2 2010 07:03:09
+M5 executing on fajita
+command line: build/MIPS_SE/m5.opt -d build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/simple-atomic-ruby -re tests/run.py build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/simple-atomic-ruby
 Global frequency set at 1000000000000 ticks per second
 info: Entering event queue @ 0.  Starting simulation...
 info: Increasing stack size by one page.
 Hello World!
-Exiting @ tick 2842500 because target called exit()
+Exiting @ tick 2913500 because target called exit()
diff --git a/tests/quick/00.hello/ref/mips/linux/simple-atomic-ruby/stats.txt b/tests/quick/00.hello/ref/mips/linux/simple-atomic-ruby/stats.txt
index 94d67cedd..3e60620c5 100644
--- a/tests/quick/00.hello/ref/mips/linux/simple-atomic-ruby/stats.txt
+++ b/tests/quick/00.hello/ref/mips/linux/simple-atomic-ruby/stats.txt
@@ -1,13 +1,13 @@
 
 ---------- Begin Simulation Statistics ----------
-host_inst_rate                                  27672                       # Simulator instruction rate (inst/s)
-host_mem_usage                                1265116                       # Number of bytes of host memory used
-host_seconds                                     0.21                       # Real time elapsed on the host
-host_tick_rate                               13820616                       # Simulator tick rate (ticks/s)
+host_inst_rate                                  57498                       # Simulator instruction rate (inst/s)
+host_mem_usage                                2303472                       # Number of bytes of host memory used
+host_seconds                                     0.10                       # Real time elapsed on the host
+host_tick_rate                               28699061                       # Simulator tick rate (ticks/s)
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
-sim_insts                                        5685                       # Number of instructions simulated
+sim_insts                                        5827                       # Number of instructions simulated
 sim_seconds                                  0.000003                       # Number of seconds simulated
-sim_ticks                                     2842500                       # Number of ticks simulated
+sim_ticks                                     2913500                       # Number of ticks simulated
 system.cpu.dtb.accesses                             0                       # DTB accesses
 system.cpu.dtb.hits                                 0                       # DTB hits
 system.cpu.dtb.misses                               0                       # DTB misses
@@ -28,9 +28,9 @@ system.cpu.itb.write_accesses                       0                       # DT
 system.cpu.itb.write_hits                           0                       # DTB write hits
 system.cpu.itb.write_misses                         0                       # DTB write misses
 system.cpu.not_idle_fraction                        1                       # Percentage of non-idle cycles
-system.cpu.numCycles                             5686                       # number of cpu cycles simulated
-system.cpu.num_insts                             5685                       # Number of instructions executed
-system.cpu.num_refs                              2058                       # Number of memory references
-system.cpu.workload.PROG:num_syscalls              13                       # Number of system calls
+system.cpu.numCycles                             5828                       # number of cpu cycles simulated
+system.cpu.num_insts                             5827                       # Number of instructions executed
+system.cpu.num_refs                              2090                       # Number of memory references
+system.cpu.workload.PROG:num_syscalls               8                       # Number of system calls
 
 ---------- End Simulation Statistics   ----------
diff --git a/tests/quick/00.hello/ref/mips/linux/simple-atomic/config.ini b/tests/quick/00.hello/ref/mips/linux/simple-atomic/config.ini
index 296171530..5d677c743 100644
--- a/tests/quick/00.hello/ref/mips/linux/simple-atomic/config.ini
+++ b/tests/quick/00.hello/ref/mips/linux/simple-atomic/config.ini
@@ -111,7 +111,7 @@ egid=100
 env=
 errout=cerr
 euid=100
-executable=tests/test-progs/hello/bin/mips/linux/hello
+executable=/dist/m5/regression/test-progs/hello/bin/mips/linux/hello
 gid=100
 input=cin
 max_stack_size=67108864
diff --git a/tests/quick/00.hello/ref/mips/linux/simple-atomic/simout b/tests/quick/00.hello/ref/mips/linux/simple-atomic/simout
index 77cc5d321..a364f6e08 100755
--- a/tests/quick/00.hello/ref/mips/linux/simple-atomic/simout
+++ b/tests/quick/00.hello/ref/mips/linux/simple-atomic/simout
@@ -5,13 +5,13 @@ The Regents of The University of Michigan
 All Rights Reserved
 
 
-M5 compiled Sep 24 2009 12:19:09
-M5 revision 9bc3e4611009+ 6661+ default tip
-M5 started Sep 24 2009 12:19:47
-M5 executing on zooks
-command line: build/MIPS_SE/m5.fast -d build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/simple-atomic -re tests/run.py build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/simple-atomic
+M5 compiled Jan  2 2010 07:01:31
+M5 revision a538feb8a617 6813 default qtip tip qbase fixhelp.patch
+M5 started Jan  2 2010 07:03:10
+M5 executing on fajita
+command line: build/MIPS_SE/m5.opt -d build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/simple-atomic -re tests/run.py build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/simple-atomic
 Global frequency set at 1000000000000 ticks per second
 info: Entering event queue @ 0.  Starting simulation...
 info: Increasing stack size by one page.
 Hello World!
-Exiting @ tick 2842500 because target called exit()
+Exiting @ tick 2913500 because target called exit()
diff --git a/tests/quick/00.hello/ref/mips/linux/simple-atomic/stats.txt b/tests/quick/00.hello/ref/mips/linux/simple-atomic/stats.txt
index d36fc469a..090c28d32 100644
--- a/tests/quick/00.hello/ref/mips/linux/simple-atomic/stats.txt
+++ b/tests/quick/00.hello/ref/mips/linux/simple-atomic/stats.txt
@@ -1,13 +1,13 @@
 
 ---------- Begin Simulation Statistics ----------
-host_inst_rate                                 588083                       # Simulator instruction rate (inst/s)
-host_mem_usage                                 149516                       # Number of bytes of host memory used
+host_inst_rate                                 449580                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 197348                       # Number of bytes of host memory used
 host_seconds                                     0.01                       # Real time elapsed on the host
-host_tick_rate                              285563593                       # Simulator tick rate (ticks/s)
+host_tick_rate                              220987561                       # Simulator tick rate (ticks/s)
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
-sim_insts                                        5685                       # Number of instructions simulated
+sim_insts                                        5827                       # Number of instructions simulated
 sim_seconds                                  0.000003                       # Number of seconds simulated
-sim_ticks                                     2842500                       # Number of ticks simulated
+sim_ticks                                     2913500                       # Number of ticks simulated
 system.cpu.dtb.accesses                             0                       # DTB accesses
 system.cpu.dtb.hits                                 0                       # DTB hits
 system.cpu.dtb.misses                               0                       # DTB misses
@@ -28,9 +28,9 @@ system.cpu.itb.write_accesses                       0                       # DT
 system.cpu.itb.write_hits                           0                       # DTB write hits
 system.cpu.itb.write_misses                         0                       # DTB write misses
 system.cpu.not_idle_fraction                        1                       # Percentage of non-idle cycles
-system.cpu.numCycles                             5686                       # number of cpu cycles simulated
-system.cpu.num_insts                             5685                       # Number of instructions executed
-system.cpu.num_refs                              2058                       # Number of memory references
-system.cpu.workload.PROG:num_syscalls              13                       # Number of system calls
+system.cpu.numCycles                             5828                       # number of cpu cycles simulated
+system.cpu.num_insts                             5827                       # Number of instructions executed
+system.cpu.num_refs                              2090                       # Number of memory references
+system.cpu.workload.PROG:num_syscalls               8                       # Number of system calls
 
 ---------- End Simulation Statistics   ----------
diff --git a/tests/quick/00.hello/ref/mips/linux/simple-timing-ruby/config.ini b/tests/quick/00.hello/ref/mips/linux/simple-timing-ruby/config.ini
index 1562d7d6a..a290f96a4 100644
--- a/tests/quick/00.hello/ref/mips/linux/simple-timing-ruby/config.ini
+++ b/tests/quick/00.hello/ref/mips/linux/simple-timing-ruby/config.ini
@@ -132,7 +132,7 @@ port=system.physmem.port[0] system.cpu.icache_port system.cpu.dcache_port
 [system.physmem]
 type=RubyMemory
 clock=1
-config_file=build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/simple-timing-ruby/ruby.config
+config_file=build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/simple-timing-ruby/ruby.config
 debug=false
 debug_file=ruby.debug
 file=
@@ -140,8 +140,10 @@ latency=30000
 latency_var=0
 null=false
 num_cpus=1
+num_dmas=1
 phase=0
-range=0:134217727
+ports_per_core=2
+range=0:1073741823
 stats_file=ruby.stats
 zero=false
 port=system.membus.port[0]
diff --git a/tests/quick/00.hello/ref/mips/linux/simple-timing-ruby/simerr b/tests/quick/00.hello/ref/mips/linux/simple-timing-ruby/simerr
index aece78b32..9a6ce1210 100755
--- a/tests/quick/00.hello/ref/mips/linux/simple-timing-ruby/simerr
+++ b/tests/quick/00.hello/ref/mips/linux/simple-timing-ruby/simerr
@@ -1,4 +1,4 @@
-["-r", "tests/configs/../../src/mem/ruby/config/MI_example-homogeneous.rb", "-p", "1", "-m", "1", "-s", "1024"]
+["-r", "tests/configs/../../src/mem/ruby/config/MI_example-homogeneous.rb", "-p", "1", "-m", "1", "-s", "1024", "-C", "32768", "-A", "8", "-D", "1"]
 print config: 1
 warn: Sockets disabled, not accepting gdb connections
 For more information see: http://www.m5sim.org/warn/d946bea6
diff --git a/tests/quick/00.hello/ref/mips/linux/simple-timing-ruby/simout b/tests/quick/00.hello/ref/mips/linux/simple-timing-ruby/simout
index 6c7350461..cf385d81f 100755
--- a/tests/quick/00.hello/ref/mips/linux/simple-timing-ruby/simout
+++ b/tests/quick/00.hello/ref/mips/linux/simple-timing-ruby/simout
@@ -5,13 +5,13 @@ The Regents of The University of Michigan
 All Rights Reserved
 
 
-M5 compiled Oct  6 2009 20:43:14
-M5 revision 300266bf68ec 6674 default tip
-M5 started Oct  6 2009 20:47:38
-M5 executing on zooks
-command line: build/MIPS_SE/m5.fast -d build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/simple-timing-ruby -re tests/run.py build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/simple-timing-ruby
+M5 compiled Jan  2 2010 07:01:31
+M5 revision a538feb8a617 6813 default qtip tip qbase fixhelp.patch
+M5 started Jan  2 2010 07:03:09
+M5 executing on fajita
+command line: build/MIPS_SE/m5.opt -d build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/simple-timing-ruby -re tests/run.py build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/simple-timing-ruby
 Global frequency set at 1000000000000 ticks per second
 info: Entering event queue @ 0.  Starting simulation...
 info: Increasing stack size by one page.
 Hello World!
-Exiting @ tick 23227000 because target called exit()
+Exiting @ tick 23749000 because target called exit()
diff --git a/tests/quick/00.hello/ref/mips/linux/simple-timing-ruby/stats.txt b/tests/quick/00.hello/ref/mips/linux/simple-timing-ruby/stats.txt
index 15c68a6b0..8a4afd8c9 100644
--- a/tests/quick/00.hello/ref/mips/linux/simple-timing-ruby/stats.txt
+++ b/tests/quick/00.hello/ref/mips/linux/simple-timing-ruby/stats.txt
@@ -1,13 +1,13 @@
 
 ---------- Begin Simulation Statistics ----------
-host_inst_rate                                   3701                       # Simulator instruction rate (inst/s)
-host_mem_usage                                1265204                       # Number of bytes of host memory used
-host_seconds                                     1.54                       # Real time elapsed on the host
-host_tick_rate                               15119806                       # Simulator tick rate (ticks/s)
+host_inst_rate                                   6560                       # Simulator instruction rate (inst/s)
+host_mem_usage                                2303716                       # Number of bytes of host memory used
+host_seconds                                     0.89                       # Real time elapsed on the host
+host_tick_rate                               26729951                       # Simulator tick rate (ticks/s)
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
-sim_insts                                        5685                       # Number of instructions simulated
-sim_seconds                                  0.000023                       # Number of seconds simulated
-sim_ticks                                    23227000                       # Number of ticks simulated
+sim_insts                                        5827                       # Number of instructions simulated
+sim_seconds                                  0.000024                       # Number of seconds simulated
+sim_ticks                                    23749000                       # Number of ticks simulated
 system.cpu.dtb.accesses                             0                       # DTB accesses
 system.cpu.dtb.hits                                 0                       # DTB hits
 system.cpu.dtb.misses                               0                       # DTB misses
@@ -28,9 +28,9 @@ system.cpu.itb.write_accesses                       0                       # DT
 system.cpu.itb.write_hits                           0                       # DTB write hits
 system.cpu.itb.write_misses                         0                       # DTB write misses
 system.cpu.not_idle_fraction                        1                       # Percentage of non-idle cycles
-system.cpu.numCycles                            46454                       # number of cpu cycles simulated
-system.cpu.num_insts                             5685                       # Number of instructions executed
-system.cpu.num_refs                              2058                       # Number of memory references
-system.cpu.workload.PROG:num_syscalls              13                       # Number of system calls
+system.cpu.numCycles                            47498                       # number of cpu cycles simulated
+system.cpu.num_insts                             5827                       # Number of instructions executed
+system.cpu.num_refs                              2090                       # Number of memory references
+system.cpu.workload.PROG:num_syscalls               8                       # Number of system calls
 
 ---------- End Simulation Statistics   ----------
diff --git a/tests/quick/00.hello/ref/mips/linux/simple-timing/config.ini b/tests/quick/00.hello/ref/mips/linux/simple-timing/config.ini
index 2edca998b..3e36bc6f8 100644
--- a/tests/quick/00.hello/ref/mips/linux/simple-timing/config.ini
+++ b/tests/quick/00.hello/ref/mips/linux/simple-timing/config.ini
@@ -99,7 +99,6 @@ hash_delay=1
 latency=1000
 max_miss_count=0
 mshrs=10
-prefetch_cache_check_push=true
 prefetch_data_accesses_only=false
 prefetch_degree=1
 prefetch_latency=10000
@@ -134,7 +133,6 @@ hash_delay=1
 latency=1000
 max_miss_count=0
 mshrs=10
-prefetch_cache_check_push=true
 prefetch_data_accesses_only=false
 prefetch_degree=1
 prefetch_latency=10000
@@ -169,7 +167,6 @@ hash_delay=1
 latency=10000
 max_miss_count=0
 mshrs=10
-prefetch_cache_check_push=true
 prefetch_data_accesses_only=false
 prefetch_degree=1
 prefetch_latency=100000
@@ -211,7 +208,7 @@ egid=100
 env=
 errout=cerr
 euid=100
-executable=tests/test-progs/hello/bin/mips/linux/hello
+executable=/dist/m5/regression/test-progs/hello/bin/mips/linux/hello
 gid=100
 input=cin
 max_stack_size=67108864
diff --git a/tests/quick/00.hello/ref/mips/linux/simple-timing/simout b/tests/quick/00.hello/ref/mips/linux/simple-timing/simout
index 15331f633..f5b9b6f90 100755
--- a/tests/quick/00.hello/ref/mips/linux/simple-timing/simout
+++ b/tests/quick/00.hello/ref/mips/linux/simple-timing/simout
@@ -5,13 +5,13 @@ The Regents of The University of Michigan
 All Rights Reserved
 
 
-M5 compiled Sep 24 2009 12:19:09
-M5 revision 9bc3e4611009+ 6661+ default tip
-M5 started Sep 24 2009 12:19:31
-M5 executing on zooks
-command line: build/MIPS_SE/m5.fast -d build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/simple-timing -re tests/run.py build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/simple-timing
+M5 compiled Jan  2 2010 07:01:31
+M5 revision a538feb8a617 6813 default qtip tip qbase fixhelp.patch
+M5 started Jan  2 2010 07:03:09
+M5 executing on fajita
+command line: build/MIPS_SE/m5.opt -d build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/simple-timing -re tests/run.py build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/simple-timing
 Global frequency set at 1000000000000 ticks per second
 info: Entering event queue @ 0.  Starting simulation...
 info: Increasing stack size by one page.
 Hello World!
-Exiting @ tick 32409000 because target called exit()
+Exiting @ tick 32803000 because target called exit()
diff --git a/tests/quick/00.hello/ref/mips/linux/simple-timing/stats.txt b/tests/quick/00.hello/ref/mips/linux/simple-timing/stats.txt
index 3bfaf3540..14247d496 100644
--- a/tests/quick/00.hello/ref/mips/linux/simple-timing/stats.txt
+++ b/tests/quick/00.hello/ref/mips/linux/simple-timing/stats.txt
@@ -1,74 +1,74 @@
 
 ---------- Begin Simulation Statistics ----------
-host_inst_rate                                 303832                       # Simulator instruction rate (inst/s)
-host_mem_usage                                 155376                       # Number of bytes of host memory used
-host_seconds                                     0.02                       # Real time elapsed on the host
-host_tick_rate                             1703674499                       # Simulator tick rate (ticks/s)
+host_inst_rate                                  21056                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 204976                       # Number of bytes of host memory used
+host_seconds                                     0.28                       # Real time elapsed on the host
+host_tick_rate                              118397165                       # Simulator tick rate (ticks/s)
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
-sim_insts                                        5685                       # Number of instructions simulated
-sim_seconds                                  0.000032                       # Number of seconds simulated
-sim_ticks                                    32409000                       # Number of ticks simulated
-system.cpu.dcache.ReadReq_accesses               1133                       # number of ReadReq accesses(hits+misses)
+sim_insts                                        5827                       # Number of instructions simulated
+sim_seconds                                  0.000033                       # Number of seconds simulated
+sim_ticks                                    32803000                       # Number of ticks simulated
+system.cpu.dcache.ReadReq_accesses               1164                       # number of ReadReq accesses(hits+misses)
 system.cpu.dcache.ReadReq_avg_miss_latency        56000                       # average ReadReq miss latency
 system.cpu.dcache.ReadReq_avg_mshr_miss_latency        53000                       # average ReadReq mshr miss latency
-system.cpu.dcache.ReadReq_hits                   1051                       # number of ReadReq hits
-system.cpu.dcache.ReadReq_miss_latency        4592000                       # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_rate          0.072374                       # miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_misses                   82                       # number of ReadReq misses
-system.cpu.dcache.ReadReq_mshr_miss_latency      4346000                       # number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_rate     0.072374                       # mshr miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_mshr_misses              82                       # number of ReadReq MSHR misses
-system.cpu.dcache.WriteReq_accesses               924                       # number of WriteReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_hits                   1077                       # number of ReadReq hits
+system.cpu.dcache.ReadReq_miss_latency        4872000                       # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_rate          0.074742                       # miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_misses                   87                       # number of ReadReq misses
+system.cpu.dcache.ReadReq_mshr_miss_latency      4611000                       # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_rate     0.074742                       # mshr miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_mshr_misses              87                       # number of ReadReq MSHR misses
+system.cpu.dcache.WriteReq_accesses               925                       # number of WriteReq accesses(hits+misses)
 system.cpu.dcache.WriteReq_avg_miss_latency        56000                       # average WriteReq miss latency
 system.cpu.dcache.WriteReq_avg_mshr_miss_latency        53000                       # average WriteReq mshr miss latency
-system.cpu.dcache.WriteReq_hits                   860                       # number of WriteReq hits
+system.cpu.dcache.WriteReq_hits                   861                       # number of WriteReq hits
 system.cpu.dcache.WriteReq_miss_latency       3584000                       # number of WriteReq miss cycles
-system.cpu.dcache.WriteReq_miss_rate         0.069264                       # miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_miss_rate         0.069189                       # miss rate for WriteReq accesses
 system.cpu.dcache.WriteReq_misses                  64                       # number of WriteReq misses
 system.cpu.dcache.WriteReq_mshr_miss_latency      3392000                       # number of WriteReq MSHR miss cycles
-system.cpu.dcache.WriteReq_mshr_miss_rate     0.069264                       # mshr miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_mshr_miss_rate     0.069189                       # mshr miss rate for WriteReq accesses
 system.cpu.dcache.WriteReq_mshr_misses             64                       # number of WriteReq MSHR misses
 system.cpu.dcache.avg_blocked_cycles::no_mshrs     no_value                       # average number of cycles each access was blocked
 system.cpu.dcache.avg_blocked_cycles::no_targets     no_value                       # average number of cycles each access was blocked
-system.cpu.dcache.avg_refs                  14.583333                       # Average number of references to valid blocks.
+system.cpu.dcache.avg_refs                  14.137681                       # Average number of references to valid blocks.
 system.cpu.dcache.blocked::no_mshrs                 0                       # number of cycles access was blocked
 system.cpu.dcache.blocked::no_targets               0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles::no_mshrs            0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles::no_targets            0                       # number of cycles access was blocked
 system.cpu.dcache.cache_copies                      0                       # number of cache copies performed
-system.cpu.dcache.demand_accesses                2057                       # number of demand (read+write) accesses
+system.cpu.dcache.demand_accesses                2089                       # number of demand (read+write) accesses
 system.cpu.dcache.demand_avg_miss_latency        56000                       # average overall miss latency
 system.cpu.dcache.demand_avg_mshr_miss_latency        53000                       # average overall mshr miss latency
-system.cpu.dcache.demand_hits                    1911                       # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency         8176000                       # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_rate           0.070977                       # miss rate for demand accesses
-system.cpu.dcache.demand_misses                   146                       # number of demand (read+write) misses
+system.cpu.dcache.demand_hits                    1938                       # number of demand (read+write) hits
+system.cpu.dcache.demand_miss_latency         8456000                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_rate           0.072283                       # miss rate for demand accesses
+system.cpu.dcache.demand_misses                   151                       # number of demand (read+write) misses
 system.cpu.dcache.demand_mshr_hits                  0                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency      7738000                       # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_rate      0.070977                       # mshr miss rate for demand accesses
-system.cpu.dcache.demand_mshr_misses              146                       # number of demand (read+write) MSHR misses
+system.cpu.dcache.demand_mshr_miss_latency      8003000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_rate      0.072283                       # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_misses              151                       # number of demand (read+write) MSHR misses
 system.cpu.dcache.fast_writes                       0                       # number of fast writes performed
 system.cpu.dcache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.dcache.overall_accesses               2057                       # number of overall (read+write) accesses
+system.cpu.dcache.overall_accesses               2089                       # number of overall (read+write) accesses
 system.cpu.dcache.overall_avg_miss_latency        56000                       # average overall miss latency
 system.cpu.dcache.overall_avg_mshr_miss_latency        53000                       # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency     no_value                       # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits                   1911                       # number of overall hits
-system.cpu.dcache.overall_miss_latency        8176000                       # number of overall miss cycles
-system.cpu.dcache.overall_miss_rate          0.070977                       # miss rate for overall accesses
-system.cpu.dcache.overall_misses                  146                       # number of overall misses
+system.cpu.dcache.overall_hits                   1938                       # number of overall hits
+system.cpu.dcache.overall_miss_latency        8456000                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_rate          0.072283                       # miss rate for overall accesses
+system.cpu.dcache.overall_misses                  151                       # number of overall misses
 system.cpu.dcache.overall_mshr_hits                 0                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency      7738000                       # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_rate     0.070977                       # mshr miss rate for overall accesses
-system.cpu.dcache.overall_mshr_misses             146                       # number of overall MSHR misses
+system.cpu.dcache.overall_mshr_miss_latency      8003000                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_rate     0.072283                       # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_misses             151                       # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.dcache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu.dcache.replacements                      0                       # number of replacements
-system.cpu.dcache.sampled_refs                    132                       # Sample count of references to valid blocks.
+system.cpu.dcache.sampled_refs                    138                       # Sample count of references to valid blocks.
 system.cpu.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse                 83.830110                       # Cycle average of tags in use
-system.cpu.dcache.total_refs                     1925                       # Total number of references to valid blocks.
+system.cpu.dcache.tagsinuse                 87.887695                       # Cycle average of tags in use
+system.cpu.dcache.total_refs                     1951                       # Total number of references to valid blocks.
 system.cpu.dcache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.dcache.writebacks                        0                       # number of writebacks
 system.cpu.dtb.accesses                             0                       # DTB accesses
@@ -80,57 +80,57 @@ system.cpu.dtb.read_misses                          0                       # DT
 system.cpu.dtb.write_accesses                       0                       # DTB write accesses
 system.cpu.dtb.write_hits                           0                       # DTB write hits
 system.cpu.dtb.write_misses                         0                       # DTB write misses
-system.cpu.icache.ReadReq_accesses               5687                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_avg_miss_latency 55723.684211                       # average ReadReq miss latency
-system.cpu.icache.ReadReq_avg_mshr_miss_latency 52723.684211                       # average ReadReq mshr miss latency
-system.cpu.icache.ReadReq_hits                   5383                       # number of ReadReq hits
-system.cpu.icache.ReadReq_miss_latency       16940000                       # number of ReadReq miss cycles
-system.cpu.icache.ReadReq_miss_rate          0.053455                       # miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_misses                  304                       # number of ReadReq misses
-system.cpu.icache.ReadReq_mshr_miss_latency     16028000                       # number of ReadReq MSHR miss cycles
-system.cpu.icache.ReadReq_mshr_miss_rate     0.053455                       # mshr miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_mshr_misses             304                       # number of ReadReq MSHR misses
+system.cpu.icache.ReadReq_accesses               5829                       # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_avg_miss_latency 55722.772277                       # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency 52722.772277                       # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_hits                   5526                       # number of ReadReq hits
+system.cpu.icache.ReadReq_miss_latency       16884000                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_rate          0.051981                       # miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_misses                  303                       # number of ReadReq misses
+system.cpu.icache.ReadReq_mshr_miss_latency     15975000                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_rate     0.051981                       # mshr miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_mshr_misses             303                       # number of ReadReq MSHR misses
 system.cpu.icache.avg_blocked_cycles::no_mshrs     no_value                       # average number of cycles each access was blocked
 system.cpu.icache.avg_blocked_cycles::no_targets     no_value                       # average number of cycles each access was blocked
-system.cpu.icache.avg_refs                  17.707237                       # Average number of references to valid blocks.
+system.cpu.icache.avg_refs                  18.237624                       # Average number of references to valid blocks.
 system.cpu.icache.blocked::no_mshrs                 0                       # number of cycles access was blocked
 system.cpu.icache.blocked::no_targets               0                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles::no_mshrs            0                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles::no_targets            0                       # number of cycles access was blocked
 system.cpu.icache.cache_copies                      0                       # number of cache copies performed
-system.cpu.icache.demand_accesses                5687                       # number of demand (read+write) accesses
-system.cpu.icache.demand_avg_miss_latency 55723.684211                       # average overall miss latency
-system.cpu.icache.demand_avg_mshr_miss_latency 52723.684211                       # average overall mshr miss latency
-system.cpu.icache.demand_hits                    5383                       # number of demand (read+write) hits
-system.cpu.icache.demand_miss_latency        16940000                       # number of demand (read+write) miss cycles
-system.cpu.icache.demand_miss_rate           0.053455                       # miss rate for demand accesses
-system.cpu.icache.demand_misses                   304                       # number of demand (read+write) misses
+system.cpu.icache.demand_accesses                5829                       # number of demand (read+write) accesses
+system.cpu.icache.demand_avg_miss_latency 55722.772277                       # average overall miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency 52722.772277                       # average overall mshr miss latency
+system.cpu.icache.demand_hits                    5526                       # number of demand (read+write) hits
+system.cpu.icache.demand_miss_latency        16884000                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_rate           0.051981                       # miss rate for demand accesses
+system.cpu.icache.demand_misses                   303                       # number of demand (read+write) misses
 system.cpu.icache.demand_mshr_hits                  0                       # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_miss_latency     16028000                       # number of demand (read+write) MSHR miss cycles
-system.cpu.icache.demand_mshr_miss_rate      0.053455                       # mshr miss rate for demand accesses
-system.cpu.icache.demand_mshr_misses              304                       # number of demand (read+write) MSHR misses
+system.cpu.icache.demand_mshr_miss_latency     15975000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_rate      0.051981                       # mshr miss rate for demand accesses
+system.cpu.icache.demand_mshr_misses              303                       # number of demand (read+write) MSHR misses
 system.cpu.icache.fast_writes                       0                       # number of fast writes performed
 system.cpu.icache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.icache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.icache.overall_accesses               5687                       # number of overall (read+write) accesses
-system.cpu.icache.overall_avg_miss_latency 55723.684211                       # average overall miss latency
-system.cpu.icache.overall_avg_mshr_miss_latency 52723.684211                       # average overall mshr miss latency
+system.cpu.icache.overall_accesses               5829                       # number of overall (read+write) accesses
+system.cpu.icache.overall_avg_miss_latency 55722.772277                       # average overall miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency 52722.772277                       # average overall mshr miss latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency     no_value                       # average overall mshr uncacheable latency
-system.cpu.icache.overall_hits                   5383                       # number of overall hits
-system.cpu.icache.overall_miss_latency       16940000                       # number of overall miss cycles
-system.cpu.icache.overall_miss_rate          0.053455                       # miss rate for overall accesses
-system.cpu.icache.overall_misses                  304                       # number of overall misses
+system.cpu.icache.overall_hits                   5526                       # number of overall hits
+system.cpu.icache.overall_miss_latency       16884000                       # number of overall miss cycles
+system.cpu.icache.overall_miss_rate          0.051981                       # miss rate for overall accesses
+system.cpu.icache.overall_misses                  303                       # number of overall misses
 system.cpu.icache.overall_mshr_hits                 0                       # number of overall MSHR hits
-system.cpu.icache.overall_mshr_miss_latency     16028000                       # number of overall MSHR miss cycles
-system.cpu.icache.overall_mshr_miss_rate     0.053455                       # mshr miss rate for overall accesses
-system.cpu.icache.overall_mshr_misses             304                       # number of overall MSHR misses
+system.cpu.icache.overall_mshr_miss_latency     15975000                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_rate     0.051981                       # mshr miss rate for overall accesses
+system.cpu.icache.overall_mshr_misses             303                       # number of overall MSHR misses
 system.cpu.icache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.icache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu.icache.replacements                     13                       # number of replacements
-system.cpu.icache.sampled_refs                    304                       # Sample count of references to valid blocks.
+system.cpu.icache.sampled_refs                    303                       # Sample count of references to valid blocks.
 system.cpu.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse                135.394401                       # Cycle average of tags in use
-system.cpu.icache.total_refs                     5383                       # Total number of references to valid blocks.
+system.cpu.icache.tagsinuse                133.475693                       # Cycle average of tags in use
+system.cpu.icache.total_refs                     5526                       # Total number of references to valid blocks.
 system.cpu.icache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.icache.writebacks                        0                       # number of writebacks
 system.cpu.idle_fraction                            0                       # Percentage of idle cycles
@@ -143,81 +143,81 @@ system.cpu.itb.read_misses                          0                       # DT
 system.cpu.itb.write_accesses                       0                       # DTB write accesses
 system.cpu.itb.write_hits                           0                       # DTB write hits
 system.cpu.itb.write_misses                         0                       # DTB write misses
-system.cpu.l2cache.ReadExReq_accesses              50                       # number of ReadExReq accesses(hits+misses)
+system.cpu.l2cache.ReadExReq_accesses              51                       # number of ReadExReq accesses(hits+misses)
 system.cpu.l2cache.ReadExReq_avg_miss_latency        52000                       # average ReadExReq miss latency
 system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency        40000                       # average ReadExReq mshr miss latency
-system.cpu.l2cache.ReadExReq_miss_latency      2600000                       # number of ReadExReq miss cycles
+system.cpu.l2cache.ReadExReq_miss_latency      2652000                       # number of ReadExReq miss cycles
 system.cpu.l2cache.ReadExReq_miss_rate              1                       # miss rate for ReadExReq accesses
-system.cpu.l2cache.ReadExReq_misses                50                       # number of ReadExReq misses
-system.cpu.l2cache.ReadExReq_mshr_miss_latency      2000000                       # number of ReadExReq MSHR miss cycles
+system.cpu.l2cache.ReadExReq_misses                51                       # number of ReadExReq misses
+system.cpu.l2cache.ReadExReq_mshr_miss_latency      2040000                       # number of ReadExReq MSHR miss cycles
 system.cpu.l2cache.ReadExReq_mshr_miss_rate            1                       # mshr miss rate for ReadExReq accesses
-system.cpu.l2cache.ReadExReq_mshr_misses           50                       # number of ReadExReq MSHR misses
-system.cpu.l2cache.ReadReq_accesses               386                       # number of ReadReq accesses(hits+misses)
+system.cpu.l2cache.ReadExReq_mshr_misses           51                       # number of ReadExReq MSHR misses
+system.cpu.l2cache.ReadReq_accesses               390                       # number of ReadReq accesses(hits+misses)
 system.cpu.l2cache.ReadReq_avg_miss_latency        52000                       # average ReadReq miss latency
 system.cpu.l2cache.ReadReq_avg_mshr_miss_latency        40000                       # average ReadReq mshr miss latency
 system.cpu.l2cache.ReadReq_hits                     2                       # number of ReadReq hits
-system.cpu.l2cache.ReadReq_miss_latency      19968000                       # number of ReadReq miss cycles
-system.cpu.l2cache.ReadReq_miss_rate         0.994819                       # miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_misses                 384                       # number of ReadReq misses
-system.cpu.l2cache.ReadReq_mshr_miss_latency     15360000                       # number of ReadReq MSHR miss cycles
-system.cpu.l2cache.ReadReq_mshr_miss_rate     0.994819                       # mshr miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_mshr_misses            384                       # number of ReadReq MSHR misses
-system.cpu.l2cache.UpgradeReq_accesses             14                       # number of UpgradeReq accesses(hits+misses)
+system.cpu.l2cache.ReadReq_miss_latency      20176000                       # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_miss_rate         0.994872                       # miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_misses                 388                       # number of ReadReq misses
+system.cpu.l2cache.ReadReq_mshr_miss_latency     15520000                       # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_rate     0.994872                       # mshr miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_mshr_misses            388                       # number of ReadReq MSHR misses
+system.cpu.l2cache.UpgradeReq_accesses             13                       # number of UpgradeReq accesses(hits+misses)
 system.cpu.l2cache.UpgradeReq_avg_miss_latency        52000                       # average UpgradeReq miss latency
 system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency        40000                       # average UpgradeReq mshr miss latency
-system.cpu.l2cache.UpgradeReq_miss_latency       728000                       # number of UpgradeReq miss cycles
+system.cpu.l2cache.UpgradeReq_miss_latency       676000                       # number of UpgradeReq miss cycles
 system.cpu.l2cache.UpgradeReq_miss_rate             1                       # miss rate for UpgradeReq accesses
-system.cpu.l2cache.UpgradeReq_misses               14                       # number of UpgradeReq misses
-system.cpu.l2cache.UpgradeReq_mshr_miss_latency       560000                       # number of UpgradeReq MSHR miss cycles
+system.cpu.l2cache.UpgradeReq_misses               13                       # number of UpgradeReq misses
+system.cpu.l2cache.UpgradeReq_mshr_miss_latency       520000                       # number of UpgradeReq MSHR miss cycles
 system.cpu.l2cache.UpgradeReq_mshr_miss_rate            1                       # mshr miss rate for UpgradeReq accesses
-system.cpu.l2cache.UpgradeReq_mshr_misses           14                       # number of UpgradeReq MSHR misses
+system.cpu.l2cache.UpgradeReq_mshr_misses           13                       # number of UpgradeReq MSHR misses
 system.cpu.l2cache.avg_blocked_cycles::no_mshrs     no_value                       # average number of cycles each access was blocked
 system.cpu.l2cache.avg_blocked_cycles::no_targets     no_value                       # average number of cycles each access was blocked
-system.cpu.l2cache.avg_refs                  0.005405                       # Average number of references to valid blocks.
+system.cpu.l2cache.avg_refs                  0.005333                       # Average number of references to valid blocks.
 system.cpu.l2cache.blocked::no_mshrs                0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked::no_targets              0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles::no_mshrs            0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles::no_targets            0                       # number of cycles access was blocked
 system.cpu.l2cache.cache_copies                     0                       # number of cache copies performed
-system.cpu.l2cache.demand_accesses                436                       # number of demand (read+write) accesses
+system.cpu.l2cache.demand_accesses                441                       # number of demand (read+write) accesses
 system.cpu.l2cache.demand_avg_miss_latency        52000                       # average overall miss latency
 system.cpu.l2cache.demand_avg_mshr_miss_latency        40000                       # average overall mshr miss latency
 system.cpu.l2cache.demand_hits                      2                       # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency       22568000                       # number of demand (read+write) miss cycles
-system.cpu.l2cache.demand_miss_rate          0.995413                       # miss rate for demand accesses
-system.cpu.l2cache.demand_misses                  434                       # number of demand (read+write) misses
+system.cpu.l2cache.demand_miss_latency       22828000                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_rate          0.995465                       # miss rate for demand accesses
+system.cpu.l2cache.demand_misses                  439                       # number of demand (read+write) misses
 system.cpu.l2cache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
-system.cpu.l2cache.demand_mshr_miss_latency     17360000                       # number of demand (read+write) MSHR miss cycles
-system.cpu.l2cache.demand_mshr_miss_rate     0.995413                       # mshr miss rate for demand accesses
-system.cpu.l2cache.demand_mshr_misses             434                       # number of demand (read+write) MSHR misses
+system.cpu.l2cache.demand_mshr_miss_latency     17560000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_rate     0.995465                       # mshr miss rate for demand accesses
+system.cpu.l2cache.demand_mshr_misses             439                       # number of demand (read+write) MSHR misses
 system.cpu.l2cache.fast_writes                      0                       # number of fast writes performed
 system.cpu.l2cache.mshr_cap_events                  0                       # number of times MSHR cap was activated
 system.cpu.l2cache.no_allocate_misses               0                       # Number of misses that were no-allocate
-system.cpu.l2cache.overall_accesses               436                       # number of overall (read+write) accesses
+system.cpu.l2cache.overall_accesses               441                       # number of overall (read+write) accesses
 system.cpu.l2cache.overall_avg_miss_latency        52000                       # average overall miss latency
 system.cpu.l2cache.overall_avg_mshr_miss_latency        40000                       # average overall mshr miss latency
 system.cpu.l2cache.overall_avg_mshr_uncacheable_latency     no_value                       # average overall mshr uncacheable latency
 system.cpu.l2cache.overall_hits                     2                       # number of overall hits
-system.cpu.l2cache.overall_miss_latency      22568000                       # number of overall miss cycles
-system.cpu.l2cache.overall_miss_rate         0.995413                       # miss rate for overall accesses
-system.cpu.l2cache.overall_misses                 434                       # number of overall misses
+system.cpu.l2cache.overall_miss_latency      22828000                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_rate         0.995465                       # miss rate for overall accesses
+system.cpu.l2cache.overall_misses                 439                       # number of overall misses
 system.cpu.l2cache.overall_mshr_hits                0                       # number of overall MSHR hits
-system.cpu.l2cache.overall_mshr_miss_latency     17360000                       # number of overall MSHR miss cycles
-system.cpu.l2cache.overall_mshr_miss_rate     0.995413                       # mshr miss rate for overall accesses
-system.cpu.l2cache.overall_mshr_misses            434                       # number of overall MSHR misses
+system.cpu.l2cache.overall_mshr_miss_latency     17560000                       # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_rate     0.995465                       # mshr miss rate for overall accesses
+system.cpu.l2cache.overall_mshr_misses            439                       # number of overall MSHR misses
 system.cpu.l2cache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.l2cache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu.l2cache.replacements                     0                       # number of replacements
-system.cpu.l2cache.sampled_refs                   370                       # Sample count of references to valid blocks.
+system.cpu.l2cache.sampled_refs                   375                       # Sample count of references to valid blocks.
 system.cpu.l2cache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse               182.412916                       # Cycle average of tags in use
+system.cpu.l2cache.tagsinuse               184.758016                       # Cycle average of tags in use
 system.cpu.l2cache.total_refs                       2                       # Total number of references to valid blocks.
 system.cpu.l2cache.warmup_cycle                     0                       # Cycle when the warmup percentage was hit.
 system.cpu.l2cache.writebacks                       0                       # number of writebacks
 system.cpu.not_idle_fraction                        1                       # Percentage of non-idle cycles
-system.cpu.numCycles                            64818                       # number of cpu cycles simulated
-system.cpu.num_insts                             5685                       # Number of instructions executed
-system.cpu.num_refs                              2058                       # Number of memory references
-system.cpu.workload.PROG:num_syscalls              13                       # Number of system calls
+system.cpu.numCycles                            65606                       # number of cpu cycles simulated
+system.cpu.num_insts                             5827                       # Number of instructions executed
+system.cpu.num_refs                              2090                       # Number of memory references
+system.cpu.workload.PROG:num_syscalls               8                       # Number of system calls
 
 ---------- End Simulation Statistics   ----------
diff --git a/util/checkpoint-aggregator.py b/util/checkpoint-aggregator.py
new file mode 100755
index 000000000..6e40db01e
--- /dev/null
+++ b/util/checkpoint-aggregator.py
@@ -0,0 +1,167 @@
+# Copyright (c) 2009 The Regents of The University of Michigan
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Lisa Hsu
+
+from ConfigParser import ConfigParser
+import gzip
+
+import sys, re, optparse, os
+
+class myCP(ConfigParser):
+    """ConfigParser variant whose option names keep their original case."""
+
+    def optionxform(self, optionstr):
+        # Return the name untouched instead of the base class's lower-casing.
+        return optionstr
+
+def aggregate(options, args):
+    """Merge several single-CPU checkpoints into one multi-CPU checkpoint.
+
+    Each name in args must match a "cpt.<name>.<digits>" entry in the
+    current directory.  Sections containing "cpu" are renamed to cpu<i>,
+    "ppn" values with a non-zero tag are rebased past the pages of earlier
+    checkpoints, and the memory images are concatenated.
+
+    options -- parsed options; options.prefix names the output directory
+               "../<prefix>-cpt"
+    args    -- checkpoint names, in the order of the resulting CPU ids
+
+    Calls sys.exit(1) if a named checkpoint is not found.
+    """
+    merged = myCP()
+    page_ptr = 0
+    cpu_re = re.compile("cpu")
+    workload_re = re.compile("workload$")
+    entry_re = re.compile(r"(.*).Entry(\d+)")
+
+    allfiles = os.listdir(os.getcwd())
+    cpts = []
+    for arg in args:
+        # Escape the name and the dots so only "cpt.<arg>.<n>" can match.
+        cpt_re = re.compile(r"cpt\." + re.escape(arg) + r"\.\d+")
+        matches = [f for f in allfiles if cpt_re.search(f)]
+        if not matches:
+            print "missing checkpoint: ", arg
+            sys.exit(1)
+        cpts.append(matches[0])
+
+    dirname = "-".join([options.prefix, "cpt"])
+    print dirname
+    agg_name = "-".join(args)
+    print agg_name
+    fullpath = os.path.join("..", dirname, "cpt." + agg_name + ".10000")
+    if not os.path.isdir(fullpath):
+        os.makedirs(fullpath)  # same effect as "mkdir -p", without a shell
+
+    myfile = open(fullpath + "/system.physmem.physmem", "wb+")
+    merged_mem = gzip.GzipFile(fileobj=myfile, mode="wb")
+
+    max_curtick = 0
+    when = 0
+    for (i, cpt) in enumerate(cpts):
+        config = myCP()
+        cpt_file = open(cpt + "/m5.cpt")
+        config.readfp(cpt_file)
+        cpt_file.close()
+        # Pages used by this checkpoint; read once and reused below.
+        num_pages = config.getint("system", "page_ptr")
+
+        for sec in config.sections():
+            if cpu_re.search(sec):
+                newsec = cpu_re.sub("cpu" + str(i), sec)
+                merged.add_section(newsec)
+                for (key, value) in config.items(sec):
+                    if key == "ppn" and config.getint(sec, "tag") != 0:
+                        # Rebase past the pages of earlier checkpoints.
+                        value = int(value) + page_ptr
+                    elif key == "asn":
+                        tmp = entry_re.search(sec).groups()
+                        # Entries at or past "nlu" keep their saved asn.
+                        if (not config.has_option(tmp[0], "nlu") or
+                                int(tmp[1]) < config.getint(tmp[0], "nlu")):
+                            value = i
+                    merged.set(newsec, key, value)
+                if workload_re.search(sec):
+                    # Set last so a copied M5_pid cannot override it.
+                    merged.set(newsec, "M5_pid", i)
+            elif sec == "Globals":
+                tick = config.getint(sec, "curTick")
+                if tick > max_curtick:
+                    max_curtick = tick
+                    when = config.getint("system.cpu.tickEvent", "_when")
+            elif sec != "system" and i == 0:
+                # Other sections are copied from the first checkpoint only.
+                print sec
+                merged.add_section(sec)
+                for (key, value) in config.items(sec):
+                    merged.set(sec, key, value)
+
+        ### memory stuff
+        f = open(cpt + "/system.physmem.physmem", "rb")
+        gf = gzip.GzipFile(fileobj=f, mode="rb")
+        nbytes = num_pages << 13  # 8KB (1 << 13) per page_ptr unit
+        print "bytes to be read: ", nbytes
+        merged_mem.write(gf.read(nbytes))
+        gf.close()
+        f.close()
+        page_ptr = page_ptr + num_pages
+
+    merged.add_section("system")
+    merged.set("system", "page_ptr", page_ptr)
+    print "WARNING: "
+    print "Make sure the simulation using this checkpoint has at least "
+    # Thresholds are in 8KB pages: 1 << 16 pages is 512MB, 1 << 17 is 1G.
+    for (shift, size) in ((20, "8G "), (19, "4G "), (18, "2G "),
+                          (17, "1G "), (16, "512MB ")):
+        if page_ptr > (1 << shift):
+            print size,
+            break
+    else:
+        print "this is a small sim, you're probably fine",
+    print "of memory."
+
+    merged.add_section("Globals")
+    merged.set("Globals", "curTick", max_curtick)
+
+    for i in xrange(len(args)):
+        merged.set("system.cpu" + str(i) + ".tickEvent", "_when", when)
+
+    out = open(fullpath + "/m5.cpt", "wb")
+    merged.write(out)
+    out.close()
+    merged_mem.close()
+    myfile.close()
+
+if __name__ == "__main__":
+    # Positional arguments name the checkpoints to merge, in CPU order.
+    parser = optparse.OptionParser(usage="%prog [--prefix PREFIX] name...")
+    parser.add_option("--prefix", type="string", default="agg")
+    (options, args) = parser.parse_args()
+    if not args:
+        parser.error("no checkpoint names given")
+    aggregate(options, args)
+