70 files changed, 1235 insertions, 759 deletions
diff --git a/src/SConscript b/src/SConscript
index 0a4bb57f4..7fb03e821 100755
--- a/src/SConscript
+++ b/src/SConscript
@@ -851,8 +851,8 @@ def makeEnv(label, objsfx, strip = False, **kwargs):
         swig_env.Append(CCFLAGS='-Wno-uninitialized')
         swig_env.Append(CCFLAGS='-Wno-sign-compare')
         swig_env.Append(CCFLAGS='-Wno-parentheses')
+        swig_env.Append(CCFLAGS='-Wno-unused-label')
         if compareVersions(env['GCC_VERSION'], '4.6.0') != -1:
-            swig_env.Append(CCFLAGS='-Wno-unused-label')
             swig_env.Append(CCFLAGS='-Wno-unused-but-set-variable')
 
     werror_env = new_env.Clone()
diff --git a/src/arch/arm/isa/insts/m5ops.isa b/src/arch/arm/isa/insts/m5ops.isa
index a157b414c..da2e10886 100644
--- a/src/arch/arm/isa/insts/m5ops.isa
+++ b/src/arch/arm/isa/insts/m5ops.isa
@@ -190,12 +190,15 @@ let {{
     exec_output += PredOpExecute.subst(loadsymbolIop)
 
     initparamCode = '''
-    Rt = PseudoInst::initParam(xc->tcBase());
+    uint64_t ip_val  = PseudoInst::initParam(xc->tcBase());
+    R0 = bits(ip_val, 31, 0);
+    R1 = bits(ip_val, 63, 32);
     '''
 
     initparamIop = InstObjParams("initparam", "Initparam", "PredOp",
                            { "code": initparamCode,
-                             "predicate_test": predicateTest })
+                             "predicate_test": predicateTest },
+                             ["IsNonSpeculative"])
     header_output += BasicDeclare.subst(initparamIop)
     decoder_output += BasicConstructor.subst(initparamIop)
     exec_output += PredOpExecute.subst(initparamIop)
diff --git a/src/arch/mips/faults.cc b/src/arch/mips/faults.cc
index 524efa178..3076e0afe 100644
--- a/src/arch/mips/faults.cc
+++ b/src/arch/mips/faults.cc
@@ -29,6 +29,8 @@
  * Authors: Gabe Black
  *          Korey Sewell
  *          Jaidev Patwardhan
+ *          Zhengxing Li
+ *          Deyuan Guo
  */
 
 #include "arch/mips/faults.hh"
@@ -118,7 +120,7 @@ MipsFaultBase::setExceptionState(ThreadContext *tc, uint8_t excCode)
     DPRINTF(MipsPRA, "PC: %s\n", pc);
     bool delay_slot = pc.pc() + sizeof(MachInst) != pc.npc();
     tc->setMiscRegNoEffect(MISCREG_EPC,
-            pc.pc() - delay_slot ? sizeof(MachInst) : 0);
+            pc.pc() - (delay_slot ? sizeof(MachInst) : 0));
 
     // Set Cause_EXCCODE field
     CauseReg cause = tc->readMiscReg(MISCREG_CAUSE);
diff --git a/src/arch/mips/faults.hh b/src/arch/mips/faults.hh
index bce828ec1..b90c38e99 100644
--- a/src/arch/mips/faults.hh
+++ b/src/arch/mips/faults.hh
@@ -29,6 +29,8 @@
  * Authors: Gabe Black
  *          Korey Sewell
  *          Jaidev Patwardhan
+ *          Zhengxing Li
+ *          Deyuan Guo
  */
 
 #ifndef __MIPS_FAULTS_HH__
@@ -88,7 +90,7 @@ class MipsFaultBase : public FaultBase
     virtual FaultVect base(ThreadContext *tc) const
     {
         StatusReg status = tc->readMiscReg(MISCREG_STATUS);
-        if (status.bev)
+        if (!status.bev)
             return tc->readMiscReg(MISCREG_EBASE);
         else
             return 0xbfc00200;
@@ -167,7 +169,7 @@ class CoprocessorUnusableFault : public MipsFault<CoprocessorUnusableFault>
         if (FullSystem) {
             CauseReg cause = tc->readMiscReg(MISCREG_CAUSE);
             cause.ce = coProcID;
-            tc->setMiscReg(MISCREG_CAUSE, cause);
+            tc->setMiscRegNoEffect(MISCREG_CAUSE, cause);
         }
     }
 };
@@ -179,7 +181,8 @@ class InterruptFault : public MipsFault<InterruptFault>
     offset(ThreadContext *tc) const
     {
         CauseReg cause = tc->readMiscRegNoEffect(MISCREG_CAUSE);
-        return cause.iv ? 0x200 : 0x000;
+        // offset 0x200 for release 2, 0x180 for release 1.
+        return cause.iv ? 0x200 : 0x180;
     }
 };
 
@@ -251,9 +254,10 @@ class TlbFault : public AddressFault<T>
             StaticInstPtr inst = StaticInst::nullStaticInstPtr)
     {
         if (FullSystem) {
-            DPRINTF(MipsPRA, "Fault %s encountered.\n", name());
-            tc->pcState(this->vect(tc));
+            DPRINTF(MipsPRA, "Fault %s encountered.\n", this->name());
+            Addr vect = this->vect(tc);
             setTlbExceptionState(tc, this->code());
+            tc->pcState(vect);
         } else {
             AddressFault<T>::invoke(tc, inst);
         }
diff --git a/src/arch/mips/isa/decoder.isa b/src/arch/mips/isa/decoder.isa
index 193f050de..034133f96 100644
--- a/src/arch/mips/isa/decoder.isa
+++ b/src/arch/mips/isa/decoder.isa
@@ -1253,7 +1253,7 @@ decode OPCODE_HI default Unknown::unknown() {
                     //When rs=L1
                     //Note: "1. Format type L is legal only if 64-bit
                     //floating point operations are enabled."
-                    0x5: decode FUNCTION_HI {
+                    0x5: decode FUNCTION {
                         format FloatConvertOp {
                             0x20: cvt_s_l({{ val = Fs_ud; }}, ToSingle);
                             0x21: cvt_d_l({{ val = Fs_ud; }}, ToDouble);
diff --git a/src/arch/mips/linux/process.cc b/src/arch/mips/linux/process.cc
index 156d4ea05..0982e05cb 100644
--- a/src/arch/mips/linux/process.cc
+++ b/src/arch/mips/linux/process.cc
@@ -55,7 +55,7 @@ unameFunc(SyscallDesc *desc, int callnum, LiveProcess *process,
 
     strcpy(name->sysname, "Linux");
     strcpy(name->nodename,"m5.eecs.umich.edu");
-    strcpy(name->release, "2.4.20");
+    strcpy(name->release, "2.6.35");
     strcpy(name->version, "#1 Mon Aug 18 11:32:15 EDT 2003");
     strcpy(name->machine, "mips");
 
diff --git a/src/arch/mips/registers.hh b/src/arch/mips/registers.hh
index dce7858bf..d3cf1650d 100644
--- a/src/arch/mips/registers.hh
+++ b/src/arch/mips/registers.hh
@@ -55,7 +55,7 @@ const int NumIntRegs = NumIntArchRegs + NumIntSpecialRegs;        //HI & LO Regs
 const int NumFloatRegs = NumFloatArchRegs + NumFloatSpecialRegs;//
 
 const uint32_t MIPS32_QNAN = 0x7fbfffff;
-const uint64_t MIPS64_QNAN = ULL(0x7fbfffffffffffff);
+const uint64_t MIPS64_QNAN = ULL(0x7ff7ffffffffffff);
 
 enum FPControlRegNums {
    FLOATREG_FIR = NumFloatArchRegs,
diff --git a/src/arch/mips/tlb.cc b/src/arch/mips/tlb.cc
index 057fb5e76..d28ef8231 100644
--- a/src/arch/mips/tlb.cc
+++ b/src/arch/mips/tlb.cc
@@ -29,6 +29,8 @@
  * Authors: Nathan Binkert
  *          Steve Reinhardt
  *          Jaidev Patwardhan
+ *          Zhengxing Li
+ *          Deyuan Guo
  */
 
 #include <string>
@@ -310,18 +312,6 @@ Fault
 TLB::translateData(RequestPtr req, ThreadContext *tc, bool write)
 {
     if (!FullSystem) {
-        //@TODO: This should actually use TLB instead of going directly
-        //       to the page table in syscall mode.
-        /**
-         * Check for alignment faults
-         */
-        if (req->getVaddr() & (req->getSize() - 1)) {
-            DPRINTF(TLB, "Alignment Fault on %#x, size = %d", req->getVaddr(),
-                    req->getSize());
-            return new AddressErrorFault(req->getVaddr(), write);
-        }
-
-
         Process * p = tc->getProcessPtr();
 
         Fault fault = p->pTable->translate(req);
diff --git a/src/arch/x86/isa/insts/general_purpose/input_output/general_io.py b/src/arch/x86/isa/insts/general_purpose/input_output/general_io.py
index c034f8a48..0465b3447 100644
--- a/src/arch/x86/isa/insts/general_purpose/input_output/general_io.py
+++ b/src/arch/x86/isa/insts/general_purpose/input_output/general_io.py
@@ -42,26 +42,34 @@ microcode = '''
     def macroop IN_R_I {
         .adjust_imm trimImm(8)
         limm t1, imm, dataSize=asz
+        mfence
         ld reg, intseg, [1, t1, t0], "IntAddrPrefixIO << 3", addressSize=8, \
             nonSpec=True
+        mfence
     };
 
     def macroop IN_R_R {
         zexti t2, regm, 15, dataSize=8
+        mfence
         ld reg, intseg, [1, t2, t0], "IntAddrPrefixIO << 3", addressSize=8, \
             nonSpec=True
+        mfence
     };
 
     def macroop OUT_I_R {
         .adjust_imm trimImm(8)
         limm t1, imm, dataSize=8
+        mfence
         st reg, intseg, [1, t1, t0], "IntAddrPrefixIO << 3", addressSize=8, \
             nonSpec=True
+        mfence
     };
 
     def macroop OUT_R_R {
         zexti t2, reg, 15, dataSize=8
+        mfence
         st regm, intseg, [1, t2, t0], "IntAddrPrefixIO << 3", addressSize=8, \
             nonSpec=True
+        mfence
     };
 '''
diff --git a/src/arch/x86/isa/insts/general_purpose/input_output/string_io.py b/src/arch/x86/isa/insts/general_purpose/input_output/string_io.py
index 3c90ee7e7..044e57edc 100644
--- a/src/arch/x86/isa/insts/general_purpose/input_output/string_io.py
+++ b/src/arch/x86/isa/insts/general_purpose/input_output/string_io.py
@@ -45,9 +45,11 @@ def macroop INS_M_R {
 
     zexti t2, reg, 15, dataSize=8
 
+    mfence
     ld t6, intseg, [1, t2, t0], "IntAddrPrefixIO << 3", addressSize=8, \
         nonSpec=True
     st t6, es, [1, t0, rdi]
+    mfence
 
     add rdi, rdi, t3, dataSize=asz
 };
@@ -63,6 +65,7 @@ def macroop INS_E_M_R {
 
     zexti t2, reg, 15, dataSize=8
 
+    mfence
 topOfLoop:
     ld t6, intseg, [1, t2, t0], "IntAddrPrefixIO << 3", addressSize=8, \
         nonSpec=True
@@ -72,6 +75,7 @@ topOfLoop:
     add rdi, rdi, t3, dataSize=asz
     br label("topOfLoop"), flags=(nCEZF,)
 end:
+    mfence
     fault "NoFault"
 };
 
@@ -84,9 +88,11 @@ def macroop OUTS_R_M {
 
     zexti t2, reg, 15, dataSize=8
 
+    mfence
     ld t6, ds, [1, t0, rsi]
     st t6, intseg, [1, t2, t0], "IntAddrPrefixIO << 3", addressSize=8, \
         nonSpec=True
+    mfence
 
     add rsi, rsi, t3, dataSize=asz
 };
@@ -102,6 +108,7 @@ def macroop OUTS_E_R_M {
 
     zexti t2, reg, 15, dataSize=8
 
+    mfence
 topOfLoop:
     ld t6, ds, [1, t0, rsi]
     st t6, intseg, [1, t2, t0], "IntAddrPrefixIO << 3", addressSize=8, \
@@ -111,6 +118,7 @@ topOfLoop:
     add rsi, rsi, t3, dataSize=asz
     br label("topOfLoop"), flags=(nCEZF,)
 end:
+    mfence
     fault "NoFault"
 };
 '''
diff --git a/src/base/hostinfo.cc b/src/base/hostinfo.cc
index 5ff34e603..857ccfa7f 100644
--- a/src/base/hostinfo.cc
+++ b/src/base/hostinfo.cc
@@ -30,6 +30,12 @@
 
 #include <unistd.h>
 
+#ifdef __APPLE__
+#include <mach/mach_init.h>
+#include <mach/shared_region.h>
+#include <mach/task.h>
+#endif
+
 #include <cctype>
 #include <cerrno>
 #include <cmath>
@@ -82,7 +88,31 @@ procInfo(const char *filename, const char *target)
     }
 
     if (fp)
-      fclose(fp);
+        fclose(fp);
 
     return 0;
 }
+
+uint64_t
+memUsage()
+{
+#ifdef __APPLE__
+    // On the Mach-based Darwin kernel, ask task_info() about our own task.
+    mach_msg_type_number_t info_count = TASK_BASIC_INFO_COUNT;
+    struct task_basic_info basic_info;
+
+    if (task_info(mach_task_self(), TASK_BASIC_INFO,
+                  (task_info_t)&basic_info,
+                  &info_count) != KERN_SUCCESS) {
+        return 0;
+    }
+
+    // Follow Darwin's implementation of top: take SHARED_REGION_SIZE off
+    // the task's virtual size to account for the shared memory submap
+    // that is incorporated into every process, then divide by 1024.
+    return (basic_info.virtual_size - SHARED_REGION_SIZE) / 1024;
+#else
+    // Linux implementation: look up "VmSize:" in /proc/self/status.
+    return procInfo("/proc/self/status", "VmSize:");
+#endif
+}
diff --git a/src/base/hostinfo.hh b/src/base/hostinfo.hh
index ac7d40f13..d9a30481a 100644
--- a/src/base/hostinfo.hh
+++ b/src/base/hostinfo.hh
@@ -39,7 +39,11 @@ std::string &hostname();
 
 uint64_t procInfo(const char *filename, const char *target);
 
-inline uint64_t memUsage()
-{ return procInfo("/proc/self/status", "VmSize:"); }
+/**
+ * Determine the simulator process' total virtual memory usage.
+ *
+ * @return virtual memory usage in kilobytes
+ */
+uint64_t memUsage();
 
 #endif // __HOSTINFO_HH__
diff --git a/src/base/random.cc b/src/base/random.cc
index 457b0c98b..cffeddec9 100644
--- a/src/base/random.cc
+++ b/src/base/random.cc
@@ -29,6 +29,7 @@
  *          Ali Saidi
  */
 
+#include <limits>
 #include "base/fenv.hh"
 #include "base/intmath.hh"
 #include "base/misc.hh"
@@ -67,7 +68,10 @@ Random::genrand(uint32_t max)
 {
     if (max == 0)
         return 0;
-    int log = ceilLog2(max) + 1;
+    if (max == std::numeric_limits<uint32_t>::max())
+        return genrand();
+
+    int log = ceilLog2(max + 1);
     int shift = (sizeof(uint32_t) * 8 - log);
     uint32_t random;
 
@@ -83,7 +87,10 @@ Random::genrand(uint64_t max)
 {
     if (max == 0)
         return 0;
-    int log = ceilLog2(max) + 1;
+    if (max == std::numeric_limits<uint64_t>::max())
+        return genrand();
+
+    int log = ceilLog2(max + 1);
     int shift = (sizeof(uint64_t) * 8 - log);
     uint64_t random;
 
diff --git a/src/base/statistics.hh b/src/base/statistics.hh
index d98c79414..1f8a59326 100644
--- a/src/base/statistics.hh
+++ b/src/base/statistics.hh
@@ -1477,6 +1477,8 @@ class HistStor
 
     /** The current sum. */
     Counter sum;
+    /** The sum of the logarithms of the samples, for the geometric mean. */
+    Counter logs;
     /** The sum of squares. */
     Counter squares;
     /** The number of samples. */
@@ -1528,6 +1530,7 @@ class HistStor
 
         sum += val * number;
         squares += val * val * number;
+        logs += log(val) * number;
         samples += number;
     }
 
@@ -1567,6 +1570,7 @@ class HistStor
             data.cvec[i] = cvec[i];
 
         data.sum = sum;
+        data.logs = logs;
         data.squares = squares;
         data.samples = samples;
     }
@@ -1589,6 +1593,7 @@ class HistStor
         sum = Counter();
         squares = Counter();
         samples = Counter();
+        logs = Counter();
     }
 };
 
diff --git a/src/base/stats/info.hh b/src/base/stats/info.hh
index 2c5b44a38..98e811747 100644
--- a/src/base/stats/info.hh
+++ b/src/base/stats/info.hh
@@ -183,6 +183,7 @@ struct DistData
     VCounter cvec;
     Counter sum;
     Counter squares;
+    Counter logs;
     Counter samples;
 };
 
diff --git a/src/base/stats/text.cc b/src/base/stats/text.cc
index 683ba7fe4..8fb49dc59 100644
--- a/src/base/stats/text.cc
+++ b/src/base/stats/text.cc
@@ -367,6 +367,12 @@ DistPrint::operator()(ostream &stream) const
     print.value = data.samples ? data.sum / data.samples : NAN;
     print(stream);
 
+    if (data.type == Hist) {
+        print.name = base + "gmean";
+        print.value = data.samples ? exp(data.logs / data.samples) : NAN;
+        print(stream);
+    }
+
     Result stdev = NAN;
     if (data.samples)
         stdev = sqrt((data.samples * data.squares - data.sum * data.sum) /
@@ -507,7 +513,14 @@ Text::visit(const Vector2dInfo &info)
     bool havesub = false;
     VectorPrint print;
 
-    print.subnames = info.y_subnames;
+    if (!info.y_subnames.empty()) {
+        for (off_type i = 0; i < info.y; ++i) { // look for any non-empty name
+            if (!info.y_subnames[i].empty()) {
+                print.subnames = info.y_subnames;
+                break; // one non-empty subname suffices; stop scanning
+            }
+        }
+    }
     print.flags = info.flags;
     print.separatorString = info.separatorString;
     print.descriptions = descriptions;
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index cf647daaa..a4ffb4716 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -284,17 +284,16 @@ class BaseCPU : public MemObject
     void enableFunctionTrace();
     void traceFunctionsInternal(Addr pc);
 
-  protected:
+  private:
+    static std::vector<BaseCPU *> cpuList;   //!< Static global cpu list
+
+  public:
     void traceFunctions(Addr pc)
     {
         if (functionTracingEnabled)
             traceFunctionsInternal(pc);
     }
 
-  private:
-    static std::vector<BaseCPU *> cpuList;   //!< Static global cpu list
-
-  public:
     static int numSimulatedCPUs() { return cpuList.size(); }
     static Counter numSimulatedInstructions()
     {
diff --git a/src/cpu/inorder/resources/mult_div_unit.cc b/src/cpu/inorder/resources/mult_div_unit.cc
index 0ff05252c..ab0081787 100644
--- a/src/cpu/inorder/resources/mult_div_unit.cc
+++ b/src/cpu/inorder/resources/mult_div_unit.cc
@@ -299,6 +299,7 @@ MultDivUnit::exeMulDiv(int slot_num)
     }    
 
     mult_div_req->setProcessing(false);
+    cpu->wakeCPU();
 }
 
 void
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index c481d7b9c..b6a4c0387 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -51,6 +51,7 @@
 #include "config/use_checker.hh"
 #include "cpu/o3/commit.hh"
 #include "cpu/o3/thread_state.hh"
+#include "cpu/base.hh"
 #include "cpu/exetrace.hh"
 #include "cpu/timebuf.hh"
 #include "debug/Activity.hh"
@@ -987,6 +988,8 @@ DefaultCommit<Impl>::commitInsts()
                 // Updates misc. registers.
                 head_inst->updateMiscRegs();
 
+                cpu->traceFunctions(pc[tid].instAddr());
+
                 TheISA::advancePC(pc[tid], head_inst->staticInst);
 
                 // Keep track of the last sequence number commited
diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh
index 3def971e9..985e92826 100644
--- a/src/cpu/o3/decode_impl.hh
+++ b/src/cpu/o3/decode_impl.hh
@@ -446,10 +446,6 @@ void
 DefaultDecode<Impl>::sortInsts()
 {
     int insts_from_fetch = fromFetch->size;
-#ifdef DEBUG
-    for (ThreadID tid = 0; tid < numThreads; tid++)
-        assert(insts[tid].empty());
-#endif
     for (int i = 0; i < insts_from_fetch; ++i) {
         insts[fromFetch->insts[i]->threadNumber].push(fromFetch->insts[i]);
     }
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index 9c4b1068d..92c8875e4 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -1340,10 +1340,10 @@ DefaultIEW<Impl>::executeInsts()
                 fetchRedirect[tid] = true;
 
                 DPRINTF(IEW, "Execute: Branch mispredict detected.\n");
-                DPRINTF(IEW, "Predicted target was PC:%#x, NPC:%#x.\n",
-                        inst->predInstAddr(), inst->predNextInstAddr());
+                DPRINTF(IEW, "Predicted target was PC: %s.\n",
+                        inst->readPredTarg());
                 DPRINTF(IEW, "Execute: Redirecting fetch to PC: %s.\n",
-                        inst->pcState(), inst->nextInstAddr());
+                        inst->pcState());
                 // If incorrect, then signal the ROB that it must be squashed.
                 squashDueToBranch(inst, tid);
 
diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh
index 4106bbef9..04935604e 100644
--- a/src/cpu/o3/rename_impl.hh
+++ b/src/cpu/o3/rename_impl.hh
@@ -766,10 +766,6 @@ void
 DefaultRename<Impl>::sortInsts()
 {
     int insts_from_decode = fromDecode->size;
-#ifdef DEBUG
-    for (ThreadID tid = 0; tid < numThreads; tid++)
-        assert(insts[tid].empty());
-#endif
     for (int i = 0; i < insts_from_decode; ++i) {
         DynInstPtr inst = fromDecode->insts[i];
         insts[inst->threadNumber].push_back(inst);
diff --git a/src/cpu/pc_event.cc b/src/cpu/pc_event.cc
index a07d787f0..9cf63f7d3 100644
--- a/src/cpu/pc_event.cc
+++ b/src/cpu/pc_event.cc
@@ -83,7 +83,9 @@ PCEventQueue::schedule(PCEvent *event)
 bool
 PCEventQueue::doService(ThreadContext *tc)
 {
-    Addr pc = tc->instAddr() & ~0x3;
+    // This will fail to break on Alpha PALcode addresses, but that is
+    // a rare use case.
+    Addr pc = tc->instAddr();
     int serviced = 0;
     range_t range = equal_range(pc);
     for (iterator i = range.first; i != range.second; ++i) {
@@ -91,7 +93,7 @@ PCEventQueue::doService(ThreadContext *tc)
         // another event.  This for example, prevents two invocations
         // of the SkipFuncEvent.  Maybe we should have separate PC
         // event queues for each processor?
-        if (pc != (tc->instAddr() & ~0x3))
+        if (pc != tc->instAddr())
             continue;
 
         DPRINTF(PCEvent, "PC based event serviced at %#x: %s\n",
diff --git a/src/cpu/testers/directedtest/DirectedGenerator.hh b/src/cpu/testers/directedtest/DirectedGenerator.hh
index 904dcf399..c156efff0 100644
--- a/src/cpu/testers/directedtest/DirectedGenerator.hh
+++ b/src/cpu/testers/directedtest/DirectedGenerator.hh
@@ -43,7 +43,7 @@ class DirectedGenerator : public SimObject
     virtual ~DirectedGenerator() {}
     
     virtual bool initiate() = 0;
-    virtual void performCallback(uint proc, Addr address) = 0;
+    virtual void performCallback(uint32_t proc, Addr address) = 0;
     
     void setDirectedTester(RubyDirectedTester* directed_tester);
     
diff --git a/src/cpu/testers/directedtest/InvalidateGenerator.cc b/src/cpu/testers/directedtest/InvalidateGenerator.cc
index 902c6cc15..4d8271a05 100644
--- a/src/cpu/testers/directedtest/InvalidateGenerator.cc
+++ b/src/cpu/testers/directedtest/InvalidateGenerator.cc
@@ -103,7 +103,7 @@ InvalidateGenerator::initiate()
 }
 
 void 
-InvalidateGenerator::performCallback(uint proc, Addr address)
+InvalidateGenerator::performCallback(uint32_t proc, Addr address)
 {
     assert(m_address == address);  
 
diff --git a/src/cpu/testers/directedtest/InvalidateGenerator.hh b/src/cpu/testers/directedtest/InvalidateGenerator.hh
index 14c47b70b..50db180e3 100644
--- a/src/cpu/testers/directedtest/InvalidateGenerator.hh
+++ b/src/cpu/testers/directedtest/InvalidateGenerator.hh
@@ -49,14 +49,14 @@ class InvalidateGenerator : public DirectedGenerator
     ~InvalidateGenerator();
     
     bool initiate();
-    void performCallback(uint proc, Addr address);
+    void performCallback(uint32_t proc, Addr address);
     
   private:
     InvalidateGeneratorStatus m_status;
     Addr m_address;
-    uint m_active_read_node;
-    uint m_active_inv_node;
-    uint m_addr_increment_size;
+    uint32_t m_active_read_node;
+    uint32_t m_active_inv_node;
+    uint32_t m_addr_increment_size;
 };
 
 #endif //__CPU_DIRECTEDTEST_INVALIDATEGENERATOR_HH__
diff --git a/src/cpu/testers/directedtest/RubyDirectedTester.hh b/src/cpu/testers/directedtest/RubyDirectedTester.hh
index 163c206d8..53c389692 100644
--- a/src/cpu/testers/directedtest/RubyDirectedTester.hh
+++ b/src/cpu/testers/directedtest/RubyDirectedTester.hh
@@ -53,11 +53,11 @@ class RubyDirectedTester : public MemObject
         RubyDirectedTester *tester;
 
       public:
-        CpuPort(const std::string &_name, RubyDirectedTester *_tester, uint _idx)
+        CpuPort(const std::string &_name, RubyDirectedTester *_tester, uint32_t _idx)
             : SimpleTimingPort(_name, _tester), tester(_tester), idx(_idx)
         {}
 
-        uint idx;
+        uint32_t idx;
 
       protected:
         virtual bool recvTiming(PacketPtr pkt);
diff --git a/src/cpu/testers/directedtest/SeriesRequestGenerator.cc b/src/cpu/testers/directedtest/SeriesRequestGenerator.cc
index 43e140178..4cf9aed1c 100644
--- a/src/cpu/testers/directedtest/SeriesRequestGenerator.cc
+++ b/src/cpu/testers/directedtest/SeriesRequestGenerator.cc
@@ -89,7 +89,7 @@ SeriesRequestGenerator::initiate()
 }
 
 void 
-SeriesRequestGenerator::performCallback(uint proc, Addr address)
+SeriesRequestGenerator::performCallback(uint32_t proc, Addr address)
 {
     assert(m_active_node == proc);
     assert(m_address == address);  
diff --git a/src/cpu/testers/directedtest/SeriesRequestGenerator.hh b/src/cpu/testers/directedtest/SeriesRequestGenerator.hh
index 97b632a12..9b1c3e8ba 100644
--- a/src/cpu/testers/directedtest/SeriesRequestGenerator.hh
+++ b/src/cpu/testers/directedtest/SeriesRequestGenerator.hh
@@ -49,13 +49,13 @@ class SeriesRequestGenerator : public DirectedGenerator
     ~SeriesRequestGenerator();
     
     bool initiate();
-    void performCallback(uint proc, Addr address);
+    void performCallback(uint32_t proc, Addr address);
     
   private:
     SeriesRequestGeneratorStatus m_status;
     Addr m_address;
-    uint m_active_node;
-    uint m_addr_increment_size;
+    uint32_t m_active_node;
+    uint32_t m_addr_increment_size;
     bool m_issue_writes;
 };
 
diff --git a/src/dev/arm/pl111.cc b/src/dev/arm/pl111.cc
index e13045338..958f07aa7 100644
--- a/src/dev/arm/pl111.cc
+++ b/src/dev/arm/pl111.cc
@@ -67,7 +67,7 @@ Pl111::Pl111(const Params *p)
 {
     pioSize = 0xFFFF;
 
-    pic = simout.create("framebuffer.bmp", true);
+    pic = simout.create(csprintf("%s.framebuffer.bmp", sys->name()), true);
 
     dmaBuffer = new uint8_t[LcdMaxWidth * LcdMaxHeight * sizeof(uint32_t)];
 
diff --git a/src/dev/io_device.cc b/src/dev/io_device.cc
index f124767ca..0ad53f09e 100644
--- a/src/dev/io_device.cc
+++ b/src/dev/io_device.cc
@@ -71,7 +71,7 @@ void
 PioDevice::init()
 {
     if (!pioPort)
-        panic("Pio port %s not connected to anything!", name());
+        panic("Pio port of %s not connected to anything!", name());
     pioPort->sendStatusChange(Port::RangeChange);
 }
 
diff --git a/src/mem/SConscript b/src/mem/SConscript
index 2aa7d0323..8418a4f51 100644
--- a/src/mem/SConscript
+++ b/src/mem/SConscript
@@ -59,6 +59,7 @@ DebugFlag('MemoryAccess')
 
 DebugFlag('ProtocolTrace')
 DebugFlag('RubyCache')
+DebugFlag('RubyCacheTrace')
 DebugFlag('RubyDma')
 DebugFlag('RubyGenerated')
 DebugFlag('RubyMemory')
@@ -67,9 +68,9 @@ DebugFlag('RubyPort')
 DebugFlag('RubyQueue')
 DebugFlag('RubySequencer')
 DebugFlag('RubySlicc')
-DebugFlag('RubyStorebuffer')
+DebugFlag('RubySystem')
 DebugFlag('RubyTester')
 
 CompoundFlag('Ruby', [ 'RubyQueue', 'RubyNetwork', 'RubyTester',
-    'RubyGenerated', 'RubySlicc', 'RubyStorebuffer', 'RubyCache',
-    'RubyMemory', 'RubyDma', 'RubyPort', 'RubySequencer'])
+    'RubyGenerated', 'RubySlicc', 'RubySystem', 'RubyCache',
+    'RubyMemory', 'RubyDma', 'RubyPort', 'RubySequencer', 'RubyCacheTrace'])
diff --git a/src/mem/bus.cc b/src/mem/bus.cc
index 69b14547e..db71b86b7 100644
--- a/src/mem/bus.cc
+++ b/src/mem/bus.cc
@@ -447,13 +447,6 @@ Bus::recvAtomic(PacketPtr pkt)
 void
 Bus::recvFunctional(PacketPtr pkt)
 {
-    if (!pkt->isPrint()) {
-        // don't do DPRINTFs on PrintReq as it clutters up the output
-        DPRINTF(Bus,
-                "recvFunctional: packet src %d dest %d addr 0x%x cmd %s\n",
-                pkt->getSrc(), pkt->getDest(), pkt->getAddr(),
-                pkt->cmdString());
-    }
     assert(pkt->getDest() == Packet::Broadcast);
 
     int port_id = findPort(pkt->getAddr());
@@ -462,6 +455,14 @@ Bus::recvFunctional(PacketPtr pkt)
     // id after each
     int src_id = pkt->getSrc();
 
+    if (!pkt->isPrint()) {
+        // don't do DPRINTFs on PrintReq as it clutters up the output
+        DPRINTF(Bus,
+                "recvFunctional: packet src %d dest %d addr 0x%x cmd %s\n",
+                src_id, port_id, pkt->getAddr(),
+                pkt->cmdString());
+    }
+
     assert(pkt->isRequest()); // hasn't already been satisfied
 
     SnoopIter s_end = snoopPorts.end();
diff --git a/src/mem/packet.cc b/src/mem/packet.cc
index 5ec977ed4..64f4fcd14 100644
--- a/src/mem/packet.cc
+++ b/src/mem/packet.cc
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2011 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2006 The Regents of The University of Michigan
  * Copyright (c) 2010 Advanced Micro Devices, Inc.
  * All rights reserved.
@@ -192,14 +204,98 @@ Packet::checkFunctional(Printable *obj, Addr addr, int size, uint8_t *data)
             memcpy(getPtr<uint8_t>(), data + offset, getSize());
             return true;
         } else {
-            // In this case the timing packet only partially satisfies
-            // the request, so we would need more information to make
-            // this work.  Like bytes valid in the packet or
-            // something, so the request could continue and get this
-            // bit of possibly newer data along with the older data
-            // not written to yet.
-            panic("Memory value only partially satisfies the functional "
-                  "request. Now what?");
+            // Offsets and sizes to copy in case of partial overlap
+            int func_offset;
+            int val_offset;
+            int overlap_size;
+
+            // calculate offsets and copy sizes for the two byte arrays
+            if (val_start < func_start && val_end <= func_end) {
+                val_offset = func_start - val_start;
+                func_offset = 0;
+                overlap_size = val_end - func_start;
+            } else if (val_start >= func_start && val_end > func_end) {
+                val_offset = 0;
+                func_offset = val_start - func_start;
+                overlap_size = func_end - val_start;
+            } else if (val_start >= func_start && val_end <= func_end) {
+                val_offset = 0;
+                func_offset = val_start - func_start;
+                overlap_size = size;
+            } else {
+                panic("BUG: Missed a case for a partial functional request");
+            }
+
+            // Figure out how much of the partial overlap should be copied
+            // into the packet and not overwrite previously found bytes.
+            if (bytesValidStart == 0 && bytesValidEnd == 0) {
+                // No bytes have been copied yet, just set indices
+                // to found range
+                bytesValidStart = func_offset;
+                bytesValidEnd = func_offset + overlap_size;
+            } else {
+                // Some bytes have already been copied. Use bytesValid
+                // indices and offset values to figure out how much data
+                // to copy and where to copy it to.
+
+                // Index overlap conditions to check
+                int a = func_offset - bytesValidStart;
+                int b = (func_offset + overlap_size) - bytesValidEnd;
+                int c = func_offset - bytesValidEnd;
+                int d = (func_offset + overlap_size) - bytesValidStart;
+
+                if (a >= 0 && b <= 0) {
+                    // bytes already in pkt data array are superset of
+                    // found bytes, will not copy any bytes
+                    overlap_size = 0;
+                } else if (a < 0 && d >= 0 && b <= 0) {
+                    // found bytes will move bytesValidStart towards 0
+                    overlap_size = bytesValidStart - func_offset;
+                    bytesValidStart = func_offset;
+                } else if (b > 0 && c <= 0 && a >= 0) {
+                    // found bytes will move bytesValidEnd
+                    // towards end of pkt data array
+                    overlap_size =
+                        (func_offset + overlap_size) - bytesValidEnd;
+                    val_offset += bytesValidEnd - func_offset;
+                    func_offset = bytesValidEnd;
+                    bytesValidEnd += overlap_size;
+                } else if (a < 0 && b > 0) {
+                    // Found bytes are superset of copied range. Will move
+                    // bytesValidStart towards 0 and bytesValidEnd towards
+                    // end of pkt data array.  Need to break copy into two
+                    // pieces so as to not overwrite previously found data.
+
+                    // copy the first half
+                    uint8_t *dest = getPtr<uint8_t>() + func_offset;
+                    uint8_t *src = data + val_offset;
+                    memcpy(dest, src, (bytesValidStart - func_offset));
+
+                    // re-calc the offsets and indices to do the copy
+                    // required for the second half
+                    val_offset += (bytesValidEnd - func_offset);
+                    bytesValidStart = func_offset;
+                    overlap_size =
+                        (func_offset + overlap_size) - bytesValidEnd;
+                    func_offset = bytesValidEnd;
+                    bytesValidEnd += overlap_size;
+                } else if ((c > 0 && b > 0)
+                           || (a < 0 && d < 0)) {
+                    // region to be copied is discontiguous! Not supported.
+                    panic("BUG: Discontiguous bytes found "
+                          "for functional copying!");
+                }
+            }
+            assert(bytesValidEnd <= getSize());
+
+            // copy partial data into the packet's data array
+            uint8_t *dest = getPtr<uint8_t>() + func_offset;
+            uint8_t *src = data + val_offset;
+            memcpy(dest, src, overlap_size);
+
+            // check if we're done filling the functional access
+            bool done = (bytesValidStart == 0) && (bytesValidEnd == getSize());
+            return done;
         }
     } else if (isWrite()) {
         if (offset >= 0) {
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index be0c20d42..6347c21ea 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -299,6 +299,13 @@ class Packet : public FastAlloc, public Printable
      */
     MemCmd origCmd;
 
+    /**
+     * These values specify the range of bytes found that satisfy a
+     * functional read.
+     */
+    uint16_t bytesValidStart;
+    uint16_t bytesValidEnd;
+
   public:
     /// Used to calculate latencies for each packet.
     Tick time;
@@ -507,7 +514,8 @@ class Packet : public FastAlloc, public Printable
      */
     Packet(Request *_req, MemCmd _cmd, NodeID _dest)
         :  flags(VALID_DST), cmd(_cmd), req(_req), data(NULL),
-           dest(_dest), time(curTick()), senderState(NULL)
+           dest(_dest), bytesValidStart(0), bytesValidEnd(0),
+           time(curTick()), senderState(NULL)
     {
         if (req->hasPaddr()) {
             addr = req->getPaddr();
@@ -526,7 +534,8 @@ class Packet : public FastAlloc, public Printable
      */
     Packet(Request *_req, MemCmd _cmd, NodeID _dest, int _blkSize)
         :  flags(VALID_DST), cmd(_cmd), req(_req), data(NULL),
-           dest(_dest), time(curTick()), senderState(NULL)
+           dest(_dest), bytesValidStart(0), bytesValidEnd(0),
+           time(curTick()), senderState(NULL)
     {
         if (req->hasPaddr()) {
             addr = req->getPaddr() & ~(_blkSize - 1);
@@ -547,6 +556,7 @@ class Packet : public FastAlloc, public Printable
         :  cmd(pkt->cmd), req(pkt->req),
            data(pkt->flags.isSet(STATIC_DATA) ? pkt->data : NULL),
            addr(pkt->addr), size(pkt->size), src(pkt->src), dest(pkt->dest),
+           bytesValidStart(pkt->bytesValidStart), bytesValidEnd(pkt->bytesValidEnd),
            time(curTick()), senderState(pkt->senderState)
     {
         if (!clearFlags)
@@ -554,6 +564,7 @@ class Packet : public FastAlloc, public Printable
 
         flags.set(pkt->flags & (VALID_ADDR|VALID_SIZE|VALID_SRC|VALID_DST));
         flags.set(pkt->flags & STATIC_DATA);
+
     }
 
     /**
diff --git a/src/mem/protocol/MOESI_hammer-cache.sm b/src/mem/protocol/MOESI_hammer-cache.sm
index b9d355736..ce16a8777 100644
--- a/src/mem/protocol/MOESI_hammer-cache.sm
+++ b/src/mem/protocol/MOESI_hammer-cache.sm
@@ -1285,7 +1285,6 @@ machine(L1Cache, "AMD Hammer-like protocol")
     vv_allocateL2CacheBlock;
     hp_copyFromTBEToL2;
     s_deallocateTBE;
-    ka_wakeUpAllDependents;
   }
 
   transition(I, Trigger_L2_to_L1D, IT) {
@@ -1566,7 +1565,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
     k_popMandatoryQueue;
   }
 
-  transition({MM, M, MMR}, Flush_line, MM_F) {
+  transition({MM, M, MMR, MR}, Flush_line, MM_F) {
     i_allocateTBE;
     bf_issueGETF;
     p_decrementNumberOfMessagesByOne;
diff --git a/src/mem/ruby/buffers/MessageBuffer.cc b/src/mem/ruby/buffers/MessageBuffer.cc
index cab98cee9..9a7fdb61b 100644
--- a/src/mem/ruby/buffers/MessageBuffer.cc
+++ b/src/mem/ruby/buffers/MessageBuffer.cc
@@ -198,7 +198,11 @@ MessageBuffer::enqueue(MsgPtr message, Time delta)
                   m_last_arrival_time * g_eventQueue_ptr->getClock());
         }
     }
-    m_last_arrival_time = arrival_time;
+
+    // If running a cache trace, don't worry about the last arrival checks
+    if (!g_system_ptr->m_warmup_enabled) {
+        m_last_arrival_time = arrival_time;
+    }
 
     // compute the delay cycles and set enqueue time
     Message* msg_ptr = message.get();
diff --git a/src/mem/ruby/eventqueue/RubyEventQueue.hh b/src/mem/ruby/eventqueue/RubyEventQueue.hh
index 20b44362a..67fe6131b 100644
--- a/src/mem/ruby/eventqueue/RubyEventQueue.hh
+++ b/src/mem/ruby/eventqueue/RubyEventQueue.hh
@@ -58,7 +58,6 @@
 
 #include <iostream>
 
-#include "config/no_vector_bounds_checks.hh"
 #include "mem/ruby/common/TypeDefines.hh"
 #include "sim/eventq.hh"
 
@@ -77,9 +76,6 @@ class RubyEventQueue : public EventManager
     void scheduleEventAbsolute(Consumer* consumer, Time timeAbs);
     void print(std::ostream& out) const;
 
-    void triggerEvents(Time t) { assert(0); }
-    void triggerAllEvents() { assert(0); }
-
   private:
     // Private copy constructor and assignment operator
     RubyEventQueue(const RubyEventQueue& obj);
diff --git a/src/mem/ruby/recorder/CacheRecorder.cc b/src/mem/ruby/recorder/CacheRecorder.cc
index fc6ad0975..8b724859e 100644
--- a/src/mem/ruby/recorder/CacheRecorder.cc
+++ b/src/mem/ruby/recorder/CacheRecorder.cc
@@ -1,5 +1,6 @@
 /*
- * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
+ * Copyright (c) 1999-2012 Mark D. Hill and David A. Wood
+ * Copyright (c) 2010 Advanced Micro Devices, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -26,43 +27,154 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <algorithm>
-
-#include "mem/ruby/eventqueue/RubyEventQueue.hh"
+#include "debug/RubyCacheTrace.hh"
 #include "mem/ruby/recorder/CacheRecorder.hh"
-#include "gzstream.hh"
+#include "mem/ruby/system/Sequencer.hh"
+#include "mem/ruby/system/System.hh"
 
 using namespace std;
 
 void
-CacheRecorder::addRecord(Sequencer* sequencer, const Address& data_addr,
-    const Address& pc_addr, RubyRequestType type, Time time)
+TraceRecord::print(ostream& out) const
+{
+    out << "[TraceRecord: Node, " << m_cntrl_id << ", "
+        << m_data_address << ", " << m_pc_address << ", "
+        << m_type << ", Time: " << m_time << "]";
+}
+
+CacheRecorder::CacheRecorder()
+    : m_uncompressed_trace(NULL), m_uncompressed_trace_size(0),
+      m_bytes_read(0), m_records_read(0), m_records_flushed(0)
 {
-    TraceRecord rec(sequencer, data_addr, pc_addr, type, time);
-    m_records.push_back(rec);
 }
 
-int
-CacheRecorder::dumpRecords(string filename)
+CacheRecorder::CacheRecorder(uint8_t* uncompressed_trace,
+                             uint64_t uncompressed_trace_size,
+                             std::vector<Sequencer*>& seq_map)
+    : m_uncompressed_trace(uncompressed_trace),
+      m_uncompressed_trace_size(uncompressed_trace_size),
+      m_seq_map(seq_map),  m_bytes_read(0), m_records_read(0),
+      m_records_flushed(0)
 {
-    ogzstream out(filename.c_str());
-    if (out.fail()) {
-        cout << "Error: error opening file '" << filename << "'" << endl;
-        return 0;
+}
+
+CacheRecorder::~CacheRecorder()
+{
+    if (m_uncompressed_trace != NULL) {
+        delete [] m_uncompressed_trace;  // assumes new[] allocation
+        m_uncompressed_trace = NULL;
     }
+    m_seq_map.clear();
+}
 
-    std::sort(m_records.begin(), m_records.end(), greater<TraceRecord>());
+void
+CacheRecorder::enqueueNextFlushRequest()
+{
+    if (m_records_flushed < m_records.size()) {
+        TraceRecord* rec = m_records[m_records_flushed];
+        m_records_flushed++;
+        Request* req = new Request(rec->m_data_address,
+                                   RubySystem::getBlockSizeBytes(),0);
+        MemCmd::Command requestType = MemCmd::FlushReq;
+        Packet *pkt = new Packet(req, requestType, -1);
 
-    int size = m_records.size();
-    for (int i = 0; i < size; ++i)
-        m_records[i].output(out);
+        Sequencer* m_sequencer_ptr = m_seq_map[rec->m_cntrl_id];
+        assert(m_sequencer_ptr != NULL);
+        m_sequencer_ptr->makeRequest(pkt);
 
-    m_records.clear();
+        DPRINTF(RubyCacheTrace, "Flushing %s\n", *rec);
+    }
+}
+
+void
+CacheRecorder::enqueueNextFetchRequest()
+{
+    if (m_bytes_read < m_uncompressed_trace_size) {
+        TraceRecord* traceRecord = (TraceRecord*) (m_uncompressed_trace +
+                                                                m_bytes_read);
 
-    return size;
+        DPRINTF(RubyCacheTrace, "Issuing %s\n", *traceRecord);
+        Request* req = new Request();
+        MemCmd::Command requestType;
+
+        if (traceRecord->m_type == RubyRequestType_LD) {
+            requestType = MemCmd::ReadReq;
+            req->setPhys(traceRecord->m_data_address,
+                    RubySystem::getBlockSizeBytes(),0);
+        }   else if (traceRecord->m_type == RubyRequestType_IFETCH) {
+            requestType = MemCmd::ReadReq;
+            req->setPhys(traceRecord->m_data_address,
+                    RubySystem::getBlockSizeBytes(),
+                    Request::INST_FETCH);
+        }   else {
+            requestType = MemCmd::WriteReq;
+            req->setPhys(traceRecord->m_data_address,
+                    RubySystem::getBlockSizeBytes(),0);
+        }
+
+        Packet *pkt = new Packet(req, requestType, -1);
+        pkt->dataStatic(traceRecord->m_data);
+
+        Sequencer* m_sequencer_ptr = m_seq_map[traceRecord->m_cntrl_id];
+        assert(m_sequencer_ptr != NULL);
+        m_sequencer_ptr->makeRequest(pkt);
+
+        m_bytes_read += (sizeof(TraceRecord) +
+                RubySystem::getBlockSizeBytes());
+        m_records_read++;
+    }
 }
 
 void
-CacheRecorder::print(ostream& out) const
+CacheRecorder::addRecord(int cntrl, const physical_address_t data_addr,
+                         const physical_address_t pc_addr,
+                         RubyRequestType type, Time time, DataBlock& data)
+{
+    TraceRecord* rec = (TraceRecord*)malloc(sizeof(TraceRecord) +
+                                            RubySystem::getBlockSizeBytes());
+    rec->m_cntrl_id     = cntrl;
+    rec->m_time         = time;
+    rec->m_data_address = data_addr;
+    rec->m_pc_address   = pc_addr;
+    rec->m_type         = type;
+    memcpy(rec->m_data, data.getData(0, RubySystem::getBlockSizeBytes()),
+           RubySystem::getBlockSizeBytes());
+
+    m_records.push_back(rec);
+}
+
+uint64
+CacheRecorder::aggregateRecords(uint8_t** buf, uint64 total_size)
 {
+    std::sort(m_records.begin(), m_records.end(), compareTraceRecords);
+
+    int size = m_records.size();
+    uint64 current_size = 0;
+    int record_size = sizeof(TraceRecord) + RubySystem::getBlockSizeBytes();
+
+    for (int i = 0; i < size; ++i) {
+        // Determine if we need to expand the buffer size
+        if (current_size + record_size > total_size) {
+            uint8_t* new_buf = new (nothrow) uint8_t[total_size * 2];
+            if (new_buf == NULL) {
+                fatal("Unable to allocate buffer of size %s\n",
+                      total_size * 2);
+            }
+            total_size = total_size * 2;
+            uint8_t* old_buf = *buf;
+            memcpy(new_buf, old_buf, current_size);
+            *buf = new_buf;
+            delete [] old_buf;
+        }
+
+        // Copy the current record into the buffer
+        memcpy(&((*buf)[current_size]), m_records[i], record_size);
+        current_size += record_size;
+
+        free(m_records[i]);
+        m_records[i] = NULL;
+    }
+
+    m_records.clear();
+    return current_size;
 }
diff --git a/src/mem/ruby/recorder/CacheRecorder.hh b/src/mem/ruby/recorder/CacheRecorder.hh
index 9f96f4fa0..839c4f6b1 100644
--- a/src/mem/ruby/recorder/CacheRecorder.hh
+++ b/src/mem/ruby/recorder/CacheRecorder.hh
@@ -1,5 +1,6 @@
 /*
- * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
+ * Copyright (c) 1999-2012 Mark D. Hill and David A. Wood
+ * Copyright (c) 2010 Advanced Micro Devices, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -34,37 +35,90 @@
 #ifndef __MEM_RUBY_RECORDER_CACHERECORDER_HH__
 #define __MEM_RUBY_RECORDER_CACHERECORDER_HH__
 
-#include <iostream>
-#include <string>
 #include <vector>
 
+#include "base/hashmap.hh"
 #include "mem/protocol/RubyRequestType.hh"
-#include "mem/ruby/common/Global.hh"
-#include "mem/ruby/recorder/TraceRecord.hh"
+#include "mem/ruby/common/Address.hh"
+#include "mem/ruby/common/DataBlock.hh"
+#include "mem/ruby/common/TypeDefines.hh"
 
-class Address;
-class TraceRecord;
 class Sequencer;
 
+/*!
+ * Class for recording cache contents. Note that the last element of the
+ * class is an array of length zero. It is used for creating a
+ * variable-length object, so that while writing the data to a file one
+ * does not need to copy the metadata and the actual data separately.
+ */
+class TraceRecord {
+  public:
+    int m_cntrl_id;
+    Time m_time;
+    physical_address_t m_data_address;
+    physical_address_t m_pc_address;
+    RubyRequestType m_type;
+    uint8_t m_data[0];
+
+    void print(std::ostream& out) const;
+};
+
 class CacheRecorder
 {
   public:
-    void addRecord(Sequencer* sequencer, const Address& data_addr,
-        const Address& pc_addr, RubyRequestType type, Time time);
-    int dumpRecords(std::string filename);
+    CacheRecorder();
+    ~CacheRecorder();
 
-    void print(std::ostream& out) const;
+    CacheRecorder(uint8_t* uncompressed_trace,
+                  uint64_t uncompressed_trace_size,
+                  std::vector<Sequencer*>& SequencerMap);
+    void addRecord(int cntrl, const physical_address_t data_addr,
+                   const physical_address_t pc_addr,  RubyRequestType type,
+                   Time time, DataBlock& data);
+
+    uint64 aggregateRecords(uint8_t** data, uint64 size);
+
+    /*!
+     * Function for flushing the memory contents of the caches to the
+     * main memory. It goes through the recorded contents of the caches,
+     * and issues flush requests. Except for the first one, a flush request
+     * is issued only after the previous one has completed. This currently
+     * requires use of MOESI Hammer protocol since only that protocol
+     * supports flush requests.
+     */
+    void enqueueNextFlushRequest();
+
+    /*!
+     * Function for warming up the memory and the caches. It goes
+     * through the recorded contents of the caches, as available in the
+     * checkpoint and issues fetch requests. Except for the first one, a
+     * fetch request is issued only after the previous one has completed.
+     * It should be possible to use this with any protocol.
+     */
+    void enqueueNextFetchRequest();
 
   private:
     // Private copy constructor and assignment operator
     CacheRecorder(const CacheRecorder& obj);
     CacheRecorder& operator=(const CacheRecorder& obj);
 
-    std::vector<TraceRecord> m_records;
+    std::vector<TraceRecord*> m_records;
+    uint8_t* m_uncompressed_trace;
+    uint64_t m_uncompressed_trace_size;
+    std::vector<Sequencer*> m_seq_map;
+    uint64_t m_bytes_read;
+    uint64_t m_records_read;
+    uint64_t m_records_flushed;
 };
 
+inline bool
+compareTraceRecords(const TraceRecord* n1, const TraceRecord* n2)
+{
+    return n1->m_time > n2->m_time;
+}
+
 inline std::ostream&
-operator<<(std::ostream& out, const CacheRecorder& obj)
+operator<<(std::ostream& out, const TraceRecord& obj)
 {
     obj.print(out);
     out << std::flush;
diff --git a/src/mem/ruby/recorder/SConscript b/src/mem/ruby/recorder/SConscript
index 035f896a4..e1b3d78b7 100644
--- a/src/mem/ruby/recorder/SConscript
+++ b/src/mem/ruby/recorder/SConscript
@@ -33,8 +33,4 @@ Import('*')
 if env['PROTOCOL'] == 'None':
     Return()
 
-SimObject('Tracer.py')
-
 Source('CacheRecorder.cc')
-Source('Tracer.cc')
-Source('TraceRecord.cc', Werror=False)
diff --git a/src/mem/ruby/recorder/TraceRecord.cc b/src/mem/ruby/recorder/TraceRecord.cc
deleted file mode 100644
index 79186d33b..000000000
--- a/src/mem/ruby/recorder/TraceRecord.cc
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "mem/protocol/RubyRequest.hh"
-#include "mem/ruby/recorder/TraceRecord.hh"
-#include "mem/ruby/system/Sequencer.hh"
-#include "mem/ruby/system/System.hh"
-#include "sim/sim_object.hh"
-
-using namespace std;
-
-TraceRecord::TraceRecord(Sequencer* _sequencer, const Address& data_addr,
-    const Address& pc_addr, RubyRequestType type, Time time)
-{
-    m_sequencer_ptr = _sequencer;
-    m_data_address = data_addr;
-    m_pc_address = pc_addr;
-    m_time = time;
-    m_type = type;
-
-    // Don't differentiate between store misses and atomic requests in
-    // the trace
-    if (m_type == RubyRequestType_Load_Linked) {
-        m_type = RubyRequestType_ST;
-    } else if (m_type == RubyRequestType_Store_Conditional) {
-        m_type = RubyRequestType_ST;
-    }
-}
-
-TraceRecord::TraceRecord(const TraceRecord& obj)
-{
-    // Call assignment operator
-    *this = obj;
-}
-
-TraceRecord&
-TraceRecord::operator=(const TraceRecord& obj)
-{
-    m_sequencer_ptr = obj.m_sequencer_ptr;
-    m_time = obj.m_time;
-    m_data_address = obj.m_data_address;
-    m_pc_address = obj.m_pc_address;
-    m_type = obj.m_type;
-    return *this;
-}
-
-void
-TraceRecord::issueRequest() const
-{
-    assert(m_sequencer_ptr != NULL);
-    Request req(m_data_address.getAddress(), 0, 0);
-    Packet *pkt = new Packet(&req, MemCmd(MemCmd::InvalidCmd), -1);
-
-    // Clear out the sequencer
-    while (!m_sequencer_ptr->empty()) {
-        g_eventQueue_ptr->triggerEvents(g_eventQueue_ptr->getTime() + 100);
-    }
-
-    m_sequencer_ptr->makeRequest(pkt);
-
-    // Clear out the sequencer
-    while (!m_sequencer_ptr->empty()) {
-        g_eventQueue_ptr->triggerEvents(g_eventQueue_ptr->getTime() + 100);
-    }
-}
-
-void
-TraceRecord::print(ostream& out) const
-{
-    out << "[TraceRecord: Node, " << m_sequencer_ptr->name() << ", "
-        << m_data_address << ", " << m_pc_address << ", "
-        << m_type << ", Time: " << m_time << "]";
-}
-
-void
-TraceRecord::output(ostream& out) const
-{
-    out << m_sequencer_ptr->name() << " ";
-    m_data_address.output(out);
-    out << " ";
-    m_pc_address.output(out);
-    out << " ";
-    out << m_type;
-    out << endl;
-}
-
-bool
-TraceRecord::input(istream& in)
-{
-    string sequencer_name;
-    in >> sequencer_name;
-
-    // The SimObject find function is slow and iterates through the
-    // simObjectList to find the sequencer pointer.  Therefore, expect
-    // trace playback to be slow.
-    m_sequencer_ptr = (Sequencer*)SimObject::find(sequencer_name.c_str());
-
-    m_data_address.input(in);
-    m_pc_address.input(in);
-    if (in.eof())
-        return false;
-
-    string type;
-    in >> type;
-    m_type = string_to_RubyRequestType(type);
-
-    // Ignore the rest of the line
-    char c = '\0';
-    while ((!in.eof()) && (c != '\n')) {
-        in.get(c);
-    }
-
-    return true;
-}
diff --git a/src/mem/ruby/recorder/TraceRecord.hh b/src/mem/ruby/recorder/TraceRecord.hh
deleted file mode 100644
index 42f213564..000000000
--- a/src/mem/ruby/recorder/TraceRecord.hh
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- * A entry in the cache request record. It is aware of the ruby time
- * and can issue the request back to the cache.
- */
-
-#ifndef __MEM_RUBY_RECORDER_TRACERECORD_HH__
-#define __MEM_RUBY_RECORDER_TRACERECORD_HH__
-
-#include <iostream>
-
-#include "mem/ruby/common/Address.hh"
-#include "mem/ruby/common/Global.hh"
-#include "mem/ruby/system/Sequencer.hh"
-
-class CacheMsg;
-
-class TraceRecord
-{
-  public:
-    TraceRecord(Sequencer* _sequencer, const Address& data_addr,
-        const Address& pc_addr, RubyRequestType type, Time time);
-
-    TraceRecord()
-    {
-        m_sequencer_ptr = NULL;
-        m_time = 0;
-        m_type = RubyRequestType_NULL;
-    }
-
-    TraceRecord(const TraceRecord& obj);
-    TraceRecord& operator=(const TraceRecord& obj);
-
-    void issueRequest() const;
-
-    void print(std::ostream& out) const;
-    void output(std::ostream& out) const;
-    bool input(std::istream& in);
-
-  private:
-    friend bool operator>(const TraceRecord& n1, const TraceRecord& n2);
-
-    Sequencer* m_sequencer_ptr;
-    Time m_time;
-    Address m_data_address;
-    Address m_pc_address;
-    RubyRequestType m_type;
-};
-
-inline bool
-operator>(const TraceRecord& n1, const TraceRecord& n2)
-{
-    return n1.m_time > n2.m_time;
-}
-
-inline std::ostream&
-operator<<(std::ostream& out, const TraceRecord& obj)
-{
-    obj.print(out);
-    out << std::flush;
-    return out;
-}
-
-#endif // __MEM_RUBY_RECORDER_TRACERECORD_HH__
diff --git a/src/mem/ruby/recorder/Tracer.cc b/src/mem/ruby/recorder/Tracer.cc
deleted file mode 100644
index fcfe5338c..000000000
--- a/src/mem/ruby/recorder/Tracer.cc
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "base/cprintf.hh"
-#include "mem/ruby/eventqueue/RubyEventQueue.hh"
-#include "mem/ruby/recorder/TraceRecord.hh"
-#include "mem/ruby/recorder/Tracer.hh"
-#include "mem/ruby/system/System.hh"
-
-using namespace std;
-
-Tracer::Tracer(const Params *p)
-    : SimObject(p)
-{
-    m_enabled = false;
-    m_warmup_length = p->warmup_length;
-    assert(m_warmup_length  > 0);
-    p->ruby_system->registerTracer(this);
-}
-
-void
-Tracer::startTrace(string filename)
-{
-    if (m_enabled)
-        stopTrace();
-
-    if (filename != "") {
-        m_trace_file.open(filename.c_str());
-        if (m_trace_file.fail()) {
-            cprintf("Error: error opening file '%s'\n", filename);
-            cprintf("Trace not enabled.\n");
-            return;
-        }
-        cprintf("Request trace enabled to output file '%s'\n", filename);
-        m_enabled = true;
-    }
-}
-
-void
-Tracer::stopTrace()
-{
-    if (m_enabled) {
-        m_trace_file.close();
-        cout << "Request trace file closed." << endl;
-        m_enabled = false;
-    }
-}
-
-void
-Tracer::traceRequest(Sequencer* sequencer, const Address& data_addr,
-    const Address& pc_addr, RubyRequestType type, Time time)
-{
-    assert(m_enabled);
-    TraceRecord tr(sequencer, data_addr, pc_addr, type, time);
-    tr.output(m_trace_file);
-}
-
-int
-Tracer::playbackTrace(string filename)
-{
-    igzstream in(filename.c_str());
-    if (in.fail()) {
-        cprintf("Error: error opening file '%s'\n", filename);
-        return 0;
-    }
-
-    time_t start_time = time(NULL);
-
-    TraceRecord record;
-    int counter = 0;
-    // Read in the next TraceRecord
-    bool ok = record.input(in);
-    while (ok) {
-        // Put it in the right cache
-        record.issueRequest();
-        counter++;
-
-        // Read in the next TraceRecord
-        ok = record.input(in);
-
-        // Clear the statistics after warmup
-        if (counter == m_warmup_length) {
-            cprintf("Clearing stats after warmup of length %s\n",
-                    m_warmup_length);
-            g_system_ptr->clearStats();
-        }
-    }
-
-    // Flush the prefetches through the system
-    // FIXME - should be smarter
-    g_eventQueue_ptr->triggerEvents(g_eventQueue_ptr->getTime() + 1000);
-
-    time_t stop_time = time(NULL);
-    double seconds = difftime(stop_time, start_time);
-    double minutes = seconds / 60.0;
-    cout << "playbackTrace: " << minutes << " minutes" << endl;
-
-    return counter;
-}
-
-void
-Tracer::print(ostream& out) const
-{
-}
-
-Tracer *
-RubyTracerParams::create()
-{
-    return new Tracer(this);
-}
diff --git a/src/mem/ruby/recorder/Tracer.hh b/src/mem/ruby/recorder/Tracer.hh
deleted file mode 100644
index cad47b28c..000000000
--- a/src/mem/ruby/recorder/Tracer.hh
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- * Controller class of the tracer. Can stop/start/playback the ruby
- * cache requests trace.
- */
-
-#ifndef __MEM_RUBY_RECORDER_TRACER_HH__
-#define __MEM_RUBY_RECORDER_TRACER_HH__
-
-#include <iostream>
-#include <string>
-
-#include "mem/protocol/RubyRequestType.hh"
-#include "mem/ruby/common/Global.hh"
-#include "params/RubyTracer.hh"
-#include "sim/sim_object.hh"
-#include "gzstream.hh"
-
-class Address;
-class TraceRecord;
-class Sequencer;
-
-class Tracer : public SimObject
-{
-  public:
-    typedef RubyTracerParams Params;
-    Tracer(const Params *p);
-
-    void startTrace(std::string filename);
-    void stopTrace();
-    bool traceEnabled() { return m_enabled; }
-    void traceRequest(Sequencer* sequencer, const Address& data_addr,
-        const Address& pc_addr, RubyRequestType type, Time time);
-
-    void print(std::ostream& out) const;
-
-    int playbackTrace(std::string filename);
-
-  private:
-    // Private copy constructor and assignment operator
-    Tracer(const Tracer& obj);
-    Tracer& operator=(const Tracer& obj);
-
-    ogzstream m_trace_file;
-    bool m_enabled;
-
-    //added by SS
-    int m_warmup_length;
-};
-
-inline std::ostream&
-operator<<(std::ostream& out, const Tracer& obj)
-{
-    obj.print(out);
-    out << std::flush;
-    return out;
-}
-
-#endif // __MEM_RUBY_RECORDER_TRACER_HH__
diff --git a/src/mem/ruby/recorder/Tracer.py b/src/mem/ruby/recorder/Tracer.py
deleted file mode 100644
index 7a689f9f7..000000000
--- a/src/mem/ruby/recorder/Tracer.py
+++ /dev/null
@@ -1,37 +0,0 @@
-# Copyright (c) 2009 Advanced Micro Devices, Inc.
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met: redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer;
-# redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution;
-# neither the name of the copyright holders nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-# Authors: Steve Reinhardt
-#          Brad Beckmann
-
-from m5.params import *
-from m5.SimObject import SimObject
-
-class RubyTracer(SimObject):
-    type = 'RubyTracer'
-    cxx_class = 'Tracer'
-    warmup_length = Param.Int(100000, "")
-    ruby_system = Param.RubySystem("")
diff --git a/src/mem/ruby/slicc_interface/AbstractController.hh b/src/mem/ruby/slicc_interface/AbstractController.hh
index ca37a90de..a0e3b3fbb 100644
--- a/src/mem/ruby/slicc_interface/AbstractController.hh
+++ b/src/mem/ruby/slicc_interface/AbstractController.hh
@@ -33,12 +33,11 @@
 #include <string>
 
 #include "mem/protocol/AccessPermission.hh"
-#include "mem/protocol/MachineType.hh"
 #include "mem/ruby/common/Address.hh"
 #include "mem/ruby/common/Consumer.hh"
 #include "mem/ruby/common/DataBlock.hh"
 #include "mem/ruby/network/Network.hh"
-#include "mem/ruby/system/System.hh"
+#include "mem/ruby/recorder/CacheRecorder.hh"
 #include "params/RubyController.hh"
 #include "sim/sim_object.hh"
 
@@ -68,6 +67,8 @@ class AbstractController : public SimObject, public Consumer
     virtual void wakeup() = 0;
     //  virtual void dumpStats(std::ostream & out) = 0;
     virtual void clearStats() = 0;
+    virtual void recordCacheTrace(int cntrl, CacheRecorder* tr) = 0;
+    virtual Sequencer* getSequencer() const = 0;
 };
 
 #endif // __MEM_RUBY_SLICC_INTERFACE_ABSTRACTCONTROLLER_HH__
diff --git a/src/mem/ruby/system/CacheMemory.cc b/src/mem/ruby/system/CacheMemory.cc
index 1564128d3..9f1fe6320 100644
--- a/src/mem/ruby/system/CacheMemory.cc
+++ b/src/mem/ruby/system/CacheMemory.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
+ * Copyright (c) 1999-2012 Mark D. Hill and David A. Wood
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -28,7 +28,9 @@
 
 #include "base/intmath.hh"
 #include "debug/RubyCache.hh"
+#include "mem/protocol/AccessPermission.hh"
 #include "mem/ruby/system/CacheMemory.hh"
+#include "mem/ruby/system/System.hh"
 
 using namespace std;
 
@@ -364,31 +366,42 @@ CacheMemory::profileGenericRequest(GenericRequestType requestType,
 }
 
 void
-CacheMemory::recordCacheContents(CacheRecorder& tr) const
+CacheMemory::recordCacheContents(int cntrl, CacheRecorder* tr) const
 {
+    uint64 warmedUpBlocks = 0;
+    uint64 totalBlocks M5_VAR_USED = (uint64)m_cache_num_sets
+                                                  * (uint64)m_cache_assoc;
+    // Trace each allocated entry that holds read or read/write permission.
     for (int i = 0; i < m_cache_num_sets; i++) {
         for (int j = 0; j < m_cache_assoc; j++) {
-            AccessPermission perm = m_cache[i][j]->m_Permission;
-            RubyRequestType request_type = RubyRequestType_NULL;
-            if (perm == AccessPermission_Read_Only) {
-                if (m_is_instruction_only_cache) {
-                    request_type = RubyRequestType_IFETCH;
-                } else {
-                    request_type = RubyRequestType_LD;
+            if (m_cache[i][j] != NULL) {
+                AccessPermission perm = m_cache[i][j]->m_Permission;
+                RubyRequestType request_type = RubyRequestType_NULL;
+                if (perm == AccessPermission_Read_Only) {
+                    if (m_is_instruction_only_cache) {
+                        request_type = RubyRequestType_IFETCH;
+                    } else {
+                        request_type = RubyRequestType_LD;
+                    }
+                } else if (perm == AccessPermission_Read_Write) {
+                    request_type = RubyRequestType_ST;
                 }
-            } else if (perm == AccessPermission_Read_Write) {
-                request_type = RubyRequestType_ST;
-            }
 
-            if (request_type != RubyRequestType_NULL) {
-#if 0
-                tr.addRecord(m_chip_ptr->getID(), m_cache[i][j].m_Address,
-                             Address(0), request_type,
-                             m_replacementPolicy_ptr->getLastAccess(i, j));
-#endif
+                if (request_type != RubyRequestType_NULL) {
+                    tr->addRecord(cntrl, m_cache[i][j]->m_Address.getAddress(),
+                                  0, request_type,
+                                  m_replacementPolicy_ptr->getLastAccess(i, j),
+                                  m_cache[i][j]->getDataBlk());
+                    warmedUpBlocks++;
+                }
             }
         }
     }
+
+    DPRINTF(RubyCache, "%s: %lli blocks of %lli total blocks "
+            "recorded %.2f%% \n", name().c_str(), warmedUpBlocks,
+            totalBlocks,
+            (float(warmedUpBlocks)/float(totalBlocks))*100.0);
 }
 
 void
diff --git a/src/mem/ruby/system/CacheMemory.hh b/src/mem/ruby/system/CacheMemory.hh
index f0acba9cb..f270e88cd 100644
--- a/src/mem/ruby/system/CacheMemory.hh
+++ b/src/mem/ruby/system/CacheMemory.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
+ * Copyright (c) 1999-2012 Mark D. Hill and David A. Wood
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -34,21 +34,15 @@
 #include <vector>
 
 #include "base/hashmap.hh"
-#include "mem/protocol/AccessPermission.hh"
 #include "mem/protocol/GenericRequestType.hh"
 #include "mem/protocol/RubyRequest.hh"
-#include "mem/protocol/RubyRequestType.hh"
-#include "mem/ruby/common/Address.hh"
 #include "mem/ruby/common/DataBlock.hh"
-#include "mem/ruby/common/Global.hh"
 #include "mem/ruby/profiler/CacheProfiler.hh"
 #include "mem/ruby/recorder/CacheRecorder.hh"
 #include "mem/ruby/slicc_interface/AbstractCacheEntry.hh"
-#include "mem/ruby/slicc_interface/AbstractController.hh"
 #include "mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh"
 #include "mem/ruby/system/LRUPolicy.hh"
 #include "mem/ruby/system/PseudoLRUPolicy.hh"
-#include "mem/ruby/system/System.hh"
 #include "params/RubyCache.hh"
 #include "sim/sim_object.hh"
 
@@ -100,12 +94,7 @@ class CacheMemory : public SimObject
     int getLatency() const { return m_latency; }
 
     // Hook for checkpointing the contents of the cache
-    void recordCacheContents(CacheRecorder& tr) const;
-    void
-    setAsInstructionCache(bool is_icache)
-    {
-        m_is_instruction_only_cache = is_icache;
-    }
+    void recordCacheContents(int cntrl, CacheRecorder* tr) const;
 
     // Set this address to most recently used
     void setMRU(const Address& address);
@@ -146,7 +135,6 @@ class CacheMemory : public SimObject
 
     // Data Members (m_prefix)
     bool m_is_instruction_only_cache;
-    bool m_is_data_only_cache;
 
     // The first index is the # of cache lines.
     // The second index is the the amount associativity.
diff --git a/src/mem/ruby/system/DMASequencer.hh b/src/mem/ruby/system/DMASequencer.hh
index 5f6b9f100..099c1d991 100644
--- a/src/mem/ruby/system/DMASequencer.hh
+++ b/src/mem/ruby/system/DMASequencer.hh
@@ -55,6 +55,9 @@ class DMASequencer : public RubyPort
     /* external interface */
     RequestStatus makeRequest(PacketPtr pkt);
     bool busy() { return m_is_busy;}
+    int outstandingCount() const { return (m_is_busy ? 1 : 0); }
+    bool isDeadlockEventScheduled() const { return false; }
+    void descheduleDeadlockEvent() {}
 
     /* SLICC callback */
     void dataCallback(const DataBlock & dblk);
diff --git a/src/mem/ruby/system/DirectoryMemory.cc b/src/mem/ruby/system/DirectoryMemory.cc
index 03aa68919..d2e00ab3b 100644
--- a/src/mem/ruby/system/DirectoryMemory.cc
+++ b/src/mem/ruby/system/DirectoryMemory.cc
@@ -58,6 +58,7 @@ DirectoryMemory::init()
 
     if (m_use_map) {
         m_sparseMemory = new SparseMemory(m_map_levels);
+        g_system_ptr->registerSparseMemory(m_sparseMemory);
     } else {
         m_entries = new AbstractEntry*[m_num_entries];
         for (int i = 0; i < m_num_entries; i++)
diff --git a/src/mem/ruby/system/MemoryVector.hh b/src/mem/ruby/system/MemoryVector.hh
index 6719b9fb6..9bd3516c2 100644
--- a/src/mem/ruby/system/MemoryVector.hh
+++ b/src/mem/ruby/system/MemoryVector.hh
@@ -29,6 +29,7 @@
 #ifndef __MEM_RUBY_SYSTEM_MEMORYVECTOR_HH__
 #define __MEM_RUBY_SYSTEM_MEMORYVECTOR_HH__
 
+#include "base/trace.hh"
 #include "mem/ruby/common/Address.hh"
 
 class DirectoryMemory;
@@ -48,6 +49,8 @@ class MemoryVector
 
     void write(const Address & paddr, uint8* data, int len);
     uint8* read(const Address & paddr, uint8* data, int len);
+    uint32 collatePages(uint8* &raw_data);
+    void populatePages(uint8* raw_data);
 
   private:
     uint8* getBlockPtr(const PhysAddress & addr);
@@ -56,6 +59,7 @@ class MemoryVector
     uint8** m_pages;
     uint32 m_num_pages;
     const uint32 m_page_offset_mask;
+    static const uint32 PAGE_SIZE = 4096;
 };
 
 inline
@@ -97,7 +101,7 @@ MemoryVector::resize(uint32 size)
         delete [] m_pages;
     }
     m_size = size;
-    assert(size%4096 == 0);
+    assert(size%PAGE_SIZE == 0);
     m_num_pages = size >> 12;
     m_pages = new uint8*[m_num_pages];
     memset(m_pages, 0, m_num_pages * sizeof(uint8*));
@@ -118,8 +122,8 @@ MemoryVector::write(const Address & paddr, uint8* data, int len)
         }
         if (all_zeros)
             return;
-        m_pages[page_num] = new uint8[4096];
-        memset(m_pages[page_num], 0, 4096);
+        m_pages[page_num] = new uint8[PAGE_SIZE];
+        memset(m_pages[page_num], 0, PAGE_SIZE);
         uint32 offset = paddr.getAddress() & m_page_offset_mask;
         memcpy(&m_pages[page_num][offset], data, len);
     } else {
@@ -147,10 +151,82 @@ MemoryVector::getBlockPtr(const PhysAddress & paddr)
 {
     uint32 page_num = paddr.getAddress() >> 12;
     if (m_pages[page_num] == 0) {
-        m_pages[page_num] = new uint8[4096];
-        memset(m_pages[page_num], 0, 4096);
+        m_pages[page_num] = new uint8[PAGE_SIZE];
+        memset(m_pages[page_num], 0, PAGE_SIZE);
     }
     return &m_pages[page_num][paddr.getAddress()&m_page_offset_mask];
 }
 
+/*!
+ * Serialize every page of the memory into one newly allocated buffer.
+ * Layout: a uint32 page count, then for each page a one-byte flag (0 if the
+ * page pointer is NULL, 1 otherwise) followed, for non-NULL pages only, by
+ * PAGE_SIZE bytes of page data. The buffer is allocated with new[] and
+ * handed back through raw_data; the return value is the bytes written.
+ */
+
+inline uint32
+MemoryVector::collatePages(uint8* &raw_data)
+{
+    uint32 num_zero_pages = 0;
+    uint32 data_size = 0;
+
+    for (uint32 i = 0;i < m_num_pages; ++i)
+    {
+        if (m_pages[i] == 0) num_zero_pages++;
+    }
+
+    raw_data = new uint8[  sizeof(uint32) /* number of pages */
+                         + m_num_pages /* one flag byte per page */
+                         + PAGE_SIZE * (m_num_pages - num_zero_pages)];
+
+    /* Write the number of pages to be stored. */
+    memcpy(raw_data, &m_num_pages, sizeof(uint32));
+    data_size = sizeof(uint32);
+
+    for (uint32 i = 0;i < m_num_pages; ++i)
+    {
+        if (m_pages[i] == 0) {
+            raw_data[data_size] = 0;
+        } else {
+            raw_data[data_size] = 1;
+            memcpy(raw_data + data_size + 1, m_pages[i], PAGE_SIZE);
+            data_size += PAGE_SIZE;
+        }
+        data_size += 1;
+    }
+
+    return data_size;
+}
+
+/*!
+ * Rebuild the pages of the memory from a buffer in the format produced by
+ * collatePages(). Each page has a flag byte associated with it that records
+ * whether the page pointer was NULL when the pages were collated. Asserts
+ * that the recorded page count equals m_num_pages and that every page is
+ * still unallocated before it is filled in.
+ */
+inline void
+MemoryVector::populatePages(uint8* raw_data)
+{
+    uint32 data_size = 0;
+    uint32 num_pages = 0;
+
+    /* Read the number of pages that were stored. */
+    memcpy(&num_pages, raw_data, sizeof(uint32));
+    data_size = sizeof(uint32);
+    assert(num_pages == m_num_pages);
+
+    for (uint32 i = 0;i < m_num_pages; ++i)
+    {
+        assert(m_pages[i] == 0);
+        if (raw_data[data_size] != 0) {
+            m_pages[i] = new uint8[PAGE_SIZE];
+            memcpy(m_pages[i], raw_data + data_size + 1, PAGE_SIZE);
+            data_size += PAGE_SIZE;
+        }
+        data_size += 1;
+    }
+}
+
 #endif // __MEM_RUBY_SYSTEM_MEMORYVECTOR_HH__
diff --git a/src/mem/ruby/system/PerfectCacheMemory.hh b/src/mem/ruby/system/PerfectCacheMemory.hh
index 772b3d1f9..b880b6434 100644
--- a/src/mem/ruby/system/PerfectCacheMemory.hh
+++ b/src/mem/ruby/system/PerfectCacheMemory.hh
@@ -32,7 +32,6 @@
 #include "base/hashmap.hh"
 #include "mem/protocol/AccessPermission.hh"
 #include "mem/ruby/common/Address.hh"
-#include "mem/ruby/common/Global.hh"
 
 template<class ENTRY>
 struct PerfectCacheLineState
@@ -57,10 +56,6 @@ class PerfectCacheMemory
 
     static void printConfig(std::ostream& out);
 
-    // perform a cache access and see if we hit or not.  Return true
-    // on a hit.
-    bool tryCacheAccess(const CacheMsg& msg, bool& block_stc, ENTRY*& entry);
-
     // tests to see if an address is present in the cache
     bool isTagPresent(const Address& address) const;
 
@@ -118,15 +113,6 @@ PerfectCacheMemory<ENTRY>::printConfig(std::ostream& out)
 {
 }
 
-template<class ENTRY>
-inline bool
-PerfectCacheMemory<ENTRY>::tryCacheAccess(const CacheMsg& msg,
-                                          bool& block_stc, ENTRY*& entry)
-{
-    panic("not implemented");
-    return true;
-}
-
 // tests to see if an address is present in the cache
 template<class ENTRY>
 inline bool
diff --git a/src/mem/ruby/system/RubyPort.cc b/src/mem/ruby/system/RubyPort.cc
index f7bde739e..64faf6aed 100644
--- a/src/mem/ruby/system/RubyPort.cc
+++ b/src/mem/ruby/system/RubyPort.cc
@@ -27,11 +27,11 @@
  */
 
 #include "cpu/testers/rubytest/RubyTester.hh"
+#include "debug/Config.hh"
 #include "debug/Ruby.hh"
 #include "mem/protocol/AccessPermission.hh"
 #include "mem/ruby/slicc_interface/AbstractController.hh"
 #include "mem/ruby/system/RubyPort.hh"
-#include "mem/physical.hh"
 
 RubyPort::RubyPort(const Params *p)
     : MemObject(p)
@@ -51,6 +51,8 @@ RubyPort::RubyPort(const Params *p)
     m_usingRubyTester = p->using_ruby_tester;
     access_phys_mem = p->access_phys_mem;
 
+    drainEvent = NULL;
+
     ruby_system = p->ruby_system;
     waitingOnSequencer = false;
 }
@@ -66,8 +68,10 @@ Port *
 RubyPort::getPort(const std::string &if_name, int idx)
 {
     if (if_name == "port") {
-        return new M5Port(csprintf("%s-port%d", name(), idx), this,
-                          ruby_system, access_phys_mem);
+        M5Port* cpuPort = new M5Port(csprintf("%s-port%d", name(), idx),
+                                     this, ruby_system, access_phys_mem);
+        cpu_ports.push_back(cpuPort);
+        return cpuPort;
     }
 
     if (if_name == "pio_port") {
@@ -508,6 +512,82 @@ RubyPort::ruby_hit_callback(PacketPtr pkt)
             (*i)->sendRetry();
         }
     }
+
+    testDrainComplete();
+}
+
+void
+RubyPort::testDrainComplete()
+{
+    // A drain was requested earlier but could not finish; re-check it now.
+    if (drainEvent != NULL) {
+        unsigned int drainCount = getDrainCount(drainEvent);
+        DPRINTF(Config, "Drain count: %u\n", drainCount);
+        if (drainCount == 0) {
+            drainEvent->process();
+            // The drain event has been processed, so forget it.
+            drainEvent = NULL;
+        }
+    }
+}
+
+unsigned int
+RubyPort::getDrainCount(Event *de)
+{
+    int count = 0;
+    // Total the work that must finish before this port counts as drained.
+    // If the sequencer is not empty, then requests need to drain.
+    // The outstandingCount is the number of requests outstanding and thus the
+    // number of times M5's timing port will process the drain event.
+    //
+    count += outstandingCount();
+
+    DPRINTF(Config, "outstanding count %d\n", outstandingCount());
+
+    // To simplify the draining process, the sequencer's deadlock detection
+    // event should have been descheduled.
+    assert(isDeadlockEventScheduled() == false);
+
+    if (pio_port != NULL) {
+        count += pio_port->drain(de);
+        DPRINTF(Config, "count after pio check %d\n", count);
+    }
+    if (physMemPort != NULL) {
+        count += physMemPort->drain(de);
+        DPRINTF(Config, "count after physmem check %d\n", count);
+    }
+
+    for (CpuPortIter p_iter = cpu_ports.begin(); p_iter != cpu_ports.end();
+         p_iter++) {
+        M5Port* cpu_port = *p_iter;
+        count += cpu_port->drain(de);
+        DPRINTF(Config, "count after cpu port check %d\n", count);
+    }
+
+    DPRINTF(Config, "final count %d\n", count);
+
+    return count;
+}
+
+unsigned int
+RubyPort::drain(Event *de)
+{
+    if (isDeadlockEventScheduled()) {
+        descheduleDeadlockEvent();
+    }
+    // (getDrainCount() asserts the deadlock event is no longer scheduled.)
+    int count = getDrainCount(de);
+
+    // Still busy: remember the event so testDrainComplete() can fire it.
+    if (count != 0) {
+        drainEvent = de;
+
+        changeState(SimObject::Draining);
+        return count;
+    }
+
+    changeState(SimObject::Drained);
+    return 0;
+}
 
 void
diff --git a/src/mem/ruby/system/RubyPort.hh b/src/mem/ruby/system/RubyPort.hh
index 88e865766..d8dbe0cda 100644
--- a/src/mem/ruby/system/RubyPort.hh
+++ b/src/mem/ruby/system/RubyPort.hh
@@ -33,7 +33,6 @@
 #include <string>
 
 #include "mem/protocol/RequestStatus.hh"
-#include "mem/ruby/slicc_interface/RubyRequest.hh"
 #include "mem/ruby/system/System.hh"
 #include "mem/mem_object.hh"
 #include "mem/physical.hh"
@@ -115,17 +114,23 @@ class RubyPort : public MemObject
     Port *getPort(const std::string &if_name, int idx);
 
     virtual RequestStatus makeRequest(PacketPtr pkt) = 0;
+    virtual int outstandingCount() const = 0;
+    virtual bool isDeadlockEventScheduled() const = 0;
+    virtual void descheduleDeadlockEvent() = 0;
 
     //
     // Called by the controller to give the sequencer a pointer.
     // A pointer to the controller is needed for atomic support.
     //
     void setController(AbstractController* _cntrl) { m_controller = _cntrl; }
+    int getId() { return m_version; }
+    unsigned int drain(Event *de);
 
   protected:
     const std::string m_name;
     void ruby_hit_callback(PacketPtr pkt);
     void hit(PacketPtr pkt);
+    void testDrainComplete();
 
     int m_version;
     AbstractController* m_controller;
@@ -143,11 +148,19 @@ class RubyPort : public MemObject
         }
     }
 
+    unsigned int getDrainCount(Event *de);
+
     uint16_t m_port_id;
     uint64_t m_request_cnt;
 
     M5Port* physMemPort;
 
+    /*! Vector of CPU ports attached to this Ruby port. */
+    typedef std::vector<M5Port*>::iterator CpuPortIter;
+    std::vector<M5Port*> cpu_ports;
+
+    Event *drainEvent;
+
     PhysicalMemory* physmem;
     RubySystem* ruby_system;
 
diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc
index 7137dcc28..3f9ceb34d 100644
--- a/src/mem/ruby/system/Sequencer.cc
+++ b/src/mem/ruby/system/Sequencer.cc
@@ -40,9 +40,7 @@
 #include "mem/protocol/RubyAccessMode.hh"
 #include "mem/ruby/buffers/MessageBuffer.hh"
 #include "mem/ruby/common/Global.hh"
-#include "mem/ruby/common/SubBlock.hh"
 #include "mem/ruby/profiler/Profiler.hh"
-#include "mem/ruby/recorder/Tracer.hh"
 #include "mem/ruby/slicc_interface/RubyRequest.hh"
 #include "mem/ruby/system/CacheMemory.hh"
 #include "mem/ruby/system/Sequencer.hh"
@@ -521,7 +519,11 @@ Sequencer::hitCallback(SequencerRequest* srequest,
     }
 
     // update the data
-    if (pkt->getPtr<uint8_t>(true) != NULL) {
+    if (g_system_ptr->m_warmup_enabled) {
+        assert(pkt->getPtr<uint8_t>(false) != NULL);
+        data.setData(pkt->getPtr<uint8_t>(false),
+                     request_address.getOffset(), pkt->getSize());
+    } else if (pkt->getPtr<uint8_t>(true) != NULL) {
         if ((type == RubyRequestType_LD) ||
             (type == RubyRequestType_IFETCH) ||
             (type == RubyRequestType_RMW_Read) ||
@@ -553,8 +555,17 @@ Sequencer::hitCallback(SequencerRequest* srequest,
         testerSenderState->subBlock->mergeFrom(data);
     }
 
-    ruby_hit_callback(pkt);
     delete srequest;
+
+    if (g_system_ptr->m_warmup_enabled) {
+        delete pkt;
+        g_system_ptr->m_cache_recorder->enqueueNextFetchRequest();
+    } else if (g_system_ptr->m_cooldown_enabled) {
+        delete pkt;
+        g_system_ptr->m_cache_recorder->enqueueNextFlushRequest();
+    } else {
+        ruby_hit_callback(pkt);
+    }
 }
 
 bool
diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh
index 7c2d0af13..4a6d46c01 100644
--- a/src/mem/ruby/system/Sequencer.hh
+++ b/src/mem/ruby/system/Sequencer.hh
@@ -39,8 +39,6 @@
 #include "mem/ruby/system/RubyPort.hh"
 
 class DataBlock;
-class CacheMsg;
-class MachineID;
 class CacheMemory;
 
 class RubySequencerParams;
@@ -100,6 +98,18 @@ class Sequencer : public RubyPort, public Consumer
 
     RequestStatus makeRequest(PacketPtr pkt);
     bool empty() const;
+    int outstandingCount() const { return m_outstanding_count; }
+    bool
+    isDeadlockEventScheduled() const
+    {
+        return deadlockCheckEvent.scheduled();
+    }
+
+    void
+    descheduleDeadlockEvent()
+    {
+        deschedule(deadlockCheckEvent);
+    }
 
     void print(std::ostream& out) const;
     void printStats(std::ostream& out) const;
diff --git a/src/mem/ruby/system/SparseMemory.cc b/src/mem/ruby/system/SparseMemory.cc
index 8e4f37c46..db8d494f8 100644
--- a/src/mem/ruby/system/SparseMemory.cc
+++ b/src/mem/ruby/system/SparseMemory.cc
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2009 Advanced Micro Devices, Inc.
+ * Copyright (c) 2012 Mark D. Hill and David A. Wood
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -26,6 +27,8 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <queue>
+
 #include "debug/RubyCache.hh"
 #include "mem/ruby/system/SparseMemory.hh"
 #include "mem/ruby/system/System.hh"
@@ -82,19 +85,19 @@ SparseMemory::recursivelyRemoveTables(SparseMapType* curTable, int curLevel)
     SparseMapType::iterator iter;
 
     for (iter = curTable->begin(); iter != curTable->end(); iter++) {
-        SparseMemEntry* entryStruct = &((*iter).second);
+        SparseMemEntry entry = (*iter).second;
 
         if (curLevel != (m_number_of_levels - 1)) {
             // If the not at the last level, analyze those lower level
             // tables first, then delete those next tables
-            SparseMapType* nextTable = (SparseMapType*)(entryStruct->entry);
+            SparseMapType* nextTable = (SparseMapType*)(entry);
             recursivelyRemoveTables(nextTable, (curLevel + 1));
             delete nextTable;
         } else {
             // If at the last level, delete the directory entry
-            delete (AbstractEntry*)(entryStruct->entry);
+            delete (AbstractEntry*)(entry);
         }
-        entryStruct->entry = NULL;
+        entry = NULL;
     }
 
     // Once all entries have been deleted, erase the entries
@@ -134,7 +137,7 @@ SparseMemory::exist(const Address& address) const
         // If the address is found, move on to the next level.
         // Otherwise, return not found
         if (curTable->count(curAddress) != 0) {
-            curTable = (SparseMapType*)(((*curTable)[curAddress]).entry);
+            curTable = (SparseMapType*)((*curTable)[curAddress]);
         } else {
             DPRINTF(RubyCache, "Not found\n");
             return false;
@@ -156,7 +159,6 @@ SparseMemory::add(const Address& address, AbstractEntry* entry)
 
     Address curAddress;
     SparseMapType* curTable = m_map_head;
-    SparseMemEntry* entryStruct = NULL;
 
     // Initiallize the high bit to be the total number of bits plus
     // the block offset.  However the highest bit index is one less
@@ -179,7 +181,7 @@ SparseMemory::add(const Address& address, AbstractEntry* entry)
         // if the address exists in the cur table, move on.  Otherwise
         // create a new table.
         if (curTable->count(curAddress) != 0) {
-            curTable = (SparseMapType*)(((*curTable)[curAddress]).entry);
+            curTable = (SparseMapType*)((*curTable)[curAddress]);
         } else {
             m_adds_per_level[level]++;
 
@@ -194,9 +196,7 @@ SparseMemory::add(const Address& address, AbstractEntry* entry)
 
             // Create the pointer container SparseMemEntry and add it
             // to the table.
-            entryStruct = new SparseMemEntry;
-            entryStruct->entry = newEntry;
-            (*curTable)[curAddress] = *entryStruct;
+            (*curTable)[curAddress] = newEntry;
 
             // Move to the next level of the heirarchy
             curTable = (SparseMapType*)newEntry;
@@ -215,7 +215,7 @@ SparseMemory::recursivelyRemoveLevels(const Address& address,
 {
     Address curAddress;
     CurNextInfo nextInfo;
-    SparseMemEntry* entryStruct;
+    SparseMemEntry entry;
 
     // create the appropriate address for this level
     // Note: that set Address is inclusive of the specified range,
@@ -231,11 +231,11 @@ SparseMemory::recursivelyRemoveLevels(const Address& address,
 
     assert(curInfo.curTable->count(curAddress) != 0);
 
-    entryStruct = &((*(curInfo.curTable))[curAddress]);
+    entry = (*(curInfo.curTable))[curAddress];
 
     if (curInfo.level < (m_number_of_levels - 1)) {
         // set up next level's info
-        nextInfo.curTable = (SparseMapType*)(entryStruct->entry);
+        nextInfo.curTable = (SparseMapType*)(entry);
         nextInfo.level = curInfo.level + 1;
 
         nextInfo.highBit = curInfo.highBit -
@@ -252,15 +252,15 @@ SparseMemory::recursivelyRemoveLevels(const Address& address,
         if (tableSize == 0) {
             m_removes_per_level[curInfo.level]++;
             delete nextInfo.curTable;
-            entryStruct->entry = NULL;
+            entry = NULL;
             curInfo.curTable->erase(curAddress);
         }
     } else {
         // if this is the last level, we have reached the Directory
         // Entry and thus we should delete it including the
         // SparseMemEntry container struct.
-        delete (AbstractEntry*)(entryStruct->entry);
-        entryStruct->entry = NULL;
+        delete (AbstractEntry*)(entry);
+        entry = NULL;
         curInfo.curTable->erase(curAddress);
         m_removes_per_level[curInfo.level]++;
     }
@@ -331,7 +331,7 @@ SparseMemory::lookup(const Address& address)
         // If the address is found, move on to the next level.
         // Otherwise, return not found
         if (curTable->count(curAddress) != 0) {
-            curTable = (SparseMapType*)(((*curTable)[curAddress]).entry);
+            curTable = (SparseMapType*)((*curTable)[curAddress]);
         } else {
             DPRINTF(RubyCache, "Not found\n");
             return NULL;
@@ -345,6 +345,70 @@ SparseMemory::lookup(const Address& address)
 }
 
 void
+SparseMemory::recordBlocks(int cntrl_id, CacheRecorder* tr) const
+{
+    queue<SparseMapType*> unexplored_nodes[2];
+    queue<physical_address_t> address_of_nodes[2];
+    // Two queues alternate by level parity: current level / next level.
+    unexplored_nodes[0].push(m_map_head);
+    address_of_nodes[0].push(0);
+
+    int parity_of_level = 0;
+    physical_address_t address, temp_address;
+    Address curAddress;
+
+    // Initialize the high bit to be the total number of bits plus
+    // the block offset.  However the highest bit index is one less
+    // than this value.
+    int highBit = m_total_number_of_bits + RubySystem::getBlockSizeBits();
+    int lowBit;
+
+    for (int cur_level = 0; cur_level < m_number_of_levels; cur_level++) {
+
+        // Compute the lowest bit position covered by this level's index.
+        // highBit is one past the top bit of the level (see above), so
+        // subtracting the level's bit count gives the shift that places
+        // this level's key within the reconstructed physical address.
+        lowBit = highBit - m_number_of_bits_per_level[cur_level];
+
+        while (!unexplored_nodes[parity_of_level].empty()) {
+
+            SparseMapType* node = unexplored_nodes[parity_of_level].front();
+            unexplored_nodes[parity_of_level].pop();
+
+            address = address_of_nodes[parity_of_level].front();
+            address_of_nodes[parity_of_level].pop();
+
+            SparseMapType::iterator iter;
+
+            for (iter = node->begin(); iter != node->end(); iter++) {
+                SparseMemEntry entry = (*iter).second;
+                curAddress = (*iter).first;
+
+                if (cur_level != (m_number_of_levels - 1)) {
+                    // Not the last level: queue the child table for later
+                    unexplored_nodes[1 - parity_of_level].push(
+                                                     (SparseMapType*)(entry));
+                    address_of_nodes[1 - parity_of_level].push(address |
+                                         (curAddress.getAddress() << lowBit));
+                } else {
+                    // Last level: record the block as a store request
+                    temp_address = address | (curAddress.getAddress()
+                                                                   << lowBit);
+                    DataBlock block = ((AbstractEntry*)entry)->getDataBlk();
+                    tr->addRecord(cntrl_id, temp_address, 0, RubyRequestType_ST, 0,
+                                  block);
+                }
+            }
+        }
+
+        // Adjust the highBit value for the next level
+        highBit -= m_number_of_bits_per_level[cur_level];
+        parity_of_level = 1 - parity_of_level;
+    }
+}
+
+void
 SparseMemory::print(ostream& out) const
 {
 }
diff --git a/src/mem/ruby/system/SparseMemory.hh b/src/mem/ruby/system/SparseMemory.hh
index f6937ef54..e4237dbcd 100644
--- a/src/mem/ruby/system/SparseMemory.hh
+++ b/src/mem/ruby/system/SparseMemory.hh
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2009 Advanced Micro Devices, Inc.
+ * Copyright (c) 2012 Mark D. Hill and David A. Wood
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -32,15 +33,11 @@
 #include <iostream>
 
 #include "base/hashmap.hh"
-#include "mem/ruby/slicc_interface/AbstractEntry.hh"
 #include "mem/ruby/common/Address.hh"
-#include "mem/ruby/common/Global.hh"
-
-struct SparseMemEntry
-{
-    void* entry;
-};
+#include "mem/ruby/recorder/CacheRecorder.hh"
+#include "mem/ruby/slicc_interface/AbstractEntry.hh"
 
+typedef void* SparseMemEntry;
 typedef m5::hash_map<Address, SparseMemEntry> SparseMapType;
 
 struct CurNextInfo
@@ -63,6 +60,14 @@ class SparseMemory
     void add(const Address& address, AbstractEntry*);
     void remove(const Address& address);
 
+    /*!
+     * Function for recording the contents of memory. This function walks
+     * through all the levels of the sparse memory in a breadth first
+     * fashion. This might need more memory than a depth first approach.
+     * But breadth first seems easier to me than a depth first approach.
+     */
+    void recordBlocks(int cntrl_id, CacheRecorder *) const;
+
     AbstractEntry* lookup(const Address& address);
 
     // Print cache contents
@@ -95,12 +100,4 @@ class SparseMemory
     uint64_t* m_removes_per_level;
 };
 
-inline std::ostream&
-operator<<(std::ostream& out, const SparseMemEntry& obj)
-{
-    out << "SparseMemEntry";
-    out << std::flush;
-    return out;
-}
-
 #endif // __MEM_RUBY_SYSTEM_SPARSEMEMORY_HH__
diff --git a/src/mem/ruby/system/System.cc b/src/mem/ruby/system/System.cc
index 81824b9b7..6f191819b 100644
--- a/src/mem/ruby/system/System.cc
+++ b/src/mem/ruby/system/System.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
+ * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -26,16 +26,19 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <fcntl.h>
+#include <zlib.h>
+
+#include <cstdio>
+
 #include "base/intmath.hh"
 #include "base/output.hh"
-#include "mem/ruby/buffers/MessageBuffer.hh"
+#include "debug/RubySystem.hh"
 #include "mem/ruby/common/Address.hh"
 #include "mem/ruby/network/Network.hh"
 #include "mem/ruby/profiler/Profiler.hh"
-#include "mem/ruby/recorder/Tracer.hh"
-#include "mem/ruby/slicc_interface/AbstractController.hh"
-#include "mem/ruby/system/MemoryVector.hh"
 #include "mem/ruby/system/System.hh"
+#include "sim/simulate.hh"
 
 using namespace std;
 
@@ -49,7 +52,6 @@ int RubySystem::m_memory_size_bits;
 
 Network* RubySystem::m_network_ptr;
 Profiler* RubySystem::m_profiler_ptr;
-Tracer* RubySystem::m_tracer_ptr;
 MemoryVector* RubySystem::m_mem_vec_ptr;
 
 RubySystem::RubySystem(const Params *p)
@@ -88,6 +90,8 @@ RubySystem::RubySystem(const Params *p)
     //
     RubyExitCallback* rubyExitCB = new RubyExitCallback(p->stats_filename);
     registerExitCallback(rubyExitCB);
+    m_warmup_enabled = false;
+    m_cooldown_enabled = false;
 }
 
 void
@@ -109,22 +113,21 @@ RubySystem::registerProfiler(Profiler* profiler_ptr)
 }
 
 void
-RubySystem::registerTracer(Tracer* tracer_ptr)
+RubySystem::registerAbstractController(AbstractController* cntrl)
 {
-  m_tracer_ptr = tracer_ptr;
+  m_abs_cntrl_vec.push_back(cntrl);
 }
 
 void
-RubySystem::registerAbstractController(AbstractController* cntrl)
+RubySystem::registerSparseMemory(SparseMemory* s)
 {
-  m_abs_cntrl_vec.push_back(cntrl);
+    m_sparse_memory_vector.push_back(s);
 }
 
 RubySystem::~RubySystem()
 {
     delete m_network_ptr;
     delete m_profiler_ptr;
-    delete m_tracer_ptr;
     if (m_mem_vec_ptr)
         delete m_mem_vec_ptr;
 }
@@ -167,9 +170,143 @@ RubySystem::printStats(ostream& out)
 }
 
 void
+RubySystem::writeCompressedTrace(uint8* raw_data, string filename,
+                                 uint64 uncompressed_trace_size)
+{
+    // gzip raw_data into <checkpoint dir>/<filename>; any failure is fatal.
+    // The buffer is released here once it has been written out.
+    string thefile = Checkpoint::dir() + "/" + filename.c_str();
+
+    int fd = creat(thefile.c_str(), 0664);
+    if (fd < 0) {
+        perror("creat");
+        fatal("Can't open memory trace file '%s'\n", filename);
+    }
+
+    gzFile compressedMemory = gzdopen(fd, "wb");
+    if (compressedMemory == NULL)
+        fatal("Insufficient memory to allocate compression state for %s\n",
+              filename);
+
+    if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) !=
+        uncompressed_trace_size)
+        fatal("Write failed on memory trace file '%s'\n", filename);
+
+    if (gzclose(compressedMemory))
+        fatal("Close failed on memory trace file '%s'\n", filename);
+    // The buffer is an array (serialize() uses new[]), so use delete [].
+    delete [] raw_data;
+}
+
+void
 RubySystem::serialize(std::ostream &os)
 {
+    m_cooldown_enabled = true;
+
+    vector<Sequencer*> sequencer_map;
+    Sequencer* sequencer_ptr = NULL;
+    int cntrl_id = -1;
+
+    // Collect every controller's sequencer; remember the first non-NULL one.
+    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
+        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
+        if (sequencer_ptr == NULL) {
+            sequencer_ptr = sequencer_map[cntrl];
+            cntrl_id = cntrl;
+        }
+    }
+
+    assert(sequencer_ptr != NULL);
+    // Controllers with no sequencer of their own share the first one found.
+    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
+        if (sequencer_map[cntrl] == NULL) {
+            sequencer_map[cntrl] = sequencer_ptr;
+        }
+    }
+
+    // Create the CacheRecorder and record the cache trace
+    m_cache_recorder = new CacheRecorder(NULL, 0, sequencer_map);
+
+    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
+        m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder);
+    }
+
+    // Save the current tick value so it can be restored afterwards
+    Tick curtick_original = curTick();
+    // Save the event queue head, leaving NULL in its place
+    Event* eventq_head = eventq->replaceHead(NULL);
+
+    // Schedule an event to start cache cooldown
+    RubyEvent* e = new RubyEvent(this);
+    schedule(e,curTick());
+    simulate();
+
+    // Restore eventq head
+    eventq_head = eventq->replaceHead(eventq_head);
+    // Restore curTick
+    curTick(curtick_original);
+
+    uint8* raw_data = NULL;
+    // MemoryVector: write its own trace file. Otherwise record sparse memory.
+    if (m_mem_vec_ptr != NULL) {
+        uint64 memory_trace_size = m_mem_vec_ptr->collatePages(raw_data);
+
+        string memory_trace_file = name() + ".memory.gz";
+        writeCompressedTrace(raw_data, memory_trace_file,
+                             memory_trace_size);
+
+        SERIALIZE_SCALAR(memory_trace_file);
+        SERIALIZE_SCALAR(memory_trace_size);
+
+    } else {
+        for (int i = 0; i < m_sparse_memory_vector.size(); ++i) {
+            m_sparse_memory_vector[i]->recordBlocks(cntrl_id,
+                                                    m_cache_recorder);
+        }
+    }
+
+    // Aggregate the trace entries together into a single array
+    raw_data = new uint8_t[4096];
+    uint64 cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data,
+                                                                 4096);
+    string cache_trace_file = name() + ".cache.gz";
+    writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size);
+
+    SERIALIZE_SCALAR(cache_trace_file);
+    SERIALIZE_SCALAR(cache_trace_size);
 
+    m_cooldown_enabled = false;
+}
+
+void
+RubySystem::readCompressedTrace(string filename, uint8*& raw_data,
+                                uint64& uncompressed_trace_size)
+{
+    // Inflate exactly uncompressed_trace_size bytes of gzip file 'filename'
+    // into a new[] buffer handed back through raw_data; failures are fatal.
+    gzFile compressedTrace;
+
+    int fd = open(filename.c_str(), O_RDONLY);
+    if (fd < 0) {
+        perror("open");
+        fatal("Unable to open trace file %s", filename);
+    }
+
+    compressedTrace = gzdopen(fd, "rb");
+    if (compressedTrace == NULL) {
+        fatal("Insufficient memory to allocate compression state for %s\n",
+              filename);
+    }
+
+    raw_data = new uint8_t[uncompressed_trace_size];
+    // gzread() returns an int (-1 on error). A plain '<' against the unsigned
+    // size would turn -1 into a huge value and hide the error, so use '!='.
+    if (gzread(compressedTrace, raw_data, uncompressed_trace_size) !=
+            uncompressed_trace_size)
+        fatal("Unable to read complete trace from file %s\n", filename);
+
+    if (gzclose(compressedTrace))
+        fatal("Failed to close cache trace file '%s'\n", filename);
 }
 
 void
@@ -181,18 +318,95 @@ RubySystem::unserialize(Checkpoint *cp, const string &section)
     // value of curTick()
     //
     clearStats();
+    uint8* uncompressed_trace = NULL;
+    // With a MemoryVector, refill its pages from the compressed memory trace.
+    if (m_mem_vec_ptr != NULL) {
+        string memory_trace_file;
+        uint64 memory_trace_size = 0;
+
+        UNSERIALIZE_SCALAR(memory_trace_file);
+        UNSERIALIZE_SCALAR(memory_trace_size);
+        memory_trace_file = cp->cptDir + "/" + memory_trace_file;
+
+        readCompressedTrace(memory_trace_file, uncompressed_trace,
+                            memory_trace_size);
+        m_mem_vec_ptr->populatePages(uncompressed_trace);
+        // readCompressedTrace() allocates with new[], so use delete [].
+        delete [] uncompressed_trace;
+        uncompressed_trace = NULL;
+    }
+
+    string cache_trace_file;
+    uint64 cache_trace_size = 0;
+
+    UNSERIALIZE_SCALAR(cache_trace_file);
+    UNSERIALIZE_SCALAR(cache_trace_size);
+    cache_trace_file = cp->cptDir + "/" + cache_trace_file;
+
+    readCompressedTrace(cache_trace_file, uncompressed_trace,
+                        cache_trace_size);
+    m_warmup_enabled = true;
+
+    vector<Sequencer*> sequencer_map;
+    Sequencer* t = NULL;
+    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
+        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
+        if(t == NULL) t = sequencer_map[cntrl];
+    }
+
+    assert(t != NULL);
+
+    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
+        if (sequencer_map[cntrl] == NULL) {
+            sequencer_map[cntrl] = t;
+        }
+    }
+
+    m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size,
+                                         sequencer_map);
 }
 
 void
-RubySystem::clearStats() const
+RubySystem::startup()
 {
-    m_profiler_ptr->clearStats();
-    m_network_ptr->clearStats();
+    if (m_warmup_enabled) {
+        // Save the current tick value; it is restored at the end of warmup
+        Tick curtick_original = curTick();
+        // Save the event queue head, leaving NULL in its place
+        Event* eventq_head = eventq->replaceHead(NULL);
+        // Run the warmup starting from tick 0
+        curTick(0);
+
+        // Schedule an event to start cache warmup
+        RubyEvent* e = new RubyEvent(this);
+        schedule(e,curTick());
+        simulate();
+        // simulate() has returned: drop the recorder and leave warmup mode
+        delete m_cache_recorder;
+        m_cache_recorder = NULL;
+        m_warmup_enabled = false;
+        // Restore eventq head
+        eventq_head = eventq->replaceHead(eventq_head);
+        // Restore curTick
+        curTick(curtick_original);
+    }
+}
+
+void
+RubySystem::RubyEvent::process()
+{
+    // Warmup takes priority: fetch request then; flush request in cooldown.
+    if (ruby_system->m_warmup_enabled)
+        ruby_system->m_cache_recorder->enqueueNextFetchRequest();
+    else if (ruby_system->m_cooldown_enabled)
+        ruby_system->m_cache_recorder->enqueueNextFlushRequest();
 }
 
 void
-RubySystem::recordCacheContents(CacheRecorder& tr) const
+RubySystem::clearStats() const
 {
+    m_profiler_ptr->clearStats();
+    m_network_ptr->clearStats();
 }
 
 #ifdef CHECK_COHERENCE
diff --git a/src/mem/ruby/system/System.hh b/src/mem/ruby/system/System.hh
index 704cc3b27..461abffe2 100644
--- a/src/mem/ruby/system/System.hh
+++ b/src/mem/ruby/system/System.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
+ * Copyright (c) 1999-2012 Mark D. Hill and David A. Wood
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -38,21 +38,34 @@
 #include "base/callback.hh"
 #include "mem/ruby/common/Global.hh"
 #include "mem/ruby/eventqueue/RubyEventQueue.hh"
-#include "mem/ruby/system/RubyPort.hh"
+#include "mem/ruby/recorder/CacheRecorder.hh"
 #include "mem/ruby/slicc_interface/AbstractController.hh"
+#include "mem/ruby/system/MemoryVector.hh"
+#include "mem/ruby/system/SparseMemory.hh"
 #include "params/RubySystem.hh"
 #include "sim/sim_object.hh"
 
-class AbstractController;
-class CacheRecorder;
-class MemoryVector;
 class Network;
 class Profiler;
-class Tracer;
 
 class RubySystem : public SimObject
 {
   public:
+    // Event that RubySystem schedules to start cache warmup or cooldown.
+    class RubyEvent : public Event
+    {
+      public:
+        RubyEvent(RubySystem* _ruby_system)
+            : ruby_system(_ruby_system)
+        {}
+      private:
+        void process();
+        // The RubySystem this event was created for.
+        RubySystem* ruby_system;
+    };
+
+    friend class RubyEvent;
+
     typedef RubySystemParams Params;
     RubySystem(const Params *p);
     ~RubySystem();
@@ -86,13 +99,6 @@ class RubySystem : public SimObject
         return m_profiler_ptr;
     }
 
-    static Tracer*
-    getTracer()
-    {
-        assert(m_tracer_ptr != NULL);
-        return m_tracer_ptr;
-    }
-
     static MemoryVector*
     getMemoryVector()
     {
@@ -100,7 +106,6 @@ class RubySystem : public SimObject
         return m_mem_vec_ptr;
     }
 
-    void recordCacheContents(CacheRecorder& tr) const;
     static void printConfig(std::ostream& out);
     static void printStats(std::ostream& out);
     void clearStats() const;
@@ -114,13 +119,15 @@ class RubySystem : public SimObject
 
     void print(std::ostream& out) const;
 
-    virtual void serialize(std::ostream &os);
-    virtual void unserialize(Checkpoint *cp, const std::string &section);
+    void serialize(std::ostream &os);
+    void unserialize(Checkpoint *cp, const std::string &section);
+    void process();
+    void startup();
 
     void registerNetwork(Network*);
     void registerProfiler(Profiler*);
-    void registerTracer(Tracer*);
     void registerAbstractController(AbstractController*);
+    void registerSparseMemory(SparseMemory*);
 
   private:
     // Private copy constructor and assignment operator
@@ -130,6 +137,11 @@ class RubySystem : public SimObject
     void init();
 
     static void printSystemConfig(std::ostream& out);
+    void readCompressedTrace(std::string filename,
+                             uint8*& raw_data,
+                             uint64& uncompressed_trace_size);
+    void writeCompressedTrace(uint8* raw_data, std::string file,
+                              uint64 uncompressed_trace_size);
 
   private:
     // configuration parameters
@@ -140,14 +152,16 @@ class RubySystem : public SimObject
     static int m_block_size_bits;
     static uint64 m_memory_size_bytes;
     static int m_memory_size_bits;
-
     static Network* m_network_ptr;
 
   public:
     static Profiler* m_profiler_ptr;
-    static Tracer* m_tracer_ptr;
     static MemoryVector* m_mem_vec_ptr;
     std::vector<AbstractController*> m_abs_cntrl_vec;
+    bool m_warmup_enabled;
+    bool m_cooldown_enabled;
+    CacheRecorder* m_cache_recorder;
+    std::vector<SparseMemory*> m_sparse_memory_vector;
 };
 
 inline std::ostream&
diff --git a/src/mem/slicc/symbols/StateMachine.py b/src/mem/slicc/symbols/StateMachine.py
index a3ea1ca8a..85df3f9e8 100644
--- a/src/mem/slicc/symbols/StateMachine.py
+++ b/src/mem/slicc/symbols/StateMachine.py
@@ -264,6 +264,8 @@ public:
     void clearStats();
     void blockOnQueue(Address addr, MessageBuffer* port);
     void unblock(Address addr);
+    void recordCacheTrace(int cntrl, CacheRecorder* tr);
+    Sequencer* getSequencer() const;
 
 private:
 ''')
@@ -674,6 +676,12 @@ $vid->setDescription("[Version " + to_string(m_version) + ", ${ident}, name=${{v
         else:
             mq_ident = "NULL"
 
+        seq_ident = "NULL"
+        for param in self.config_parameters:
+            if param.name == "sequencer":
+                assert(param.pointer)
+                seq_ident = "m_%s_ptr" % param.name
+
         code('''
 int
 $c_ident::getNumControllers()
@@ -687,6 +695,12 @@ $c_ident::getMandatoryQueue() const
     return $mq_ident;
 }
 
+Sequencer*
+$c_ident::getSequencer() const
+{
+    return $seq_ident;
+}
+
 const int &
 $c_ident::getVersion() const
 {
@@ -875,6 +889,23 @@ $c_ident::unset_tbe(${{self.TBEType.c_ident}}*& m_tbe_ptr)
 
         code('''
 
+void
+$c_ident::recordCacheTrace(int cntrl, CacheRecorder* tr)
+{
+''')
+        #
+        # Record cache contents for all associated caches.
+        #
+        code.indent()
+        for param in self.config_parameters:
+            if param.type_ast.type.ident == "CacheMemory":
+                assert(param.pointer)
+                code('m_${{param.ident}}_ptr->recordCacheContents(cntrl, tr);')
+
+        code.dedent()
+        code('''
+}
+
 // Actions
 ''')
         if self.TBEType != None and self.EntryType != None:
diff --git a/src/python/m5/SimObject.py b/src/python/m5/SimObject.py
index 60693758c..47ca32af2 100644
--- a/src/python/m5/SimObject.py
+++ b/src/python/m5/SimObject.py
@@ -874,29 +874,62 @@ class SimObject(object):
         if hasattr(self, 'type'):
             print >>ini_file, 'type=%s' % self.type
 
-        child_names = self._children.keys()
-        child_names.sort()
-        if len(child_names):
+        if len(self._children.keys()):
             print >>ini_file, 'children=%s' % \
-                  ' '.join(self._children[n].get_name() for n in child_names)
+                  ' '.join(self._children[n].get_name() \
+                  for n in sorted(self._children.keys()))
 
-        param_names = self._params.keys()
-        param_names.sort()
-        for param in param_names:
+        for param in sorted(self._params.keys()):
             value = self._values.get(param)
             if value != None:
                 print >>ini_file, '%s=%s' % (param,
                                              self._values[param].ini_str())
 
-        port_names = self._ports.keys()
-        port_names.sort()
-        for port_name in port_names:
+        for port_name in sorted(self._ports.keys()):
             port = self._port_refs.get(port_name, None)
             if port != None:
                 print >>ini_file, '%s=%s' % (port_name, port.ini_str())
 
         print >>ini_file        # blank line between objects
 
+    # generate a tree of dictionaries expressing all the parameters in the
+    # instantiated system for use by scripts that want to do power, thermal
+    # visualization, and other similar tasks
+    def get_config_as_dict(self):
+        d = attrdict()
+        if hasattr(self, 'type'):
+            d.type = self.type
+        if hasattr(self, 'cxx_class'):
+            d.cxx_class = self.cxx_class
+
+        for param in sorted(self._params.keys()):
+            value = self._values.get(param)
+            if value is None:
+                continue    # no value set; self._values[param] could raise
+            try:
+                # Use native types where JSON supports them, strings for
+                # all else. skipkeys=True seems to not work as well as hoped
+                if type(value.value) in \
+                        [str, unicode, int, long, float, bool, type(None)]:
+                    d[param] = value.value
+                else:
+                    d[param] = str(value)
+            except AttributeError:
+                pass    # value has no .value attribute; leave it out
+
+        for n in sorted(self._children.keys()):
+            d[self._children[n].get_name()] =  self._children[n].get_config_as_dict()
+
+        for port_name in sorted(self._ports.keys()):
+            port = self._port_refs.get(port_name, None)
+            if port != None:
+                # Might want to actually make this reference the object
+                # in the future, although execing the string problem would
+                # get some of the way there
+                d[port_name] = port.ini_str()
+
+        return d
+
     def getCCParams(self):
         if self._ccParams:
             return self._ccParams
diff --git a/src/python/m5/main.py b/src/python/m5/main.py
index 58de62cc3..910cb6ce6 100644
--- a/src/python/m5/main.py
+++ b/src/python/m5/main.py
@@ -87,6 +87,8 @@ def parse_options():
     group("Configuration Options")
     option("--dump-config", metavar="FILE", default="config.ini",
         help="Dump configuration output file [Default: %default]")
+    option("--json-config", metavar="FILE", default="config.json",
+        help="Create JSON output of the configuration [Default: %default]")
 
     # Debugging options
     group("Debugging Options")
@@ -121,7 +123,6 @@ def parse_options():
         execfile(options_file, scope)
 
     arguments = options.parse_args()
-
     return options,arguments
 
 def interact(scope):
diff --git a/src/python/m5/params.py b/src/python/m5/params.py
index ee3678dc9..05fe9b774 100644
--- a/src/python/m5/params.py
+++ b/src/python/m5/params.py
@@ -228,6 +228,12 @@ class SimObjectVector(VectorParamValue):
             for obj in v.descendants():
                 yield obj
 
+    def get_config_as_dict(self):
+        # Return a list holding the config dict of every object in this
+        # vector, in iteration order (a list rather than a dict, despite
+        # the method's name).
+        return [v.get_config_as_dict() for v in self]
+
 class VectorParamDesc(ParamDesc):
     # Convert assigned value to appropriate type.  If the RHS is not a
     # list or tuple, it generates a single-element list.
@@ -256,6 +262,9 @@ class VectorParamDesc(ParamDesc):
         self.ptype.cxx_predecls(code)
         code('%}')
         code()
+        # Make sure the SWIGPY_SLICE_ARG is defined through this inclusion
+        code('%include "std_container.i"')
+        code()
         self.ptype.swig_predecls(code)
         code()
         code('%include "std_vector.i"')
@@ -961,6 +970,9 @@ class Time(ParamValue):
     def ini_str(self):
         return str(self)
 
+    def get_config_as_dict(self):
+        return str(self)
+
 # Enumerated types are a little more complex.  The user specifies the
 # type as Enum(foo) where foo is either a list or dictionary of
 # alternatives (typically strings, but not necessarily so).  (In the
diff --git a/src/python/m5/simulate.py b/src/python/m5/simulate.py
index b4ccf82c1..38129592c 100644
--- a/src/python/m5/simulate.py
+++ b/src/python/m5/simulate.py
@@ -40,6 +40,7 @@ import SimObject
 import ticks
 import objects
 from util import fatal
+from util import attrdict
 
 # define a MaxTick parameter
 MaxTick = 2**63 - 1
@@ -71,6 +72,17 @@ def instantiate(ckpt_dir=None):
             obj.print_ini(ini_file)
         ini_file.close()
 
+    if options.json_config:
+        try:
+            import json
+            json_file = file(os.path.join(options.outdir, options.json_config), 'w')
+            d = root.get_config_as_dict()
+            json.dump(d, json_file, indent=4)
+            json_file.close()
+        except ImportError:
+            pass
+
+
     # Initialize the global statistics
     stats.initSimStats()
 
diff --git a/src/sim/System.py b/src/sim/System.py
index db214abc0..39505c01a 100644
--- a/src/sim/System.py
+++ b/src/sim/System.py
@@ -54,8 +54,8 @@ class System(SimObject):
     physmem = Param.PhysicalMemory("Physical Memory")
     mem_mode = Param.MemoryMode('atomic', "The mode the memory system is in")
     memories = VectorParam.PhysicalMemory(Self.all, "All memories is the system")
-
     work_item_id = Param.Int(-1, "specific work item id")
+    num_work_ids = Param.Int(16, "Number of distinct work item types")
     work_begin_cpu_id_exit = Param.Int(-1,
         "work started on specific id, now exit simulation")
     work_begin_ckpt_count = Param.Counter(0,
diff --git a/src/sim/pseudo_inst.cc b/src/sim/pseudo_inst.cc
index 749afeb3b..d5bc8fa0e 100644
--- a/src/sim/pseudo_inst.cc
+++ b/src/sim/pseudo_inst.cc
@@ -417,6 +417,7 @@ workbegin(ThreadContext *tc, uint64_t workid, uint64_t threadid)
     tc->getCpuPtr()->workItemBegin();
     System *sys = tc->getSystemPtr();
     const System::Params *params = sys->params();
+    sys->workItemBegin(threadid, workid);
 
     DPRINTF(WorkItems, "Work Begin workid: %d, threadid %d\n", workid, 
             threadid);
@@ -473,6 +474,7 @@ workend(ThreadContext *tc, uint64_t workid, uint64_t threadid)
     tc->getCpuPtr()->workItemEnd();
     System *sys = tc->getSystemPtr();
     const System::Params *params = sys->params();
+    sys->workItemEnd(threadid, workid);
 
     DPRINTF(WorkItems, "Work End workid: %d, threadid %d\n", workid, threadid);
 
diff --git a/src/sim/system.cc b/src/sim/system.cc
index 3051cb64b..d3bee1ad1 100644
--- a/src/sim/system.cc
+++ b/src/sim/system.cc
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2011 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2003-2006 The Regents of The University of Michigan
  * Copyright (c) 2011 Regents of the University of California
  * All rights reserved.
@@ -43,6 +55,7 @@
 #include "config/the_isa.hh"
 #include "cpu/thread_context.hh"
 #include "debug/Loader.hh"
+#include "debug/WorkItems.hh"
 #include "kern/kernel_stats.hh"
 #include "mem/mem_object.hh"
 #include "mem/physical.hh"
@@ -68,8 +81,9 @@ System::System(Params *p)
       memoryMode(p->mem_mode),
       workItemsBegin(0),
       workItemsEnd(0),
+      numWorkIds(p->num_work_ids),
       _params(p),
-      totalNumInsts(0), 
+      totalNumInsts(0),
       instEventQueue("system instruction-based event queue")
 {
     // add self to global system list
@@ -158,6 +172,9 @@ System::~System()
 {
     delete kernelSymtab;
     delete kernel;
+
+    for (uint32_t j = 0; j < numWorkIds; j++)
+        delete workItemStats[j];
 }
 
 void
@@ -320,6 +337,37 @@ System::unserialize(Checkpoint *cp, const string &section)
 }
 
 void
+System::regStats()
+{
+    // One run-time histogram per work item type, sampled by workItemEnd().
+    for (uint32_t j = 0; j < numWorkIds; j++) {
+        workItemStats[j] = new Stats::Histogram();
+        const string namestr = csprintf("work_item_type%d", j);
+        workItemStats[j]->init(20)
+                         .name(name() + "." + namestr)
+                         .desc("Run time stat for " + namestr)
+                         .prereq(*workItemStats[j]);
+    }
+}
+
+void
+System::workItemEnd(uint32_t tid, uint32_t workid)
+{
+    // An end marker with no recorded begin tick is silently ignored.
+    const std::pair<uint32_t,uint32_t> key(tid, workid);
+    std::map<std::pair<uint32_t,uint32_t>, Tick>::iterator started =
+        lastWorkItemStarted.find(key);
+    if (started == lastWorkItemStarted.end())
+        return;
+    const Tick samp = curTick() - started->second;
+    DPRINTF(WorkItems, "Work item end: %d\t%d\t%lld\n", tid, workid, samp);
+    if (workid >= numWorkIds)
+        fatal("Got workid greater than specified in system configuration\n");
+    workItemStats[workid]->sample(samp);
+    lastWorkItemStarted.erase(started);
+}
+
+void
 System::printSystems()
 {
     vector<System *>::iterator i = systemList.begin();
diff --git a/src/sim/system.hh b/src/sim/system.hh
index 00d8360e0..d675eb727 100644
--- a/src/sim/system.hh
+++ b/src/sim/system.hh
@@ -157,14 +157,16 @@ class System : public SimObject
     Enums::MemoryMode memoryMode;
     uint64_t workItemsBegin;
     uint64_t workItemsEnd;
+    uint32_t numWorkIds;
     std::vector<bool> activeCpus;
 
   public:
+    virtual void regStats();
     /**
      * Called by pseudo_inst to track the number of work items started by this
      * system.
      */
-    uint64_t 
+    uint64_t
     incWorkItemsBegin()
     {
         return ++workItemsBegin;
@@ -198,6 +200,14 @@ class System : public SimObject
         return count;
     }
 
+    inline void workItemBegin(uint32_t tid, uint32_t workid)
+    {
+        std::pair<uint32_t,uint32_t> p(tid, workid);
+        lastWorkItemStarted[p] = curTick();
+    }
+
+    void workItemEnd(uint32_t tid, uint32_t workid);
+
     /**
      * Fix up an address used to match PCs for hooking simulator
      * events on to target function executions.  See comment in
@@ -285,6 +295,8 @@ class System : public SimObject
   public:
     Counter totalNumInsts;
     EventQueue instEventQueue;
+    std::map<std::pair<uint32_t,uint32_t>, Tick>  lastWorkItemStarted;
+    std::map<uint32_t, Stats::Histogram*> workItemStats;
 
     ////////////////////////////////////////////
     //