108 files changed, 5177 insertions, 2001 deletions
diff --git a/src/SConscript b/src/SConscript
index cad0736c5..0ee144747 100755
--- a/src/SConscript
+++ b/src/SConscript
@@ -446,7 +446,7 @@ def makeInfoPyFile(target, source, env):
 
 # Generate a file that wraps the basic top level files
 env.Command('python/m5/info.py',
-            [ '#/AUTHORS', '#/LICENSE', '#/README', '#/RELEASE_NOTES' ],
+            [ '#/AUTHORS', '#/LICENSE', '#/README', ],
             MakeAction(makeInfoPyFile, Transform("INFO")))
 PySource('m5', 'python/m5/info.py')
 
diff --git a/src/arch/arm/table_walker.cc b/src/arch/arm/table_walker.cc
index 6b2113639..e6dd728dd 100644
--- a/src/arch/arm/table_walker.cc
+++ b/src/arch/arm/table_walker.cc
@@ -208,19 +208,20 @@ TableWalker::processWalk()
         return f;
     }
 
+    Request::Flags flag = 0;
+    if (currState->sctlr.c == 0) {
+        flag = Request::UNCACHEABLE;
+    }
+
     if (currState->timing) {
         port->dmaAction(MemCmd::ReadReq, l1desc_addr, sizeof(uint32_t),
                 &doL1DescEvent, (uint8_t*)&currState->l1Desc.data,
-                currState->tc->getCpuPtr()->ticks(1));
+                currState->tc->getCpuPtr()->ticks(1), flag);
         DPRINTF(TLBVerbose, "Adding to walker fifo: queue size before adding: %d\n",
                 stateQueueL1.size());
         stateQueueL1.push_back(currState);
         currState = NULL;
     } else {
-        Request::Flags flag = 0;
-        if (currState->sctlr.c == 0){
-           flag = Request::UNCACHEABLE;
-        }
         port->dmaAction(MemCmd::ReadReq, l1desc_addr, sizeof(uint32_t),
                 NULL, (uint8_t*)&currState->l1Desc.data,
                 currState->tc->getCpuPtr()->ticks(1), flag);
@@ -472,7 +473,7 @@ TableWalker::doL1Descriptor()
     switch (currState->l1Desc.type()) {
       case L1Descriptor::Ignore:
       case L1Descriptor::Reserved:
-        if (!currState->delayed) {
+        if (!currState->timing) {
             currState->tc = NULL;
             currState->req = NULL;
         }
@@ -577,7 +578,7 @@ TableWalker::doL2Descriptor()
 
     if (currState->l2Desc.invalid()) {
         DPRINTF(TLB, "L2 descriptor invalid, causing fault\n");
-        if (!currState->delayed) {
+        if (!currState->timing) {
             currState->tc = NULL;
             currState->req = NULL;
         }
@@ -622,7 +623,7 @@ TableWalker::doL2Descriptor()
     memAttrs(currState->tc, te, currState->sctlr, currState->l2Desc.texcb(),
              currState->l2Desc.shareable());
 
-    if (!currState->delayed) {
+    if (!currState->timing) {
         currState->tc = NULL;
         currState->req = NULL;
     }
diff --git a/src/arch/arm/table_walker.hh b/src/arch/arm/table_walker.hh
index 267a7ad26..96a39cc61 100644
--- a/src/arch/arm/table_walker.hh
+++ b/src/arch/arm/table_walker.hh
@@ -93,14 +93,14 @@ class TableWalker : public MemObject
         {
             if (supersection())
                 panic("Super sections not implemented\n");
-            return mbits(data, 31,20);
+            return mbits(data, 31, 20);
         }
         /** Return the physcal address of the entry, bits in position*/
         Addr paddr(Addr va) const
         {
             if (supersection())
                 panic("Super sections not implemented\n");
-            return mbits(data, 31,20) | mbits(va, 20, 0);
+            return mbits(data, 31, 20) | mbits(va, 19, 0);
         }
 
 
@@ -109,7 +109,7 @@ class TableWalker : public MemObject
         {
             if (supersection())
                 panic("Super sections not implemented\n");
-            return bits(data, 31,20);
+            return bits(data, 31, 20);
         }
 
         /** Is the translation global (no asid used)? */
@@ -127,19 +127,19 @@ class TableWalker : public MemObject
         /** Three bit access protection flags */
         uint8_t ap() const
         {
-            return (bits(data, 15) << 2) | bits(data,11,10);
+            return (bits(data, 15) << 2) | bits(data, 11, 10);
         }
 
         /** Domain Client/Manager: ARM DDI 0406B: B3-31 */
         uint8_t domain() const
         {
-            return bits(data,8,5);
+            return bits(data, 8, 5);
         }
 
         /** Address of L2 descriptor if it exists */
         Addr l2Addr() const
         {
-            return mbits(data, 31,10);
+            return mbits(data, 31, 10);
         }
 
         /** Memory region attributes: ARM DDI 0406B: B3-32.
@@ -149,7 +149,7 @@ class TableWalker : public MemObject
          */
         uint8_t texcb() const
         {
-            return bits(data, 2) | bits(data,3) << 1 | bits(data, 14, 12) << 2;
+            return bits(data, 2) | bits(data, 3) << 1 | bits(data, 14, 12) << 2;
         }
 
         /** If the section is shareable. See texcb() comment. */
@@ -187,7 +187,7 @@ class TableWalker : public MemObject
         /** Is the entry invalid */
         bool invalid() const
         {
-            return bits(data, 1,0) == 0;;
+            return bits(data, 1, 0) == 0;
         }
 
         /** What is the size of the mapping? */
@@ -218,8 +218,8 @@ class TableWalker : public MemObject
         uint8_t texcb() const
         {
             return large() ?
-                (bits(data, 2) | (bits(data,3) << 1) | (bits(data, 14, 12) << 2)) :
-                (bits(data, 2) | (bits(data,3) << 1) | (bits(data, 8, 6) << 2));
+                (bits(data, 2) | (bits(data, 3) << 1) | (bits(data, 14, 12) << 2)) :
+                (bits(data, 2) | (bits(data, 3) << 1) | (bits(data, 8, 6) << 2));
         }
 
         /** Return the physical frame, bits shifted right */
diff --git a/src/arch/arm/tlb.cc b/src/arch/arm/tlb.cc
index e5f5b36f6..230c56200 100644
--- a/src/arch/arm/tlb.cc
+++ b/src/arch/arm/tlb.cc
@@ -696,6 +696,8 @@ TLB::translateTiming(RequestPtr req, ThreadContext *tc,
 #endif
     if (!delay)
         translation->finish(fault, req, tc, mode);
+    else
+        translation->markDelayed();
     return fault;
 }
 
diff --git a/src/arch/generic/debugfaults.hh b/src/arch/generic/debugfaults.hh
new file mode 100644
index 000000000..acffadc34
--- /dev/null
+++ b/src/arch/generic/debugfaults.hh
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2010 Advanced Micro Devices
+ * All rights reserved.
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Gabe Black
+ */
+
+#ifndef __ARCH_GENERIC_DEBUGFAULTS_HH__
+#define __ARCH_GENERIC_DEBUGFAULTS_HH__
+
+#include "base/misc.hh"
+#include "sim/faults.hh"
+
+#include <string>
+
+namespace GenericISA
+{
+/**
+ * Fault that reports a simulator message when it is invoked.
+ *
+ * The DebugFunc value given at construction selects which of panic(),
+ * fatal(), warn() or warn_once() receives the stored message.
+ */
+class M5DebugFault : public FaultBase
+{
+  public:
+    /** Selects the reporting helper that invoke() calls. */
+    enum DebugFunc
+    {
+        PanicFunc,
+        FatalFunc,
+        WarnFunc,
+        WarnOnceFunc
+    };
+
+  protected:
+    /** Text passed to the reporting helper. */
+    std::string message;
+    /** Reporting helper selected for this fault. */
+    DebugFunc func;
+
+  public:
+    M5DebugFault(DebugFunc _func, std::string _message) :
+        message(_message), func(_func)
+    {}
+
+    /** Return a fixed name string for the selected helper. */
+    FaultName
+    name() const
+    {
+        switch (func) {
+          case PanicFunc:
+            return "panic fault";
+          case FatalFunc:
+            return "fatal fault";
+          case WarnFunc:
+            return "warn fault";
+          case WarnOnceFunc:
+            return "warn_once fault";
+          default:
+            panic("unrecognized debug function number\n");
+        }
+    }
+
+    /**
+     * Report the stored message through the selected helper.
+     *
+     * The message is passed as the argument of a "%s" format instead of
+     * being used as the format string itself, so a '%' in the text is
+     * printed as written rather than parsed as a conversion.
+     */
+    void
+    invoke(ThreadContext *tc,
+            StaticInstPtr inst = StaticInst::nullStaticInstPtr)
+    {
+        switch (func) {
+          case PanicFunc:
+            panic("%s\n", message);
+            break;
+          case FatalFunc:
+            fatal("%s\n", message);
+            break;
+          case WarnFunc:
+            warn("%s\n", message);
+            break;
+          case WarnOnceFunc:
+            warn_once("%s\n", message);
+            break;
+          default:
+            panic("unrecognized debug function number\n");
+        }
+    }
+};
+} // namespace GenericISA
+
+#endif // __ARCH_GENERIC_DEBUGFAULTS_HH__
diff --git a/src/arch/mips/isa/decoder.isa b/src/arch/mips/isa/decoder.isa
index 173fa89df..d97a141de 100644
--- a/src/arch/mips/isa/decoder.isa
+++ b/src/arch/mips/isa/decoder.isa
@@ -367,21 +367,7 @@ decode OPCODE_HI default Unknown::unknown() {
             }});
             0x1: addiu({{ Rt.sw = Rs.sw + imm; }});
             0x2: slti({{ Rt.sw = (Rs.sw < imm) ? 1 : 0 }});
-
-            //Edited to include MIPS AVP Pass/Fail instructions and
-            //default to the sltiu instruction
-            0x3: decode RS_RT_INTIMM {
-                0xabc1: BasicOp::fail({{
-                    exitSimLoop("AVP/SRVP Test Failed");
-                }});
-                0xabc2: BasicOp::pass({{
-                    exitSimLoop("AVP/SRVP Test Passed");
-                }});
-                default: sltiu({{
-                    Rt.uw = (Rs.uw < (uint32_t)sextImm) ? 1 : 0;
-                }});
-            }
-
+            0x3: sltiu({{ Rt.uw = (Rs.uw < (uint32_t)sextImm) ? 1 : 0;}});
             0x4: andi({{ Rt.sw = Rs.sw & zextImm; }});
             0x5: ori({{ Rt.sw = Rs.sw | zextImm; }});
             0x6: xori({{ Rt.sw = Rs.sw ^ zextImm; }});
diff --git a/src/arch/x86/SConscript b/src/arch/x86/SConscript
index 27de9da11..9cb774647 100644
--- a/src/arch/x86/SConscript
+++ b/src/arch/x86/SConscript
@@ -46,6 +46,7 @@ if env['TARGET_ISA'] == 'x86':
     Source('cpuid.cc')
     Source('emulenv.cc')
     Source('faults.cc')
+    Source('insts/badmicroop.cc')
     Source('insts/microfpop.cc')
     Source('insts/microldstop.cc')
     Source('insts/micromediaop.cc')
diff --git a/src/arch/x86/insts/badmicroop.cc b/src/arch/x86/insts/badmicroop.cc
new file mode 100644
index 000000000..ef493f250
--- /dev/null
+++ b/src/arch/x86/insts/badmicroop.cc
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2011 Advanced Micro Devices
+ * All rights reserved.
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Gabe Black
+ */
+
+#include "arch/x86/insts/badmicroop.hh"
+#include "arch/x86/isa_traits.hh"
+#include "arch/x86/decoder.hh"
+
+namespace X86ISA
+{
+
+// Stand-in microop that MacroopBase::fetchMicroop() returns for an out of
+// range micro PC. It is heap allocated, not static, because the reference
+// counted pointer would try to delete static memory when it was destructed.
+const StaticInstPtr badMicroop =
+    new X86ISAInst::MicroPanic(NoopMachInst, "BAD",
+        StaticInst::IsMicroop | StaticInst::IsLastMicroop,
+        "Invalid microop!", 0);
+
+} // namespace X86ISA
diff --git a/src/arch/x86/insts/badmicroop.hh b/src/arch/x86/insts/badmicroop.hh
new file mode 100644
index 000000000..57fe242c4
--- /dev/null
+++ b/src/arch/x86/insts/badmicroop.hh
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2011 Advanced Micro Devices
+ * All rights reserved.
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Gabe Black
+ */
+
+#ifndef __ARCH_X86_INSTS_BADMICROOP_HH__
+#define __ARCH_X86_INSTS_BADMICROOP_HH__
+
+class StaticInstPtr;
+
+namespace X86ISA
+{
+
+extern const StaticInstPtr badMicroop;
+
+} // namespace X86ISA
+
+#endif //__ARCH_X86_INSTS_BADMICROOP_HH__
diff --git a/src/arch/x86/insts/macroop.hh b/src/arch/x86/insts/macroop.hh
index fcf051a37..4f4176b77 100644
--- a/src/arch/x86/insts/macroop.hh
+++ b/src/arch/x86/insts/macroop.hh
@@ -41,6 +41,7 @@
 #define __ARCH_X86_INSTS_MACROOP_HH__
 
 #include "arch/x86/emulenv.hh"
+#include "arch/x86/insts/badmicroop.hh"
 #include "arch/x86/types.hh"
 #include "arch/x86/insts/static_inst.hh"
 
@@ -76,8 +77,10 @@ class MacroopBase : public X86StaticInst
     StaticInstPtr
     fetchMicroop(MicroPC microPC) const
     {
-        assert(microPC < numMicroops);
-        return microops[microPC];
+        if (microPC >= numMicroops)
+            return badMicroop;
+        else
+            return microops[microPC];
     }
 
     std::string
diff --git a/src/arch/x86/insts/microregop.cc b/src/arch/x86/insts/microregop.cc
index 6aee87449..dedea0f3d 100644
--- a/src/arch/x86/insts/microregop.cc
+++ b/src/arch/x86/insts/microregop.cc
@@ -50,9 +50,6 @@ namespace X86ISA
             bool subtract) const
     {
         DPRINTF(X86, "flagMask = %#x\n", flagMask);
-        if (_destRegIdx[0] & IntFoldBit) {
-            _dest >>= 8;
-        }
         uint64_t flags = oldFlags & ~flagMask;
         if(flagMask & (ECFBit | CFBit))
         {
diff --git a/src/arch/x86/isa/includes.isa b/src/arch/x86/isa/includes.isa
index 58b1fbc62..674e69e98 100644
--- a/src/arch/x86/isa/includes.isa
+++ b/src/arch/x86/isa/includes.isa
@@ -53,6 +53,7 @@ output header {{
 #include <sstream>
 #include <iostream>
 
+#include "arch/generic/debugfaults.hh"
 #include "arch/x86/emulenv.hh"
 #include "arch/x86/insts/macroop.hh"
 #include "arch/x86/insts/microfpop.hh"
@@ -113,6 +114,7 @@ output exec {{
 #include "arch/x86/regs/misc.hh"
 #include "arch/x86/tlb.hh"
 #include "base/bigint.hh"
+#include "base/compiler.hh"
 #include "base/condcodes.hh"
 #include "cpu/base.hh"
 #include "cpu/exetrace.hh"
diff --git a/src/arch/x86/isa/microops/debug.isa b/src/arch/x86/isa/microops/debug.isa
index 4b2ecdd5a..220c1af97 100644
--- a/src/arch/x86/isa/microops/debug.isa
+++ b/src/arch/x86/isa/microops/debug.isa
@@ -45,16 +45,29 @@ output header {{
     class MicroDebugBase : public X86ISA::X86MicroopBase
     {
       protected:
+        typedef GenericISA::M5DebugFault::DebugFunc DebugFunc;
+        DebugFunc func;
         std::string message;
         uint8_t cc;
 
       public:
-        MicroDebugBase(ExtMachInst _machInst, const char * mnem,
+        MicroDebugBase(ExtMachInst machInst, const char * mnem,
                 const char * instMnem, uint64_t setFlags,
-                std::string _message, uint8_t _cc);
+                DebugFunc _func, std::string _message, uint8_t _cc) :
+            X86MicroopBase(machInst, mnem, instMnem, setFlags, No_OpClass),
+                    func(_func), message(_message), cc(_cc)
+        {}
 
-        std::string generateDisassembly(Addr pc,
-                const SymbolTable *symtab) const;
+        std::string
+        generateDisassembly(Addr pc, const SymbolTable *symtab) const
+        {
+            std::stringstream response;
+
+            printMnemonic(response, instMnem, mnemonic);
+            response << "\"" << message << "\"";
+
+            return response.str();
+        }
     };
 }};
 
@@ -70,53 +83,31 @@ def template MicroDebugDeclare {{
 }};
 
 def template MicroDebugExecute {{
-        Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
+        Fault
+        %(class_name)s::execute(%(CPU_exec_context)s *xc,
                 Trace::InstRecord *traceData) const
         {
             %(op_decl)s
             %(op_rd)s
             if (%(cond_test)s) {
-                %(func)s("%s\n", message);
+                return new GenericISA::M5DebugFault(func, message);
+            } else {
+                return NoFault;
             }
-            return NoFault;
         }
 }};
 
-output decoder {{
-    inline MicroDebugBase::MicroDebugBase(
-            ExtMachInst machInst, const char * mnem, const char * instMnem,
-            uint64_t setFlags, std::string _message, uint8_t _cc) :
-        X86MicroopBase(machInst, mnem, instMnem,
-                setFlags, No_OpClass),
-                message(_message), cc(_cc)
-    {
-    }
-}};
-
 def template MicroDebugConstructor {{
-    inline %(class_name)s::%(class_name)s(
+    %(class_name)s::%(class_name)s(
             ExtMachInst machInst, const char * instMnem, uint64_t setFlags,
             std::string _message, uint8_t _cc) :
         %(base_class)s(machInst, "%(func)s", instMnem,
-                setFlags, _message, _cc)
+                setFlags, %(func_num)s, _message, _cc)
     {
         %(constructor)s;
     }
 }};
 
-output decoder {{
-    std::string MicroDebugBase::generateDisassembly(Addr pc,
-            const SymbolTable *symtab) const
-    {
-        std::stringstream response;
-
-        printMnemonic(response, instMnem, mnemonic);
-        response << "\"" << message << "\"";
-
-        return response.str();
-    }
-}};
-
 let {{
     class MicroDebug(X86Microop):
         def __init__(self, message, flags=None):
@@ -142,13 +133,14 @@ let {{
     header_output = ""
     decoder_output = ""
 
-    def buildDebugMicro(func):
+    def buildDebugMicro(func, func_num):
         global exec_output, header_output, decoder_output
 
         iop = InstObjParams(func, "Micro%sFlags" % func.capitalize(),
                 "MicroDebugBase",
                 {"code": "",
                  "func": func,
+                 "func_num": "GenericISA::M5DebugFault::%s" % func_num,
                  "cond_test": "checkCondition(ccFlagBits, cc)"})
         exec_output += MicroDebugExecute.subst(iop)
         header_output += MicroDebugDeclare.subst(iop)
@@ -158,6 +150,7 @@ let {{
                 "MicroDebugBase",
                 {"code": "",
                  "func": func,
+                 "func_num": "GenericISA::M5DebugFault::%s" % func_num,
                  "cond_test": "true"})
         exec_output += MicroDebugExecute.subst(iop)
         header_output += MicroDebugDeclare.subst(iop)
@@ -169,8 +162,8 @@ let {{
         global microopClasses
         microopClasses[func] = MicroDebugChild
 
-    buildDebugMicro("panic")
-    buildDebugMicro("fatal")
-    buildDebugMicro("warn")
-    buildDebugMicro("warn_once")
+    buildDebugMicro("panic", "PanicFunc")
+    buildDebugMicro("fatal", "FatalFunc")
+    buildDebugMicro("warn", "WarnFunc")
+    buildDebugMicro("warn_once", "WarnOnceFunc")
 }};
diff --git a/src/arch/x86/isa/microops/ldstop.isa b/src/arch/x86/isa/microops/ldstop.isa
index 216a74c6c..cd649d644 100644
--- a/src/arch/x86/isa/microops/ldstop.isa
+++ b/src/arch/x86/isa/microops/ldstop.isa
@@ -301,6 +301,46 @@ let {{
                 "dataSize" : self.dataSize, "addressSize" : self.addressSize,
                 "memFlags" : self.memFlags}
             return allocator
+
+    # Load/store microop whose allocator emits a C++ conditional that builds
+    # the "Big" class when dataSize >= 4 and the regular class otherwise.
+    class BigLdStOp(X86Microop):
+        def __init__(self, data, segment, addr, disp,
+                dataSize, addressSize, baseFlags, atCPL0, prefetch):
+            self.data, self.segment, self.disp = data, segment, disp
+            self.scale, self.index, self.base = addr
+            self.dataSize, self.addressSize = dataSize, addressSize
+            # Assemble the memory flags expression piece by piece.
+            flags = baseFlags
+            if atCPL0:
+                flags += " | (CPL0FlagBit << FlagShift)"
+            if prefetch:
+                flags += " | Request::PREFETCH"
+            flags += (" | (machInst.legacy.addr ? "
+                      "(AddrSizeFlagBit << FlagShift) : 0)")
+            self.memFlags = flags
+
+        def getAllocator(self, microFlags):
+            allocString = '''
+                (%(dataSize)s >= 4) ?
+                    (StaticInstPtr)(new %(class_name)sBig(machInst,
+                        macrocodeBlock, %(flags)s, %(scale)s, %(index)s,
+                        %(base)s, %(disp)s, %(segment)s, %(data)s,
+                        %(dataSize)s, %(addressSize)s, %(memFlags)s)) :
+                    (StaticInstPtr)(new %(class_name)s(machInst,
+                        macrocodeBlock, %(flags)s, %(scale)s, %(index)s,
+                        %(base)s, %(disp)s, %(segment)s, %(data)s,
+                        %(dataSize)s, %(addressSize)s, %(memFlags)s))
+            '''
+            fields = {
+                "class_name" : self.className,
+                "flags" : self.microFlagsText(microFlags),
+                "scale" : self.scale, "index" : self.index,
+                "base" : self.base, "disp" : self.disp,
+                "segment" : self.segment, "data" : self.data,
+                "dataSize" : self.dataSize, "addressSize" : self.addressSize,
+                "memFlags" : self.memFlags}
+            return allocString % fields
 }};
 
 let {{
@@ -315,7 +355,8 @@ let {{
     EA = bits(SegBase + scale * Index + Base + disp, addressSize * 8 - 1, 0);
     '''
 
-    def defineMicroLoadOp(mnemonic, code, mem_flags="0"):
+    def defineMicroLoadOp(mnemonic, code, bigCode='',
+                          mem_flags="0", big=True):
         global header_output
         global decoder_output
         global exec_output
@@ -324,16 +365,22 @@ let {{
         name = mnemonic.lower()
 
         # Build up the all register version of this micro op
-        iop = InstObjParams(name, Name, 'X86ISA::LdStOp',
-                {"code": code,
-                 "ea_code": calculateEA})
-        header_output += MicroLdStOpDeclare.subst(iop)
-        decoder_output += MicroLdStOpConstructor.subst(iop)
-        exec_output += MicroLoadExecute.subst(iop)
-        exec_output += MicroLoadInitiateAcc.subst(iop)
-        exec_output += MicroLoadCompleteAcc.subst(iop)
-
-        class LoadOp(LdStOp):
+        iops = [InstObjParams(name, Name, 'X86ISA::LdStOp',
+                {"code": code, "ea_code": calculateEA})]
+        if big:
+            iops += [InstObjParams(name, Name + "Big", 'X86ISA::LdStOp',
+                     {"code": bigCode, "ea_code": calculateEA})]
+        for iop in iops:
+            header_output += MicroLdStOpDeclare.subst(iop)
+            decoder_output += MicroLdStOpConstructor.subst(iop)
+            exec_output += MicroLoadExecute.subst(iop)
+            exec_output += MicroLoadInitiateAcc.subst(iop)
+            exec_output += MicroLoadCompleteAcc.subst(iop)
+
+        base = LdStOp
+        if big:
+            base = BigLdStOp
+        class LoadOp(base):
             def __init__(self, data, segment, addr, disp = 0,
                     dataSize="env.dataSize",
                     addressSize="env.addressSize",
@@ -346,12 +393,15 @@ let {{
 
         microopClasses[name] = LoadOp
 
-    defineMicroLoadOp('Ld', 'Data = merge(Data, Mem, dataSize);')
+    defineMicroLoadOp('Ld', 'Data = merge(Data, Mem, dataSize);',
+                            'Data = Mem & mask(dataSize * 8);')
     defineMicroLoadOp('Ldst', 'Data = merge(Data, Mem, dataSize);',
-            '(StoreCheck << FlagShift)')
+                              'Data = Mem & mask(dataSize * 8);',
+                      '(StoreCheck << FlagShift)')
     defineMicroLoadOp('Ldstl', 'Data = merge(Data, Mem, dataSize);',
-            '(StoreCheck << FlagShift) | Request::LOCKED')
-    defineMicroLoadOp('Ldfp', 'FpData.uqw = Mem;')
+                               'Data = Mem & mask(dataSize * 8);',
+                      '(StoreCheck << FlagShift) | Request::LOCKED')
+    defineMicroLoadOp('Ldfp', 'FpData.uqw = Mem;', big = False)
 
     def defineMicroStoreOp(mnemonic, code, \
             postCode="", completeCode="", mem_flags="0"):
diff --git a/src/arch/x86/isa/microops/limmop.isa b/src/arch/x86/isa/microops/limmop.isa
index 2871d5a89..ac78b090d 100644
--- a/src/arch/x86/isa/microops/limmop.isa
+++ b/src/arch/x86/isa/microops/limmop.isa
@@ -114,8 +114,16 @@ let {{
             self.dataSize = dataSize
 
         def getAllocator(self, microFlags):
-            allocator = '''new %(class_name)s(machInst, macrocodeBlock,
-                    %(flags)s, %(dest)s, %(imm)s, %(dataSize)s)''' % {
+            allocString = '''
+                (%(dataSize)s >= 4) ?
+                    (StaticInstPtr)(new %(class_name)sBig(machInst,
+                        macrocodeBlock, %(flags)s, %(dest)s, %(imm)s,
+                        %(dataSize)s)) :
+                    (StaticInstPtr)(new %(class_name)s(machInst,
+                        macrocodeBlock, %(flags)s, %(dest)s, %(imm)s,
+                        %(dataSize)s))
+            '''
+            allocator = allocString % {
                 "class_name" : self.className,
                 "mnemonic" : self.mnemonic,
                 "flags" : self.microFlagsText(microFlags),
@@ -152,12 +160,15 @@ let {{
 
 let {{
     # Build up the all register version of this micro op
-    iop = InstObjParams("limm", "Limm", 'X86MicroopBase',
-            {"code" : "DestReg = merge(DestReg, imm, dataSize);"})
-    header_output += MicroLimmOpDeclare.subst(iop)
-    decoder_output += MicroLimmOpConstructor.subst(iop)
-    decoder_output += MicroLimmOpDisassembly.subst(iop)
-    exec_output += MicroLimmOpExecute.subst(iop)
+    iops = [InstObjParams("limm", "Limm", 'X86MicroopBase',
+            {"code" : "DestReg = merge(DestReg, imm, dataSize);"}),
+            InstObjParams("limm", "LimmBig", 'X86MicroopBase',
+            {"code" : "DestReg = imm & mask(dataSize * 8);"})]
+    for iop in iops:
+        header_output += MicroLimmOpDeclare.subst(iop)
+        decoder_output += MicroLimmOpConstructor.subst(iop)
+        decoder_output += MicroLimmOpDisassembly.subst(iop)
+        exec_output += MicroLimmOpExecute.subst(iop)
 
     iop = InstObjParams("lfpimm", "Lfpimm", 'X86MicroopBase',
             {"code" : "FpDestReg.uqw = imm"})
diff --git a/src/arch/x86/isa/microops/regop.isa b/src/arch/x86/isa/microops/regop.isa
index ccfcb3a69..e2a51c127 100644
--- a/src/arch/x86/isa/microops/regop.isa
+++ b/src/arch/x86/isa/microops/regop.isa
@@ -51,6 +51,8 @@ def template MicroRegOpExecute {{
             %(op_decl)s;
             %(op_rd)s;
 
+            IntReg result M5_VAR_USED;
+
             if(%(cond_check)s)
             {
                 %(code)s;
@@ -79,6 +81,8 @@ def template MicroRegOpImmExecute {{
             %(op_decl)s;
             %(op_rd)s;
 
+            IntReg result M5_VAR_USED;
+
             if(%(cond_check)s)
             {
                 %(code)s;
@@ -224,8 +228,8 @@ let {{
             MicroRegOpExecute)
 
     class RegOpMeta(type):
-        def buildCppClasses(self, name, Name, suffix, \
-                code, flag_code, cond_check, else_code, cond_control_flag_init):
+        def buildCppClasses(self, name, Name, suffix, code, big_code, \
+                flag_code, cond_check, else_code, cond_control_flag_init):
 
             # Globals to stick the output in
             global header_output
@@ -235,11 +239,15 @@ let {{
             # Stick all the code together so it can be searched at once
             allCode = "|".join((code, flag_code, cond_check, else_code, 
                                 cond_control_flag_init))
+            allBigCode = "|".join((big_code, flag_code, cond_check, else_code,
+                                   cond_control_flag_init))
 
             # If op2 is used anywhere, make register and immediate versions
             # of this code.
             matcher = re.compile("(?<!\\w)(?P<prefix>s?)op2(?P<typeQual>\\.\\w+)?")
-            match = matcher.search(allCode)
+            # Join with a separator so the end of one blob and the start of
+            # the other can never run together into a single token.
+            match = matcher.search(allCode + "|" + allBigCode)
             if match:
                 typeQual = ""
                 if match.group("typeQual"):
@@ -247,6 +253,7 @@ let {{
                 src2_name = "%spsrc2%s" % (match.group("prefix"), typeQual)
                 self.buildCppClasses(name, Name, suffix,
                         matcher.sub(src2_name, code),
+                        matcher.sub(src2_name, big_code),
                         matcher.sub(src2_name, flag_code),
                         matcher.sub(src2_name, cond_check),
                         matcher.sub(src2_name, else_code),
@@ -254,6 +261,7 @@ let {{
                 imm_name = "%simm8" % match.group("prefix")
                 self.buildCppClasses(name + "i", Name, suffix + "Imm",
                         matcher.sub(imm_name, code),
+                        matcher.sub(imm_name, big_code),
                         matcher.sub(imm_name, flag_code),
                         matcher.sub(imm_name, cond_check),
                         matcher.sub(imm_name, else_code),
@@ -264,27 +272,38 @@ let {{
             # a version without it and fix up this version to use it.
             if flag_code != "" or cond_check != "true":
                 self.buildCppClasses(name, Name, suffix,
-                        code, "", "true", else_code, "")
+                        code, big_code, "", "true", else_code, "")
                 suffix = "Flags" + suffix
 
             # If psrc1 or psrc2 is used, we need to actually insert code to
             # compute it.
-            matcher = re.compile("(?<!\w)psrc1(?!\w)")
-            if matcher.search(allCode):
-                code = "uint64_t psrc1 = pick(SrcReg1, 0, dataSize);" + code
-            matcher = re.compile("(?<!\w)psrc2(?!\w)")
-            if matcher.search(allCode):
-                code = "uint64_t psrc2 = pick(SrcReg2, 1, dataSize);" + code
-            # Also make available versions which do sign extension
-            matcher = re.compile("(?<!\w)spsrc1(?!\w)")
-            if matcher.search(allCode):
-                code = "int64_t spsrc1 = signedPick(SrcReg1, 0, dataSize);" + code
-            matcher = re.compile("(?<!\w)spsrc2(?!\w)")
-            if matcher.search(allCode):
-                code = "int64_t spsrc2 = signedPick(SrcReg2, 1, dataSize);" + code
-            matcher = re.compile("(?<!\w)simm8(?!\w)")
-            if matcher.search(allCode):
-                code = "int8_t simm8 = imm8;" + code
+            # (pattern, declaration) pairs; the "s" prefixed names are the
+            # sign extended flavors. Raw strings keep "\w" a regex escape.
+            declTable = (
+                (r"(?<!\w)psrc1(?!\w)",
+                 "uint64_t psrc1 = pick(SrcReg1, 0, dataSize);"),
+                (r"(?<!\w)psrc2(?!\w)",
+                 "uint64_t psrc2 = pick(SrcReg2, 1, dataSize);"),
+                (r"(?<!\w)spsrc1(?!\w)",
+                 "int64_t spsrc1 = signedPick(SrcReg1, 0, dataSize);"),
+                (r"(?<!\w)spsrc2(?!\w)",
+                 "int64_t spsrc2 = signedPick(SrcReg2, 1, dataSize);"),
+                (r"(?<!\w)simm8(?!\w)",
+                 "int8_t simm8 = imm8;"))
+            # Work out the declarations separately for the regular and the
+            # "big" code, since each may reference different values.
+            for (big, searched) in ((False, allCode), (True, allBigCode)):
+                prefix = ""
+                for (rex, decl) in declTable:
+                    matcher = re.compile(rex)
+                    if matcher.search(searched):
+                        prefix += decl + "\n"
+                if big:
+                    # An empty big_code means there is no "Big" variant.
+                    if big_code != "":
+                        big_code = prefix + big_code
+                else:
+                    code = prefix + code
 
             base = "X86ISA::RegOp"
 
@@ -297,17 +310,26 @@ let {{
                 templates = immTemplates
 
             # Get everything ready for the substitution
-            iop = InstObjParams(name, Name + suffix, base,
+            iops = [InstObjParams(name, Name + suffix, base,
                     {"code" : code,
                      "flag_code" : flag_code,
                      "cond_check" : cond_check,
                      "else_code" : else_code,
-                     "cond_control_flag_init": cond_control_flag_init})
+                     "cond_control_flag_init" : cond_control_flag_init})]
+            if big_code != "":
+                iops += [InstObjParams(name, Name + suffix + "Big", base,
+                         {"code" : big_code,
+                          "flag_code" : flag_code,
+                          "cond_check" : cond_check,
+                          "else_code" : else_code,
+                          "cond_control_flag_init" :
+                              cond_control_flag_init})]
 
             # Generate the actual code (finally!)
-            header_output += templates[0].subst(iop)
-            decoder_output += templates[1].subst(iop)
-            exec_output += templates[2].subst(iop)
+            for iop in iops:
+                header_output += templates[0].subst(iop)
+                decoder_output += templates[1].subst(iop)
+                exec_output += templates[2].subst(iop)
 
 
         def __new__(mcls, Name, bases, dict):
@@ -322,14 +344,16 @@ let {{
                 cls.className = Name
                 cls.base_mnemonic = name
                 code = cls.code
+                big_code = cls.big_code
                 flag_code = cls.flag_code
                 cond_check = cls.cond_check
                 else_code = cls.else_code
                 cond_control_flag_init = cls.cond_control_flag_init
 
                 # Set up the C++ classes
-                mcls.buildCppClasses(cls, name, Name, "", code, flag_code,
-                        cond_check, else_code, cond_control_flag_init)
+                mcls.buildCppClasses(cls, name, Name, "", code, big_code,
+                        flag_code, cond_check, else_code,
+                        cond_control_flag_init)
 
                 # Hook into the microassembler dict
                 global microopClasses
@@ -352,6 +376,7 @@ let {{
         abstract = True
 
         # Default template parameter values
+        big_code = ""
         flag_code = ""
         cond_check = "true"
         else_code = ";"
@@ -372,26 +397,41 @@ let {{
                 self.className += "Flags"
 
         def getAllocator(self, microFlags):
-            className = self.className
-            if self.mnemonic == self.base_mnemonic + 'i':
-                className += "Imm"
-            allocator = '''new %(class_name)s(machInst, macrocodeBlock,
-                    %(flags)s, %(src1)s, %(op2)s, %(dest)s,
-                    %(dataSize)s, %(ext)s)''' % {
-                "class_name" : className,
-                "flags" : self.microFlagsText(microFlags),
-                "src1" : self.src1, "op2" : self.op2,
-                "dest" : self.dest,
-                "dataSize" : self.dataSize,
-                "ext" : self.ext}
-            return allocator
+            # The "...i" form of a mnemonic uses the Imm flavor of the class.
+            className = self.className
+            if self.mnemonic == self.base_mnemonic + 'i':
+                className += "Imm"
+            params = {
+                "class_name" : className,
+                "flags" : self.microFlagsText(microFlags),
+                "src1" : self.src1, "op2" : self.op2,
+                "dest" : self.dest,
+                "dataSize" : self.dataSize,
+                "ext" : self.ext}
+            if self.big_code != "":
+                # A "Big" variant exists: choose between it and the regular
+                # class in the generated C++ based on the data size.
+                allocString = '''
+                    (%(dataSize)s >= 4) ?
+                        (StaticInstPtr)(new %(class_name)sBig(machInst,
+                            macrocodeBlock, %(flags)s, %(src1)s, %(op2)s,
+                            %(dest)s, %(dataSize)s, %(ext)s)) :
+                        (StaticInstPtr)(new %(class_name)s(machInst,
+                            macrocodeBlock, %(flags)s, %(src1)s, %(op2)s,
+                            %(dest)s, %(dataSize)s, %(ext)s))
+                    '''
+            else:
+                allocString = '''new %(class_name)s(machInst, macrocodeBlock,
+                        %(flags)s, %(src1)s, %(op2)s, %(dest)s,
+                        %(dataSize)s, %(ext)s)'''
+            return allocString % params
 
     class LogicRegOp(RegOp):
         abstract = True
         flag_code = '''
             //Don't have genFlags handle the OF or CF bits
             uint64_t mask = CFBit | ECFBit | OFBit;
-            ccFlagBits = genFlags(ccFlagBits, ext & ~mask, DestReg, psrc1, op2);
+            ccFlagBits = genFlags(ccFlagBits, ext & ~mask, result, psrc1, op2);
             //If a logic microop wants to set these, it wants to set them to 0.
             ccFlagBits &= ~(CFBit & ext);
             ccFlagBits &= ~(ECFBit & ext);
@@ -401,12 +448,12 @@ let {{
     class FlagRegOp(RegOp):
         abstract = True
         flag_code = \
-            "ccFlagBits = genFlags(ccFlagBits, ext, DestReg, psrc1, op2);"
+            "ccFlagBits = genFlags(ccFlagBits, ext, result, psrc1, op2);"
 
     class SubRegOp(RegOp):
         abstract = True
         flag_code = \
-            "ccFlagBits = genFlags(ccFlagBits, ext, DestReg, psrc1, ~op2, true);"
+            "ccFlagBits = genFlags(ccFlagBits, ext, result, psrc1, ~op2, true);"
 
     class CondRegOp(RegOp):
         abstract = True
@@ -428,31 +475,44 @@ let {{
                     src1, src2, flags, dataSize)
 
     class Add(FlagRegOp):
-        code = 'DestReg = merge(DestReg, psrc1 + op2, dataSize);'
+        code = 'DestReg = merge(DestReg, result = (psrc1 + op2), dataSize);'
+        big_code = 'DestReg = result = (psrc1 + op2) & mask(dataSize * 8);'
 
     class Or(LogicRegOp):
-        code = 'DestReg = merge(DestReg, psrc1 | op2, dataSize);'
+        code = 'DestReg = merge(DestReg, result = (psrc1 | op2), dataSize);'
+        big_code = 'DestReg = result = (psrc1 | op2) & mask(dataSize * 8);'
 
     class Adc(FlagRegOp):
         code = '''
             CCFlagBits flags = ccFlagBits;
-            DestReg = merge(DestReg, psrc1 + op2 + flags.cf, dataSize);
+            DestReg = merge(DestReg, result = (psrc1 + op2 + flags.cf), dataSize);
+            '''
+        big_code = '''
+            CCFlagBits flags = ccFlagBits;
+            DestReg = result = (psrc1 + op2 + flags.cf) & mask(dataSize * 8);
             '''
 
     class Sbb(SubRegOp):
         code = '''
             CCFlagBits flags = ccFlagBits;
-            DestReg = merge(DestReg, psrc1 - op2 - flags.cf, dataSize);
+            DestReg = merge(DestReg, result = (psrc1 - op2 - flags.cf), dataSize);
+            '''
+        big_code = '''
+            CCFlagBits flags = ccFlagBits;
+            DestReg = result = (psrc1 - op2 - flags.cf) & mask(dataSize * 8);
             '''
 
     class And(LogicRegOp):
-        code = 'DestReg = merge(DestReg, psrc1 & op2, dataSize)'
+        code = 'DestReg = merge(DestReg, result = (psrc1 & op2), dataSize)'
+        big_code = 'DestReg = result = (psrc1 & op2) & mask(dataSize * 8)'
 
     class Sub(SubRegOp):
-        code = 'DestReg = merge(DestReg, psrc1 - op2, dataSize)'
+        code = 'DestReg = merge(DestReg, result = (psrc1 - op2), dataSize)'
+        big_code = 'DestReg = result = (psrc1 - op2) & mask(dataSize * 8)'
 
     class Xor(LogicRegOp):
-        code = 'DestReg = merge(DestReg, psrc1 ^ op2, dataSize)'
+        code = 'DestReg = merge(DestReg, result = (psrc1 ^ op2), dataSize)'
+        big_code = 'DestReg = result = (psrc1 ^ op2) & mask(dataSize * 8)'
 
     class Mul1s(WrRegOp):
         code = '''
@@ -505,6 +565,7 @@ let {{
 
     class Mulel(RdRegOp):
         code = 'DestReg = merge(SrcReg1, ProdLow, dataSize);'
+        big_code = 'DestReg = ProdLow & mask(dataSize * 8);'
 
     class Muleh(RdRegOp):
         def __init__(self, dest, src1=None, flags=None, dataSize="env.dataSize"):
@@ -513,6 +574,7 @@ let {{
             super(RdRegOp, self).__init__(dest, src1, \
                     "InstRegIndex(NUM_INTREGS)", flags, dataSize)
         code = 'DestReg = merge(SrcReg1, ProdHi, dataSize);'
+        big_code = 'DestReg = ProdHi & mask(dataSize * 8);'
 
     # One or two bit divide
     class Div1(WrRegOp):
@@ -540,7 +602,7 @@ let {{
 
     # Step divide
     class Div2(RegOp):
-        code = '''
+        divCode = '''
             uint64_t dividend = Remainder;
             uint64_t divisor = Divisor;
             uint64_t quotient = Quotient;
@@ -587,11 +649,13 @@ let {{
                 }
             }
             //Keep track of how many bits there are still to pull in.
-            DestReg = merge(DestReg, remaining, dataSize);
+            %s
             //Record the final results
             Remainder = remainder;
             Quotient = quotient;
         '''
+        code = divCode % "DestReg = merge(DestReg, remaining, dataSize);"
+        big_code = divCode % "DestReg = remaining & mask(dataSize * 8);"
         flag_code = '''
             if (remaining == 0)
                 ccFlagBits = ccFlagBits | (ext & EZFBit);
@@ -601,9 +665,11 @@ let {{
 
     class Divq(RdRegOp):
         code = 'DestReg = merge(SrcReg1, Quotient, dataSize);'
+        big_code = 'DestReg = Quotient & mask(dataSize * 8);'
 
     class Divr(RdRegOp):
         code = 'DestReg = merge(SrcReg1, Remainder, dataSize);'
+        big_code = 'DestReg = Remainder & mask(dataSize * 8);'
 
     class Mov(CondRegOp):
         code = 'DestReg = merge(SrcReg1, op2, dataSize)'
@@ -616,6 +682,10 @@ let {{
             uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5)));
             DestReg = merge(DestReg, psrc1 << shiftAmt, dataSize);
             '''
+        big_code = '''
+            uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5)));
+            DestReg = (psrc1 << shiftAmt) & mask(dataSize * 8);
+            '''
         flag_code = '''
             // If the shift amount is zero, no flags should be modified.
             if (shiftAmt) {
@@ -641,14 +711,19 @@ let {{
         '''
 
     class Srl(RegOp):
+        # Because what happens to the bits shifted -in- on a right shift
+        # is not defined in the C/C++ standard, we have to mask them out
+        # to be sure they're zero.
         code = '''
             uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5)));
-            // Because what happens to the bits shift -in- on a right shift
-            // is not defined in the C/C++ standard, we have to mask them out
-            // to be sure they're zero.
             uint64_t logicalMask = mask(dataSize * 8 - shiftAmt);
             DestReg = merge(DestReg, (psrc1 >> shiftAmt) & logicalMask, dataSize);
             '''
+        big_code = '''
+            uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5)));
+            uint64_t logicalMask = mask(dataSize * 8 - shiftAmt);
+            DestReg = (psrc1 >> shiftAmt) & logicalMask;
+            '''
         flag_code = '''
             // If the shift amount is zero, no flags should be modified.
             if (shiftAmt) {
@@ -671,15 +746,21 @@ let {{
         '''
 
     class Sra(RegOp):
+        # Because what happens to the bits shifted -in- on a right shift
+        # is not defined in the C/C++ standard, we have to sign extend
+        # them manually to be sure.
         code = '''
             uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5)));
-            // Because what happens to the bits shift -in- on a right shift
-            // is not defined in the C/C++ standard, we have to sign extend
-            // them manually to be sure.
             uint64_t arithMask = (shiftAmt == 0) ? 0 :
                 -bits(psrc1, dataSize * 8 - 1) << (dataSize * 8 - shiftAmt);
             DestReg = merge(DestReg, (psrc1 >> shiftAmt) | arithMask, dataSize);
             '''
+        big_code = '''
+            uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5)));
+            uint64_t arithMask = (shiftAmt == 0) ? 0 :
+                -bits(psrc1, dataSize * 8 - 1) << (dataSize * 8 - shiftAmt);
+            DestReg = ((psrc1 >> shiftAmt) | arithMask) & mask(dataSize * 8);
+            '''
         flag_code = '''
             // If the shift amount is zero, no flags should be modified.
             if (shiftAmt) {
@@ -704,13 +785,11 @@ let {{
             uint8_t shiftAmt =
                 (op2 & ((dataSize == 8) ? mask(6) : mask(5)));
             uint8_t realShiftAmt = shiftAmt % (dataSize * 8);
-            if(realShiftAmt)
-            {
+            if (realShiftAmt) {
                 uint64_t top = psrc1 << (dataSize * 8 - realShiftAmt);
                 uint64_t bottom = bits(psrc1, dataSize * 8, realShiftAmt);
                 DestReg = merge(DestReg, top | bottom, dataSize);
-            }
-            else
+            } else
                 DestReg = merge(DestReg, DestReg, dataSize);
             '''
         flag_code = '''
@@ -739,16 +818,14 @@ let {{
             uint8_t shiftAmt =
                 (op2 & ((dataSize == 8) ? mask(6) : mask(5)));
             uint8_t realShiftAmt = shiftAmt % (dataSize * 8 + 1);
-            if(realShiftAmt)
-            {
+            if (realShiftAmt) {
                 CCFlagBits flags = ccFlagBits;
                 uint64_t top = flags.cf << (dataSize * 8 - realShiftAmt);
                 if (realShiftAmt > 1)
                     top |= psrc1 << (dataSize * 8 - realShiftAmt + 1);
                 uint64_t bottom = bits(psrc1, dataSize * 8 - 1, realShiftAmt);
                 DestReg = merge(DestReg, top | bottom, dataSize);
-            }
-            else
+            } else
                 DestReg = merge(DestReg, DestReg, dataSize);
             '''
         flag_code = '''
@@ -780,14 +857,12 @@ let {{
             uint8_t shiftAmt =
                 (op2 & ((dataSize == 8) ? mask(6) : mask(5)));
             uint8_t realShiftAmt = shiftAmt % (dataSize * 8);
-            if(realShiftAmt)
-            {
+            if (realShiftAmt) {
                 uint64_t top = psrc1 << realShiftAmt;
                 uint64_t bottom =
                     bits(psrc1, dataSize * 8 - 1, dataSize * 8 - realShiftAmt);
                 DestReg = merge(DestReg, top | bottom, dataSize);
-            }
-            else
+            } else
                 DestReg = merge(DestReg, DestReg, dataSize);
             '''
         flag_code = '''
@@ -816,8 +891,7 @@ let {{
             uint8_t shiftAmt =
                 (op2 & ((dataSize == 8) ? mask(6) : mask(5)));
             uint8_t realShiftAmt = shiftAmt % (dataSize * 8 + 1);
-            if(realShiftAmt)
-            {
+            if (realShiftAmt) {
                 CCFlagBits flags = ccFlagBits;
                 uint64_t top = psrc1 << realShiftAmt;
                 uint64_t bottom = flags.cf << (realShiftAmt - 1);
@@ -826,8 +900,7 @@ let {{
                         bits(psrc1, dataSize * 8 - 1,
                                    dataSize * 8 - realShiftAmt + 1);
                 DestReg = merge(DestReg, top | bottom, dataSize);
-            }
-            else
+            } else
                 DestReg = merge(DestReg, DestReg, dataSize);
             '''
         flag_code = '''
@@ -853,10 +926,10 @@ let {{
         '''
 
     class Sld(RegOp):
-        code = '''
+        sldCode = '''
             uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5)));
             uint8_t dataBits = dataSize * 8;
-            uint8_t realShiftAmt = shiftAmt % (2 * dataBits);
+            uint8_t realShiftAmt = shiftAmt %% (2 * dataBits);
             uint64_t result;
             if (realShiftAmt == 0) {
                 result = psrc1;
@@ -867,8 +940,10 @@ let {{
                 result = (DoubleBits << (realShiftAmt - dataBits)) |
                          (psrc1 >> (2 * dataBits - realShiftAmt));
             }
-            DestReg = merge(DestReg, result, dataSize);
+            %s
             '''
+        code = sldCode % "DestReg = merge(DestReg, result, dataSize);"
+        big_code = sldCode % "DestReg = result & mask(dataSize * 8);"
         flag_code = '''
             // If the shift amount is zero, no flags should be modified.
             if (shiftAmt) {
@@ -899,10 +974,10 @@ let {{
         '''
 
     class Srd(RegOp):
-        code = '''
+        srdCode = '''
             uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5)));
             uint8_t dataBits = dataSize * 8;
-            uint8_t realShiftAmt = shiftAmt % (2 * dataBits);
+            uint8_t realShiftAmt = shiftAmt %% (2 * dataBits);
             uint64_t result;
             if (realShiftAmt == 0) {
                 result = psrc1;
@@ -919,8 +994,10 @@ let {{
                           logicalMask) |
                          (psrc1 << (2 * dataBits - realShiftAmt));
             }
-            DestReg = merge(DestReg, result, dataSize);
+            %s
             '''
+        code = srdCode % "DestReg = merge(DestReg, result, dataSize);"
+        big_code = srdCode % "DestReg = result & mask(dataSize * 8);"
         flag_code = '''
             // If the shift amount is zero, no flags should be modified.
             if (shiftAmt) {
@@ -986,6 +1063,12 @@ let {{
             ccFlagBits = (flag == 0) ? (ccFlagBits | EZFBit) :
                                        (ccFlagBits & ~EZFBit);
             '''
+        big_code = '''
+            int flag = bits(ccFlagBits, imm8);
+            DestReg = flag & mask(dataSize * 8);
+            ccFlagBits = (flag == 0) ? (ccFlagBits | EZFBit) :
+                                       (ccFlagBits & ~EZFBit);
+            '''
         def __init__(self, dest, imm, flags=None, \
                 dataSize="env.dataSize"):
             super(Ruflag, self).__init__(dest, \
@@ -1000,6 +1083,14 @@ let {{
             ccFlagBits = (flag == 0) ? (ccFlagBits | EZFBit) :
                                        (ccFlagBits & ~EZFBit);
             '''
+        big_code = '''
+            MiscReg flagMask = 0x3F7FDD5;
+            MiscReg flags = (nccFlagBits | ccFlagBits) & flagMask;
+            int flag = bits(flags, imm8);
+            DestReg = flag & mask(dataSize * 8);
+            ccFlagBits = (flag == 0) ? (ccFlagBits | EZFBit) :
+                                       (ccFlagBits & ~EZFBit);
+            '''
         def __init__(self, dest, imm, flags=None, \
                 dataSize="env.dataSize"):
             super(Rflag, self).__init__(dest, \
@@ -1015,6 +1106,15 @@ let {{
             val = sign_bit ? (val | ~maskVal) : (val & maskVal);
             DestReg = merge(DestReg, val, dataSize);
             '''
+        big_code = '''
+            IntReg val = psrc1;
+            // Mask the bit position so that it wraps.
+            int bitPos = op2 & (dataSize * 8 - 1);
+            int sign_bit = bits(val, bitPos, bitPos);
+            uint64_t maskVal = mask(bitPos+1);
+            val = sign_bit ? (val | ~maskVal) : (val & maskVal);
+            DestReg = val & mask(dataSize * 8);
+            '''
         flag_code = '''
             if (!sign_bit)
                 ccFlagBits = ccFlagBits &
@@ -1026,12 +1126,13 @@ let {{
 
     class Zext(RegOp):
         code = 'DestReg = merge(DestReg, bits(psrc1, op2, 0), dataSize);'
+        big_code = 'DestReg = bits(psrc1, op2, 0) & mask(dataSize * 8);'
 
     class Rddr(RegOp):
         def __init__(self, dest, src1, flags=None, dataSize="env.dataSize"):
             super(Rddr, self).__init__(dest, \
                     src1, "InstRegIndex(NUM_INTREGS)", flags, dataSize)
-        code = '''
+        rdrCode = '''
             CR4 cr4 = CR4Op;
             DR7 dr7 = DR7Op;
             if ((cr4.de == 1 && (src1 == 4 || src1 == 5)) || src1 >= 8) {
@@ -1039,9 +1140,11 @@ let {{
             } else if (dr7.gd) {
                 fault = new DebugException();
             } else {
-                DestReg = merge(DestReg, DebugSrc1, dataSize);
+                %s
             }
         '''
+        code = rdrCode % "DestReg = merge(DestReg, DebugSrc1, dataSize);"
+        big_code = rdrCode % "DestReg = DebugSrc1 & mask(dataSize * 8);"
 
     class Wrdr(RegOp):
         def __init__(self, dest, src1, flags=None, dataSize="env.dataSize"):
@@ -1066,13 +1169,15 @@ let {{
         def __init__(self, dest, src1, flags=None, dataSize="env.dataSize"):
             super(Rdcr, self).__init__(dest, \
                     src1, "InstRegIndex(NUM_INTREGS)", flags, dataSize)
-        code = '''
+        rdcrCode = '''
             if (src1 == 1 || (src1 > 4 && src1 < 8) || (src1 > 8)) {
                 fault = new InvalidOpcode();
             } else {
-                DestReg = merge(DestReg, ControlSrc1, dataSize);
+                %s
             }
         '''
+        code = rdcrCode % "DestReg = merge(DestReg, ControlSrc1, dataSize);"
+        big_code = rdcrCode % "DestReg = ControlSrc1 & mask(dataSize * 8);"
 
     class Wrcr(RegOp):
         def __init__(self, dest, src1, flags=None, dataSize="env.dataSize"):
@@ -1154,24 +1259,20 @@ let {{
         '''
 
     class Rdbase(SegOp):
-        code = '''
-            DestReg = merge(DestReg, SegBaseSrc1, dataSize);
-        '''
+        code = 'DestReg = merge(DestReg, SegBaseSrc1, dataSize);'
+        big_code = 'DestReg = SegBaseSrc1 & mask(dataSize * 8);'
 
     class Rdlimit(SegOp):
-        code = '''
-            DestReg = merge(DestReg, SegLimitSrc1, dataSize);
-        '''
+        code = 'DestReg = merge(DestReg, SegLimitSrc1, dataSize);'
+        big_code = 'DestReg = SegLimitSrc1 & mask(dataSize * 8);'
 
     class RdAttr(SegOp):
-        code = '''
-            DestReg = merge(DestReg, SegAttrSrc1, dataSize);
-        '''
+        code = 'DestReg = merge(DestReg, SegAttrSrc1, dataSize);'
+        big_code = 'DestReg = SegAttrSrc1 & mask(dataSize * 8);'
 
     class Rdsel(SegOp):
-        code = '''
-            DestReg = merge(DestReg, SegSelSrc1, dataSize);
-        '''
+        code = 'DestReg = merge(DestReg, SegSelSrc1, dataSize);'
+        big_code = 'DestReg = SegSelSrc1 & mask(dataSize * 8);'
 
     class Rdval(RegOp):
         def __init__(self, dest, src1, flags=None, dataSize="env.dataSize"):
diff --git a/src/arch/x86/microcode_rom.hh b/src/arch/x86/microcode_rom.hh
index f8ad410ce..84c503bb9 100644
--- a/src/arch/x86/microcode_rom.hh
+++ b/src/arch/x86/microcode_rom.hh
@@ -32,6 +32,7 @@
 #define __ARCH_X86_MICROCODE_ROM_HH__
 
 #include "arch/x86/emulenv.hh"
+#include "arch/x86/insts/badmicroop.hh"
 #include "cpu/static_inst.hh"
 
 namespace X86ISAInst
@@ -60,8 +61,10 @@ namespace X86ISAInst
         fetchMicroop(MicroPC microPC, StaticInstPtr curMacroop)
         {
             microPC = normalMicroPC(microPC);
-            assert(microPC < numMicroops);
-            return genFuncs[microPC](curMacroop);
+            if (microPC >= numMicroops)
+                return X86ISA::badMicroop;
+            else
+                return genFuncs[microPC](curMacroop);
         }
     };
 }
diff --git a/src/arch/x86/predecoder.hh b/src/arch/x86/predecoder.hh
index c06ec18bc..5c67e28e1 100644
--- a/src/arch/x86/predecoder.hh
+++ b/src/arch/x86/predecoder.hh
@@ -225,7 +225,11 @@ namespace X86ISA
         {
             assert(emiIsReady);
             emiIsReady = false;
-            nextPC.npc(nextPC.pc() + getInstSize());
+            if (!nextPC.size()) {
+                Addr size = getInstSize();
+                nextPC.size(size);
+                nextPC.npc(nextPC.pc() + size);
+            }
             return emi;
         }
     };
diff --git a/src/arch/x86/types.hh b/src/arch/x86/types.hh
index 5a208446a..4641141d3 100644
--- a/src/arch/x86/types.hh
+++ b/src/arch/x86/types.hh
@@ -222,7 +222,75 @@ namespace X86ISA
         return true;
     }
 
-    typedef GenericISA::UPCState<MachInst> PCState;
+    /**
+     * PC state for x86: the generic UPCState plus the recorded size of the
+     * current instruction. A size of 0 means no size has been recorded;
+     * set(), advance() and uEnd() all reset it to 0.
+     */
+    class PCState : public GenericISA::UPCState<MachInst>
+    {
+      protected:
+        typedef GenericISA::UPCState<MachInst> Base;
+
+        /// Recorded instruction size, or 0 if none has been recorded.
+        uint8_t _size;
+
+      public:
+        /// Point the state at val and drop any recorded size.
+        void
+        set(Addr val)
+        {
+            Base::set(val);
+            _size = 0;
+        }
+
+        // Start out with no size recorded instead of an indeterminate
+        // value, so size() is safe to read on a fresh object.
+        PCState() : _size(0) {}
+        PCState(Addr val) { set(val); }
+
+        uint8_t size() const { return _size; }
+        void size(uint8_t newSize) { _size = newSize; }
+
+        /// True when npc() differs from pc() + size().
+        bool
+        branching() const
+        {
+            return this->npc() != this->pc() + size();
+        }
+
+        /// Advance the base state and drop the recorded size.
+        void
+        advance()
+        {
+            Base::advance();
+            _size = 0;
+        }
+
+        /// Forward to Base::uEnd() and drop the recorded size.
+        void
+        uEnd()
+        {
+            Base::uEnd();
+            _size = 0;
+        }
+
+        /// Write the base state followed by the recorded size.
+        void
+        serialize(std::ostream &os)
+        {
+            Base::serialize(os);
+            SERIALIZE_SCALAR(_size);
+        }
+
+        /// Restore the base state followed by the recorded size.
+        void
+        unserialize(Checkpoint *cp, const std::string &section)
+        {
+            Base::unserialize(cp, section);
+            UNSERIALIZE_SCALAR(_size);
+        }
+    };
 
     struct CoreSpecific {
         int core_type;
diff --git a/src/base/SConscript b/src/base/SConscript
index 2bb6b13ab..3f069bf9e 100644
--- a/src/base/SConscript
+++ b/src/base/SConscript
@@ -35,6 +35,7 @@ if env['CP_ANNOTATE']:
     Source('cp_annotate.cc')
 Source('atomicio.cc')
 Source('bigint.cc')
+Source('bitmap.cc')
 Source('callback.cc')
 Source('circlebuf.cc')
 Source('cprintf.cc')
diff --git a/src/base/bitmap.cc b/src/base/bitmap.cc
new file mode 100644
index 000000000..0d2a9302b
--- /dev/null
+++ b/src/base/bitmap.cc
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2010 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: William Wang
+ *          Ali Saidi
+ */
+
+#include <cassert>
+
+#include "base/bitmap.hh"
+#include "base/misc.hh"
+
+// Record the source format and geometry and set up the rgb8888 converter
+Bitmap::Bitmap(VideoConvert::Mode _mode, uint16_t w, uint16_t h, uint8_t *d)
+    : mode(_mode), height(h), width(w),
+      data(d), vc(mode, VideoConvert::rgb8888, width, height)
+{
+}
+
+void
+Bitmap::write(std::ostream *bmp)
+{
+    // Emits magic, file header, info header and then the pixel rows
+    assert(data);
+    // For further information see: http://en.wikipedia.org/wiki/BMP_file_format
+    // The header's size field covers the whole file: 54 header bytes + pixels
+    const uint32_t px_bytes = sizeof(VideoConvert::Rgb8888) * width * height;
+    Magic  magic = {{'B','M'}};
+    Header header = {54 + px_bytes, 0, 0, 54};
+    Info   info = {sizeof(Info), width, height, 1,
+                   sizeof(VideoConvert::Rgb8888) * 8, 0, px_bytes, 1, 1, 0, 0};
+
+    bmp->write(reinterpret_cast<char*>(&magic),  sizeof(magic));
+    bmp->write(reinterpret_cast<char*>(&header), sizeof(header));
+    bmp->write(reinterpret_cast<char*>(&info),   sizeof(info));
+
+    // convert() hands back a new[] buffer of rgb8888 pixels that we own
+    uint8_t *tmp = vc.convert(data);
+    uint32_t *tmp32 = (uint32_t*)tmp;
+
+    // BMP stores rows bottom-up, each left to right, so emit the last
+    // converted row first; a row is contiguous and goes out in one call
+    for (int i = height - 1; i >= 0; i--)
+        bmp->write((char*)&tmp32[i * width], width * sizeof(uint32_t));
+
+    bmp->flush();
+    delete [] tmp;
+}
+
diff --git a/src/base/bitmap.hh b/src/base/bitmap.hh
new file mode 100644
index 000000000..9dfaa87a1
--- /dev/null
+++ b/src/base/bitmap.hh
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2010 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: William Wang
+ *          Ali Saidi
+ */
+#ifndef __BASE_BITMAP_HH__
+#define __BASE_BITMAP_HH__
+
+#include <fstream>
+
+#include "base/vnc/convert.hh"
+
+/**
+ * @file Declaration of a class that writes a frame buffer to a bitmap
+ */
+
+
+// Writes a frame buffer out as a 32 bits-per-pixel BMP picture
+class  Bitmap
+{
+  public:
+    /** Create a Bitmap creator that takes data in the given mode & size
+     * and outputs it to a std::ostream when write() is called
+     * @param mode the type of data that is being provided
+     * @param w the width of the image
+     * @param h the height of the image
+     * @param d the data for the image in mode
+     */
+    Bitmap(VideoConvert::Mode mode, uint16_t w, uint16_t h, uint8_t *d);
+
+    /** Provide the converter with the data that should be output. It will be
+     * converted into rgb8888 and written out when write() is called.
+     * @param d the data
+     */
+    void rawData(uint8_t* d) { data = d; }
+
+    /** Write the provided data into the stream provided
+     * @param bmp stream to write to
+     */
+    void write(std::ostream *bmp);
+
+  private:
+    VideoConvert::Mode mode;
+    uint16_t height;
+    uint16_t width;
+    uint8_t *data;      // not owned: supplied by the ctor or rawData()
+
+    VideoConvert vc;    // converts data from mode to rgb8888
+
+    struct Magic        // first bytes of the file, written as 'B','M'
+    {
+        unsigned char magic_number[2];
+    };
+
+    struct Header       // file header, written raw right after Magic
+    {
+        uint32_t size;
+        uint16_t reserved1;
+        uint16_t reserved2;
+        uint32_t offset;
+    };
+
+    struct Info         // image description, written raw after Header
+    {
+        uint32_t Size;
+        uint32_t Width;
+        uint32_t Height;
+        uint16_t Planes;
+        uint16_t BitCount;
+        uint32_t Compression;
+        uint32_t SizeImage;
+        uint32_t XPelsPerMeter;
+        uint32_t YPelsPerMeter;
+        uint32_t ClrUsed;
+        uint32_t ClrImportant;
+    };
+};
+
+#endif // __BASE_BITMAP_HH__
+
diff --git a/src/base/compiler.hh b/src/base/compiler.hh
index 2c655af60..3315fb2f7 100644
--- a/src/base/compiler.hh
+++ b/src/base/compiler.hh
@@ -41,6 +41,7 @@
 #define M5_PRAGMA_NORETURN(x)
 #define M5_DUMMY_RETURN
 #define M5_VAR_USED __attribute__((unused))
+#define M5_ATTR_PACKED __attribute__ ((__packed__))
 #elif defined(__SUNPRO_CC)
 // this doesn't do anything with sun cc, but why not
 #define M5_ATTR_NORETURN  __sun_attr__((__noreturn__))
@@ -48,6 +49,7 @@
 #define DO_PRAGMA(x) _Pragma(#x)
 #define M5_VAR_USED
 #define M5_PRAGMA_NORETURN(x) DO_PRAGMA(does_not_return(x))
+#define M5_ATTR_PACKED __attribute__ ((__packed__))
 #else
 #error "Need to define compiler options in base/compiler.hh"
 #endif
diff --git a/src/base/vnc/SConscript b/src/base/vnc/SConscript
new file mode 100644
index 000000000..c92676555
--- /dev/null
+++ b/src/base/vnc/SConscript
@@ -0,0 +1,48 @@
+# -*- mode:python -*-
+
+# Copyright (c) 2010 ARM Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder.  You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: William Wang
+
+Import('*')
+
+if env['FULL_SYSTEM']:  # the VNC server is only built for full-system mode
+    SimObject('VncServer.py')
+    Source('vncserver.cc')
+    TraceFlag('VNC')
+
+Source('convert.cc')  # the pixel format converter is built unconditionally
+
diff --git a/src/base/vnc/VncServer.py b/src/base/vnc/VncServer.py
new file mode 100644
index 000000000..21eb3ed28
--- /dev/null
+++ b/src/base/vnc/VncServer.py
@@ -0,0 +1,45 @@
+# Copyright (c) 2010 ARM Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder.  You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: William Wang
+
+from m5.SimObject import SimObject
+from m5.params import *
+from m5.proxy import *
+
+class VncServer(SimObject):  # parameters of the VncServer SimObject
+    type = 'VncServer'
+    port = Param.TcpPort(5900, "listen port")
+    number = Param.Int(0, "vnc client number")
diff --git a/src/base/vnc/convert.cc b/src/base/vnc/convert.cc
new file mode 100644
index 000000000..ea7a9b1c5
--- /dev/null
+++ b/src/base/vnc/convert.cc
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2011 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Ali Saidi
+ *          William Wang
+ */
+
+#include <cassert>
+
+#include "base/misc.hh"
+#include "base/vnc/convert.hh"
+
+/** @file
+ * This file provides conversion functions for a variety of video modes
+ */
+
+VideoConvert::VideoConvert(Mode input_mode, Mode output_mode, int _width,
+        int _height)
+    : inputMode(input_mode), outputMode(output_mode), width(_width),
+    height(_height)
+{
+    // Only the conversions implemented by convert() are accepted
+    if (inputMode != bgr565 && inputMode != rgb565 && inputMode != bgr8888)
+        fatal("Only support converting from bgr565, rgb565, and bgr8888\n");
+    if (outputMode != rgb8888)
+        fatal("Only support converting to rgb8888\n");
+    // Sanity bounds; they also keep area() well inside the range of int
+    assert(0 < height && height < 4000);
+    assert(0 < width && width < 4000);
+}
+
+VideoConvert::~VideoConvert()
+{   // nothing to release: the members are two modes and two ints
+}
+
+uint8_t*
+VideoConvert::convert(uint8_t *fb)
+{
+    // Dispatch on the input format chosen at construction; every helper
+    // returns a freshly allocated rgb8888 buffer that the caller frees
+    if (inputMode == bgr8888)
+        return bgr8888rgb8888(fb);
+
+    const bool is565 = (inputMode == bgr565 || inputMode == rgb565);
+    if (!is565)
+        panic("Unimplemented Mode\n");
+
+    return m565rgb8888(fb, inputMode == bgr565);
+}
+
+uint8_t*
+VideoConvert::m565rgb8888(uint8_t *fb, bool bgr)
+{
+    // Each 16-bit source pixel expands to one 32-bit rgb8888 pixel.  The
+    // result is allocated here with new[] and handed to the caller.
+    const int npixels = area();
+    uint32_t *dst = (uint32_t*)new uint8_t[npixels * sizeof(uint32_t)];
+    const uint16_t *src = (const uint16_t*)fb;
+
+    for (int i = 0; i < npixels; i++) {
+        Bgr565 px16;
+        px16 = src[i];
+        // Widen the 5/6/5-bit channels to 8 bits by shifting left; the
+        // low bits of each channel stay zero.
+        Rgb8888 px32 = 0;
+        px32.green = px16.green << 2;
+        if (!bgr) {
+            px32.red = px16.red << 3;
+            px32.blue = px16.blue << 3;
+        } else {
+            // bgr input: exchange the two 5-bit channels
+            px32.red = px16.blue << 3;
+            px32.blue = px16.red << 3;
+        }
+        dst[i] = px32;
+    }
+
+    return (uint8_t*)dst;
+}
+
+
+uint8_t*
+VideoConvert::bgr8888rgb8888(uint8_t *fb)
+{
+    // Output has the same pixel count and 32-bit pixel size as the input;
+    // it is allocated here with new[] and handed to the caller.
+    const int npixels = area();
+    uint32_t *dst = (uint32_t*)new uint8_t[npixels * sizeof(uint32_t)];
+    const uint32_t *src = (const uint32_t*)fb;
+
+    for (int i = 0; i < npixels; i++) {
+        Bgr8888 px_in;
+        px_in = src[i];
+
+        // Copy the three colour fields by name; alpha is left at zero
+        // because the output pixel starts out as 0.
+        Rgb8888 px_out = 0;
+        px_out.blue = px_in.red;
+        px_out.green = px_in.green;
+        px_out.red = px_in.blue;
+        dst[i] = px_out;
+    }
+
+    return (uint8_t*)dst;
+}
+
diff --git a/src/base/vnc/convert.hh b/src/base/vnc/convert.hh
new file mode 100644
index 000000000..68a21d677
--- /dev/null
+++ b/src/base/vnc/convert.hh
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2011 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Ali Saidi
+ */
+
+/** @file
+ * This file provides conversion functions for a variety of video modes
+ */
+
+#ifndef __BASE_VNC_CONVERT_HH__
+#define __BASE_VNC_CONVERT_HH__
+
+#include "base/bitunion.hh"
+
+class VideoConvert
+{
+  public:
+    enum Mode {
+        UnknownMode,
+        bgr565,     // accepted as input
+        rgb565,     // accepted as input
+        bgr8888,    // accepted as input
+        rgb8888,    // the only accepted output
+        rgb888,
+        bgr888,
+        bgr444,
+        bgr4444,
+        rgb444,
+        rgb4444,
+    };
+
+    // supports bpp32 RGB (bmp) and bpp16 5:6:5 mode BGR (linux)
+    BitUnion32(Rgb8888)
+        Bitfield<7,0> blue;
+        Bitfield<15,8> green;
+        Bitfield<23,16> red;
+        Bitfield<31,24> alpha;
+    EndBitUnion(Rgb8888)
+
+    BitUnion32(Bgr8888)
+        Bitfield<7,0> red;
+        Bitfield<15,8> green;
+        Bitfield<23,16> blue;
+        Bitfield<31,24> alpha;
+    EndBitUnion(Bgr8888)
+
+    BitUnion16(Bgr565)
+        Bitfield<4,0> red;
+        Bitfield<10,5> green;
+        Bitfield<15,11> blue;
+    EndBitUnion(Bgr565)
+    // NOTE(review): Rgb565 is declared identically to Bgr565 - confirm intent
+    BitUnion16(Rgb565)
+        Bitfield<4,0> red;
+        Bitfield<10,5> green;
+        Bitfield<15,11> blue;
+    EndBitUnion(Rgb565)
+
+    /** Setup the converter with the given parameters
+     * @param input_mode type of data provided (bgr565, rgb565 or bgr8888)
+     * @param output_mode type of data that should be output (rgb8888 only)
+     * @param _width width of the frame buffer
+     * @param _height height of the frame buffer
+     */
+    VideoConvert(Mode input_mode, Mode output_mode, int _width, int _height);
+
+    /** Destructor
+     */
+    ~VideoConvert();
+
+    /** Convert the provided frame buffer data into the format specified in the
+     * constructor.
+     * @param fb the frame buffer to convert
+     * @return the converted data, allocated with new[] (caller must delete [])
+     */
+    uint8_t* convert(uint8_t *fb);
+
+    /** Return the number of pixels that this buffer specifies
+     * @return number of pixels (width * height)
+     */
+    int area() { return width * height; }
+
+  private:
+
+    /**
+     * Convert a bgr8888 input to rgb8888.
+     * @param fb the data to convert
+     * @return converted data in a newly allocated buffer
+     */
+    uint8_t* bgr8888rgb8888(uint8_t *fb);
+
+    /**
+     * Convert a bgr565 or rgb565 input to rgb8888.
+     * @param fb the data to convert
+     * @param bgr true if the input data is bgr565
+     * @return converted data in a newly allocated buffer
+     */
+    uint8_t* m565rgb8888(uint8_t *fb, bool bgr);
+
+    Mode inputMode;
+    Mode outputMode;
+    int width;
+    int height;
+};
+
+#endif // __BASE_VNC_CONVERT_HH__
+
diff --git a/src/base/vnc/vncserver.cc b/src/base/vnc/vncserver.cc
new file mode 100644
index 000000000..8936fa67b
--- /dev/null
+++ b/src/base/vnc/vncserver.cc
@@ -0,0 +1,703 @@
+/*
+ * Copyright (c) 2010 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Ali Saidi
+ *          William Wang
+ */
+
+/** @file
+ * Implementation of a VNC server
+ */
+
+#include <cstdio>
+
+#include <sys/ioctl.h>
+#include <sys/termios.h>
+#include <errno.h>
+#include <poll.h>
+#include <unistd.h>
+
+#include "base/atomicio.hh"
+#include "base/misc.hh"
+#include "base/socket.hh"
+#include "base/trace.hh"
+#include "base/vnc/vncserver.hh"
+#include "sim/byteswap.hh"
+
+using namespace std;
+
+/**
+ * Poll event for the listen socket: polls fd for events e on behalf of vs
+ */
+VncServer::ListenEvent::ListenEvent(VncServer *vs, int fd, int e)
+    : PollEvent(fd, e), vncserver(vs)
+{
+}
+
+void
+VncServer::ListenEvent::process(int revent)
+{
+    vncserver->accept(); // revent is not inspected; always try to accept
+}
+
+/**
+ * Poll event for the data socket: polls fd for events e on behalf of vs
+ */
+VncServer::DataEvent::DataEvent(VncServer *vs, int fd, int e)
+    : PollEvent(fd, e), vncserver(vs)
+{
+}
+
+void
+VncServer::DataEvent::process(int revent)
+{
+    if (revent & POLLIN)            // input is pending: let the server read it
+        vncserver->data();
+    else if (revent & POLLNVAL)     // descriptor reported invalid: drop client
+        vncserver->detach();
+}
+
+/**
+ * VncServer: start listening (if a port is configured) and set up defaults
+ */
+VncServer::VncServer(const Params *p)
+    : SimObject(p), listenEvent(NULL), dataEvent(NULL), number(p->number),
+      dataFd(-1), _videoWidth(1), _videoHeight(1), clientRfb(0), keyboard(NULL),
+      mouse(NULL), sendUpdate(false), videoMode(VideoConvert::UnknownMode),
+      vc(NULL)
+{
+    if (p->port)    // a port of 0 leaves the server without a listen socket
+        listen(p->port);
+
+    curState = WaitForProtocolVersion;  // first state of the client handshake
+
+
+    // currently we only support this one pixel format
+    // unpacked 32bit rgb (rgb888 + 8 bits of nothing/alpha)
+    // keep it around for telling the client and making
+    // sure the client cooperates
+    pixelFormat.bpp = 32;
+    pixelFormat.depth = 24;
+    pixelFormat.bigendian = 0;
+    pixelFormat.truecolor = 1;
+    pixelFormat.redmax = 0xff;
+    pixelFormat.greenmax = 0xff;
+    pixelFormat.bluemax = 0xff;
+    pixelFormat.redshift = 16;      // shifts match VideoConvert::Rgb8888:
+    pixelFormat.greenshift = 8;     // red at bit 16, green at bit 8,
+    pixelFormat.blueshift = 0;      // blue at bit 0
+
+
+    DPRINTF(VNC, "Vnc server created at port %d\n", p->port);
+}
+
+VncServer::~VncServer()
+{
+    // Drop the client connection, if one is open.
+    if (dataFd != -1)
+        ::close(dataFd);
+
+    // Release the poll events; deleting a NULL pointer is a no-op, so the
+    // pointers need no explicit guards.
+    delete listenEvent;
+    delete dataEvent;
+}
+
+
+// Open the listen socket, starting at port, and schedule its poll event
+void
+VncServer::listen(int port)
+{
+    if (ListenSocket::allDisabled()) {
+        warn_once("Sockets disabled, not accepting vnc client connections");
+        return;
+    }
+
+    while (!listener.listen(port, true)) {  // keep trying the next port up
+        DPRINTF(VNC,
+                "can't bind address vnc server port %d in use PID %d\n",
+                port, getpid());
+        port++;
+    }
+
+    int p1, p2;     // bound the second-to-last dotted component of name()
+    p2 = name().rfind('.') - 1;
+    p1 = name().rfind('.', p2);
+    ccprintf(cerr, "Listening for %s connection on port %d\n",
+             name().substr(p1 + 1, p2 - p1), port);
+
+    listenEvent = new ListenEvent(this, listener.getfd(), POLLIN);
+    pollQueue.schedule(listenEvent);
+}
+
+// Attach a vnc client; only one client may be attached at a time
+void
+VncServer::accept()
+{
+    if (!listener.islistening())
+        panic("%s: cannot accept a connection if not listening!", name());
+
+    int fd = listener.accept(true); // NOTE(review): result is used unchecked
+    if (dataFd != -1) {
+        char message[] = "vnc server already attached!\n";
+        atomic_write(fd, message, sizeof(message)); // sizeof includes the NUL
+        ::close(fd);
+        return;
+    }
+
+    dataFd = fd;
+
+    // Send our version number to the client
+    write((uint8_t*)vncVersion(), strlen(vncVersion()));
+
+    // read the client response
+    dataEvent = new DataEvent(this, dataFd, POLLIN);
+    pollQueue.schedule(dataEvent);
+
+    inform("VNC client attached\n");
+}
+
+// Called by DataEvent when the client socket is readable; acts on curState
+void
+VncServer::data()
+{
+    // We have new data, see if we can handle it
+    size_t len;
+    DPRINTF(VNC, "Vnc client message recieved\n");
+
+    switch (curState) {
+      case WaitForProtocolVersion:
+        checkProtocolVersion();
+        break;
+      case WaitForSecurityResponse:
+        checkSecurity();
+        break;
+      case WaitForClientInit:
+        // Don't care about shared, just need to read it out of the socket
+        uint8_t shared;
+        len = read(&shared);
+        if (!len)   // read() has already detached the client
+            return;
+        assert(len == 1);
+        // Send our idea of the frame buffer
+        sendServerInit();
+        break;
+      case NormalPhase:
+        uint8_t message_type;
+        len = read(&message_type);
+        if (!len) {
+            detach();
+            return;
+        }
+        assert(len == 1);
+
+        switch (message_type) {
+          case ClientSetPixelFormat:
+            setPixelFormat();
+            break;
+          case ClientSetEncodings:
+            setEncodings();
+            break;
+          case ClientFrameBufferUpdate:
+             requestFbUpdate();
+             break;
+          case ClientKeyEvent:
+             recvKeyboardInput();
+             break;
+          case ClientPointerEvent:
+             recvPointerInput();
+             break;
+          case ClientCutText:
+             recvCutText();
+             break;
+          default:
+             panic("Unimplemented message type recv from client: %d\n",
+                     message_type);
+             break;
+        }
+        break;
+      default:
+        panic("Unknown vnc server state\n");
+    }
+}
+
+
+// read up to len bytes from the client socket; returns 0 after detaching
+size_t
+VncServer::read(uint8_t *buf, size_t len)
+{
+    if (dataFd < 0)
+        panic("vnc not properly attached.\n");
+
+    // ::read() returns -1 on error, so keep the result signed; held in a
+    // size_t the error value would slip past the <= 0 test below
+    ssize_t ret;
+    do {
+        ret = ::read(dataFd, buf, len);
+    } while (ret == -1 && errno == EINTR);
+
+    if (ret <= 0){
+        DPRINTF(VNC, "Read failed.\n");
+        detach();
+        return 0;
+    }
+    return ret;
+}
+
+size_t
+VncServer::read1(uint8_t *buf, size_t len)
+{
+    size_t read_len M5_VAR_USED;
+    read_len = read(buf + 1, len - 1);  // fills buf[1..len-1], skips buf[0]
+    assert(read_len == len - 1);        // a short or failed read trips this
+    return read_len;
+}
+
+
+template<typename T>
+size_t
+VncServer::read(T* val)
+{
+    return read((uint8_t*)val, sizeof(T)); // fill *val with raw socket bytes
+}
+
+// write len bytes to the client socket; detaches on a failed or short write
+size_t
+VncServer::write(const uint8_t *buf, size_t len)
+{
+    if (dataFd < 0)
+        panic("Vnc client not properly attached.\n");
+
+    ssize_t ret = atomic_write(dataFd, buf, len);
+    // Test the sign first: compared directly with the unsigned len, a
+    // negative ret would be converted to a huge value and look like success
+    if (ret < 0 || (size_t)ret < len)
+        detach();
+
+    return ret < 0 ? 0 : ret;   // report a failed write as 0 bytes written
+}
+
+template<typename T>
+size_t
+VncServer::write(T* val)
+{
+    return write((uint8_t*)val, sizeof(T)); // send the object's raw bytes
+}
+
+size_t
+VncServer::write(const char* str)
+{
+    return write((uint8_t*)str, strlen(str)); // terminating NUL is not sent
+}
+
+// Detach the vnc client: close its socket and retire its poll event
+void
+VncServer::detach()
+{
+    if (dataFd != -1) {
+        ::close(dataFd);
+        dataFd = -1;    // read() and write() panic while this is negative
+    }
+
+    if (!dataEvent || !dataEvent->queued())  // no queued event to retire
+        return;
+
+    pollQueue.remove(dataEvent);
+    delete dataEvent;
+    dataEvent = NULL;
+    curState = WaitForProtocolVersion;  // next client starts the handshake
+
+    inform("VNC client detached\n");
+    DPRINTF(VNC, "detach vnc client %d\n", number);
+}
+
+// Send a failure reason to the client: a 32-bit length followed by the
+// message text (without a terminating NUL).
+void
+VncServer::sendError(const char* error_msg)
+{
+    // Like every other multi-byte field this server transmits, the length
+    // has to go out big-endian.
+    uint32_t len = htobe((uint32_t)strlen(error_msg));
+
+    // If the length could not be sent in full, do not attempt to send the
+    // text: write() detaches the client on a short write, and writing to
+    // a detached client panics.
+    if (write(&len) != sizeof(len))
+        return;
+
+    write(error_msg);
+}
+
+// Read the client's 12-byte "RFB xxx.yyy\n" protocol version message and
+// reply with the security type(s) we offer. A malformed or unsupported
+// version gets an error reply and the client is disconnected; otherwise the
+// state machine advances to WaitForSecurityResponse.
+void
+VncServer::checkProtocolVersion()
+{
+    assert(curState == WaitForProtocolVersion);
+
+    size_t len M5_VAR_USED;
+    char version_string[13];
+
+    // Null terminate the message so it's easier to work with
+    version_string[12] = 0;
+
+    len = read((uint8_t*)version_string, 12);
+    assert(len == 12);
+
+    // Unsigned to match the %u conversions below, and zero-initialised so
+    // they are never read without having been assigned.
+    unsigned major = 0, minor = 0;
+
+    // Figure out the major/minor numbers
+    if (sscanf(version_string, "RFB %03u.%03u\n", &major, &minor) != 2) {
+        warn(" Malformed protocol version %s\n", version_string);
+        sendError("Malformed protocol version\n");
+        detach();
+        // The connection is gone: any further write() would panic.
+        return;
+    }
+
+    DPRINTF(VNC, "Client request protocol version %d.%d\n", major, minor);
+
+    // If it's not 3.X we don't support it
+    if (major != 3 || minor < 2) {
+        warn("Unsupported VNC client version... disconnecting\n");
+        uint8_t err = AuthInvalid;
+        write(&err);
+        detach();
+        // Do not fall through and offer security types to a client we
+        // have just disconnected.
+        return;
+    }
+
+    // Auth is different based on version number
+    if (minor < 7) {
+        // Older clients get a single 32-bit security type
+        uint32_t sec_type = htobe((uint32_t)AuthNone);
+        write(&sec_type);
+    } else {
+        // Newer clients get a count followed by one byte per type
+        uint8_t sec_cnt = 1;
+        uint8_t sec_type = htobe((uint8_t)AuthNone);
+        write(&sec_cnt);
+        write(&sec_type);
+    }
+
+    // Wait for client to respond
+    curState = WaitForSecurityResponse;
+}
+
+// Read the one-byte security type chosen by the client. Only AuthNone is
+// accepted; anything else is reported to the client and the connection is
+// dropped. On success an OK status is sent and the state machine advances
+// to WaitForClientInit.
+void
+VncServer::checkSecurity()
+{
+    assert(curState == WaitForSecurityResponse);
+
+    uint8_t security_type;
+    size_t len M5_VAR_USED = read(&security_type);
+
+    assert(len == 1);
+
+    if (security_type != AuthNone) {
+        warn("Unknown VNC security type\n");
+        // NOTE(review): only the reason text is sent here, with no
+        // failure status word in front of it -- confirm against the RFB
+        // handshake description.
+        sendError("Unknown security type\n");
+        // Do not go on to tell the client that authentication succeeded.
+        detach();
+        return;
+    }
+
+    DPRINTF(VNC, "Sending security auth OK\n");
+
+    uint32_t success = htobe(VncOK);
+    write(&success);
+    curState = WaitForClientInit;
+}
+
+// Build and send the server init message: frame buffer dimensions, our
+// pixel format and the two-character desktop name "M5", all converted with
+// htobe(). Afterwards the connection is in the NormalPhase state.
+void
+VncServer::sendServerInit()
+{
+    DPRINTF(VNC, "Sending server init message to client\n");
+
+    ServerInitMsg msg;
+
+    // Frame buffer geometry
+    msg.fbWidth = htobe(videoWidth());
+    msg.fbHeight = htobe(videoHeight());
+
+    // Pixel layout: sizes and flags
+    msg.px.bpp = htobe(pixelFormat.bpp);
+    msg.px.depth = htobe(pixelFormat.depth);
+    msg.px.bigendian = htobe(pixelFormat.bigendian);
+    msg.px.truecolor = htobe(pixelFormat.truecolor);
+
+    // Pixel layout: per-channel maximum followed by its shift
+    msg.px.redmax = htobe(pixelFormat.redmax);
+    msg.px.redshift = htobe(pixelFormat.redshift);
+    msg.px.greenmax = htobe(pixelFormat.greenmax);
+    msg.px.greenshift = htobe(pixelFormat.greenshift);
+    msg.px.bluemax = htobe(pixelFormat.bluemax);
+    msg.px.blueshift = htobe(pixelFormat.blueshift);
+    memset(msg.px.padding, 0, sizeof(msg.px.padding));
+
+    // Desktop name
+    const uint32_t name_len = 2;
+    msg.namelen = htobe(name_len);
+    memcpy(msg.name, "M5", sizeof(msg.name));
+
+    write(&msg);
+    curState = NormalPhase;
+}
+
+
+// Handle a pixel format message from the client. We support exactly one
+// pixel format, so the request is only compared against it; any difference
+// is fatal.
+void
+VncServer::setPixelFormat()
+{
+    DPRINTF(VNC, "Received pixel format from client message\n");
+
+    // Fetch everything after the first byte of the message
+    PixelFormatMessage pfm;
+    read1((uint8_t*)&pfm, sizeof(PixelFormatMessage));
+
+    DPRINTF(VNC, " -- bpp = %d; depth = %d; be = %d\n",
+            pfm.px.bpp, pfm.px.depth, pfm.px.bigendian);
+    DPRINTF(VNC, " -- true color = %d red,green,blue max = %d,%d,%d\n",
+            pfm.px.truecolor,
+            betoh(pfm.px.redmax),
+            betoh(pfm.px.greenmax),
+            betoh(pfm.px.bluemax));
+    DPRINTF(VNC, " -- red,green,blue shift = %d,%d,%d\n",
+            pfm.px.redshift, pfm.px.greenshift, pfm.px.blueshift);
+
+    // Every field has to agree with the format we advertised
+    const bool same_format =
+        betoh(pfm.px.bpp) == pixelFormat.bpp &&
+        betoh(pfm.px.depth) == pixelFormat.depth &&
+        betoh(pfm.px.bigendian) == pixelFormat.bigendian &&
+        betoh(pfm.px.truecolor) == pixelFormat.truecolor &&
+        betoh(pfm.px.redmax) == pixelFormat.redmax &&
+        betoh(pfm.px.greenmax) == pixelFormat.greenmax &&
+        betoh(pfm.px.bluemax) == pixelFormat.bluemax &&
+        betoh(pfm.px.redshift) == pixelFormat.redshift &&
+        betoh(pfm.px.greenshift) == pixelFormat.greenshift &&
+        betoh(pfm.px.blueshift) == pixelFormat.blueshift;
+
+    if (!same_format)
+        fatal("VNC client doesn't support true color raw encoding\n");
+}
+
+// Handle the client's list of supported encodings. Records whether raw and
+// desktop-resize encodings are among them; a client without raw encoding
+// support is fatal.
+void
+VncServer::setEncodings()
+{
+    DPRINTF(VNC, "Received supported encodings from client\n");
+
+    PixelEncodingsMessage pem;
+    read1((uint8_t*)&pem, sizeof(PixelEncodingsMessage));
+
+    pem.num_encodings = betoh(pem.num_encodings);
+
+    DPRINTF(VNC, " -- %d encoding present\n", pem.num_encodings);
+    supportsRawEnc = false;
+    supportsResizeEnc = false;
+
+    // One 32-bit encoding id follows the header for each advertised entry
+    for (int idx = 0; idx < pem.num_encodings; idx++) {
+        int32_t wire_encoding;
+        size_t got M5_VAR_USED = read(&wire_encoding);
+        assert(got == sizeof(wire_encoding));
+
+        const int32_t encoding = betoh(wire_encoding);
+        DPRINTF(VNC, " -- supports %d\n", encoding);
+
+        if (encoding == EncodingRaw)
+            supportsRawEnc = true;
+        else if (encoding == EncodingDesktopSize)
+            supportsResizeEnc = true;
+    }
+
+    if (!supportsRawEnc)
+        fatal("VNC clients must always support raw encoding\n");
+}
+
+// Handle a frame buffer update request. The requested rectangle is only
+// logged; the whole frame buffer is (conditionally) sent in response.
+void
+VncServer::requestFbUpdate()
+{
+    DPRINTF(VNC, "Received frame buffer update request from client\n");
+
+    FrameBufferUpdateReq fbr;
+    read1((uint8_t*)&fbr, sizeof(FrameBufferUpdateReq));
+
+    // Convert the rectangle to host byte order for the trace output
+    const uint16_t req_x = betoh(fbr.x);
+    const uint16_t req_y = betoh(fbr.y);
+    const uint16_t req_width = betoh(fbr.width);
+    const uint16_t req_height = betoh(fbr.height);
+
+    DPRINTF(VNC, " -- x = %d y = %d w = %d h = %d\n", req_x, req_y,
+            req_width, req_height);
+
+    sendFrameBufferUpdate();
+}
+
+// Handle a key event from the client and forward it to the registered
+// keyboard device, if there is one.
+void
+VncServer::recvKeyboardInput()
+{
+    DPRINTF(VNC, "Received keyboard input from client\n");
+
+    KeyEventMessage kem;
+    read1((uint8_t*)&kem, sizeof(KeyEventMessage));
+
+    const uint32_t key_code = betoh(kem.key);
+    DPRINTF(VNC, " -- received key code %d (%s)\n", key_code,
+            kem.down_flag ? "down" : "up");
+
+    // Nobody registered for key events
+    if (!keyboard)
+        return;
+
+    keyboard->keyPress(key_code, kem.down_flag);
+}
+
+// Handle a pointer event from the client and forward the position and
+// button mask to the registered mouse device, if there is one.
+void
+VncServer::recvPointerInput()
+{
+    DPRINTF(VNC, "Received pointer input from client\n");
+
+    PointerEventMessage pem;
+    read1((uint8_t*)&pem, sizeof(PointerEventMessage));
+
+    const uint16_t pos_x = betoh(pem.x);
+    const uint16_t pos_y = betoh(pem.y);
+    DPRINTF(VNC, " -- pointer at x = %d y = %d buttons = %#x\n", pos_x, pos_y,
+            pem.button_mask);
+
+    // Nobody registered for pointer events
+    if (!mouse)
+        return;
+
+    mouse->mouseAt(pos_x, pos_y, pem.button_mask);
+}
+
+// Handle a cut-text (clipboard) message from the client. The text is read
+// in chunks of at most 1024 bytes and only written to the debug trace.
+void
+VncServer::recvCutText()
+{
+    DPRINTF(VNC, "Received client copy buffer message\n");
+
+    ClientCutTextMessage cct;
+    read1((uint8_t*)&cct, sizeof(ClientCutTextMessage));
+
+    // One extra byte so a full chunk can still be NUL terminated
+    char str[1025];
+    size_t data_len = betoh(cct.length);
+    DPRINTF(VNC, "String length %d\n", data_len);
+    while (data_len > 0) {
+        size_t bytes_to_read = data_len > 1024 ? 1024 : data_len;
+        size_t len = read((uint8_t*)str, bytes_to_read);
+
+        // A result of 0 means the read failed and the client has been
+        // detached; reading again would panic. Anything larger than what
+        // was asked for cannot be a valid byte count either.
+        if (len == 0 || len > bytes_to_read)
+            break;
+
+        // Terminate after the bytes actually received, which may be fewer
+        // than requested.
+        str[len] = 0;
+        data_len -= len;
+        DPRINTF(VNC, "Buffer: %s\n", str);
+    }
+
+}
+
+
+// Send the complete frame buffer to the client as a single raw-encoded
+// rectangle. Nothing is sent unless a frame buffer is set, a client is
+// connected and in the NormalPhase state, and an update has been flagged
+// with sendUpdate.
+void
+VncServer::sendFrameBufferUpdate()
+{
+
+    if (!clientRfb || dataFd <= 0 || curState != NormalPhase || !sendUpdate) {
+        DPRINTF(VNC, "NOT sending framebuffer update\n");
+        return;
+    }
+
+    assert(vc);
+
+    // The client will request data constantly, unless we throttle it
+    sendUpdate = false;
+
+    DPRINTF(VNC, "Sending framebuffer update\n");
+
+    FrameBufferUpdate fbu;
+    FrameBufferRect fbr;
+
+    fbu.type = ServerFrameBufferUpdate;
+    // Do not transmit an uninitialised stack byte as padding
+    fbu.padding = 0;
+    fbu.num_rects = 1;
+    fbr.x = 0;
+    fbr.y = 0;
+    fbr.width = videoWidth();
+    fbr.height = videoHeight();
+    fbr.encoding = EncodingRaw;
+
+    // fix up endian
+    fbu.num_rects = htobe(fbu.num_rects);
+    fbr.x = htobe(fbr.x);
+    fbr.y = htobe(fbr.y);
+    fbr.width = htobe(fbr.width);
+    fbr.height = htobe(fbr.height);
+    fbr.encoding = htobe(fbr.encoding);
+
+    // send headers to client; a short write detaches the client, and any
+    // further write() to a detached client would panic, so stop here
+    if (write(&fbu) != sizeof(fbu) || write(&fbr) != sizeof(fbr))
+        return;
+
+    assert(clientRfb);
+
+    // The converted image is sent as 32 bits per pixel
+    uint8_t *tmp = vc->convert(clientRfb);
+    write(tmp, videoWidth() * videoHeight() * sizeof(uint32_t));
+    delete [] tmp;
+
+}
+
+// Tell the client that the frame buffer changed size by sending an update
+// with one DesktopSize pseudo-rectangle carrying the new dimensions. The
+// caller must ensure a client is connected and in the NormalPhase state.
+void
+VncServer::sendFrameBufferResized()
+{
+    assert(clientRfb && dataFd > 0 && curState == NormalPhase);
+    DPRINTF(VNC, "Sending framebuffer resize\n");
+
+    FrameBufferUpdate fbu;
+    FrameBufferRect fbr;
+
+    fbu.type = ServerFrameBufferUpdate;
+    // Do not transmit an uninitialised stack byte as padding
+    fbu.padding = 0;
+    fbu.num_rects = 1;
+    fbr.x = 0;
+    fbr.y = 0;
+    fbr.width = videoWidth();
+    fbr.height = videoHeight();
+    fbr.encoding = EncodingDesktopSize;
+
+    // fix up endian
+    fbu.num_rects = htobe(fbu.num_rects);
+    fbr.x = htobe(fbr.x);
+    fbr.y = htobe(fbr.y);
+    fbr.width = htobe(fbr.width);
+    fbr.height = htobe(fbr.height);
+    fbr.encoding = htobe(fbr.encoding);
+
+    // send headers to client; a short write detaches the client, and a
+    // second write() to a detached client would panic, so stop after the
+    // first failure
+    if (write(&fbu) != sizeof(fbu))
+        return;
+    write(&fbr);
+
+    // No actual data is sent in this message
+}
+
+// Record a new video mode / resolution. When anything changed, the video
+// converter is rebuilt for the new parameters and a connected client is
+// either told about the resize or, if it cannot handle that, detached.
+void
+VncServer::setFrameBufferParams(VideoConvert::Mode mode, int width, int height)
+{
+    DPRINTF(VNC, "Updating video params: mode: %d width: %d height: %d\n", mode,
+            width, height);
+
+    const bool unchanged = mode == videoMode && width == videoWidth() &&
+        height == videoHeight();
+    if (unchanged)
+        return;
+
+    videoMode = mode;
+    _videoWidth = width;
+    _videoHeight = height;
+
+    // Replace the converter; deleting a null pointer is a no-op
+    delete vc;
+    vc = new VideoConvert(mode, VideoConvert::rgb8888, videoWidth(),
+            videoHeight());
+
+    // Nothing more to do unless a client is up and running
+    const bool client_active =
+        dataFd > 0 && clientRfb && curState == NormalPhase;
+    if (!client_active)
+        return;
+
+    if (supportsResizeEnc) {
+        sendFrameBufferResized();
+    } else {
+        // The frame buffer changed size and we can't update the client
+        detach();
+    }
+}
+
+// Factory hook: build a VncServer from this parameter object
+VncServer *
+VncServerParams::create()
+{
+    VncServer *server = new VncServer(this);
+    return server;
+}
diff --git a/src/base/vnc/vncserver.hh b/src/base/vnc/vncserver.hh
new file mode 100644
index 000000000..23b097b11
--- /dev/null
+++ b/src/base/vnc/vncserver.hh
@@ -0,0 +1,475 @@
+/*
+ * Copyright (c) 2010 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Ali Saidi
+ *          William Wang
+ */
+
+/** @file
+ * Declaration of a VNC server
+ */
+
+#ifndef __DEV_VNC_SERVER_HH__
+#define __DEV_VNC_SERVER_HH__
+
+#include <iostream>
+
+#include "base/circlebuf.hh"
+#include "base/pollevent.hh"
+#include "base/socket.hh"
+#include "base/vnc/convert.hh"
+#include "cpu/intr_control.hh"
+#include "sim/sim_object.hh"
+#include "params/VncServer.hh"
+
+/**
+ * A device that expects to receive input from the vnc server should derive
+ * (through multiple inheritance if necessary) from VncKeyboard or VncMouse
+ * and call setKeyboard() or setMouse() respectively on the vnc server.
+ */
+class VncKeyboard
+{
+  public:
+    /** Virtual so a derived device can be safely destroyed through a
+     * VncKeyboard pointer. */
+    virtual ~VncKeyboard() {}
+
+    /**
+     * Called when the vnc server receives a key press event from the
+     * client.
+     * @param key the key passed is an x11 keysym
+     * @param down is the key now down or up?
+     */
+    virtual void keyPress(uint32_t key, bool down) = 0;
+};
+
+/**
+ * Interface for a device that wants to receive pointer events from the vnc
+ * server.
+ */
+class VncMouse
+{
+  public:
+    /** Virtual so a derived device can be safely destroyed through a
+     * VncMouse pointer. */
+    virtual ~VncMouse() {}
+
+    /**
+     * Called whenever the mouse moves or its button state changes.
+     * buttons is a simple mask with each button corresponding to
+     * a bit position in the byte with 1 being down and 0 being up
+     * @param x the x position of the mouse
+     * @param y the y position of the mouse
+     * @param buttons the button state as described above
+     */
+    virtual void mouseAt(uint16_t x, uint16_t y, uint8_t buttons) = 0;
+};
+
+/**
+ * A SimObject that serves the simulated frame buffer to a vnc client and
+ * passes the client's keyboard and pointer events on to registered devices.
+ */
+class VncServer : public SimObject
+{
+  public:
+
+    /**
+     * \defgroup VncConstants A set of constants and structs from the VNC spec
+     * @{
+     */
+    /** Authentication modes */
+    const static uint32_t AuthInvalid = 0;
+    const static uint32_t AuthNone    = 1;
+
+    /** Error conditions */
+    const static uint32_t VncOK   = 0;
+
+    /** Client -> Server message IDs */
+    enum ClientMessages {
+        ClientSetPixelFormat    = 0,
+        ClientSetEncodings      = 2,
+        ClientFrameBufferUpdate = 3,
+        ClientKeyEvent          = 4,
+        ClientPointerEvent      = 5,
+        ClientCutText           = 6
+    };
+
+    /** Server -> Client message IDs */
+    enum ServerMessages {
+        ServerFrameBufferUpdate     = 0,
+        ServerSetColorMapEntries    = 1,
+        ServerBell                  = 2,
+        ServerCutText               = 3
+    };
+
+    /** Encoding types */
+    enum EncodingTypes {
+        EncodingRaw         = 0,
+        EncodingCopyRect    = 1,
+        EncodingHextile     = 5,
+        EncodingDesktopSize = -223
+    };
+
+    /** keyboard/mouse support */
+    enum MouseEvents {
+        MouseLeftButton     = 0x1,
+        MouseRightButton    = 0x2,
+        MouseMiddleButton   = 0x4
+    };
+
+    /** The protocol version string this server identifies itself with */
+    const char* vncVersion() const
+    {
+        return "RFB 003.008\n";
+    }
+
+    /** States of the connection handshake / protocol state machine */
+    enum ConnectionState {
+        WaitForProtocolVersion,
+        WaitForSecurityResponse,
+        WaitForClientInit,
+        InitializationPhase,
+        NormalPhase
+    };
+
+    /** Description of how a pixel is laid out (packed for the wire) */
+    struct PixelFormat {
+        uint8_t bpp;
+        uint8_t depth;
+        uint8_t bigendian;
+        uint8_t truecolor;
+        uint16_t redmax;
+        uint16_t greenmax;
+        uint16_t bluemax;
+        uint8_t redshift;
+        uint8_t greenshift;
+        uint8_t blueshift;
+        uint8_t padding[3];
+    } M5_ATTR_PACKED;
+
+    /** Server init message: frame buffer size, pixel format and name */
+    struct ServerInitMsg {
+        uint16_t fbWidth;
+        uint16_t fbHeight;
+        PixelFormat px;
+        uint32_t namelen;
+        char name[2]; // just to put M5 in here
+    } M5_ATTR_PACKED;
+
+    /** Client message carrying a pixel format */
+    struct PixelFormatMessage {
+        uint8_t type;
+        uint8_t padding[3];
+        PixelFormat px;
+    } M5_ATTR_PACKED;
+
+    /** Header of the client message listing its supported encodings */
+    struct PixelEncodingsMessage {
+        uint8_t type;
+        uint8_t padding;
+        uint16_t num_encodings;
+    } M5_ATTR_PACKED;
+
+    /** Client request for a frame buffer update of a rectangle */
+    struct FrameBufferUpdateReq {
+        uint8_t type;
+        uint8_t incremental;
+        uint16_t x;
+        uint16_t y;
+        uint16_t width;
+        uint16_t height;
+    } M5_ATTR_PACKED;
+
+    /** Client key press / release message */
+    struct KeyEventMessage {
+        uint8_t type;
+        uint8_t down_flag;
+        uint8_t padding[2];
+        uint32_t key;
+    } M5_ATTR_PACKED;
+
+    /** Client pointer position / button state message */
+    struct PointerEventMessage {
+        uint8_t type;
+        uint8_t button_mask;
+        uint16_t x;
+        uint16_t y;
+    } M5_ATTR_PACKED;
+
+    /** Header of the client cut-text message; length bytes of text follow */
+    struct ClientCutTextMessage {
+        uint8_t type;
+        uint8_t padding[3];
+        uint32_t length;
+    } M5_ATTR_PACKED;
+
+    /** Header of a server frame buffer update; num_rects rectangles follow */
+    struct FrameBufferUpdate {
+        uint8_t type;
+        uint8_t padding;
+        uint16_t num_rects;
+    } M5_ATTR_PACKED;
+
+    /** Header of one rectangle within a frame buffer update */
+    struct FrameBufferRect {
+        uint16_t x;
+        uint16_t y;
+        uint16_t width;
+        uint16_t height;
+        int32_t encoding;
+    } M5_ATTR_PACKED;
+
+    /** Header of a server cut-text message */
+    struct ServerCutText {
+        uint8_t type;
+        uint8_t padding[3];
+        uint32_t length;
+    } M5_ATTR_PACKED;
+
+    /** @} */
+
+  protected:
+    /** ListenEvent to accept a vnc client connection */
+    class ListenEvent: public PollEvent
+    {
+      protected:
+        VncServer *vncserver;
+
+      public:
+        ListenEvent(VncServer *vs, int fd, int e);
+        void process(int revent);
+    };
+
+    friend class ListenEvent;
+    ListenEvent *listenEvent;
+
+    /** DataEvent to read data from vnc */
+    class DataEvent: public PollEvent
+    {
+      protected:
+        VncServer *vncserver;
+
+      public:
+        DataEvent(VncServer *vs, int fd, int e);
+        void process(int revent);
+    };
+
+    friend class DataEvent;
+    DataEvent *dataEvent;
+
+    // number printed in this server's debug trace messages
+    int number;
+    int dataFd; // data stream file descriptor
+
+    ListenSocket listener;
+
+    // NOTE(review): presumably the connection life cycle -- start listening
+    // on a port, accept a client, handle incoming data, drop the client.
+    void listen(int port);
+    void accept();
+    void data();
+    void detach();
+
+  public:
+    typedef VncServerParams Params;
+    VncServer(const Params *p);
+    ~VncServer();
+
+    // RFB
+  protected:
+
+    /** The rfb protocol state the connection is in */
+    ConnectionState curState;
+
+    /** the width of the frame buffer we are sending to the client */
+    uint16_t _videoWidth;
+
+    /** the height of the frame buffer we are sending to the client */
+    uint16_t _videoHeight;
+
+    /** pointer to the actual data that is stored in the frame buffer device */
+    uint8_t* clientRfb;
+
+    /** The device to notify when we get key events */
+    VncKeyboard *keyboard;
+
+    /** The device to notify when we get mouse events */
+    VncMouse *mouse;
+
+    /** An update needs to be sent to the client. Without doing this the
+     * client will constantly request data that is pointless */
+    bool sendUpdate;
+
+    /** The one and only pixel format we support */
+    PixelFormat pixelFormat;
+
+    /** If the vnc client supports receiving raw data. It always should */
+    bool supportsRawEnc;
+
+    /** If the vnc client supports the desktop resize command */
+    bool supportsResizeEnc;
+
+    /** The mode of data we're getting frame buffer in */
+    VideoConvert::Mode videoMode;
+
+    /** The video converter that transforms data for us */
+    VideoConvert *vc;
+
+  protected:
+    /**
+     * vnc client Interface
+     */
+
+    /** Send an error message to the client
+     * @param error_msg text to send describing the error
+     */
+    void sendError(const char* error_msg);
+
+    /** Read some data from the client
+     * @param buf the buffer to read the data into
+     * @param len the amount of data to read
+     * @return length read
+     */
+    size_t read(uint8_t *buf, size_t len);
+
+    /** Read len - 1 bytes from the client into the buffer provided + 1 and
+     * assert that we read enough bytes. This function exists to handle
+     * reading all of the protocol structs above when we've already read
+     * the first byte which describes which one we're reading
+     * @param buf the address of the buffer to add one to and read data into
+     * @param len the amount of data + 1 to read
+     * @return length read
+     */
+    size_t read1(uint8_t *buf, size_t len);
+
+
+    /** Templated version of the read function above to
+     * read simple data from the client
+     * @param val data to recv from the client
+     */
+    template <typename T> size_t read(T* val);
+
+
+    /** Write a buffer to the client.
+     * @param buf buffer to send
+     * @param len length of the buffer
+     * @return number of bytes sent
+     */
+    size_t write(const uint8_t *buf, size_t len);
+
+    /** Templated version of the write function above to
+     * write simple data to the client
+     * @param val data to send to the client
+     */
+    template <typename T> size_t write(T* val);
+
+    /** Send a string to the client
+     * @param str string to transmit
+     */
+    size_t write(const char* str);
+
+    /** Check the client's protocol version for compatibility and send
+     * the security types we support
+     */
+    void checkProtocolVersion();
+
+    /** Check that the security exchange was successful
+     */
+    void checkSecurity();
+
+    /** Send client our idea about what the frame buffer looks like */
+    void sendServerInit();
+
+    /** Send an error message to the client when something goes wrong
+     * NOTE(review): overlaps with the const char* overload declared above;
+     * confirm that this overload is actually defined and needed.
+     * @param error_msg error to send
+     */
+    void sendError(std::string error_msg);
+
+    /** Send an updated frame buffer to the client.
+     * @todo this doesn't do anything smart and just sends the entire image
+     */
+    void sendFrameBufferUpdate();
+
+    /** Receive pixel format message from client and process it. */
+    void setPixelFormat();
+
+    /** Receive encodings message from client and process it. */
+    void setEncodings();
+
+    /** Receive message from client asking for updated frame buffer */
+    void requestFbUpdate();
+
+    /** Receive message from client providing new keyboard input */
+    void recvKeyboardInput();
+
+    /** Recv message from client providing new mouse movement or button click */
+    void recvPointerInput();
+
+    /** Receive message from client that there is text in its paste buffer.
+     * This is a no-op at the moment, but perhaps we would want to be able to
+     * paste it at some point.
+     */
+    void recvCutText();
+
+    /** Tell the client that the frame buffer resized. This happens when the
+     * simulated system changes video modes (E.g. X11 starts).
+     */
+    void sendFrameBufferResized();
+
+  public:
+    /** Set the address of the frame buffer we are going to show.
+     * To avoid copying, just have the display controller
+     * tell us where the data is instead of constantly copying it around
+     * @param rfb frame buffer that we're going to use
+     */
+    void
+    setFramebufferAddr(uint8_t* rfb)
+    {
+        clientRfb = rfb;
+    }
+
+    /** Set up the device that would like to receive notifications when keys are
+     * pressed in the vnc client keyboard
+     * @param _keyboard an object that derives from VncKeyboard
+     */
+    void setKeyboard(VncKeyboard *_keyboard) { keyboard = _keyboard; }
+
+    /** Setup the device that would like to receive notifications when mouse
+     * movements or button presses are received from the vnc client.
+     * @param _mouse an object that derives from VncMouse
+     */
+    void setMouse(VncMouse *_mouse) { mouse = _mouse; }
+
+    /** The frame buffer uses this call to notify the vnc server that
+     * the frame buffer has been updated and a new image needs to be sent to the
+     * client
+     */
+    void
+    setDirty()
+    {
+        sendUpdate = true;
+        sendFrameBufferUpdate();
+    }
+
+    /** What is the width of the screen we're displaying.
+     * This is used for pointer/tablet devices that need to know to calculate
+     * the correct value to send to the device driver.
+     * @return the width of the simulated screen
+     */
+    uint16_t videoWidth() { return _videoWidth; }
+
+    /** What is the height of the screen we're displaying.
+     * This is used for pointer/tablet devices that need to know to calculate
+     * the correct value to send to the device driver.
+     * @return the height of the simulated screen
+     */
+    uint16_t videoHeight() { return _videoHeight; }
+
+    /** Set the mode of the data the frame buffer will be sending us
+     * @param mode the mode
+     * @param width the new width of the frame buffer
+     * @param height the new height of the frame buffer
+     */
+    void setFrameBufferParams(VideoConvert::Mode mode, int width, int height);
+};
+
+#endif
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index 0c566ec65..8b6662d70 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2011 ARM Limited
+ * All rights reserved.
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2004-2006 The Regents of The University of Michigan
  * Copyright (c) 2009 The University of Edinburgh
  * All rights reserved.
@@ -150,6 +162,29 @@ class BaseDynInst : public FastAlloc, public RefCounted
     /** Finish a DTB address translation. */
     void finishTranslation(WholeTranslationState *state);
 
+    /** True if the DTB address translation has started. */
+    bool translationStarted;
+
+    /** True if the DTB address translation has completed. */
+    bool translationCompleted;
+
+    /**
+     * Returns true if the DTB address translation is being delayed due to a hw
+     * page table walk.
+     */
+    bool isTranslationDelayed() const
+    {
+        return (translationStarted && !translationCompleted);
+    }
+
+    /**
+     * Saved memory requests (needed when the DTB address translation is
+     * delayed due to a hw page table walk).
+     */
+    RequestPtr savedReq;
+    RequestPtr savedSreqLow;
+    RequestPtr savedSreqHigh;
+
     /** @todo: Consider making this private. */
   public:
     /** The sequence number of the instruction. */
@@ -835,33 +870,42 @@ BaseDynInst<Impl>::readBytes(Addr addr, uint8_t *data,
                              unsigned size, unsigned flags)
 {
     reqMade = true;
-    Request *req = new Request(asid, addr, size, flags, this->pc.instAddr(),
-                               thread->contextId(), threadNumber);
-
+    Request *req = NULL;
     Request *sreqLow = NULL;
     Request *sreqHigh = NULL;
 
-    // Only split the request if the ISA supports unaligned accesses.
-    if (TheISA::HasUnalignedMemAcc) {
-        splitRequest(req, sreqLow, sreqHigh);
-    }
-    initiateTranslation(req, sreqLow, sreqHigh, NULL, BaseTLB::Read);
-
-    if (fault == NoFault) {
-        effAddr = req->getVaddr();
-        effAddrValid = true;
-        fault = cpu->read(req, sreqLow, sreqHigh, data, lqIdx);
+    if (reqMade && translationStarted) {
+        req = savedReq;
+        sreqLow = savedSreqLow;
+        sreqHigh = savedSreqHigh;
     } else {
-        // Commit will have to clean up whatever happened.  Set this
-        // instruction as executed.
-        this->setExecuted();
+        req = new Request(asid, addr, size, flags, this->pc.instAddr(),
+                          thread->contextId(), threadNumber);
+
+        // Only split the request if the ISA supports unaligned accesses.
+        if (TheISA::HasUnalignedMemAcc) {
+            splitRequest(req, sreqLow, sreqHigh);
+        }
+        initiateTranslation(req, sreqLow, sreqHigh, NULL, BaseTLB::Read);
     }
 
-    if (fault != NoFault) {
-        // Return a fixed value to keep simulation deterministic even
-        // along misspeculated paths.
-        if (data)
-            bzero(data, size);
+    if (translationCompleted) {
+        if (fault == NoFault) {
+            effAddr = req->getVaddr();
+            effAddrValid = true;
+            fault = cpu->read(req, sreqLow, sreqHigh, data, lqIdx);
+        } else {
+            // Commit will have to clean up whatever happened.  Set this
+            // instruction as executed.
+            this->setExecuted();
+        }
+
+        if (fault != NoFault) {
+            // Return a fixed value to keep simulation deterministic even
+            // along misspeculated paths.
+            if (data)
+                bzero(data, size);
+        }
     }
 
     if (traceData) {
@@ -897,19 +941,26 @@ BaseDynInst<Impl>::writeBytes(uint8_t *data, unsigned size,
     }
 
     reqMade = true;
-    Request *req = new Request(asid, addr, size, flags, this->pc.instAddr(),
-                               thread->contextId(), threadNumber);
-
+    Request *req = NULL;
     Request *sreqLow = NULL;
     Request *sreqHigh = NULL;
 
-    // Only split the request if the ISA supports unaligned accesses.
-    if (TheISA::HasUnalignedMemAcc) {
-        splitRequest(req, sreqLow, sreqHigh);
+    if (reqMade && translationStarted) {
+        req = savedReq;
+        sreqLow = savedSreqLow;
+        sreqHigh = savedSreqHigh;
+    } else {
+        req = new Request(asid, addr, size, flags, this->pc.instAddr(),
+                          thread->contextId(), threadNumber);
+
+        // Only split the request if the ISA supports unaligned accesses.
+        if (TheISA::HasUnalignedMemAcc) {
+            splitRequest(req, sreqLow, sreqHigh);
+        }
+        initiateTranslation(req, sreqLow, sreqHigh, res, BaseTLB::Write);
     }
-    initiateTranslation(req, sreqLow, sreqHigh, res, BaseTLB::Write);
 
-    if (fault == NoFault) {
+    if (fault == NoFault && translationCompleted) {
         effAddr = req->getVaddr();
         effAddrValid = true;
         fault = cpu->write(req, sreqLow, sreqHigh, data, sqIdx);
@@ -953,6 +1004,8 @@ BaseDynInst<Impl>::initiateTranslation(RequestPtr req, RequestPtr sreqLow,
                                        RequestPtr sreqHigh, uint64_t *res,
                                        BaseTLB::Mode mode)
 {
+    translationStarted = true;
+
     if (!TheISA::HasUnalignedMemAcc || sreqLow == NULL) {
         WholeTranslationState *state =
             new WholeTranslationState(req, NULL, res, mode);
@@ -961,6 +1014,12 @@ BaseDynInst<Impl>::initiateTranslation(RequestPtr req, RequestPtr sreqLow,
         DataTranslation<BaseDynInst<Impl> > *trans =
             new DataTranslation<BaseDynInst<Impl> >(this, state);
         cpu->dtb->translateTiming(req, thread->getTC(), trans, mode);
+        if (!translationCompleted) {
+            // Save memory requests.
+            savedReq = state->mainReq;
+            savedSreqLow = state->sreqLow;
+            savedSreqHigh = state->sreqHigh;
+        }
     } else {
         WholeTranslationState *state =
             new WholeTranslationState(req, sreqLow, sreqHigh, NULL, res, mode);
@@ -973,6 +1032,12 @@ BaseDynInst<Impl>::initiateTranslation(RequestPtr req, RequestPtr sreqLow,
 
         cpu->dtb->translateTiming(sreqLow, thread->getTC(), stransLow, mode);
         cpu->dtb->translateTiming(sreqHigh, thread->getTC(), stransHigh, mode);
+        if (!translationCompleted) {
+            // Save memory requests.
+            savedReq = state->mainReq;
+            savedSreqLow = state->sreqLow;
+            savedSreqHigh = state->sreqHigh;
+        }
     }
 }
 
@@ -998,6 +1063,8 @@ BaseDynInst<Impl>::finishTranslation(WholeTranslationState *state)
         state->deleteReqs();
     }
     delete state;
+
+    translationCompleted = true;
 }
 
 #endif // __CPU_BASE_DYN_INST_HH__
diff --git a/src/cpu/base_dyn_inst_impl.hh b/src/cpu/base_dyn_inst_impl.hh
index 74f199d5f..7e4d25322 100644
--- a/src/cpu/base_dyn_inst_impl.hh
+++ b/src/cpu/base_dyn_inst_impl.hh
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2011 ARM Limited
+ * All rights reserved.
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2004-2006 The Regents of The University of Michigan
  * All rights reserved.
  *
@@ -107,6 +119,9 @@ BaseDynInst<Impl>::initVars()
     effAddrValid = false;
     physEffAddr = 0;
 
+    translationStarted = false;
+    translationCompleted = false;
+
     isUncacheable = false;
     reqMade = false;
     readyRegs = 0;
diff --git a/src/cpu/inorder/SConscript b/src/cpu/inorder/SConscript
index ae5ec0257..b9c526763 100644
--- a/src/cpu/inorder/SConscript
+++ b/src/cpu/inorder/SConscript
@@ -55,7 +55,7 @@ if 'InOrderCPU' in env['CPU_MODELS']:
         TraceFlag('ThreadModel')
         TraceFlag('RefCount')
         TraceFlag('AddrDep')
-
+        TraceFlag('SkedCache')
 
         CompoundFlag('InOrderCPUAll', [ 'InOrderStage', 'InOrderStall', 'InOrderCPU',
                'InOrderMDU', 'InOrderAGEN', 'InOrderFetchSeq', 'InOrderTLB', 'InOrderBPred',
@@ -63,7 +63,6 @@ if 'InOrderCPU' in env['CPU_MODELS']:
                'InOrderGraduation', 'InOrderCachePort', 'RegDepMap', 'Resource',
                'ThreadModel', 'AddrDep'])
 
-        Source('pipeline_traits.cc')        
         Source('inorder_dyn_inst.cc')
         Source('inorder_cpu_builder.cc')
         Source('inorder_trace.cc')
diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc
index ffdcae7df..0ec4c9861 100644
--- a/src/cpu/inorder/cpu.cc
+++ b/src/cpu/inorder/cpu.cc
@@ -324,19 +324,19 @@ InOrderCPU::InOrderCPU(Params *params)
                                             tid, 
                                             asid[tid]);
 
-        dummyReq[tid] = new ResourceRequest(resPool->getResource(0), 
-                                            dummyInst[tid], 
-                                            0, 
-                                            0, 
-                                            0, 
-                                            0);        
+        dummyReq[tid] = new ResourceRequest(resPool->getResource(0));
     }
 
     dummyReqInst = new InOrderDynInst(this, NULL, 0, 0, 0);
     dummyReqInst->setSquashed();
+    dummyReqInst->resetInstCount();
 
     dummyBufferInst = new InOrderDynInst(this, NULL, 0, 0, 0);
     dummyBufferInst->setSquashed();
+    dummyBufferInst->resetInstCount();
+
+    endOfSkedIt = skedCache.end();
+    frontEndSked = createFrontEndSked();
     
     lastRunningCycle = curTick();
 
@@ -348,7 +348,6 @@ InOrderCPU::InOrderCPU(Params *params)
     reset();
 #endif
 
-    dummyBufferInst->resetInstCount();
     
     // Schedule First Tick Event, CPU will reschedule itself from here on out.
     scheduleTickEvent(0);
@@ -357,8 +356,131 @@ InOrderCPU::InOrderCPU(Params *params)
 InOrderCPU::~InOrderCPU()
 {
     delete resPool;
+
+    // Delete every schedule held in the sked cache, then empty the cache.
+    // NOTE(review): skedCache is a static member, so these schedules are
+    // shared by all InOrderCPU objects -- confirm none outlives this one.
+    // frontEndSked is not stored in the cache and is not deleted here.
+    SkedCacheIt sked_it = skedCache.begin();
+    const SkedCacheIt sked_end = skedCache.end();
+    for (; sked_it != sked_end; ++sked_it) {
+        delete sked_it->second;
+    }
+    skedCache.clear();
 }
 
+std::map<InOrderCPU::SkedID, ThePipeline::RSkedPtr> InOrderCPU::skedCache;
+
+/** Build the fetch/decode resource schedule; the constructor stores it in
+ *  frontEndSked. */
+RSkedPtr
+InOrderCPU::createFrontEndSked()
+{
+    RSkedPtr front_sked = new ResourceSked();
+
+    // Stage 0: FETCH
+    StageScheduler fetch_stage(front_sked, 0);
+    fetch_stage.needs(FetchSeq, FetchSeqUnit::AssignNextPC);
+    fetch_stage.needs(ICache, FetchUnit::InitiateFetch);
+
+    // Stage 1: DECODE
+    StageScheduler decode_stage(front_sked, 1);
+    decode_stage.needs(ICache, FetchUnit::CompleteFetch);
+    decode_stage.needs(Decode, DecodeUnit::DecodeInst);
+    decode_stage.needs(BPred, BranchPredictor::PredictBranch);
+    decode_stage.needs(FetchSeq, FetchSeqUnit::UpdateTargetPC);
+
+    DPRINTF(SkedCache, "Resource Sked created for instruction \"front_end\"\n");
+
+    return front_sked;
+}
+
+/** Return the back-end resource schedule for inst: the cached schedule if
+ *  lookupSked() finds one, otherwise a new one that is built here and added
+ *  to the sked cache. Returns NULL when inst has no static instruction. */
+RSkedPtr
+InOrderCPU::createBackEndSked(DynInstPtr inst)
+{
+    // Bail out before any lookup or allocation so nothing is leaked
+    if (!inst->staticInst) {
+        warn_once("Static Instruction Object Not Set. Can't Create"
+                  " Back End Schedule");
+        return NULL;
+    }
+
+    RSkedPtr res_sked = lookupSked(inst);
+    if (res_sked != NULL) {
+        DPRINTF(SkedCache, "Found %s in sked cache.\n", inst->instName());
+        return res_sked;
+    }
+
+    res_sked = new ResourceSked();
+    int stage_num = ThePipeline::BackEndStartStage;
+    StageScheduler X(res_sked, stage_num++);
+    StageScheduler M(res_sked, stage_num++);
+    StageScheduler W(res_sked, stage_num++);
+
+    // EXECUTE
+    for (int idx=0; idx < inst->numSrcRegs(); idx++) {
+        if (!idx || !inst->isStore()) {
+            X.needs(RegManager, UseDefUnit::ReadSrcReg, idx);
+        }
+    }
+
+    if ( inst->isNonSpeculative() ) {
+        // skip execution of non speculative insts until later
+    } else if ( inst->isMemRef() ) {
+        if ( inst->isLoad() ) {
+            X.needs(AGEN, AGENUnit::GenerateAddr);
+        }
+    } else if (inst->opClass() == IntMultOp || inst->opClass() == IntDivOp) {
+        X.needs(MDU, MultDivUnit::StartMultDiv);
+    } else {
+        X.needs(ExecUnit, ExecutionUnit::ExecuteInst);
+    }
+
+    if (inst->opClass() == IntMultOp || inst->opClass() == IntDivOp) {
+        X.needs(MDU, MultDivUnit::EndMultDiv);
+    }
+
+    // MEMORY
+    if ( inst->isLoad() ) {
+        M.needs(DCache, CacheUnit::InitiateReadData);
+    } else if ( inst->isStore() ) {
+        if ( inst->numSrcRegs() >= 2 ) {
+            M.needs(RegManager, UseDefUnit::ReadSrcReg, 1);
+        }
+        M.needs(AGEN, AGENUnit::GenerateAddr);
+        M.needs(DCache, CacheUnit::InitiateWriteData);
+    }
+
+    // WRITEBACK
+    if ( inst->isLoad() ) {
+        W.needs(DCache, CacheUnit::CompleteReadData);
+    } else if ( inst->isStore() ) {
+        W.needs(DCache, CacheUnit::CompleteWriteData);
+    }
+
+    if ( inst->isNonSpeculative() ) {
+        if ( inst->isMemRef() ) fatal("Non-Speculative Memory Instruction");
+        W.needs(ExecUnit, ExecutionUnit::ExecuteInst);
+    }
+
+    W.needs(Grad, GraduationUnit::GraduateInst);
+
+    for (int idx=0; idx < inst->numDestRegs(); idx++) {
+        W.needs(RegManager, UseDefUnit::WriteDestReg, idx);
+    }
+
+    // Insert Back Schedule into our cache of resource schedules
+    addToSkedCache(inst, res_sked);
+
+    DPRINTF(SkedCache, "Back End Sked Created for instruction: %s (%08p)\n",
+            inst->instName(), inst->getMachInst());
+    res_sked->print();
+
+    return res_sked;
+}
 
 void
 InOrderCPU::regStats()
@@ -520,8 +642,7 @@ InOrderCPU::tick()
     }
     activityRec.advance();
    
-    // Any squashed requests, events, or insts then remove them now
-    cleanUpRemovedReqs();
+    // Remove any squashed events or insts now
     cleanUpRemovedEvents();
     cleanUpRemovedInsts();
 
@@ -1299,14 +1420,6 @@ InOrderCPU::cleanUpRemovedInsts()
         DynInstPtr inst = *removeList.front();
         ThreadID tid = inst->threadNumber;
 
-        // Make Sure Resource Schedule Is Emptied Out
-        ThePipeline::ResSchedule *inst_sched = &inst->resSched;
-        while (!inst_sched->empty()) {
-            ScheduleEntry* sch_entry = inst_sched->top();
-            inst_sched->pop();
-            delete sch_entry;
-        }
-
         // Remove From Register Dependency Map, If Necessary
         archRegDepMap[(*removeList.front())->threadNumber].
             remove((*removeList.front()));
@@ -1314,8 +1427,8 @@ InOrderCPU::cleanUpRemovedInsts()
 
         // Clear if Non-Speculative
         if (inst->staticInst &&
-              inst->seqNum == nonSpecSeqNum[tid] &&
-                nonSpecInstActive[tid] == true) {
+            inst->seqNum == nonSpecSeqNum[tid] &&
+            nonSpecInstActive[tid] == true) {
             nonSpecInstActive[tid] = false;
         }
 
@@ -1328,28 +1441,6 @@ InOrderCPU::cleanUpRemovedInsts()
 }
 
 void
-InOrderCPU::cleanUpRemovedReqs()
-{
-    while (!reqRemoveList.empty()) {
-        ResourceRequest *res_req = reqRemoveList.front();
-
-        DPRINTF(RefCount, "[tid:%i] [sn:%lli]: Removing Request "
-                "[stage_num:%i] [res:%s] [slot:%i] [completed:%i].\n",
-                res_req->inst->threadNumber,
-                res_req->inst->seqNum,
-                res_req->getStageNum(),
-                res_req->res->name(),
-                (res_req->isCompleted()) ?
-                res_req->getComplSlot() : res_req->getSlot(),
-                res_req->isCompleted());
-
-        reqRemoveList.pop();
-
-        delete res_req;
-    }
-}
-
-void
 InOrderCPU::cleanUpRemovedEvents()
 {
     while (!cpuEventRemoveList.empty()) {
diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh
index 9ff0f12ce..2fa6bdc59 100644
--- a/src/cpu/inorder/cpu.hh
+++ b/src/cpu/inorder/cpu.hh
@@ -296,6 +296,92 @@ class InOrderCPU : public BaseCPU
     TheISA::TLB *getITBPtr();
     TheISA::TLB *getDTBPtr();
 
+    /** Accessor Type for the SkedCache */
+    typedef uint32_t SkedID;
+
+    /** Cache of instruction schedules, keyed by the ID from genSkedID() */
+    static std::map<SkedID, ThePipeline::RSkedPtr> skedCache;
+
+    typedef std::map<SkedID, ThePipeline::RSkedPtr>::iterator SkedCacheIt;
+
+    /** Initialized to the map's end() iterator, signifying an invalid entry
+        on map searches
+    */
+    SkedCacheIt endOfSkedIt;
+
+    ThePipeline::RSkedPtr frontEndSked;
+
+    /** Cache inst_sked under inst's sked ID; the ID must not be cached yet */
+    void addToSkedCache(DynInstPtr inst, ThePipeline::RSkedPtr inst_sked)
+    {
+        const SkedID new_id = genSkedID(inst);
+        assert(skedCache.count(new_id) == 0);
+        skedCache[new_id] = inst_sked;
+    }
+
+
+    /** Look up the cached schedule matching inst's sked ID; NULL if none */
+    ThePipeline::RSkedPtr lookupSked(DynInstPtr inst)
+    {
+        SkedCacheIt cached = skedCache.find(genSkedID(inst));
+
+        // endOfSkedIt stands in for the "not found" result of find()
+        if (cached == endOfSkedIt) {
+            return NULL;
+        }
+
+        return cached->second;
+    }
+
+    static const uint8_t INST_OPCLASS                       = 26;
+    static const uint8_t INST_LOAD                          = 25;
+    static const uint8_t INST_STORE                         = 24;
+    static const uint8_t INST_CONTROL                       = 23;
+    static const uint8_t INST_NONSPEC                       = 22;
+    static const uint8_t INST_DEST_REGS                     = 18;
+    static const uint8_t INST_SRC_REGS                      = 14;
+
+    inline SkedID genSkedID(DynInstPtr inst)
+    {
+        // Pack each property at its INST_* bit offset to form the cache key
+        SkedID packed = inst->opClass() << INST_OPCLASS;
+        packed |= inst->isLoad() << INST_LOAD;
+        packed |= inst->isStore() << INST_STORE;
+        packed |= inst->isControl() << INST_CONTROL;
+        packed |= inst->isNonSpeculative() << INST_NONSPEC;
+        packed |= inst->numDestRegs() << INST_DEST_REGS;
+        packed |= inst->numSrcRegs() << INST_SRC_REGS;
+        return packed;
+    }
+
+    ThePipeline::RSkedPtr createFrontEndSked();
+    ThePipeline::RSkedPtr createBackEndSked(DynInstPtr inst);
+
+    /** Queues ScheduleEntry objects for one stage onto a resource sked */
+    class StageScheduler {
+      private:
+        ThePipeline::RSkedPtr rsked;
+        int stageNum;
+        int nextTaskPriority;
+
+      public:
+        StageScheduler(ThePipeline::RSkedPtr _rsked, int stage_num)
+            : rsked(_rsked), stageNum(stage_num), nextTaskPriority(0)
+        { }
+
+        void needs(int unit, int request) {
+            ScheduleEntry *entry =
+                new ScheduleEntry(stageNum, nextTaskPriority++, unit, request);
+            rsked->push(entry);
+        }
+
+        void needs(int unit, int request, int param) {
+            ScheduleEntry *entry = new ScheduleEntry(
+                stageNum, nextTaskPriority++, unit, request, param);
+            rsked->push(entry);
+        }
+    };
+
   public:
 
     /** Registers statistics. */
@@ -508,10 +594,7 @@ class InOrderCPU : public BaseCPU
     /** Cleans up all instructions on the instruction remove list. */
     void cleanUpRemovedInsts();
 
-    /** Cleans up all instructions on the request remove list. */
-    void cleanUpRemovedReqs();
-
-    /** Cleans up all instructions on the CPU event remove list. */
+    /** Cleans up all events on the CPU event remove list. */
     void cleanUpRemovedEvents();
 
     /** Debug function to print all instructions on the list. */
@@ -541,11 +624,6 @@ class InOrderCPU : public BaseCPU
      */
     std::queue<ListIt> removeList;
 
-    /** List of all the resource requests that will be removed at the end 
-     *  of this cycle.
-     */
-    std::queue<ResourceRequest*> reqRemoveList;
-
     /** List of all the cpu event requests that will be removed at the end of
      *  the current cycle.
      */
diff --git a/src/cpu/inorder/first_stage.cc b/src/cpu/inorder/first_stage.cc
index 71c6ec3e0..b656ca1c7 100644
--- a/src/cpu/inorder/first_stage.cc
+++ b/src/cpu/inorder/first_stage.cc
@@ -181,7 +181,7 @@ FirstStage::processInsts(ThreadID tid)
             inst->setInstListIt(cpu->addInst(inst));
 
             // Create Front-End Resource Schedule For Instruction
-            ThePipeline::createFrontEndSchedule(inst);
+            inst->setFrontSked(cpu->frontEndSked);
         }
 
         int reqs_processed = 0;            
diff --git a/src/cpu/inorder/inorder_dyn_inst.cc b/src/cpu/inorder/inorder_dyn_inst.cc
index 6afe35862..e9deb7625 100644
--- a/src/cpu/inorder/inorder_dyn_inst.cc
+++ b/src/cpu/inorder/inorder_dyn_inst.cc
@@ -51,7 +51,7 @@ InOrderDynInst::InOrderDynInst(TheISA::ExtMachInst machInst,
                                const TheISA::PCState &instPC,
                                const TheISA::PCState &_predPC,
                                InstSeqNum seq_num, InOrderCPU *cpu)
-  : staticInst(machInst, instPC.instAddr()), traceData(NULL), cpu(cpu)
+    : staticInst(machInst, instPC.instAddr()), traceData(NULL), cpu(cpu)
 {
     seqNum = seq_num;
 
@@ -108,6 +108,8 @@ InOrderDynInst::setMachInst(ExtMachInst machInst)
 void
 InOrderDynInst::initVars()
 {
+    inFrontEnd = true;
+
     fetchMemReq = NULL;
     dataMemReq = NULL;
     splitMemData = NULL;
@@ -123,7 +125,6 @@ InOrderDynInst::initVars()
     readyRegs = 0;
 
     nextStage = 0;
-    nextInstStageNum = 0;
 
     for(int i = 0; i < MaxInstDestRegs; i++)
         instResult[i].val.integer = 0;
@@ -206,8 +207,6 @@ InOrderDynInst::~InOrderDynInst()
 
     --instcount;
 
-    deleteStages();
-
     DPRINTF(InOrderDynInst, "DynInst: [tid:%i] [sn:%lli] Instruction destroyed"
             " (active insts: %i)\n", threadNumber, seqNum, instcount);
 }
@@ -282,29 +281,6 @@ InOrderDynInst::completeAcc(Packet *pkt)
     return this->fault;
 }
 
-InstStage *InOrderDynInst::addStage()
-{
-    this->currentInstStage = new InstStage(this, nextInstStageNum++);
-    instStageList.push_back( this->currentInstStage );
-    return this->currentInstStage;
-}
-
-InstStage *InOrderDynInst::addStage(int stage_num)
-{
-    nextInstStageNum = stage_num;
-    return InOrderDynInst::addStage();
-}
-
-void InOrderDynInst::deleteStages() {
-    std::list<InstStage*>::iterator list_it = instStageList.begin();
-    std::list<InstStage*>::iterator list_end = instStageList.end();
-
-    while(list_it != list_end) {
-        delete *list_it;
-        list_it++;
-    }
-}
-
 Fault
 InOrderDynInst::memAccess()
 {
diff --git a/src/cpu/inorder/inorder_dyn_inst.hh b/src/cpu/inorder/inorder_dyn_inst.hh
index 1c0ee4384..0e6be3da2 100644
--- a/src/cpu/inorder/inorder_dyn_inst.hh
+++ b/src/cpu/inorder/inorder_dyn_inst.hh
@@ -210,9 +210,6 @@ class InOrderDynInst : public FastAlloc, public RefCounted
     /**  Data used for a store for operation. */
     uint64_t storeData;
 
-    /** The resource schedule for this inst */
-    ThePipeline::ResSchedule resSched;
-
     /** List of active resource requests for this instruction */
     std::list<ResourceRequest*> reqList;
 
@@ -304,11 +301,6 @@ class InOrderDynInst : public FastAlloc, public RefCounted
 
     int nextStage;
 
-    /* vars to keep track of InstStage's - used for resource sched defn */
-    int nextInstStageNum;
-    ThePipeline::InstStage *currentInstStage;
-    std::list<ThePipeline::InstStage*> instStageList;
-
   private:
     /** Function to initialize variables in the constructors. */
     void initVars();
@@ -337,9 +329,10 @@ class InOrderDynInst : public FastAlloc, public RefCounted
     ////////////////////////////////////////////////////////////
     std::string instName() { return staticInst->getName(); }
 
-
     void setMachInst(ExtMachInst inst);
 
+    ExtMachInst getMachInst() { return staticInst->machInst; }
+
     /** Sets the StaticInst. */
     void setStaticInst(StaticInstPtr &static_inst);
 
@@ -411,68 +404,88 @@ class InOrderDynInst : public FastAlloc, public RefCounted
     // RESOURCE SCHEDULING
     //
     /////////////////////////////////////////////
+    typedef ThePipeline::RSkedPtr RSkedPtr;
+    bool inFrontEnd;
 
-    void setNextStage(int stage_num) { nextStage = stage_num; }
-    int getNextStage() { return nextStage; }
+    RSkedPtr frontSked;
+    RSkedIt frontSked_end;
+
+    RSkedPtr backSked;
+    RSkedIt backSked_end;
 
-    ThePipeline::InstStage *addStage();
-    ThePipeline::InstStage *addStage(int stage);
-    ThePipeline::InstStage *currentStage() { return currentInstStage; }
-    void deleteStages();
+    RSkedIt curSkedEntry;
+
+    /** Store the front-end schedule and point curSkedEntry at its first
+     *  entry so the instruction can start walking it. */
+    void setFrontSked(RSkedPtr front_sked)
+    {
+        frontSked = front_sked;
+        // Remember the end of the schedule for later comparisons
+        frontSked_end.init(frontSked);
+        frontSked_end = frontSked->end();
+
+        // This initializes instruction to be able
+        // to walk the resource schedule
+        curSkedEntry.init(frontSked);
+        curSkedEntry = frontSked->begin();
+    }
 
-    /** Add A Entry To Reource Schedule */
-    void addToSched(ScheduleEntry* sched_entry)
-    { resSched.push(sched_entry); }
+    void setBackSked(RSkedPtr back_sked)
+    {
+        backSked = back_sked;
+        backSked_end.init(backSked);
+        backSked_end = backSked->end();
+    }
 
+    void setNextStage(int stage_num) { nextStage = stage_num; }
+    int getNextStage() { return nextStage; }
 
     /** Print Resource Schedule */
-    /** @NOTE: DEBUG ONLY */
-    void printSched()
+    void printSked()
     {
-        ThePipeline::ResSchedule tempSched;
-        std::cerr << "\tInst. Res. Schedule: ";
-        while (!resSched.empty()) {
-            std::cerr << '\t' << resSched.top()->stageNum << "-"
-                 << resSched.top()->resNum << ", ";
-
-            tempSched.push(resSched.top());
-            resSched.pop();
+        if (frontSked != NULL) {
+            frontSked->print();
         }
 
-        std::cerr << std::endl;
-        resSched = tempSched;
+        if (backSked != NULL) {
+            backSked->print();
+        }
     }
 
     /** Return Next Resource Stage To Be Used */
     int nextResStage()
     {
-        if (resSched.empty())
-            return -1;
-        else
-            return resSched.top()->stageNum;
+        assert((inFrontEnd && curSkedEntry != frontSked_end) ||
+               (!inFrontEnd && curSkedEntry != backSked_end));
+
+        return curSkedEntry->stageNum;
     }
 
 
     /** Return Next Resource To Be Used */
     int nextResource()
     {
-        if (resSched.empty())
-            return -1;
-        else
-            return resSched.top()->resNum;
+        assert((inFrontEnd && curSkedEntry != frontSked_end) ||
+               (!inFrontEnd && curSkedEntry != backSked_end));
+
+        return curSkedEntry->resNum;
     }
 
-    /** Remove & Deallocate a schedule entry */
-    void popSchedEntry()
+    /** Finish using a schedule entry, increment to next entry */
+    bool finishSkedEntry()
     {
-        if (!resSched.empty()) {
-            ScheduleEntry* sked = resSched.top();
-            resSched.pop();
-            if (sked != 0) {
-                delete sked;
-                
-            }            
+        curSkedEntry++;
+
+        if (inFrontEnd && curSkedEntry == frontSked_end) {
+            assert(backSked != NULL);
+            curSkedEntry.init(backSked);
+            curSkedEntry = backSked->begin();
+            inFrontEnd = false;
+        } else if (!inFrontEnd && curSkedEntry == backSked_end) {
+            return true;
         }
+
+        return false;
     }
 
     /** Release a Resource Request (Currently Unused) */
diff --git a/src/cpu/inorder/pipeline_stage.cc b/src/cpu/inorder/pipeline_stage.cc
index 744ffd4d2..b267ac00e 100644
--- a/src/cpu/inorder/pipeline_stage.cc
+++ b/src/cpu/inorder/pipeline_stage.cc
@@ -44,12 +44,17 @@ PipelineStage::PipelineStage(Params *params, unsigned stage_num)
       stageBufferMax(params->stageWidth),
       prevStageValid(false), nextStageValid(false), idle(false)
 {
-    switchedOutBuffer.resize(ThePipeline::MaxThreads);
-    switchedOutValid.resize(ThePipeline::MaxThreads);
-    
     init(params);
 }
 
+PipelineStage::~PipelineStage()
+{
+    for (ThreadID t = 0; t < numThreads; ++t) {
+        skidBuffer[t].clear();
+        stalls[t].resources.clear();
+    }
+}
+
 void
 PipelineStage::init(Params *params)
 {
@@ -66,6 +71,12 @@ PipelineStage::init(Params *params)
         else
             lastStallingStage[tid] = NumStages - 1;
     }
+
+    if ((InOrderCPU::ThreadModel) params->threadModel ==
+        InOrderCPU::SwitchOnCacheMiss) {
+        switchedOutBuffer.resize(ThePipeline::MaxThreads);
+        switchedOutValid.resize(ThePipeline::MaxThreads);
+    }
 }
 
 
@@ -190,9 +201,6 @@ PipelineStage::takeOverFrom()
 
         stalls[tid].resources.clear();
 
-        while (!insts[tid].empty())
-            insts[tid].pop();
-
         skidBuffer[tid].clear();
     }
     wroteToTimeBuffer = false;
@@ -938,17 +946,24 @@ PipelineStage::processInstSchedule(DynInstPtr inst,int &reqs_processed)
                     "\n", tid, inst->seqNum, cpu->resPool->name(res_num));
 
             ResReqPtr req = cpu->resPool->request(res_num, inst);
+            assert(req->valid);
 
-            if (req->isCompleted()) {
+            bool req_completed = req->isCompleted();
+            bool done_in_pipeline = false;
+            if (req_completed) {
                 DPRINTF(InOrderStage, "[tid:%i]: [sn:%i] request to %s "
                         "completed.\n", tid, inst->seqNum, 
                         cpu->resPool->name(res_num));
 
-                inst->popSchedEntry();
-
                 reqs_processed++;                
 
                 req->stagePasses++;                
+
+                done_in_pipeline = inst->finishSkedEntry();
+                if (done_in_pipeline) {
+                    DPRINTF(InOrderDynInst, "[tid:%i]: [sn:%i] finished "
+                            "in pipeline.\n", tid, inst->seqNum);
+                }
             } else {
                 DPRINTF(InOrderStage, "[tid:%i]: [sn:%i] request to %s failed."
                         "\n", tid, inst->seqNum, cpu->resPool->name(res_num));
@@ -982,23 +997,20 @@ PipelineStage::processInstSchedule(DynInstPtr inst,int &reqs_processed)
                     // Activate Next Ready Thread at end of cycle
                     DPRINTF(ThreadModel, "Attempting to activate next ready "
                             "thread due to cache miss.\n");
-                    cpu->activateNextReadyContext();                                                                                               
-                }
-                
-                // Mark request for deletion
-                // if it isnt currently being used by a resource
-                if (!req->hasSlot()) {                   
-                    DPRINTF(InOrderStage, "[sn:%i] Deleting Request, has no "
-                            "slot in resource.\n", inst->seqNum);
-                    
-                    cpu->reqRemoveList.push(req);
-                } else {
-                    DPRINTF(InOrderStage, "[sn:%i] Ignoring Request Deletion, "
-                            "in resource [slot:%i].\n", inst->seqNum,
-                            req->getSlot());
+                    cpu->activateNextReadyContext();
                 }
-                
-                
+            }
+
+            // If this request no longer needs to take up bandwidth in the
+            // resource, go ahead and free that bandwidth up
+            if (req->doneInResource) {
+                req->freeSlot();
+            }
+
+            // No longer need to process this instruction if the last
+            // request it had wasn't completed or if there is nothing
+            // else for it to do in the pipeline
+            if (done_in_pipeline || !req_completed) {
                 break;
             }
 
diff --git a/src/cpu/inorder/pipeline_stage.hh b/src/cpu/inorder/pipeline_stage.hh
index dfa88de87..ec70fefc5 100644
--- a/src/cpu/inorder/pipeline_stage.hh
+++ b/src/cpu/inorder/pipeline_stage.hh
@@ -91,10 +91,7 @@ class PipelineStage
   public:
     PipelineStage(Params *params, unsigned stage_num);
 
-    /** MUST use init() function if this constructor is used. */
-    PipelineStage() { }
-
-    virtual ~PipelineStage() { }
+    virtual ~PipelineStage();
 
     /** PipelineStage initialization. */
     void init(Params *params);
@@ -268,16 +265,6 @@ class PipelineStage
      */
     unsigned instsProcessed;    
 
-    /** Queue of all instructions coming from previous stage on this cycle. */
-    std::queue<DynInstPtr> insts[ThePipeline::MaxThreads];
-
-    /** Queue of instructions that are finished processing and ready to go 
-     *  next stage. This is used to prevent from processing an instrution more 
-     *  than once on any stage. NOTE: It is up to the PROGRAMMER must manage 
-     *  this as a queue
-     */
-    std::list<DynInstPtr> instsToNextStage;
-
     /** Skid buffer between previous stage and this one. */
     std::list<DynInstPtr> skidBuffer[ThePipeline::MaxThreads];
 
diff --git a/src/cpu/inorder/pipeline_traits.cc b/src/cpu/inorder/pipeline_traits.cc
deleted file mode 100644
index a6fad68f7..000000000
--- a/src/cpu/inorder/pipeline_traits.cc
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * Copyright (c) 2007 MIPS Technologies, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Korey Sewell
- *
- */
-
-#include "cpu/inorder/pipeline_traits.hh"
-#include "cpu/inorder/inorder_dyn_inst.hh"
-#include "cpu/inorder/resources/resource_list.hh"
-
-using namespace std;
-
-namespace ThePipeline {
-
-//@TODO: create my own Instruction Schedule Class
-//that operates as a Priority QUEUE
-int getNextPriority(DynInstPtr &inst, int stage_num)
-{
-    int cur_pri = 20;
-
-    /*
-    std::priority_queue<ScheduleEntry*, std::vector<ScheduleEntry*>,
-        entryCompare>::iterator sked_it = inst->resSched.begin();
-
-    std::priority_queue<ScheduleEntry*, std::vector<ScheduleEntry*>,
-        entryCompare>::iterator sked_end = inst->resSched.end();
-
-    while (sked_it != sked_end) {
-
-        if (sked_it.top()->stageNum == stage_num) {
-            cur_pri = sked_it.top()->priority;
-        }
-
-        sked_it++;
-    }
-    */
-
-    return cur_pri;
-}
-
-void createFrontEndSchedule(DynInstPtr &inst)
-{
-    InstStage *F = inst->addStage();
-    InstStage *D = inst->addStage();
-
-    // FETCH
-    F->needs(FetchSeq, FetchSeqUnit::AssignNextPC);
-    F->needs(ICache, FetchUnit::InitiateFetch);
-
-    // DECODE
-    D->needs(ICache, FetchUnit::CompleteFetch);
-    D->needs(Decode, DecodeUnit::DecodeInst);
-    D->needs(BPred, BranchPredictor::PredictBranch);
-    D->needs(FetchSeq, FetchSeqUnit::UpdateTargetPC);
-
-    inst->resSched.init();
-}
-
-bool createBackEndSchedule(DynInstPtr &inst)
-{
-    if (!inst->staticInst) {
-        return false;
-    }
-
-    InstStage *X = inst->addStage();
-    InstStage *M = inst->addStage();
-    InstStage *W = inst->addStage();
-
-    // EXECUTE
-    for (int idx=0; idx < inst->numSrcRegs(); idx++) {
-        if (!idx || !inst->isStore()) {
-            X->needs(RegManager, UseDefUnit::ReadSrcReg, idx);
-        }
-    }
-
-    if ( inst->isNonSpeculative() ) {
-        // skip execution of non speculative insts until later
-    } else if ( inst->isMemRef() ) {
-        if ( inst->isLoad() ) {
-            X->needs(AGEN, AGENUnit::GenerateAddr);
-        }
-    } else if (inst->opClass() == IntMultOp || inst->opClass() == IntDivOp) {
-        X->needs(MDU, MultDivUnit::StartMultDiv);
-    } else {
-        X->needs(ExecUnit, ExecutionUnit::ExecuteInst);
-    }
-
-    if (inst->opClass() == IntMultOp || inst->opClass() == IntDivOp) {
-        X->needs(MDU, MultDivUnit::EndMultDiv);
-    }
-
-    // MEMORY
-    if ( inst->isLoad() ) {
-        M->needs(DCache, CacheUnit::InitiateReadData);
-    } else if ( inst->isStore() ) {
-        if ( inst->numSrcRegs() >= 2 ) {            
-            M->needs(RegManager, UseDefUnit::ReadSrcReg, 1);
-        }        
-        M->needs(AGEN, AGENUnit::GenerateAddr);
-        M->needs(DCache, CacheUnit::InitiateWriteData);
-    }
-
-
-    // WRITEBACK
-    if ( inst->isLoad() ) {
-        W->needs(DCache, CacheUnit::CompleteReadData);
-    } else if ( inst->isStore() ) {
-        W->needs(DCache, CacheUnit::CompleteWriteData);
-    }
-
-    if ( inst->isNonSpeculative() ) {
-        if ( inst->isMemRef() ) fatal("Non-Speculative Memory Instruction");
-        W->needs(ExecUnit, ExecutionUnit::ExecuteInst);
-    }
-
-    for (int idx=0; idx < inst->numDestRegs(); idx++) {
-        W->needs(RegManager, UseDefUnit::WriteDestReg, idx);
-    }
-
-    W->needs(Grad, GraduationUnit::GraduateInst);
-
-    return true;
-}
-
-InstStage::InstStage(DynInstPtr inst, int stage_num)
-{
-    stageNum = stage_num;
-    nextTaskPriority = 0;
-    instSched = &inst->resSched;
-}
-
-void
-InstStage::needs(int unit, int request) {
-    instSched->push( new ScheduleEntry(
-                         stageNum, nextTaskPriority++, unit, request
-                         ));
-}
-
-void
-InstStage::needs(int unit, int request, int param) {
-    instSched->push( new ScheduleEntry(
-                         stageNum, nextTaskPriority++, unit, request, param
-                         ));
-}
-
-};
diff --git a/src/cpu/inorder/pipeline_traits.hh b/src/cpu/inorder/pipeline_traits.hh
index df964e254..573c0200a 100644
--- a/src/cpu/inorder/pipeline_traits.hh
+++ b/src/cpu/inorder/pipeline_traits.hh
@@ -51,7 +51,7 @@ class ResourceSked;
 namespace ThePipeline {
     // Pipeline Constants
     const unsigned NumStages = 5;
-    const ThreadID MaxThreads = 8;
+    const ThreadID MaxThreads = 1;
     const unsigned BackEndStartStage = 2;
 
     // List of Resources The Pipeline Uses
@@ -77,23 +77,7 @@ namespace ThePipeline {
     // RESOURCE SCHEDULING
     //////////////////////////
     typedef ResourceSked ResSchedule;
-
-    void createFrontEndSchedule(DynInstPtr &inst);
-    bool createBackEndSchedule(DynInstPtr &inst);
-    int getNextPriority(DynInstPtr &inst, int stage_num);
-
-    class InstStage {
-      private:
-        int nextTaskPriority;
-        int stageNum;
-        ResSchedule *instSched;
-
-      public:
-        InstStage(DynInstPtr inst, int stage_num);
-
-        void needs(int unit, int request);
-        void needs(int unit, int request, int param);
-    };
+    typedef ResourceSked* RSkedPtr;
 };
 
 
diff --git a/src/cpu/inorder/reg_dep_map.cc b/src/cpu/inorder/reg_dep_map.cc
index 98a0727a9..48820b50e 100644
--- a/src/cpu/inorder/reg_dep_map.cc
+++ b/src/cpu/inorder/reg_dep_map.cc
@@ -45,6 +45,14 @@ RegDepMap::RegDepMap(int size)
     regMap.resize(size);
 }
 
+RegDepMap::~RegDepMap()
+{
+    for (int i = 0; i < regMap.size(); i++) {
+        regMap[i].clear();
+    }
+    regMap.clear();
+}
+
 string
 RegDepMap::name()
 {
diff --git a/src/cpu/inorder/reg_dep_map.hh b/src/cpu/inorder/reg_dep_map.hh
index fa4fe45f3..047e4d129 100644
--- a/src/cpu/inorder/reg_dep_map.hh
+++ b/src/cpu/inorder/reg_dep_map.hh
@@ -48,7 +48,7 @@ class RegDepMap
   public:
     RegDepMap(int size = TheISA::TotalNumRegs);
 
-    ~RegDepMap() { }
+    ~RegDepMap();
 
     std::string name();
 
diff --git a/src/cpu/inorder/resource.cc b/src/cpu/inorder/resource.cc
index 51beb5aa0..24211532e 100644
--- a/src/cpu/inorder/resource.cc
+++ b/src/cpu/inorder/resource.cc
@@ -31,6 +31,8 @@
 
 #include <vector>
 #include <list>
+
+#include "base/str.hh"
 #include "cpu/inorder/resource.hh"
 #include "cpu/inorder/cpu.hh"
 using namespace std;
@@ -40,22 +42,42 @@ Resource::Resource(string res_name, int res_id, int res_width,
     : resName(res_name), id(res_id),
       width(res_width), latency(res_latency), cpu(_cpu)
 {
+    reqs.resize(width);
+
     // Use to deny a instruction a resource.
-    deniedReq = new ResourceRequest(this, NULL, 0, 0, 0, 0);
+    deniedReq = new ResourceRequest(this);
+    deniedReq->valid = true;
 }
 
 Resource::~Resource()
 {
-    delete [] resourceEvent;
-    delete deniedReq;    
+    if (resourceEvent) {
+        delete [] resourceEvent;
+    }
+
+    delete deniedReq;
+
+    for (int i = 0; i < width; i++) {
+        delete reqs[i];
+    }
 }
 
 
 void
 Resource::init()
 {
-    // Set Up Resource Events to Appropriate Resource BandWidth
-    resourceEvent = new ResourceEvent[width];
+    // If the resource has a zero-cycle (no latency)
+    // function, then there is no reason to allocate events
+    // to process its requests at a later tick
+    if (latency > 0) {
+        resourceEvent = new ResourceEvent[width];
+    } else {
+        resourceEvent = NULL;
+    }
+
+    for (int i = 0; i < width; i++) {
+        reqs[i] = new ResourceRequest(this);
+    }
 
     initSlots();
 }
@@ -66,7 +88,10 @@ Resource::initSlots()
     // Add available slot numbers for resource
     for (int slot_idx = 0; slot_idx < width; slot_idx++) {
         availSlots.push_back(slot_idx);
-        resourceEvent[slot_idx].init(this, slot_idx);
+
+        if (resourceEvent) {
+            resourceEvent[slot_idx].init(this, slot_idx);
+        }
     }
 }
 
@@ -91,42 +116,34 @@ Resource::slotsInUse()
 void
 Resource::freeSlot(int slot_idx)
 {
+    DPRINTF(Resource, "Deallocating [slot:%i].\n",
+            slot_idx);
+
     // Put slot number on this resource's free list
     availSlots.push_back(slot_idx);
 
-    // Erase Request Pointer From Request Map
-    std::map<int, ResReqPtr>::iterator req_it = reqMap.find(slot_idx);
-
-    assert(req_it != reqMap.end());
-    reqMap.erase(req_it);
-
+    // Invalidate Request & Reset its flags
+    reqs[slot_idx]->clearRequest();
 }
 
-// TODO: More efficiently search for instruction's slot within
-// resource.
 int
 Resource::findSlot(DynInstPtr inst)
 {
-    map<int, ResReqPtr>::iterator map_it = reqMap.begin();
-    map<int, ResReqPtr>::iterator map_end = reqMap.end();
-
     int slot_num = -1;
 
-    while (map_it != map_end) {
-        if ((*map_it).second->getInst()->seqNum ==
-            inst->seqNum) {
-            slot_num = (*map_it).second->getSlot();
+    for (int i = 0; i < width; i++) {
+        if (reqs[i]->valid &&
+            reqs[i]->getInst()->seqNum == inst->seqNum) {
+            slot_num = reqs[i]->getSlot();
         }
-        map_it++;
     }
-
     return slot_num;
 }
 
 int
 Resource::getSlot(DynInstPtr inst)
 {
-    int slot_num;
+    int slot_num = -1;
 
     if (slotsAvail() != 0) {
         slot_num = availSlots[0];
@@ -136,24 +153,6 @@ Resource::getSlot(DynInstPtr inst)
         assert(slot_num == *vect_it);
 
         availSlots.erase(vect_it);
-    } else {
-        DPRINTF(Resource, "[tid:%i]: No slots in resource "
-                "available to service [sn:%i].\n", inst->readTid(),
-                inst->seqNum);
-        slot_num = -1;
-
-        map<int, ResReqPtr>::iterator map_it = reqMap.begin();
-        map<int, ResReqPtr>::iterator map_end = reqMap.end();
-
-        while (map_it != map_end) {
-            if ((*map_it).second) {
-                DPRINTF(Resource, "Currently Serving request from: "
-                        "[tid:%i] [sn:%i].\n",
-                        (*map_it).second->getInst()->readTid(),
-                        (*map_it).second->getInst()->seqNum);
-            }
-            map_it++;
-        }
     }
 
     return slot_num;
@@ -183,9 +182,12 @@ Resource::request(DynInstPtr inst)
         slot_num = getSlot(inst);
 
         if (slot_num != -1) {
+            DPRINTF(Resource, "Allocating [slot:%i] for [tid:%i]: [sn:%i]\n",
+                    slot_num, inst->readTid(), inst->seqNum);
+
             // Get Stage # from Schedule Entry
-            stage_num = inst->resSched.top()->stageNum;
-            unsigned cmd = inst->resSched.top()->cmd;
+            stage_num = inst->curSkedEntry->stageNum;
+            unsigned cmd = inst->curSkedEntry->cmd;
 
             // Generate Resource Request
             inst_req = getRequest(inst, stage_num, id, slot_num, cmd);
@@ -200,10 +202,12 @@ Resource::request(DynInstPtr inst)
                         inst->readTid());
             }
 
-            reqMap[slot_num] = inst_req;
-
             try_request = true;
+        } else {
+            DPRINTF(Resource, "No slot available for [tid:%i]: [sn:%i]\n",
+                    inst->readTid(), inst->seqNum);
         }
+
     }
 
     if (try_request) {
@@ -236,32 +240,21 @@ ResReqPtr
 Resource::getRequest(DynInstPtr inst, int stage_num, int res_idx,
                      int slot_num, unsigned cmd)
 {
-    return new ResourceRequest(this, inst, stage_num, id, slot_num,
-                               cmd);
+    reqs[slot_num]->setRequest(inst, stage_num, id, slot_num, cmd);
+    return reqs[slot_num];
 }
 
 ResReqPtr
 Resource::findRequest(DynInstPtr inst)
 {
-    map<int, ResReqPtr>::iterator map_it = reqMap.begin();
-    map<int, ResReqPtr>::iterator map_end = reqMap.end();
-
-    bool found = false;
-    ResReqPtr req = NULL;
-    
-    while (map_it != map_end) {
-        if ((*map_it).second &&
-            (*map_it).second->getInst() == inst) {            
-            req = (*map_it).second;
-            //return (*map_it).second;
-            assert(found == false);
-            found = true;            
+    for (int i = 0; i < width; i++) {
+        if (reqs[i]->valid &&
+            reqs[i]->getInst() == inst) {
+            return reqs[i];
         }
-        map_it++;
     }
 
-    return req;    
-    //return NULL;
+    return NULL;
 }
 
 void
@@ -275,9 +268,9 @@ void
 Resource::execute(int slot_idx)
 {
     DPRINTF(Resource, "[tid:%i]: Executing %s resource.\n",
-            reqMap[slot_idx]->getTid(), name());
-    reqMap[slot_idx]->setCompleted(true);
-    reqMap[slot_idx]->done();
+            reqs[slot_idx]->getTid(), name());
+    reqs[slot_idx]->setCompleted(true);
+    reqs[slot_idx]->done();
 }
 
 void
@@ -293,15 +286,10 @@ void
 Resource::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num,
                  ThreadID tid)
 {
-    std::vector<int> slot_remove_list;
+    for (int i = 0; i < width; i++) {
+        ResReqPtr req_ptr = reqs[i];
 
-    map<int, ResReqPtr>::iterator map_it = reqMap.begin();
-    map<int, ResReqPtr>::iterator map_end = reqMap.end();
-
-    while (map_it != map_end) {
-        ResReqPtr req_ptr = (*map_it).second;
-
-        if (req_ptr &&
+        if (req_ptr->valid &&
             req_ptr->getInst()->readTid() == tid &&
             req_ptr->getInst()->seqNum > squash_seq_num) {
 
@@ -316,19 +304,8 @@ Resource::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num,
             if (resourceEvent[req_slot_num].scheduled())
                 unscheduleEvent(req_slot_num);
 
-            // Mark request for later removal
-            cpu->reqRemoveList.push(req_ptr);
-
-            // Mark slot for removal from resource
-            slot_remove_list.push_back(req_ptr->getSlot());
+            freeSlot(req_slot_num);
         }
-
-        map_it++;
-    }
-
-    // Now Delete Slot Entry from Req. Map
-    for (int i = 0; i < slot_remove_list.size(); i++) {
-        freeSlot(slot_remove_list[i]);
     }
 }
 
@@ -350,10 +327,8 @@ Resource::ticks(int num_cycles)
 void
 Resource::scheduleExecution(int slot_num)
 {
-    int res_latency = getLatency(slot_num);
-
-    if (res_latency >= 1) {
-        scheduleEvent(slot_num, res_latency);
+    if (latency >= 1) {
+        scheduleEvent(slot_num, latency);
     } else {
         execute(slot_num);
     }
@@ -363,8 +338,8 @@ void
 Resource::scheduleEvent(int slot_idx, int delay)
 {
     DPRINTF(Resource, "[tid:%i]: Scheduling event for [sn:%i] on tick %i.\n",
-            reqMap[slot_idx]->inst->readTid(),
-            reqMap[slot_idx]->inst->seqNum,
+            reqs[slot_idx]->inst->readTid(),
+            reqs[slot_idx]->inst->seqNum,
             cpu->ticks(delay) + curTick());
     resourceEvent[slot_idx].scheduleEvent(delay);
 }
@@ -401,32 +376,11 @@ int ResourceRequest::resReqID = 0;
 
 int ResourceRequest::maxReqCount = 0;
 
-ResourceRequest::ResourceRequest(Resource *_res, DynInstPtr _inst, 
-                                 int stage_num, int res_idx, int slot_num, 
-                                 unsigned _cmd)
-    : res(_res), inst(_inst), cmd(_cmd),  stageNum(stage_num),
-      resIdx(res_idx), slotNum(slot_num), completed(false),
-      squashed(false), processing(false), memStall(false)
+ResourceRequest::ResourceRequest(Resource *_res)
+    : res(_res), inst(NULL), stagePasses(0), valid(false), doneInResource(false),
+      completed(false), squashed(false), processing(false),
+      memStall(false)
 {
-#ifdef DEBUG
-        reqID = resReqID++;
-        res->cpu->resReqCount++;
-        DPRINTF(ResReqCount, "Res. Req %i created. resReqCount=%i.\n", reqID, 
-                res->cpu->resReqCount);
-
-        if (res->cpu->resReqCount > 100) {
-            fatal("Too many undeleted resource requests. Memory leak?\n");
-        }
-
-        if (res->cpu->resReqCount > maxReqCount) {            
-            maxReqCount = res->cpu->resReqCount;
-        }
-        
-#endif
-
-        stagePasses = 0;
-        complSlotNum = -1;
-        
 }
 
 ResourceRequest::~ResourceRequest()
@@ -436,6 +390,46 @@ ResourceRequest::~ResourceRequest()
         DPRINTF(ResReqCount, "Res. Req %i deleted. resReqCount=%i.\n", reqID, 
                 res->cpu->resReqCount);
 #endif
+        inst = NULL;
+}
+
+std::string
+ResourceRequest::name()
+{
+    return res->name() + "."  + to_string(slotNum);
+}
+
+void
+ResourceRequest::setRequest(DynInstPtr _inst, int stage_num,
+                            int res_idx, int slot_num, unsigned _cmd)
+{
+    valid = true;
+    inst = _inst;
+    stageNum = stage_num;
+    resIdx = res_idx;
+    slotNum = slot_num;
+    cmd = _cmd;
+}
+
+void
+ResourceRequest::clearRequest()
+{
+    valid = false;
+    inst = NULL;
+    stagePasses = 0;
+    completed = false;
+    doneInResource = false;
+    squashed = false;
+    memStall = false;
+}
+
+void
+ResourceRequest::freeSlot()
+{
+    assert(res);
+
+    // Free Slot So Another Instruction Can Use This Resource
+    res->freeSlot(slotNum);
 }
 
 void
@@ -447,25 +441,7 @@ ResourceRequest::done(bool completed)
 
     setCompleted(completed);
 
-    // Used for debugging purposes
-    if (completed) {
-        complSlotNum = slotNum;
-    
-        // Would like to start a convention such as all requests deleted in
-        // resources/pipeline
-        // but a little more complex then it seems...
-        // For now, all COMPLETED requests deleted in resource..
-        //          all FAILED requests deleted in pipeline stage
-        //          *all SQUASHED requests deleted in resource
-        res->cpu->reqRemoveList.push(res->reqMap[slotNum]);
-    }
-    
-    // Free Slot So Another Instruction Can Use This Resource
-    res->freeSlot(slotNum);
-
-    // change slot # to -1, since we check slotNum to see if request is
-    // still valid
-    slotNum = -1;
+    doneInResource = true;
 }
 
 ResourceEvent::ResourceEvent()
@@ -493,7 +469,8 @@ ResourceEvent::process()
 const char *
 ResourceEvent::description()
 {
-    string desc = resource->name() + " event";
+    static string desc;
+    desc = resource->name() + "-event:slot[" + to_string(slotIdx) + "]";
 
     return desc.c_str();
 }
diff --git a/src/cpu/inorder/resource.hh b/src/cpu/inorder/resource.hh
index bd9ec48ca..7899a215f 100644
--- a/src/cpu/inorder/resource.hh
+++ b/src/cpu/inorder/resource.hh
@@ -221,8 +221,10 @@ class Resource {
     const int latency;
 
   public:
-    /** Mapping of slot-numbers to the resource-request pointers */
-    std::map<int, ResReqPtr> reqMap;
+    /** List of all Requests the Resource is Servicing. Each request
+        represents part of the resource's bandwidth
+    */
+    std::vector<ResReqPtr> reqs;
 
     /** A list of all the available execution slots for this resource.
      *  This correlates with the actual resource event idx.
@@ -245,7 +247,7 @@ class Resource {
 class ResourceEvent : public Event
 {
   public:
-    /** Pointer to the CPU. */
+    /** Pointer to the Resource this is an event for */
     Resource *resource;
 
 
@@ -297,21 +299,29 @@ class ResourceRequest
 
     static int maxReqCount;
     
+    friend class Resource;
+
   public:
-    ResourceRequest(Resource *_res, DynInstPtr _inst, int stage_num,
-                    int res_idx, int slot_num, unsigned _cmd);
+    ResourceRequest(Resource *_res);
     
     virtual ~ResourceRequest();
+
+    std::string name();
     
     int reqID;
 
+    virtual void setRequest(DynInstPtr _inst, int stage_num,
+                    int res_idx, int slot_num, unsigned _cmd);
+
+    virtual void clearRequest();
+
     /** Acknowledge that this is a request is done and remove
      *  from resource.
      */
     void done(bool completed = true);
-
-    short stagePasses;
     
+    void freeSlot();
+
     /////////////////////////////////////////////
     //
     // GET RESOURCE REQUEST IDENTIFICATION / INFO
@@ -319,11 +329,9 @@ class ResourceRequest
     /////////////////////////////////////////////
     /** Get Resource Index */
     int getResIdx() { return resIdx; }
-
        
     /** Get Slot Number */
     int getSlot() { return slotNum; }
-    int getComplSlot() { return complSlotNum; }
     bool hasSlot()  { return slotNum >= 0; }     
 
     /** Get Stage Number */
@@ -353,6 +361,12 @@ class ResourceRequest
     /** Command For This Resource */
     unsigned cmd;
 
+    short stagePasses;
+
+    bool valid;
+
+    bool doneInResource;
+
     ////////////////////////////////////////
     //
     // GET RESOURCE REQUEST STATUS FROM VARIABLES
@@ -380,7 +394,6 @@ class ResourceRequest
     int stageNum;
     int resIdx;
     int slotNum;
-    int complSlotNum;
     
     /** Resource Request Status */
     bool completed;
diff --git a/src/cpu/inorder/resource_pool.cc b/src/cpu/inorder/resource_pool.cc
index a037cbe9e..4e2f930ab 100644
--- a/src/cpu/inorder/resource_pool.cc
+++ b/src/cpu/inorder/resource_pool.cc
@@ -55,7 +55,7 @@ ResourcePool::ResourcePool(InOrderCPU *_cpu, ThePipeline::Params *params)
 
     memObjects.push_back(ICache);
     resources.push_back(new FetchUnit("icache_port", ICache,
-                                      stage_width * MaxThreads, 0, _cpu,
+                                      stage_width * 2 + MaxThreads, 0, _cpu,
                                       params));
 
     resources.push_back(new DecodeUnit("Decode-Unit", Decode, 
@@ -68,7 +68,7 @@ ResourcePool::ResourcePool(InOrderCPU *_cpu, ThePipeline::Params *params)
                                        0, _cpu, params));
 
     resources.push_back(new UseDefUnit("RegFile-Manager", RegManager, 
-                                       stage_width * MaxThreads, 0, _cpu,
+                                       stage_width * 3, 0, _cpu,
                                        params));
 
     resources.push_back(new AGENUnit("AGEN-Unit", AGEN, 
@@ -77,20 +77,21 @@ ResourcePool::ResourcePool(InOrderCPU *_cpu, ThePipeline::Params *params)
     resources.push_back(new ExecutionUnit("Execution-Unit", ExecUnit, 
                                           stage_width, 0, _cpu, params));
 
-    resources.push_back(new MultDivUnit("Mult-Div-Unit", MDU, 5, 0, _cpu, 
-                                        params));
+    resources.push_back(new MultDivUnit("Mult-Div-Unit", MDU,
+                                        stage_width * 2, 0, _cpu, params));
 
     memObjects.push_back(DCache);
     resources.push_back(new CacheUnit("dcache_port", DCache, 
-                                      stage_width * MaxThreads, 0, _cpu,
+                                      stage_width * 2 + MaxThreads, 0, _cpu,
                                       params));
 
     resources.push_back(new GraduationUnit("Graduation-Unit", Grad, 
-                                           stage_width * MaxThreads, 0, _cpu,
+                                           stage_width, 0, _cpu,
                                            params));
 
     resources.push_back(new InstBuffer("Fetch-Buffer-T1", FetchBuff2, 4, 
                                        0, _cpu, params));
+
 }
 
 ResourcePool::~ResourcePool()
@@ -122,6 +123,16 @@ ResourcePool::name()
     return cpu->name() + ".ResourcePool";
 }
 
+void
+ResourcePool::print()
+{
+    for (int i=0; i < resources.size(); i++) {
+        DPRINTF(InOrderDynInst, "Res:%i %s\n",
+                i, resources[i]->name());
+    }
+
+}
+
 
 void
 ResourcePool::regStats()
diff --git a/src/cpu/inorder/resource_pool.hh b/src/cpu/inorder/resource_pool.hh
index e8061d3ff..fde38b4e9 100644
--- a/src/cpu/inorder/resource_pool.hh
+++ b/src/cpu/inorder/resource_pool.hh
@@ -130,6 +130,8 @@ class ResourcePool {
 
     void init();
 
+    void print();
+
     /** Register Statistics in All Resources */
     void regStats();
 
diff --git a/src/cpu/inorder/resource_sked.cc b/src/cpu/inorder/resource_sked.cc
index 4104e6989..4cf791228 100644
--- a/src/cpu/inorder/resource_sked.cc
+++ b/src/cpu/inorder/resource_sked.cc
@@ -34,30 +34,30 @@
 
 #include <vector>
 #include <list>
-#include <stdio.h>
+#include <cstdio>
 
 using namespace std;
 using namespace ThePipeline;
 
 ResourceSked::ResourceSked()
 {
-    sked.resize(NumStages);
+    stages.resize(NumStages);
 }
 
 void
 ResourceSked::init()
 {
-    assert(!sked[0].empty());
+    assert(!stages[0].empty());
 
-    curSkedEntry = sked[0].begin();
+    curSkedEntry = stages[0].begin();
 }
 
 int
 ResourceSked::size()
 {
     int total = 0;
-    for (int i = 0; i < sked.size(); i++) {
-        total += sked[i].size();
+    for (int i = 0; i < stages.size(); i++) {
+        total += stages[i].size();
     }
 
     return total;
@@ -69,6 +69,26 @@ ResourceSked::empty()
     return size() == 0;
 }
 
+
+ResourceSked::SkedIt
+ResourceSked::begin()
+{
+    int num_stages = stages.size();
+    for (int i = 0; i < num_stages; i++) {
+        if (stages[i].size() > 0)
+            return stages[i].begin();
+    }
+
+    return stages[num_stages - 1].end();
+}
+
+ResourceSked::SkedIt
+ResourceSked::end()
+{
+    int num_stages = stages.size();
+    return stages[num_stages - 1].end();
+}
+
 ScheduleEntry*
 ResourceSked::top()
 {
@@ -82,18 +102,18 @@ ResourceSked::pop()
 {
     int stage_num = (*curSkedEntry)->stageNum;
 
-    sked[stage_num].erase(curSkedEntry);
+    stages[stage_num].erase(curSkedEntry);
 
-    if (!sked[stage_num].empty()) {
-        curSkedEntry = sked[stage_num].begin();
+    if (!stages[stage_num].empty()) {
+        curSkedEntry = stages[stage_num].begin();
     } else {
         int next_stage = stage_num + 1;
 
         while (next_stage < NumStages) {
-            if (sked[next_stage].empty()) {
+            if (stages[next_stage].empty()) {
                 next_stage++;
             } else {
-                curSkedEntry = sked[next_stage].begin();
+                curSkedEntry = stages[next_stage].begin();
                 break;
             }
         }
@@ -108,7 +128,7 @@ ResourceSked::push(ScheduleEntry* sked_entry)
 
     SkedIt pri_iter = findIterByPriority(sked_entry, stage_num);
 
-    sked[stage_num].insert(pri_iter, sked_entry);
+    stages[stage_num].insert(pri_iter, sked_entry);
 }
 
 void
@@ -122,23 +142,23 @@ ResourceSked::pushBefore(ScheduleEntry* sked_entry, int sked_cmd,
     SkedIt pri_iter = findIterByCommand(sked_entry, stage_num,
                                         sked_cmd, sked_cmd_idx);
 
-    assert(pri_iter != sked[stage_num].end() &&
+    assert(pri_iter != stages[stage_num].end() &&
            "Could not find command to insert in front of.");
 
-    sked[stage_num].insert(pri_iter, sked_entry);
+    stages[stage_num].insert(pri_iter, sked_entry);
 }
 
 ResourceSked::SkedIt
 ResourceSked::findIterByPriority(ScheduleEntry* sked_entry, int stage_num)
 {
-    if (sked[stage_num].empty()) {
-        return sked[stage_num].end();
+    if (stages[stage_num].empty()) {
+        return stages[stage_num].end();
     }
 
     int priority = sked_entry->priority;
 
-    SkedIt sked_it = sked[stage_num].begin();
-    SkedIt sked_end = sked[stage_num].end();
+    SkedIt sked_it = stages[stage_num].begin();
+    SkedIt sked_end = stages[stage_num].end();
 
     while (sked_it != sked_end) {
         if ((*sked_it)->priority > priority)
@@ -154,12 +174,12 @@ ResourceSked::SkedIt
 ResourceSked::findIterByCommand(ScheduleEntry* sked_entry, int stage_num,
                                 int sked_cmd, int sked_cmd_idx)
 {
-    if (sked[stage_num].empty()) {
-        return sked[stage_num].end();
+    if (stages[stage_num].empty()) {
+        return stages[stage_num].end();
     }
 
-    SkedIt sked_it = sked[stage_num].begin();
-    SkedIt sked_end = sked[stage_num].end();
+    SkedIt sked_it = stages[stage_num].begin();
+    SkedIt sked_end = stages[stage_num].end();
 
     while (sked_it != sked_end) {
         if ((*sked_it)->cmd == sked_cmd &&
@@ -175,12 +195,16 @@ ResourceSked::findIterByCommand(ScheduleEntry* sked_entry, int stage_num,
 void
 ResourceSked::print()
 {
-    for (int i = 0; i < sked.size(); i++) {
-        cprintf("Stage %i\n====\n", i);
-        SkedIt sked_it = sked[i].begin();
-        SkedIt sked_end = sked[i].end();
+    for (int i = 0; i < stages.size(); i++) {
+        //ccprintf(cerr, "Stage %i\n====\n", i);
+        SkedIt sked_it = stages[i].begin();
+        SkedIt sked_end = stages[i].end();
         while (sked_it != sked_end) {
-            cprintf("\t res:%i cmd:%i idx:%i\n", (*sked_it)->resNum, (*sked_it)->cmd, (*sked_it)->idx);
+            DPRINTF(SkedCache, "\t stage:%i res:%i cmd:%i idx:%i\n",
+                    (*sked_it)->stageNum,
+                    (*sked_it)->resNum,
+                    (*sked_it)->cmd,
+                    (*sked_it)->idx);
             sked_it++;
         }
     }
diff --git a/src/cpu/inorder/resource_sked.hh b/src/cpu/inorder/resource_sked.hh
index 22e29d728..bd002e161 100644
--- a/src/cpu/inorder/resource_sked.hh
+++ b/src/cpu/inorder/resource_sked.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010 The Regents of The University of Michigan
+ * Copyright (c) 2010-2011 The Regents of The University of Michigan
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -34,7 +34,19 @@
 
 #include <vector>
 #include <list>
+#include <cstdlib>
 
+/** ScheduleEntry class represents a single function that an instruction
+    wants to do at any pipeline stage. For example, if an instruction
+    needs to be decoded and do a branch prediction all in one stage
+    then each of those tasks would need its own ScheduleEntry.
+
+    Each schedule entry corresponds to some resource that the instruction
+    wants to interact with.
+
+    The file pipeline_traits.cc shows how a typical instruction schedule is
+    made up of these schedule entries.
+*/
 class ScheduleEntry {
   public:
     ScheduleEntry(int stage_num, int _priority, int res_num, int _cmd = 0,
@@ -43,45 +55,225 @@ class ScheduleEntry {
         idx(_idx), priority(_priority)
     { }
 
-    // Stage number to perform this service.
+    /** Stage number to perform this service. */
     int stageNum;
 
-    // Resource ID to access
+    /** Resource ID to access */
     int resNum;
 
-    // See specific resource for meaning
+    /** See specific resource for meaning */
     unsigned cmd;
 
-    // See specific resource for meaning
+    /** See specific resource for meaning */
     unsigned idx;
 
-    // Some Resources May Need Priority
+    /** Some Resources May Need Priority */
     int priority;
 };
 
+/** The ResourceSked maintains the complete schedule
+    for an instruction. That schedule includes what
+    resources an instruction wants to acquire at each
+    pipeline stage and is represented by a collection
+    of ScheduleEntry objects (described above) that
+    must be executed in-order.
+
+    In every pipeline stage, the InOrder model will
+    process all entries on the resource schedule for
+    that stage and then send the instruction to the next
+    stage if and only if the instruction successfully
+    completed each ScheduleEntry.
+*/
 class ResourceSked {
   public:
     typedef std::list<ScheduleEntry*>::iterator SkedIt;
+    typedef std::vector<std::list<ScheduleEntry*> > StageList;
 
     ResourceSked();
 
+    /** Initialize the current entry pointer to
+        pipeline stage 0 and the 1st schedule entry
+    */
     void init();
 
+    /** Goes through the remaining stages on the schedule
+        and sums all the remaining entries left to be
+        processed
+    */
     int size();
+
+    /** Is the schedule empty? */
     bool empty();
+
+    /** Beginning Entry of this schedule */
+    SkedIt begin();
+
+    /** Ending Entry of this schedule */
+    SkedIt end();
+
+    /** What is the next task for this instruction schedule? */
     ScheduleEntry* top();
+
+    /** Top() Task is completed, remove it from schedule */
     void pop();
+
+    /** Add To Schedule based on stage num and priority of
+        Schedule Entry
+    */
     void push(ScheduleEntry* sked_entry);
+
+    /** Add Schedule Entry to be in front of another Entry */
     void pushBefore(ScheduleEntry* sked_entry, int sked_cmd, int sked_cmd_idx);
+
+    /** Print what's left on the instruction schedule */
     void print();
 
+    StageList *getStages()
+    {
+        return &stages;
+    }
+
   private:
+    /** Current Schedule Entry Pointer */
     SkedIt curSkedEntry;
-    std::vector<std::list<ScheduleEntry*> > sked;
 
+    /** The Stage-by-Stage Resource Schedule:
+        Resized to Number of Stages in the constructor
+    */
+    StageList stages;
+
+    /** Find a place to insert the instruction using the
+        schedule entry's priority
+    */
     SkedIt findIterByPriority(ScheduleEntry *sked_entry, int stage_num);
+
+    /** Find a place to insert the instruction using a particular command
+        to look for.
+    */
     SkedIt findIterByCommand(ScheduleEntry *sked_entry, int stage_num,
                              int sked_cmd, int sked_cmd_idx = -1);
 };
 
+/** Wrapper class around the SkedIt iterator in the Resource Sked so that
+    we can use ++ operator to automatically go to the next available
+    resource schedule entry but otherwise maintain same functionality
+    as a normal iterator.
+*/
+class RSkedIt
+{
+  public:
+    RSkedIt()
+        : stages(NULL), curStage(0), numStages(0)
+    { }
+
+
+    /** init() must be called before the use of any other member
+        in the RSkedIt class.
+    */
+    void init(ResourceSked* rsked)
+    {
+        stages = rsked->getStages();
+        numStages = stages->size();
+    }
+
+    /* Update the encapsulated "myIt" iterator, but only
+       update curStage/curStage_end if the iterator is valid.
+       The iterator could be invalid in the case where
+       someone is saving the end of a list (i.e. std::list->end())
+    */
+    RSkedIt operator=(ResourceSked::SkedIt const &rhs)
+    {
+        myIt = rhs;
+        if (myIt != (*stages)[numStages-1].end()) {
+            curStage = (*myIt)->stageNum;
+            curStage_end = (*stages)[curStage].end();
+        }
+        return *this;
+    }
+
+    /** Increment to the next entry in current stage.
+        If no more entries then find the next stage that has
+        resource schedule to complete.
+        If no more stages, then return the end() iterator from
+        the last stage to indicate we are done.
+    */
+    RSkedIt &operator++(int unused)
+    {
+        if (++myIt == curStage_end) {
+            curStage++;
+            while (curStage < numStages) {
+                if ((*stages)[curStage].empty()) {
+                    curStage++;
+                } else {
+                    myIt = (*stages)[curStage].begin();
+                    curStage_end = (*stages)[curStage].end();
+                    return *this;
+                }
+            }
+
+            myIt = (*stages)[numStages - 1].end();
+        }
+
+        return *this;
+    }
+
+    /** The "pointer" operator can be used on a RSkedIt and it
+        will use the encapsulated iterator
+    */
+    ScheduleEntry* operator->()
+    {
+        return *myIt;
+    }
+
+    /** Dereferencing a RSkedIt will access the encapsulated
+        iterator
+    */
+    ScheduleEntry* operator*()
+    {
+        return *myIt;
+    }
+
+    /** Equality for RSkedIt only compares the "myIt" iterators,
+        as the other members are just ancillary
+    */
+    bool operator==(RSkedIt const &rhs)
+    {
+        return this->myIt == rhs.myIt;
+    }
+
+    /** Inequality for RSkedIt only compares the "myIt" iterators,
+        as the other members are just ancillary
+    */
+    bool operator!=(RSkedIt const &rhs)
+    {
+        return this->myIt != rhs.myIt;
+    }
+
+    /* The == and != operator overloads should be sufficient
+       here; if direct access to the schedule iterator is
+       otherwise needed, then this can be used */
+    ResourceSked::SkedIt getIt()
+    {
+        return myIt;
+    }
+
+  private:
+    /** Schedule Iterator that this class is encapsulating */
+    ResourceSked::SkedIt myIt;
+
+    /** Ptr to resource schedule that the 'myIt' iterator
+        belongs to
+    */
+    ResourceSked::StageList *stages;
+
+    /** The end() iterator of the current stage. */
+    ResourceSked::SkedIt curStage_end;
+
+    /** Current Stage that "myIt" refers to. */
+    int curStage;
+
+    /** Number of stages in the "*stages" object. */
+    int numStages;
+};
+
 #endif //__CPU_INORDER_RESOURCE_SKED_HH__
diff --git a/src/cpu/inorder/resources/agen_unit.cc b/src/cpu/inorder/resources/agen_unit.cc
index f1862b94a..764cd9446 100644
--- a/src/cpu/inorder/resources/agen_unit.cc
+++ b/src/cpu/inorder/resources/agen_unit.cc
@@ -50,8 +50,8 @@ AGENUnit::regStats()
 void
 AGENUnit::execute(int slot_num)
 {
-    ResourceRequest* agen_req = reqMap[slot_num];
-    DynInstPtr inst = reqMap[slot_num]->inst;
+    ResourceRequest* agen_req = reqs[slot_num];
+    DynInstPtr inst = reqs[slot_num]->inst;
 #if TRACING_ON
     ThreadID tid = inst->readTid();
 #endif
diff --git a/src/cpu/inorder/resources/branch_predictor.cc b/src/cpu/inorder/resources/branch_predictor.cc
index 8ca5a9718..5a22e40eb 100644
--- a/src/cpu/inorder/resources/branch_predictor.cc
+++ b/src/cpu/inorder/resources/branch_predictor.cc
@@ -66,7 +66,7 @@ BranchPredictor::execute(int slot_num)
 {
     // After this is working, change this to a reinterpret cast
     // for performance considerations
-    ResourceRequest* bpred_req = reqMap[slot_num];
+    ResourceRequest* bpred_req = reqs[slot_num];
     DynInstPtr inst = bpred_req->inst;
     ThreadID tid = inst->readTid();
     int seq_num = inst->seqNum;
diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc
index 8b4dd4402..b17e5b3da 100644
--- a/src/cpu/inorder/resources/cache_unit.cc
+++ b/src/cpu/inorder/resources/cache_unit.cc
@@ -133,6 +133,10 @@ CacheUnit::getPort(const string &if_name, int idx)
 void
 CacheUnit::init()
 {
+    for (int i = 0; i < width; i++) {
+        reqs[i] = new CacheRequest(this);
+    }
+
     // Currently Used to Model TLB Latency. Eventually
     // Switch to Timing TLB translations.
     resourceEvent = new CacheUnitEvent[width];
@@ -250,20 +254,16 @@ CacheUnit::removeAddrDependency(DynInstPtr inst)
 ResReqPtr
 CacheUnit::findRequest(DynInstPtr inst)
 {
-    map<int, ResReqPtr>::iterator map_it = reqMap.begin();
-    map<int, ResReqPtr>::iterator map_end = reqMap.end();
-
-    while (map_it != map_end) {
+    for (int i = 0; i < width; i++) {
         CacheRequest* cache_req =
-            dynamic_cast<CacheRequest*>((*map_it).second);
+            dynamic_cast<CacheRequest*>(reqs[i]);
         assert(cache_req);
 
-        if (cache_req &&
+        if (cache_req->valid &&
             cache_req->getInst() == inst &&
-            cache_req->instIdx == inst->resSched.top()->idx) {
+            cache_req->instIdx == inst->curSkedEntry->idx) {
             return cache_req;
         }
-        map_it++;
     }
 
     return NULL;
@@ -272,20 +272,16 @@ CacheUnit::findRequest(DynInstPtr inst)
 ResReqPtr
 CacheUnit::findRequest(DynInstPtr inst, int idx)
 {
-    map<int, ResReqPtr>::iterator map_it = reqMap.begin();
-    map<int, ResReqPtr>::iterator map_end = reqMap.end();
-
-    while (map_it != map_end) {
+    for (int i = 0; i < width; i++) {
         CacheRequest* cache_req =
-            dynamic_cast<CacheRequest*>((*map_it).second);
+            dynamic_cast<CacheRequest*>(reqs[i]);
         assert(cache_req);
 
-        if (cache_req &&
+        if (cache_req->valid &&
             cache_req->getInst() == inst &&
             cache_req->instIdx == idx) {
             return cache_req;
         }
-        map_it++;
     }
 
     return NULL;
@@ -296,7 +292,8 @@ ResReqPtr
 CacheUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx,
                      int slot_num, unsigned cmd)
 {
-    ScheduleEntry* sched_entry = inst->resSched.top();
+    ScheduleEntry* sched_entry = *inst->curSkedEntry;
+    CacheRequest* cache_req = dynamic_cast<CacheRequest*>(reqs[slot_num]);
 
     if (!inst->validMemAddr()) {
         panic("Mem. Addr. must be set before requesting cache access\n");
@@ -343,10 +340,10 @@ CacheUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx,
               sched_entry->cmd, name());
     }
 
-    return new CacheRequest(this, inst, stage_num, id, slot_num,
-                            sched_entry->cmd, 0, pkt_cmd,
-                            0/*flags*/, this->cpu->readCpuId(),
-                            inst->resSched.top()->idx);
+    cache_req->setRequest(inst, stage_num, id, slot_num,
+                          sched_entry->cmd, pkt_cmd,
+                          inst->curSkedEntry->idx);
+    return cache_req;
 }
 
 void
@@ -357,17 +354,17 @@ CacheUnit::requestAgain(DynInstPtr inst, bool &service_request)
 
     // Check to see if this instruction is requesting the same command
     // or a different one
-    if (cache_req->cmd != inst->resSched.top()->cmd &&
-        cache_req->instIdx == inst->resSched.top()->idx) {
+    if (cache_req->cmd != inst->curSkedEntry->cmd &&
+        cache_req->instIdx == inst->curSkedEntry->idx) {
         // If different, then update command in the request
-        cache_req->cmd = inst->resSched.top()->cmd;
+        cache_req->cmd = inst->curSkedEntry->cmd;
         DPRINTF(InOrderCachePort,
                 "[tid:%i]: [sn:%i]: Updating the command for this "
                 "instruction\n ", inst->readTid(), inst->seqNum);
 
         service_request = true;
-    } else if (inst->resSched.top()->idx != CacheUnit::InitSecondSplitRead &&
-               inst->resSched.top()->idx != CacheUnit::InitSecondSplitWrite) {        
+    } else if (inst->curSkedEntry->idx != CacheUnit::InitSecondSplitRead &&
+               inst->curSkedEntry->idx != CacheUnit::InitSecondSplitWrite) {
         // If same command, just check to see if memory access was completed
         // but dont try to re-execute
         DPRINTF(InOrderCachePort,
@@ -487,14 +484,20 @@ CacheUnit::read(DynInstPtr inst, Addr addr,
         inst->splitMemData = new uint8_t[size];
         
         if (!inst->splitInstSked) {
+            assert(0 && "Split Requests Not Supported for Now...");
+
             // Schedule Split Read/Complete for Instruction
             // ==============================
             int stage_num = cache_req->getStageNum();
-        
-            int stage_pri = ThePipeline::getNextPriority(inst, stage_num);
+            RSkedPtr inst_sked = (stage_num >= ThePipeline::BackEndStartStage) ?
+                inst->backSked : inst->frontSked;
+
+            // this is just an arbitrarily high priority to ensure that this
+            // gets pushed to the back of the list
+            int stage_pri = 20;
         
             int isplit_cmd = CacheUnit::InitSecondSplitRead;
-            inst->resSched.push(new
+            inst_sked->push(new
                                 ScheduleEntry(stage_num,
                                               stage_pri,
                                               cpu->resPool->getResIdx(DCache),
@@ -502,7 +505,7 @@ CacheUnit::read(DynInstPtr inst, Addr addr,
                                               1));
 
             int csplit_cmd = CacheUnit::CompleteSecondSplitRead;
-            inst->resSched.push(new
+            inst_sked->push(new
                                 ScheduleEntry(stage_num + 1,
                                               1/*stage_pri*/,
                                               cpu->resPool->getResIdx(DCache),
@@ -590,27 +593,33 @@ CacheUnit::write(DynInstPtr inst, uint8_t *data, unsigned size,
         inst->splitInst = true;        
 
         if (!inst->splitInstSked) {
+            assert(0 && "Split Requests Not Supported for Now...");
+
             // Schedule Split Read/Complete for Instruction
             // ==============================
             int stage_num = cache_req->getStageNum();
+            RSkedPtr inst_sked = (stage_num >= ThePipeline::BackEndStartStage) ?
+                inst->backSked : inst->frontSked;
         
-            int stage_pri = ThePipeline::getNextPriority(inst, stage_num);
+            // this is just an arbitrarily high priority to ensure that this
+            // gets pushed to the back of the list
+            int stage_pri = 20;
         
             int isplit_cmd = CacheUnit::InitSecondSplitWrite;
-            inst->resSched.push(new
-                                ScheduleEntry(stage_num,
-                                              stage_pri,
-                                              cpu->resPool->getResIdx(DCache),
-                                              isplit_cmd,
-                                              1));
+            inst_sked->push(new
+                            ScheduleEntry(stage_num,
+                                          stage_pri,
+                                          cpu->resPool->getResIdx(DCache),
+                                          isplit_cmd,
+                                          1));
 
             int csplit_cmd = CacheUnit::CompleteSecondSplitWrite;
-            inst->resSched.push(new
-                                ScheduleEntry(stage_num + 1,
-                                              1/*stage_pri*/,
-                                              cpu->resPool->getResIdx(DCache),
-                                              csplit_cmd,
-                                              1));
+            inst_sked->push(new
+                            ScheduleEntry(stage_num + 1,
+                                          1/*stage_pri*/,
+                                          cpu->resPool->getResIdx(DCache),
+                                          csplit_cmd,
+                                          1));
             inst->splitInstSked = true;
         } else {
             DPRINTF(InOrderCachePort, "[tid:%i] sn:%i] Retrying Split Read "
@@ -639,8 +648,6 @@ CacheUnit::write(DynInstPtr inst, uint8_t *data, unsigned size,
 
     if (inst->fault == NoFault) {
         if (!cache_req->splitAccess) {            
-            // Remove this line since storeData is saved in INST?
-            cache_req->reqData = new uint8_t[size];
             doCacheAccess(inst, write_res);
         } else {            
             doCacheAccess(inst, write_res, cache_req);            
@@ -655,16 +662,19 @@ CacheUnit::write(DynInstPtr inst, uint8_t *data, unsigned size,
 void
 CacheUnit::execute(int slot_num)
 {
-    CacheReqPtr cache_req = dynamic_cast<CacheReqPtr>(reqMap[slot_num]);
+    CacheReqPtr cache_req = dynamic_cast<CacheReqPtr>(reqs[slot_num]);
     assert(cache_req);
 
-    if (cachePortBlocked) {
+    if (cachePortBlocked &&
+        (cache_req->cmd == InitiateReadData ||
+         cache_req->cmd == InitiateWriteData ||
+         cache_req->cmd == InitSecondSplitRead ||
+         cache_req->cmd == InitSecondSplitWrite)) {
         DPRINTF(InOrderCachePort, "Cache Port Blocked. Cannot Access\n");
-        cache_req->setCompleted(false);
+        cache_req->done(false);
         return;
     }
 
-
     DynInstPtr inst = cache_req->inst;
 #if TRACING_ON
     ThreadID tid = inst->readTid();
@@ -681,7 +691,12 @@ CacheUnit::execute(int slot_num)
         acc_type = "read";
 #endif        
       case InitiateWriteData:
-            
+        if (cachePortBlocked) {
+            DPRINTF(InOrderCachePort, "Cache Port Blocked. Cannot Access\n");
+            cache_req->done(false);
+            return;
+        }
+
         DPRINTF(InOrderCachePort,
                 "[tid:%u]: [sn:%i] Initiating data %s access to %s for "
                 "addr. %08p\n", tid, inst->seqNum, acc_type, name(),
@@ -796,7 +811,7 @@ CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res,
     CacheReqPtr cache_req;
     
     if (split_req == NULL) {        
-        cache_req = dynamic_cast<CacheReqPtr>(reqMap[inst->getCurResSlot()]);
+        cache_req = dynamic_cast<CacheReqPtr>(reqs[inst->getCurResSlot()]);
     } else{
         cache_req = split_req;
     }        
@@ -855,7 +870,7 @@ CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res,
                     "[tid:%i] [sn:%i] cannot access cache, because port "
                     "is blocked. now waiting to retry request\n", tid, 
                     inst->seqNum);
-            cache_req->setCompleted(false);
+            cache_req->done(false);
             cachePortBlocked = true;
         } else {
             DPRINTF(InOrderCachePort,
@@ -879,7 +894,7 @@ CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res,
         // Make cache request again since access due to
         // inability to access
         DPRINTF(InOrderStall, "STALL: \n");
-        cache_req->setCompleted(false);
+        cache_req->done(false);
     }
 
 }
@@ -902,7 +917,7 @@ CacheUnit::processCacheCompletion(PacketPtr pkt)
                 cache_pkt->cacheReq->getTid(),
                 cache_pkt->cacheReq->seqNum);
 
-        cache_pkt->cacheReq->done();
+        cache_pkt->cacheReq->freeSlot();
         delete cache_pkt;
 
         cpu->wakeCPU();
@@ -1047,10 +1062,10 @@ CacheUnitEvent::CacheUnitEvent()
 void
 CacheUnitEvent::process()
 {
-    DynInstPtr inst = resource->reqMap[slotIdx]->inst;
-    int stage_num = resource->reqMap[slotIdx]->getStageNum();
+    DynInstPtr inst = resource->reqs[slotIdx]->inst;
+    int stage_num = resource->reqs[slotIdx]->getStageNum();
     ThreadID tid = inst->threadNumber;
-    CacheReqPtr req_ptr = dynamic_cast<CacheReqPtr>(resource->reqMap[slotIdx]);
+    CacheReqPtr req_ptr = dynamic_cast<CacheReqPtr>(resource->reqs[slotIdx]);
 
     DPRINTF(InOrderTLB, "Waking up from TLB Miss caused by [sn:%i].\n",
             inst->seqNum);
@@ -1061,13 +1076,15 @@ CacheUnitEvent::process()
     tlb_res->tlbBlocked[tid] = false;
 
     tlb_res->cpu->pipelineStage[stage_num]->
-        unsetResStall(tlb_res->reqMap[slotIdx], tid);
+        unsetResStall(tlb_res->reqs[slotIdx], tid);
 
     req_ptr->tlbStall = false;
 
     if (req_ptr->isSquashed()) {
-        req_ptr->done();
+        req_ptr->freeSlot();
     }
+
+    tlb_res->cpu->wakeCPU();
 }
 
 void
@@ -1112,15 +1129,10 @@ void
 CacheUnit::squash(DynInstPtr inst, int stage_num,
                   InstSeqNum squash_seq_num, ThreadID tid)
 {
-    vector<int> slot_remove_list;
-
-    map<int, ResReqPtr>::iterator map_it = reqMap.begin();
-    map<int, ResReqPtr>::iterator map_end = reqMap.end();
-
-    while (map_it != map_end) {
-        ResReqPtr req_ptr = (*map_it).second;
+    for (int i = 0; i < width; i++) {
+        ResReqPtr req_ptr = reqs[i];
 
-        if (req_ptr &&
+        if (req_ptr->valid &&
             req_ptr->getInst()->readTid() == tid &&
             req_ptr->getInst()->seqNum > squash_seq_num) {
 
@@ -1133,7 +1145,6 @@ CacheUnit::squash(DynInstPtr inst, int stage_num,
                         "squashed, ignoring squash process.\n",
                         req_ptr->getInst()->readTid(),
                         req_ptr->getInst()->seqNum);
-                map_it++;                
                 continue;                
             }
 
@@ -1147,18 +1158,14 @@ CacheUnit::squash(DynInstPtr inst, int stage_num,
             if (cache_req->tlbStall) {
                 tlbBlocked[tid] = false;
 
-                int stall_stage = reqMap[req_slot_num]->getStageNum();
+                int stall_stage = reqs[req_slot_num]->getStageNum();
 
                 cpu->pipelineStage[stall_stage]->
-                    unsetResStall(reqMap[req_slot_num], tid);
+                    unsetResStall(reqs[req_slot_num], tid);
             }
 
             if (!cache_req->tlbStall && !cache_req->isMemAccPending()) {
-                // Mark request for later removal
-                cpu->reqRemoveList.push(req_ptr);
-
-                // Mark slot for removal from resource
-                slot_remove_list.push_back(req_ptr->getSlot());
+                freeSlot(req_slot_num);
             } else {
                 DPRINTF(InOrderCachePort,
                         "[tid:%i] Request from [sn:%i] squashed, but still "
@@ -1170,14 +1177,8 @@ CacheUnit::squash(DynInstPtr inst, int stage_num,
                         req_ptr->getInst()->readTid(), req_ptr->getInst()->seqNum,
                         req_ptr->getInst()->splitInst);
             }
-
         }
-
-        map_it++;
     }
 
-    // Now Delete Slot Entry from Req. Map
-    for (int i = 0; i < slot_remove_list.size(); i++)
-        freeSlot(slot_remove_list[i]);
 }
 
diff --git a/src/cpu/inorder/resources/cache_unit.hh b/src/cpu/inorder/resources/cache_unit.hh
index afcb36a24..097b6fa7a 100644
--- a/src/cpu/inorder/resources/cache_unit.hh
+++ b/src/cpu/inorder/resources/cache_unit.hh
@@ -219,20 +219,18 @@ class CacheUnitEvent : public ResourceEvent {
     void process();
 };
 
+//@todo: Move into CacheUnit Class for private access to "valid" field
 class CacheRequest : public ResourceRequest
 {
   public:
-    CacheRequest(CacheUnit *cres, DynInstPtr inst, int stage_num, int res_idx,
-                 int slot_num, unsigned cmd, int req_size,
-                 MemCmd::Command pkt_cmd, unsigned flags, int cpu_id, int idx)
-        : ResourceRequest(cres, inst, stage_num, res_idx, slot_num, cmd),
-          pktCmd(pkt_cmd), memReq(NULL), reqData(NULL), dataPkt(NULL),
-          retryPkt(NULL), memAccComplete(false), memAccPending(false),
-          tlbStall(false), splitAccess(false), splitAccessNum(-1),
-          split2ndAccess(false), instIdx(idx), fetchBufferFill(false)
+    CacheRequest(CacheUnit *cres)
+        :  ResourceRequest(cres), memReq(NULL), reqData(NULL),
+           dataPkt(NULL), retryPkt(NULL), memAccComplete(false),
+           memAccPending(false), tlbStall(false), splitAccess(false),
+           splitAccessNum(-1), split2ndAccess(false),
+           fetchBufferFill(false)
     { }
 
-
     virtual ~CacheRequest()
     {
         if (reqData && !splitAccess) {
@@ -240,6 +238,37 @@ class CacheRequest : public ResourceRequest
         }
     }
 
+    void setRequest(DynInstPtr _inst, int stage_num, int res_idx, int slot_num,
+                    unsigned _cmd, MemCmd::Command pkt_cmd, int idx)
+    {
+        // cache-specific fields first, generic ones via the base class
+        this->pktCmd = pkt_cmd;
+        this->instIdx = idx;
+        ResourceRequest::setRequest(_inst, stage_num, res_idx, slot_num, _cmd);
+    }
+
+    void clearRequest()
+    {
+        // The data buffer is freed here unless this was a split access.
+        if (!splitAccess && reqData) {
+            delete [] reqData;
+        }
+
+        reqData = NULL;
+        memReq = NULL;
+        dataPkt = NULL;
+        retryPkt = NULL;
+        tlbStall = false;
+        memAccPending = false;
+        memAccComplete = false;
+        fetchBufferFill = false;
+        split2ndAccess = false;
+        splitAccessNum = -1;
+        splitAccess = false;
+        instIdx = 0;
+        ResourceRequest::clearRequest();
+    }
+
     virtual PacketDataPtr getData()
     { return reqData; }
 
diff --git a/src/cpu/inorder/resources/decode_unit.cc b/src/cpu/inorder/resources/decode_unit.cc
index c2f7ae22d..71d33ab90 100644
--- a/src/cpu/inorder/resources/decode_unit.cc
+++ b/src/cpu/inorder/resources/decode_unit.cc
@@ -49,21 +49,24 @@ DecodeUnit::DecodeUnit(std::string res_name, int res_id, int res_width,
 void
 DecodeUnit::execute(int slot_num)
 {
-    ResourceRequest* decode_req = reqMap[slot_num];
-    DynInstPtr inst = reqMap[slot_num]->inst;
+    ResourceRequest* decode_req = reqs[slot_num];
+    DynInstPtr inst = reqs[slot_num]->inst;
     ThreadID tid = inst->readTid();
 
     switch (decode_req->cmd)
     {
       case DecodeInst:
         {
-            bool done_sked = ThePipeline::createBackEndSchedule(inst);
+            inst->setBackSked(cpu->createBackEndSked(inst));
 
-            if (done_sked) {
+            if (inst->backSked != NULL) {
                 DPRINTF(InOrderDecode,
                     "[tid:%i]: Setting Destination Register(s) for [sn:%i].\n",
                     tid, inst->seqNum);
                 regDepMap[tid]->insert(inst);
+
+                //inst->printSked();
+
                 decode_req->done();
             } else {
                 DPRINTF(Resource,
diff --git a/src/cpu/inorder/resources/execution_unit.cc b/src/cpu/inorder/resources/execution_unit.cc
index 36bf2a4dc..b2540cff8 100644
--- a/src/cpu/inorder/resources/execution_unit.cc
+++ b/src/cpu/inorder/resources/execution_unit.cc
@@ -42,7 +42,7 @@ ExecutionUnit::ExecutionUnit(string res_name, int res_id, int res_width,
                              int res_latency, InOrderCPU *_cpu,
                              ThePipeline::Params *params)
     : Resource(res_name, res_id, res_width, res_latency, _cpu),
-      lastExecuteTick(0), lastControlTick(0)
+      lastExecuteTick(0), lastControlTick(0), serializeTick(0)
 { }
 
 void
@@ -82,27 +82,52 @@ ExecutionUnit::regStats()
 void
 ExecutionUnit::execute(int slot_num)
 {
-    ResourceRequest* exec_req = reqMap[slot_num];
-    DynInstPtr inst = reqMap[slot_num]->inst;
+    ResourceRequest* exec_req = reqs[slot_num];
+    DynInstPtr inst = reqs[slot_num]->inst;
     Fault fault = NoFault;
     int seq_num = inst->seqNum;
+    Tick cur_tick = curTick();
+
+    if (cur_tick == serializeTick) {
+        DPRINTF(InOrderExecute, "Can not execute [tid:%i][sn:%i][PC:%s] %s. "
+                "All instructions are being serialized this cycle\n",
+                inst->readTid(), seq_num, inst->pcState(), inst->instName());
+        exec_req->done(false);
+        return;
+    }
 
-    DPRINTF(InOrderExecute, "[tid:%i] Executing [sn:%i] [PC:%s] %s.\n",
-            inst->readTid(), seq_num, inst->pcState(), inst->instName());
 
     switch (exec_req->cmd)
     {
       case ExecuteInst:
         {
-            if (curTick() != lastExecuteTick) {
-                lastExecuteTick = curTick();
+            if (inst->isNop()) {
+                DPRINTF(InOrderExecute, "[tid:%i] [sn:%i] [PC:%s] Ignoring execution"
+                        "of %s.\n", inst->readTid(), seq_num, inst->pcState(),
+                        inst->instName());
+                inst->setExecuted();
+                exec_req->done();
+                return;
+            } else {
+                DPRINTF(InOrderExecute, "[tid:%i] Executing [sn:%i] [PC:%s] %s.\n",
+                        inst->readTid(), seq_num, inst->pcState(), inst->instName());
             }
 
+            if (cur_tick != lastExecuteTick) {
+                lastExecuteTick = cur_tick;
+            }
+
+            assert(!inst->isMemRef());
+
+            if (inst->isSerializeAfter()) {
+                serializeTick = cur_tick;
+                DPRINTF(InOrderExecute, "Serializing execution after [tid:%i] "
+                        "[sn:%i] [PC:%s] %s.\n", inst->readTid(), seq_num,
+                        inst->pcState(), inst->instName());
+            }
 
-            if (inst->isMemRef()) {
-                panic("%s not configured to handle memory ops.\n", resName);
-            } else if (inst->isControl()) {
-                if (lastControlTick == curTick()) {
+            if (inst->isControl()) {
+                if (lastControlTick == cur_tick) {
                     DPRINTF(InOrderExecute, "Can not Execute More than One Control "
                             "Inst Per Cycle. Blocking Request.\n");
                     exec_req->done(false);
diff --git a/src/cpu/inorder/resources/execution_unit.hh b/src/cpu/inorder/resources/execution_unit.hh
index a6694ddb5..b03a6655e 100644
--- a/src/cpu/inorder/resources/execution_unit.hh
+++ b/src/cpu/inorder/resources/execution_unit.hh
@@ -76,6 +76,7 @@ class ExecutionUnit : public Resource {
     Stats::Scalar executions;
     Tick lastExecuteTick;
     Tick lastControlTick;
+    Tick serializeTick;
 };
 
 
diff --git a/src/cpu/inorder/resources/fetch_seq_unit.cc b/src/cpu/inorder/resources/fetch_seq_unit.cc
index 6f84a333d..d23ea0a82 100644
--- a/src/cpu/inorder/resources/fetch_seq_unit.cc
+++ b/src/cpu/inorder/resources/fetch_seq_unit.cc
@@ -62,13 +62,17 @@ FetchSeqUnit::init()
 {
     resourceEvent = new FetchSeqEvent[width];
 
+    for (int i = 0; i < width; i++) {
+        reqs[i] = new ResourceRequest(this);
+    }
+
     initSlots();
 }
 
 void
 FetchSeqUnit::execute(int slot_num)
 {
-    ResourceRequest* fs_req = reqMap[slot_num];
+    ResourceRequest* fs_req = reqs[slot_num];
     DynInstPtr inst = fs_req->inst;
     ThreadID tid = inst->readTid();
     int stage_num = fs_req->getStageNum();
@@ -96,7 +100,7 @@ FetchSeqUnit::execute(int slot_num)
                 fs_req->done();
             } else {
                 DPRINTF(InOrderStall, "STALL: [tid:%i]: NPC not valid\n", tid);
-                fs_req->setCompleted(false);
+                fs_req->done(false);
             }
         }
         break;
diff --git a/src/cpu/inorder/resources/fetch_unit.cc b/src/cpu/inorder/resources/fetch_unit.cc
index 0e9866708..a0d830ecf 100644
--- a/src/cpu/inorder/resources/fetch_unit.cc
+++ b/src/cpu/inorder/resources/fetch_unit.cc
@@ -56,6 +56,31 @@ FetchUnit::FetchUnit(string res_name, int res_id, int res_width,
       predecoder(NULL)
 { }
 
+/** Free every FetchBlock, and its data buffer, still queued in this unit. */
+FetchUnit::~FetchUnit()
+{
+    std::list<FetchBlock*>::iterator fetch_it = fetchBuffer.begin();
+    std::list<FetchBlock*>::iterator end_it = fetchBuffer.end();
+    while (fetch_it != end_it) {
+        // array delete, as in execute() and squashCacheRequest()
+        delete [] (*fetch_it)->block;
+        delete *fetch_it;
+        ++fetch_it;
+    }
+    fetchBuffer.clear();
+
+    std::list<FetchBlock*>::iterator pend_it = pendingFetch.begin();
+    std::list<FetchBlock*>::iterator pend_end = pendingFetch.end();
+    while (pend_it != pend_end) {
+        if ((*pend_it)->block) {
+            delete [] (*pend_it)->block;
+        }
+        delete *pend_it;
+        ++pend_it;
+    }
+    pendingFetch.clear();
+}
+
 void
 FetchUnit::createMachInst(std::list<FetchBlock*>::iterator fetch_it,
                           DynInstPtr inst)
@@ -118,33 +143,24 @@ ResReqPtr
 FetchUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx,
                      int slot_num, unsigned cmd)
 {
-    ScheduleEntry* sched_entry = inst->resSched.top();
+    ScheduleEntry* sched_entry = *inst->curSkedEntry;
+    CacheRequest* cache_req = dynamic_cast<CacheRequest*>(reqs[slot_num]);
 
     if (!inst->validMemAddr()) {
         panic("Mem. Addr. must be set before requesting cache access\n");
     }
 
-    MemCmd::Command pkt_cmd;
-
-    switch (sched_entry->cmd)
-    {
-      case InitiateFetch:
-        pkt_cmd = MemCmd::ReadReq;
+    assert(sched_entry->cmd == InitiateFetch);
 
-        DPRINTF(InOrderCachePort,
-                "[tid:%i]: Fetch request from [sn:%i] for addr %08p\n",
-                inst->readTid(), inst->seqNum, inst->getMemAddr());
-        break;
+    DPRINTF(InOrderCachePort,
+            "[tid:%i]: Fetch request from [sn:%i] for addr %08p\n",
+            inst->readTid(), inst->seqNum, inst->getMemAddr());
 
-      default:
-        panic("%i: Unexpected request type (%i) to %s", curTick(),
-              sched_entry->cmd, name());
-    }
+    cache_req->setRequest(inst, stage_num, id, slot_num,
+                          sched_entry->cmd, MemCmd::ReadReq,
+                          inst->curSkedEntry->idx);
 
-    return new CacheRequest(this, inst, stage_num, id, slot_num,
-                            sched_entry->cmd, 0, pkt_cmd,
-                            0/*flags*/, this->cpu->readCpuId(),
-                            inst->resSched.top()->idx);
+    return cache_req;
 }
 
 void
@@ -214,12 +230,12 @@ FetchUnit::markBlockUsed(std::list<FetchBlock*>::iterator block_it)
 void
 FetchUnit::execute(int slot_num)
 {
-    CacheReqPtr cache_req = dynamic_cast<CacheReqPtr>(reqMap[slot_num]);
+    CacheReqPtr cache_req = dynamic_cast<CacheReqPtr>(reqs[slot_num]);
     assert(cache_req);
 
-    if (cachePortBlocked) {
+    if (cachePortBlocked && cache_req->cmd == InitiateFetch) {
         DPRINTF(InOrderCachePort, "Cache Port Blocked. Cannot Access\n");
-        cache_req->setCompleted(false);
+        cache_req->done(false);
         return;
     }
 
@@ -270,7 +286,7 @@ FetchUnit::execute(int slot_num)
             // If not, block this request.
             if (pendingFetch.size() >= fetchBuffSize) {
                 DPRINTF(InOrderCachePort, "No room available in fetch buffer.\n");
-                cache_req->setCompleted(false);
+                cache_req->done();
                 return;
             }
 
@@ -337,6 +353,8 @@ FetchUnit::execute(int slot_num)
                     return;
                 }
 
+                delete [] (*repl_it)->block;
+                delete *repl_it;
                 fetchBuffer.erase(repl_it);
             }
 
@@ -414,6 +432,7 @@ FetchUnit::processCacheCompletion(PacketPtr pkt)
                 cache_pkt->cacheReq->seqNum);
 
         cache_pkt->cacheReq->done();
+        cache_pkt->cacheReq->freeSlot();
         delete cache_pkt;
 
         cpu->wakeCPU();
@@ -447,7 +466,7 @@ FetchUnit::processCacheCompletion(PacketPtr pkt)
     short asid = cpu->asid[tid];
 
     assert(!cache_req->isSquashed());
-    assert(inst->resSched.top()->cmd == CompleteFetch);
+    assert(inst->curSkedEntry->cmd == CompleteFetch);
 
     DPRINTF(InOrderCachePort,
             "[tid:%u]: [sn:%i]: Processing fetch access for block %#x\n",
@@ -514,6 +533,10 @@ FetchUnit::squashCacheRequest(CacheReqPtr req_ptr)
                 DPRINTF(InOrderCachePort, "[sn:%i] Removing Pending Fetch "
                         "for block %08p (cnt=%i)\n", inst->seqNum,
                         block_addr, (*block_it)->cnt);
+                if ((*block_it)->block) {
+                    delete [] (*block_it)->block;
+                }
+                delete *block_it;
                 pendingFetch.erase(block_it);
             }
         }
diff --git a/src/cpu/inorder/resources/fetch_unit.hh b/src/cpu/inorder/resources/fetch_unit.hh
index 035f3f4a1..fa133b9eb 100644
--- a/src/cpu/inorder/resources/fetch_unit.hh
+++ b/src/cpu/inorder/resources/fetch_unit.hh
@@ -55,6 +55,8 @@ class FetchUnit : public CacheUnit
     FetchUnit(std::string res_name, int res_id, int res_width,
               int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params);
 
+    virtual ~FetchUnit();
+
     typedef ThePipeline::DynInstPtr DynInstPtr;
     typedef TheISA::ExtMachInst ExtMachInst;
 
diff --git a/src/cpu/inorder/resources/graduation_unit.cc b/src/cpu/inorder/resources/graduation_unit.cc
index 8ccdaa36a..edc2fb3ff 100644
--- a/src/cpu/inorder/resources/graduation_unit.cc
+++ b/src/cpu/inorder/resources/graduation_unit.cc
@@ -37,8 +37,7 @@ GraduationUnit::GraduationUnit(std::string res_name, int res_id, int res_width,
                                int res_latency, InOrderCPU *_cpu,
                                ThePipeline::Params *params)
     : Resource(res_name, res_id, res_width, res_latency, _cpu),
-      lastCycleGrad(0), numCycleGrad(0)
-      
+      lastNonSpecTick(0)
 {
     for (ThreadID tid = 0; tid < ThePipeline::MaxThreads; tid++) {
         nonSpecInstActive[tid] = &cpu->nonSpecInstActive[tid];
@@ -49,23 +48,27 @@ GraduationUnit::GraduationUnit(std::string res_name, int res_id, int res_width,
 void
 GraduationUnit::execute(int slot_num)
 {
-    ResourceRequest* grad_req = reqMap[slot_num];
-    DynInstPtr inst = reqMap[slot_num]->inst;
+    ResourceRequest* grad_req = reqs[slot_num];
+    DynInstPtr inst = reqs[slot_num]->inst;
     ThreadID tid = inst->readTid();
-    int stage_num = inst->resSched.top()->stageNum;
+    int stage_num = inst->curSkedEntry->stageNum;
 
     switch (grad_req->cmd)
     {
       case GraduateInst:
         {
-            // Make sure this is the last thing on the resource schedule
-            assert(inst->resSched.size() == 1);
+            if (lastNonSpecTick == curTick()) {
+                DPRINTF(InOrderGraduation, "Unable to graduate [sn:%i]. "
+                        "Only 1 nonspec inst. per cycle can graduate.\n");
+                grad_req->done(false);
+                return;
+            }
 
-             // Handle Any Faults Before Graduating Instruction
+            // Handle Any Faults Before Graduating Instruction
             if (inst->fault != NoFault) {
                 cpu->trap(inst->fault, tid, inst);
                 grad_req->setCompleted(false);
-                 return;
+                return;
             }
 
             DPRINTF(InOrderGraduation,
@@ -80,6 +83,7 @@ GraduationUnit::execute(int slot_num)
                 DPRINTF(InOrderGraduation,
                         "[tid:%i] Non-speculative inst [sn:%i] graduated\n",
                         tid, inst->seqNum);
+                lastNonSpecTick = curTick();
             }
 
             if (inst->traceData) {
diff --git a/src/cpu/inorder/resources/graduation_unit.hh b/src/cpu/inorder/resources/graduation_unit.hh
index aae41993f..59631bfcb 100644
--- a/src/cpu/inorder/resources/graduation_unit.hh
+++ b/src/cpu/inorder/resources/graduation_unit.hh
@@ -57,9 +57,7 @@ class GraduationUnit : public Resource {
     void execute(int slot_num);
 
   protected:
-    Tick lastCycleGrad;
-    int numCycleGrad;
-
+    Tick lastNonSpecTick;
     bool *nonSpecInstActive[ThePipeline::MaxThreads];
 
     InstSeqNum *nonSpecSeqNum[ThePipeline::MaxThreads];
diff --git a/src/cpu/inorder/resources/inst_buffer.cc b/src/cpu/inorder/resources/inst_buffer.cc
index 18dd26a78..46f5cce72 100644
--- a/src/cpu/inorder/resources/inst_buffer.cc
+++ b/src/cpu/inorder/resources/inst_buffer.cc
@@ -62,7 +62,7 @@ InstBuffer::regStats()
 void
 InstBuffer::execute(int slot_idx)
 {
-    ResReqPtr ib_req = reqMap[slot_idx];
+    ResReqPtr ib_req = reqs[slot_idx];
     DynInstPtr inst = ib_req->inst;
     ThreadID tid = inst->readTid();
     int stage_num = ib_req->getStageNum();
@@ -99,19 +99,22 @@ InstBuffer::execute(int slot_idx)
                         inst->seqNum, next_stage);
 
                 // Add to schedule: Insert into buffer in next stage
-                int stage_pri = ThePipeline::getNextPriority(inst,
-                                                             next_stage);
+                int stage_pri = 20;
+                RSkedPtr insert_sked = (stage_num >= ThePipeline::BackEndStartStage) ?
+                    inst->backSked : inst->frontSked;
 
-                inst->resSched.push(new ScheduleEntry(next_stage,
+                insert_sked->push(new ScheduleEntry(next_stage,
                                                       stage_pri,
                                                       id,
                                                       InstBuffer::InsertInst));
 
                 // Add to schedule: Remove from buffer in next next (bypass)
                 // stage
-                stage_pri = ThePipeline::getNextPriority(inst, bypass_stage);
+                stage_pri = 20;
+                RSkedPtr bypass_sked = (stage_num >= ThePipeline::BackEndStartStage) ?
+                    inst->backSked : inst->frontSked;
 
-                inst->resSched.push(new ScheduleEntry(bypass_stage,
+               bypass_sked->push(new ScheduleEntry(bypass_stage,
                                                       stage_pri,
                                                       id,
                                                       InstBuffer::RemoveInst));
diff --git a/src/cpu/inorder/resources/inst_buffer_new.cc b/src/cpu/inorder/resources/inst_buffer_new.cc
deleted file mode 100644
index 2e5a9666a..000000000
--- a/src/cpu/inorder/resources/inst_buffer_new.cc
+++ /dev/null
@@ -1,158 +0,0 @@
-/*
- * Copyright (c) 2007 MIPS Technologies, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Korey Sewell
- *
- */
-
-#include <vector>
-#include <list>
-
-#include "arch/isa_traits.hh"
-#include "config/the_isa.hh"
-#include "cpu/inorder/pipeline_traits.hh"
-#include "cpu/inorder/resources/inst_buffer.hh"
-#include "cpu/inorder/cpu.hh"
-
-using namespace std;
-using namespace TheISA;
-using namespace ThePipeline;
-
-InstBuffer::InstBuffer(string res_name, int res_id, int res_width,
-                 int res_latency, InOrderCPU *_cpu)
-    : Resource(res_name, res_id, res_width, res_latency, _cpu)
-{ }
-
-ResReqPtr
-InstBuffer::getRequest(DynInstPtr inst, int stage_num, int res_idx,
-                     int slot_num)
-{
-    // After this is working, change this to a reinterpret cast
-    // for performance considerations
-    InstBufferEntry* ib_entry = dynamic_cast<InstBufferEntry*>(inst->resSched.top());
-    assert(ib_entry);
-
-    return new InstBufferRequest(this, inst, stage_num, id, slot_num,
-                             ib_entry->cmd);
-}
-
-void
-InstBuffer::execute(int slot_idx)
-{
-    // After this is working, change this to a reinterpret cast
-    // for performance considerations
-    InstBufferRequest* ib_req = dynamic_cast<InstBufferRequest*>(reqMap[slot_idx]);
-    assert(ib_req);
-
-    DynInstPtr inst = ib_req->inst;
-    ThreadID tid = inst->readTid();
-    int seq_num = inst->seqNum;
-    ib_req->fault = NoFault;
-
-    switch (ib_req->cmd)
-    {
-      case InsertInst:
-        {
-            DPRINTF(Resource, "[tid:%i]: Inserting [sn:%i] into buffer.\n",
-                tid, seq_num);
-            insert(inst);
-            ib_req->done();
-        }
-        break;
-
-      case RemoveInst:
-        {
-            DPRINTF(Resource, "[tid:%i]: Removing [sn:%i] from buffer.\n",
-                    tid, seq_num);
-            remove(inst);
-            ib_req->done();
-        }
-        break;
-
-      default:
-        fatal("Unrecognized command to %s", resName);
-    }
-
-    DPRINTF(Resource, "Buffer now contains %i insts.\n", instList.size());
-}
-
-void
-InstBuffer::insert(DynInstPtr inst)
-{
-    instList.push_back(inst);
-}
-
-void
-InstBuffer::remove(DynInstPtr inst)
-{
-    std::list<DynInstPtr>::iterator list_it = instList.begin();
-    std::list<DynInstPtr>::iterator list_end = instList.end();
-
-    while (list_it != list_end) {
-        if((*list_it) == inst) {
-            instList.erase(list_it);
-            break;
-        }
-        list_it++;
-    }
-}
-
-void
-InstBuffer::pop()
-{ instList.pop_front(); }
-
-ThePipeline::DynInstPtr
-InstBuffer::top()
-{ return instList.front(); }
-
-void
-InstBuffer::squash(InstSeqNum squash_seq_num, ThreadID tid)
-{
-    list<DynInstPtr>::iterator list_it = instList.begin();
-    list<DynInstPtr>::iterator list_end = instList.end();
-    queue<list<DynInstPtr>::iterator> remove_list;
-
-    // Collect All Instructions to be Removed in Remove List
-    while (list_it != list_end) {
-        if((*list_it)->seqNum > squash_seq_num) {
-            DPRINTF(Resource, "[tid:%i]: Squashing [sn:%i] in resource.\n",
-                    tid, (*list_it)->seqNum);
-            (*list_it)->setSquashed();
-            remove_list.push(list_it);
-        }
-
-        list_it++;
-    }
-
-    // Removed Instructions from InstList & Clear Remove List
-    while (!remove_list.empty()) {
-        instList.erase(remove_list.front());
-        remove_list.pop();
-    }
-
-    Resource::squash(squash_seq_num, tid);
-}
diff --git a/src/cpu/inorder/resources/inst_buffer_new.hh b/src/cpu/inorder/resources/inst_buffer_new.hh
deleted file mode 100644
index b1d5a7b09..000000000
--- a/src/cpu/inorder/resources/inst_buffer_new.hh
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Copyright (c) 2007 MIPS Technologies, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Korey Sewell
- *
- */
-
-#ifndef __CPU_INORDER_INST_BUFF_UNIT_HH__
-#define __CPU_INORDER_INST_BUFF_UNIT_HH__
-
-#include <vector>
-#include <list>
-#include <string>
-
-#include "cpu/inorder/resource.hh"
-#include "cpu/inorder/inorder_dyn_inst.hh"
-#include "cpu/inorder/pipeline_traits.hh"
-#include "cpu/inorder/cpu.hh"
-
-class InstBuffer : public Resource {
-  public:
-    typedef InOrderDynInst::DynInstPtr DynInstPtr;
-
-  public:
-    enum Command {
-        InsertInst,
-        InsertAddr,
-        RemoveInst,
-        RemoveAddr
-    };
-
-  public:
-    InstBuffer(std::string res_name, int res_id, int res_width,
-              int res_latency, InOrderCPU *_cpu);
-    virtual ~InstBuffer() {}
-
-    virtual ResourceRequest* getRequest(DynInstPtr _inst, int stage_num,
-                                        int res_idx, int slot_num);
-
-    virtual void execute(int slot_num);
-
-    virtual void insert(DynInstPtr inst);
-
-    virtual void remove(DynInstPtr inst);
-
-    virtual void pop();
-
-    virtual DynInstPtr top();
-
-    virtual void squash(InstSeqNum squash_seq_num, ThreadID tid);
-
-  protected:
-    /** List of instructions this resource is currently
-     *  processing.
-     */
-    std::list<DynInstPtr> instList;
-
-    /** @todo: Add Resource Stats Here */
-
-};
-
-struct InstBufferEntry : public ThePipeline::ScheduleEntry {
-    InstBufferEntry(int stage_num, int res_num, InstBuffer::Command _cmd) :
-        ScheduleEntry(stage_num, res_num), cmd(_cmd)
-    { }
-
-    InstBuffer::Command cmd;
-};
-
-class InstBufferRequest : public ResourceRequest {
-  public:
-    typedef InOrderDynInst::DynInstPtr DynInstPtr;
-
-  public:
-    InstBufferRequest(InstBuffer *res, DynInstPtr inst, int stage_num, int res_idx, int slot_num,
-                  InstBuffer::Command _cmd)
-        : ResourceRequest(res, inst, stage_num, res_idx, slot_num),
-          cmd(_cmd)
-    { }
-
-    InstBuffer::Command cmd;
-};
-
-
-#endif //__CPU_INORDER_INST_BUFF_UNIT_HH__
diff --git a/src/cpu/inorder/resources/mult_div_unit.cc b/src/cpu/inorder/resources/mult_div_unit.cc
index 5aa0b0aa1..ad8b2b47b 100644
--- a/src/cpu/inorder/resources/mult_div_unit.cc
+++ b/src/cpu/inorder/resources/mult_div_unit.cc
@@ -76,6 +76,10 @@ MultDivUnit::init()
     // Set Up Resource Events to Appropriate Resource BandWidth
     resourceEvent = new MDUEvent[width];
 
+    for (int i = 0; i < width; i++) {
+        reqs[i] = new ResourceRequest(this);
+    }
+
     initSlots();
 }
 
@@ -92,7 +96,7 @@ void
 MultDivUnit::freeSlot(int slot_idx)
 {
     DPRINTF(InOrderMDU, "Freeing slot for inst:%i\n | slots-free:%i | "
-            "slots-used:%i\n", reqMap[slot_idx]->getInst()->seqNum,
+            "slots-used:%i\n", reqs[slot_idx]->getInst()->seqNum,
             slotsAvail(), slotsInUse());
     
     Resource::freeSlot(slot_idx);    
@@ -110,9 +114,9 @@ MultDivUnit::requestAgain(DynInstPtr inst, bool &service_request)
 
     // Check to see if this instruction is requesting the same command
     // or a different one
-    if (mult_div_req->cmd != inst->resSched.top()->cmd) {
+    if (mult_div_req->cmd != inst->curSkedEntry->cmd) {
         // If different, then update command in the request
-        mult_div_req->cmd = inst->resSched.top()->cmd;
+        mult_div_req->cmd = inst->curSkedEntry->cmd;
         DPRINTF(InOrderMDU,
                 "[tid:%i]: [sn:%i]: Updating the command for this "
                 "instruction\n", inst->readTid(), inst->seqNum);
@@ -132,7 +136,7 @@ MultDivUnit::getSlot(DynInstPtr inst)
 
     // If we have this instruction's request already then return
     if (slot_num != -1 &&         
-        inst->resSched.top()->cmd == reqMap[slot_num]->cmd)
+        inst->curSkedEntry->cmd == reqs[slot_num]->cmd)
         return slot_num;
     
     unsigned repeat_rate = 0;
@@ -202,8 +206,8 @@ MultDivUnit::getDivOpSize(DynInstPtr inst)
 void 
 MultDivUnit::execute(int slot_num)
 {
-    ResourceRequest* mult_div_req = reqMap[slot_num];
-    DynInstPtr inst = reqMap[slot_num]->inst;
+    ResourceRequest* mult_div_req = reqs[slot_num];
+    DynInstPtr inst = reqs[slot_num]->inst;
  
     switch (mult_div_req->cmd)
     {
@@ -275,8 +279,8 @@ MultDivUnit::execute(int slot_num)
 void 
 MultDivUnit::exeMulDiv(int slot_num)
 {
-    ResourceRequest* mult_div_req = reqMap[slot_num];
-    DynInstPtr inst = reqMap[slot_num]->inst;
+    ResourceRequest* mult_div_req = reqs[slot_num];
+    DynInstPtr inst = reqs[slot_num]->inst;
 
     inst->fault = inst->execute();
 
@@ -310,7 +314,7 @@ MDUEvent::process()
 
     mdu_res->exeMulDiv(slotIdx);
 
-    ResourceRequest* mult_div_req = resource->reqMap[slotIdx];
+    ResourceRequest* mult_div_req = resource->reqs[slotIdx];
 
     mult_div_req->done();    
 }
diff --git a/src/cpu/inorder/resources/tlb_unit.cc b/src/cpu/inorder/resources/tlb_unit.cc
index 59840d15b..37aec2209 100644
--- a/src/cpu/inorder/resources/tlb_unit.cc
+++ b/src/cpu/inorder/resources/tlb_unit.cc
@@ -72,6 +72,10 @@ TLBUnit::init()
 {
     resourceEvent = new TLBUnitEvent[width];
 
+    for (int i = 0; i < width; i++) {
+        reqs[i] = new TLBUnitRequest(this);
+    }
+
     initSlots();
 }
 
@@ -90,8 +94,9 @@ TLBUnit::getRequest(DynInstPtr _inst, int stage_num,
                             int res_idx, int slot_num,
                             unsigned cmd)
 {
-    return new TLBUnitRequest(this, _inst, stage_num, res_idx, slot_num,
-                          cmd);
+    TLBUnitRequest *tlb_req = dynamic_cast<TLBUnitRequest*>(reqs[slot_num]);
+    tlb_req->setRequest(_inst, stage_num, id, slot_num, cmd);
+    return tlb_req;
 }
 
 void
@@ -99,7 +104,7 @@ TLBUnit::execute(int slot_idx)
 {
     // After this is working, change this to a reinterpret cast
     // for performance considerations
-    TLBUnitRequest* tlb_req = dynamic_cast<TLBUnitRequest*>(reqMap[slot_idx]);
+    TLBUnitRequest* tlb_req = dynamic_cast<TLBUnitRequest*>(reqs[slot_idx]);
     assert(tlb_req != 0x0);
 
     DynInstPtr inst = tlb_req->inst;
@@ -200,8 +205,8 @@ TLBUnitEvent::TLBUnitEvent()
 void
 TLBUnitEvent::process()
 {
-    DynInstPtr inst = resource->reqMap[slotIdx]->inst;
-    int stage_num = resource->reqMap[slotIdx]->getStageNum();
+    DynInstPtr inst = resource->reqs[slotIdx]->inst;
+    int stage_num = resource->reqs[slotIdx]->getStageNum();
     ThreadID tid = inst->threadNumber;
 
     DPRINTF(InOrderTLB, "Waking up from TLB Miss caused by [sn:%i].\n",
@@ -212,31 +217,18 @@ TLBUnitEvent::process()
 
     tlb_res->tlbBlocked[tid] = false;
 
-    tlb_res->cpu->pipelineStage[stage_num]->unsetResStall(tlb_res->reqMap[slotIdx], tid);
-
-    // Effectively NOP the instruction but still allow it
-    // to commit
-    //while (!inst->resSched.empty() &&
-    //   inst->resSched.top()->stageNum != ThePipeline::NumStages - 1) {
-    //inst->resSched.pop();
-    //}
+    tlb_res->cpu->pipelineStage[stage_num]->
+        unsetResStall(tlb_res->reqs[slotIdx], tid);
 }
 
 void
 TLBUnit::squash(DynInstPtr inst, int stage_num,
                    InstSeqNum squash_seq_num, ThreadID tid)
 {
-     //@TODO: Figure out a way to consolidate common parts
-     //       of this squash code
-     std::vector<int> slot_remove_list;
-
-     map<int, ResReqPtr>::iterator map_it = reqMap.begin();
-     map<int, ResReqPtr>::iterator map_end = reqMap.end();
-
-     while (map_it != map_end) {
-         ResReqPtr req_ptr = (*map_it).second;
+    for (int i = 0; i < width; i++) {
+        ResReqPtr req_ptr = reqs[i];
 
-         if (req_ptr &&
+         if (req_ptr->valid &&
              req_ptr->getInst()->readTid() == tid &&
              req_ptr->getInst()->seqNum > squash_seq_num) {
 
@@ -250,26 +242,16 @@ TLBUnit::squash(DynInstPtr inst, int stage_num,
 
              tlbBlocked[tid] = false;
 
-             int stall_stage = reqMap[req_slot_num]->getStageNum();
+             int stall_stage = reqs[req_slot_num]->getStageNum();
 
-             cpu->pipelineStage[stall_stage]->unsetResStall(reqMap[req_slot_num], tid);
+             cpu->pipelineStage[stall_stage]->
+                 unsetResStall(reqs[req_slot_num], tid);
 
              if (resourceEvent[req_slot_num].scheduled())
                  unscheduleEvent(req_slot_num);
 
-             // Mark request for later removal
-             cpu->reqRemoveList.push(req_ptr);
-
-             // Mark slot for removal from resource
-             slot_remove_list.push_back(req_ptr->getSlot());
+             freeSlot(req_slot_num);
          }
-
-         map_it++;
-     }
-
-     // Now Delete Slot Entry from Req. Map
-     for (int i = 0; i < slot_remove_list.size(); i++) {
-         freeSlot(slot_remove_list[i]);
      }
 }
 
diff --git a/src/cpu/inorder/resources/tlb_unit.hh b/src/cpu/inorder/resources/tlb_unit.hh
index eb1bf55f0..904ac3eba 100644
--- a/src/cpu/inorder/resources/tlb_unit.hh
+++ b/src/cpu/inorder/resources/tlb_unit.hh
@@ -99,9 +99,15 @@ class TLBUnitRequest : public ResourceRequest {
     typedef ThePipeline::DynInstPtr DynInstPtr;
 
   public:
-    TLBUnitRequest(TLBUnit *res, DynInstPtr inst, int stage_num, int res_idx, int slot_num,
-                   unsigned _cmd)
-        : ResourceRequest(res, inst, stage_num, res_idx, slot_num, _cmd)
+    TLBUnitRequest(TLBUnit *res)
+        : ResourceRequest(res), memReq(NULL)
+    {
+    }
+
+    RequestPtr memReq;
+
+    void setRequest(DynInstPtr inst, int stage_num, int res_idx, int slot_num,
+                    unsigned _cmd)
     {
         Addr aligned_addr;
         int req_size;
@@ -131,9 +137,10 @@ class TLBUnitRequest : public ResourceRequest {
                                            inst->readTid());
             memReq = inst->dataMemReq;
         }
+
+        ResourceRequest::setRequest(inst, stage_num, res_idx, slot_num, _cmd);
     }
 
-    RequestPtr memReq;
 };
 
 
diff --git a/src/cpu/inorder/resources/use_def.cc b/src/cpu/inorder/resources/use_def.cc
index 743011573..19246a30b 100644
--- a/src/cpu/inorder/resources/use_def.cc
+++ b/src/cpu/inorder/resources/use_def.cc
@@ -88,33 +88,48 @@ UseDefUnit::regStats()
     Resource::regStats();
 }
 
+// Allocate this unit's per-slot events and request objects.
+void
+UseDefUnit::init()
+{
+    // Events are only allocated when the unit has a positive latency.
+    resourceEvent =
+        (latency > 0) ? new ResourceEvent[width] : NULL;
+
+    // One UseDefRequest per slot (width slots in total).
+    for (int slot = 0; slot < width; slot++) {
+        reqs[slot] = new UseDefRequest(this);
+    }
+
+    // Finish by initializing the slots.
+    initSlots();
+}
+
 ResReqPtr
 UseDefUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx,
                      int slot_num, unsigned cmd)
 {
-    return new UseDefRequest(this, inst, stage_num, id, slot_num, cmd,
-                             inst->resSched.top()->idx);
+    UseDefRequest *ud_req = dynamic_cast<UseDefRequest*>(reqs[slot_num]);
+    ud_req->setRequest(inst, stage_num, id, slot_num, cmd,
+                       inst->curSkedEntry->idx);
+    return ud_req;
 }
 
 
 ResReqPtr
 UseDefUnit::findRequest(DynInstPtr inst)
 {
-    map<int, ResReqPtr>::iterator map_it = reqMap.begin();
-    map<int, ResReqPtr>::iterator map_end = reqMap.end();
-
-    while (map_it != map_end) {
-        UseDefRequest* ud_req = 
-            dynamic_cast<UseDefRequest*>((*map_it).second);
+    for (int i = 0; i < width; i++) {
+        UseDefRequest* ud_req =
+            dynamic_cast<UseDefRequest*>(reqs[i]);
         assert(ud_req);
 
-        if (ud_req &&
+        if (ud_req->valid &&
             ud_req->getInst() == inst &&
-            ud_req->cmd == inst->resSched.top()->cmd &&
-            ud_req->useDefIdx == inst->resSched.top()->idx) {
+            ud_req->cmd == inst->curSkedEntry->cmd &&
+            ud_req->useDefIdx == inst->curSkedEntry->idx) {
             return ud_req;
         }
-        map_it++;
     }
 
     return NULL;
@@ -125,7 +140,7 @@ UseDefUnit::execute(int slot_idx)
 {
     // After this is working, change this to a reinterpret cast
     // for performance considerations
-    UseDefRequest* ud_req = dynamic_cast<UseDefRequest*>(reqMap[slot_idx]);
+    UseDefRequest* ud_req = dynamic_cast<UseDefRequest*>(reqs[slot_idx]);
     assert(ud_req);
 
     DynInstPtr inst = ud_req->inst;
@@ -408,15 +423,10 @@ UseDefUnit::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num,
     DPRINTF(InOrderUseDef, "[tid:%i]: Updating Due To Squash After [sn:%i].\n",
             tid, squash_seq_num);
 
-    std::vector<int> slot_remove_list;
-
-    map<int, ResReqPtr>::iterator map_it = reqMap.begin();
-    map<int, ResReqPtr>::iterator map_end = reqMap.end();
+    for (int i = 0; i < width; i++) {
+        ResReqPtr req_ptr = reqs[i];
 
-    while (map_it != map_end) {
-        ResReqPtr req_ptr = (*map_it).second;
-
-        if (req_ptr &&
+        if (req_ptr->valid &&
             req_ptr->getInst()->readTid() == tid &&
             req_ptr->getInst()->seqNum > squash_seq_num) {
 
@@ -431,20 +441,9 @@ UseDefUnit::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num,
                 
                 unscheduleEvent(req_slot_num);
             }
-            
-            // Mark request for later removal
-            cpu->reqRemoveList.push(req_ptr);
 
-            // Mark slot for removal from resource
-            slot_remove_list.push_back(req_ptr->getSlot());
+            freeSlot(req_slot_num);
         }
-
-        map_it++;
-    }
-
-    // Now Delete Slot Entry from Req. Map
-    for (int i = 0; i < slot_remove_list.size(); i++) {
-        freeSlot(slot_remove_list[i]);
     }
 
     if (outReadSeqNum[tid] >= squash_seq_num) {
diff --git a/src/cpu/inorder/resources/use_def.hh b/src/cpu/inorder/resources/use_def.hh
index d2cc55315..21770cec6 100644
--- a/src/cpu/inorder/resources/use_def.hh
+++ b/src/cpu/inorder/resources/use_def.hh
@@ -56,6 +56,8 @@ class UseDefUnit : public Resource {
     UseDefUnit(std::string res_name, int res_id, int res_width,
                int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params);
 
+    void init();
+
     ResourceRequest* getRequest(DynInstPtr _inst, int stage_num,
                                         int res_idx, int slot_num,
                                         unsigned cmd);
@@ -96,14 +98,20 @@ class UseDefUnit : public Resource {
         typedef ThePipeline::DynInstPtr DynInstPtr;
 
       public:
-        UseDefRequest(UseDefUnit *res, DynInstPtr inst, int stage_num, 
-                      int res_idx, int slot_num, unsigned cmd, 
-                      int use_def_idx)
-            : ResourceRequest(res, inst, stage_num, res_idx, slot_num, cmd),
-              useDefIdx(use_def_idx)
+        UseDefRequest(UseDefUnit *res)
+            : ResourceRequest(res)
         { }
 
         int useDefIdx;
+
+        void setRequest(DynInstPtr _inst, int stage_num, int res_idx,
+                        int slot_num, unsigned _cmd, int idx)
+        {
+            useDefIdx = idx;
+            // Remaining fields are handled by the base ResourceRequest setter.
+            ResourceRequest::setRequest(_inst, stage_num, res_idx, slot_num,
+                                        _cmd);
+        }
     };
 
   protected:
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index 92691720b..647c48a76 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -137,6 +137,10 @@ class DefaultFetch
         {}
 
         void
+        markDelayed()
+        {}
+
+        void
         finish(Fault fault, RequestPtr req, ThreadContext *tc,
                BaseTLB::Mode mode)
         {
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index d0c83d586..d2cde496e 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -604,6 +604,9 @@ DefaultFetch<Impl>::finishTranslation(Fault fault, RequestPtr mem_req)
     ThreadID tid = mem_req->threadId();
     Addr block_PC = mem_req->getVaddr();
 
+    // Wake up CPU if it was idle
+    cpu->wakeCPU();
+
     // If translation was successful, attempt to read the icache block.
     if (fault == NoFault) {
         // Build packet here.
@@ -654,6 +657,9 @@ DefaultFetch<Impl>::finishTranslation(Fault fault, RequestPtr mem_req)
         instruction->fault = fault;
         wroteToTimeBuffer = true;
 
+        DPRINTF(Activity, "Activity this cycle.\n");
+        cpu->activityThisCycle();
+
         fetchStatus[tid] = TrapPending;
 
         DPRINTF(Fetch, "[tid:%i]: Blocked, need to handle the trap.\n", tid);
@@ -1064,6 +1070,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
     Addr pcOffset = fetchOffset[tid];
     Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
 
+    bool inRom = isRomMicroPC(thisPC.microPC());
+
     // If returning from the delay of a cache miss, then update the status
     // to running, otherwise do the cache access.  Possibly move this up
     // to tick() function.
@@ -1077,7 +1085,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
         Addr block_PC = icacheBlockAlignPC(fetchAddr);
 
         // Unless buffer already got the block, fetch it from icache.
-        if (!cacheDataValid[tid] || block_PC != cacheDataPC[tid]) {
+        if (!(cacheDataValid[tid] && block_PC == cacheDataPC[tid]) && !inRom) {
             DPRINTF(Fetch, "[tid:%i]: Attempting to translate and read "
                     "instruction, starting at PC %s.\n", tid, thisPC);
 
@@ -1149,7 +1157,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
            !predictedBranch) {
 
         // If we need to process more memory, do it now.
-        if (!curMacroop && !predecoder.extMachInstReady()) {
+        if (!(curMacroop || inRom) && !predecoder.extMachInstReady()) {
             if (ISA_HAS_DELAY_SLOT && pcOffset == 0) {
                 // Walk past any annulled delay slot instructions.
                 Addr pcAddr = thisPC.instAddr() & BaseCPU::PCMask;
@@ -1175,7 +1183,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
         // Extract as many instructions and/or microops as we can from
         // the memory we've processed so far.
         do {
-            if (!curMacroop) {
+            if (!(curMacroop || inRom)) {
                 if (predecoder.extMachInstReady()) {
                     ExtMachInst extMachInst;
 
@@ -1196,8 +1204,13 @@ DefaultFetch<Impl>::fetch(bool &status_change)
                     break;
                 }
             }
-            if (curMacroop) {
-                staticInst = curMacroop->fetchMicroop(thisPC.microPC());
+            if (curMacroop || inRom) {
+                if (inRom) {
+                    staticInst = cpu->microcodeRom.fetchMicroop(
+                            thisPC.microPC(), curMacroop);
+                } else {
+                    staticInst = curMacroop->fetchMicroop(thisPC.microPC());
+                }
                 if (staticInst->isLastMicroop()) {
                     curMacroop = NULL;
                     pcOffset = 0;
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index 3f3761ff3..03f73c798 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -1241,12 +1241,33 @@ DefaultIEW<Impl>::executeInsts()
                 // Loads will mark themselves as executed, and their writeback
                 // event adds the instruction to the queue to commit
                 fault = ldstQueue.executeLoad(inst);
+
+                if (inst->isTranslationDelayed() &&
+                    fault == NoFault) {
+                    // A hw page table walk is currently going on; the
+                    // instruction must be deferred.
+                    DPRINTF(IEW, "Execute: Delayed translation, deferring "
+                            "load.\n");
+                    instQueue.deferMemInst(inst);
+                    continue;
+                }
+
                 if (inst->isDataPrefetch() || inst->isInstPrefetch()) {
                     fault = NoFault;
                 }
             } else if (inst->isStore()) {
                 fault = ldstQueue.executeStore(inst);
 
+                if (inst->isTranslationDelayed() &&
+                    fault == NoFault) {
+                    // A hw page table walk is currently going on; the
+                    // instruction must be deferred.
+                    DPRINTF(IEW, "Execute: Delayed translation, deferring "
+                            "store.\n");
+                    instQueue.deferMemInst(inst);
+                    continue;
+                }
+
                 // If the store had a fault then it may not have a mem req
                 if (fault != NoFault || inst->readPredicate() == false ||
                         !inst->isStoreConditional()) {
diff --git a/src/cpu/o3/inst_queue.hh b/src/cpu/o3/inst_queue.hh
index be936e204..64df35743 100644
--- a/src/cpu/o3/inst_queue.hh
+++ b/src/cpu/o3/inst_queue.hh
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2011 ARM Limited
+ * All rights reserved.
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2004-2006 The Regents of The University of Michigan
  * All rights reserved.
  *
@@ -180,6 +192,11 @@ class InstructionQueue
      */
     DynInstPtr getInstToExecute();
 
+    /** Returns a memory instruction that was deferred due to a delayed DTB
+     *  translation if it is now ready to execute.
+     */
+    DynInstPtr getDeferredMemInstToExecute();
+
     /**
      * Records the instruction as the producer of a register without
      * adding it to the rest of the IQ.
@@ -223,6 +240,12 @@ class InstructionQueue
     /** Completes a memory operation. */
     void completeMemInst(DynInstPtr &completed_inst);
 
+    /**
+     * Defers a memory instruction when its DTB translation incurs a hw
+     * page table walk.
+     */
+    void deferMemInst(DynInstPtr &deferred_inst);
+
     /** Indicates an ordering violation between a store and a load. */
     void violation(DynInstPtr &store, DynInstPtr &faulting_load);
 
@@ -284,6 +307,11 @@ class InstructionQueue
     /** List of instructions that are ready to be executed. */
     std::list<DynInstPtr> instsToExecute;
 
+    /** List of instructions waiting for their DTB translation to
+     *  complete (hw page table walk in progress).
+     */
+    std::list<DynInstPtr> deferredMemInsts;
+
     /**
      * Struct for comparing entries to be added to the priority queue.
      * This gives reverse ordering to the instructions in terms of
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh
index 91cb2f0c8..aa21a0edc 100644
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2011 ARM Limited
+ * All rights reserved.
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2004-2006 The Regents of The University of Michigan
  * All rights reserved.
  *
@@ -397,6 +409,7 @@ InstructionQueue<Impl>::resetState()
     }
     nonSpecInsts.clear();
     listOrder.clear();
+    deferredMemInsts.clear();
 }
 
 template <class Impl>
@@ -733,6 +746,15 @@ InstructionQueue<Impl>::scheduleReadyInsts()
 
     IssueStruct *i2e_info = issueToExecuteQueue->access(0);
 
+    DynInstPtr deferred_mem_inst;
+    int total_deferred_mem_issued = 0;
+    while (total_deferred_mem_issued < totalWidth &&
+           (deferred_mem_inst = getDeferredMemInstToExecute()) != 0) {
+        issueToExecuteQueue->access(0)->size++;
+        instsToExecute.push_back(deferred_mem_inst);
+        total_deferred_mem_issued++;
+    }
+
     // Have iterator to head of the list
     // While I haven't exceeded bandwidth or reached the end of the list,
     // Try to get a FU that can do what this op needs.
@@ -745,7 +767,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
     ListOrderIt order_end_it = listOrder.end();
     int total_issued = 0;
 
-    while (total_issued < totalWidth &&
+    while (total_issued < (totalWidth - total_deferred_mem_issued) &&
            iewStage->canIssue() &&
            order_it != order_end_it) {
         OpClass op_class = (*order_it).queueType;
@@ -858,7 +880,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
     iqInstsIssued+= total_issued;
 
     // If we issued any instructions, tell the CPU we had activity.
-    if (total_issued) {
+    if (total_issued || total_deferred_mem_issued) {
         cpu->activityThisCycle();
     } else {
         DPRINTF(IQ, "Not able to schedule any instructions.\n");
@@ -1021,6 +1043,11 @@ void
 InstructionQueue<Impl>::rescheduleMemInst(DynInstPtr &resched_inst)
 {
     DPRINTF(IQ, "Rescheduling mem inst [sn:%lli]\n", resched_inst->seqNum);
+
+    // Reset DTB translation state
+    resched_inst->translationStarted = false;
+    resched_inst->translationCompleted = false;
+
     resched_inst->clearCanIssue();
     memDepUnit[resched_inst->threadNumber].reschedule(resched_inst);
 }
@@ -1051,6 +1078,28 @@ InstructionQueue<Impl>::completeMemInst(DynInstPtr &completed_inst)
 
 template <class Impl>
 void
+InstructionQueue<Impl>::deferMemInst(DynInstPtr &deferred_inst)
+{
+    deferredMemInsts.push_back(deferred_inst);
+}
+
+template <class Impl>
+typename Impl::DynInstPtr
+InstructionQueue<Impl>::getDeferredMemInstToExecute()
+{
+    for (ListIt it = deferredMemInsts.begin(); it != deferredMemInsts.end();
+         ++it) {
+        if ((*it)->translationCompleted) {
+            DynInstPtr ret = *it;
+            deferredMemInsts.erase(it);
+            return ret;
+        }
+    }
+    return NULL;
+}
+
+template <class Impl>
+void
 InstructionQueue<Impl>::violation(DynInstPtr &store,
                                   DynInstPtr &faulting_load)
 {
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index dd3604ffe..b5d337935 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -445,12 +445,16 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
     Fault load_fault = NoFault;
 
     DPRINTF(LSQUnit, "Executing load PC %s, [sn:%lli]\n",
-            inst->pcState(),inst->seqNum);
+            inst->pcState(), inst->seqNum);
 
     assert(!inst->isSquashed());
 
     load_fault = inst->initiateAcc();
 
+    if (inst->isTranslationDelayed() &&
+        load_fault == NoFault)
+        return load_fault;
+
     // If the instruction faulted or predicated false, then we need to send it
     // along to commit without the instruction completing.
     if (load_fault != NoFault || inst->readPredicate() == false) {
@@ -532,6 +536,10 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
 
     Fault store_fault = store_inst->initiateAcc();
 
+    if (store_inst->isTranslationDelayed() &&
+        store_fault == NoFault)
+        return store_fault;
+
     if (store_inst->readPredicate() == false)
         store_inst->forwardOldRegs();
 
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index 453699f84..ab1ff91e8 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -752,6 +752,7 @@ TimingSimpleCPU::sendFetch(Fault fault, RequestPtr req, ThreadContext *tc)
     } else {
         delete req;
         // fetch fault: advance directly to next instruction (fault handler)
+        _status = Running;
         advanceInst(fault);
     }
 
@@ -805,12 +806,11 @@ TimingSimpleCPU::completeIfetch(PacketPtr pkt)
     if (curStaticInst && curStaticInst->isMemRef()) {
         // load or store: just send to dcache
         Fault fault = curStaticInst->initiateAcc(this, traceData);
-        if (_status != Running) {
-            // instruction will complete in dcache response callback
-            assert(_status == DcacheWaitResponse ||
-                    _status == DcacheRetry || DTBWaitResponse);
-            assert(fault == NoFault);
-        } else {
+
+        // If we're not running now the instruction will complete in a dcache
+        // response callback or the instruction faulted and has started an
+        // ifetch
+        if (_status == Running) {
             if (fault != NoFault && traceData) {
                 // If there was a fault, we shouldn't trace this instruction.
                 delete traceData;
diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh
index 2b0c8942a..a7a3eb7c3 100644
--- a/src/cpu/simple/timing.hh
+++ b/src/cpu/simple/timing.hh
@@ -108,6 +108,13 @@ class TimingSimpleCPU : public BaseSimpleCPU
         {}
 
         void
+        markDelayed()
+        {
+            assert(cpu->_status == Running);
+            cpu->_status = ITBWaitResponse;
+        }
+
+        void
         finish(Fault fault, RequestPtr req, ThreadContext *tc,
                BaseTLB::Mode mode)
         {
diff --git a/src/cpu/translation.hh b/src/cpu/translation.hh
index 7db7c381a..60953540f 100644
--- a/src/cpu/translation.hh
+++ b/src/cpu/translation.hh
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2011 ARM Limited
+ * All rights reserved.
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2002-2005 The Regents of The University of Michigan
  * Copyright (c) 2009 The University of Edinburgh
  * All rights reserved.
@@ -53,6 +65,7 @@ class WholeTranslationState
     Fault faults[2];
 
   public:
+    bool delay;
     bool isSplit;
     RequestPtr mainReq;
     RequestPtr sreqLow;
@@ -67,8 +80,8 @@ class WholeTranslationState
      */
     WholeTranslationState(RequestPtr _req, uint8_t *_data, uint64_t *_res,
                           BaseTLB::Mode _mode)
-        : outstanding(1), isSplit(false), mainReq(_req), sreqLow(NULL),
-          sreqHigh(NULL), data(_data), res(_res), mode(_mode)
+        : outstanding(1), delay(false), isSplit(false), mainReq(_req),
+          sreqLow(NULL), sreqHigh(NULL), data(_data), res(_res), mode(_mode)
     {
         faults[0] = faults[1] = NoFault;
         assert(mode == BaseTLB::Read || mode == BaseTLB::Write);
@@ -82,8 +95,9 @@ class WholeTranslationState
     WholeTranslationState(RequestPtr _req, RequestPtr _sreqLow,
                           RequestPtr _sreqHigh, uint8_t *_data, uint64_t *_res,
                           BaseTLB::Mode _mode)
-        : outstanding(2), isSplit(true), mainReq(_req), sreqLow(_sreqLow),
-          sreqHigh(_sreqHigh), data(_data), res(_res), mode(_mode)
+        : outstanding(2), delay(false), isSplit(true), mainReq(_req),
+          sreqLow(_sreqLow), sreqHigh(_sreqHigh), data(_data), res(_res),
+          mode(_mode)
     {
         faults[0] = faults[1] = NoFault;
         assert(mode == BaseTLB::Read || mode == BaseTLB::Write);
@@ -221,6 +235,16 @@ class DataTranslation : public BaseTLB::Translation
     }
 
     /**
+     * Signal the translation state that the translation has been delayed due
+     * to a hw page table walk.  Split requests are transparently handled.
+     */
+    void
+    markDelayed()
+    {
+        state->delay = true;
+    }
+
+    /**
      * Finish this part of the translation and indicate that the whole
      * translation is complete if the state says so.
      */
diff --git a/src/dev/SConscript b/src/dev/SConscript
index 7cdea7961..5243da683 100644
--- a/src/dev/SConscript
+++ b/src/dev/SConscript
@@ -69,6 +69,7 @@ if env['FULL_SYSTEM']:
     Source('pcidev.cc')
     Source('pktfifo.cc')
     Source('platform.cc')
+    Source('ps2.cc')
     Source('simple_disk.cc')
     Source('sinic.cc')
     Source('terminal.cc')
diff --git a/src/dev/arm/RealView.py b/src/dev/arm/RealView.py
index cdc06e4ef..ef3f68a88 100644
--- a/src/dev/arm/RealView.py
+++ b/src/dev/arm/RealView.py
@@ -52,6 +52,14 @@ class AmbaDevice(BasicPioDevice):
     abstract = True
     amba_id = Param.UInt32("ID of AMBA device for kernel detection")
 
+class AmbaIntDevice(AmbaDevice):
+    type = 'AmbaIntDevice'
+    abstract = True
+    gic = Param.Gic(Parent.any, "Gic to use for interrupting")
+    int_num = Param.UInt32("Interrupt number that connects to GIC")
+    int_delay = Param.Latency("100ns",
+            "Time between action and interrupt generation by device")
+
 class AmbaDmaDevice(DmaDevice):
     type = 'AmbaDmaDevice'
     abstract = True
@@ -94,16 +102,17 @@ class Sp804(AmbaDevice):
     clock1 = Param.Clock('1MHz', "Clock speed of the input")
     amba_id = 0x00141804
 
-class Pl050(AmbaDevice):
+class Pl050(AmbaIntDevice):
     type = 'Pl050'
-    gic = Param.Gic(Parent.any, "Gic to use for interrupting")
-    int_num = Param.UInt32("Interrupt number that connects to GIC")
-    int_delay = Param.Latency("100ns", "Time between action and interrupt generation by UART")
+    vnc = Param.VncServer(Parent.any, "Vnc server for remote frame buffer display")
+    is_mouse = Param.Bool(False, "Is this interface a mouse, if not a keyboard")
+    int_delay = '1us'
     amba_id = 0x00141050
 
 class Pl111(AmbaDmaDevice):
     type = 'Pl111'
     clock = Param.Clock('24MHz', "Clock speed of the input")
+    vnc   = Param.VncServer(Parent.any, "Vnc server for remote frame buffer display")
     amba_id = 0x00141111
 
 class RealView(Platform):
@@ -121,7 +130,7 @@ class RealViewPBX(RealView):
     timer1 = Sp804(int_num0=37, int_num1=37, pio_addr=0x10012000)
     clcd = Pl111(pio_addr=0x10020000, int_num=55)
     kmi0   = Pl050(pio_addr=0x10006000, int_num=52)
-    kmi1   = Pl050(pio_addr=0x10007000, int_num=53)
+    kmi1   = Pl050(pio_addr=0x10007000, int_num=53, is_mouse=True)
 
     l2x0_fake     = IsaFake(pio_addr=0x1f002000, pio_size=0xfff)
     flash_fake    = IsaFake(pio_addr=0x40000000, pio_size=0x4000000)
@@ -140,7 +149,7 @@ class RealViewPBX(RealView):
     aaci_fake     = AmbaFake(pio_addr=0x10004000)
     mmc_fake      = AmbaFake(pio_addr=0x10005000)
     rtc_fake      = AmbaFake(pio_addr=0x10017000, amba_id=0x41031)
-
+    cf0_fake      = IsaFake(pio_addr=0x18000000, pio_size=0xfff)
 
 
     # Attach I/O devices that are on chip
@@ -175,6 +184,7 @@ class RealViewPBX(RealView):
        self.mmc_fake.pio      = bus.port
        self.rtc_fake.pio      = bus.port
        self.flash_fake.pio    = bus.port
+       self.cf0_fake.pio      = bus.port
 
 # Reference for memory map and interrupt number
 # RealView Emulation Baseboard User Guide (ARM DUI 0143B)
@@ -187,7 +197,7 @@ class RealViewEB(RealView):
     timer1 = Sp804(int_num0=37, int_num1=37, pio_addr=0x10012000)
     clcd   = Pl111(pio_addr=0x10020000, int_num=23)
     kmi0   = Pl050(pio_addr=0x10006000, int_num=20)
-    kmi1   = Pl050(pio_addr=0x10007000, int_num=21)
+    kmi1   = Pl050(pio_addr=0x10007000, int_num=21, is_mouse=True)
 
     l2x0_fake     = IsaFake(pio_addr=0x1f002000, pio_size=0xfff, warn_access="1")
     dmac_fake     = AmbaFake(pio_addr=0x10030000)
diff --git a/src/dev/arm/amba_device.cc b/src/dev/arm/amba_device.cc
index e5d53d6a3..37eb77ae1 100644
--- a/src/dev/arm/amba_device.cc
+++ b/src/dev/arm/amba_device.cc
@@ -47,11 +47,19 @@
 #include "mem/packet_access.hh"
 
 const uint64_t AmbaVendor = ULL(0xb105f00d00000000);
+
 AmbaDevice::AmbaDevice(const Params *p)
     : BasicPioDevice(p), ambaId(AmbaVendor | p->amba_id)
 {
 }
 
+AmbaIntDevice::AmbaIntDevice(const Params *p)
+    : AmbaDevice(p), intNum(p->int_num), gic(p->gic), intDelay(p->int_delay)
+{
+}
+
+
+
 AmbaDmaDevice::AmbaDmaDevice(const Params *p)
     : DmaDevice(p), ambaId(AmbaVendor | p->amba_id),
       pioAddr(p->pio_addr), pioSize(0),
diff --git a/src/dev/arm/amba_device.hh b/src/dev/arm/amba_device.hh
index 1782fb003..297a78f82 100644
--- a/src/dev/arm/amba_device.hh
+++ b/src/dev/arm/amba_device.hh
@@ -55,6 +55,7 @@
 #include "mem/packet.hh"
 #include "mem/packet_access.hh"
 #include "params/AmbaDevice.hh"
+#include "params/AmbaIntDevice.hh"
 #include "params/AmbaDmaDevice.hh"
 
 namespace AmbaDev {
@@ -81,6 +82,18 @@ class AmbaDevice : public BasicPioDevice
     AmbaDevice(const Params *p);
 };
 
+class AmbaIntDevice : public AmbaDevice
+{
+  protected:
+    int intNum;
+    Gic *gic;
+    Tick intDelay;
+
+  public:
+    typedef AmbaIntDeviceParams Params;
+    AmbaIntDevice(const Params *p);
+};
+
 class AmbaDmaDevice : public DmaDevice
 {
   protected:
diff --git a/src/dev/arm/kmi.cc b/src/dev/arm/kmi.cc
index 6cd61fd09..adf1439b3 100644
--- a/src/dev/arm/kmi.cc
+++ b/src/dev/arm/kmi.cc
@@ -37,21 +37,31 @@
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
- * Authors: William Wang
+ * Authors: Ali Saidi
+ *          William Wang
  */
 
 #include "base/trace.hh"
+#include "base/vnc/vncserver.hh"
 #include "dev/arm/amba_device.hh"
 #include "dev/arm/kmi.hh"
+#include "dev/ps2.hh"
 #include "mem/packet.hh"
 #include "mem/packet_access.hh"
 
 Pl050::Pl050(const Params *p)
-    : AmbaDevice(p), control(0x00), status(0x43), kmidata(0x00), clkdiv(0x00),
-      intreg(0x00), intNum(p->int_num), gic(p->gic), intDelay(p->int_delay),
-      intEvent(this)
+    : AmbaIntDevice(p), control(0), status(0x43), clkdiv(0), interrupts(0),
+      rawInterrupts(0), ackNext(0), shiftDown(false), vnc(p->vnc),
+      driverInitialized(false), intEvent(this)
 {
     pioSize = 0xfff;
+
+    if (vnc) {
+        if (!p->is_mouse)
+            vnc->setKeyboard(this);
+        else
+            vnc->setMouse(this);
+    }
 }
 
 Tick
@@ -62,28 +72,39 @@ Pl050::read(PacketPtr pkt)
     Addr daddr = pkt->getAddr() - pioAddr;
     pkt->allocate();
 
-    DPRINTF(Pl050, " read register %#x size=%d\n", daddr, pkt->getSize());
 
-    // use a temporary data since the KMI registers are read/written with
-    // different size operations
-    //
     uint32_t data = 0;
 
     switch (daddr) {
       case kmiCr:
+        DPRINTF(Pl050, "Read Commmand: %#x\n", (uint32_t)control);
         data = control;
         break;
       case kmiStat:
+        if (rxQueue.empty())
+            status.rxfull = 0;
+        else
+            status.rxfull = 1;
+
+        DPRINTF(Pl050, "Read Status: %#x\n", (uint32_t)status);
         data = status;
         break;
       case kmiData:
-        data = kmidata;
+        if (rxQueue.empty()) {
+            data = 0;
+        } else {
+            data = rxQueue.front();
+            rxQueue.pop_front();
+        }
+        DPRINTF(Pl050, "Read Data: %#x\n", (uint32_t)data);
+        updateIntStatus();
         break;
       case kmiClkDiv:
         data = clkdiv;
         break;
       case kmiISR:
-        data = intreg;
+        data = interrupts;
+        DPRINTF(Pl050, "Read Interrupts: %#x\n", (uint32_t)interrupts);
         break;
       default:
         if (AmbaDev::readId(pkt, ambaId, pioAddr)) {
@@ -123,65 +144,224 @@ Pl050::write(PacketPtr pkt)
 
     Addr daddr = pkt->getAddr() - pioAddr;
 
-    DPRINTF(Pl050, " write register %#x value %#x size=%d\n", daddr,
-            pkt->get<uint8_t>(), pkt->getSize());
+    assert(pkt->getSize() == sizeof(uint8_t));
 
-    // use a temporary data since the KMI registers are read/written with
-    // different size operations
-    //
-    uint32_t data = 0;
 
-    switch (pkt->getSize()) {
-      case 1:
-        data = pkt->get<uint8_t>();
+    switch (daddr) {
+      case kmiCr:
+        DPRINTF(Pl050, "Write Commmand: %#x\n", (uint32_t)pkt->get<uint8_t>());
+        control = pkt->get<uint8_t>();
+        updateIntStatus();
         break;
-      case 2:
-        data = pkt->get<uint16_t>();
+      case kmiData:
+        DPRINTF(Pl050, "Write Data: %#x\n", (uint32_t)pkt->get<uint8_t>());
+        processCommand(pkt->get<uint8_t>());
+        updateIntStatus();
         break;
-      case 4:
-        data = pkt->get<uint32_t>();
+      case kmiClkDiv:
+        clkdiv = pkt->get<uint8_t>();
         break;
       default:
-        panic("KMI write size too big?\n");
+        warn("Tried to write PL050 at offset %#x that doesn't exist\n", daddr);
         break;
     }
+    pkt->makeAtomicResponse();
+    return pioDelay;
+}
 
+void
+Pl050::processCommand(uint8_t byte)
+{
+    using namespace Ps2;
 
-    switch (daddr) {
-      case kmiCr:
-        control = data;
+    if (ackNext) {
+        ackNext--;
+        rxQueue.push_back(Ack);
+        updateIntStatus();
+        return;
+    }
+
+    switch (byte) {
+      case Ps2Reset:
+        rxQueue.push_back(Ack);
+        rxQueue.push_back(SelfTestPass);
         break;
-      case kmiStat:
-        panic("Tried to write PL050 register(read only) at offset %#x\n",
-              daddr);
+      case SetResolution:
+      case SetRate:
+      case SetStatusLed:
+      case SetScaling1_1:
+      case SetScaling1_2:
+        rxQueue.push_back(Ack);
+        ackNext = 1;
         break;
-      case kmiData:
-        kmidata = data;
+      case ReadId:
+        rxQueue.push_back(Ack);
+        if (params()->is_mouse)
+            rxQueue.push_back(MouseId);
+        else
+            rxQueue.push_back(KeyboardId);
         break;
-      case kmiClkDiv:
-        clkdiv = data;
+      case TpReadId:
+        if (!params()->is_mouse)
+            break;
+        // We're not a trackpoint device, this should make the probe go away
+        rxQueue.push_back(Ack);
+        rxQueue.push_back(0);
+        rxQueue.push_back(0);
+        // fall through
+      case Disable:
+      case Enable:
+        rxQueue.push_back(Ack);
         break;
-      case kmiISR:
-        panic("Tried to write PL050 register(read only) at offset %#x\n",
-              daddr);
+      case StatusRequest:
+        rxQueue.push_back(Ack);
+        rxQueue.push_back(0);
+        rxQueue.push_back(2); // default resolution
+        rxQueue.push_back(100); // default sample rate
         break;
-      default:
-        warn("Tried to write PL050 at offset %#x that doesn't exist\n", daddr);
+      case TouchKitId:
+        ackNext = 2;
+        rxQueue.push_back(Ack);
+        rxQueue.push_back(TouchKitId);
+        rxQueue.push_back(1);
+        rxQueue.push_back('A');
+
+        driverInitialized = true;
         break;
+      default:
+        panic("Unknown byte received: %d\n", byte);
     }
-    pkt->makeAtomicResponse();
-    return pioDelay;
+
+    updateIntStatus();
+}
+
+
+void
+Pl050::updateIntStatus()
+{
+    if (!rxQueue.empty())
+        rawInterrupts.rx = 1;
+    else
+        rawInterrupts.rx = 0;
+
+    interrupts.tx = rawInterrupts.tx & control.txint_enable;
+    interrupts.rx = rawInterrupts.rx & control.rxint_enable;
+
+    DPRINTF(Pl050, "rawInterupts=%#x control=%#x interrupts=%#x\n",
+            (uint32_t)rawInterrupts, (uint32_t)control, (uint32_t)interrupts);
+
+    if (interrupts && !intEvent.scheduled())
+        schedule(intEvent, curTick() + intDelay);
 }
 
 void
 Pl050::generateInterrupt()
 {
-    if (intreg.rxintr || intreg.txintr) {
+
+    if (interrupts) {
         gic->sendInt(intNum);
-        DPRINTF(Pl050, " -- Generated\n");
+        DPRINTF(Pl050, "Generated interrupt\n");
     }
 }
 
+void
+Pl050::mouseAt(uint16_t x, uint16_t y, uint8_t buttons)
+{
+    using namespace Ps2;
+
+    // If the driver hasn't initialized the device yet, no need to try and send
+    // it anything. Similarly we can get vnc mouse events orders of magnitude
+    // faster than m5 can process them. Only queue up two sets of mouse
+    // movements and don't add more until those are processed.
+    if (!driverInitialized || rxQueue.size() > 10)
+        return;
+
+    // We shouldn't be here unless a vnc server called us in which case
+    // we should have a pointer to it
+    assert(vnc);
+
+    // Convert screen coordinates to touchpad coordinates
+    uint16_t _x = (2047.0/vnc->videoWidth()) * x;
+    uint16_t _y = (2047.0/vnc->videoHeight()) * y;
+
+    rxQueue.push_back(buttons);
+    rxQueue.push_back(_x >> 7);
+    rxQueue.push_back(_x & 0x7f);
+    rxQueue.push_back(_y >> 7);
+    rxQueue.push_back(_y & 0x7f);
+
+    updateIntStatus();
+}
+
+
+void
+Pl050::keyPress(uint32_t key, bool down)
+{
+    using namespace Ps2;
+
+    std::list<uint8_t> keys;
+
+    // convert the X11 keysym into ps2 codes
+    keySymToPs2(key, down, shiftDown, keys);
+
+    // Insert into our queue of characters
+    rxQueue.splice(rxQueue.end(), keys);
+    updateIntStatus();
+}
+
+void
+Pl050::serialize(std::ostream &os)
+{
+    uint8_t ctrlreg = control;
+    SERIALIZE_SCALAR(ctrlreg);
+
+    uint8_t stsreg = status;
+    SERIALIZE_SCALAR(stsreg);
+    SERIALIZE_SCALAR(clkdiv);
+
+    uint8_t ints = interrupts;
+    SERIALIZE_SCALAR(ints);
+
+    uint8_t raw_ints = rawInterrupts;
+    SERIALIZE_SCALAR(raw_ints);
+
+    SERIALIZE_SCALAR(ackNext);
+    SERIALIZE_SCALAR(shiftDown);
+    SERIALIZE_SCALAR(driverInitialized);
+
+    arrayParamOut(os, "rxQueue", rxQueue);
+}
+
+void
+Pl050::unserialize(Checkpoint *cp, const std::string &section)
+{
+    uint8_t ctrlreg;
+    UNSERIALIZE_SCALAR(ctrlreg);
+    control = ctrlreg;
+
+    uint8_t stsreg;
+    UNSERIALIZE_SCALAR(stsreg);
+    status = stsreg;
+
+    UNSERIALIZE_SCALAR(clkdiv);
+
+    uint8_t ints;
+    UNSERIALIZE_SCALAR(ints);
+    interrupts = ints;
+
+    uint8_t raw_ints;
+    UNSERIALIZE_SCALAR(raw_ints);
+    rawInterrupts = raw_ints;
+
+    UNSERIALIZE_SCALAR(ackNext);
+    UNSERIALIZE_SCALAR(shiftDown);
+    UNSERIALIZE_SCALAR(driverInitialized);
+
+    arrayParamIn(cp, section, "rxQueue", rxQueue);
+}
+
+
+
 Pl050 *
 Pl050Params::create()
 {
diff --git a/src/dev/arm/kmi.hh b/src/dev/arm/kmi.hh
index c96dd55a9..1e25f8974 100644
--- a/src/dev/arm/kmi.hh
+++ b/src/dev/arm/kmi.hh
@@ -48,13 +48,16 @@
 #ifndef __DEV_ARM_PL050_HH__
 #define __DEV_ARM_PL050_HH__
 
+#include <list>
+
 #include "base/range.hh"
-#include "dev/io_device.hh"
+#include "base/vnc/vncserver.hh"
+#include "dev/arm/amba_device.hh"
 #include "params/Pl050.hh"
 
 class Gic;
 
-class Pl050 : public AmbaDevice
+class Pl050 : public AmbaIntDevice, public VncKeyboard, public VncMouse
 {
   protected:
     static const int kmiCr       = 0x000;
@@ -63,34 +66,68 @@ class Pl050 : public AmbaDevice
     static const int kmiClkDiv   = 0x00C;
     static const int kmiISR      = 0x010;
 
-    // control register
-    uint8_t control;
+    BitUnion8(ControlReg)
+        Bitfield<0> force_clock_low;
+        Bitfield<1> force_data_low;
+        Bitfield<2> enable;
+        Bitfield<3> txint_enable;
+        Bitfield<4> rxint_enable;
+        Bitfield<5> type;
+    EndBitUnion(ControlReg)
 
-    // status register
-    uint8_t status;
+    /** control register
+     */
+    ControlReg control;
+
+    /** KMI status register */
+    BitUnion8(StatusReg)
+        Bitfield<0> data_in;
+        Bitfield<1> clk_in;
+        Bitfield<2> rxparity;
+        Bitfield<3> rxbusy;
+        Bitfield<4> rxfull;
+        Bitfield<5> txbusy;
+        Bitfield<6> txempty;
+    EndBitUnion(StatusReg)
+
+    StatusReg status;
+
+    /** clock divisor register
+     * This register is just kept around to satisfy reads after driver does
+     * writes. The divisor does nothing, as we're not actually signaling ps2
+     * serial commands to anything.
+     */
+    uint8_t clkdiv;
 
-    // received data (read) or data to be transmitted (write)
-    uint8_t kmidata;
+    BitUnion8(InterruptReg)
+        Bitfield<0> rx;
+        Bitfield<1> tx;
+    EndBitUnion(InterruptReg)
 
-    // clock divisor register
-    uint8_t clkdiv;
+    /** interrupt status register. */
+    InterruptReg interrupts;
+
+    /** raw interrupt register (unmasked) */
+    InterruptReg rawInterrupts;
 
-    BitUnion8(IntReg)
-    Bitfield<0> txintr;
-    Bitfield<1> rxintr;
-    EndBitUnion(IntReg)
+    /** Count of upcoming data bytes the controller should ack and ignore.
+     * The driver is attempting to set up some feature we don't care about
+     */
+    int ackNext;
 
-    /** interrupt mask register. */
-    IntReg intreg;
+    /** is the shift key currently down */
+    bool shiftDown;
 
-    /** Interrupt number to generate */
-    int intNum;
+    /** The vnc server we're connected to (if any) */
+    VncServer *vnc;
 
-    /** Gic to use for interrupting */
-    Gic *gic;
+    /** Whether the linux driver has initialized the device yet, and thus
+     * whether we can send mouse data */
+    bool driverInitialized;
 
-    /** Delay before interrupting */
-    Tick intDelay;
+    /** Update the status of the interrupt registers and schedule an interrupt
+     * if required */
+    void updateIntStatus();
 
     /** Function to generate interrupt */
     void generateInterrupt();
@@ -98,6 +135,15 @@ class Pl050 : public AmbaDevice
     /** Wrapper to create an event out of the thing */
     EventWrapper<Pl050, &Pl050::generateInterrupt> intEvent;
 
+    /** Receive queue. This list contains all the pending commands that
+     * need to be sent to the driver
+     */
+    std::list<uint8_t> rxQueue;
+
+    /** Handle a command sent to the kmi and respond appropriately
+     */
+    void processCommand(uint8_t byte);
+
   public:
     typedef Pl050Params Params;
     const Params *
@@ -111,12 +157,11 @@ class Pl050 : public AmbaDevice
     virtual Tick read(PacketPtr pkt);
     virtual Tick write(PacketPtr pkt);
 
-    /**
-     * Return if we have an interrupt pending
-     * @return interrupt status
-     * @todo fix me when implementation improves
-     */
-    virtual bool intStatus() { return false; }
+    virtual void mouseAt(uint16_t x, uint16_t y, uint8_t buttons);
+    virtual void keyPress(uint32_t key, bool down);
+
+    virtual void serialize(std::ostream &os);
+    virtual void unserialize(Checkpoint *cp, const std::string &section);
 };
 
-#endif
+#endif // __DEV_ARM_PL050_HH__
diff --git a/src/dev/arm/pl111.cc b/src/dev/arm/pl111.cc
index e597bf272..e884d9b58 100644
--- a/src/dev/arm/pl111.cc
+++ b/src/dev/arm/pl111.cc
@@ -35,9 +35,13 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * Authors: William Wang
+ *          Ali Saidi
  */
 
+#include "base/bitmap.hh"
+#include "base/output.hh"
 #include "base/trace.hh"
+#include "base/vnc/vncserver.hh"
 #include "dev/arm/amba_device.hh"
 #include "dev/arm/gic.hh"
 #include "dev/arm/pl111.hh"
@@ -50,20 +54,27 @@ using namespace AmbaDev;
 Pl111::Pl111(const Params *p)
     : AmbaDmaDevice(p), lcdTiming0(0), lcdTiming1(0), lcdTiming2(0),
       lcdTiming3(0), lcdUpbase(0), lcdLpbase(0), lcdControl(0), lcdImsc(0),
-      lcdRis(0), lcdMis(0), lcdIcr(0), lcdUpcurr(0), lcdLpcurr(0),
+      lcdRis(0), lcdMis(0),
       clcdCrsrCtrl(0), clcdCrsrConfig(0), clcdCrsrPalette0(0),
       clcdCrsrPalette1(0), clcdCrsrXY(0), clcdCrsrClip(0), clcdCrsrImsc(0),
       clcdCrsrIcr(0), clcdCrsrRis(0), clcdCrsrMis(0), clock(p->clock),
-      height(0), width(0), startTime(0), startAddr(0), maxAddr(0), curAddr(0),
+      vncserver(p->vnc), bmp(NULL), width(LcdMaxWidth), height(LcdMaxHeight),
+      bytesPerPixel(4), startTime(0), startAddr(0), maxAddr(0), curAddr(0),
       waterMark(0), dmaPendingNum(0), readEvent(this), fillFifoEvent(this),
       dmaDoneEvent(maxOutstandingDma, this), intEvent(this)
 {
     pioSize = 0xFFFF;
 
+    pic = simout.create("framebuffer.bmp", true);
+
+    // Value-initialize so the entire frame buffer starts out zeroed.
+    dmaBuffer = new uint8_t[LcdMaxWidth * LcdMaxHeight * sizeof(uint32_t)]();
+
     memset(lcdPalette, 0, sizeof(lcdPalette));
     memset(cursorImage, 0, sizeof(cursorImage));
-    memset(dmaBuffer, 0, sizeof(dmaBuffer));
-    memset(frameBuffer, 0, sizeof(frameBuffer));
+
+    if (vncserver)
+        vncserver->setFramebufferAddr(dmaBuffer);
 }
 
 // read registers and frame buffer
@@ -75,111 +86,105 @@ Pl111::read(PacketPtr pkt)
 
     uint32_t data = 0;
 
-    if ((pkt->getAddr()& 0xffff0000) == pioAddr) {
+    assert(pkt->getAddr() >= pioAddr &&
+           pkt->getAddr() < pioAddr + pioSize);
 
-        assert(pkt->getAddr() >= pioAddr &&
-               pkt->getAddr() < pioAddr + pioSize);
+    Addr daddr = pkt->getAddr() - pioAddr;
+    pkt->allocate();
 
-        Addr daddr = pkt->getAddr()&0xFFFF;
-        pkt->allocate();
+    DPRINTF(PL111, " read register %#x size=%d\n", daddr, pkt->getSize());
 
-        DPRINTF(PL111, " read register %#x size=%d\n", daddr, pkt->getSize());
-
-        switch (daddr) {
-          case LcdTiming0:
-            data = lcdTiming0;
-            break;
-          case LcdTiming1:
-            data = lcdTiming1;
-            break;
-          case LcdTiming2:
-            data = lcdTiming2;
-            break;
-          case LcdTiming3:
-            data = lcdTiming3;
-            break;
-          case LcdUpBase:
-            data = lcdUpbase;
-            break;
-          case LcdLpBase:
-            data = lcdLpbase;
-            break;
-          case LcdControl:
-            data = lcdControl;
-            break;
-          case LcdImsc:
-            warn("LCD interrupt set/clear function not supported\n");
-            data = lcdImsc;
-            break;
-          case LcdRis:
-            warn("LCD Raw interrupt status function not supported\n");
-            data = lcdRis;
-            break;
-          case LcdMis:
-            warn("LCD Masked interrupt status function not supported\n");
-            data = lcdMis;
-            break;
-          case LcdIcr:
-            panic("LCD register at offset %#x is Write-Only\n", daddr);
-            break;
-          case LcdUpCurr:
-            data = lcdUpcurr;
-            break;
-          case LcdLpCurr:
-            data = lcdLpcurr;
-            break;
-          case ClcdCrsrCtrl:
-            data = clcdCrsrCtrl;
-            break;
-          case ClcdCrsrConfig:
-            data = clcdCrsrConfig;
-            break;
-          case ClcdCrsrPalette0:
-            data = clcdCrsrPalette0;
-            break;
-          case ClcdCrsrPalette1:
-            data = clcdCrsrPalette1;
-            break;
-          case ClcdCrsrXY:
-            data = clcdCrsrXY;
-            break;
-          case ClcdCrsrClip:
-            data = clcdCrsrClip;
-            break;
-          case ClcdCrsrImsc:
-            data = clcdCrsrImsc;
-            break;
-          case ClcdCrsrIcr:
-            panic("CLCD register at offset %#x is Write-Only\n", daddr);
-            break;
-          case ClcdCrsrRis:
-            data = clcdCrsrRis;
-            break;
-          case ClcdCrsrMis:
-            data = clcdCrsrMis;
-            break;
-          default:
-            if (AmbaDev::readId(pkt, AMBA_ID, pioAddr)) {
-                // Hack for variable size accesses
-                data = pkt->get<uint32_t>();
-                break;
-            } else if (daddr >= CrsrImage && daddr <= 0xBFC) {
-                // CURSOR IMAGE
-                int index;
-                index = (daddr - CrsrImage) >> 2;
-                data= cursorImage[index];
-                break;
-            } else if (daddr >= LcdPalette && daddr <= 0x3FC) {
-                // LCD Palette
-                int index;
-                index = (daddr - LcdPalette) >> 2;
-                data = lcdPalette[index];
-                break;
-            } else {
-                panic("Tried to read CLCD register at offset %#x that \
+    switch (daddr) {
+      case LcdTiming0:
+        data = lcdTiming0;
+        break;
+      case LcdTiming1:
+        data = lcdTiming1;
+        break;
+      case LcdTiming2:
+        data = lcdTiming2;
+        break;
+      case LcdTiming3:
+        data = lcdTiming3;
+        break;
+      case LcdUpBase:
+        data = lcdUpbase;
+        break;
+      case LcdLpBase:
+        data = lcdLpbase;
+        break;
+      case LcdControl:
+        data = lcdControl;
+        break;
+      case LcdImsc:
+        data = lcdImsc;
+        break;
+      case LcdRis:
+        data = lcdRis;
+        break;
+      case LcdMis:
+        data = lcdMis;
+        break;
+      case LcdIcr:
+        panic("LCD register at offset %#x is Write-Only\n", daddr);
+        break;
+      case LcdUpCurr:
+        data = curAddr;
+        break;
+      case LcdLpCurr:
+        data = curAddr;
+        break;
+      case ClcdCrsrCtrl:
+        data = clcdCrsrCtrl;
+        break;
+      case ClcdCrsrConfig:
+        data = clcdCrsrConfig;
+        break;
+      case ClcdCrsrPalette0:
+        data = clcdCrsrPalette0;
+        break;
+      case ClcdCrsrPalette1:
+        data = clcdCrsrPalette1;
+        break;
+      case ClcdCrsrXY:
+        data = clcdCrsrXY;
+        break;
+      case ClcdCrsrClip:
+        data = clcdCrsrClip;
+        break;
+      case ClcdCrsrImsc:
+        data = clcdCrsrImsc;
+        break;
+      case ClcdCrsrIcr:
+        panic("CLCD register at offset %#x is Write-Only\n", daddr);
+        break;
+      case ClcdCrsrRis:
+        data = clcdCrsrRis;
+        break;
+      case ClcdCrsrMis:
+        data = clcdCrsrMis;
+        break;
+      default:
+        if (AmbaDev::readId(pkt, AMBA_ID, pioAddr)) {
+            // Hack for variable size accesses
+            data = pkt->get<uint32_t>();
+            break;
+        } else if (daddr >= CrsrImage && daddr <= 0xBFC) {
+            // CURSOR IMAGE
+            int index;
+            index = (daddr - CrsrImage) >> 2;
+            data= cursorImage[index];
+            break;
+        } else if (daddr >= LcdPalette && daddr <= 0x3FC) {
+            // LCD Palette
+            int index;
+            index = (daddr - LcdPalette) >> 2;
+            data = lcdPalette[index];
+            break;
+        } else {
+            panic("Tried to read CLCD register at offset %#x that \
                        doesn't exist\n", daddr);
-                break;
-            }
+            break;
         }
     }
 
@@ -226,119 +231,133 @@ Pl111::write(PacketPtr pkt)
         break;
     }
 
-    if ((pkt->getAddr()& 0xffff0000) == pioAddr) {
+    assert(pkt->getAddr() >= pioAddr &&
+           pkt->getAddr() < pioAddr + pioSize);
 
-        assert(pkt->getAddr() >= pioAddr &&
-               pkt->getAddr() < pioAddr + pioSize);
+    Addr daddr = pkt->getAddr() - pioAddr;
 
-        Addr daddr = pkt->getAddr() - pioAddr;
+    DPRINTF(PL111, " write register %#x value %#x size=%d\n", daddr,
+            pkt->get<uint8_t>(), pkt->getSize());
 
-        DPRINTF(PL111, " write register %#x value %#x size=%d\n", daddr,
-                pkt->get<uint8_t>(), pkt->getSize());
+    switch (daddr) {
+      case LcdTiming0:
+        lcdTiming0 = data;
+        // width = 16 * (PPL+1)
+        width = (lcdTiming0.ppl + 1) << 4;
+        break;
+      case LcdTiming1:
+        lcdTiming1 = data;
+        // height = LPP + 1
+        height = (lcdTiming1.lpp) + 1;
+        break;
+      case LcdTiming2:
+        lcdTiming2 = data;
+        break;
+      case LcdTiming3:
+        lcdTiming3 = data;
+        break;
+      case LcdUpBase:
+        lcdUpbase = data;
+        DPRINTF(PL111, "####### Upper panel base set to: %#x #######\n", lcdUpbase);
+        break;
+      case LcdLpBase:
+        warn("LCD dual screen mode not supported\n");
+        lcdLpbase = data;
+        DPRINTF(PL111, "###### Lower panel base set to: %#x #######\n", lcdLpbase);
+        break;
+      case LcdControl:
+        int old_lcdpwr;
+        old_lcdpwr = lcdControl.lcdpwr;
+        lcdControl = data;
+
+        DPRINTF(PL111, "LCD power is:%d\n", lcdControl.lcdpwr);
+
+        // LCD power enable
+        if (lcdControl.lcdpwr && !old_lcdpwr) {
+            updateVideoParams();
+            DPRINTF(PL111, " lcd size: height %d width %d\n", height, width);
+            waterMark = lcdControl.watermark ? 8 : 4;
+            startDma();
+        }
+        break;
+      case LcdImsc:
+        lcdImsc = data;
+        if (lcdImsc.vcomp)
+            panic("Interrupting on vcomp not supported\n");
 
-        switch (daddr) {
-          case LcdTiming0:
-            lcdTiming0 = data;
-            // width = 16 * (PPL+1)
-            width = (lcdTiming0.ppl + 1) << 4;
-            break;
-          case LcdTiming1:
-            lcdTiming1 = data;
-            // height = LPP + 1
-            height  = (lcdTiming1.lpp) + 1;
-            break;
-          case LcdTiming2:
-            lcdTiming2 = data;
-            break;
-          case LcdTiming3:
-            lcdTiming3 = data;
-            break;
-          case LcdUpBase:
-            lcdUpbase  = data;
-            break;
-          case LcdLpBase:
-            warn("LCD dual screen mode not supported\n");
-            lcdLpbase  = data;
-            break;
-          case LcdControl:
-            int old_lcdpwr;
-            old_lcdpwr = lcdControl.lcdpwr;
-            lcdControl = data;
-            // LCD power enable
-            if (lcdControl.lcdpwr&&!old_lcdpwr) {
-                DPRINTF(PL111, " lcd size: height %d width %d\n", height, width);
-                waterMark = lcdControl.watermark ? 8 : 4;
-                readFramebuffer();
-            }
-            break;
-          case LcdImsc:
-            warn("LCD interrupt mask set/clear not supported\n");
-            lcdImsc    = data;
-            break;
-          case LcdRis:
-            warn("LCD register at offset %#x is Read-Only\n", daddr);
-            break;
-          case LcdMis:
-            warn("LCD register at offset %#x is Read-Only\n", daddr);
-            break;
-          case LcdIcr:
-            warn("LCD interrupt clear not supported\n");
-            lcdIcr     = data;
-            break;
-          case LcdUpCurr:
-            warn("LCD register at offset %#x is Read-Only\n", daddr);
-            break;
-          case LcdLpCurr:
-            warn("LCD register at offset %#x is Read-Only\n", daddr);
-            break;
-          case ClcdCrsrCtrl:
-            clcdCrsrCtrl = data;
-            break;
-          case ClcdCrsrConfig:
-            clcdCrsrConfig = data;
-            break;
-          case ClcdCrsrPalette0:
-            clcdCrsrPalette0 = data;
-            break;
-          case ClcdCrsrPalette1:
-            clcdCrsrPalette1 = data;
-            break;
-          case ClcdCrsrXY:
-            clcdCrsrXY = data;
-            break;
-          case ClcdCrsrClip:
-            clcdCrsrClip = data;
-            break;
-          case ClcdCrsrImsc:
-            clcdCrsrImsc = data;
-            break;
-          case ClcdCrsrIcr:
-            clcdCrsrIcr = data;
-            break;
-          case ClcdCrsrRis:
-            warn("CLCD register at offset %#x is Read-Only\n", daddr);
-            break;
-          case ClcdCrsrMis:
-            warn("CLCD register at offset %#x is Read-Only\n", daddr);
-            break;
-          default:
-            if (daddr >= CrsrImage && daddr <= 0xBFC) {
-                // CURSOR IMAGE
-                int index;
-                index = (daddr - CrsrImage) >> 2;
-                cursorImage[index] = data;
-                break;
-            } else if (daddr >= LcdPalette && daddr <= 0x3FC) {
-                // LCD Palette
-                int index;
-                index = (daddr - LcdPalette) >> 2;
-                lcdPalette[index] = data;
-                break;
-            } else {
-                panic("Tried to write PL111 register at offset %#x that \
+        lcdMis = lcdImsc & lcdRis;
+
+        if (!lcdMis)
+            gic->clearInt(intNum);
+
+         break;
+      case LcdRis:
+        panic("LCD register at offset %#x is Read-Only\n", daddr);
+        break;
+      case LcdMis:
+        panic("LCD register at offset %#x is Read-Only\n", daddr);
+        break;
+      case LcdIcr:
+        lcdRis = lcdRis & ~data;
+        lcdMis = lcdImsc & lcdRis;
+
+        if (!lcdMis)
+            gic->clearInt(intNum);
+
+        break;
+      case LcdUpCurr:
+        panic("LCD register at offset %#x is Read-Only\n", daddr);
+        break;
+      case LcdLpCurr:
+        panic("LCD register at offset %#x is Read-Only\n", daddr);
+        break;
+      case ClcdCrsrCtrl:
+        clcdCrsrCtrl = data;
+        break;
+      case ClcdCrsrConfig:
+        clcdCrsrConfig = data;
+        break;
+      case ClcdCrsrPalette0:
+        clcdCrsrPalette0 = data;
+        break;
+      case ClcdCrsrPalette1:
+        clcdCrsrPalette1 = data;
+        break;
+      case ClcdCrsrXY:
+        clcdCrsrXY = data;
+        break;
+      case ClcdCrsrClip:
+        clcdCrsrClip = data;
+        break;
+      case ClcdCrsrImsc:
+        clcdCrsrImsc = data;
+        break;
+      case ClcdCrsrIcr:
+        clcdCrsrIcr = data;
+        break;
+      case ClcdCrsrRis:
+        panic("CLCD register at offset %#x is Read-Only\n", daddr);
+        break;
+      case ClcdCrsrMis:
+        panic("CLCD register at offset %#x is Read-Only\n", daddr);
+        break;
+      default:
+        if (daddr >= CrsrImage && daddr <= 0xBFC) {
+            // CURSOR IMAGE
+            int index;
+            index = (daddr - CrsrImage) >> 2;
+            cursorImage[index] = data;
+            break;
+        } else if (daddr >= LcdPalette && daddr <= 0x3FC) {
+            // LCD Palette
+            int index;
+            index = (daddr - LcdPalette) >> 2;
+            lcdPalette[index] = data;
+            break;
+        } else {
+            panic("Tried to write PL111 register at offset %#x that \
                        doesn't exist\n", daddr);
-                break;
-            }
+            break;
         }
     }
 
@@ -347,17 +366,75 @@ Pl111::write(PacketPtr pkt)
 }
 
 void
+Pl111::updateVideoParams()
+{
+    // Derive the pixel size from the programmed colour depth, then hand the
+    // matching pixel format and panel size to the VNC server and bitmap.
+    if (lcdControl.lcdbpp == bpp24)
+        bytesPerPixel = 4;
+    else if (lcdControl.lcdbpp == bpp16m565)
+        bytesPerPixel = 2;
+
+    if (vncserver) {
+        if (lcdControl.lcdbpp == bpp24 && lcdControl.bgr)
+            vncserver->setFrameBufferParams(VideoConvert::bgr8888, width,
+                   height);
+        else if (lcdControl.lcdbpp == bpp24 && !lcdControl.bgr)
+            vncserver->setFrameBufferParams(VideoConvert::rgb8888, width,
+                   height);
+        else if (lcdControl.lcdbpp == bpp16m565 && lcdControl.bgr)
+            vncserver->setFrameBufferParams(VideoConvert::bgr565, width,
+                   height);
+        else if (lcdControl.lcdbpp == bpp16m565 && !lcdControl.bgr)
+            vncserver->setFrameBufferParams(VideoConvert::rgb565, width,
+                   height);
+        else
+            panic("Unimplemented video mode\n");
+    }
+
+    delete bmp; // delete on NULL is a no-op, so no check is needed
+
+    if (lcdControl.lcdbpp == bpp24 && lcdControl.bgr)
+        bmp = new Bitmap(VideoConvert::bgr8888, width, height, dmaBuffer);
+    else if (lcdControl.lcdbpp == bpp24 && !lcdControl.bgr)
+        bmp = new Bitmap(VideoConvert::rgb8888, width, height, dmaBuffer);
+    else if (lcdControl.lcdbpp == bpp16m565 && lcdControl.bgr)
+        bmp = new Bitmap(VideoConvert::bgr565, width, height, dmaBuffer);
+    else if (lcdControl.lcdbpp == bpp16m565 && !lcdControl.bgr)
+        bmp = new Bitmap(VideoConvert::rgb565, width, height, dmaBuffer);
+    else
+        panic("Unimplemented video mode\n");
+}
+
+void
+Pl111::startDma()
+{
+    if (dmaPendingNum != 0 || readEvent.scheduled())
+        return; // reads still outstanding, or the next frame read is queued
+    readFramebuffer(); // otherwise begin fetching a frame right away
+}
+
+void
 Pl111::readFramebuffer()
 {
     // initialization for dma read from frame buffer to dma buffer
-    uint32_t length  = height*width;
-    if (startAddr != lcdUpbase) {
+    uint32_t length = height * width;
+    if (startAddr != lcdUpbase)
         startAddr = lcdUpbase;
-    }
+
+    // Latch the base-update raw status; intEvent raises it only if unmasked
+    lcdRis.baseaddr = 1;
+    if (!intEvent.scheduled())
+        schedule(intEvent, nextCycle());
+
     curAddr = 0;
     startTime = curTick();
-    maxAddr = static_cast<Addr>(length*sizeof(uint32_t));
-    dmaPendingNum =0 ;
+
+    maxAddr = static_cast<Addr>(length * bytesPerPixel); // frame bytes
+
+    DPRINTF(PL111, " lcd frame buffer size of %d bytes \n", maxAddr);
+
+    dmaPendingNum = 0;
 
     fillFifo();
 }
@@ -369,11 +446,16 @@ Pl111::fillFifo()
         // concurrent dma reads need different dma done events
         // due to assertion in scheduling state
         ++dmaPendingNum;
-        DPRINTF(PL111, " ++ DMA pending number %d read addr %#x\n",
-                dmaPendingNum, curAddr);
+
         assert(!dmaDoneEvent[dmaPendingNum-1].scheduled());
-        dmaRead(curAddr + startAddr, dmaSize, &dmaDoneEvent[dmaPendingNum-1],
-                curAddr + dmaBuffer);
+
+        // We use an uncacheable request here because the requests from the
+        // CPU will be uncacheable as well. The memory system does not cope
+        // with a mix of uncacheable and cacheable requests to the same
+        // address.
+        dmaPort->dmaAction(MemCmd::ReadReq, curAddr + startAddr, dmaSize,
+                &dmaDoneEvent[dmaPendingNum-1], curAddr + dmaBuffer, 0,
+                Request::UNCACHEABLE);
         curAddr += dmaSize;
     }
 }
@@ -381,27 +463,34 @@ Pl111::fillFifo()
 void
 Pl111::dmaDone()
 {
-    Tick maxFrameTime = lcdTiming2.cpl*height*clock;
+    Tick maxFrameTime = lcdTiming2.cpl * height * clock;
 
     --dmaPendingNum;
 
-    DPRINTF(PL111, " -- DMA pending number %d\n", dmaPendingNum);
-
     if (maxAddr == curAddr && !dmaPendingNum) {
-        if ((curTick() - startTime) > maxFrameTime)
+        if ((curTick() - startTime) > maxFrameTime) {
             warn("CLCD controller buffer underrun, took %d cycles when should"
                  " have taken %d\n", curTick() - startTime, maxFrameTime);
+            lcdRis.underflow = 1;
+            if (!intEvent.scheduled())
+                schedule(intEvent, nextCycle());
+        }
 
-        // double buffering so the vnc server doesn't see a tear in the screen
-        memcpy(frameBuffer, dmaBuffer, maxAddr);
         assert(!readEvent.scheduled());
+        if (vncserver)
+            vncserver->setDirty();
 
         DPRINTF(PL111, "-- write out frame buffer into bmp\n");
-        writeBMP(frameBuffer);
+
+        assert(bmp);
+        pic->seekp(0);
+        bmp->write(pic);
 
         DPRINTF(PL111, "-- schedule next dma read event at %d tick \n",
                 maxFrameTime + curTick());
-        schedule(readEvent, nextCycle(startTime + maxFrameTime));
+
+        if (lcdControl.lcden)
+            schedule(readEvent, nextCycle(startTime + maxFrameTime));
     }
 
     if (dmaPendingNum > (maxOutstandingDma - waterMark))
@@ -409,9 +498,9 @@ Pl111::dmaDone()
 
     if (!fillFifoEvent.scheduled())
         schedule(fillFifoEvent, nextCycle());
-
 }
 
+
 Tick
 Pl111::nextCycle()
 {
@@ -431,33 +520,6 @@ Pl111::nextCycle(Tick beginTick)
     return nextTick;
 }
 
-// write out the frame buffer into a bitmap file
-void
-Pl111::writeBMP(uint32_t* frameBuffer)
-{
-    fstream pic;
-
-    // write out bmp head
-    std::string filename = "./m5out/frameBuffer.bmp";
-    pic.open(filename.c_str(), ios::out|ios::binary);
-    Bitmap bm(pic, height, width);
-
-    DPRINTF(PL111, "-- write out data into bmp\n");
-
-    // write out frame buffer data
-    for (int i = height -1; i >= 0; --i) {
-        for (int j = 0; j< width; ++j) {
-            uint32_t pixel = frameBuffer[i*width + j];
-            pic.write(reinterpret_cast<char*>(&pixel),
-                      sizeof(uint32_t));
-            DPRINTF(PL111, " write pixel data  %#x at addr %#x\n",
-                    pixel, i*width + j);
-        }
-    }
-
-    pic.close();
-}
-
 void
 Pl111::serialize(std::ostream &os)
 {
@@ -490,9 +552,6 @@ Pl111::serialize(std::ostream &os)
     uint8_t lcdMis_serial = lcdMis;
     SERIALIZE_SCALAR(lcdMis_serial);
 
-    uint8_t lcdIcr_serial = lcdIcr;
-    SERIALIZE_SCALAR(lcdIcr_serial);
-
     SERIALIZE_ARRAY(lcdPalette, LcdPaletteSize);
     SERIALIZE_ARRAY(cursorImage, CrsrImageSize);
 
@@ -518,9 +577,9 @@ Pl111::serialize(std::ostream &os)
     SERIALIZE_SCALAR(clock);
     SERIALIZE_SCALAR(height);
     SERIALIZE_SCALAR(width);
+    SERIALIZE_SCALAR(bytesPerPixel);
 
-    SERIALIZE_ARRAY(dmaBuffer, height*width);
-    SERIALIZE_ARRAY(frameBuffer, height*width);
+    SERIALIZE_ARRAY(dmaBuffer, height * width);
     SERIALIZE_SCALAR(startTime);
     SERIALIZE_SCALAR(startAddr);
     SERIALIZE_SCALAR(maxAddr);
@@ -569,10 +628,6 @@ Pl111::unserialize(Checkpoint *cp, const std::string &section)
     UNSERIALIZE_SCALAR(lcdMis_serial);
     lcdMis = lcdMis_serial;
 
-    uint8_t lcdIcr_serial;
-    UNSERIALIZE_SCALAR(lcdIcr_serial);
-    lcdIcr = lcdIcr_serial;
-
     UNSERIALIZE_ARRAY(lcdPalette, LcdPaletteSize);
     UNSERIALIZE_ARRAY(cursorImage, CrsrImageSize);
 
@@ -602,25 +657,29 @@ Pl111::unserialize(Checkpoint *cp, const std::string &section)
     UNSERIALIZE_SCALAR(clock);
     UNSERIALIZE_SCALAR(height);
     UNSERIALIZE_SCALAR(width);
+    UNSERIALIZE_SCALAR(bytesPerPixel);
 
-    UNSERIALIZE_ARRAY(dmaBuffer, height*width);
-    UNSERIALIZE_ARRAY(frameBuffer, height*width);
+    UNSERIALIZE_ARRAY(dmaBuffer, height * width);
     UNSERIALIZE_SCALAR(startTime);
     UNSERIALIZE_SCALAR(startAddr);
     UNSERIALIZE_SCALAR(maxAddr);
     UNSERIALIZE_SCALAR(curAddr);
     UNSERIALIZE_SCALAR(waterMark);
     UNSERIALIZE_SCALAR(dmaPendingNum);
+
+    updateVideoParams();
+    if (vncserver)
+        vncserver->setDirty();
 }
 
 void
 Pl111::generateInterrupt()
 {
     DPRINTF(PL111, "Generate Interrupt: lcdImsc=0x%x lcdRis=0x%x lcdMis=0x%x\n",
-            lcdImsc, lcdRis, lcdMis);
+            (uint32_t)lcdImsc, (uint32_t)lcdRis, (uint32_t)lcdMis);
     lcdMis = lcdImsc & lcdRis;
 
-    if (lcdMis.ffufie || lcdMis.nbupie || lcdMis.vtcpie || lcdMis.ahmeie) {
+    if (lcdMis.underflow || lcdMis.baseaddr || lcdMis.vcomp || lcdMis.ahbmaster) {
         gic->sendInt(intNum);
         DPRINTF(PL111, " -- Generated\n");
     }
@@ -639,15 +698,4 @@ Pl111Params::create()
     return new Pl111(this);
 }
 
-// bitmap class ctor
-Bitmap::Bitmap(std::fstream& bmp, uint16_t h, uint16_t w)
-{
-    Magic  magic  = {{'B','M'}};
-    Header header = {sizeof(Color)*w*h , 0, 0, 54};
-    Info   info   = {sizeof(Info), w, h, 1, sizeof(Color)*8, 0,
-                     ( sizeof(Color) *(w*h) ), 1, 1, 0, 0};
-
-    bmp.write(reinterpret_cast<char*>(&magic),  sizeof(magic));
-    bmp.write(reinterpret_cast<char*>(&header), sizeof(header));
-    bmp.write(reinterpret_cast<char*>(&info),   sizeof(info));
-}
+
diff --git a/src/dev/arm/pl111.hh b/src/dev/arm/pl111.hh
index 4e75af4e8..f36dc6810 100644
--- a/src/dev/arm/pl111.hh
+++ b/src/dev/arm/pl111.hh
@@ -35,6 +35,7 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * Authors: William Wang
+ *          Ali Saidi
  */
 
 
@@ -55,6 +56,8 @@
 using namespace std;
 
 class Gic;
+class VncServer;
+class Bitmap;
 
 class Pl111: public AmbaDmaDevice
 {
@@ -96,58 +99,69 @@ class Pl111: public AmbaDmaDevice
     static const int dmaSize            = 8;    // 64 bits
     static const int maxOutstandingDma  = 16;   // 16 deep FIFO of 64 bits
 
+    enum LcdMode { // compared against ControlReg::lcdbpp
+        bpp1 = 0,
+        bpp2,
+        bpp4,
+        bpp8,
+        bpp16,
+        bpp24,      // handled as 4 bytes per pixel (bgr8888/rgb8888)
+        bpp16m565,  // handled as 2 bytes per pixel (bgr565/rgb565)
+        bpp12
+    };
+
     BitUnion8(InterruptReg)
-    Bitfield<1> ffufie;
-    Bitfield<2> nbupie;
-    Bitfield<3> vtcpie;
-    Bitfield<4> ahmeie;
+        Bitfield<1> underflow;  // raw bit set by dmaDone() on a late frame
+        Bitfield<2> baseaddr;   // raw bit set by readFramebuffer()
+        Bitfield<3> vcomp;      // enabling it in lcdImsc panics in write()
+        Bitfield<4> ahbmaster;  // NOTE(review): never raised in visible code
     EndBitUnion(InterruptReg)
 
     BitUnion32(TimingReg0)
-    Bitfield<7,2> ppl;
-    Bitfield<15,8> hsw;
-    Bitfield<23,16> hfp;
-    Bitfield<31,24> hbp;
+        Bitfield<7,2> ppl;      // write() derives width = 16 * (ppl + 1)
+        Bitfield<15,8> hsw;
+        Bitfield<23,16> hfp;
+        Bitfield<31,24> hbp;
     EndBitUnion(TimingReg0)
 
     BitUnion32(TimingReg1)
-    Bitfield<9,0> lpp;
-    Bitfield<15,10> vsw;
-    Bitfield<23,16> vfp;
-    Bitfield<31,24> vbp;
+        Bitfield<9,0> lpp;      // write() derives height = lpp + 1
+        Bitfield<15,10> vsw;
+        Bitfield<23,16> vfp;
+        Bitfield<31,24> vbp;
     EndBitUnion(TimingReg1)
 
     BitUnion32(TimingReg2)
-    Bitfield<4,0> pcdlo;
-    Bitfield<5> clksel;
-    Bitfield<10,6> acb;
-    Bitfield<11> avs;
-    Bitfield<12> ihs;
-    Bitfield<13> ipc;
-    Bitfield<14> ioe;
-    Bitfield<25,16> cpl;
-    Bitfield<26> bcd;
-    Bitfield<31,27> pcdhi;
+        Bitfield<4,0> pcdlo;
+        Bitfield<5> clksel;
+        Bitfield<10,6> acb;
+        Bitfield<11> avs;
+        Bitfield<12> ihs;
+        Bitfield<13> ipc;
+        Bitfield<14> ioe;
+        Bitfield<25,16> cpl;    // dmaDone(): frame budget = cpl*height*clock
+        Bitfield<26> bcd;
+        Bitfield<31,27> pcdhi;
     EndBitUnion(TimingReg2)
 
     BitUnion32(TimingReg3)
-    Bitfield<6,0> led;
-    Bitfield<16> lee;
+        Bitfield<6,0> led;
+        Bitfield<16> lee;
     EndBitUnion(TimingReg3)
 
     BitUnion32(ControlReg)
-    Bitfield<0> lcden;
-    Bitfield<3,1> lcdbpp;
-    Bitfield<4> lcdbw;
-    Bitfield<5> lcdtft;
-    Bitfield<6> lcdmono8;
-    Bitfield<7> lcddual;
-    Bitfield<8> bgr;
-    Bitfield<9> bebo;
-    Bitfield<10> bepo;
-    Bitfield<11> lcdpwr;
-    Bitfield<13,12> lcdvcomp;
-    Bitfield<16> watermark;
+        Bitfield<0> lcden;      // dmaDone() re-arms readEvent only when set
+        Bitfield<3,1> lcdbpp;   // compared against LcdMode values
+        Bitfield<4> lcdbw;
+        Bitfield<5> lcdtft;
+        Bitfield<6> lcdmono8;
+        Bitfield<7> lcddual;
+        Bitfield<8> bgr;        // picks bgr* over rgb* pixel formats
+        Bitfield<9> bebo;
+        Bitfield<10> bepo;
+        Bitfield<11> lcdpwr;    // a 0 -> 1 write in write() starts the DMA
+        Bitfield<13,12> lcdvcomp;
+        Bitfield<16> watermark; // set: waterMark = 8, clear: waterMark = 4
     EndBitUnion(ControlReg)
 
     /** Horizontal axis panel control register */
@@ -180,15 +194,6 @@ class Pl111: public AmbaDmaDevice
     /** Masked interrupt status register */
     InterruptReg lcdMis;
 
-    /** Interrupt clear register */
-    InterruptReg lcdIcr;
-
-    /** Upper panel current address value register - ro */
-    int lcdUpcurr;
-
-    /** Lower panel current address value register - ro */
-    int lcdLpcurr;
-
     /** 256x16-bit color palette registers
      * 256 palette entries organized as 128 locations of two entries per word */
     int lcdPalette[LcdPaletteSize];
@@ -228,17 +233,26 @@ class Pl111: public AmbaDmaDevice
     /** Clock speed */
     Tick clock;
 
-    /** Frame buffer height - lines per panel */
-    uint16_t height;
+    /** VNC server */
+    VncServer *vncserver;
+
+    /** Helper to write out bitmaps */
+    Bitmap *bmp;
+
+    /** Picture of what the current frame buffer looks like */
+    std::ostream *pic;
 
     /** Frame buffer width - pixels per line */
     uint16_t width;
 
-    /** CLCDC supports up to 1024x768 */
-    uint8_t dmaBuffer[LcdMaxWidth * LcdMaxHeight * sizeof(uint32_t)];
+    /** Frame buffer height - lines per panel */
+    uint16_t height;
 
-    /** Double buffering */
-    uint32_t frameBuffer[LcdMaxWidth * LcdMaxHeight];
+    /** Bytes per pixel */
+    uint8_t bytesPerPixel;
+
+    /** CLCDC supports up to 1024x768 */
+    uint8_t *dmaBuffer;
 
     /** Start time for frame buffer dma read */
     Tick startTime;
@@ -258,12 +272,12 @@ class Pl111: public AmbaDmaDevice
     /** Number of pending dma reads */
     int dmaPendingNum;
 
+    /** Send updated parameters to the vnc server */
+    void updateVideoParams();
+
     /** DMA framebuffer read */
     void readFramebuffer();
 
-    /** Write framebuffer to a bmp file */
-    void writeBMP(uint32_t*);
-
     /** Generate dma framebuffer read event */
     void generateReadEvent();
 
@@ -273,6 +287,9 @@ class Pl111: public AmbaDmaDevice
     /** fillFIFO event */
     void fillFifo();
 
+    /** Start the DMA reads when LCD power is switched on */
+    void startDma();
+
     /** DMA done event */
     void dmaDone();
 
@@ -289,7 +306,7 @@ class Pl111: public AmbaDmaDevice
     /** DMA done event */
     vector<EventWrapper<Pl111, &Pl111::dmaDone> > dmaDoneEvent;
 
-    /** Wrapper to create an event out of the thing */
+    /** Wrapper to create an event out of the interrupt */
     EventWrapper<Pl111, &Pl111::generateInterrupt> intEvent;
 
   public:
@@ -312,57 +329,6 @@ class Pl111: public AmbaDmaDevice
      * @param range_list range list to populate with ranges
      */
     void addressRanges(AddrRangeList &range_list);
-
-    /**
-     * Return if we have an interrupt pending
-     * @return interrupt status
-     * @todo fix me when implementation improves
-     */
-    virtual bool intStatus() { return false; }
-};
-
-// write frame buffer into a bitmap picture
-class  Bitmap
-{
-  public:
-    Bitmap(std::fstream& bmp, uint16_t h, uint16_t w);
-
-  private:
-    struct Magic
-    {
-        unsigned char magic_number[2];
-    } magic;
-
-    struct Header
-    {
-        uint32_t size;
-        uint16_t reserved1;
-        uint16_t reserved2;
-        uint32_t offset;
-    } header;
-
-    struct Info
-    {
-        uint32_t Size;
-        uint32_t Width;
-        uint32_t Height;
-        uint16_t Planes;
-        uint16_t BitCount;
-        uint32_t Compression;
-        uint32_t SizeImage;
-        uint32_t XPelsPerMeter;
-        uint32_t YPelsPerMeter;
-        uint32_t ClrUsed;
-        uint32_t ClrImportant;
-    } info;
-
-    struct Color
-    {
-        unsigned char b;
-        unsigned char g;
-        unsigned char r;
-        unsigned char a;
-    } color;
 };
 
 #endif
diff --git a/src/dev/arm/rv_ctrl.cc b/src/dev/arm/rv_ctrl.cc
index c0ba4c7aa..b1bbc065b 100644
--- a/src/dev/arm/rv_ctrl.cc
+++ b/src/dev/arm/rv_ctrl.cc
@@ -68,6 +68,27 @@ RealViewCtrl::read(PacketPtr pkt)
       case Flash:
         pkt->set<uint32_t>(0);
         break;
+      case Clcd:
+        pkt->set<uint32_t>(0x00001F00);
+        break;
+      case Osc0:
+        pkt->set<uint32_t>(0x00012C5C);
+        break;
+      case Osc1:
+        pkt->set<uint32_t>(0x00002CC0);
+        break;
+      case Osc2:
+        pkt->set<uint32_t>(0x00002C75);
+        break;
+      case Osc3:
+        pkt->set<uint32_t>(0x00020211);
+        break;
+      case Osc4:
+        pkt->set<uint32_t>(0x00002C75);
+        break;
+      case Lock:
+        pkt->set<uint32_t>(sysLock);
+        break;
       default:
         panic("Tried to read RealView I/O at offset %#x that doesn't exist\n", daddr);
         break;
@@ -85,6 +106,15 @@ RealViewCtrl::write(PacketPtr pkt)
     Addr daddr = pkt->getAddr() - pioAddr;
     switch (daddr) {
       case Flash:
+      case Clcd:
+      case Osc0:
+      case Osc1:
+      case Osc2:
+      case Osc3:
+      case Osc4:
+        break;
+      case Lock:
+        sysLock.lockVal = pkt->get<uint16_t>();
         break;
       default:
         panic("Tried to write RVIO at offset %#x that doesn't exist\n", daddr);
diff --git a/src/dev/arm/rv_ctrl.hh b/src/dev/arm/rv_ctrl.hh
index 00a19d715..ceed5ef2f 100644
--- a/src/dev/arm/rv_ctrl.hh
+++ b/src/dev/arm/rv_ctrl.hh
@@ -40,6 +40,7 @@
 #ifndef __DEV_ARM_RV_HH__
 #define __DEV_ARM_RV_HH__
 
+#include "base/bitunion.hh"
 #include "base/range.hh"
 #include "dev/io_device.hh"
 #include "params/RealViewCtrl.hh"
@@ -86,6 +87,14 @@ class RealViewCtrl : public BasicPioDevice
         TestOsc4   = 0xD0
     };
 
+    // System lock register; write() stores 16-bit Lock writes in lockVal
+    BitUnion32(SysLockReg)
+        Bitfield<15,0> lockVal;
+        Bitfield<16> locked; // NOTE(review): not set in visible code
+    EndBitUnion(SysLockReg)
+
+    SysLockReg sysLock;
+
   public:
     typedef RealViewCtrlParams Params;
     const Params *
@@ -120,4 +129,3 @@ class RealViewCtrl : public BasicPioDevice
 
 
 #endif // __DEV_ARM_RV_HH__
-
diff --git a/src/dev/arm/timer_sp804.cc b/src/dev/arm/timer_sp804.cc
index 04668d268..e6d2657ea 100644
--- a/src/dev/arm/timer_sp804.cc
+++ b/src/dev/arm/timer_sp804.cc
@@ -178,11 +178,11 @@ Sp804::Timer::restartCounter(uint32_t val)
     if (!control.timerEnable)
         return;
 
-    Tick time = clock << power(16, control.timerPrescale);
+    Tick time = clock * power(16, control.timerPrescale);
     if (control.timerSize)
-        time *= bits(val,15,0);
-    else
         time *= val;
+    else
+        time *= bits(val,15,0);
 
     if (zeroEvent.scheduled()) {
         DPRINTF(Timer, "-- Event was already schedule, de-scheduling\n");
diff --git a/src/dev/ps2.cc b/src/dev/ps2.cc
new file mode 100644
index 000000000..fe90ce6bc
--- /dev/null
+++ b/src/dev/ps2.cc
@@ -0,0 +1,200 @@
+/*
+ * Copyright (c) 2011 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Ali Saidi
+ */
+
+#include <list>
+#include "x11keysym/keysym.h"
+
+#include "base/misc.hh"
+#include "dev/ps2.hh"
+
+
+namespace Ps2 {
+
+/** Table to convert simple key symbols (0x00XX) into ps2 bytes. The lower
+ * byte is the scan code to send; the upper byte is non-zero when shift must
+ * be held to generate the symbol. The table generates US keyboard codes
+ * (i.e. the guest is supposed to recognize the keyboard as en_US). A new
+ * table would be required for another locale.
+ */
+
+static const uint16_t keySymToPs2Byte[128] = {
+// 0 / 8   1 / 9   2 / A   3 / B   4 / C   5 / D   6 / E   7 / F
+   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, // 0x00-0x07
+   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, // 0x08-0x0f
+   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, // 0x10-0x17
+   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, // 0x18-0x1f
+   0x0029, 0x0116, 0x0152, 0x0126, 0x0125, 0x012e, 0x013d, 0x0052, // 0x20-0x27
+   0x0146, 0x0145, 0x013e, 0x0155, 0x0041, 0x004e, 0x0049, 0x004a, // 0x28-0x2f
+   0x0045, 0x0016, 0x001e, 0x0026, 0x0025, 0x002e, 0x0036, 0x003d, // 0x30-0x37
+   0x003e, 0x0046, 0x014c, 0x004c, 0x0141, 0x0055, 0x0149, 0x014a, // 0x38-0x3f
+   0x011e, 0x011c, 0x0132, 0x0121, 0x0123, 0x0124, 0x012b, 0x0134, // 0x40-0x47
+   0x0133, 0x0143, 0x013b, 0x0142, 0x014b, 0x013a, 0x0131, 0x0144, // 0x48-0x4f
+   0x014d, 0x0115, 0x012d, 0x011b, 0x012c, 0x013c, 0x012a, 0x011d, // 0x50-0x57
+   0x0122, 0x0135, 0x011a, 0x0054, 0x005d, 0x005b, 0x0136, 0x014e, // 0x58-0x5f
+   0x000e, 0x001c, 0x0032, 0x0021, 0x0023, 0x0024, 0x002b, 0x0034, // 0x60-0x67
+   0x0033, 0x0043, 0x003b, 0x0042, 0x004b, 0x003a, 0x0031, 0x0044, // 0x68-0x6f
+   0x004d, 0x0015, 0x002d, 0x001b, 0x002c, 0x003c, 0x002a, 0x001d, // 0x70-0x77
+   0x0022, 0x0035, 0x001a, 0x0154, 0x015d, 0x015b, 0x010e, 0x0000  // 0x78-0x7f
+};
+
+const uint8_t ShiftKey = 0x12; // shift code keySymToPs2 inserts (same as Shift_L)
+const uint8_t BreakKey = 0xf0; // sent ahead of a code when the key is released
+const uint8_t ExtendedKey = 0xe0; // prefix put in front of extended key codes
+const uint32_t UpperKeys = 0xff00; // mask selecting the 0xffXX key symbols
+
+void
+keySymToPs2(uint32_t key, bool down, bool &cur_shift,
+        std::list<uint8_t> &keys)
+{
+    if (key <= XK_asciitilde) { // simple symbol: index into keySymToPs2Byte
+        uint16_t tmp = keySymToPs2Byte[key];
+        uint8_t code = tmp & 0xff; // low byte is the scan code to send
+        bool shift = tmp >> 8; // non-zero upper byte: symbol needs shift
+
+        if (down) {
+            if (!cur_shift && shift) { // shift not sent yet, press it first
+                keys.push_back(ShiftKey);
+                cur_shift = true;
+            }
+            // NOTE(review): an unshifted symbol pressed while cur_shift is
+            // set goes out without releasing shift first -- confirm intended
+            keys.push_back(code);
+        } else {
+            if (cur_shift && !shift) { // shift is held but not needed here
+                keys.push_back(BreakKey);
+                keys.push_back(ShiftKey);
+                cur_shift = false;
+            }
+            keys.push_back(BreakKey); // release: BreakKey followed by code
+            keys.push_back(code);
+        }
+    } else {
+        if ((key & UpperKeys) == UpperKeys) { // only 0xffXX symbols handled
+            bool extended = false; // set when an ExtendedKey prefix is needed
+            switch (key) {
+              case XK_BackSpace:
+                keys.push_back(0x66);
+                break;
+              case XK_Tab:
+                keys.push_back(0x0d);
+                break;
+              case XK_Return:
+                keys.push_back(0x5a);
+                break;
+             case XK_Escape:
+                keys.push_back(0x76);
+                break;
+             case XK_Delete:
+                extended = true;
+                keys.push_back(0x71);
+                break;
+             case XK_Home:
+                extended = true;
+                keys.push_back(0x6c);
+                break;
+             case XK_Left:
+                extended = true;
+                keys.push_back(0x6b);
+                break;
+             case XK_Right:
+                extended = true;
+                keys.push_back(0x74);
+                break;
+             case XK_Down:
+                extended = true;
+                keys.push_back(0x72);
+                break;
+             case XK_Up:
+                extended = true;
+                keys.push_back(0x75);
+                break;
+             case XK_Page_Up:
+                extended = true;
+                keys.push_back(0x7d);
+                break;
+             case XK_Page_Down:
+                extended = true;
+                keys.push_back(0x7a);
+                break;
+             case XK_End:
+                extended = true;
+                keys.push_back(0x69);
+                break;
+             case XK_Shift_L:
+                keys.push_back(0x12);
+                if (down) // remember shift state for later table lookups
+                    cur_shift = true;
+                else
+                    cur_shift = false;
+                break;
+             case XK_Shift_R:
+                keys.push_back(0x59);
+                if (down) // right shift updates the same cur_shift flag
+                    cur_shift = true;
+                else
+                    cur_shift = false;
+                break;
+             case XK_Control_L:
+                keys.push_back(0x14);
+                break;
+             case XK_Control_R:
+                extended = true; // same code as Control_L, plus the prefix
+                keys.push_back(0x14);
+                break;
+             default:
+               warn("Unknown extended key %#x\n", key);
+               return; // nothing has been added to keys at this point
+            }
+
+            // The prefixes below go to the FRONT of keys; this assumes keys
+            // was empty on entry -- TODO confirm against callers
+            if (extended) {
+                if (down) {
+                    keys.push_front(ExtendedKey);
+                } else {
+                    keys.push_front(BreakKey); // result: Extended, Break, code
+                    keys.push_front(ExtendedKey);
+                }
+            } else {
+                if (!down)
+                    keys.push_front(BreakKey);
+            }
+        } // upper keys
+    } // extended keys
+    return;
+}
+
+} /* namespace Ps2 */
+
diff --git a/src/dev/ps2.hh b/src/dev/ps2.hh
new file mode 100644
index 000000000..73f3f9cd8
--- /dev/null
+++ b/src/dev/ps2.hh
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2011 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Ali Saidi
+ */
+
+#ifndef __DEV_PS2_HH__
+#define __DEV_PS2_HH__
+
+#include <stdint.h>
+#include <list>
+
+#include "base/bitunion.hh"
+/** @file misc functions and constants required to interface with or emulate ps2
+ * devices
+ */
+
+namespace Ps2 {
+enum { // PS/2 byte values; meanings are inferred from the names -- TODO confirm
+    Ps2Reset        = 0xff,
+    SelfTestPass    = 0xAA,
+    SetStatusLed    = 0xed,
+    SetResolution   = 0xe8,
+    StatusRequest   = 0xe9,
+    SetScaling1_2   = 0xe7,
+    SetScaling1_1   = 0xe6,
+    ReadId          = 0xf2,
+    TpReadId        = 0xe1,
+    Ack             = 0xfa,
+    SetRate         = 0xf3,
+    Enable          = 0xf4,
+    Disable         = 0xf6,
+    KeyboardId      = 0xab,
+    TouchKitId      = 0x0a,
+    MouseId         = 0x00,
+};
+
+/** A bitfield that represents the first byte of a mouse movement packet
+ */
+BitUnion8(Ps2MouseMovement)
+    Bitfield<0> leftButton;
+    Bitfield<1> rightButton;
+    Bitfield<2> middleButton;
+    Bitfield<3> one; // presumably the always-set bit of this byte -- TODO confirm
+    Bitfield<4> xSign; // presumably sign of the x movement -- TODO confirm
+    Bitfield<5> ySign; // presumably sign of the y movement -- TODO confirm
+    Bitfield<6> xOverflow;
+    Bitfield<7> yOverflow;
+EndBitUnion(Ps2MouseMovement)
+
+/** Convert an x11 key symbol into a set of ps2 characters.
+ * @param key x11 key symbol
+ * @param down true if the key is being pressed, false if it is released
+ * @param cur_shift if device has already sent a shift; updated by the call
+ * @param keys list that receives the ps2 bytes emulating the x11 key symbol
+ */
+void keySymToPs2(uint32_t key, bool down, bool &cur_shift,
+        std::list<uint8_t> &keys);
+
+} /* namespace Ps2 */
+#endif // __DEV_PS2_HH__
diff --git a/src/mem/protocol/MESI_CMP_directory-L1cache.sm b/src/mem/protocol/MESI_CMP_directory-L1cache.sm
index 8744a7122..4442cee41 100644
--- a/src/mem/protocol/MESI_CMP_directory-L1cache.sm
+++ b/src/mem/protocol/MESI_CMP_directory-L1cache.sm
@@ -287,20 +287,21 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
         if (in_msg.Type == CacheRequestType:IFETCH) {
           // ** INSTRUCTION ACCESS ***
 
-          // Check to see if it is in the OTHER L1
-          Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress);
-          if (is_valid(L1Dcache_entry)) {
-            // The block is in the wrong L1, put the request on the queue to the shared L2
-            trigger(Event:L1_Replacement, in_msg.LineAddress,
-                    L1Dcache_entry, L1_TBEs[in_msg.LineAddress]);
-          }
-
           Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress);
           if (is_valid(L1Icache_entry)) {
             // The tag matches for the L1, so the L1 asks the L2 for it.
             trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
                     L1Icache_entry, L1_TBEs[in_msg.LineAddress]);
           } else {
+
+            // Check to see if it is in the OTHER L1
+            Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress);
+            if (is_valid(L1Dcache_entry)) {
+              // The block is in the wrong L1, put the request on the queue to the shared L2
+              trigger(Event:L1_Replacement, in_msg.LineAddress,
+                      L1Dcache_entry, L1_TBEs[in_msg.LineAddress]);
+            }
+
             if (L1IcacheMemory.cacheAvail(in_msg.LineAddress)) {
               // L1 does't have the line, but we have space for it in the L1 so let's see if the L2 has it
               trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
@@ -313,21 +314,23 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
             }
           }
         } else {
-          // *** DATA ACCESS ***
-          // Check to see if it is in the OTHER L1
-          Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress);
-          if (is_valid(L1Icache_entry)) {
-            // The block is in the wrong L1, put the request on the queue to the shared L2
-            trigger(Event:L1_Replacement, in_msg.LineAddress,
-                    L1Icache_entry, L1_TBEs[in_msg.LineAddress]);
-          }
 
+          // *** DATA ACCESS ***
           Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress);
           if (is_valid(L1Dcache_entry)) {
             // The tag matches for the L1, so the L1 ask the L2 for it
             trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
                     L1Dcache_entry, L1_TBEs[in_msg.LineAddress]);
           } else {
+
+            // Check to see if it is in the OTHER L1
+            Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress);
+            if (is_valid(L1Icache_entry)) {
+              // The block is in the wrong L1, put the request on the queue to the shared L2
+              trigger(Event:L1_Replacement, in_msg.LineAddress,
+                      L1Icache_entry, L1_TBEs[in_msg.LineAddress]);
+            }
+
             if (L1DcacheMemory.cacheAvail(in_msg.LineAddress)) {
               // L1 does't have the line, but we have space for it in the L1 let's see if the L2 has it
               trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
diff --git a/src/mem/protocol/MOESI_CMP_directory-L1cache.sm b/src/mem/protocol/MOESI_CMP_directory-L1cache.sm
index 4082f23c9..e590c952a 100644
--- a/src/mem/protocol/MOESI_CMP_directory-L1cache.sm
+++ b/src/mem/protocol/MOESI_CMP_directory-L1cache.sm
@@ -44,7 +44,6 @@ machine(L1Cache, "Directory protocol")
   // From this node's L1 cache TO the network
   // a local L1 -> this L2 bank, currently ordered with directory forwarded requests
   MessageBuffer requestFromL1Cache, network="To", virtual_network="0", ordered="false";
-  MessageBuffer foo, network="To", virtual_network="1", ordered="false";
   // a local L1 -> this L2 bank
   MessageBuffer responseFromL1Cache, network="To", virtual_network="2", ordered="false";
 //  MessageBuffer writebackFromL1Cache, network="To", virtual_network="3", ordered="false";
@@ -53,7 +52,6 @@ machine(L1Cache, "Directory protocol")
   // To this node's L1 cache FROM the network
   // a L2 bank -> this L1
   MessageBuffer requestToL1Cache, network="From", virtual_network="0", ordered="false";
-  MessageBuffer goo, network="From", virtual_network="1", ordered="false";
   // a L2 bank -> this L1
   MessageBuffer responseToL1Cache, network="From", virtual_network="2", ordered="false";
 
@@ -229,7 +227,6 @@ machine(L1Cache, "Directory protocol")
   out_port(requestNetwork_out, RequestMsg, requestFromL1Cache);
   out_port(responseNetwork_out, ResponseMsg, responseFromL1Cache);
   out_port(triggerQueue_out, TriggerMsg, triggerQueue);
-  out_port(foo_out, ResponseMsg, foo);
 
   // ** IN_PORTS **
 
@@ -242,15 +239,6 @@ machine(L1Cache, "Directory protocol")
     }
   }
 
-
-  in_port(goo_in, RequestMsg, goo) {
-    if (goo_in.isReady()) {
-      peek(goo_in, RequestMsg) {
-        assert(false);
-      }
-    }
-  }
-
   // Trigger Queue
   in_port(triggerQueue_in, TriggerMsg, triggerQueue) {
     if (triggerQueue_in.isReady()) {
@@ -338,14 +326,6 @@ machine(L1Cache, "Directory protocol")
         if (in_msg.Type == CacheRequestType:IFETCH) {
           // ** INSTRUCTION ACCESS ***
 
-          Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress);
-          // Check to see if it is in the OTHER L1
-          if (is_valid(L1Dcache_entry)) {
-            // The block is in the wrong L1, put the request on the queue to the shared L2
-            trigger(Event:L1_Replacement, in_msg.LineAddress, L1Dcache_entry,
-                    TBEs[in_msg.LineAddress]);
-          }
-
           Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress);
           if (is_valid(L1Icache_entry)) {
             // The tag matches for the L1, so the L1 asks the L2 for it.
@@ -353,6 +333,14 @@ machine(L1Cache, "Directory protocol")
                     in_msg.LineAddress, L1Icache_entry,
                     TBEs[in_msg.LineAddress]);
           } else {
+
+            Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress);
+            // Check to see if it is in the OTHER L1
+            if (is_valid(L1Dcache_entry)) {
+              // The block is in the wrong L1, put the request on the queue to the shared L2
+              trigger(Event:L1_Replacement, in_msg.LineAddress, L1Dcache_entry,
+                      TBEs[in_msg.LineAddress]);
+            }
             if (L1IcacheMemory.cacheAvail(in_msg.LineAddress)) {
               // L1 does't have the line, but we have space for it in the L1 so let's see if the L2 has it
               trigger(mandatory_request_type_to_event(in_msg.Type),
@@ -369,14 +357,6 @@ machine(L1Cache, "Directory protocol")
         } else {
           // *** DATA ACCESS ***
 
-          Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress);
-          // Check to see if it is in the OTHER L1
-          if (is_valid(L1Icache_entry)) {
-            // The block is in the wrong L1, put the request on the queue to the shared L2
-            trigger(Event:L1_Replacement, in_msg.LineAddress,
-                    L1Icache_entry, TBEs[in_msg.LineAddress]);
-          }
-
           Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress);
           if (is_valid(L1Dcache_entry)) {
             // The tag matches for the L1, so the L1 ask the L2 for it
@@ -384,6 +364,14 @@ machine(L1Cache, "Directory protocol")
                     in_msg.LineAddress, L1Dcache_entry,
                     TBEs[in_msg.LineAddress]);
           } else {
+
+            Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress);
+            // Check to see if it is in the OTHER L1
+            if (is_valid(L1Icache_entry)) {
+              // The block is in the wrong L1, put the request on the queue to the shared L2
+              trigger(Event:L1_Replacement, in_msg.LineAddress,
+                      L1Icache_entry, TBEs[in_msg.LineAddress]);
+            }
             if (L1DcacheMemory.cacheAvail(in_msg.LineAddress)) {
               // L1 does't have the line, but we have space for it in the L1 let's see if the L2 has it
               trigger(mandatory_request_type_to_event(in_msg.Type),
@@ -411,6 +399,7 @@ machine(L1Cache, "Directory protocol")
         out_msg.Address := address;
         out_msg.Type := CoherenceRequestType:GETS;
         out_msg.Requestor := machineID;
+        out_msg.RequestorMachine := MachineType:L1Cache;
         out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, 
               l2_select_low_bit, l2_select_num_bits));
         out_msg.MessageSize := MessageSizeType:Request_Control;
@@ -455,6 +444,7 @@ machine(L1Cache, "Directory protocol")
       out_msg.Address := address;
       out_msg.Type := CoherenceRequestType:PUTO;
       out_msg.Requestor := machineID;
+      out_msg.RequestorMachine := MachineType:L1Cache;
       out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, 
             l2_select_low_bit, l2_select_num_bits));
       out_msg.MessageSize := MessageSizeType:Writeback_Control;
@@ -467,6 +457,7 @@ machine(L1Cache, "Directory protocol")
       out_msg.Address := address;
       out_msg.Type := CoherenceRequestType:PUTS;
       out_msg.Requestor := machineID;
+      out_msg.RequestorMachine := MachineType:L1Cache;
       out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, 
             l2_select_low_bit, l2_select_num_bits));
       out_msg.MessageSize := MessageSizeType:Writeback_Control;
@@ -481,6 +472,7 @@ machine(L1Cache, "Directory protocol")
           out_msg.Address := address;
           out_msg.Type := CoherenceResponseType:DATA;
           out_msg.Sender := machineID;
+          out_msg.SenderMachine := MachineType:L1Cache;
           out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, 
                 l2_select_low_bit, l2_select_num_bits));
           out_msg.DataBlk := cache_entry.DataBlk;
@@ -496,6 +488,7 @@ machine(L1Cache, "Directory protocol")
           out_msg.Address := address;
           out_msg.Type := CoherenceResponseType:DATA;
           out_msg.Sender := machineID;
+          out_msg.SenderMachine := MachineType:L1Cache;
           out_msg.Destination.add(in_msg.Requestor);
           out_msg.DataBlk := cache_entry.DataBlk;
           // out_msg.Dirty := cache_entry.Dirty;
@@ -514,6 +507,7 @@ machine(L1Cache, "Directory protocol")
       out_msg.Address := address;
       out_msg.Type := CoherenceResponseType:DATA;
       out_msg.Sender := machineID;
+      out_msg.SenderMachine := MachineType:L1Cache;
       out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, 
             l2_select_low_bit, l2_select_num_bits));
       out_msg.DataBlk := cache_entry.DataBlk;
@@ -592,6 +586,7 @@ machine(L1Cache, "Directory protocol")
       out_msg.Address := address;
       out_msg.Type := CoherenceResponseType:UNBLOCK;
       out_msg.Sender := machineID;
+      out_msg.SenderMachine := MachineType:L1Cache;
       out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, 
             l2_select_low_bit, l2_select_num_bits));
       out_msg.MessageSize := MessageSizeType:Unblock_Control;
@@ -690,6 +685,7 @@ machine(L1Cache, "Directory protocol")
           out_msg.Address := address;
           out_msg.Type := CoherenceResponseType:DATA;
           out_msg.Sender := machineID;
+          out_msg.SenderMachine := MachineType:L1Cache;
           out_msg.Destination.add(in_msg.Requestor);
           out_msg.DataBlk := tbe.DataBlk;
           // out_msg.Dirty := tbe.Dirty;
@@ -703,6 +699,7 @@ machine(L1Cache, "Directory protocol")
           out_msg.Address := address;
           out_msg.Type := CoherenceResponseType:DATA;
           out_msg.Sender := machineID;
+          out_msg.SenderMachine := MachineType:L1Cache;
           out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, 
                 l2_select_low_bit, l2_select_num_bits));
           out_msg.DataBlk := tbe.DataBlk;
@@ -723,6 +720,7 @@ machine(L1Cache, "Directory protocol")
           out_msg.Address := address;
           out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE;
           out_msg.Sender := machineID;
+          out_msg.SenderMachine := MachineType:L1Cache;
           out_msg.Destination.add(in_msg.Requestor);
           out_msg.DataBlk := tbe.DataBlk;
           out_msg.Dirty := tbe.Dirty;
@@ -735,6 +733,7 @@ machine(L1Cache, "Directory protocol")
           out_msg.Address := address;
           out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE;
           out_msg.Sender := machineID;
+          out_msg.SenderMachine := MachineType:L1Cache;
           out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, 
                 l2_select_low_bit, l2_select_num_bits));
           out_msg.DataBlk := tbe.DataBlk;
diff --git a/src/mem/protocol/MOESI_CMP_token-L1cache.sm b/src/mem/protocol/MOESI_CMP_token-L1cache.sm
index 00e9404c9..226f21374 100644
--- a/src/mem/protocol/MOESI_CMP_token-L1cache.sm
+++ b/src/mem/protocol/MOESI_CMP_token-L1cache.sm
@@ -647,20 +647,21 @@ machine(L1Cache, "Token protocol")
         if (in_msg.Type == CacheRequestType:IFETCH) {
           // ** INSTRUCTION ACCESS ***
 
-          // Check to see if it is in the OTHER L1
-          Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress);
-          if (is_valid(L1Dcache_entry)) {
-            // The block is in the wrong L1, try to write it to the L2
-              trigger(Event:L1_Replacement, in_msg.LineAddress,
-                      L1Dcache_entry, tbe);
-          }
-
           Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress);
           if (is_valid(L1Icache_entry)) {
             // The tag matches for the L1, so the L1 fetches the line.  We know it can't be in the L2 due to exclusion
             trigger(mandatory_request_type_to_event(in_msg.Type),
                     in_msg.LineAddress, L1Icache_entry, tbe);
           } else {
+
+            // Check to see if it is in the OTHER L1
+            Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress);
+            if (is_valid(L1Dcache_entry)) {
+              // The block is in the wrong L1, try to write it to the L2
+                trigger(Event:L1_Replacement, in_msg.LineAddress,
+                        L1Dcache_entry, tbe);
+            }
+
             if (L1IcacheMemory.cacheAvail(in_msg.LineAddress)) {
               // L1 does't have the line, but we have space for it in the L1
               trigger(mandatory_request_type_to_event(in_msg.Type),
@@ -676,21 +677,21 @@ machine(L1Cache, "Token protocol")
         } else {
           // *** DATA ACCESS ***
 
-            // Check to see if it is in the OTHER L1
-          Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress);
-
-          if (is_valid(L1Icache_entry)) {
-            // The block is in the wrong L1, try to write it to the L2
-            trigger(Event:L1_Replacement, in_msg.LineAddress,
-                    L1Icache_entry, tbe);
-          }
-
           Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress);
           if (is_valid(L1Dcache_entry)) {
             // The tag matches for the L1, so the L1 fetches the line.  We know it can't be in the L2 due to exclusion
             trigger(mandatory_request_type_to_event(in_msg.Type),
                     in_msg.LineAddress, L1Dcache_entry, tbe);
           } else {
+
+            // Check to see if it is in the OTHER L1
+            Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress);
+            if (is_valid(L1Icache_entry)) {
+              // The block is in the wrong L1, try to write it to the L2
+              trigger(Event:L1_Replacement, in_msg.LineAddress,
+                      L1Icache_entry, tbe);
+            }
+
             if (L1DcacheMemory.cacheAvail(in_msg.LineAddress)) {
               // L1 does't have the line, but we have space for it in the L1
               trigger(mandatory_request_type_to_event(in_msg.Type),
diff --git a/src/mem/protocol/MOESI_hammer-cache.sm b/src/mem/protocol/MOESI_hammer-cache.sm
index 26598f541..f9d5ffcab 100644
--- a/src/mem/protocol/MOESI_hammer-cache.sm
+++ b/src/mem/protocol/MOESI_hammer-cache.sm
@@ -377,26 +377,26 @@ machine(L1Cache, "AMD Hammer-like protocol")
         if (in_msg.Type == CacheRequestType:IFETCH) {
           // ** INSTRUCTION ACCESS ***
 
-          // Check to see if it is in the OTHER L1
-          Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress);
-          if (is_valid(L1Dcache_entry)) {
-            // The block is in the wrong L1, try to write it to the L2
-            if (L2cacheMemory.cacheAvail(in_msg.LineAddress)) {
-              trigger(Event:L1_to_L2, in_msg.LineAddress, L1Dcache_entry, tbe);
-            } else {
-              trigger(Event:L2_Replacement,
-                      L2cacheMemory.cacheProbe(in_msg.LineAddress),
-                      getL2CacheEntry(L2cacheMemory.cacheProbe(in_msg.LineAddress)),
-                      TBEs[L2cacheMemory.cacheProbe(in_msg.LineAddress)]);
-            }
-          }
-
           Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress);
           if (is_valid(L1Icache_entry)) {
             // The tag matches for the L1, so the L1 fetches the line.  We know it can't be in the L2 due to exclusion
             trigger(mandatory_request_type_to_event(in_msg.Type),
                     in_msg.LineAddress, L1Icache_entry, tbe);
           } else {
+            // Check to see if it is in the OTHER L1
+            Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress);
+            if (is_valid(L1Dcache_entry)) {
+              // The block is in the wrong L1, try to write it to the L2
+              if (L2cacheMemory.cacheAvail(in_msg.LineAddress)) {
+                trigger(Event:L1_to_L2, in_msg.LineAddress, L1Dcache_entry, tbe);
+              } else {
+                trigger(Event:L2_Replacement,
+                        L2cacheMemory.cacheProbe(in_msg.LineAddress),
+                        getL2CacheEntry(L2cacheMemory.cacheProbe(in_msg.LineAddress)),
+                        TBEs[L2cacheMemory.cacheProbe(in_msg.LineAddress)]);
+              }
+            }
+
             if (L1IcacheMemory.cacheAvail(in_msg.LineAddress)) {
               // L1 does't have the line, but we have space for it in the L1
 
@@ -430,26 +430,27 @@ machine(L1Cache, "AMD Hammer-like protocol")
         } else {
           // *** DATA ACCESS ***
 
-          // Check to see if it is in the OTHER L1
-          Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress);
-          if (is_valid(L1Icache_entry)) {
-            // The block is in the wrong L1, try to write it to the L2
-            if (L2cacheMemory.cacheAvail(in_msg.LineAddress)) {
-              trigger(Event:L1_to_L2, in_msg.LineAddress, L1Icache_entry, tbe);
-            } else {
-              trigger(Event:L2_Replacement,
-                      L2cacheMemory.cacheProbe(in_msg.LineAddress),
-                      getL2CacheEntry(L2cacheMemory.cacheProbe(in_msg.LineAddress)),
-                      TBEs[L2cacheMemory.cacheProbe(in_msg.LineAddress)]);
-            }
-          }
-
           Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress);
           if (is_valid(L1Dcache_entry)) {
             // The tag matches for the L1, so the L1 fetches the line.  We know it can't be in the L2 due to exclusion
             trigger(mandatory_request_type_to_event(in_msg.Type),
                     in_msg.LineAddress, L1Dcache_entry, tbe);
           } else {
+
+            // Check to see if it is in the OTHER L1
+            Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress);
+            if (is_valid(L1Icache_entry)) {
+              // The block is in the wrong L1, try to write it to the L2
+              if (L2cacheMemory.cacheAvail(in_msg.LineAddress)) {
+                trigger(Event:L1_to_L2, in_msg.LineAddress, L1Icache_entry, tbe);
+              } else {
+                trigger(Event:L2_Replacement,
+                        L2cacheMemory.cacheProbe(in_msg.LineAddress),
+                        getL2CacheEntry(L2cacheMemory.cacheProbe(in_msg.LineAddress)),
+                        TBEs[L2cacheMemory.cacheProbe(in_msg.LineAddress)]);
+              }
+            }
+
             if (L1DcacheMemory.cacheAvail(in_msg.LineAddress)) {
               // L1 does't have the line, but we have space for it in the L1
               Entry L2cache_entry := getL2CacheEntry(in_msg.LineAddress);
diff --git a/src/mem/ruby/buffers/MessageBuffer.cc b/src/mem/ruby/buffers/MessageBuffer.cc
index f6b79c580..225595005 100644
--- a/src/mem/ruby/buffers/MessageBuffer.cc
+++ b/src/mem/ruby/buffers/MessageBuffer.cc
@@ -58,6 +58,8 @@ MessageBuffer::MessageBuffer(const string &name)
     m_name = name;
 
     m_stall_msg_map.clear();
+    m_input_link_id = 0;
+    m_vnet_id = 0;
 }
 
 int
@@ -228,6 +230,7 @@ MessageBuffer::enqueue(MsgPtr message, Time delta)
     // Schedule the wakeup
     if (m_consumer_ptr != NULL) {
         g_eventQueue_ptr->scheduleEventAbsolute(m_consumer_ptr, arrival_time);
+        m_consumer_ptr->storeEventInfo(m_vnet_id);
     } else {
         panic("No consumer: %s name: %s\n", *this, m_name);
     }
diff --git a/src/mem/ruby/buffers/MessageBuffer.hh b/src/mem/ruby/buffers/MessageBuffer.hh
index 62cc65670..88df5b788 100644
--- a/src/mem/ruby/buffers/MessageBuffer.hh
+++ b/src/mem/ruby/buffers/MessageBuffer.hh
@@ -142,6 +142,9 @@ class MessageBuffer
     void printStats(std::ostream& out);
     void clearStats() { m_not_avail_count = 0; m_msg_counter = 0; }
 
+    void setIncomingLink(int link_id) { m_input_link_id = link_id; }
+    void setVnet(int net) { m_vnet_id = net; }  // id passed to the consumer
+
   private:
     //added by SS
     int m_recycle_latency;
@@ -184,6 +187,9 @@ class MessageBuffer
     bool m_ordering_set;
     bool m_randomization;
     Time m_last_arrival_time;
+
+    int m_input_link_id;
+    int m_vnet_id;
 };
 
 inline std::ostream&
diff --git a/src/mem/ruby/common/Consumer.hh b/src/mem/ruby/common/Consumer.hh
index c1f8bc42e..a119abb39 100644
--- a/src/mem/ruby/common/Consumer.hh
+++ b/src/mem/ruby/common/Consumer.hh
@@ -67,6 +67,7 @@ class Consumer
 
     virtual void wakeup() = 0;
     virtual void print(std::ostream& out) const = 0;
+    virtual void storeEventInfo(int info) {}  // no-op unless overridden
 
     const Time&
     getLastScheduledWakeup() const
diff --git a/src/mem/ruby/network/simple/PerfectSwitch.cc b/src/mem/ruby/network/simple/PerfectSwitch.cc
index 7229c724f..5c461c63f 100644
--- a/src/mem/ruby/network/simple/PerfectSwitch.cc
+++ b/src/mem/ruby/network/simple/PerfectSwitch.cc
@@ -54,6 +54,11 @@ PerfectSwitch::PerfectSwitch(SwitchID sid, SimpleNetwork* network_ptr)
     m_round_robin_start = 0;
     m_network_ptr = network_ptr;
     m_wakeups_wo_switch = 0;
+
+    for(int i = 0;i < m_virtual_networks;++i)
+    {
+        m_pending_message_count.push_back(0);
+    }
 }
 
 void
@@ -62,12 +67,15 @@ PerfectSwitch::addInPort(const vector<MessageBuffer*>& in)
     assert(in.size() == m_virtual_networks);
     NodeID port = m_in.size();
     m_in.push_back(in);
+
     for (int j = 0; j < m_virtual_networks; j++) {
         m_in[port][j]->setConsumer(this);
         string desc = csprintf("[Queue from port %s %s %s to PerfectSwitch]",
             NodeIDToString(m_switch_id), NodeIDToString(port),
             NodeIDToString(j));
         m_in[port][j]->setDescription(desc);
+        m_in[port][j]->setIncomingLink(port);
+        m_in[port][j]->setVnet(j);
     }
 }
 
@@ -154,161 +162,170 @@ PerfectSwitch::wakeup()
             m_round_robin_start = 0;
         }
 
-        // for all input ports, use round robin scheduling
-        for (int counter = 0; counter < m_in.size(); counter++) {
-            // Round robin scheduling
-            incoming++;
-            if (incoming >= m_in.size()) {
-                incoming = 0;
-            }
+        if(m_pending_message_count[vnet] > 0) {
+            // for all input ports, use round robin scheduling
+            for (int counter = 0; counter < m_in.size(); counter++) {
+                // Round robin scheduling
+                incoming++;
+                if (incoming >= m_in.size()) {
+                    incoming = 0;
+                }
 
-            // temporary vectors to store the routing results
-            vector<LinkID> output_links;
-            vector<NetDest> output_link_destinations;
-
-            // Is there a message waiting?
-            while (m_in[incoming][vnet]->isReady()) {
-                DPRINTF(RubyNetwork, "incoming: %d\n", incoming);
-
-                // Peek at message
-                msg_ptr = m_in[incoming][vnet]->peekMsgPtr();
-                net_msg_ptr = safe_cast<NetworkMessage*>(msg_ptr.get());
-                DPRINTF(RubyNetwork, "Message: %s\n", (*net_msg_ptr));
-
-                output_links.clear();
-                output_link_destinations.clear();
-                NetDest msg_dsts =
-                    net_msg_ptr->getInternalDestination();
-
-                // Unfortunately, the token-protocol sends some
-                // zero-destination messages, so this assert isn't valid
-                // assert(msg_dsts.count() > 0);
-
-                assert(m_link_order.size() == m_routing_table.size());
-                assert(m_link_order.size() == m_out.size());
-
-                if (m_network_ptr->getAdaptiveRouting()) {
-                    if (m_network_ptr->isVNetOrdered(vnet)) {
-                        // Don't adaptively route
-                        for (int out = 0; out < m_out.size(); out++) {
-                            m_link_order[out].m_link = out;
-                            m_link_order[out].m_value = 0;
-                        }
-                    } else {
-                        // Find how clogged each link is
-                        for (int out = 0; out < m_out.size(); out++) {
-                            int out_queue_length = 0;
-                            for (int v = 0; v < m_virtual_networks; v++) {
-                                out_queue_length += m_out[out][v]->getSize();
+                // temporary vectors to store the routing results
+                vector<LinkID> output_links;
+                vector<NetDest> output_link_destinations;
+
+                // Is there a message waiting?
+                while (m_in[incoming][vnet]->isReady()) {
+                    DPRINTF(RubyNetwork, "incoming: %d\n", incoming);
+
+                    // Peek at message
+                    msg_ptr = m_in[incoming][vnet]->peekMsgPtr();
+                    net_msg_ptr = safe_cast<NetworkMessage*>(msg_ptr.get());
+                    DPRINTF(RubyNetwork, "Message: %s\n", (*net_msg_ptr));
+
+                    output_links.clear();
+                    output_link_destinations.clear();
+                    NetDest msg_dsts =
+                        net_msg_ptr->getInternalDestination();
+
+                    // Unfortunately, the token-protocol sends some
+                    // zero-destination messages, so this assert isn't valid
+                    // assert(msg_dsts.count() > 0);
+
+                    assert(m_link_order.size() == m_routing_table.size());
+                    assert(m_link_order.size() == m_out.size());
+
+                    if (m_network_ptr->getAdaptiveRouting()) {
+                        if (m_network_ptr->isVNetOrdered(vnet)) {
+                            // Don't adaptively route
+                            for (int out = 0; out < m_out.size(); out++) {
+                                m_link_order[out].m_link = out;
+                                m_link_order[out].m_value = 0;
+                            }
+                        } else {
+                            // Find how clogged each link is
+                            for (int out = 0; out < m_out.size(); out++) {
+                                int out_queue_length = 0;
+                                for (int v = 0; v < m_virtual_networks; v++) {
+                                    out_queue_length += m_out[out][v]->getSize();
+                                }
+                                int value =
+                                    (out_queue_length << 8) | (random() & 0xff);
+                                m_link_order[out].m_link = out;
+                                m_link_order[out].m_value = value;
                             }
-                            int value =
-                                (out_queue_length << 8) | (random() & 0xff);
-                            m_link_order[out].m_link = out;
-                            m_link_order[out].m_value = value;
+
+                            // Look at the most empty link first
+                            sort(m_link_order.begin(), m_link_order.end());
                         }
+                    }
 
-                        // Look at the most empty link first
-                        sort(m_link_order.begin(), m_link_order.end());
+                    for (int i = 0; i < m_routing_table.size(); i++) {
+                        // pick the next link to look at
+                        int link = m_link_order[i].m_link;
+                        NetDest dst = m_routing_table[link];
+                        DPRINTF(RubyNetwork, "dst: %s\n", dst);
+
+                        if (!msg_dsts.intersectionIsNotEmpty(dst))
+                            continue;
+
+                        // Remember what link we're using
+                        output_links.push_back(link);
+
+                        // Need to remember which destinations need this
+                        // message in another vector.  This Set is the
+                        // intersection of the routing_table entry and the
+                        // current destination set.  The intersection must
+                        // not be empty, since we are inside "if"
+                        output_link_destinations.push_back(msg_dsts.AND(dst));
+
+                        // Next, we update the msg_destination not to
+                        // include those nodes that were already handled
+                        // by this link
+                        msg_dsts.removeNetDest(dst);
                     }
-                }
 
-                for (int i = 0; i < m_routing_table.size(); i++) {
-                    // pick the next link to look at
-                    int link = m_link_order[i].m_link;
-                    NetDest dst = m_routing_table[link];
-                    DPRINTF(RubyNetwork, "dst: %s\n", dst);
-
-                    if (!msg_dsts.intersectionIsNotEmpty(dst))
-                        continue;
-
-                    // Remember what link we're using
-                    output_links.push_back(link);
-
-                    // Need to remember which destinations need this
-                    // message in another vector.  This Set is the
-                    // intersection of the routing_table entry and the
-                    // current destination set.  The intersection must
-                    // not be empty, since we are inside "if"
-                    output_link_destinations.push_back(msg_dsts.AND(dst));
-
-                    // Next, we update the msg_destination not to
-                    // include those nodes that were already handled
-                    // by this link
-                    msg_dsts.removeNetDest(dst);
-                }
+                    assert(msg_dsts.count() == 0);
+                    //assert(output_links.size() > 0);
+
+                    // Check for resources - for all outgoing queues
+                    bool enough = true;
+                    for (int i = 0; i < output_links.size(); i++) {
+                        int outgoing = output_links[i];
+                        if (!m_out[outgoing][vnet]->areNSlotsAvailable(1))
+                            enough = false;
+                        DPRINTF(RubyNetwork, "Checking if node is blocked\n"
+                                "outgoing: %d, vnet: %d, enough: %d\n",
+                                outgoing, vnet, enough);
+                    }
 
-                assert(msg_dsts.count() == 0);
-                //assert(output_links.size() > 0);
-
-                // Check for resources - for all outgoing queues
-                bool enough = true;
-                for (int i = 0; i < output_links.size(); i++) {
-                    int outgoing = output_links[i];
-                    if (!m_out[outgoing][vnet]->areNSlotsAvailable(1))
-                        enough = false;
-                    DPRINTF(RubyNetwork, "Checking if node is blocked\n"
-                            "outgoing: %d, vnet: %d, enough: %d\n",
-                            outgoing, vnet, enough);
-                }
+                    // There were not enough resources
+                    if (!enough) {
+                        g_eventQueue_ptr->scheduleEvent(this, 1);
+                        DPRINTF(RubyNetwork, "Can't deliver message since a node "
+                                "is blocked\n"
+                                "Message: %s\n", (*net_msg_ptr));
+                        break; // go to next incoming port
+                    }
 
-                // There were not enough resources
-                if (!enough) {
-                    g_eventQueue_ptr->scheduleEvent(this, 1);
-                    DPRINTF(RubyNetwork, "Can't deliver message since a node "
-                            "is blocked\n"
-                            "Message: %s\n", (*net_msg_ptr));
-                    break; // go to next incoming port
-                }
+                    MsgPtr unmodified_msg_ptr;
 
-                MsgPtr unmodified_msg_ptr;
+                    if (output_links.size() > 1) {
+                        // If we are sending this message down more than
+                        // one link (size>1), we need to make a copy of
+                        // the message so each branch can have a different
+                        // internal destination we need to create an
+                        // unmodified MsgPtr because the MessageBuffer
+                        // enqueue func will modify the message
 
-                if (output_links.size() > 1) {
-                    // If we are sending this message down more than
-                    // one link (size>1), we need to make a copy of
-                    // the message so each branch can have a different
-                    // internal destination we need to create an
-                    // unmodified MsgPtr because the MessageBuffer
-                    // enqueue func will modify the message
+                        // This magic line creates a private copy of the
+                        // message
+                        unmodified_msg_ptr = msg_ptr->clone();
+                    }
 
-                    // This magic line creates a private copy of the
-                    // message
-                    unmodified_msg_ptr = msg_ptr->clone();
-                }
+                    // Enqueue it - for all outgoing queues
+                    for (int i=0; i<output_links.size(); i++) {
+                        int outgoing = output_links[i];
 
-                // Enqueue it - for all outgoing queues
-                for (int i=0; i<output_links.size(); i++) {
-                    int outgoing = output_links[i];
+                        if (i > 0) {
+                            // create a private copy of the unmodified
+                            // message
+                            msg_ptr = unmodified_msg_ptr->clone();
+                        }
 
-                    if (i > 0) {
-                        // create a private copy of the unmodified
-                        // message
-                        msg_ptr = unmodified_msg_ptr->clone();
-                    }
+                        // Change the internal destination set of the
+                        // message so it knows which destinations this
+                        // link is responsible for.
+                        net_msg_ptr = safe_cast<NetworkMessage*>(msg_ptr.get());
+                        net_msg_ptr->getInternalDestination() =
+                            output_link_destinations[i];
 
-                    // Change the internal destination set of the
-                    // message so it knows which destinations this
-                    // link is responsible for.
-                    net_msg_ptr = safe_cast<NetworkMessage*>(msg_ptr.get());
-                    net_msg_ptr->getInternalDestination() =
-                        output_link_destinations[i];
+                        // Enqeue msg
+                        DPRINTF(RubyNetwork, "Switch: %d enqueuing net msg from "
+                                "inport[%d][%d] to outport [%d][%d] time: %lld.\n",
+                                m_switch_id, incoming, vnet, outgoing, vnet,
+                                g_eventQueue_ptr->getTime());
 
-                    // Enqeue msg
-                    DPRINTF(RubyNetwork, "Switch: %d enqueuing net msg from "
-                            "inport[%d][%d] to outport [%d][%d] time: %lld.\n",
-                            m_switch_id, incoming, vnet, outgoing, vnet,
-                            g_eventQueue_ptr->getTime());
+                        m_out[outgoing][vnet]->enqueue(msg_ptr);
+                    }
 
-                    m_out[outgoing][vnet]->enqueue(msg_ptr);
+                    // Dequeue msg
+                    m_in[incoming][vnet]->pop();
+                    m_pending_message_count[vnet]--;
                 }
-
-                // Dequeue msg
-                m_in[incoming][vnet]->pop();
             }
         }
     }
 }
 
 void
+PerfectSwitch::storeEventInfo(int info)
+{
+    m_pending_message_count[info]++;  // info indexes the per-vnet counter
+}
+
+void
 PerfectSwitch::printStats(std::ostream& out) const
 {
     out << "PerfectSwitch printStats" << endl;
diff --git a/src/mem/ruby/network/simple/PerfectSwitch.hh b/src/mem/ruby/network/simple/PerfectSwitch.hh
index a7e577df0..cd0219fd9 100644
--- a/src/mem/ruby/network/simple/PerfectSwitch.hh
+++ b/src/mem/ruby/network/simple/PerfectSwitch.hh
@@ -69,6 +69,7 @@ class PerfectSwitch : public Consumer
     int getOutLinks() const { return m_out.size(); }
 
     void wakeup();
+    void storeEventInfo(int info);
 
     void printStats(std::ostream& out) const;
     void clearStats();
@@ -92,6 +93,7 @@ class PerfectSwitch : public Consumer
     int m_round_robin_start;
     int m_wakeups_wo_switch;
     SimpleNetwork* m_network_ptr;
+    std::vector<int> m_pending_message_count;
 };
 
 inline std::ostream&
diff --git a/src/mem/ruby/slicc_interface/Message.hh b/src/mem/ruby/slicc_interface/Message.hh
index ff94fdd40..7fcfabe9c 100644
--- a/src/mem/ruby/slicc_interface/Message.hh
+++ b/src/mem/ruby/slicc_interface/Message.hh
@@ -57,6 +57,8 @@ class Message : public RefCounted
 
     virtual Message* clone() const = 0;
     virtual void print(std::ostream& out) const = 0;
+    virtual void setIncomingLink(int) {}  // ignored by default
+    virtual void setVnet(int) {}  // ignored by default
 
     void setDelayedCycles(const int& cycles) { m_DelayedCycles = cycles; }
     const int& getDelayedCycles() const {return m_DelayedCycles;}
diff --git a/src/mem/ruby/slicc_interface/NetworkMessage.hh b/src/mem/ruby/slicc_interface/NetworkMessage.hh
index 082481e05..a8f9c625b 100644
--- a/src/mem/ruby/slicc_interface/NetworkMessage.hh
+++ b/src/mem/ruby/slicc_interface/NetworkMessage.hh
@@ -82,9 +82,16 @@ class NetworkMessage : public Message
 
     virtual void print(std::ostream& out) const = 0;
 
+    int getIncomingLink() const { return incoming_link; }
+    void setIncomingLink(int link) { incoming_link = link; }
+    int getVnet() const { return vnet; }
+    void setVnet(int net) { vnet = net; }
+
   private:
     NetDest m_internal_dest;
     bool m_internal_dest_valid;
+    int incoming_link;  // NOTE(review): no initialiser visible - confirm
+    int vnet;           // NOTE(review): no initialiser visible - confirm
 };
 
 inline std::ostream&
diff --git a/src/python/m5/main.py b/src/python/m5/main.py
index cd139ccb3..23a012166 100644
--- a/src/python/m5/main.py
+++ b/src/python/m5/main.py
@@ -61,8 +61,6 @@ add_option('-C', "--copyright", action="store_true", default=False,
     help="Show full copyright information")
 add_option('-R', "--readme", action="store_true", default=False,
     help="Show the readme")
-add_option('-N', "--release-notes", action="store_true", default=False,
-    help="Show the release notes")
 
 # Options for configuring the base simulator
 add_option('-d', "--outdir", metavar="DIR", default="m5out",
@@ -207,13 +205,6 @@ def main():
         print info.README
         print
 
-    if options.release_notes:
-        done = True
-        print 'Release Notes:'
-        print
-        print info.RELEASE_NOTES
-        print
-
     if options.trace_help:
         done = True
         check_tracing()
diff --git a/src/sim/root.cc b/src/sim/root.cc
index 1dc9b6058..d51fcbda6 100644
--- a/src/sim/root.cc
+++ b/src/sim/root.cc
@@ -108,7 +108,18 @@ Root::Root(RootParams *p) : SimObject(p), _enabled(false),
     assert(_root == NULL);
     _root = this;
     lastTime.setTimer();
-    timeSyncEnable(p->time_sync_enable);
+}
+
+void
+Root::initState()
+{
+    timeSyncEnable(params()->time_sync_enable);  // moved out of the ctor
+}
+
+void
+Root::loadState(Checkpoint *cp)
+{
+    timeSyncEnable(params()->time_sync_enable);  // cp itself is not read
 }
 
 Root *
diff --git a/src/sim/root.hh b/src/sim/root.hh
index 2beced9d4..76a508c19 100644
--- a/src/sim/root.hh
+++ b/src/sim/root.hh
@@ -95,7 +95,22 @@ class Root : public SimObject
     /// Set the threshold for time remaining to spin wait.
     void timeSyncSpinThreshold(Time newThreshold);
 
-    Root(RootParams *p);
+    typedef RootParams Params;
+    const Params *  // NULL when _params is not a RootParams (dynamic_cast)
+    params() const
+    {
+        return dynamic_cast<const Params *>(_params);
+    }
+
+    Root(Params *p);
+
+    /** Schedule the timesync event at loadState() so that curTick is correct
+     */
+    void loadState(Checkpoint *cp);
+
+    /** Schedule the timesync event at initState() when not unserializing
+     */
+    void initState();
 };
 
 #endif // __SIM_ROOT_HH__
diff --git a/src/sim/serialize.cc b/src/sim/serialize.cc
index d28f335be..44fe7b2e7 100644
--- a/src/sim/serialize.cc
+++ b/src/sim/serialize.cc
@@ -201,6 +201,23 @@ arrayParamOut(ostream &os, const string &name, const vector<T> &param)
     os << "\n";
 }
 
+// Write a list as "name=v0 v1 ...\n": values separated by single spaces.
+// An empty list yields "name=\n"; the iterator is only advanced while it
+// is valid, so begin() == end() is never incremented.
+template <class T>
+void
+arrayParamOut(ostream &os, const string &name, const list<T> &param)
+{
+    os << name << "=";
+    for (typename list<T>::const_iterator it = param.begin();
+         it != param.end(); ++it) {
+        // separator goes before every element except the first
+        if (it != param.begin())
+            os << " ";
+        showParam(os, *it);
+    }
+    os << "\n";
+}
 
 template <class T>
 void
@@ -326,6 +343,37 @@ arrayParamIn(Checkpoint *cp, const string &section,
     }
 }
 
+// Read a space-separated checkpoint value into a list, replacing any
+// previous contents. Fatal if the entry is missing or a token fails
+// to parse.
+template <class T>
+void
+arrayParamIn(Checkpoint *cp, const string &section,
+             const string &name, list<T> &param)
+{
+    string str;
+    if (!cp->find(section, name, str)) {
+        fatal("Can't unserialize '%s:%s'\n", section, name);
+    }
+    param.clear();
+
+    vector<string> tokens;
+    tokenize(tokens, str, ' ');
+
+    for (vector<string>::size_type i = 0; i < tokens.size(); i++) {
+        T scalar_value = 0;
+        if (!parseParam(tokens[i], scalar_value)) {
+            // Hand the checkpoint text to fatal() as an argument, not as
+            // the format string, so a '%' in it is not read as a conversion.
+            fatal("could not parse \"%s\"", str);
+        }
+
+        // append the parsed value to the list
+        param.push_back(scalar_value);
+    }
+}
+
+
 void
 objParamIn(Checkpoint *cp, const string &section,
            const string &name, SimObject * &param)
@@ -356,7 +404,13 @@ arrayParamOut(ostream &os, const string &name,                          \
               const vector<type> &param);                               \
 template void                                                           \
 arrayParamIn(Checkpoint *cp, const string &section,                     \
-             const string &name, vector<type> &param);
+             const string &name, vector<type> &param);                  \
+template void                                                           \
+arrayParamOut(ostream &os, const string &name,                          \
+              const list<type> &param);                                 \
+template void                                                           \
+arrayParamIn(Checkpoint *cp, const string &section,                     \
+             const string &name, list<type> &param);
 
 INSTANTIATE_PARAM_TEMPLATES(char)
 INSTANTIATE_PARAM_TEMPLATES(signed char)
diff --git a/src/sim/serialize.hh b/src/sim/serialize.hh
index 5ea632ea4..6be8ce3b6 100644
--- a/src/sim/serialize.hh
+++ b/src/sim/serialize.hh
@@ -70,6 +70,10 @@ void arrayParamOut(std::ostream &os, const std::string &name,
                    const std::vector<T> &param);
 
 template <class T>
+void arrayParamOut(std::ostream &os, const std::string &name,
+                   const std::list<T> &param);
+
+template <class T>
 void arrayParamIn(Checkpoint *cp, const std::string &section,
                   const std::string &name, T *param, unsigned size);
 
@@ -77,6 +81,10 @@ template <class T>
 void arrayParamIn(Checkpoint *cp, const std::string &section,
                   const std::string &name, std::vector<T> &param);
 
+template <class T>
+void arrayParamIn(Checkpoint *cp, const std::string &section,
+                  const std::string &name, std::list<T> &param);
+
 void
 objParamIn(Checkpoint *cp, const std::string &section,
            const std::string &name, SimObject * &param);
diff --git a/src/sim/tlb.hh b/src/sim/tlb.hh
index 1512bc0fa..253f12072 100644
--- a/src/sim/tlb.hh
+++ b/src/sim/tlb.hh
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2011 ARM Limited
+ * All rights reserved.
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2006 The Regents of The University of Michigan
  * All rights reserved.
  *
@@ -64,6 +76,12 @@ class BaseTLB : public SimObject
         virtual ~Translation()
         {}
 
+        /**
+         * Signal that the translation has been delayed due to a hw page table
+         * walk.
+         */
+        virtual void markDelayed() = 0;
+
         /*
          * The memory for this object may be dynamically allocated, and it may
          * be responsible for cleaning itself up which will happen in this