78 files changed, 5855 insertions, 900 deletions
diff --git a/src/arch/SConscript b/src/arch/SConscript
index c3ff69f46..0ac25b6c7 100644
--- a/src/arch/SConscript
+++ b/src/arch/SConscript
@@ -51,6 +51,7 @@ isa_switch_hdrs = Split('''
         locked_mem.hh
         mmaped_ipr.hh
 	process.hh
+	predecoder.hh
 	regfile.hh
 	remote_gdb.hh
 	stacktrace.hh
diff --git a/src/arch/alpha/isa/decoder.isa b/src/arch/alpha/isa/decoder.isa
index b62372f66..af1a91a62 100644
--- a/src/arch/alpha/isa/decoder.isa
+++ b/src/arch/alpha/isa/decoder.isa
@@ -728,8 +728,10 @@ decode OPCODE default Unknown::unknown() {
         0: OpcdecFault::hw_st_quad();
         1: decode HW_LDST_QUAD {
             format HwLoad {
-                0: hw_ld({{ EA = (Rb + disp) & ~3; }}, {{ Ra = Mem.ul; }}, L);
-                1: hw_ld({{ EA = (Rb + disp) & ~7; }}, {{ Ra = Mem.uq; }}, Q);
+                0: hw_ld({{ EA = (Rb + disp) & ~3; }}, {{ Ra = Mem.ul; }},
+                         L, IsSerializing, IsSerializeBefore);
+                1: hw_ld({{ EA = (Rb + disp) & ~7; }}, {{ Ra = Mem.uq; }},
+                         Q, IsSerializing, IsSerializeBefore);
             }
         }
     }
@@ -740,9 +742,9 @@ decode OPCODE default Unknown::unknown() {
             1: decode HW_LDST_COND {
                 0: decode HW_LDST_QUAD {
                     0: hw_st({{ EA = (Rb + disp) & ~3; }},
-                {{ Mem.ul = Ra<31:0>; }}, L);
+                {{ Mem.ul = Ra<31:0>; }}, L, IsSerializing, IsSerializeBefore);
                     1: hw_st({{ EA = (Rb + disp) & ~7; }},
-                {{ Mem.uq = Ra.uq; }}, Q);
+                {{ Mem.uq = Ra.uq; }}, Q, IsSerializing, IsSerializeBefore);
                 }
 
                 1: FailUnimpl::hw_st_cond();
diff --git a/src/arch/alpha/predecoder.hh b/src/arch/alpha/predecoder.hh
new file mode 100644
index 000000000..650f2bfa2
--- /dev/null
+++ b/src/arch/alpha/predecoder.hh
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Gabe Black
+ */
+
+#ifndef __ARCH_ALPHA_PREDECODER_HH__
+#define __ARCH_ALPHA_PREDECODER_HH__
+
+#include "arch/alpha/types.hh"
+#include "base/misc.hh"
+#include "config/full_system.hh"
+#include "sim/host.hh"
+
+class ThreadContext;
+
+namespace AlphaISA
+{
+    //Builds an ExtMachInst out of each MachInst handed to moreBytes.
+    class Predecoder
+    {
+      protected:
+        ThreadContext * tc;
+        //The pc of the current instruction (0 until moreBytes sets one)
+        Addr fetchPC;
+        //The extended machine instruction being generated
+        ExtMachInst ext_inst;
+
+      public:
+        Predecoder(ThreadContext * _tc) : tc(_tc), fetchPC(0)
+        {}
+
+        ThreadContext * getTC()
+        {
+            return tc;
+        }
+
+        void setTC(ThreadContext * _tc)
+        {
+            tc = _tc;
+        }
+
+        void process()
+        {}
+
+        //Use this to give data to the predecoder when there is control
+        //flow; off must be 0. The pc is remembered, and in full system
+        //mode bit 0 of the pc is copied into bit 32 of the ExtMachInst.
+        void moreBytes(Addr pc, Addr off, MachInst inst)
+        {
+            fetchPC = pc;
+            assert(off == 0);
+            ext_inst = inst;
+#if FULL_SYSTEM
+            ext_inst |= (static_cast<ExtMachInst>(pc & 0x1) << 32);
+#endif
+        }
+
+        //Use this to give data to the predecoder when instructions are
+        //executed in order; the pc becomes fetchPC + sizeof(machInst).
+        void moreBytes(MachInst machInst)
+        {
+            moreBytes(fetchPC + sizeof(machInst), 0, machInst);
+        }
+
+        bool needMoreBytes()
+        {
+            return true;
+        }
+
+        bool extMachInstReady()
+        {
+            return true;
+        }
+
+        //This returns a constant reference to the ExtMachInst to avoid a copy
+        const ExtMachInst & getExtMachInst()
+        {
+            return ext_inst;
+        }
+    };
+};
+
+#endif // __ARCH_ALPHA_PREDECODER_HH__
diff --git a/src/arch/alpha/utility.hh b/src/arch/alpha/utility.hh
index b7844c7eb..c20394a92 100644
--- a/src/arch/alpha/utility.hh
+++ b/src/arch/alpha/utility.hh
@@ -48,19 +48,6 @@ namespace AlphaISA
         return (tc->readMiscRegNoEffect(AlphaISA::IPR_DTB_CM) & 0x18) != 0;
     }
 
-    static inline ExtMachInst
-    makeExtMI(MachInst inst, Addr pc) {
-#if FULL_SYSTEM
-        ExtMachInst ext_inst = inst;
-        if (pc && 0x1)
-            return ext_inst|=(static_cast<ExtMachInst>(pc & 0x1) << 32);
-        else
-            return ext_inst;
-#else
-        return ExtMachInst(inst);
-#endif
-    }
-
     inline bool isCallerSaveIntegerRegister(unsigned int reg) {
         panic("register classification not implemented");
         return (reg >= 1 && reg <= 8 || reg >= 22 && reg <= 25 || reg == 27);
diff --git a/src/arch/isa_parser.py b/src/arch/isa_parser.py
index 21860a2e1..f3981a6eb 100755
--- a/src/arch/isa_parser.py
+++ b/src/arch/isa_parser.py
@@ -81,12 +81,12 @@ tokens = reserved + (
     # code literal
     'CODELIT',
 
-    # ( ) [ ] { } < > , ; : :: *
+    # ( ) [ ] { } < > , ; . : :: *
     'LPAREN', 'RPAREN',
     'LBRACKET', 'RBRACKET',
     'LBRACE', 'RBRACE',
     'LESS', 'GREATER', 'EQUALS',
-    'COMMA', 'SEMI', 'COLON', 'DBLCOLON',
+    'COMMA', 'SEMI', 'DOT', 'COLON', 'DBLCOLON',
     'ASTERISK',
 
     # C preprocessor directives
@@ -113,6 +113,7 @@ t_GREATER          = r'\>'
 t_EQUALS           = r'='
 t_COMMA            = r','
 t_SEMI             = r';'
+t_DOT              = r'\.'
 t_COLON            = r':'
 t_DBLCOLON         = r'::'
 t_ASTERISK	   = r'\*'
@@ -261,6 +262,7 @@ def p_defs_and_outputs_1(t):
 def p_def_or_output(t):
     '''def_or_output : def_format
                      | def_bitfield
+                     | def_bitfield_struct
                      | def_template
                      | def_operand_types
                      | def_operands
@@ -363,6 +365,23 @@ def p_def_bitfield_1(t):
     hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
     t[0] = GenCode(header_output = hash_define)
 
+# alternate form for a structure member: 'def bitfield <ID> <ID>[.<ID>...];'
+def p_def_bitfield_struct(t):
+    'def_bitfield_struct : DEF opt_signed BITFIELD ID id_with_dot SEMI'
+    if t[2] != '':     # a sign qualifier was supplied
+        error(t.lineno(1), 'error: structure bitfields are always unsigned.')
+    name, member = t[4], 'machInst.%s' % t[5]
+    define_text = '#undef %s\n#define %s\t%s\n' % (name, name, member)
+    t[0] = GenCode(header_output = define_text)
+
+def p_id_with_dot_0(t):
+    'id_with_dot : ID'
+    t[0] = t[1]    # a lone identifier is passed through unchanged
+
+def p_id_with_dot_1(t):
+    'id_with_dot : ID DOT id_with_dot'
+    t[0] = t[1] + t[2] + t[3]    # rebuild the dotted path: ID, '.', rest
+
 def p_opt_signed_0(t):
     'opt_signed : SIGNED'
     t[0] = t[1]
diff --git a/src/arch/mips/predecoder.hh b/src/arch/mips/predecoder.hh
new file mode 100644
index 000000000..a25cce8a7
--- /dev/null
+++ b/src/arch/mips/predecoder.hh
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Gabe Black
+ */
+
+#ifndef __ARCH_MIPS_PREDECODER_HH__
+#define __ARCH_MIPS_PREDECODER_HH__
+
+#include "arch/mips/types.hh"
+#include "base/misc.hh"
+#include "sim/host.hh"
+
+class ThreadContext;
+
+namespace MipsISA
+{
+    //Predecoder for MIPS. It keeps the last machine instruction handed
+    //to it as the ExtMachInst; process() has nothing to do and both
+    //status queries always answer true.
+    class Predecoder
+    {
+      protected:
+        //Thread context supplied to the constructor or to setTC
+        ThreadContext * tc;
+        //The extended machine instruction being generated
+        ExtMachInst emi;
+
+      public:
+        Predecoder(ThreadContext * _tc) : tc(_tc) {}
+
+        //Read and replace the thread context pointer
+        ThreadContext * getTC() { return tc; }
+        void setTC(ThreadContext * _tc) { tc = _tc; }
+
+        //No work is performed here
+        void process() {}
+
+        //Hands an instruction to the predecoder; meant for use when
+        //there is control flow. currPC is accepted but not used, and
+        //off is required to be 0.
+        void moreBytes(Addr currPC, Addr off, MachInst inst)
+        {
+            assert(off == 0);
+            emi = inst;
+        }
+
+        //Hands an instruction to the predecoder; meant for use when
+        //instructions are executed in order. Equivalent to the three
+        //argument form with a pc and an offset of 0.
+        void moreBytes(MachInst machInst)
+        {
+            emi = machInst;
+        }
+
+        //Status queries: each one unconditionally answers true
+        bool needMoreBytes()
+        {
+            return true;
+        }
+
+        bool extMachInstReady()
+        {
+            return true;
+        }
+
+        //The ExtMachInst is handed back as a constant reference so
+        //that no copy needs to be made
+        const ExtMachInst & getExtMachInst()
+        {
+            return emi;
+        }
+    };
+};
+
+#endif // __ARCH_MIPS_PREDECODER_HH__
diff --git a/src/arch/mips/utility.hh b/src/arch/mips/utility.hh
index b5c1e31e1..12db1de57 100644
--- a/src/arch/mips/utility.hh
+++ b/src/arch/mips/utility.hh
@@ -88,19 +88,6 @@ namespace MipsISA {
         return 0;
     }
 
-    static inline ExtMachInst
-    makeExtMI(MachInst inst, ThreadContext * xc) {
-#if FULL_SYSTEM
-        ExtMachInst ext_inst = inst;
-        if (xc->readPC() && 0x1)
-            return ext_inst|=(static_cast<ExtMachInst>(xc->readPC() & 0x1) << 32);
-        else
-            return ext_inst;
-#else
-        return ExtMachInst(inst);
-#endif
-    }
-
     inline void startupCPU(ThreadContext *tc, int cpuId)
     {
         tc->activate(0);
diff --git a/src/arch/sparc/isa/decoder.isa b/src/arch/sparc/isa/decoder.isa
index 70afe19b6..68b2183ad 100644
--- a/src/arch/sparc/isa/decoder.isa
+++ b/src/arch/sparc/isa/decoder.isa
@@ -185,25 +185,25 @@ decode OP default Unknown::unknown()
                     }}, ',a');
                 }
                 default: decode BPCC {
-                    0x0: fbpcc0(22, {{
+                    0x0: fbpfcc0(19, {{
                         if(passesFpCondition(Fsr<11:10>, COND2))
                             NNPC = xc->readPC() + disp;
                         else
                             handle_annul
                     }});
-                    0x1: fbpcc1(22, {{
+                    0x1: fbpfcc1(19, {{
                         if(passesFpCondition(Fsr<33:32>, COND2))
                             NNPC = xc->readPC() + disp;
                         else
                             handle_annul
                     }});
-                    0x2: fbpcc2(22, {{
+                    0x2: fbpfcc2(19, {{
                         if(passesFpCondition(Fsr<35:34>, COND2))
                             NNPC = xc->readPC() + disp;
                         else
                             handle_annul
                     }});
-                    0x3: fbpcc3(22, {{
+                    0x3: fbpfcc3(19, {{
                         if(passesFpCondition(Fsr<37:36>, COND2))
                             NNPC = xc->readPC() + disp;
                         else
@@ -426,19 +426,22 @@ decode OP default Unknown::unknown()
                 {{Rs1<63:> == val2<63:> && val2<63:> != resTemp<63:>}}
             );
             0x24: mulscc({{
-                int64_t resTemp, multiplicand = Rs2_or_imm13;
-                int32_t multiplier = Rs1<31:0>;
                 int32_t savedLSB = Rs1<0:>;
-                multiplier = multiplier<31:1> |
-                    ((Ccr<3:3> ^ Ccr<1:1>) << 32);
-                if(!Y<0:>)
-                    multiplicand = 0;
-                Rd = resTemp = multiplicand + multiplier;
+
+                //Step 1
+                int64_t multiplicand = Rs2_or_imm13;
+                //Step 2
+                int32_t partialP = Rs1<31:1> |
+                    ((Ccr<3:3> ^ Ccr<1:1>) << 31);
+                //Step 3
+                int32_t added = Y<0:> ? multiplicand : 0;
+                Rd = partialP + added;
+                //Steps 4 & 5
                 Y = Y<31:1> | (savedLSB << 31);}},
-                {{((multiplicand<31:0> + multiplier<31:0>)<32:0>)}},
-                {{multiplicand<31:> == multiplier<31:> && multiplier<31:> != resTemp<31:>}},
-                {{((multiplicand >> 1) + (multiplier >> 1) + (multiplicand & multiplier & 0x1))<63:>}},
-                {{multiplicand<63:> == multiplier<63:> && multiplier<63:> != resTemp<63:>}}
+                {{((partialP<31:0> + added<31:0>)<32:0>)}},
+                {{partialP<31:> == added<31:> && added<31:> != Rd<31:>}},
+                {{((partialP >> 1) + (added >> 1) + (partialP & added & 0x1))<63:>}},
+                {{partialP<63:> == added<63:> && partialP<63:> != Rd<63:>}}
             );
         }
         format IntOp
@@ -816,6 +819,58 @@ decode OP default Unknown::unknown()
             }
             0x35: decode OPF{
                 format FpBasic{
+                    0x01: fmovs_fcc0({{
+                        if(passesFpCondition(Fsr<11:10>, COND4))
+                            Frds = Frs2s;
+                        else
+                            Frds = Frds;
+                    }});
+                    0x02: fmovd_fcc0({{
+                        if(passesFpCondition(Fsr<11:10>, COND4))
+                            Frd = Frs2;
+                        else
+                            Frd = Frd;
+                    }});
+                    0x03: FpUnimpl::fmovq_fcc0();
+                    0x25: fmovrsz({{
+                        if(Rs1 == 0)
+                            Frds = Frs2s;
+                        else
+                            Frds = Frds;
+                    }});
+                    0x26: fmovrdz({{
+                        if(Rs1 == 0)
+                            Frd = Frs2;
+                        else
+                            Frd = Frd;
+                    }});
+                    0x27: FpUnimpl::fmovrqz();
+                    0x41: fmovs_fcc1({{
+                        if(passesFpCondition(Fsr<33:32>, COND4))
+                            Frds = Frs2s;
+                        else
+                            Frds = Frds;
+                    }});
+                    0x42: fmovd_fcc1({{
+                        if(passesFpCondition(Fsr<33:32>, COND4))
+                            Frd = Frs2;
+                        else
+                            Frd = Frd;
+                    }});
+                    0x43: FpUnimpl::fmovq_fcc1();
+                    0x45: fmovrslez({{
+                        if(Rs1 <= 0)
+                            Frds = Frs2s;
+                        else
+                            Frds = Frds;
+                    }});
+                    0x46: fmovrdlez({{
+                        if(Rs1 <= 0)
+                            Frd = Frs2;
+                        else
+                            Frd = Frd;
+                    }});
+                    0x47: FpUnimpl::fmovrqlez();
                     0x51: fcmps({{
                           uint8_t fcc;
                           if(isnan(Frs1s) || isnan(Frs2s))
@@ -874,6 +929,110 @@ decode OP default Unknown::unknown()
                           Fsr = insertBits(Fsr, firstbit +1, firstbit, fcc);
                     }});
                     0x57: FpUnimpl::fcmpeq();
+                    0x65: fmovrslz({{
+                        if(Rs1 < 0)
+                            Frds = Frs2s;
+                        else
+                            Frds = Frds;
+                    }});
+                    0x66: fmovrdlz({{
+                        if(Rs1 < 0)
+                            Frd = Frs2;
+                        else
+                            Frd = Frd;
+                    }});
+                    0x67: FpUnimpl::fmovrqlz();
+                    0x81: fmovs_fcc2({{
+                        if(passesFpCondition(Fsr<35:34>, COND4))
+                            Frds = Frs2s;
+                        else
+                            Frds = Frds;
+                    }});
+                    0x82: fmovd_fcc2({{
+                        if(passesFpCondition(Fsr<35:34>, COND4))
+                            Frd = Frs2;
+                        else
+                            Frd = Frd;
+                    }});
+                    0x83: FpUnimpl::fmovq_fcc2();
+                    0xA5: fmovrsnz({{
+                        if(Rs1 != 0)
+                            Frds = Frs2s;
+                        else
+                            Frds = Frds;
+                    }});
+                    0xA6: fmovrdnz({{
+                        if(Rs1 != 0)
+                            Frd = Frs2;
+                        else
+                            Frd = Frd;
+                    }});
+                    0xA7: FpUnimpl::fmovrqnz();
+                    0xC1: fmovs_fcc3({{
+                        if(passesFpCondition(Fsr<37:36>, COND4))
+                            Frds = Frs2s;
+                        else
+                            Frds = Frds;
+                    }});
+                    0xC2: fmovd_fcc3({{
+                        if(passesFpCondition(Fsr<37:36>, COND4))
+                            Frd = Frs2;
+                        else
+                            Frd = Frd;
+                    }});
+                    0xC3: FpUnimpl::fmovq_fcc3();
+                    0xC5: fmovrsgz({{
+                        if(Rs1 > 0)
+                            Frds = Frs2s;
+                        else
+                            Frds = Frds;
+                    }});
+                    0xC6: fmovrdgz({{
+                        if(Rs1 > 0)
+                            Frd = Frs2;
+                        else
+                            Frd = Frd;
+                    }});
+                    0xC7: FpUnimpl::fmovrqgz();
+                    0xE5: fmovrsgez({{
+                        if(Rs1 >= 0)
+                            Frds = Frs2s;
+                        else
+                            Frds = Frds;
+                    }});
+                    0xE6: fmovrdgez({{
+                        if(Rs1 >= 0)
+                            Frd = Frs2;
+                        else
+                            Frd = Frd;
+                    }});
+                    0xE7: FpUnimpl::fmovrqgez();
+                    0x101: fmovs_icc({{
+                        if(passesCondition(Ccr<3:0>, COND4))
+                            Frds = Frs2s;
+                        else
+                            Frds = Frds;
+                    }});
+                    0x102: fmovd_icc({{
+                        if(passesCondition(Ccr<3:0>, COND4))
+                            Frd = Frs2;
+                        else
+                            Frd = Frd;
+                    }});
+                    0x103: FpUnimpl::fmovq_icc();
+                    0x181: fmovs_xcc({{
+                        if(passesCondition(Ccr<7:4>, COND4))
+                            Frds = Frs2s;
+                        else
+                            Frds = Frds;
+                    }});
+                    0x182: fmovd_xcc({{
+                        if(passesCondition(Ccr<7:4>, COND4))
+                            Frd = Frs2;
+                        else
+                            Frd = Frd;
+                    }});
+                    0x183: FpUnimpl::fmovq_xcc();
                     default: FailUnimpl::fpop2();
                 }
             }
@@ -1164,7 +1323,16 @@ decode OP default Unknown::unknown()
             0x04: stw({{Mem.uw = Rd.sw;}});
             0x05: stb({{Mem.ub = Rd.sb;}});
             0x06: sth({{Mem.uhw = Rd.shw;}});
-            0x07: sttw({{Mem.udw = RdLow<31:0> | (RdHigh<31:0> << 32);}});
+            0x07: sttw({{
+                      //This temporary needs to be here so that the parser
+                      //will correctly identify this instruction as a store.
+                      //It's probably either the parenthesis or referencing
+                      //the member variable that confuses it.
+                      Twin32_t temp;
+                      temp.a = RdLow<31:0>;
+                      temp.b = RdHigh<31:0>;
+                      Mem.tuw = temp;
+                  }});
         }
         format Load {
             0x08: ldsw({{Rd = (int32_t)Mem.sw;}});
@@ -1254,7 +1422,16 @@ decode OP default Unknown::unknown()
             0x14: stwa({{Mem.uw = Rd;}}, {{EXT_ASI}});
             0x15: stba({{Mem.ub = Rd;}}, {{EXT_ASI}});
             0x16: stha({{Mem.uhw = Rd;}}, {{EXT_ASI}});
-            0x17: sttwa({{Mem.udw = RdLow<31:0> | RdHigh<31:0> << 32;}}, {{EXT_ASI}});
+            0x17: sttwa({{
+                      //This temporary needs to be here so that the parser
+                      //will correctly identify this instruction as a store.
+                      //It's probably either the parenthesis or referencing
+                      //the member variable that confuses it.
+                      Twin32_t temp;
+                      temp.a = RdLow<31:0>;
+                      temp.b = RdHigh<31:0>;
+                      Mem.tuw = temp;
+                  }}, {{EXT_ASI}});
         }
         format LoadAlt {
             0x18: ldswa({{Rd = (int32_t)Mem.sw;}}, {{EXT_ASI}});
diff --git a/src/arch/sparc/isa/formats/branch.isa b/src/arch/sparc/isa/formats/branch.isa
index 5cd3ab598..f5ab940bb 100644
--- a/src/arch/sparc/isa/formats/branch.isa
+++ b/src/arch/sparc/isa/formats/branch.isa
@@ -40,7 +40,7 @@ output header {{
         {
           protected:
             // Constructor
-            Branch(const char *mnem, MachInst _machInst, OpClass __opClass) :
+            Branch(const char *mnem, ExtMachInst _machInst, OpClass __opClass) :
                 SparcStaticInst(mnem, _machInst, __opClass)
             {
             }
@@ -56,7 +56,7 @@ output header {{
         {
           protected:
             // Constructor
-            BranchDisp(const char *mnem, MachInst _machInst,
+            BranchDisp(const char *mnem, ExtMachInst _machInst,
                     OpClass __opClass) :
                 Branch(mnem, _machInst, __opClass)
             {
@@ -76,7 +76,7 @@ output header {{
         {
           protected:
             // Constructor
-            BranchNBits(const char *mnem, MachInst _machInst,
+            BranchNBits(const char *mnem, ExtMachInst _machInst,
                     OpClass __opClass) :
                 BranchDisp(mnem, _machInst, __opClass)
             {
@@ -91,7 +91,7 @@ output header {{
         {
           protected:
             // Constructor
-            BranchSplit(const char *mnem, MachInst _machInst,
+            BranchSplit(const char *mnem, ExtMachInst _machInst,
                     OpClass __opClass) :
                 BranchDisp(mnem, _machInst, __opClass)
             {
@@ -107,7 +107,7 @@ output header {{
         {
           protected:
             // Constructor
-            BranchImm13(const char *mnem, MachInst _machInst, OpClass __opClass) :
+            BranchImm13(const char *mnem, ExtMachInst _machInst, OpClass __opClass) :
                 Branch(mnem, _machInst, __opClass), imm(sext<13>(SIMM13))
             {
             }
diff --git a/src/arch/sparc/isa/formats/mem/util.isa b/src/arch/sparc/isa/formats/mem/util.isa
index 1d884d6c3..dfe937371 100644
--- a/src/arch/sparc/isa/formats/mem/util.isa
+++ b/src/arch/sparc/isa/formats/mem/util.isa
@@ -224,7 +224,7 @@ def template StoreExecute {{
             }
             if(storeCond && fault == NoFault)
             {
-                fault = xc->write((uint%(mem_acc_size)s_t)Mem,
+                fault = xc->write((%(mem_acc_type)s%(mem_acc_size)s_t)Mem,
                         EA, %(asi_val)s, 0);
             }
             if(fault == NoFault)
@@ -257,7 +257,7 @@ def template StoreInitiateAcc {{
             }
             if(storeCond && fault == NoFault)
             {
-                fault = xc->write((uint%(mem_acc_size)s_t)Mem,
+                fault = xc->write((%(mem_acc_type)s%(mem_acc_size)s_t)Mem,
                         EA, %(asi_val)s, 0);
             }
             if(fault == NoFault)
diff --git a/src/arch/sparc/linux/syscalls.cc b/src/arch/sparc/linux/syscalls.cc
index 2c2902f9e..03c8bafe2 100644
--- a/src/arch/sparc/linux/syscalls.cc
+++ b/src/arch/sparc/linux/syscalls.cc
@@ -132,7 +132,7 @@ SyscallDesc SparcLinuxProcess::syscall32Descs[] = {
     /*  40 */ SyscallDesc("lstat", unimplementedFunc),
     /*  41 */ SyscallDesc("dup", unimplementedFunc),
     /*  42 */ SyscallDesc("pipe", pipePseudoFunc),
-    /*  43 */ SyscallDesc("times", unimplementedFunc),
+    /*  43 */ SyscallDesc("times", ignoreFunc),
     /*  44 */ SyscallDesc("getuid32", unimplementedFunc),
     /*  45 */ SyscallDesc("umount2", unimplementedFunc), //32 bit
     /*  46 */ SyscallDesc("setgid", unimplementedFunc), //32 bit
@@ -217,7 +217,7 @@ SyscallDesc SparcLinuxProcess::syscall32Descs[] = {
     /* 125 */ SyscallDesc("recvfrom", unimplementedFunc),
     /* 126 */ SyscallDesc("setreuid", unimplementedFunc), //32 bit
     /* 127 */ SyscallDesc("setregid", unimplementedFunc), //32 bit
-    /* 128 */ SyscallDesc("rename", unimplementedFunc),
+    /* 128 */ SyscallDesc("rename", renameFunc),
     /* 129 */ SyscallDesc("truncate", unimplementedFunc),
     /* 130 */ SyscallDesc("ftruncate", unimplementedFunc),
     /* 131 */ SyscallDesc("flock", unimplementedFunc),
@@ -320,7 +320,7 @@ SyscallDesc SparcLinuxProcess::syscall32Descs[] = {
     /* 228 */ SyscallDesc("setfsuid", unimplementedFunc), //32 bit
     /* 229 */ SyscallDesc("setfsgid", unimplementedFunc), //32 bit
     /* 230 */ SyscallDesc("_newselect", unimplementedFunc), //32 bit
-    /* 231 */ SyscallDesc("time", unimplementedFunc),
+    /* 231 */ SyscallDesc("time", ignoreFunc),
     /* 232 */ SyscallDesc("oldstat", unimplementedFunc),
     /* 233 */ SyscallDesc("stime", unimplementedFunc),
     /* 234 */ SyscallDesc("statfs64", unimplementedFunc),
@@ -435,7 +435,7 @@ SyscallDesc SparcLinuxProcess::syscallDescs[] = {
     /* 40 */ SyscallDesc("lstat", unimplementedFunc),
     /* 41 */ SyscallDesc("dup", unimplementedFunc),
     /* 42 */ SyscallDesc("pipe", pipePseudoFunc),
-    /* 43 */ SyscallDesc("times", unimplementedFunc),
+    /* 43 */ SyscallDesc("times", ignoreFunc),
     /* 44 */ SyscallDesc("getuid32", unimplementedFunc),
     /* 45 */ SyscallDesc("umount2", unimplementedFunc),
     /* 46 */ SyscallDesc("setgid", unimplementedFunc),
@@ -520,7 +520,7 @@ SyscallDesc SparcLinuxProcess::syscallDescs[] = {
     /* 125 */ SyscallDesc("recvfrom", unimplementedFunc),
     /* 126 */ SyscallDesc("setreuid", unimplementedFunc),
     /* 127 */ SyscallDesc("setregid", unimplementedFunc),
-    /* 128 */ SyscallDesc("rename", unimplementedFunc),
+    /* 128 */ SyscallDesc("rename", renameFunc),
     /* 129 */ SyscallDesc("truncate", unimplementedFunc),
     /* 130 */ SyscallDesc("ftruncate", unimplementedFunc),
     /* 131 */ SyscallDesc("flock", unimplementedFunc),
@@ -623,7 +623,7 @@ SyscallDesc SparcLinuxProcess::syscallDescs[] = {
     /* 228 */ SyscallDesc("setfsuid", unimplementedFunc),
     /* 229 */ SyscallDesc("setfsgid", unimplementedFunc),
     /* 230 */ SyscallDesc("_newselect", unimplementedFunc),
-    /* 231 */ SyscallDesc("time", unimplementedFunc),
+    /* 231 */ SyscallDesc("time", ignoreFunc),
     /* 232 */ SyscallDesc("oldstat", unimplementedFunc),
     /* 233 */ SyscallDesc("stime", unimplementedFunc),
     /* 234 */ SyscallDesc("statfs64", unimplementedFunc),
diff --git a/src/arch/sparc/miscregfile.cc b/src/arch/sparc/miscregfile.cc
index f3be1c4c2..5bd572d38 100644
--- a/src/arch/sparc/miscregfile.cc
+++ b/src/arch/sparc/miscregfile.cc
@@ -646,7 +646,12 @@ void MiscRegFile::setReg(int miscReg,
 #endif
         return;
       case MISCREG_CWP:
-        new_val = val > NWindows ? NWindows - 1 : val;
+        new_val = val >= NWindows ? NWindows - 1 : val;
+        if (val >= NWindows) {
+            new_val = NWindows - 1;
+            warn("Attempted to set the CWP to %d with NWindows = %d\n",
+                    val, NWindows);
+        }
         tc->changeRegFileContext(CONTEXT_CWP, new_val);
         break;
       case MISCREG_GL:
diff --git a/src/arch/sparc/predecoder.hh b/src/arch/sparc/predecoder.hh
new file mode 100644
index 000000000..4a8c9dc4a
--- /dev/null
+++ b/src/arch/sparc/predecoder.hh
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Gabe Black
+ */
+
+#ifndef __ARCH_SPARC_PREDECODER_HH__
+#define __ARCH_SPARC_PREDECODER_HH__
+
+#include "arch/sparc/types.hh"
+#include "base/misc.hh"
+#include "cpu/thread_context.hh"
+#include "sim/host.hh"
+
+class ThreadContext;
+
+namespace SparcISA
+{
+    class Predecoder
+    {
+      protected:
+        ThreadContext * tc;
+        //The extended machine instruction being generated
+        ExtMachInst emi;
+
+      public:
+        Predecoder(ThreadContext * _tc) : tc(_tc), emi(0)
+        {} //emi starts zeroed so getExtMachInst() never returns garbage
+
+        ThreadContext * getTC()
+        {
+            return tc;
+        }
+
+        void setTC(ThreadContext * _tc)
+        {
+            tc = _tc;
+        }
+
+        void process()
+        {
+        }
+
+        //Use this to give data to the predecoder. This should be used
+        //when there is control flow.
+        void moreBytes(Addr currPC, Addr off, MachInst inst)
+        {
+            assert(off == 0);
+
+            emi = inst;
+            //The I bit, bit 13, is used to figure out where the ASI
+            //should come from. Use that in the ExtMachInst. This is
+            //slightly redundant, but it removes the need to put a condition
+            //into all the execute functions
+            if(inst & (1 << 13))
+                emi |= (static_cast<ExtMachInst>(
+                            tc->readMiscRegNoEffect(MISCREG_ASI))
+                        << (sizeof(MachInst) * 8));
+            else
+                emi |= (static_cast<ExtMachInst>(bits(inst, 12, 5))
+                        << (sizeof(MachInst) * 8));
+        }
+
+        //Use this to give data to the predecoder. This should be used
+        //when instructions are executed in order.
+        void moreBytes(MachInst machInst)
+        {
+            moreBytes(0, 0, machInst);
+        }
+
+        bool needMoreBytes()
+        {
+            return true;
+        }
+
+        bool extMachInstReady()
+        {
+            return true;
+        }
+
+        //This returns a constant reference to the ExtMachInst to avoid a copy
+        const ExtMachInst & getExtMachInst()
+        {
+            return emi;
+        }
+    };
+};
+
+#endif // __ARCH_SPARC_PREDECODER_HH__
diff --git a/src/arch/sparc/ua2005.cc b/src/arch/sparc/ua2005.cc
index 865280038..48e97a531 100644
--- a/src/arch/sparc/ua2005.cc
+++ b/src/arch/sparc/ua2005.cc
@@ -195,6 +195,7 @@ MiscRegFile::setFSReg(int miscReg, const MiscReg &val, ThreadContext *tc)
             panic("No support for setting spec_en bit\n");
         setRegNoEffect(miscReg, bits(val,0,0));
         if (!bits(val,0,0)) {
+            DPRINTF(Quiesce, "Cpu executed quiescing instruction\n");
             // Time to go to sleep
             tc->suspend();
             if (tc->getKernelStats())
@@ -307,7 +308,7 @@ MiscRegFile::processSTickCompare(ThreadContext *tc)
         tc->getCpuPtr()->instCount();
     assert(ticks >= 0 && "stick compare missed interrupt cycle");
 
-    if (ticks == 0) {
+    if (ticks == 0 || tc->status() == ThreadContext::Suspended) {
         DPRINTF(Timer, "STick compare cycle reached at %#x\n",
                 (stick_cmpr & mask(63)));
         if (!(tc->readMiscRegNoEffect(MISCREG_STICK_CMPR) & (ULL(1) << 63))) {
@@ -324,11 +325,15 @@ MiscRegFile::processHSTickCompare(ThreadContext *tc)
     // we're actually at the correct cycle or we need to wait a little while
     // more
     int ticks;
+    if ( tc->status() == ThreadContext::Halted ||
+         tc->status() == ThreadContext::Unallocated)
+       return;
+
     ticks = ((int64_t)(hstick_cmpr & mask(63)) - (int64_t)stick) -
         tc->getCpuPtr()->instCount();
     assert(ticks >= 0 && "hstick compare missed interrupt cycle");
 
-    if (ticks == 0) {
+    if (ticks == 0 || tc->status() == ThreadContext::Suspended) {
         DPRINTF(Timer, "HSTick compare cycle reached at %#x\n",
                 (stick_cmpr & mask(63)));
         if (!(tc->readMiscRegNoEffect(MISCREG_HSTICK_CMPR) & (ULL(1) << 63))) {
diff --git a/src/arch/sparc/utility.hh b/src/arch/sparc/utility.hh
index dc9201401..1458231f2 100644
--- a/src/arch/sparc/utility.hh
+++ b/src/arch/sparc/utility.hh
@@ -48,22 +48,6 @@ namespace SparcISA
                 tc->readMiscRegNoEffect(MISCREG_HPSTATE & (1 << 2)));
     }
 
-    inline ExtMachInst
-    makeExtMI(MachInst inst, ThreadContext * xc) {
-        ExtMachInst emi = (MachInst) inst;
-        //The I bit, bit 13, is used to figure out where the ASI
-        //should come from. Use that in the ExtMachInst. This is
-        //slightly redundant, but it removes the need to put a condition
-        //into all the execute functions
-        if(inst & (1 << 13))
-            emi |= (static_cast<ExtMachInst>(xc->readMiscRegNoEffect(MISCREG_ASI))
-                    << (sizeof(MachInst) * 8));
-        else
-            emi |= (static_cast<ExtMachInst>(bits(inst, 12, 5))
-                    << (sizeof(MachInst) * 8));
-        return emi;
-    }
-
     inline bool isCallerSaveIntegerRegister(unsigned int reg) {
         panic("register classification not implemented");
         return false;
diff --git a/src/arch/x86/SConscript b/src/arch/x86/SConscript
index f49225758..2e2c5b006 100644
--- a/src/arch/x86/SConscript
+++ b/src/arch/x86/SConscript
@@ -84,11 +84,12 @@
 # Authors: Gabe Black
 
 Import('*')
-
 if env['TARGET_ISA'] == 'x86':
     Source('floatregfile.cc')
     Source('intregfile.cc')
     Source('miscregfile.cc')
+    Source('predecoder.cc')
+    Source('predecoder_tables.cc')
     Source('regfile.cc')
     Source('remote_gdb.cc')
 
diff --git a/src/arch/x86/intregfile.hh b/src/arch/x86/intregfile.hh
index da631d444..f7b03f0f0 100644
--- a/src/arch/x86/intregfile.hh
+++ b/src/arch/x86/intregfile.hh
@@ -88,8 +88,9 @@
 #ifndef __ARCH_X86_INTREGFILE_HH__
 #define __ARCH_X86_INTREGFILE_HH__
 
-#include "arch/x86/x86_traits.hh"
+#include "arch/x86/intregs.hh"
 #include "arch/x86/types.hh"
+#include "arch/x86/x86_traits.hh"
 
 #include <string>
 
@@ -102,7 +103,7 @@ namespace X86ISA
     //This function translates integer register file indices into names
     std::string getIntRegName(RegIndex);
 
-    const int NumIntArchRegs = 16;
+    const int NumIntArchRegs = NUM_INTREGS;
     const int NumIntRegs = NumIntArchRegs + NumMicroIntRegs;
 
     class IntRegFile
diff --git a/src/arch/x86/intregs.hh b/src/arch/x86/intregs.hh
index 3fe25bd5f..ed801cc48 100644
--- a/src/arch/x86/intregs.hh
+++ b/src/arch/x86/intregs.hh
@@ -77,7 +77,8 @@ namespace X86ISA
         INTREG_R12W,
         INTREG_R13W,
         INTREG_R14W,
-        INTREG_R15W
+        INTREG_R15W,
+        NUM_INTREGS
     };
 };
 
diff --git a/src/arch/x86/isa/base.isa b/src/arch/x86/isa/base.isa
new file mode 100644
index 000000000..4776f7a7e
--- /dev/null
+++ b/src/arch/x86/isa/base.isa
@@ -0,0 +1,181 @@
+// Copyright (c) 2007 The Hewlett-Packard Development Company
+// All rights reserved.
+//
+// Redistribution and use of this software in source and binary forms,
+// with or without modification, are permitted provided that the
+// following conditions are met:
+//
+// The software must be used only for Non-Commercial Use which means any
+// use which is NOT directed to receiving any direct monetary
+// compensation for, or commercial advantage from such use.  Illustrative
+// examples of non-commercial use are academic research, personal study,
+// teaching, education and corporate research & development.
+// Illustrative examples of commercial use are distributing products for
+// commercial advantage and providing services using the software for
+// commercial advantage.
+//
+// If you wish to use this software or functionality therein that may be
+// covered by patents for commercial use, please contact:
+//     Director of Intellectual Property Licensing
+//     Office of Strategy and Technology
+//     Hewlett-Packard Company
+//     1501 Page Mill Road
+//     Palo Alto, California  94304
+//
+// Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.  Redistributions
+// in binary form must reproduce the above copyright notice, this list of
+// conditions and the following disclaimer in the documentation and/or
+// other materials provided with the distribution.  Neither the name of
+// the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.  No right of
+// sublicense is granted herewith.  Derivatives of the software and
+// output created using the software may be prepared, but only for
+// Non-Commercial Uses.  Derivatives of the software may be shared with
+// others provided: (i) the others agree to abide by the list of
+// conditions herein which includes the Non-Commercial Use restrictions;
+// and (ii) such Derivatives of the software include the above copyright
+// notice to acknowledge the contribution from this software where
+// applicable, this list of conditions and the disclaimer below.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Gabe Black
+
+////////////////////////////////////////////////////////////////////
+//
+// Base class for x86 instructions, and some support functions
+//
+
+output header {{
+
+        /**
+         * Base class for all X86 static instructions.
+         */
+        class X86StaticInst : public StaticInst
+        {
+          protected:
+            // Constructor.
+            X86StaticInst(const char *mnem,
+                 ExtMachInst _machInst, OpClass __opClass)
+                    : StaticInst(mnem, _machInst, __opClass)
+                {
+                }
+
+            std::string generateDisassembly(Addr pc,
+                const SymbolTable *symtab) const;
+
+            void printReg(std::ostream &os, int reg) const;
+            void printSrcReg(std::ostream &os, int reg) const;
+            void printDestReg(std::ostream &os, int reg) const;
+        };
+}};
+
+output decoder {{
+
+        inline void printMnemonic(std::ostream &os, const char * mnemonic)
+        {
+            ccprintf(os, "\t%s   ", mnemonic);
+        }
+
+        void
+        X86StaticInst::printSrcReg(std::ostream &os, int reg) const
+        {
+            if(_numSrcRegs > reg)
+                printReg(os, _srcRegIdx[reg]);
+        }
+
+        void
+        X86StaticInst::printDestReg(std::ostream &os, int reg) const
+        {
+            if(_numDestRegs > reg)
+                printReg(os, _destRegIdx[reg]);
+        }
+
+        void
+        X86StaticInst::printReg(std::ostream &os, int reg) const
+        {
+            if (reg < FP_Base_DepTag) {
+                //FIXME These should print differently depending on the
+                //mode etc, but for now this will get the point across
+                switch (reg) {
+                  case INTREG_RAX:
+                    ccprintf(os, "rax");
+                    break;
+                  case INTREG_RBX:
+                    ccprintf(os, "rbx");
+                    break;
+                  case INTREG_RCX:
+                    ccprintf(os, "rcx");
+                    break;
+                  case INTREG_RDX:
+                    ccprintf(os, "rdx");
+                    break;
+                  case INTREG_RSP:
+                    ccprintf(os, "rsp");
+                    break;
+                  case INTREG_RBP:
+                    ccprintf(os, "rbp");
+                    break;
+                  case INTREG_RSI:
+                    ccprintf(os, "rsi");
+                    break;
+                  case INTREG_RDI:
+                    ccprintf(os, "rdi");
+                    break;
+                  case INTREG_R8W:
+                    ccprintf(os, "r8");
+                    break;
+                  case INTREG_R9W:
+                    ccprintf(os, "r9");
+                    break;
+                  case INTREG_R10W:
+                    ccprintf(os, "r10");
+                    break;
+                  case INTREG_R11W:
+                    ccprintf(os, "r11");
+                    break;
+                  case INTREG_R12W:
+                    ccprintf(os, "r12");
+                    break;
+                  case INTREG_R13W:
+                    ccprintf(os, "r13");
+                    break;
+                  case INTREG_R14W:
+                    ccprintf(os, "r14");
+                    break;
+                  case INTREG_R15W:
+                    ccprintf(os, "r15");
+                    break;
+                }
+            } else if (reg < Ctrl_Base_DepTag) {
+                ccprintf(os, "%%f%d", reg - FP_Base_DepTag);
+            } else {
+                switch (reg - Ctrl_Base_DepTag) {
+                  default:
+                    ccprintf(os, "%%ctrl%d", reg - Ctrl_Base_DepTag);
+                }
+            }
+        }
+
+        std::string X86StaticInst::generateDisassembly(Addr pc,
+            const SymbolTable *symtab) const
+        {
+            std::stringstream ss;
+
+            printMnemonic(ss, mnemonic);
+
+            return ss.str();
+        }
+}};
diff --git a/src/arch/x86/isa/bitfields.isa b/src/arch/x86/isa/bitfields.isa
index 47aec4fa1..fff324caa 100644
--- a/src/arch/x86/isa/bitfields.isa
+++ b/src/arch/x86/isa/bitfields.isa
@@ -58,5 +58,30 @@
 // Bitfield definitions.
 //
 
-def bitfield EXAMPLE	<24>;
+//Prefixes
+def bitfield REX rex;
+def bitfield LEGACY legacy;
 
+// Pieces of the opcode
+def bitfield OPCODE_NUM opcode.num;
+def bitfield OPCODE_PREFIXA opcode.prefixA;
+def bitfield OPCODE_PREFIXB opcode.prefixB;
+def bitfield OPCODE_OP opcode.op;
+//The top 5 bits of the opcode tend to split the instructions into groups
+def bitfield OPCODE_OP_TOP5 opcode.op.top5;
+def bitfield OPCODE_OP_BOTTOM3 opcode.op.bottom3;
+
+// Immediate fields
+def bitfield IMMEDIATE immediate;
+def bitfield DISPLACEMENT displacement;
+
+//Modifier bytes
+def bitfield MODRM modRM;
+def bitfield MODRM_MOD modRM.mod;
+def bitfield MODRM_REG modRM.reg;
+def bitfield MODRM_RM modRM.rm;
+
+def bitfield SIB sib;
+def bitfield SIB_SCALE sib.scale;
+def bitfield SIB_INDEX sib.index;
+def bitfield SIB_BASE sib.base;
diff --git a/src/arch/x86/isa/decoder/decoder.isa b/src/arch/x86/isa/decoder/decoder.isa
new file mode 100644
index 000000000..20f31f882
--- /dev/null
+++ b/src/arch/x86/isa/decoder/decoder.isa
@@ -0,0 +1,89 @@
+// Copyright (c) 2007 The Hewlett-Packard Development Company
+// All rights reserved.
+//
+// Redistribution and use of this software in source and binary forms,
+// with or without modification, are permitted provided that the
+// following conditions are met:
+//
+// The software must be used only for Non-Commercial Use which means any
+// use which is NOT directed to receiving any direct monetary
+// compensation for, or commercial advantage from such use.  Illustrative
+// examples of non-commercial use are academic research, personal study,
+// teaching, education and corporate research & development.
+// Illustrative examples of commercial use are distributing products for
+// commercial advantage and providing services using the software for
+// commercial advantage.
+//
+// If you wish to use this software or functionality therein that may be
+// covered by patents for commercial use, please contact:
+//     Director of Intellectual Property Licensing
+//     Office of Strategy and Technology
+//     Hewlett-Packard Company
+//     1501 Page Mill Road
+//     Palo Alto, California  94304
+//
+// Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.  Redistributions
+// in binary form must reproduce the above copyright notice, this list of
+// conditions and the following disclaimer in the documentation and/or
+// other materials provided with the distribution.  Neither the name of
+// the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.  No right of
+// sublicense is granted herewith.  Derivatives of the software and
+// output created using the software may be prepared, but only for
+// Non-Commercial Uses.  Derivatives of the software may be shared with
+// others provided: (i) the others agree to abide by the list of
+// conditions herein which includes the Non-Commercial Use restrictions;
+// and (ii) such Derivatives of the software include the above copyright
+// notice to acknowledge the contribution from this software where
+// applicable, this list of conditions and the disclaimer below.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Gabe Black
+
+////////////////////////////////////////////////////////////////////
+//
+// The actual decoder specification
+//
+
+decode OPCODE_NUM default Unknown::unknown()
+{
+    0x0: M5InternalError::error(
+        {{"Saw an ExtMachInst with zero opcode bytes!"}});
+    //1 byte opcodes
+    ##include "one_byte_opcodes.isa"
+    //2 byte opcodes
+    ##include "two_byte_opcodes.isa"
+    //3 byte opcodes: the 0x0F escape, then 0x38, 0x3A or 0x0F (3DNow!)
+    0x3: decode OPCODE_PREFIXA {
+        0x0F: decode OPCODE_PREFIXB {
+            //We don't handle these properly in the predecoder yet, so there's
+            //no reason to implement them for now.
+            0x38: decode OPCODE_OP {
+                default: FailUnimpl::sseThreeEight();
+            }
+            0x3A: decode OPCODE_OP {
+                default: FailUnimpl::sseThreeA();
+            }
+            0x0F: decode OPCODE_OP {
+                default: FailUnimpl::threednow();
+            }
+            default: M5InternalError::error(
+                {{"Unexpected second opcode byte in three byte opcode!"}});
+        }
+        default: M5InternalError::error(
+            {{"Unexpected first opcode byte in three byte opcode!"}});
+    }
+}
diff --git a/src/arch/x86/isa/decoder/one_byte_opcodes.isa b/src/arch/x86/isa/decoder/one_byte_opcodes.isa
new file mode 100644
index 000000000..c56a8bf92
--- /dev/null
+++ b/src/arch/x86/isa/decoder/one_byte_opcodes.isa
@@ -0,0 +1,398 @@
+// Copyright (c) 2007 The Hewlett-Packard Development Company
+// All rights reserved.
+//
+// Redistribution and use of this software in source and binary forms,
+// with or without modification, are permitted provided that the
+// following conditions are met:
+//
+// The software must be used only for Non-Commercial Use which means any
+// use which is NOT directed to receiving any direct monetary
+// compensation for, or commercial advantage from such use.  Illustrative
+// examples of non-commercial use are academic research, personal study,
+// teaching, education and corporate research & development.
+// Illustrative examples of commercial use are distributing products for
+// commercial advantage and providing services using the software for
+// commercial advantage.
+//
+// If you wish to use this software or functionality therein that may be
+// covered by patents for commercial use, please contact:
+//     Director of Intellectual Property Licensing
+//     Office of Strategy and Technology
+//     Hewlett-Packard Company
+//     1501 Page Mill Road
+//     Palo Alto, California  94304
+//
+// Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.  Redistributions
+// in binary form must reproduce the above copyright notice, this list of
+// conditions and the following disclaimer in the documentation and/or
+// other materials provided with the distribution.  Neither the name of
+// the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.  No right of
+// sublicense is granted herewith.  Derivatives of the software and
+// output created using the software may be prepared, but only for
+// Non-Commercial Uses.  Derivatives of the software may be shared with
+// others provided: (i) the others agree to abide by the list of
+// conditions herein which includes the Non-Commercial Use restrictions;
+// and (ii) such Derivatives of the software include the above copyright
+// notice to acknowledge the contribution from this software where
+// applicable, this list of conditions and the disclaimer below.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Gabe Black
+
+////////////////////////////////////////////////////////////////////
+//
+// Decode the one byte opcodes
+//
+
+0x1: decode OPCODE_OP_TOP5 {
+    format WarnUnimpl {
+        0x00: decode OPCODE_OP_BOTTOM3 {
+            0x6: push_ES();
+            0x7: pop_ES();
+            default: MultiOp::add(
+                {{out1 = in1 + in2}},
+                OPCODE_OP_BOTTOM3,
+                [[Eb,Gb],[Ev,Gv],
+                 [Gb,Eb],[Gv,Ev],
+                 [Al,Ib],[rAx,Iz]]);
+        }
+        0x01: decode OPCODE_OP_BOTTOM3 {
+            0x0: or_Eb_Gb();
+            0x1: or_Ev_Gv();
+            0x2: or_Gb_Eb();
+            0x3: or_Gv_Ev();
+            0x4: or_Al_Ib();
+            0x5: or_rAX_Iz();
+            0x6: push_CS();
+            //Any time this is seen, it should generate a two byte opcode
+            0x7: M5InternalError::error(
+                {{"Saw a one byte opcode whose value was 0x0F!"}});
+        }
+        0x02: decode OPCODE_OP_BOTTOM3 {
+            0x0: adc_Eb_Gb();
+            0x1: adc_Ev_Gv();
+            0x2: adc_Gb_Eb();
+            0x3: adc_Gv_Ev();
+            0x4: adc_Al_Ib();
+            0x5: adc_rAX_Iz();
+            0x6: push_SS();
+            0x7: pop_SS();
+        }
+        0x03: decode OPCODE_OP_BOTTOM3 {
+            0x0: sbb_Eb_Gb();
+            0x1: sbb_Ev_Gv();
+            0x2: sbb_Gb_Eb();
+            0x3: sbb_Gv_Ev();
+            0x4: sbb_Al_Ib();
+            0x5: sbb_rAX_Iz();
+            0x6: push_DS();
+            0x7: pop_DS();
+        }
+        0x04: decode OPCODE_OP_BOTTOM3 {
+            0x0: and_Eb_Gb();
+            0x1: and_Ev_Gv();
+            0x2: and_Gb_Eb();
+            0x3: and_Gv_Ev();
+            0x4: and_Al_Ib();
+            0x5: and_rAX_Iz();
+            0x6: M5InternalError::error(
+                {{"Tried to execute the ES segment override prefix!"}});
+            0x7: daa();
+        }
+        0x05: decode OPCODE_OP_BOTTOM3 {
+            0x0: sub_Eb_Gb();
+            0x1: sub_Ev_Gv();
+            0x2: sub_Gb_Eb();
+            0x3: sub_Gv_Ev();
+            0x4: sub_Al_Ib();
+            0x5: sub_rAX_Iz();
+            0x6: M5InternalError::error(
+                {{"Tried to execute the CS segment override prefix!"}});
+            0x7: das();
+        }
+        0x06: decode OPCODE_OP_BOTTOM3 {
+            0x0: xor_Eb_Gb();
+            0x1: xor_Ev_Gv();
+            0x2: xor_Gb_Eb();
+            0x3: xor_Gv_Ev();
+            0x4: xor_Al_Ib();
+            0x5: xor_rAX_Iz();
+            0x6: M5InternalError::error(
+                {{"Tried to execute the SS segment override prefix!"}});
+            0x7: aaa();
+        }
+        0x07: decode OPCODE_OP_BOTTOM3 {
+            0x0: cmp_Eb_Gb();
+            0x1: cmp_Ev_Gv();
+            0x2: cmp_Gb_Eb();
+            0x3: cmp_Gv_Ev();
+            0x4: cmp_Al_Ib();
+            0x5: cmp_rAX_Iz();
+            0x6: M5InternalError::error(
+                {{"Tried to execute the DS segment override prefix!"}});
+            0x7: aas();
+        }
+        0x08: decode OPCODE_OP_BOTTOM3 {
+            0x0: inc_eAX();
+            0x1: inc_eCX();
+            0x2: inc_eDX();
+            0x3: inc_eBX();
+            0x4: inc_eSP();
+            0x5: inc_eBP();
+            0x6: inc_eSI();
+            0x7: inc_eDI();
+        }
+        0x09: decode OPCODE_OP_BOTTOM3 {
+            0x0: dec_eAX();
+            0x1: dec_eCX();
+            0x2: dec_eDX();
+            0x3: dec_eBX();
+            0x4: dec_eSP();
+            0x5: dec_eBP();
+            0x6: dec_eSI();
+            0x7: dec_eDI();
+        }
+        0x0A: decode OPCODE_OP_BOTTOM3 {
+            0x0: push_rAX();
+            0x1: push_rCX();
+            0x2: push_rDX();
+            0x3: push_rBX();
+            0x4: push_rSP();
+            0x5: push_rBP();
+            0x6: push_rSI();
+            0x7: push_rDI();
+        }
+        0x0B: decode OPCODE_OP_BOTTOM3 {
+            0x0: pop_rAX();
+            0x1: pop_rCX();
+            0x2: pop_rDX();
+            0x3: pop_rBX();
+            0x4: pop_rSP();
+            0x5: pop_rBP();
+            0x6: pop_rSI();
+            0x7: pop_rDI();
+        }
+        0x0C: decode OPCODE_OP_BOTTOM3 {
+            0x0: pusha();
+            0x1: popa();
+            0x2: bound_Gv_Ma();
+            0x3: arpl_Ew_Gw();
+            0x4: M5InternalError::error(
+                {{"Tried to execute the FS segment override prefix!"}});
+            0x5: M5InternalError::error(
+                {{"Tried to execute the GS segment override prefix!"}});
+            0x6: M5InternalError::error(
+                {{"Tried to execute the operand size override prefix!"}});
+            0x7: M5InternalError::error(
+                {{"Tried to execute the address size override prefix!"}});
+        }
+        0x0D: decode OPCODE_OP_BOTTOM3 {
+            0x0: push_Iz();
+            0x1: imul_Gv_Ev_Iz();
+            0x2: push_Ib();
+            0x3: imul_Gv_Ev_Ib();
+            0x4: ins_Yb_Dx();
+            0x5: ins_Yz_Dx();
+            0x6: outs_Dx_Xb();
+            0x7: outs_Dx_Xz();
+        }
+        0x0E: decode OPCODE_OP_BOTTOM3 {
+            0x0: jo_Jb();
+            0x1: jno_Jb();
+            0x2: jb_Jb();
+            0x3: jnb_Jb();
+            0x4: jz_Jb();
+            0x5: jnz_Jb();
+            0x6: jbe_Jb();
+            0x7: jnbe_Jb();
+        }
+        0x0F: decode OPCODE_OP_BOTTOM3 {
+            0x0: js_Jb();
+            0x1: jns_Jb();
+            0x2: jp_Jb();
+            0x3: jnp_Jb();
+            0x4: jl_Jb();
+            0x5: jnl_Jb();
+            0x6: jle_Jb();
+            0x7: jnle_Jb();
+        }
+        0x10: decode OPCODE_OP_BOTTOM3 {
+            0x0: group1_Eb_Ib();
+            0x1: group1_Ev_Iz();
+            0x2: group1_Eb_Ib();
+            0x3: group1_Ev_Ib();
+            0x4: test_Eb_Gb();
+            0x5: test_Ev_Gv();
+            0x6: xchg_Eb_Gb();
+            0x7: xchg_Ev_Gv();
+        }
+        0x11: decode OPCODE_OP_BOTTOM3 {
+            0x0: mov_Eb_Gb();
+            0x1: mov_Ev_Gv();
+            0x2: mov_Gb_Eb();
+            0x3: mov_Gv_Ev();
+            0x4: mov_MwRv_Sw();
+            0x5: lea_Gv_M();
+            0x6: mov_Sw_MwRv();
+            0x7: group10_Ev(); //Make sure this is Ev
+        }
+        0x12: decode OPCODE_OP_BOTTOM3 {
+            0x0: nop_or_pause(); //Check for repe prefix
+            0x1: xchg_rCX_rAX();
+            0x2: xchg_rDX_rAX();
+            0x3: xchg_rBX_rAX();
+            0x4: xchg_rSP_rAX();
+            0x5: xchg_rBP_rAX();
+            0x6: xchg_rSI_rAX();
+            0x7: xchg_rDI_rAX();
+        }
+        0x13: decode OPCODE_OP_BOTTOM3 {
+            0x0: cbw_or_cwde_or_cdqe_rAX();
+            0x1: cwd_or_cdq_or_cqo_rAX_rDX();
+            0x2: call_Ap();
+            0x3: fwait(); //aka wait
+            0x4: pushf_Fv();
+            0x5: popf_Fv();
+            0x6: sahf();
+            0x7: lahf();
+        }
+        0x14: decode OPCODE_OP_BOTTOM3 {
+            0x0: mov_Al_Ob();
+            0x1: mov_rAX_Ov();
+            0x2: mov_Ob_Al();
+            0x3: mov_Ov_rAX();
+            0x4: movs_Yb_Xb();
+            0x5: movs_Yv_Xv();
+            0x6: cmps_Yb_Xb();
+            0x7: cmps_Yv_Xv();
+        }
+        0x15: decode OPCODE_OP_BOTTOM3 {
+            0x0: test_Al_Ib();
+            0x1: test_rAX_Iz();
+            0x2: stos_Yb_Al();
+            0x3: stos_Yv_rAX();
+            0x4: lods_Al_Xb();
+            0x5: lods_rAX_Xv();
+            0x6: scas_Yb_Al();
+            0x7: scas_Yv_rAX();
+        }
+        0x16: decode OPCODE_OP_BOTTOM3 {
+            0x0: mov_Al_Ib();
+            0x1: mov_Cl_Ib();
+            0x2: mov_Dl_Ib();
+            0x3: mov_Bl_Ib();
+            0x4: mov_Ah_Ib();
+            0x5: mov_Ch_Ib();
+            0x6: mov_Dh_Ib();
+            0x7: mov_Bh_Ib();
+        }
+        0x17: decode OPCODE_OP_BOTTOM3 {
+            0x0: mov_rAX_Iv();
+            0x1: mov_rCX_Iv();
+            0x2: mov_rDX_Iv();
+            0x3: mov_rBX_Iv();
+            0x4: mov_rSP_Iv();
+            0x5: mov_rBP_Iv();
+            0x6: mov_rSI_Iv();
+            0x7: mov_rDI_Iv();
+        }
+        0x18: decode OPCODE_OP_BOTTOM3 {
+            0x0: group2_Eb_Ib();
+            0x1: group2_Ev_Ib();
+            0x2: ret_near_Iw();
+            0x3: ret_near();
+            0x4: les_Gz_Mp();
+            0x5: lds_Gz_Mp();
+            0x6: group12_Eb_Ib();
+            0x7: group12_Ev_Iz();
+        }
+        0x19: decode OPCODE_OP_BOTTOM3 {
+            0x0: enter_Iw_Ib();
+            0x1: leave();
+            0x2: ret_far_Iw();
+            0x3: ret_far();
+            0x4: int3();
+            0x5: int_Ib();
+            0x6: into();
+            0x7: iret();
+        }
+        0x1A: decode OPCODE_OP_BOTTOM3 {
+            0x0: group2_Eb_1();
+            0x1: group2_Ev_1();
+            0x2: group2_Eb_Cl();
+            0x3: group2_Ev_Cl();
+            0x4: aam_Ib();
+            0x5: aad_Ib();
+            0x6: salc();
+            0x7: xlat();
+        }
+        0x1B: decode OPCODE_OP_BOTTOM3 {
+            0x0: esc0();
+            0x1: esc1();
+            0x2: esc2();
+            0x3: esc3();
+            0x4: esc4();
+            0x5: esc5();
+            0x6: esc6();
+            0x7: esc7();
+        }
+        0x1C: decode OPCODE_OP_BOTTOM3 {
+            0x0: loopne_Jb();
+            0x1: loope_Jb();
+            0x2: loop_Jb();
+            0x3: jcxz_or_jecx_or_jrcx();
+            0x4: in_Al_Ib();
+            0x5: in_eAX_Ib();
+            0x6: out_Ib_Al();
+            0x7: out_Ib_eAX();
+        }
+        0x1D: decode OPCODE_OP_BOTTOM3 {
+            0x0: call_Jz();
+            0x1: jmp_Jz();
+            0x2: jmp_Ap();
+            0x3: jmp_Jb();
+            0x4: in_Al_Dx();
+            0x5: in_eAX_Dx();
+            0x6: out_Dx_Al();
+            0x7: out_Dx_eAX();
+        }
+        0x1E: decode OPCODE_OP_BOTTOM3 {
+            0x0: M5InternalError::error(
+                {{"Tried to execute the lock prefix!"}});
+            0x1: int1();
+            0x2: M5InternalError::error(
+                {{"Tried to execute the repne prefix!"}});
+            0x3: M5InternalError::error(
+                {{"Tried to execute the rep/repe prefix!"}});
+            0x4: hlt();
+            0x5: cmc();
+            0x6: group3_Eb();
+            0x7: group3_Ev();
+        }
+        0x1F: decode OPCODE_OP_BOTTOM3 {
+            0x0: clc();
+            0x1: stc();
+            0x2: cli();
+            0x3: sti();
+            0x4: cld();
+            0x5: std();
+            0x6: group4();
+            0x7: group5();
+        }
+    }
+    default: FailUnimpl::oneByteOps();
+}
diff --git a/src/arch/x86/isa/decoder/two_byte_opcodes.isa b/src/arch/x86/isa/decoder/two_byte_opcodes.isa
new file mode 100644
index 000000000..f05c33bdb
--- /dev/null
+++ b/src/arch/x86/isa/decoder/two_byte_opcodes.isa
@@ -0,0 +1,393 @@
+// Copyright (c) 2007 The Hewlett-Packard Development Company
+// All rights reserved.
+//
+// Redistribution and use of this software in source and binary forms,
+// with or without modification, are permitted provided that the
+// following conditions are met:
+//
+// The software must be used only for Non-Commercial Use which means any
+// use which is NOT directed to receiving any direct monetary
+// compensation for, or commercial advantage from such use.  Illustrative
+// examples of non-commercial use are academic research, personal study,
+// teaching, education and corporate research & development.
+// Illustrative examples of commercial use are distributing products for
+// commercial advantage and providing services using the software for
+// commercial advantage.
+//
+// If you wish to use this software or functionality therein that may be
+// covered by patents for commercial use, please contact:
+//     Director of Intellectual Property Licensing
+//     Office of Strategy and Technology
+//     Hewlett-Packard Company
+//     1501 Page Mill Road
+//     Palo Alto, California  94304
+//
+// Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.  Redistributions
+// in binary form must reproduce the above copyright notice, this list of
+// conditions and the following disclaimer in the documentation and/or
+// other materials provided with the distribution.  Neither the name of
+// the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.  No right of
+// sublicense is granted herewith.  Derivatives of the software and
+// output created using the software may be prepared, but only for
+// Non-Commercial Uses.  Derivatives of the software may be shared with
+// others provided: (i) the others agree to abide by the list of
+// conditions herein which includes the Non-Commercial Use restrictions;
+// and (ii) such Derivatives of the software include the above copyright
+// notice to acknowledge the contribution from this software where
+// applicable, this list of conditions and the disclaimer below.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Gabe Black
+
+////////////////////////////////////////////////////////////////////
+//
+// Decode the two byte opcodes
+//
+0x2: decode OPCODE_PREFIXA {
+    0xF0: decode OPCODE_OP_TOP5 {  // NOTE(review): x86 escape is 0x0F; confirm
+        format WarnUnimpl {  // entries with no format of their own use this one
+            0x00: decode OPCODE_OP_BOTTOM3 {
+                0x00: group6();
+                0x01: group7();
+                0x02: lar_Gv_Ew();
+                0x03: lsl_Gv_Ew();
+                //sandpile.org doesn't seem to know what this is... ?
+                0x04: loadall_or_reset_or_hang();
+                //sandpile.org says (AMD) after syscall, so I might want to check
+                //if that means amd64 or AMD machines
+                0x05: loadall_or_syscall();
+                0x06: clts();
+                //sandpile.org says (AMD) after sysret, so I might want to check
+                //if that means amd64 or AMD machines
+                0x07: loadall_or_sysret();
+            }
+            0x01: decode OPCODE_OP_BOTTOM3 {  // "holder" rows are placeholders
+                0x0: holder();
+                0x1: holder();
+                0x2: holder();
+                0x3: holder();
+                0x4: holder();
+                0x5: holder();
+                0x6: holder();
+                0x7: holder();
+            }
+            0x02: decode OPCODE_OP_BOTTOM3 {
+                0x0: holder();
+                0x1: holder();
+                0x2: holder();
+                0x3: holder();
+                0x4: holder();
+                0x5: holder();
+                0x6: holder();
+                0x7: holder();
+            }
+            0x03: decode OPCODE_OP_BOTTOM3 {
+                0x0: holder();
+                0x1: holder();
+                0x2: holder();
+                0x3: holder();
+                0x4: holder();
+                0x5: holder();
+                0x6: holder();
+                0x7: holder();
+            }
+            0x04: decode OPCODE_OP_BOTTOM3 {
+                0x0: holder();
+                0x1: holder();
+                0x2: holder();
+                0x3: holder();
+                0x4: holder();
+                0x5: holder();
+                0x6: holder();
+                0x7: holder();
+            }
+            0x05: decode OPCODE_OP_BOTTOM3 {
+                0x0: holder();
+                0x1: holder();
+                0x2: holder();
+                0x3: holder();
+                0x4: holder();
+                0x5: holder();
+                0x6: holder();
+                0x7: holder();
+            }
+            0x06: decode OPCODE_OP_BOTTOM3 {
+                0x0: holder();
+                0x1: holder();
+                0x2: holder();
+                0x3: holder();
+                0x4: holder();
+                0x5: holder();
+                0x6: holder();
+                0x7: holder();
+            }
+            0x07: decode OPCODE_OP_BOTTOM3 {
+                0x0: holder();
+                0x1: holder();
+                0x2: holder();
+                0x3: holder();
+                0x4: holder();
+                0x5: holder();
+                0x6: holder();
+                0x7: holder();
+            }
+            0x08: decode OPCODE_OP_BOTTOM3 {
+                0x0: holder();
+                0x1: holder();
+                0x2: holder();
+                0x3: holder();
+                0x4: holder();
+                0x5: holder();
+                0x6: holder();
+                0x7: holder();
+            }
+            0x09: decode OPCODE_OP_BOTTOM3 {
+                0x0: holder();
+                0x1: holder();
+                0x2: holder();
+                0x3: holder();
+                0x4: holder();
+                0x5: holder();
+                0x6: holder();
+                0x7: holder();
+            }
+            0x0A: decode OPCODE_OP_BOTTOM3 {
+                0x0: holder();
+                0x1: holder();
+                0x2: holder();
+                0x3: holder();
+                0x4: holder();
+                0x5: holder();
+                0x6: holder();
+                0x7: holder();
+            }
+            0x0B: decode OPCODE_OP_BOTTOM3 {
+                0x0: holder();
+                0x1: holder();
+                0x2: holder();
+                0x3: holder();
+                0x4: holder();
+                0x5: holder();
+                0x6: holder();
+                0x7: holder();
+            }
+            0x0C: decode OPCODE_OP_BOTTOM3 {
+                0x0: holder();
+                0x1: holder();
+                0x2: holder();
+                0x3: holder();
+                0x4: holder();
+                0x5: holder();
+                0x6: holder();
+                0x7: holder();
+            }
+            0x0D: decode OPCODE_OP_BOTTOM3 {
+                0x0: holder();
+                0x1: holder();
+                0x2: holder();
+                0x3: holder();
+                0x4: holder();
+                0x5: holder();
+                0x6: holder();
+                0x7: holder();
+            }
+            0x0E: decode OPCODE_OP_BOTTOM3 {
+                0x0: holder();
+                0x1: holder();
+                0x2: holder();
+                0x3: holder();
+                0x4: holder();
+                0x5: holder();
+                0x6: holder();
+                0x7: holder();
+            }
+            0x0F: decode OPCODE_OP_BOTTOM3 {
+                0x0: holder();
+                0x1: holder();
+                0x2: holder();
+                0x3: holder();
+                0x4: holder();
+                0x5: holder();
+                0x6: holder();
+                0x7: holder();
+            }
+            0x10: decode OPCODE_OP_BOTTOM3 {
+                0x0: holder();
+                0x1: holder();
+                0x2: holder();
+                0x3: holder();
+                0x4: holder();
+                0x5: holder();
+                0x6: holder();
+                0x7: holder();
+            }
+            0x11: decode OPCODE_OP_BOTTOM3 {
+                0x0: holder();
+                0x1: holder();
+                0x2: holder();
+                0x3: holder();
+                0x4: holder();
+                0x5: holder();
+                0x6: holder();
+                0x7: holder();
+            }
+            0x12: decode OPCODE_OP_BOTTOM3 {
+                0x0: holder();
+                0x1: holder();
+                0x2: holder();
+                0x3: holder();
+                0x4: holder();
+                0x5: holder();
+                0x6: holder();
+                0x7: holder();
+            }
+            0x13: decode OPCODE_OP_BOTTOM3 {
+                0x0: holder();
+                0x1: holder();
+                0x2: holder();
+                0x3: holder();
+                0x4: holder();
+                0x5: holder();
+                0x6: holder();
+                0x7: holder();
+            }
+            0x14: decode OPCODE_OP_BOTTOM3 {
+                0x0: holder();
+                0x1: holder();
+                0x2: holder();
+                0x3: holder();
+                0x4: holder();
+                0x5: holder();
+                0x6: holder();
+                0x7: holder();
+            }
+            0x15: decode OPCODE_OP_BOTTOM3 {
+                0x0: holder();
+                0x1: holder();
+                0x2: holder();
+                0x3: holder();
+                0x4: holder();
+                0x5: holder();
+                0x6: holder();
+                0x7: holder();
+            }
+            0x16: decode OPCODE_OP_BOTTOM3 {
+                0x0: holder();
+                0x1: holder();
+                0x2: holder();
+                0x3: holder();
+                0x4: holder();
+                0x5: holder();
+                0x6: holder();
+                0x7: holder();
+            }
+            0x17: decode OPCODE_OP_BOTTOM3 {
+                0x0: holder();
+                0x1: holder();
+                0x2: holder();
+                0x3: holder();
+                0x4: holder();
+                0x5: holder();
+                0x6: holder();
+                0x7: holder();
+            }
+            0x18: decode OPCODE_OP_BOTTOM3 {
+                0x0: holder();
+                0x1: holder();
+                0x2: holder();
+                0x3: holder();
+                0x4: holder();
+                0x5: holder();
+                0x6: holder();
+                0x7: holder();
+            }
+            0x19: decode OPCODE_OP_BOTTOM3 {
+                0x0: holder();
+                0x1: holder();
+                0x2: holder();
+                0x3: holder();
+                0x4: holder();
+                0x5: holder();
+                0x6: holder();
+                0x7: holder();
+            }
+            0x1A: decode OPCODE_OP_BOTTOM3 {
+                0x0: holder();
+                0x1: holder();
+                0x2: holder();
+                0x3: holder();
+                0x4: holder();
+                0x5: holder();
+                0x6: holder();
+                0x7: holder();
+            }
+            0x1B: decode OPCODE_OP_BOTTOM3 {
+                0x0: holder();
+                0x1: holder();
+                0x2: holder();
+                0x3: holder();
+                0x4: holder();
+                0x5: holder();
+                0x6: holder();
+                0x7: holder();
+            }
+            0x1C: decode OPCODE_OP_BOTTOM3 {
+                0x0: holder();
+                0x1: holder();
+                0x2: holder();
+                0x3: holder();
+                0x4: holder();
+                0x5: holder();
+                0x6: holder();
+                0x7: holder();
+            }
+            0x1D: decode OPCODE_OP_BOTTOM3 {
+                0x0: holder();
+                0x1: holder();
+                0x2: holder();
+                0x3: holder();
+                0x4: holder();
+                0x5: holder();
+                0x6: holder();
+                0x7: holder();
+            }
+            0x1E: decode OPCODE_OP_BOTTOM3 {
+                0x0: holder();
+                0x1: holder();
+                0x2: holder();
+                0x3: holder();
+                0x4: holder();
+                0x5: holder();
+                0x6: holder();
+                0x7: holder();
+            }
+            0x1F: decode OPCODE_OP_BOTTOM3 {
+                0x0: holder();
+                0x1: holder();
+                0x2: holder();
+                0x3: holder();
+                0x4: holder();
+                0x5: holder();
+                0x6: holder();
+                0x7: holder();
+            }
+            default: FailUnimpl::twoByteOps();
+        }
+    }
+    default: M5InternalError::error(
+        {{"Unexpected first opcode byte in two byte opcode!"}});
+}
diff --git a/src/arch/x86/isa/formats/basic.isa b/src/arch/x86/isa/formats/basic.isa
index 7aea7085f..ea224d638 100644
--- a/src/arch/x86/isa/formats/basic.isa
+++ b/src/arch/x86/isa/formats/basic.isa
@@ -147,3 +147,12 @@ def template BasicDecode {{
 def template BasicDecodeWithMnemonic {{
     return new %(class_name)s("%(mnemonic)s", machInst);
 }};
+
+// The most basic instruction format: 'code' pushed through the Basic* templates
+def format BasicOperate(code, *flags) {{
+        # Base class is the x86 one, as in the other x86 formats (was Sparc).
+        iop = InstObjParams(name, Name, 'X86StaticInst', code, flags)
+        header_output = BasicDeclare.subst(iop); decoder_output = BasicConstructor.subst(iop)
+        decode_block = BasicDecode.subst(iop)
+        exec_output = BasicExecute.subst(iop)
+}};
diff --git a/src/arch/x86/isa/decoder.isa b/src/arch/x86/isa/formats/error.isa
index 85f376b49..8ac2ea44d 100644
--- a/src/arch/x86/isa/decoder.isa
+++ b/src/arch/x86/isa/formats/error.isa
@@ -1,3 +1,5 @@
+// -*- mode:c++ -*-
+
 // Copyright (c) 2007 The Hewlett-Packard Development Company
 // All rights reserved.
 //
@@ -55,10 +57,21 @@
 
 ////////////////////////////////////////////////////////////////////
 //
-// The actual decoder specification
+// "Format" which really indicates an internal error. This is a more
+// significant problem for x86 than for other ISAs because of its complex
+// ExtMachInst type.
 //
 
-decode EXAMPLE default Unknown::unknown()
-{
-    0x0: Unknown::unknown2();
-}
+def template ErrorDecode {{
+    {
+        panic("X86 decoder internal error: '%%s' %%s",
+                %(message)s, machInst);
+    }
+}};
+
+def format M5InternalError(error_message) {{
+    errParams = InstObjParams(name, 'M5InternalError')
+    errParams.message = error_message  # the text ErrorDecode passes to panic()
+    decode_block = ErrorDecode.subst(errParams)
+}};
+
diff --git a/src/arch/x86/isa/formats/formats.isa b/src/arch/x86/isa/formats/formats.isa
index 0d3d1c6dc..d763c05bc 100644
--- a/src/arch/x86/isa/formats/formats.isa
+++ b/src/arch/x86/isa/formats/formats.isa
@@ -87,3 +87,14 @@
 
 //Include the "unknown" format
 ##include "unknown.isa"
+
+//Include the "unimp" format
+##include "unimp.isa"
+
+//Include a format to signal m5 internal errors. This is used to indicate a
+//malfunction of the decode mechanism.
+##include "error.isa"
+
+//Include a format which implements a batch of instructions which do the same
+//thing on a variety of inputs
+##include "multi.isa"
diff --git a/src/arch/x86/isa/formats/multi.isa b/src/arch/x86/isa/formats/multi.isa
new file mode 100644
index 000000000..3e80f9cfb
--- /dev/null
+++ b/src/arch/x86/isa/formats/multi.isa
@@ -0,0 +1,106 @@
+// -*- mode:c++ -*-
+
+// Copyright (c) 2007 The Hewlett-Packard Development Company
+// All rights reserved.
+//
+// Redistribution and use of this software in source and binary forms,
+// with or without modification, are permitted provided that the
+// following conditions are met:
+//
+// The software must be used only for Non-Commercial Use which means any
+// use which is NOT directed to receiving any direct monetary
+// compensation for, or commercial advantage from such use.  Illustrative
+// examples of non-commercial use are academic research, personal study,
+// teaching, education and corporate research & development.
+// Illustrative examples of commercial use are distributing products for
+// commercial advantage and providing services using the software for
+// commercial advantage.
+//
+// If you wish to use this software or functionality therein that may be
+// covered by patents for commercial use, please contact:
+//     Director of Intellectual Property Licensing
+//     Office of Strategy and Technology
+//     Hewlett-Packard Company
+//     1501 Page Mill Road
+//     Palo Alto, California  94304
+//
+// Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.  Redistributions
+// in binary form must reproduce the above copyright notice, this list of
+// conditions and the following disclaimer in the documentation and/or
+// other materials provided with the distribution.  Neither the name of
+// the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.  No right of
+// sublicense is granted herewith.  Derivatives of the software and
+// output created using the software may be prepared, but only for
+// Non-Commercial Uses.  Derivatives of the software may be shared with
+// others provided: (i) the others agree to abide by the list of
+// conditions herein which includes the Non-Commercial Use restrictions;
+// and (ii) such Derivatives of the software include the above copyright
+// notice to acknowledge the contribution from this software where
+// applicable, this list of conditions and the disclaimer below.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Gabe Black
+
+////////////////////////////////////////////////////////////////////
+//
+// Instructions that do the same thing to multiple sets of arguments.
+//
+
+output header {{
+}};
+
+output decoder {{
+}};
+
+output exec {{
+}};
+
+let {{
+    multiops = dict()  # starts empty; the MultiOp format in this file does not use it
+}};
+
+def format MultiOp(code, switchVal, opTags, *opt_flags) {{
+    # Work-in-progress format. For every tag set in opTags it builds the
+    # parameters of one X86StaticInst-derived class whose name is Name plus
+    # '_<tag>' for each tag, then assembles a C++ switch on switchVal.
+    # Nothing is exported yet: the decode_block line below is commented out.
+    instNames = []  # NOTE(review): never appended to, so no cases are emitted
+    for tagSet in opTags:
+        loads = []   # never filled in yet, so the macro-op branch is not taken
+        stores = []
+        postfix = ''
+        for tag in tagSet:
+            postfix += '_' + tag
+        gather_inputs = ''
+        if len(loads) + len(stores) == 0:
+            # If there are no loads or stores, make this a single instruction.
+            iop = InstObjParams(name, Name + postfix, 'X86StaticInst',
+                    {"code": code, "gather_inputs": gather_inputs},
+                    opt_flags)
+        else:
+            # Build up a macro op. We'll punt on this for now
+            pass
+
+    decodeBlob = 'switch(%s) {\n' % switchVal
+    # Every entry needs the 'case' keyword to be a valid C++ switch label.
+    for counter, inst in enumerate(instNames):
+        decodeBlob += ('case %d: return (X86StaticInst *)'
+                       '(new %s(machInst));\n') % (counter, inst)
+    # NOTE(review): decodeBlob is built but never assigned to decode_block.
+    decodeBlob += '}\n'
+    # decode_block = BasicDecodeWithMnemonic.subst(iop)
+}};
diff --git a/src/arch/x86/isa/formats/unimp.isa b/src/arch/x86/isa/formats/unimp.isa
new file mode 100644
index 000000000..12fa8387b
--- /dev/null
+++ b/src/arch/x86/isa/formats/unimp.isa
@@ -0,0 +1,174 @@
+// -*- mode:c++ -*-
+
+// Copyright (c) 2007 The Hewlett-Packard Development Company
+// All rights reserved.
+//
+// Redistribution and use of this software in source and binary forms,
+// with or without modification, are permitted provided that the
+// following conditions are met:
+//
+// The software must be used only for Non-Commercial Use which means any
+// use which is NOT directed to receiving any direct monetary
+// compensation for, or commercial advantage from such use.  Illustrative
+// examples of non-commercial use are academic research, personal study,
+// teaching, education and corporate research & development.
+// Illustrative examples of commercial use are distributing products for
+// commercial advantage and providing services using the software for
+// commercial advantage.
+//
+// If you wish to use this software or functionality therein that may be
+// covered by patents for commercial use, please contact:
+//     Director of Intellectual Property Licensing
+//     Office of Strategy and Technology
+//     Hewlett-Packard Company
+//     1501 Page Mill Road
+//     Palo Alto, California  94304
+//
+// Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.  Redistributions
+// in binary form must reproduce the above copyright notice, this list of
+// conditions and the following disclaimer in the documentation and/or
+// other materials provided with the distribution.  Neither the name of
+// the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.  No right of
+// sublicense is granted herewith.  Derivatives of the software and
+// output created using the software may be prepared, but only for
+// Non-Commercial Uses.  Derivatives of the software may be shared with
+// others provided: (i) the others agree to abide by the list of
+// conditions herein which includes the Non-Commercial Use restrictions;
+// and (ii) such Derivatives of the software include the above copyright
+// notice to acknowledge the contribution from this software where
+// applicable, this list of conditions and the disclaimer below.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Gabe Black
+
+////////////////////////////////////////////////////////////////////
+//
+// Unimplemented instructions
+//
+
+output header {{
+    /**
+     * Static instruction class for unimplemented instructions that
+     * cause simulator termination.  Note that these are recognized
+     * (legal) instructions that the simulator does not support; the
+     * 'Unknown' class is used for unrecognized/illegal instructions.
+     * This is a leaf class.
+     */
+    class FailUnimplemented : public X86StaticInst
+    {
+      public:
+        /// Constructor; also marks the instruction as non-speculative.
+        FailUnimplemented(const char *_mnemonic, ExtMachInst _machInst)
+            : X86StaticInst(_mnemonic, _machInst, No_OpClass)
+        {
+            // don't call execute() (which panics) if we're on a
+            // speculative path
+            flags[IsNonSpeculative] = true;
+        }
+
+        %(BasicExecDeclare)s
+
+        std::string
+        generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+    };
+
+    /**
+     * Base class for unimplemented instructions that cause a warning
+     * to be printed (but do not terminate simulation).  This
+     * implementation is a little screwy in that it will print a
+     * warning for each instance of a particular unimplemented machine
+     * instruction, not just for each unimplemented opcode.  Should
+     * probably make the 'warned' flag a static member of the derived
+     * class.
+     */
+    class WarnUnimplemented : public X86StaticInst
+    {
+      private:
+        /// Have we warned on this instruction yet? (set from const execute())
+        mutable bool warned;
+
+      public:
+        /// Constructor; starts un-warned and marks the instruction non-speculative.
+        WarnUnimplemented(const char *_mnemonic, ExtMachInst _machInst)
+            : X86StaticInst(_mnemonic, _machInst, No_OpClass), warned(false)
+        {
+            // don't call execute() (which warns) if we're on a
+            // speculative path
+            flags[IsNonSpeculative] = true;
+        }
+
+        %(BasicExecDeclare)s
+
+        std::string
+        generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+    };
+}};
+
+output decoder {{
+    std::string
+    FailUnimplemented::generateDisassembly(Addr pc,
+                                           const SymbolTable *symtab) const
+    {   // pc and symtab are not used; only the mnemonic is printed
+        return csprintf("%-10s (unimplemented)", mnemonic);
+    }
+
+    std::string
+    WarnUnimplemented::generateDisassembly(Addr pc,
+                                           const SymbolTable *symtab) const
+    {   // pc and symtab are not used; only the mnemonic is printed
+#ifdef SS_COMPATIBLE_DISASSEMBLY
+        return csprintf("%-10s", mnemonic);  // bare mnemonic, no tag appended
+#else  // SS_COMPATIBLE_DISASSEMBLY not defined
+        return csprintf("%-10s (unimplemented)", mnemonic);
+#endif  // SS_COMPATIBLE_DISASSEMBLY
+    }
+}};
+
+output exec {{
+    Fault
+    FailUnimplemented::execute(%(CPU_exec_context)s *xc,
+                               Trace::InstRecord *traceData) const
+    {   // xc and traceData are not used
+        panic("attempt to execute unimplemented instruction '%s' %s",
+                mnemonic, machInst);
+        return NoFault;  // presumably never reached if panic() aborts; TODO confirm
+    }
+
+    Fault
+    WarnUnimplemented::execute(%(CPU_exec_context)s *xc,
+                               Trace::InstRecord *traceData) const
+    {   // xc and traceData are not used
+        if (!warned) {  // warn only the first time this object is executed
+            warn("instruction '%s' unimplemented\n", mnemonic);
+            warned = true;  // 'warned' is mutable, so this const method may set it
+        }
+
+        return NoFault;
+    }
+}};
+
+
+def format FailUnimpl() {{
+    failParams = InstObjParams(name, 'FailUnimplemented')  # one shared class
+    decode_block = BasicDecodeWithMnemonic.subst(failParams)
+}};
+
+def format WarnUnimpl() {{
+    warnParams = InstObjParams(name, 'WarnUnimplemented')  # one shared class
+    decode_block = BasicDecodeWithMnemonic.subst(warnParams)
+}};
+
diff --git a/src/arch/x86/isa/formats/unknown.isa b/src/arch/x86/isa/formats/unknown.isa
index 605ddcb69..43ddc20c1 100644
--- a/src/arch/x86/isa/formats/unknown.isa
+++ b/src/arch/x86/isa/formats/unknown.isa
@@ -120,7 +120,8 @@ output exec {{
         Fault Unknown::execute(%(CPU_exec_context)s *xc,
                 Trace::InstRecord *traceData) const
         {
-            panic("No instructions are implemented for X86!\n");
+            warn("No instructions are implemented for X86!\n");
+            return NoFault;
         }
 }};
 
diff --git a/src/arch/x86/isa/main.isa b/src/arch/x86/isa/main.isa
index fd1b461f0..146f714a7 100644
--- a/src/arch/x86/isa/main.isa
+++ b/src/arch/x86/isa/main.isa
@@ -79,10 +79,10 @@ namespace X86ISA;
 ##include "operands.isa"
 
 //Include the base class for x86 instructions, and some support code
-//##include "base.isa"
+##include "base.isa"
 
 //Include the definitions for the instruction formats
 ##include "formats/formats.isa"
 
 //Include the decoder definition
-##include "decoder.isa"
+##include "decoder/decoder.isa"
diff --git a/src/arch/x86/isa/operands.isa b/src/arch/x86/isa/operands.isa
index 4b144dce0..20376f38f 100644
--- a/src/arch/x86/isa/operands.isa
+++ b/src/arch/x86/isa/operands.isa
@@ -96,4 +96,7 @@ def operand_types {{
 }};
 
 def operands {{
+        # This is just copied from SPARC, because having no operands confuses
+        # the parser.
+        'Rd':               ('IntReg', 'udw', 'RD', 'IsInteger', 1)
 }};
diff --git a/src/arch/x86/predecoder.cc b/src/arch/x86/predecoder.cc
new file mode 100644
index 000000000..fbed4fe41
--- /dev/null
+++ b/src/arch/x86/predecoder.cc
@@ -0,0 +1,358 @@
+/*
+ * Copyright (c) 2007 The Hewlett-Packard Development Company
+ * All rights reserved.
+ *
+ * Redistribution and use of this software in source and binary forms,
+ * with or without modification, are permitted provided that the
+ * following conditions are met:
+ *
+ * The software must be used only for Non-Commercial Use which means any
+ * use which is NOT directed to receiving any direct monetary
+ * compensation for, or commercial advantage from such use.  Illustrative
+ * examples of non-commercial use are academic research, personal study,
+ * teaching, education and corporate research & development.
+ * Illustrative examples of commercial use are distributing products for
+ * commercial advantage and providing services using the software for
+ * commercial advantage.
+ *
+ * If you wish to use this software or functionality therein that may be
+ * covered by patents for commercial use, please contact:
+ *     Director of Intellectual Property Licensing
+ *     Office of Strategy and Technology
+ *     Hewlett-Packard Company
+ *     1501 Page Mill Road
+ *     Palo Alto, California  94304
+ *
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.  Redistributions
+ * in binary form must reproduce the above copyright notice, this list of
+ * conditions and the following disclaimer in the documentation and/or
+ * other materials provided with the distribution.  Neither the name of
+ * the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.  No right of
+ * sublicense is granted herewith.  Derivatives of the software and
+ * output created using the software may be prepared, but only for
+ * Non-Commercial Uses.  Derivatives of the software may be shared with
+ * others provided: (i) the others agree to abide by the list of
+ * conditions herein which includes the Non-Commercial Use restrictions;
+ * and (ii) such Derivatives of the software include the above copyright
+ * notice to acknowledge the contribution from this software where
+ * applicable, this list of conditions and the disclaimer below.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Gabe Black
+ */
+
+#include "arch/x86/predecoder.hh"
+#include "base/misc.hh"
+#include "base/trace.hh"
+#include "sim/host.hh"
+
+namespace X86ISA
+{
+    void Predecoder::process()
+    {
+        //Drives the predecoder state machine, one state handler per pass.
+
+        //Sanity checks: there have to be bytes available to work on, and
+        //an ExtMachInst that has already been predecoded must have been
+        //picked up so that it doesn't get overwritten.
+        assert(!outOfBytes);
+        assert(!emiIsReady);
+
+        //Keep going until an instruction is done or the bytes run out.
+        while(!(emiIsReady || outOfBytes))
+        {
+            const uint8_t currentByte = getNextByte();
+            switch(state)
+            {
+              case PrefixState:
+                state = doPrefixState(currentByte);
+                break;
+              case OpcodeState:
+                state = doOpcodeState(currentByte);
+                break;
+              case ModRMState:
+                state = doModRMState(currentByte);
+                break;
+              case SIBState:
+                state = doSIBState(currentByte);
+                break;
+              case DisplacementState:
+                state = doDisplacementState();
+                break;
+              case ImmediateState:
+                state = doImmediateState();
+                break;
+              case ErrorState:
+                panic("Went to the error state in the predecoder.\n");
+              default:
+                panic("Unrecognized state! %d\n", state);
+            }
+        }
+    }
+
+    //Either get a prefix and record it in the ExtMachInst, or send the
+    //state machine on to get the opcode(s).
+    Predecoder::State Predecoder::doPrefixState(uint8_t nextByte)
+    {
+        const uint8_t prefixKind = Prefixes[nextByte];
+        if(!prefixKind) {
+            //Not a prefix: leave the byte in place and move on to opcodes.
+            emi.opcode.num = 0;
+            return OpcodeState;
+        }
+        //Prefix bytes get used up here. Only Rex is saved into the emi.
+        consumeByte();
+        switch(prefixKind)
+        {
+            //Size override prefixes
+          case OperandSizeOverride:
+            DPRINTF(Predecoder, "Found operand size override prefix.\n");
+            break;
+          case AddressSizeOverride:
+            DPRINTF(Predecoder, "Found address size override prefix.\n");
+            break;
+            //Segment override prefixes
+          case CSOverride:
+            DPRINTF(Predecoder, "Found cs segment override.\n");
+            break;
+          case DSOverride:
+            DPRINTF(Predecoder, "Found ds segment override.\n");
+            break;
+          case ESOverride:
+            DPRINTF(Predecoder, "Found es segment override.\n");
+            break;
+          case FSOverride:
+            DPRINTF(Predecoder, "Found fs segment override.\n");
+            break;
+          case GSOverride:
+            DPRINTF(Predecoder, "Found gs segment override.\n");
+            break;
+          case SSOverride:
+            DPRINTF(Predecoder, "Found ss segment override.\n");
+            break;
+          case Lock:
+            DPRINTF(Predecoder, "Found lock prefix.\n");
+            break;
+          case Rep:
+            DPRINTF(Predecoder, "Found rep prefix.\n");
+            break;
+          case Repne:
+            DPRINTF(Predecoder, "Found repne prefix.\n");
+            break;
+          case RexPrefix:
+            DPRINTF(Predecoder, "Found Rex prefix %#x.\n", nextByte);
+            emi.rex = nextByte;
+            break;
+          default:
+            panic("Unrecognized prefix %#x\n", nextByte);
+        }
+        return PrefixState;
+    }
+
+    //Load all the opcodes (currently up to 2) and then figure out
+    //what immediate and/or ModRM is needed.
+    Predecoder::State Predecoder::doOpcodeState(uint8_t nextByte)
+    {
+        State nextState = ErrorState;
+        emi.opcode.num++;
+        //We can't handle 3+ byte opcodes right now
+        assert(emi.opcode.num < 3);
+        consumeByte();
+        if(emi.opcode.num == 1 && nextByte == 0x0f)
+        {
+            nextState = OpcodeState;
+            DPRINTF(Predecoder, "Found two byte opcode.\n");
+            emi.opcode.prefixA = nextByte;
+        }
+        else if(emi.opcode.num == 2 &&
+                (nextByte == 0x0f || (nextByte & 0xf8) == 0x38))
+        {
+            panic("Three byte opcodes aren't yet supported!\n");
+            nextState = OpcodeState;
+            DPRINTF(Predecoder, "Found three byte opcode.\n");
+            emi.opcode.prefixB = nextByte;
+        }
+        else
+        {
+            DPRINTF(Predecoder, "Found opcode %#x.\n", nextByte);
+            emi.opcode.op = nextByte;
+
+            //Prepare for any immediate/displacement we might need
+            immediateCollected = 0;
+            emi.immediate = 0;
+            displacementCollected = 0;
+            emi.displacement = 0;
+
+            //Figure out how big of an immediate we'll retrieve based on
+            //the opcode and the operand size. A 64 bit operand size is
+            //asked for with Rex.W, which is bit 3 (0x8) of the Rex prefix;
+            //bit 2 (0x4) is Rex.R and says nothing about operand size.
+            int immType = ImmediateType[emi.opcode.num - 1][nextByte];
+            if(0) //16 bit mode
+                immediateSize = ImmediateTypeToSize[0][immType];
+            else if(!(emi.rex & 0x8)) //32 bit mode
+                immediateSize = ImmediateTypeToSize[1][immType];
+            else //64 bit mode
+                immediateSize = ImmediateTypeToSize[2][immType];
+
+            //Determine what to expect next
+            if (UsesModRM[emi.opcode.num - 1][nextByte]) {
+                nextState = ModRMState;
+            } else if(immediateSize) {
+                nextState = ImmediateState;
+            } else {
+                emiIsReady = true;
+                nextState = PrefixState;
+            }
+        }
+        return nextState;
+    }
+
+    //Get the ModRM byte and determine what displacement, if any, there is.
+    //Also determine whether or not to get the SIB byte, displacement, or
+    //immediate next.
+    Predecoder::State Predecoder::doModRMState(uint8_t nextByte)
+    {
+        State nextState = ErrorState;
+        emi.modRM = nextByte;
+        DPRINTF(Predecoder, "Found modrm byte %#x.\n", nextByte);
+        //Split out the mod (bits 7-6) and rm (bits 2-0) fields of the byte.
+        const uint8_t mod = nextByte >> 6;
+        const uint8_t rm = nextByte & 0x7;
+        if (0) {//FIXME in 16 bit mode
+            //figure out 16 bit displacement size
+            if((mod == 0 && rm == 6) || mod == 2)
+                displacementSize = 2;
+            else if(mod == 1)
+                displacementSize = 1;
+            else
+                displacementSize = 0;
+        } else {
+            //figure out 32/64 bit displacement size
+            if((mod == 0 && rm == 5) || mod == 2)
+                displacementSize = 4;
+            else if(mod == 1) //A mod of 1 always means one displacement byte.
+                displacementSize = 1;
+            else
+                displacementSize = 0;
+        }
+        //If there's an SIB, get that next.
+        //There is no SIB in 16 bit mode.
+        if(rm == 4 && mod != 3) {
+                // && in 32/64 bit mode)
+            nextState = SIBState;
+        } else if(displacementSize) {
+            nextState = DisplacementState;
+        } else if(immediateSize) {
+            nextState = ImmediateState;
+        } else {
+            emiIsReady = true;
+            nextState = PrefixState;
+        }
+        //The ModRM byte is consumed no matter what
+        consumeByte();
+        return nextState;
+    }
+
+    //Get the SIB byte. We don't do anything with it at this point, other
+    //than storing it in the ExtMachInst. Determine if we need to get a
+    //displacement or immediate next.
+    Predecoder::State Predecoder::doSIBState(uint8_t nextByte)
+    {
+        emi.sib = nextByte;
+        DPRINTF(Predecoder, "Found SIB byte %#x.\n", nextByte);
+        consumeByte();
+        //With a ModRM mod of 0, a base field (bits 2-0) of 5 means there is
+        //no base register and a 32 bit displacement follows the SIB byte.
+        if((emi.modRM & 0xC0) == 0 && (nextByte & 0x7) == 5)
+            displacementSize = 4;
+        if(displacementSize)
+            return DisplacementState;
+        if(immediateSize)
+            return ImmediateState;
+        emiIsReady = true;
+        return PrefixState;
+    }
+
+    //Gather up the displacement, or at least as much of it
+    //as we can get.
+    Predecoder::State Predecoder::doDisplacementState()
+    {
+        //Pull in whatever part of the displacement this chunk holds.
+        getImmediate(displacementCollected, emi.displacement,
+                displacementSize);
+
+        DPRINTF(Predecoder, "Collecting %d byte displacement, got %d bytes.\n",
+                displacementSize, displacementCollected);
+
+        //If part of the displacement is still missing, stay in this state
+        //so that the rest gets picked up on a later pass.
+        if(displacementCollected != displacementSize)
+            return DisplacementState;
+
+        //Everything has arrived, so sign extend it from its encoded width.
+        switch(displacementSize)
+        {
+          case 1:
+            emi.displacement = sext<8>(emi.displacement);
+            break;
+          case 2:
+            emi.displacement = sext<16>(emi.displacement);
+            break;
+          case 4:
+            emi.displacement = sext<32>(emi.displacement);
+            break;
+          default:
+            panic("Undefined displacement size!\n");
+        }
+        DPRINTF(Predecoder, "Collected displacement %#x.\n",
+                emi.displacement);
+
+        //An immediate, if one is expected, is collected next. Otherwise
+        //the ExtMachInst is finished.
+        if(immediateSize)
+            return ImmediateState;
+
+        emiIsReady = true;
+        return PrefixState;
+    }
+
+    //Gather up the immediate, or at least as much of it
+    //as we can get
+    Predecoder::State Predecoder::doImmediateState()
+    {
+        //Pull in whatever part of the immediate this chunk holds.
+        getImmediate(immediateCollected, emi.immediate, immediateSize);
+
+        DPRINTF(Predecoder, "Collecting %d byte immediate, got %d bytes.\n",
+                immediateSize, immediateCollected);
+
+        //Stay in this state until every byte of the immediate has
+        //arrived; the rest gets picked up on a later pass.
+        if(immediateCollected != immediateSize)
+            return ImmediateState;
+
+        DPRINTF(Predecoder, "Collected immediate %#x.\n",
+                emi.immediate);
+
+        //Nothing is collected after the immediate, so the ExtMachInst
+        //is finished and the state machine goes back to looking for
+        //prefixes.
+        emiIsReady = true;
+        return PrefixState;
+    }
+}
diff --git a/src/arch/x86/predecoder.hh b/src/arch/x86/predecoder.hh
new file mode 100644
index 000000000..1df17d6d2
--- /dev/null
+++ b/src/arch/x86/predecoder.hh
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 2007 The Hewlett-Packard Development Company
+ * All rights reserved.
+ *
+ * Redistribution and use of this software in source and binary forms,
+ * with or without modification, are permitted provided that the
+ * following conditions are met:
+ *
+ * The software must be used only for Non-Commercial Use which means any
+ * use which is NOT directed to receiving any direct monetary
+ * compensation for, or commercial advantage from such use.  Illustrative
+ * examples of non-commercial use are academic research, personal study,
+ * teaching, education and corporate research & development.
+ * Illustrative examples of commercial use are distributing products for
+ * commercial advantage and providing services using the software for
+ * commercial advantage.
+ *
+ * If you wish to use this software or functionality therein that may be
+ * covered by patents for commercial use, please contact:
+ *     Director of Intellectual Property Licensing
+ *     Office of Strategy and Technology
+ *     Hewlett-Packard Company
+ *     1501 Page Mill Road
+ *     Palo Alto, California  94304
+ *
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.  Redistributions
+ * in binary form must reproduce the above copyright notice, this list of
+ * conditions and the following disclaimer in the documentation and/or
+ * other materials provided with the distribution.  Neither the name of
+ * the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.  No right of
+ * sublicense is granted herewith.  Derivatives of the software and
+ * output created using the software may be prepared, but only for
+ * Non-Commercial Uses.  Derivatives of the software may be shared with
+ * others provided: (i) the others agree to abide by the list of
+ * conditions herein which includes the Non-Commercial Use restrictions;
+ * and (ii) such Derivatives of the software include the above copyright
+ * notice to acknowledge the contribution from this software where
+ * applicable, this list of conditions and the disclaimer below.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Gabe Black
+ */
+
+#ifndef __ARCH_X86_PREDECODER_HH__
+#define __ARCH_X86_PREDECODER_HH__
+
+#include "arch/x86/types.hh"
+#include "base/bitfield.hh"
+#include "sim/host.hh"
+
+class ThreadContext;
+
+namespace X86ISA
+{
+    class Predecoder //Builds ExtMachInsts out of chunks of fetched bytes
+    {
+      private:
+        //These are defined and documented in predecoder_tables.cc
+        static const uint8_t Prefixes[256];
+        static const uint8_t UsesModRM[2][256];
+        static const uint8_t ImmediateType[2][256];
+        static const uint8_t ImmediateTypeToSize[3][10];
+
+      protected:
+        ThreadContext * tc;
+        //The bytes to be predecoded
+        MachInst fetchChunk;
+        //The pc of the start of fetchChunk
+        Addr basePC;
+        //The offset into fetchChunk of current processing
+        int offset;
+        //The extended machine instruction being generated
+        ExtMachInst emi;
+
+        inline uint8_t getNextByte()
+        {
+            return (fetchChunk >> (offset * 8)) & 0xff;
+        }
+
+        void getImmediate(int &collected, uint64_t &current, int size)
+        {
+            //Move bytes of a "size" byte immediate or displacement out of
+            //fetchChunk into "current". Start with how many are missing.
+            int toGet = size - collected;
+            //Figure out how many bytes are left in our "buffer"
+            int remaining = sizeof(MachInst) - offset;
+            //Get as much as we need, up to the amount available.
+            toGet = toGet > remaining ? remaining : toGet;
+
+            //Shift the bytes we want to be all the way to the right
+            uint64_t partial = fetchChunk >> (offset * 8);
+            //Mask off what we don't want
+            partial &= mask(toGet * 8);
+            //Shift it past the bytes already collected for this same value.
+            partial <<= (collected * 8);
+            //Put it into the value being built up
+            current |= partial;
+            //Update how many bytes we've collected.
+            collected += toGet;
+            consumeBytes(toGet);
+        }
+
+        inline void consumeByte()
+        {
+            offset++;
+            assert(offset <= sizeof(MachInst));
+            if(offset == sizeof(MachInst))
+                outOfBytes = true;
+        }
+
+        inline void consumeBytes(int numBytes)
+        {
+            offset += numBytes;
+            assert(offset <= sizeof(MachInst));
+            if(offset == sizeof(MachInst))
+                outOfBytes = true;
+        }
+
+        //State machine state
+      protected:
+        //Whether or not we're out of bytes
+        bool outOfBytes;
+        //Whether we've completed generating an ExtMachInst
+        bool emiIsReady;
+        //The size of the displacement value
+        int displacementSize;
+        int displacementCollected;
+        //The size of the immediate value
+        int immediateSize;
+        int immediateCollected;
+
+        enum State {
+            PrefixState,
+            OpcodeState,
+            ModRMState,
+            SIBState,
+            DisplacementState,
+            ImmediateState,
+            //We should never get to this state. Getting here is an error.
+            ErrorState
+        };
+
+        State state;
+
+        //Functions to handle each of the states
+        State doPrefixState(uint8_t);
+        State doOpcodeState(uint8_t);
+        State doModRMState(uint8_t);
+        State doSIBState(uint8_t);
+        State doDisplacementState();
+        State doImmediateState();
+
+      public:
+        Predecoder(ThreadContext * _tc) :
+            tc(_tc), fetchChunk(0), basePC(0), offset(0), emi(), outOfBytes(true),
+            emiIsReady(false), displacementSize(0), displacementCollected(0),
+            immediateSize(0), immediateCollected(0), state(PrefixState)
+        {} //Starts out of bytes, so moreBytes() has to be called first.
+
+        ThreadContext * getTC()
+        {
+            return tc;
+        }
+
+        void setTC(ThreadContext * _tc)
+        {
+            tc = _tc;
+        }
+
+        void process();
+
+        //Use this to give data to the predecoder. This should be used
+        //when there is control flow.
+        void moreBytes(Addr currPC, Addr off, MachInst data)
+        {
+            basePC = currPC;
+            offset = off;
+            fetchChunk = data;
+            assert(off < sizeof(MachInst));
+            outOfBytes = false;
+            process();
+        }
+
+        //Use this to give data to the predecoder. This should be used
+        //when instructions are executed in order.
+        void moreBytes(MachInst machInst)
+        {
+            moreBytes(basePC + sizeof(machInst), 0, machInst);
+        }
+
+        bool needMoreBytes()
+        {
+            return outOfBytes;
+        }
+
+        bool extMachInstReady()
+        {
+            return emiIsReady;
+        }
+
+        //This returns a constant reference to the ExtMachInst to avoid a copy
+        const ExtMachInst & getExtMachInst()
+        {
+            assert(emiIsReady);
+            emiIsReady = false;
+            return emi;
+        }
+    };
+};
+
+#endif // __ARCH_X86_PREDECODER_HH__
diff --git a/src/arch/x86/predecoder_tables.cc b/src/arch/x86/predecoder_tables.cc
new file mode 100644
index 000000000..f233ad234
--- /dev/null
+++ b/src/arch/x86/predecoder_tables.cc
@@ -0,0 +1,222 @@
+/*
+ * Copyright (c) 2007 The Hewlett-Packard Development Company
+ * All rights reserved.
+ *
+ * Redistribution and use of this software in source and binary forms,
+ * with or without modification, are permitted provided that the
+ * following conditions are met:
+ *
+ * The software must be used only for Non-Commercial Use which means any
+ * use which is NOT directed to receiving any direct monetary
+ * compensation for, or commercial advantage from such use.  Illustrative
+ * examples of non-commercial use are academic research, personal study,
+ * teaching, education and corporate research & development.
+ * Illustrative examples of commercial use are distributing products for
+ * commercial advantage and providing services using the software for
+ * commercial advantage.
+ *
+ * If you wish to use this software or functionality therein that may be
+ * covered by patents for commercial use, please contact:
+ *     Director of Intellectual Property Licensing
+ *     Office of Strategy and Technology
+ *     Hewlett-Packard Company
+ *     1501 Page Mill Road
+ *     Palo Alto, California  94304
+ *
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.  Redistributions
+ * in binary form must reproduce the above copyright notice, this list of
+ * conditions and the following disclaimer in the documentation and/or
+ * other materials provided with the distribution.  Neither the name of
+ * the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.  No right of
+ * sublicense is granted herewith.  Derivatives of the software and
+ * output created using the software may be prepared, but only for
+ * Non-Commercial Uses.  Derivatives of the software may be shared with
+ * others provided: (i) the others agree to abide by the list of
+ * conditions herein which includes the Non-Commercial Use restrictions;
+ * and (ii) such Derivatives of the software include the above copyright
+ * notice to acknowledge the contribution from this software where
+ * applicable, this list of conditions and the disclaimer below.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Gabe Black
+ */
+
+#include "arch/x86/predecoder.hh"
+#include "arch/x86/types.hh"
+
+namespace X86ISA
+{
+    const uint8_t CS = CSOverride; //Two letter names keep Prefixes aligned.
+    const uint8_t DS = DSOverride;
+    const uint8_t ES = ESOverride;
+    const uint8_t FS = FSOverride;
+    const uint8_t GS = GSOverride;
+    const uint8_t SS = SSOverride;
+
+    const uint8_t OO = OperandSizeOverride; //The non segment prefixes.
+    const uint8_t AO = AddressSizeOverride;
+    const uint8_t LO = Lock;
+    const uint8_t RE = Rep;
+    const uint8_t RN = Repne;
+    const uint8_t RX = RexPrefix;
+
+    //This table identifies whether a byte is a prefix, and if it is,
+    //which prefix it is.
+    const uint8_t Predecoder::Prefixes[256] =
+    {    //LSB (the low nibble of the byte) selects the column
+// MSB   0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | A | B | C | D | E | F
+/*   0*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
+/*   1*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
+/*   2*/ 0 , 0 , 0 , 0 , 0 , 0 , ES, 0 , 0 , 0 , 0 , 0 , 0 , 0 , CS, 0,
+/*   3*/ 0 , 0 , 0 , 0 , 0 , 0 , SS, 0 , 0 , 0 , 0 , 0 , 0 , 0 , DS, 0,
+/*   4*/ RX, RX, RX, RX, RX, RX, RX, RX, RX, RX, RX, RX, RX, RX, RX, RX,
+/*   5*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
+/*   6*/ 0 , 0 , 0 , 0 , FS, GS, OO, AO, 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
+/*   7*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
+/*   8*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
+/*   9*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
+/*   A*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
+/*   B*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
+/*   C*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
+/*   D*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
+/*   E*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
+/*   F*/ LO, 0 , RN, RE, 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0
+    };
+
+    //This table identifies whether a particular opcode uses the ModRM byte
+    const uint8_t Predecoder::UsesModRM[2][256] =
+    {//For one byte opcodes, first index 0
+        {    //LSB
+//     MSB   0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | A | B | C | D | E | F
+/*      0 */ 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0,
+/*      1 */ 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0,
+/*      2 */ 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0,
+/*      3 */ 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0,
+/*      4 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
+/*      5 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
+/*      6 */ 0 , 0 , 1 , 1 , 0 , 0 , 0 , 0 , 0 , 1 , 0 , 1 , 0 , 0 , 0 , 0,
+/*      7 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
+/*      8 */ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1,
+/*      9 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
+/*      A */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
+/*      B */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
+/*      C */ 1 , 1 , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
+/*      D */ 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1,
+/*      E */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
+/*      F */ 0 , 0 , 0 , 0 , 0 , 0 , 1 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 1 , 1
+        },
+    //For two byte opcodes (after a 0x0f byte), first index 1
+        {    //LSB
+//     MSB   0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | A | B | C | D | E | F
+/*      0 */ 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 1 , 0 , 1,
+/*      1 */ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
+/*      2 */ 1 , 1 , 1 , 1 , 1 , 0 , 1 , 0 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1,
+/*      3 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
+/*      4 */ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1,
+/*      5 */ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1,
+/*      6 */ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1,
+/*      7 */ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 1,
+/*      8 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
+/*      9 */ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1,
+/*      A */ 0 , 0 , 0 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 1,
+/*      B */ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 1 , 1 , 1 , 1 , 1 , 1,
+/*      C */ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
+/*      D */ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1,
+/*      E */ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1,
+/*      F */ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0
+        }
+    };
+
+    enum ImmediateTypes { //Values are used to index ImmediateTypeToSize.
+        NoImm,
+        NI = NoImm, //Each type also gets a two letter name for the tables.
+        ByteImm,
+        BY = ByteImm,
+        WordImm,
+        WO = WordImm,
+        DWordImm,
+        DW = DWordImm,
+        QWordImm,
+        QW = QWordImm,
+        OWordImm,
+        OW = OWordImm,
+        VWordImm,
+        VW = VWordImm,
+        ZWordImm,
+        ZW = ZWordImm,
+        Pointer,
+        PO = Pointer,
+        //The enter instruction takes -2- immediates for a total of 3 bytes
+        Enter,
+        EN = Enter
+    };
+
+    const uint8_t Predecoder::ImmediateTypeToSize[3][10] =
+    {   //Sizes in bytes. Columns follow the order of the ImmediateTypes enum.
+//       noimm byte word dword qword oword vword zword pointer enter
+        {0,    1,   2,   4,    8,    16,   2,    2,    4,      3}, //16 bit
+        {0,    1,   2,   4,    8,    16,   4,    4,    6,      3}, //32 bit
+        {0,    1,   2,   4,    8,    16,   8,    4,    0,      3}  //64 bit
+    };
+
+    //This table determines the immediate type. The first index is the
+    //number of opcode bytes minus one, and the second is the last,
+    //meaningful byte of the opcode. Entries with no immediate are written
+    //as a plain 0 rather than the NI constant for the sake of clarity.
+    const uint8_t Predecoder::ImmediateType[2][256] =
+    {//For one byte opcodes, first index 0
+        {    //LSB
+//     MSB   0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | A | B | C | D | E | F
+/*      0 */ 0 , 0 , 0 , 0 , BY, ZW, 0 , 0 , 0 , 0 , 0 , 0 , BY, ZW, 0 , 0 ,
+/*      1 */ 0 , 0 , 0 , 0 , BY, ZW, 0 , 0 , 0 , 0 , 0 , 0 , BY, ZW, 0 , 0 ,
+/*      2 */ 0 , 0 , 0 , 0 , BY, ZW, 0 , 0 , 0 , 0 , 0 , 0 , BY, ZW, 0 , 0 ,
+/*      3 */ 0 , 0 , 0 , 0 , BY, ZW, 0 , 0 , 0 , 0 , 0 , 0 , BY, ZW, 0 , 0 ,
+/*      4 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+/*      5 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+/*      6 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , ZW, ZW, BY, BY, 0 , 0 , 0 , 0 ,
+/*      7 */ BY, BY, BY, BY, BY, BY, BY, BY, BY, BY, BY, BY, BY, BY, BY, BY,
+/*      8 */ BY, ZW, BY, BY, 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+/*      9 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+/*      A */ BY, VW, BY, VW, 0 , 0 , 0 , 0 , BY, ZW, 0 , 0 , 0 , 0 , 0 , 0 ,
+/*      B */ BY, BY, BY, BY, BY, BY, BY, BY, VW, VW, VW, VW, VW, VW, VW, VW,
+/*      C */ BY, BY, WO, 0 , 0 , 0 , BY, ZW, EN, 0 , WO, 0 , 0 , BY, 0 , 0 ,
+/*      D */ 0 , 0 , 0 , 0 , BY, BY, 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+/*      E */ BY, BY, BY, BY, BY, BY, BY, BY, ZW, ZW, PO, BY, 0 , 0 , 0 , 0 ,
+/*      F */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0
+        },
+    //For two byte opcodes (after a 0x0f byte), first index 1
+        {    //LSB
+//     MSB   0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | A | B | C | D | E | F
+/*      0 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+/*      1 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+/*      2 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+/*      3 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+/*      4 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+/*      5 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+/*      6 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+/*      7 */ BY, 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+/*      8 */ ZW, ZW, ZW, ZW, ZW, ZW, ZW, ZW, ZW, ZW, ZW, ZW, ZW, ZW, ZW, ZW,
+/*      9 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+/*      A */ 0 , 0 , 0 , 0 , BY, 0 , 0 , 0 , 0 , 0 , 0 , 0 , BY, 0 , 0 , 0 ,
+/*      B */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , ZW, 0 , BY, 0 , 0 , 0 , 0 , 0 ,
+/*      C */ 0 , 0 , BY, 0 , BY, BY, BY, 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+/*      D */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+/*      E */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+/*      F */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0
+        }
+    };
+}
diff --git a/src/arch/x86/types.hh b/src/arch/x86/types.hh
index 63f65eee5..ca4a15d24 100644
--- a/src/arch/x86/types.hh
+++ b/src/arch/x86/types.hh
@@ -59,13 +59,111 @@
 #define __ARCH_X86_TYPES_HH__
 
 #include <inttypes.h>
+#include <iostream>
+
+#include "base/bitfield.hh"
+#include "base/cprintf.hh"
 
 namespace X86ISA
 {
-    //XXX This won't work
-    typedef uint32_t MachInst;
-    //XXX This won't work either
-    typedef uint64_t ExtMachInst;
+    //This really determines how many bytes are passed to the predecoder.
+    typedef uint64_t MachInst;
+
+    enum Prefixes {
+        NoOverride = 0,
+        CSOverride = 1,
+        DSOverride = 2,
+        ESOverride = 3,
+        FSOverride = 4,
+        GSOverride = 5,
+        SSOverride = 6,
+        //The Rex prefix obviously doesn't fit in with the above, but putting
+        //it here lets us save double the space the enums take up.
+        RexPrefix = 7,
+        //There can be only one segment override, so they share the
+        //first 3 bits in the legacyPrefixes bitfield.
+        SegmentOverride = 0x7,
+        OperandSizeOverride = 8,
+        AddressSizeOverride = 16,
+        Lock = 32,
+        Rep = 64,
+        Repne = 128
+    };
+
+    BitUnion8(ModRM)
+        Bitfield<7,6> mod;
+        Bitfield<5,3> reg;
+        Bitfield<2,0> rm;
+    EndBitUnion(ModRM)
+
+    BitUnion8(Sib)
+        Bitfield<7,6> scale;
+        Bitfield<5,3> index;
+        Bitfield<2,0> base;
+    EndBitUnion(Sib)
+
+    BitUnion8(Rex)
+        Bitfield<3> w;
+        Bitfield<2> r;
+        Bitfield<1> x;
+        Bitfield<0> b;
+    EndBitUnion(Rex)
+
+    BitUnion8(Opcode)
+        Bitfield<7,3> top5;
+        Bitfield<2,0> bottom3;
+    EndBitUnion(Opcode)
+
+    //The intermediate structure the x86 predecoder returns.
+    struct ExtMachInst
+    {
+        //Prefixes
+        uint8_t legacy;
+        Rex rex;
+        //This holds all of the bytes of the opcode
+        struct
+        {
+            //The number of bytes in this opcode. Right now, we ignore that
+            //this can be 3 in some cases
+            uint8_t num;
+            //The first byte detected in a 2+ byte opcode. Should be 0x0F.
+            uint8_t prefixA;
+            //The second byte detected in a 3+ byte opcode. Could be 0x0F for
+            //3dnow instructions, or 0x38-0x3F for some SSE instructions.
+            uint8_t prefixB;
+            //The main opcode byte. The highest addressed byte in the opcode.
+            Opcode op;
+        } opcode;
+        //Modifier bytes
+        ModRM modRM;
+        uint8_t sib;
+        //Immediate fields
+        uint64_t immediate;
+        uint64_t displacement;
+    };
+
+    inline static std::ostream &
+        operator << (std::ostream & os, const ExtMachInst & emi)
+    {
+        ccprintf(os, "\n{\n\tleg = %#x,\n\trex = %#x,\n\t"
+                     "op = {\n\t\tnum = %d,\n\t\top = %#x,\n\t\t"
+                           "prefixA = %#x,\n\t\tprefixB = %#x\n\t},\n\t"
+                     "modRM = %#x,\n\tsib = %#x,\n\t"
+                     "immediate = %#x,\n\tdisplacement = %#x\n}\n",
+                     emi.legacy, (uint8_t)emi.rex,
+                     emi.opcode.num, emi.opcode.op,
+                     emi.opcode.prefixA, emi.opcode.prefixB,
+                     (uint8_t)emi.modRM, (uint8_t)emi.sib,
+                     emi.immediate, emi.displacement);
+        return os;
+    }
+
+    inline static bool
+        operator == (const ExtMachInst &emi1, const ExtMachInst &emi2)
+    {
+        //XXX Stub: no fields are compared, so any two are always equal
+        return true;
+    }
 
     typedef uint64_t IntReg;
     //XXX Should this be a 128 bit structure for XMM memory ops?
diff --git a/src/arch/x86/utility.hh b/src/arch/x86/utility.hh
index 1fbe1fffe..d89e223de 100644
--- a/src/arch/x86/utility.hh
+++ b/src/arch/x86/utility.hh
@@ -59,11 +59,23 @@
 #define __ARCH_X86_UTILITY_HH__
 
 #include "arch/x86/types.hh"
+#include "base/hashmap.hh"
 #include "base/misc.hh"
+#include "cpu/thread_context.hh"
 #include "sim/host.hh"
 
 class ThreadContext;
 
+namespace __hash_namespace {
+    template<>
+    struct hash<X86ISA::ExtMachInst> {
+        size_t operator()(const X86ISA::ExtMachInst &emi) const {
+            //Because these are all the same, return 0
+            return 0;
+        };
+    };
+}
+
 namespace X86ISA
 {
     static inline bool
@@ -72,11 +84,6 @@ namespace X86ISA
         return false;
     }
 
-    inline ExtMachInst
-    makeExtMI(MachInst inst, ThreadContext * xc) {
-        return inst;
-    }
-
     inline bool isCallerSaveIntegerRegister(unsigned int reg) {
         panic("register classification not implemented");
         return false;
diff --git a/src/base/SConscript b/src/base/SConscript
index 788aa3e6f..6fc140145 100644
--- a/src/base/SConscript
+++ b/src/base/SConscript
@@ -30,10 +30,26 @@
 
 Import('*')
 
-# base/traceflags.{cc,hh} are generated from base/traceflags.py.
-# $TARGET.base will expand to "<build-dir>/base/traceflags".
-env.Command(['traceflags.hh', 'traceflags.cc'], 'traceflags.py',
-            'python $SOURCE $TARGET.base')
+def make_cc(target, source, env):
+    assert(len(source) == 1)
+    assert(len(target) == 1)
+
+    traceflags = {}
+    execfile(str(source[0]), traceflags)
+    func = traceflags['gen_cc']
+    func(str(target[0]))
+
+def make_hh(target, source, env):
+    assert(len(source) == 1)
+    assert(len(target) == 1)
+
+    traceflags = {}
+    execfile(str(source[0]), traceflags)
+    func = traceflags['gen_hh']
+    func(str(target[0]))
+
+env.Command('traceflags.hh', 'traceflags.py', make_hh)
+env.Command('traceflags.cc', 'traceflags.py', make_cc)
 
 Source('annotate.cc')
 Source('bigint.cc')
diff --git a/src/base/bigint.hh b/src/base/bigint.hh
index ed48c67fe..d60684231 100644
--- a/src/base/bigint.hh
+++ b/src/base/bigint.hh
@@ -28,6 +28,8 @@
  * Authors: Ali Saidi
  */
 
+#include "base/misc.hh"
+
 #include <iostream>
 
 #ifndef __BASE_BIGINT_HH__
@@ -49,6 +51,12 @@ struct m5_twin64_t {
         b = x;
         return *this;
     }
+
+    operator uint64_t()
+    {
+        panic("Tried to cram a twin64_t into an integer!\n");
+        return a;
+    }
 };
 
 struct m5_twin32_t {
@@ -67,6 +75,12 @@ struct m5_twin32_t {
         b = x;
         return *this;
     }
+
+    operator uint32_t()
+    {
+        panic("Tried to cram a twin32_t into an integer!\n");
+        return a;
+    }
 };
 
 
diff --git a/src/base/bitfield.hh b/src/base/bitfield.hh
index 83b9138b4..69cce2245 100644
--- a/src/base/bitfield.hh
+++ b/src/base/bitfield.hh
@@ -121,20 +121,282 @@ findMsbSet(uint64_t val) {
     int msb = 0;
     if (!val)
         return 0;
-    if (bits(val, 63,32)) msb += 32;
-    val >>= 32;
-    if (bits(val, 31,16)) msb += 16;
-    val >>= 16;
-    if (bits(val, 15,8)) msb += 8;
-    val >>= 8;
-    if (bits(val, 7,4)) msb += 4;
-    val >>= 4;
-    if (bits(val, 3,2)) msb += 2;
-    val >>= 2;
-    if (bits(val, 1,1)) msb += 1;
+    if (bits(val, 63,32)) { msb += 32; val >>= 32; }
+    if (bits(val, 31,16)) { msb += 16; val >>= 16; }
+    if (bits(val, 15,8))  { msb += 8;  val >>= 8;  }
+    if (bits(val, 7,4))   { msb += 4;  val >>= 4;  }
+    if (bits(val, 3,2))   { msb += 2;  val >>= 2;  }
+    if (bits(val, 1,1))   { msb += 1; }
     return msb;
 }
 
+//	The following implements the BitUnion system of defining bitfields
+//on top of an underlying class. This is done through the pervasive use of
+//both named and unnamed unions which all contain the same actual storage.
+//Since they're unioned with each other, all of these storage locations
+//overlap. This allows all of the bitfields to manipulate the same data
+//without having to have access to each other. More details are provided with the
+//individual components.
 
+//This namespace is for classes which implement the backend of the BitUnion
+//stuff. Don't use any of these directly, except for the Bitfield classes in
+//the *BitfieldTypes class(es).
+namespace BitfieldBackend
+{
+    //A base class for all bitfields. It instantiates the actual storage,
+    //and provides getBits and setBits functions for manipulating it. The
+    //Data template parameter is type of the underlying storage.
+    template<class Data>
+    class BitfieldBase
+    {
+      protected:
+        Data __data;
+
+        //This function returns a range of bits from the underlying storage.
+        //It relies on the "bits" function above. It's the user's
+        //responsibility to make sure that there is a properly overloaded
+        //version of this function for whatever type they want to overlay.
+        inline uint64_t
+        getBits(int first, int last) const
+        {
+            return bits(__data, first, last);
+        }
+
+        //Similar to the above, but for setting bits with replaceBits.
+        inline void
+        setBits(int first, int last, uint64_t val)
+        {
+            replaceBits(__data, first, last, val);
+        }
+    };
+
+    //This class contains all the "regular" bitfield classes. It is inherited
+    //by all BitUnions which give them access to those types.
+    template<class Type>
+    class RegularBitfieldTypes
+    {
+      protected:
+        //This class implements ordinary bitfields, that is a span of bits
+        //whose msb is "first", and whose lsb is "last".
+        template<int first, int last=first>
+        class Bitfield : public BitfieldBase<Type>
+        {
+          public:
+            operator uint64_t () const
+            {
+                return this->getBits(first, last);
+            }
+
+            uint64_t
+            operator=(const uint64_t _data)
+            {
+                this->setBits(first, last, _data);
+                return _data;
+            }
+        };
+
+        //A class which specializes the above so that it can only be read
+        //from. This is accomplished by explicitly making sure the assignment
+        //operator is blocked. The conversion operator is carried through
+        //inheritance. This will unfortunately need to be copied into each
+        //bitfield type due to limitations with how templates work
+        template<int first, int last=first>
+        class BitfieldRO : public Bitfield<first, last>
+        {
+          private:
+            uint64_t
+            operator=(const uint64_t _data);
+        };
+
+        //Similar to the above, but only allows writing.
+        template<int first, int last=first>
+        class BitfieldWO : public Bitfield<first, last>
+        {
+          private:
+            operator uint64_t () const;
+
+          public:
+            using Bitfield<first, last>::operator=;
+        };
+    };
+
+    //This class contains all the "signed" bitfield classes. It is inherited
+    //by all BitUnions which give them access to those types.
+    template<class Type>
+    class SignedBitfieldTypes
+    {
+      protected:
+        //This class implements signed bitfields, that is a span of bits
+        //from msb "first" to lsb "last" which is sign extended when read.
+        template<int first, int last=first>
+        class SignedBitfield : public BitfieldBase<Type>
+        {
+          public:
+            operator int64_t () const
+            {
+                return sext<first - last + 1>(this->getBits(first, last));
+            }
+
+            int64_t
+            operator=(const int64_t _data)
+            {
+                this->setBits(first, last, _data);
+                return _data;
+            }
+        };
+
+        //A class which specializes the above so that it can only be read
+        //from. This is accomplished by explicitly making sure the assignment
+        //operator is blocked. The conversion operator is carried through
+        //inheritance. This will unfortunately need to be copied into each
+        //bitfield type due to limitations with how templates work
+        template<int first, int last=first>
+        class SignedBitfieldRO : public SignedBitfield<first, last>
+        {
+          private:
+            int64_t
+            operator=(const int64_t _data);
+        };
+
+        //Similar to the above, but only allows writing.
+        template<int first, int last=first>
+        class SignedBitfieldWO : public SignedBitfield<first, last>
+        {
+          private:
+            operator int64_t () const;
+
+          public:
+            int64_t operator=(const int64_t _data)
+            {
+                *((SignedBitfield<first, last> *)this) = _data;
+                return _data;
+            }
+        };
+    };
+
+    template<class Type>
+    class BitfieldTypes : public RegularBitfieldTypes<Type>,
+                          public SignedBitfieldTypes<Type>
+    {};
+
+    //When a BitUnion is set up, an underlying class is created which holds
+    //the actual union. This class then inherits from it, and provides the
+    //implementations for various operators. Setting things up this way
+    //prevents having to redefine these functions in every different BitUnion
+    //type. More operators could be implemented in the future, as the need
+    //arises.
+    template <class Type, class Base>
+    class BitUnionOperators : public Base
+    {
+      public:
+        operator Type () const
+        {
+            return Base::__data;
+        }
+
+        Type
+        operator=(const Type & _data)
+        {
+            Base::__data = _data;
+            return _data;
+        }
+
+        bool
+        operator<(const Base & base) const
+        {
+            return Base::__data < base.__data;
+        }
+
+        bool
+        operator==(const Base & base) const
+        {
+            return Base::__data == base.__data;
+        }
+    };
+}
+
+//This macro is a backend for other macros that specialize it slightly.
+//First, it creates/extends a namespace "BitfieldUnderlyingClasses" and
+//sticks the class which has the actual union in it, which
+//BitUnionOperators above inherits from. Putting these classes in a special
+//namespace ensures that there will be no collisions with other names as long
+//as the BitUnion names themselves are all distinct and nothing else uses
+//the BitfieldUnderlyingClasses namespace, which is unlikely. The class itself
+//creates a typedef of the "type" parameter called __DataType. This allows
+//the type to propagate outside of the macro itself in a controlled way.
+//Finally, the base storage is defined which BitUnionOperators will refer to
+//in the operators it defines. This macro is intended to be followed by
+//bitfield definitions which will end up inside its union. As explained
+//above, the __data member is overlayed in its entirety by each of the
+//bitfields which are defined in the union, creating shared storage with no
+//overhead.
+#define __BitUnion(type, name) \
+    namespace BitfieldUnderlyingClasses \
+    { \
+        class name; \
+    } \
+    class BitfieldUnderlyingClasses::name : \
+        public BitfieldBackend::BitfieldTypes<type> \
+    { \
+      public: \
+        typedef type __DataType; \
+        union { \
+            type __data;\
+
+//This closes off the class and union started by the above macro. It is
+//followed by a typedef which makes "name" refer to a BitUnionOperators
+//class inheriting from the class and union just defined, which completes
+//building up the type for the user.
+#define EndBitUnion(name) \
+        }; \
+    }; \
+    typedef BitfieldBackend::BitUnionOperators< \
+        BitfieldUnderlyingClasses::name::__DataType, \
+        BitfieldUnderlyingClasses::name> name;
+
+//This sets up a bitfield which has other bitfields nested inside of it. The
+//__data member functions like the "underlying storage" of the top level
+//BitUnion. Like everything else, it overlays with the top level storage, so
+//making it a regular bitfield type makes the entire thing function as a
+//regular bitfield when referred to by itself.
+#define __SubBitUnion(fieldType, first, last, name) \
+    class : public BitfieldBackend::BitfieldTypes<__DataType> \
+    { \
+      public: \
+        union { \
+            fieldType<first, last> __data;
+
+//This closes off the union created above and gives it a name. Unlike the top
+//level BitUnion, we're interested in creating an object instead of a type.
+//The conversion and assignment operators (assignment returns the value that
+//was assigned) are defined in the macro itself instead of a class for
+//technical reasons. If someone finds a way to move them to one, please do so.
+#define EndSubBitUnion(name) \
+        }; \
+        inline operator const __DataType () \
+        { return __data; } \
+        \
+        inline const __DataType operator = (const __DataType & _data) \
+        { __data = _data; return _data; } \
+    } name;
+
+//Regular bitfields
+//These define macros for read/write regular bitfield based subbitfields.
+#define SubBitUnion(name, first, last) \
+    __SubBitUnion(Bitfield, first, last, name)
+
+//Signed bitfields
+//These define macros for read/write signed bitfield based subbitfields.
+#define SignedSubBitUnion(name, first, last) \
+    __SubBitUnion(SignedBitfield, first, last, name)
+
+//Use this to define an arbitrary type overlayed with bitfields.
+#define BitUnion(type, name) __BitUnion(type, name)
+
+//Use this to define conveniently sized values overlayed with bitfields.
+#define BitUnion64(name) __BitUnion(uint64_t, name)
+#define BitUnion32(name) __BitUnion(uint32_t, name)
+#define BitUnion16(name) __BitUnion(uint16_t, name)
+#define BitUnion8(name) __BitUnion(uint8_t, name)
 
 #endif // __BASE_BITFIELD_HH__
diff --git a/src/base/cprintf.hh b/src/base/cprintf.hh
index 7f8e33367..cff73a228 100644
--- a/src/base/cprintf.hh
+++ b/src/base/cprintf.hh
@@ -143,20 +143,20 @@ ccprintf(std::ostream &stream, const std::string &format,
 inline void
 ccprintf(std::ostream &stream, const std::string &format, CPRINTF_DECLARATION)
 {
-    ccprintf(stream, format, VARARGS_ALLARGS);
+    ccprintf(stream, format.c_str(), VARARGS_ALLARGS);
 }
 
 inline void
 cprintf(const std::string &format, CPRINTF_DECLARATION)
 {
-    ccprintf(std::cout, format, VARARGS_ALLARGS);
+    ccprintf(std::cout, format.c_str(), VARARGS_ALLARGS);
 }
 
 inline std::string
 csprintf(const std::string &format, CPRINTF_DECLARATION)
 {
     std::stringstream stream;
-    ccprintf(stream, format, VARARGS_ALLARGS);
+    ccprintf(stream, format.c_str(), VARARGS_ALLARGS);
     return stream.str();
 }
 
diff --git a/src/base/traceflags.py b/src/base/traceflags.py
index cb17d98d3..6b241c410 100644
--- a/src/base/traceflags.py
+++ b/src/base/traceflags.py
@@ -116,6 +116,7 @@ baseFlags = [
     'ISP',
     'IdeCtrl',
     'IdeDisk',
+    'Iob',
     'Interrupt',
     'LLSC',
     'LSQ',
@@ -136,6 +137,7 @@ baseFlags = [
     'PciConfigAll',
     'Pipeline',
     'Printf',
+    'Predecoder',
     'Quiesce',
     'ROB',
     'Regs',
@@ -348,16 +350,3 @@ const char *Trace::flagStrings[] =
     print >>ccfile, '};'
 
     ccfile.close()
-
-if __name__ == '__main__':
-    # This file generates the header and source files for the flags
-    # that control the tracing facility.
-
-    import sys
-
-    if len(sys.argv) != 2:
-        print "%s: Need argument (basename of cc/hh files)" % sys.argv[0]
-        sys.exit(1)
-
-    gen_hh(sys.argv[1] + '.hh')
-    gen_cc(sys.argv[1] + '.cc')
diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index 3e0be6ad8..4dccee0d3 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -226,7 +226,8 @@ BaseCPU::startup()
 #endif
 
     if (params->progress_interval) {
-        new CPUProgressEvent(&mainEventQueue, params->progress_interval,
+        new CPUProgressEvent(&mainEventQueue,
+                             cycles(params->progress_interval),
                              this);
     }
 }
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index 7167bfde0..4d8300186 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -34,11 +34,11 @@
 
 #include <vector>
 
+#include "arch/isa_traits.hh"
 #include "base/statistics.hh"
 #include "config/full_system.hh"
 #include "sim/eventq.hh"
 #include "mem/mem_object.hh"
-#include "arch/isa_traits.hh"
 
 #if FULL_SYSTEM
 #include "arch/interrupts.hh"
@@ -50,6 +50,11 @@ class ThreadContext;
 class System;
 class Port;
 
+namespace TheISA
+{
+    class Predecoder;
+}
+
 class CPUProgressEvent : public Event
 {
   protected:
@@ -125,6 +130,7 @@ class BaseCPU : public MemObject
 
   protected:
     std::vector<ThreadContext *> threadContexts;
+    std::vector<TheISA::Predecoder *> predecoders;
 
   public:
 
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index 9ccdcdccc..6c6d90076 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -171,15 +171,15 @@ class BaseDynInst : public FastAlloc, public RefCounted
     /** The kind of fault this instruction has generated. */
     Fault fault;
 
-    /** The memory request. */
-    Request *req;
-
     /** Pointer to the data for the memory access. */
     uint8_t *memData;
 
     /** The effective virtual address (lds & stores only). */
     Addr effAddr;
 
+    /** Is the effective virtual address valid. */
+    bool effAddrValid;
+
     /** The effective physical address. */
     Addr physEffAddr;
 
@@ -601,12 +601,18 @@ class BaseDynInst : public FastAlloc, public RefCounted
     /** Returns whether or not this instruction is ready to issue. */
     bool readyToIssue() const { return status[CanIssue]; }
 
+    /** Clears this instruction being able to issue. */
+    void clearCanIssue() { status.reset(CanIssue); }
+
     /** Sets this instruction as issued from the IQ. */
     void setIssued() { status.set(Issued); }
 
     /** Returns whether or not this instruction has issued. */
     bool isIssued() const { return status[Issued]; }
 
+    /** Clears this instruction as being issued. */
+    void clearIssued() { status.reset(Issued); }
+
     /** Sets this instruction as executed. */
     void setExecuted() { status.set(Executed); }
 
@@ -729,6 +735,12 @@ class BaseDynInst : public FastAlloc, public RefCounted
      */
     bool eaCalcDone;
 
+    /** Is this instruction's memory access uncacheable. */
+    bool isUncacheable;
+
+    /** Has this instruction generated a memory request. */
+    bool reqMade;
+
   public:
     /** Sets the effective address. */
     void setEA(Addr &ea) { instEffAddr = ea; eaCalcDone = true; }
@@ -745,6 +757,12 @@ class BaseDynInst : public FastAlloc, public RefCounted
     /** Whether or not the memory operation is done. */
     bool memOpDone;
 
+    /** Is this instruction's memory access uncacheable. */
+    bool uncacheable() { return isUncacheable; }
+
+    /** Has this instruction generated a memory request. */
+    bool hasRequest() { return reqMade; }
+
   public:
     /** Load queue index. */
     int16_t lqIdx;
@@ -776,25 +794,25 @@ template<class T>
 inline Fault
 BaseDynInst<Impl>::read(Addr addr, T &data, unsigned flags)
 {
-    // Sometimes reads will get retried, so they may come through here
-    // twice.
-    if (!req) {
-        req = new Request();
-        req->setVirt(asid, addr, sizeof(T), flags, this->PC);
-        req->setThreadContext(thread->readCpuId(), threadNumber);
-    } else {
-        assert(addr == req->getVaddr());
-    }
+    reqMade = true;
+    Request *req = new Request();
+    req->setVirt(asid, addr, sizeof(T), flags, this->PC);
+    req->setThreadContext(thread->readCpuId(), threadNumber);
 
     if ((req->getVaddr() & (TheISA::VMPageSize - 1)) + req->getSize() >
         TheISA::VMPageSize) {
+        delete req;
         return TheISA::genAlignmentFault();
     }
 
     fault = cpu->translateDataReadReq(req, thread);
 
+    if (req->isUncacheable())
+        isUncacheable = true;
+
     if (fault == NoFault) {
         effAddr = req->getVaddr();
+        effAddrValid = true;
         physEffAddr = req->getPaddr();
         memReqFlags = req->getFlags();
 
@@ -817,6 +835,7 @@ BaseDynInst<Impl>::read(Addr addr, T &data, unsigned flags)
         // Commit will have to clean up whatever happened.  Set this
         // instruction as executed.
         this->setExecuted();
+        delete req;
     }
 
     if (traceData) {
@@ -837,21 +856,25 @@ BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
         traceData->setData(data);
     }
 
-    assert(req == NULL);
-
-    req = new Request();
+    reqMade = true;
+    Request *req = new Request();
     req->setVirt(asid, addr, sizeof(T), flags, this->PC);
     req->setThreadContext(thread->readCpuId(), threadNumber);
 
     if ((req->getVaddr() & (TheISA::VMPageSize - 1)) + req->getSize() >
         TheISA::VMPageSize) {
+        delete req;
         return TheISA::genAlignmentFault();
     }
 
     fault = cpu->translateDataWriteReq(req, thread);
 
+    if (req->isUncacheable())
+        isUncacheable = true;
+
     if (fault == NoFault) {
         effAddr = req->getVaddr();
+        effAddrValid = true;
         physEffAddr = req->getPaddr();
         memReqFlags = req->getFlags();
 #if 0
@@ -863,12 +886,8 @@ BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
 #else
         fault = cpu->write(req, data, sqIdx);
 #endif
-    }
-
-    if (res) {
-        // always return some result to keep misspeculated paths
-        // (which will ignore faults) deterministic
-        *res = (fault == NoFault) ? req->getExtraData() : 0;
+    } else {
+        delete req;
     }
 
     return fault;
diff --git a/src/cpu/base_dyn_inst_impl.hh b/src/cpu/base_dyn_inst_impl.hh
index c3d71e428..a1c866336 100644
--- a/src/cpu/base_dyn_inst_impl.hh
+++ b/src/cpu/base_dyn_inst_impl.hh
@@ -92,11 +92,13 @@ template <class Impl>
 void
 BaseDynInst<Impl>::initVars()
 {
-    req = NULL;
     memData = NULL;
     effAddr = 0;
+    effAddrValid = false;
     physEffAddr = 0;
 
+    isUncacheable = false;
+    reqMade = false;
     readyRegs = 0;
 
     instResult.integer = 0;
@@ -140,10 +142,6 @@ BaseDynInst<Impl>::initVars()
 template <class Impl>
 BaseDynInst<Impl>::~BaseDynInst()
 {
-    if (req) {
-        delete req;
-    }
-
     if (memData) {
         delete [] memData;
     }
@@ -271,7 +269,7 @@ void
 BaseDynInst<Impl>::markSrcRegReady()
 {
     if (++readyRegs == numSrcRegs()) {
-        status.set(CanIssue);
+        setCanIssue();
     }
 }
 
diff --git a/src/cpu/exetrace.cc b/src/cpu/exetrace.cc
index 54d8c68fa..c568b1439 100644
--- a/src/cpu/exetrace.cc
+++ b/src/cpu/exetrace.cc
@@ -31,14 +31,17 @@
  *          Steve Raasch
  */
 
+#include <errno.h>
 #include <fstream>
 #include <iomanip>
 #include <sys/ipc.h>
 #include <sys/shm.h>
 
+#include "arch/predecoder.hh"
 #include "arch/regfile.hh"
 #include "arch/utility.hh"
 #include "base/loader/symtab.hh"
+#include "base/socket.hh"
 #include "config/full_system.hh"
 #include "cpu/base.hh"
 #include "cpu/exetrace.hh"
@@ -64,6 +67,7 @@ static bool wasMicro = false;
 
 namespace Trace {
 SharedData *shared_data = NULL;
+ListenSocket *cosim_listener = NULL;
 
 void
 setupSharedData()
@@ -149,9 +153,96 @@ Trace::InstRecord::dump()
     ostream &outs = Trace::output();
 
     DPRINTF(Sparc, "Instruction: %#X\n", staticInst->machInst);
+    bool diff = true;
     if (IsOn(ExecRegDelta))
     {
+        diff = false;
+#ifndef NDEBUG
 #if THE_ISA == SPARC_ISA
+        static int fd = 0;
+        //Don't print what happens for each micro-op, just print out
+        //once at the last op, and for regular instructions.
+        if(!staticInst->isMicroOp() || staticInst->isLastMicroOp())
+        {
+            if(!cosim_listener)
+            {
+                int port = 8000;
+                cosim_listener = new ListenSocket();
+                while(!cosim_listener->listen(port, true))
+                {
+                    DPRINTF(GDBMisc, "Can't bind port %d\n", port);
+                    port++;
+                }
+                ccprintf(cerr, "Listening for cosimulator on port %d\n", port);
+                fd = cosim_listener->accept();
+            }
+            char prefix[] = "goli";
+            for(int p = 0; p < 4; p++)
+            {
+                for(int i = 0; i < 8; i++)
+                {
+                    uint64_t regVal;
+                    int res = read(fd, &regVal, sizeof(regVal));
+                    if(res < 0)
+                        panic("First read call failed! %s\n", strerror(errno));
+                    regVal = TheISA::gtoh(regVal);
+                    uint64_t realRegVal = thread->readIntReg(p * 8 + i);
+                    if((regVal & 0xffffffffULL) != (realRegVal & 0xffffffffULL))
+                    {
+                        DPRINTF(ExecRegDelta, "Register %s%d should be %#x but is %#x.\n", prefix[p], i, regVal, realRegVal);
+                        diff = true;
+                    }
+                    //ccprintf(outs, "%s%d m5 = %#x statetrace = %#x\n", prefix[p], i, realRegVal, regVal);
+                }
+            }
+            /*for(int f = 0; f <= 62; f+=2)
+            {
+                uint64_t regVal;
+                int res = read(fd, &regVal, sizeof(regVal));
+                if(res < 0)
+                    panic("First read call failed! %s\n", strerror(errno));
+                regVal = TheISA::gtoh(regVal);
+                uint64_t realRegVal = thread->readFloatRegBits(f, 64);
+                if(regVal != realRegVal)
+                {
+                    DPRINTF(ExecRegDelta, "Register f%d should be %#x but is %#x.\n", f, regVal, realRegVal);
+                }
+            }*/
+            uint64_t regVal;
+            int res = read(fd, &regVal, sizeof(regVal));
+            if(res < 0)
+                panic("First read call failed! %s\n", strerror(errno));
+            regVal = TheISA::gtoh(regVal);
+            uint64_t realRegVal = thread->readNextPC();
+            if(regVal != realRegVal)
+            {
+                DPRINTF(ExecRegDelta, "Register pc should be %#x but is %#x.\n", regVal, realRegVal);
+                diff = true;
+            }
+            res = read(fd, &regVal, sizeof(regVal));
+            if(res < 0)
+                panic("First read call failed! %s\n", strerror(errno));
+            regVal = TheISA::gtoh(regVal);
+            realRegVal = thread->readNextNPC();
+            if(regVal != realRegVal)
+            {
+                DPRINTF(ExecRegDelta, "Register npc should be %#x but is %#x.\n", regVal, realRegVal);
+                diff = true;
+            }
+            res = read(fd, &regVal, sizeof(regVal));
+            if(res < 0)
+                panic("First read call failed! %s\n", strerror(errno));
+            regVal = TheISA::gtoh(regVal);
+            realRegVal = thread->readIntReg(SparcISA::NumIntArchRegs + 2);
+            if((regVal & 0xF) != (realRegVal & 0xF))
+            {
+                DPRINTF(ExecRegDelta, "Register ccr should be %#x but is %#x.\n", regVal, realRegVal);
+                diff = true;
+            }
+        }
+#endif
+#endif
+#if 0 //THE_ISA == SPARC_ISA
         //Don't print what happens for each micro-op, just print out
         //once at the last op, and for regular instructions.
         if(!staticInst->isMicroOp() || staticInst->isLastMicroOp())
@@ -210,7 +301,8 @@ Trace::InstRecord::dump()
         }
 #endif
     }
-    else if (IsOn(ExecIntel)) {
+    if(!diff) {
+    } else if (IsOn(ExecIntel)) {
         ccprintf(outs, "%7d ) ", when);
         outs << "0x" << hex << PC << ":\t";
         if (staticInst->isLoad()) {
@@ -302,6 +394,7 @@ Trace::InstRecord::dump()
         outs << endl;
     }
 #if THE_ISA == SPARC_ISA && FULL_SYSTEM
+    static TheISA::Predecoder predecoder(NULL);
     // Compare
     if (IsOn(ExecLegion))
     {
@@ -556,9 +649,13 @@ Trace::InstRecord::dump()
                              << staticInst->disassemble(m5Pc, debugSymbolTable)
                              << endl;
 
+                        predecoder.setTC(thread);
+                        predecoder.moreBytes(m5Pc, 0, shared_data->instruction);
+
+                        assert(predecoder.extMachInstRead());
+
                         StaticInstPtr legionInst =
-                            StaticInst::decode(makeExtMI(shared_data->instruction,
-                                        thread));
+                            StaticInst::decode(predecoder.getExtMachInst());
                         outs << setfill(' ') << setw(15)
                              << " Legion Inst: "
                              << "0x" << setw(8) << setfill('0') << hex
diff --git a/src/cpu/o3/alpha/cpu_builder.cc b/src/cpu/o3/alpha/cpu_builder.cc
index 5a375a4b8..34754d3c5 100644
--- a/src/cpu/o3/alpha/cpu_builder.cc
+++ b/src/cpu/o3/alpha/cpu_builder.cc
@@ -50,11 +50,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU)
     Param<int> clock;
     Param<int> phase;
     Param<int> numThreads;
+Param<int> cpu_id;
 Param<int> activity;
 
 #if FULL_SYSTEM
 SimObjectParam<System *> system;
-Param<int> cpu_id;
 SimObjectParam<AlphaISA::ITB *> itb;
 SimObjectParam<AlphaISA::DTB *> dtb;
 Param<Tick> profile;
@@ -161,11 +161,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU)
     INIT_PARAM(clock, "clock speed"),
     INIT_PARAM_DFLT(phase, "clock phase", 0),
     INIT_PARAM(numThreads, "number of HW thread contexts"),
+    INIT_PARAM(cpu_id, "processor ID"),
     INIT_PARAM_DFLT(activity, "Initial activity count", 0),
 
 #if FULL_SYSTEM
     INIT_PARAM(system, "System object"),
-    INIT_PARAM(cpu_id, "processor ID"),
     INIT_PARAM(itb, "Instruction translation buffer"),
     INIT_PARAM(dtb, "Data translation buffer"),
     INIT_PARAM(profile, ""),
@@ -305,14 +305,15 @@ CREATE_SIM_OBJECT(DerivO3CPU)
     AlphaSimpleParams *params = new AlphaSimpleParams;
 
     params->clock = clock;
+    params->phase = phase;
 
     params->name = getInstanceName();
     params->numberOfThreads = actual_num_threads;
+    params->cpu_id = cpu_id;
     params->activity = activity;
 
 #if FULL_SYSTEM
     params->system = system;
-    params->cpu_id = cpu_id;
     params->itb = itb;
     params->dtb = dtb;
     params->profile = profile;
diff --git a/src/cpu/o3/alpha/cpu_impl.hh b/src/cpu/o3/alpha/cpu_impl.hh
index b91972704..304ee6c38 100644
--- a/src/cpu/o3/alpha/cpu_impl.hh
+++ b/src/cpu/o3/alpha/cpu_impl.hh
@@ -114,6 +114,7 @@ AlphaO3CPU<Impl>::AlphaO3CPU(Params *params) : FullO3CPU<Impl>(params)
 #endif
         // Give the thread the TC.
         this->thread[i]->tc = tc;
+        this->thread[i]->setCpuId(params->cpu_id);
 
         // Add the TC to the CPU's list of TC's.
         this->threadContexts.push_back(tc);
diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh
index 0d7d82529..e2ad23954 100644
--- a/src/cpu/o3/commit.hh
+++ b/src/cpu/o3/commit.hh
@@ -247,6 +247,11 @@ class DefaultCommit
     /** Handles squashing due to an TC write. */
     void squashFromTC(unsigned tid);
 
+#if FULL_SYSTEM
+    /** Handles processing an interrupt. */
+    void handleInterrupt();
+#endif // FULL_SYSTEM
+
     /** Commits as many instructions as possible. */
     void commitInsts();
 
@@ -409,6 +414,16 @@ class DefaultCommit
     /** The sequence number of the youngest valid instruction in the ROB. */
     InstSeqNum youngestSeqNum[Impl::MaxThreads];
 
+    /** Records if there is a trap currently in flight. */
+    bool trapInFlight[Impl::MaxThreads];
+
+    /** Records if there were any stores committed this cycle. */
+    bool committedStores[Impl::MaxThreads];
+
+    /** Records if commit should check if the ROB is truly empty (see
+        commit_impl.hh). */
+    bool checkEmptyROB[Impl::MaxThreads];
+
     /** Pointer to the list of active threads. */
     std::list<unsigned> *activeThreads;
 
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index 18fb2aaa3..3fd85595f 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -118,6 +118,9 @@ DefaultCommit<Impl>::DefaultCommit(Params *params)
     for (int i=0; i < numThreads; i++) {
         commitStatus[i] = Idle;
         changedROBNumEntries[i] = false;
+        checkEmptyROB[i] = false;
+        trapInFlight[i] = false;
+        committedStores[i] = false;
         trapSquash[i] = false;
         tcSquash[i] = false;
         PC[i] = nextPC[i] = nextNPC[i] = 0;
@@ -335,6 +338,7 @@ DefaultCommit<Impl>::initStage()
     for (int i=0; i < numThreads; i++) {
         toIEW->commitInfo[i].usedROB = true;
         toIEW->commitInfo[i].freeROBEntries = rob->numFreeEntries(i);
+        toIEW->commitInfo[i].emptyROB = true;
     }
 
     cpu->activityThisCycle();
@@ -473,14 +477,14 @@ DefaultCommit<Impl>::generateTrapEvent(unsigned tid)
     TrapEvent *trap = new TrapEvent(this, tid);
 
     trap->schedule(curTick + trapLatency);
-
-    thread[tid]->trapPending = true;
+    trapInFlight[tid] = true;
 }
 
 template <class Impl>
 void
 DefaultCommit<Impl>::generateTCEvent(unsigned tid)
 {
+    assert(!trapInFlight[tid]);
     DPRINTF(Commit, "Generating TC squash event for [tid:%i]\n", tid);
 
     tcSquash[tid] = true;
@@ -495,7 +499,7 @@ DefaultCommit<Impl>::squashAll(unsigned tid)
     // Hopefully this doesn't mess things up.  Basically I want to squash
     // all instructions of this thread.
     InstSeqNum squashed_inst = rob->isEmpty() ?
-        0 : rob->readHeadInst(tid)->seqNum - 1;;
+        0 : rob->readHeadInst(tid)->seqNum - 1;
 
     // All younger instructions will be squashed. Set the sequence
     // number as the youngest instruction in the ROB (0 in this case.
@@ -532,6 +536,7 @@ DefaultCommit<Impl>::squashFromTrap(unsigned tid)
 
     thread[tid]->trapPending = false;
     thread[tid]->inSyscall = false;
+    trapInFlight[tid] = false;
 
     trapSquash[tid] = false;
 
@@ -580,6 +585,10 @@ DefaultCommit<Impl>::tick()
     while (threads != end) {
         unsigned tid = *threads++;
 
+        // Clear the flag recording whether this thread committed any
+        // stores this cycle.
+        committedStores[tid] = false;
+
         if (commitStatus[tid] == ROBSquashing) {
 
             if (rob->isDoneSquashing(tid)) {
@@ -635,16 +644,11 @@ DefaultCommit<Impl>::tick()
     updateStatus();
 }
 
+#if FULL_SYSTEM
 template <class Impl>
 void
-DefaultCommit<Impl>::commit()
+DefaultCommit<Impl>::handleInterrupt()
 {
-
-    //////////////////////////////////////
-    // Check for interrupts
-    //////////////////////////////////////
-
-#if FULL_SYSTEM
     if (interrupt != NoFault) {
         // Wait until the ROB is empty and all stores have drained in
         // order to enter the interrupt.
@@ -653,6 +657,12 @@ DefaultCommit<Impl>::commit()
             // an interrupt needed to be handled.
             DPRINTF(Commit, "Interrupt detected.\n");
 
+            Fault new_interrupt = cpu->getInterrupts();
+            assert(new_interrupt != NoFault);
+
+            // Clear the interrupt now that it's going to be handled
+            toIEW->commitInfo[0].clearInterrupt = true;
+
             assert(!thread[0]->inSyscall);
             thread[0]->inSyscall = true;
 
@@ -666,16 +676,14 @@ DefaultCommit<Impl>::commit()
             // Generate trap squash event.
             generateTrapEvent(0);
 
-            // Clear the interrupt now that it's been handled
-            toIEW->commitInfo[0].clearInterrupt = true;
             interrupt = NoFault;
         } else {
             DPRINTF(Commit, "Interrupt pending, waiting for ROB to empty.\n");
         }
-    } else if (cpu->check_interrupts(cpu->tcBase(0)) &&
-        commitStatus[0] != TrapPending &&
-        !trapSquash[0] &&
-        !tcSquash[0]) {
+    } else if (commitStatus[0] != TrapPending &&
+               cpu->check_interrupts(cpu->tcBase(0)) &&
+               !trapSquash[0] &&
+               !tcSquash[0]) {
         // Process interrupts if interrupts are enabled, not in PAL
         // mode, and no other traps or external squashes are currently
         // pending.
@@ -691,7 +699,21 @@ DefaultCommit<Impl>::commit()
             toIEW->commitInfo[0].interruptPending = true;
         }
     }
+}
+#endif // FULL_SYSTEM
+
+template <class Impl>
+void
+DefaultCommit<Impl>::commit()
+{
 
+#if FULL_SYSTEM
+    // Check for any new interrupt and start processing it, or, if an
+    // interrupt is already outstanding and we are at a point where it
+    // is valid to take one, process it.
+    if (cpu->check_interrupts(cpu->tcBase(0))) {
+        handleInterrupt();
+    }
 #endif // FULL_SYSTEM
 
     ////////////////////////////////////
@@ -709,6 +731,7 @@ DefaultCommit<Impl>::commit()
             assert(!tcSquash[tid]);
             squashFromTrap(tid);
         } else if (tcSquash[tid] == true) {
+            assert(commitStatus[tid] != TrapPending);
             squashFromTC(tid);
         }
 
@@ -753,6 +776,7 @@ DefaultCommit<Impl>::commit()
                 bdelay_done_seq_num--;
 #endif
             }
+
             // All younger instructions will be squashed. Set the sequence
             // number as the youngest instruction in the ROB.
             youngestSeqNum[tid] = squashed_inst;
@@ -817,13 +841,29 @@ DefaultCommit<Impl>::commit()
             toIEW->commitInfo[tid].usedROB = true;
             toIEW->commitInfo[tid].freeROBEntries = rob->numFreeEntries(tid);
 
-            if (rob->isEmpty(tid)) {
-                toIEW->commitInfo[tid].emptyROB = true;
-            }
-
             wroteToTimeBuffer = true;
             changedROBNumEntries[tid] = false;
+            if (rob->isEmpty(tid))
+                checkEmptyROB[tid] = true;
         }
+
+        // The ROB is only reported as "empty" to earlier stages if:
+        // a) the ROB is empty, b) there are no outstanding stores, and
+        // c) the IEW stage has already received the information about
+        // any stores that committed.
+        // Condition c) is enforced by never reporting the ROB as empty
+        // in the same cycle in which stores were committed.
+        // @todo: Make this handle multi-cycle communication between
+        // commit and IEW.
+        if (checkEmptyROB[tid] && rob->isEmpty(tid) &&
+            !iewStage->hasStoresToWB() && !committedStores[tid]) {
+            checkEmptyROB[tid] = false;
+            toIEW->commitInfo[tid].usedROB = true;
+            toIEW->commitInfo[tid].emptyROB = true;
+            toIEW->commitInfo[tid].freeROBEntries = rob->numFreeEntries(tid);
+            wroteToTimeBuffer = true;
+        }
+
     }
 }
 
@@ -966,8 +1006,6 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
         // and committed this instruction.
         thread[tid]->funcExeInst--;
 
-        head_inst->setAtCommit();
-
         if (head_inst->isNonSpeculative() ||
             head_inst->isStoreConditional() ||
             head_inst->isMemBarrier() ||
@@ -977,19 +1015,9 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
                     "instruction [sn:%lli] at the head of the ROB, PC %#x.\n",
                     head_inst->seqNum, head_inst->readPC());
 
-            // Hack to make sure syscalls/memory barriers/quiesces
-            // aren't executed until all stores write back their data.
-            // This direct communication shouldn't be used for
-            // anything other than this.
-            if ((head_inst->isMemBarrier() || head_inst->isWriteBarrier() ||
-                    head_inst->isQuiesce()) &&
-                iewStage->hasStoresToWB())
-            {
+            if (inst_num > 0 || iewStage->hasStoresToWB()) {
                 DPRINTF(Commit, "Waiting for all stores to writeback.\n");
                 return false;
-            } else if (inst_num > 0 || iewStage->hasStoresToWB()) {
-                DPRINTF(Commit, "Waiting to become head of commit.\n");
-                return false;
             }
 
             toIEW->commitInfo[tid].nonSpecSeqNum = head_inst->seqNum;
@@ -1002,6 +1030,12 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
 
             return false;
         } else if (head_inst->isLoad()) {
+            if (inst_num > 0 || iewStage->hasStoresToWB()) {
+                DPRINTF(Commit, "Waiting for all stores to writeback.\n");
+                return false;
+            }
+
+            assert(head_inst->uncacheable());
             DPRINTF(Commit, "[sn:%lli]: Uncached load, PC %#x.\n",
                     head_inst->seqNum, head_inst->readPC());
 
@@ -1025,8 +1059,11 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
         panic("Thread sync instructions are not handled yet.\n");
     }
 
+    // Check if the instruction caused a fault.  If so, trap.
+    Fault inst_fault = head_inst->getFault();
+
     // Stores mark themselves as completed.
-    if (!head_inst->isStore()) {
+    if (!head_inst->isStore() && inst_fault == NoFault) {
         head_inst->setCompleted();
     }
 
@@ -1038,9 +1075,6 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
     }
 #endif
 
-    // Check if the instruction caused a fault.  If so, trap.
-    Fault inst_fault = head_inst->getFault();
-
     // DTB will sometimes need the machine instruction for when
     // faults happen.  So we will set it here, prior to the DTB
     // possibly needing it for its fault.
@@ -1048,7 +1082,6 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
         static_cast<TheISA::MachInst>(head_inst->staticInst->machInst));
 
     if (inst_fault != NoFault) {
-        head_inst->setCompleted();
         DPRINTF(Commit, "Inst [sn:%lli] PC %#x has a fault\n",
                 head_inst->seqNum, head_inst->readPC());
 
@@ -1057,6 +1090,8 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
             return false;
         }
 
+        head_inst->setCompleted();
+
 #if USE_CHECKER
         if (cpu->checker && head_inst->isStore()) {
             cpu->checker->verify(head_inst);
@@ -1082,6 +1117,14 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
 
         commitStatus[tid] = TrapPending;
 
+        if (head_inst->traceData) {
+            head_inst->traceData->setFetchSeq(head_inst->seqNum);
+            head_inst->traceData->setCPSeq(thread[tid]->numInst);
+            head_inst->traceData->dump();
+            delete head_inst->traceData;
+            head_inst->traceData = NULL;
+        }
+
         // Generate trap squash event.
         generateTrapEvent(tid);
 //        warn("%lli fault (%d) handled @ PC %08p", curTick, inst_fault->name(), head_inst->readPC());
@@ -1123,6 +1166,10 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
     // Finally clear the head ROB entry.
     rob->retireHead(tid);
 
+    // If this was a store, record it for this cycle.
+    if (head_inst->isStore())
+        committedStores[tid] = true;
+
     // Return true to indicate that we have committed an instruction.
     return true;
 }
@@ -1167,7 +1214,8 @@ DefaultCommit<Impl>::getInsts()
         int tid = inst->threadNumber;
 
         if (!inst->isSquashed() &&
-            commitStatus[tid] != ROBSquashing) {
+            commitStatus[tid] != ROBSquashing &&
+            commitStatus[tid] != TrapPending) {
             changedROBNumEntries[tid] = true;
 
             DPRINTF(Commit, "Inserting PC %#x [sn:%i] [tid:%i] into ROB.\n",
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 38e6a0b5b..354e3c490 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -466,7 +466,7 @@ FullO3CPU<Impl>::tick()
             lastRunningCycle = curTick;
             timesIdled++;
         } else {
-            tickEvent.schedule(curTick + cycles(1));
+            tickEvent.schedule(nextCycle(curTick + cycles(1)));
             DPRINTF(O3CPU, "Scheduling next tick!\n");
         }
     }
@@ -886,7 +886,7 @@ FullO3CPU<Impl>::resume()
 #endif
 
     if (!tickEvent.scheduled())
-        tickEvent.schedule(curTick);
+        tickEvent.schedule(nextCycle());
     _status = Running;
 }
 
@@ -979,11 +979,11 @@ FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
         ThreadContext *tc = threadContexts[i];
         if (tc->status() == ThreadContext::Active && _status != Running) {
             _status = Running;
-            tickEvent.schedule(curTick);
+            tickEvent.schedule(nextCycle());
         }
     }
     if (!tickEvent.scheduled())
-        tickEvent.schedule(curTick);
+        tickEvent.schedule(nextCycle());
 }
 
 template <class Impl>
@@ -1393,7 +1393,7 @@ FullO3CPU<Impl>::wakeCPU()
 
     idleCycles += (curTick - 1) - lastRunningCycle;
 
-    tickEvent.schedule(curTick);
+    tickEvent.schedule(nextCycle());
 }
 
 template <class Impl>
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index ea374dd57..0ab20ba2a 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -146,9 +146,9 @@ class FullO3CPU : public BaseO3CPU
     void scheduleTickEvent(int delay)
     {
         if (tickEvent.squashed())
-            tickEvent.reschedule(curTick + cycles(delay));
+            tickEvent.reschedule(nextCycle(curTick + cycles(delay)));
         else if (!tickEvent.scheduled())
-            tickEvent.schedule(curTick + cycles(delay));
+            tickEvent.schedule(nextCycle(curTick + cycles(delay)));
     }
 
     /** Unschedule tick event, regardless of its current state. */
@@ -186,9 +186,11 @@ class FullO3CPU : public BaseO3CPU
     {
         // Schedule thread to activate, regardless of its current state.
         if (activateThreadEvent[tid].squashed())
-            activateThreadEvent[tid].reschedule(curTick + cycles(delay));
+            activateThreadEvent[tid].
+                reschedule(nextCycle(curTick + cycles(delay)));
         else if (!activateThreadEvent[tid].scheduled())
-            activateThreadEvent[tid].schedule(curTick + cycles(delay));
+            activateThreadEvent[tid].
+                schedule(nextCycle(curTick + cycles(delay)));
     }
 
     /** Unschedule actiavte thread event, regardless of its current state. */
@@ -235,9 +237,11 @@ class FullO3CPU : public BaseO3CPU
     {
         // Schedule thread to activate, regardless of its current state.
         if (deallocateContextEvent[tid].squashed())
-            deallocateContextEvent[tid].reschedule(curTick + cycles(delay));
+            deallocateContextEvent[tid].
+                reschedule(nextCycle(curTick + cycles(delay)));
         else if (!deallocateContextEvent[tid].scheduled())
-            deallocateContextEvent[tid].schedule(curTick + cycles(delay));
+            deallocateContextEvent[tid].
+                schedule(nextCycle(curTick + cycles(delay)));
     }
 
     /** Unschedule thread deallocation in CPU */
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index 8347ed775..da7ce00f5 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -33,6 +33,7 @@
 #define __CPU_O3_FETCH_HH__
 
 #include "arch/utility.hh"
+#include "arch/predecoder.hh"
 #include "base/statistics.hh"
 #include "base/timebuf.hh"
 #include "cpu/pc_event.hh"
@@ -338,6 +339,9 @@ class DefaultFetch
     /** BPredUnit. */
     BPredUnit branchPred;
 
+    /** Predecoder. */
+    TheISA::Predecoder predecoder;
+
     /** Per-thread fetch PC. */
     Addr PC[Impl::MaxThreads];
 
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index ac0149d18..663cd3142 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -103,6 +103,7 @@ DefaultFetch<Impl>::IcachePort::recvRetry()
 template<class Impl>
 DefaultFetch<Impl>::DefaultFetch(Params *params)
     : branchPred(params),
+      predecoder(NULL),
       decodeToFetchDelay(params->decodeToFetchDelay),
       renameToFetchDelay(params->renameToFetchDelay),
       iewToFetchDelay(params->iewToFetchDelay),
@@ -619,6 +620,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
                 fault = TheISA::genMachineCheckFault();
                 delete mem_req;
                 memReq[tid] = NULL;
+                warn("Bad address!\n");
             }
             assert(retryPkt == NULL);
             assert(retryTid == -1);
@@ -669,11 +671,12 @@ DefaultFetch<Impl>::doSquash(const Addr &new_PC,
     // Get rid of the retrying packet if it was from this thread.
     if (retryTid == tid) {
         assert(cacheBlocked);
-        cacheBlocked = false;
-        retryTid = -1;
-        delete retryPkt->req;
-        delete retryPkt;
+        if (retryPkt) {
+            delete retryPkt->req;
+            delete retryPkt;
+        }
         retryPkt = NULL;
+        retryTid = -1;
     }
 
     fetchStatus[tid] = Squashing;
@@ -1117,13 +1120,10 @@ DefaultFetch<Impl>::fetch(bool &status_change)
             inst = TheISA::gtoh(*reinterpret_cast<TheISA::MachInst *>
                         (&cacheData[tid][offset]));
 
-#if THE_ISA == ALPHA_ISA
-            ext_inst = TheISA::makeExtMI(inst, fetch_PC);
-#elif THE_ISA == SPARC_ISA
-            ext_inst = TheISA::makeExtMI(inst, cpu->thread[tid]->getTC());
-#elif THE_ISA == MIPS_ISA
-            ext_inst = TheISA::makeExtMI(inst, cpu->thread[tid]->getTC());
-#endif
+            predecoder.setTC(cpu->thread[tid]->getTC());
+            predecoder.moreBytes(fetch_PC, 0, inst);
+
+            ext_inst = predecoder.getExtMachInst();
 
             // Create a new DynInst from the instruction fetched.
             DynInstPtr instruction = new DynInst(ext_inst,
@@ -1152,7 +1152,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
 
             ///FIXME This needs to be more robust in dealing with delay slots
 #if !ISA_HAS_DELAY_SLOT
-            predicted_branch |=
+//	    predicted_branch |=
 #endif
             lookupAndUpdateNextPC(instruction, next_PC, next_NPC);
             predicted_branch |= (next_PC != fetch_NPC);
@@ -1223,7 +1223,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
         // until commit handles the fault.  The only other way it can
         // wake up is if a squash comes along and changes the PC.
 #if FULL_SYSTEM
-        assert(numInst != fetchWidth);
+        assert(numInst < fetchWidth);
         // Get a sequence number.
         inst_seq = cpu->getAndIncrementInstSeq();
         // We will use a nop in order to carry the fault.
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index f24eaf2c4..4883e5a5c 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -1153,19 +1153,6 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid)
             inst->setCanCommit();
             instQueue.insertBarrier(inst);
             add_to_iq = false;
-        } else if (inst->isNonSpeculative()) {
-            DPRINTF(IEW, "[tid:%i]: Issue: Nonspeculative instruction "
-                    "encountered, skipping.\n", tid);
-
-            // Same as non-speculative stores.
-            inst->setCanCommit();
-
-            // Specifically insert it as nonspeculative.
-            instQueue.insertNonSpec(inst);
-
-            ++iewDispNonSpecInsts;
-
-            add_to_iq = false;
         } else if (inst->isNop()) {
             DPRINTF(IEW, "[tid:%i]: Issue: Nop instruction encountered, "
                     "skipping.\n", tid);
@@ -1193,6 +1180,20 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid)
         } else {
             add_to_iq = true;
         }
+        if (inst->isNonSpeculative()) {
+            DPRINTF(IEW, "[tid:%i]: Issue: Nonspeculative instruction "
+                    "encountered, skipping.\n", tid);
+
+            // Same as non-speculative stores.
+            inst->setCanCommit();
+
+            // Specifically insert it as nonspeculative.
+            instQueue.insertNonSpec(inst);
+
+            ++iewDispNonSpecInsts;
+
+            add_to_iq = false;
+        }
 
         // If the instruction queue is not full, then add the
         // instruction.
@@ -1379,6 +1380,7 @@ DefaultIEW<Impl>::executeInsts()
                     predictedNotTakenIncorrect++;
                 }
             } else if (ldstQueue.violation(tid)) {
+                assert(inst->isMemRef());
                 // If there was an ordering violation, then get the
                 // DynInst that caused the violation.  Note that this
                 // clears the violation signal.
@@ -1391,10 +1393,10 @@ DefaultIEW<Impl>::executeInsts()
 
                 // Ensure the violating instruction is older than
                 // current squash
-                if (fetchRedirect[tid] &&
-                    violator->seqNum >= toCommit->squashedSeqNum[tid])
+/*                if (fetchRedirect[tid] &&
+                    violator->seqNum >= toCommit->squashedSeqNum[tid] + 1)
                     continue;
-
+*/
                 fetchRedirect[tid] = true;
 
                 // Tell the instruction queue that a violation has occured.
@@ -1414,6 +1416,33 @@ DefaultIEW<Impl>::executeInsts()
 
                 squashDueToMemBlocked(inst, tid);
             }
+        } else {
+            // Reset any state associated with redirects that will not
+            // be used.
+            if (ldstQueue.violation(tid)) {
+                assert(inst->isMemRef());
+
+                DynInstPtr violator = ldstQueue.getMemDepViolator(tid);
+
+                DPRINTF(IEW, "LDSTQ detected a violation.  Violator PC: "
+                        "%#x, inst PC: %#x.  Addr is: %#x.\n",
+                        violator->readPC(), inst->readPC(), inst->physEffAddr);
+                DPRINTF(IEW, "Violation will not be handled because "
+                        "already squashing\n");
+
+                ++memOrderViolationEvents;
+            }
+            if (ldstQueue.loadBlocked(tid) &&
+                !ldstQueue.isLoadBlockedHandled(tid)) {
+                DPRINTF(IEW, "Load operation couldn't execute because the "
+                        "memory system is blocked.  PC: %#x [sn:%lli]\n",
+                        inst->readPC(), inst->seqNum);
+                DPRINTF(IEW, "Blocked load will not be handled because "
+                        "already squashing\n");
+
+                ldstQueue.setLoadBlockedHandled(tid);
+            }
+
         }
     }
 
@@ -1563,6 +1592,7 @@ DefaultIEW<Impl>::tick()
             //DPRINTF(IEW,"NonspecInst from thread %i",tid);
             if (fromCommit->commitInfo[tid].uncached) {
                 instQueue.replayMemInst(fromCommit->commitInfo[tid].uncachedLoad);
+                fromCommit->commitInfo[tid].uncachedLoad->setAtCommit();
             } else {
                 instQueue.scheduleNonSpec(
                     fromCommit->commitInfo[tid].nonSpecSeqNum);
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh
index d5781d89d..79e03d4bf 100644
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -829,6 +829,8 @@ InstructionQueue<Impl>::scheduleNonSpec(const InstSeqNum &inst)
 
     unsigned tid = (*inst_it).second->threadNumber;
 
+    (*inst_it).second->setAtCommit();
+
     (*inst_it).second->setCanIssue();
 
     if (!(*inst_it).second->isMemRef()) {
@@ -960,6 +962,8 @@ template <class Impl>
 void
 InstructionQueue<Impl>::rescheduleMemInst(DynInstPtr &resched_inst)
 {
+    DPRINTF(IQ, "Rescheduling mem inst [sn:%lli]\n", resched_inst->seqNum);
+    resched_inst->clearCanIssue();
     memDepUnit[resched_inst->threadNumber].reschedule(resched_inst);
 }
 
@@ -984,7 +988,6 @@ InstructionQueue<Impl>::completeMemInst(DynInstPtr &completed_inst)
     completed_inst->memOpDone = true;
 
     memDepUnit[tid].completed(completed_inst);
-
     count[tid]--;
 }
 
@@ -1084,16 +1087,21 @@ InstructionQueue<Impl>::doSquash(unsigned tid)
 
                     ++iqSquashedOperandsExamined;
                 }
-            } else if (!squashed_inst->isStoreConditional() || !squashed_inst->isCompleted()) {
+            } else if (!squashed_inst->isStoreConditional() ||
+                       !squashed_inst->isCompleted()) {
                 NonSpecMapIt ns_inst_it =
                     nonSpecInsts.find(squashed_inst->seqNum);
                 assert(ns_inst_it != nonSpecInsts.end());
+                if (ns_inst_it == nonSpecInsts.end()) {
+                    assert(squashed_inst->getFault() != NoFault);
+                } else {
 
-                (*ns_inst_it).second = NULL;
+                    (*ns_inst_it).second = NULL;
 
-                nonSpecInsts.erase(ns_inst_it);
+                    nonSpecInsts.erase(ns_inst_it);
 
-                ++iqSquashedNonSpecRemoved;
+                    ++iqSquashedNonSpecRemoved;
+                }
             }
 
             // Might want to also clear out the head of the dependency graph.
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index 2419afe29..1b10843f5 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -497,6 +497,11 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
         (load_idx != loadHead || !load_inst->isAtCommit())) {
         iewStage->rescheduleMemInst(load_inst);
         ++lsqRescheduledLoads;
+
+        // The request was never handed off to memory, so it must be
+        // deleted here.  This is quite ugly.  @todo: Figure out the
+        // proper place to really handle request deletes.
+        delete req;
         return TheISA::genMachineCheckFault();
     }
 
@@ -534,6 +539,10 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
 
         if (store_size == 0)
             continue;
+        else if (storeQueue[store_idx].inst->uncacheable())
+            continue;
+
+        assert(storeQueue[store_idx].inst->effAddrValid);
 
         // Check if the store data is within the lower and upper bounds of
         // addresses that the request needs.
@@ -550,7 +559,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
             storeQueue[store_idx].inst->effAddr;
 
         // If the store's data has all of the data needed, we can forward.
-        if (store_has_lower_limit && store_has_upper_limit) {
+        if ((store_has_lower_limit && store_has_upper_limit)) {
             // Get shift amount for offset into the store's data.
             int shift_amt = req->getVaddr() & (store_size - 1);
             // @todo: Magic number, assumes byte addressing
@@ -596,6 +605,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
             // If it's already been written back, then don't worry about
             // stalling on it.
             if (storeQueue[store_idx].completed) {
+                panic("Should not check one of these");
                 continue;
             }
 
@@ -614,6 +624,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
             // rescheduled eventually
             iewStage->rescheduleMemInst(load_inst);
             iewStage->decrWb(load_inst->seqNum);
+            load_inst->clearIssued();
             ++lsqRescheduledLoads;
 
             // Do not generate a writeback event as this instruction is not
@@ -622,7 +633,11 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
                     "Store idx %i to load addr %#x\n",
                     store_idx, req->getVaddr());
 
-            ++lsqBlockedLoads;
+            // The request was never handed off to memory, so it must
+            // be deleted here.  This is quite ugly.  @todo: Figure out
+            // the proper place to really handle request deletes.
+            delete req;
+
             return NoFault;
         }
     }
@@ -654,8 +669,11 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
             // Delete state and data packet because a load retry
             // initiates a pipeline restart; it does not retry.
             delete state;
+            delete data_pkt->req;
             delete data_pkt;
 
+            req = NULL;
+
             if (result == Packet::BadAddress) {
                 return TheISA::genMachineCheckFault();
             }
@@ -669,6 +687,9 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
     // If the cache was blocked, or has become blocked due to the access,
     // handle it.
     if (lsq->cacheBlocked()) {
+        if (req)
+            delete req;
+
         ++lsqCacheBlocked;
 
         iewStage->decrWb(load_inst->seqNum);
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 3ba22a530..e70c960b3 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -81,6 +81,7 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
     if (isSwitchedOut() || inst->isSquashed()) {
         iewStage->decrWb(inst->seqNum);
         delete state;
+        delete pkt->req;
         delete pkt;
         return;
     } else {
@@ -94,6 +95,7 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
     }
 
     delete state;
+    delete pkt->req;
     delete pkt;
 }
 
@@ -403,12 +405,15 @@ template <class Impl>
 Fault
 LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
 {
+    using namespace TheISA;
     // Execute a specific load.
     Fault load_fault = NoFault;
 
     DPRINTF(LSQUnit, "Executing load PC %#x, [sn:%lli]\n",
             inst->readPC(),inst->seqNum);
 
+    assert(!inst->isSquashed());
+
     load_fault = inst->initiateAcc();
 
     // If the instruction faulted, then we need to send it along to commit
@@ -418,12 +423,44 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
         // realizes there is activity.
         // Mark it as executed unless it is an uncached load that
         // needs to hit the head of commit.
-        if (!(inst->req && inst->req->isUncacheable()) ||
+        if (!(inst->hasRequest() && inst->uncacheable()) ||
             inst->isAtCommit()) {
             inst->setExecuted();
         }
         iewStage->instToCommit(inst);
         iewStage->activityThisCycle();
+    } else if (!loadBlocked()) {
+        assert(inst->effAddrValid);
+        int load_idx = inst->lqIdx;
+        incrLdIdx(load_idx);
+        while (load_idx != loadTail) {
+            // Really only need to check loads that have actually executed
+
+            // @todo: For now this is extra conservative, detecting a
+            // violation if the addresses match assuming all accesses
+            // are quad word accesses.
+
+            // @todo: Fix this, magic number being used here
+            if (loadQueue[load_idx]->effAddrValid &&
+                (loadQueue[load_idx]->effAddr >> 8) ==
+                (inst->effAddr >> 8)) {
+                // A load incorrectly passed this load.  Squash and refetch.
+                // For now return a fault to show that it was unsuccessful.
+                DynInstPtr violator = loadQueue[load_idx];
+                if (!memDepViolator ||
+                    (violator->seqNum < memDepViolator->seqNum)) {
+                    memDepViolator = violator;
+                } else {
+                    break;
+                }
+
+                ++lsqMemOrderViolation;
+
+                return genMachineCheckFault();
+            }
+
+            incrLdIdx(load_idx);
+        }
     }
 
     return load_fault;
@@ -442,6 +479,8 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
     DPRINTF(LSQUnit, "Executing store PC %#x [sn:%lli]\n",
             store_inst->readPC(), store_inst->seqNum);
 
+    assert(!store_inst->isSquashed());
+
     // Check the recently completed loads to see if any match this store's
     // address.  If so, then we have a memory ordering violation.
     int load_idx = store_inst->lqIdx;
@@ -465,32 +504,36 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
         ++storesToWB;
     }
 
-    if (!memDepViolator) {
-        while (load_idx != loadTail) {
-            // Really only need to check loads that have actually executed
-            // It's safe to check all loads because effAddr is set to
-            // InvalAddr when the dyn inst is created.
-
-            // @todo: For now this is extra conservative, detecting a
-            // violation if the addresses match assuming all accesses
-            // are quad word accesses.
-
-            // @todo: Fix this, magic number being used here
-            if ((loadQueue[load_idx]->effAddr >> 8) ==
-                (store_inst->effAddr >> 8)) {
-                // A load incorrectly passed this store.  Squash and refetch.
-                // For now return a fault to show that it was unsuccessful.
-                memDepViolator = loadQueue[load_idx];
-                ++lsqMemOrderViolation;
-
-                return genMachineCheckFault();
+    assert(store_inst->effAddrValid);
+    while (load_idx != loadTail) {
+        // Really only need to check loads that have actually executed
+        // It's safe to check all loads because effAddr is set to
+        // InvalAddr when the dyn inst is created.
+
+        // @todo: For now this is extra conservative, detecting a
+        // violation if the addresses match assuming all accesses
+        // are quad word accesses.
+
+        // @todo: Fix this, magic number being used here
+        if (loadQueue[load_idx]->effAddrValid &&
+            (loadQueue[load_idx]->effAddr >> 8) ==
+            (store_inst->effAddr >> 8)) {
+            // A load incorrectly passed this store.  Squash and refetch.
+            // For now return a fault to show that it was unsuccessful.
+            DynInstPtr violator = loadQueue[load_idx];
+            if (!memDepViolator ||
+                (violator->seqNum < memDepViolator->seqNum)) {
+                memDepViolator = violator;
+            } else {
+                break;
             }
 
-            incrLdIdx(load_idx);
+            ++lsqMemOrderViolation;
+
+            return genMachineCheckFault();
         }
 
-        // If we've reached this point, there was no violation.
-        memDepViolator = NULL;
+        incrLdIdx(load_idx);
     }
 
     return store_fault;
@@ -660,7 +703,7 @@ LSQUnit<Impl>::writebackStores()
                 panic("LSQ sent out a bad address for a completed store!");
             }
             // Need to handle becoming blocked on a store.
-            DPRINTF(IEW, "D-Cache became blcoked when writing [sn:%lli], will"
+            DPRINTF(IEW, "D-Cache became blocked when writing [sn:%lli], will"
                     "retry later\n",
                     inst->seqNum);
             isStoreBlocked = true;
@@ -735,6 +778,10 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
         }
     }
 
+    if (memDepViolator && squashed_num < memDepViolator->seqNum) {
+        memDepViolator = NULL;
+    }
+
     int store_idx = storeTail;
     decrStIdx(store_idx);
 
@@ -764,6 +811,11 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
         storeQueue[store_idx].inst = NULL;
         storeQueue[store_idx].canWB = 0;
 
+        // Must delete the request here because it was never handed off
+        // to memory.  This is quite ugly.  @todo: Figure out the proper
+        // place to really handle request deletes.
+        delete storeQueue[store_idx].req;
+
         storeQueue[store_idx].req = NULL;
         --stores;
 
diff --git a/src/cpu/o3/mem_dep_unit_impl.hh b/src/cpu/o3/mem_dep_unit_impl.hh
index f19980fd5..64558efaa 100644
--- a/src/cpu/o3/mem_dep_unit_impl.hh
+++ b/src/cpu/o3/mem_dep_unit_impl.hh
@@ -214,6 +214,9 @@ MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst)
             inst_entry->regsReady = true;
         }
 
+        // Clear the bit saying this instruction can issue.
+        inst->clearCanIssue();
+
         // Add this instruction to the list of dependents.
         store_entry->dependInsts.push_back(inst_entry);
 
@@ -357,7 +360,6 @@ void
 MemDepUnit<MemDepPred, Impl>::replay(DynInstPtr &inst)
 {
     DynInstPtr temp_inst;
-    bool found_inst = false;
 
     // For now this replay function replays all waiting memory ops.
     while (!instsToReplay.empty()) {
@@ -371,14 +373,8 @@ MemDepUnit<MemDepPred, Impl>::replay(DynInstPtr &inst)
 
         moveToReady(inst_entry);
 
-        if (temp_inst == inst) {
-            found_inst = true;
-        }
-
         instsToReplay.pop_front();
     }
-
-    assert(found_inst);
 }
 
 template <class MemDepPred, class Impl>
diff --git a/src/cpu/o3/rename_map.cc b/src/cpu/o3/rename_map.cc
index 620daf691..b436ec1c3 100644
--- a/src/cpu/o3/rename_map.cc
+++ b/src/cpu/o3/rename_map.cc
@@ -192,8 +192,6 @@ SimpleRenameMap::rename(RegIndex arch_reg)
         // known that the prev reg was outside the range of normal registers
         // so the free list can avoid adding it.
         prev_reg = renamed_reg;
-
-        assert(renamed_reg < numPhysicalRegs + numMiscRegs);
     }
 
     DPRINTF(Rename, "Renamed reg %d to physical reg %d old mapping was %d\n",
diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh
index d2acc6232..a145e046e 100755
--- a/src/cpu/o3/thread_context_impl.hh
+++ b/src/cpu/o3/thread_context_impl.hh
@@ -103,7 +103,7 @@ void
 O3ThreadContext<Impl>::delVirtPort(VirtualPort *vp)
 {
     if (vp != thread->getVirtPort()) {
-        delete vp->getPeer();
+        vp->removeConn();
         delete vp;
     }
 }
diff --git a/src/cpu/ozone/cpu_impl.hh b/src/cpu/ozone/cpu_impl.hh
index 4a76ae110..d78162243 100644
--- a/src/cpu/ozone/cpu_impl.hh
+++ b/src/cpu/ozone/cpu_impl.hh
@@ -748,7 +748,7 @@ template <class Impl>
 void
 OzoneCPU<Impl>::OzoneTC::delVirtPort(VirtualPort *vp)
 {
-    delete vp->getPeer();
+    vp->removeConn();
     delete vp;
 }
 #endif
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index ca4627bbf..6f69b5ac4 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -446,6 +446,17 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
 
 
 #ifndef DOXYGEN_SHOULD_SKIP_THIS
+
+template
+Fault
+AtomicSimpleCPU::write(Twin32_t data, Addr addr,
+                       unsigned flags, uint64_t *res);
+
+template
+Fault
+AtomicSimpleCPU::write(Twin64_t data, Addr addr,
+                       unsigned flags, uint64_t *res);
+
 template
 Fault
 AtomicSimpleCPU::write(uint64_t data, Addr addr,
@@ -505,17 +516,28 @@ AtomicSimpleCPU::tick()
         Fault fault = setupFetchRequest(ifetch_req);
 
         if (fault == NoFault) {
-            ifetch_pkt->reinitFromRequest();
+            Tick icache_latency = 0;
+            bool icache_access = false;
+            dcache_access = false; // assume no dcache access
 
-            Tick icache_latency = icachePort.sendAtomic(ifetch_pkt);
-            // ifetch_req is initialized to read the instruction directly
-            // into the CPU object's inst field.
+            //Fetch more instruction memory if necessary
+            if(predecoder.needMoreBytes())
+            {
+                icache_access = true;
+                ifetch_pkt->reinitFromRequest();
+
+                icache_latency = icachePort.sendAtomic(ifetch_pkt);
+                // ifetch_req is initialized to read the instruction directly
+                // into the CPU object's inst field.
+            }
 
-            dcache_access = false; // assume no dcache access
             preExecute();
 
-            fault = curStaticInst->execute(this, traceData);
-            postExecute();
+            if(curStaticInst)
+            {
+                fault = curStaticInst->execute(this, traceData);
+                postExecute();
+            }
 
             // @todo remove me after debugging with legion done
             if (curStaticInst && (!curStaticInst->isMicroOp() ||
@@ -523,7 +545,8 @@ AtomicSimpleCPU::tick()
                 instCnt++;
 
             if (simulate_stalls) {
-                Tick icache_stall = icache_latency - cycles(1);
+                Tick icache_stall =
+                    icache_access ? icache_latency - cycles(1) : 0;
                 Tick dcache_stall =
                     dcache_access ? dcache_latency - cycles(1) : 0;
                 Tick stall_cycles = (icache_stall + dcache_stall) / cycles(1);
@@ -534,8 +557,8 @@ AtomicSimpleCPU::tick()
             }
 
         }
-
-        advancePC(fault);
+        if(predecoder.needMoreBytes())
+            advancePC(fault);
     }
 
     if (_status != Idle)
diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc
index f6c109127..877dc5bd4 100644
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -70,7 +70,7 @@ using namespace std;
 using namespace TheISA;
 
 BaseSimpleCPU::BaseSimpleCPU(Params *p)
-    : BaseCPU(p), thread(NULL)
+    : BaseCPU(p), thread(NULL), predecoder(NULL)
 {
 #if FULL_SYSTEM
     thread = new SimpleThread(this, 0, p->system, p->itb, p->dtb);
@@ -301,7 +301,7 @@ BaseSimpleCPU::post_interrupt(int int_num, int index)
     BaseCPU::post_interrupt(int_num, index);
 
     if (thread->status() == ThreadContext::Suspended) {
-                DPRINTF(IPI,"Suspended Processor awoke\n");
+                DPRINTF(Quiesce,"Suspended Processor awoke\n");
         thread->activate();
     }
 }
@@ -367,18 +367,23 @@ BaseSimpleCPU::preExecute()
     inst = gtoh(inst);
     //If we're not in the middle of a macro instruction
     if (!curMacroStaticInst) {
-#if THE_ISA == ALPHA_ISA
-        StaticInstPtr instPtr = StaticInst::decode(makeExtMI(inst, thread->readPC()));
-#elif THE_ISA == SPARC_ISA
-        StaticInstPtr instPtr = StaticInst::decode(makeExtMI(inst, thread->getTC()));
-#elif THE_ISA == X86_ISA
-        StaticInstPtr instPtr = StaticInst::decode(makeExtMI(inst, thread->getTC()));
-#elif THE_ISA == MIPS_ISA
-        //Mips doesn't do anything in it's MakeExtMI function right now,
-        //so it won't be called.
-        StaticInstPtr instPtr = StaticInst::decode(inst);
-#endif
-        if (instPtr->isMacroOp()) {
+        StaticInstPtr instPtr = NULL;
+
+        //Predecode, i.e. bundle up an ExtMachInst
+        //This should go away once the constructor can be set up properly
+        predecoder.setTC(thread->getTC());
+        //If more fetch data is needed, pass it in.
+        if(predecoder.needMoreBytes())
+            predecoder.moreBytes(thread->readPC(), 0, inst);
+        else
+            predecoder.process();
+        //If an instruction is ready, decode it
+        if (predecoder.extMachInstReady())
+            instPtr = StaticInst::decode(predecoder.getExtMachInst());
+
+        //If we decoded an instruction and it's microcoded, start pulling
+        //out micro ops
+        if (instPtr && instPtr->isMacroOp()) {
             curMacroStaticInst = instPtr;
             curStaticInst = curMacroStaticInst->
                 fetchMicroOp(thread->readMicroPC());
@@ -391,17 +396,19 @@ BaseSimpleCPU::preExecute()
             fetchMicroOp(thread->readMicroPC());
     }
 
+    //If we decoded an instruction this "tick", record information about it.
+    if(curStaticInst)
+    {
+        traceData = Trace::getInstRecord(curTick, tc, curStaticInst,
+                                         thread->readPC());
 
-    traceData = Trace::getInstRecord(curTick, tc, curStaticInst,
-                                     thread->readPC());
-
-    DPRINTF(Decode,"Decode: Decoded %s instruction (opcode: 0x%x): 0x%x\n",
-            curStaticInst->getName(), curStaticInst->getOpcode(),
-            curStaticInst->machInst);
+        DPRINTF(Decode,"Decode: Decoded %s instruction: 0x%x\n",
+                curStaticInst->getName(), curStaticInst->machInst);
 
 #if FULL_SYSTEM
-    thread->setInst(inst);
+        thread->setInst(inst);
 #endif // FULL_SYSTEM
+    }
 }
 
 void
@@ -411,7 +418,8 @@ BaseSimpleCPU::postExecute()
     if (thread->profile) {
         bool usermode = TheISA::inUserMode(tc);
         thread->profilePC = usermode ? 1 : thread->readPC();
-        ProfileNode *node = thread->profile->consume(tc, inst);
+        StaticInstPtr si(inst);
+        ProfileNode *node = thread->profile->consume(tc, si);
         if (node)
             thread->profileNode = node;
     }
@@ -444,9 +452,9 @@ BaseSimpleCPU::advancePC(Fault fault)
         fault->invoke(tc);
         thread->setMicroPC(0);
         thread->setNextMicroPC(1);
-    } else {
+    } else if (predecoder.needMoreBytes()) {
         //If we're at the last micro op for this instruction
-        if (curStaticInst->isLastMicroOp()) {
+        if (curStaticInst && curStaticInst->isLastMicroOp()) {
             //We should be working with a macro op
             assert(curMacroStaticInst);
             //Close out this macro op, and clean up the
@@ -465,13 +473,9 @@ BaseSimpleCPU::advancePC(Fault fault)
         } else {
             // go to the next instruction
             thread->setPC(thread->readNextPC());
-#if ISA_HAS_DELAY_SLOT
             thread->setNextPC(thread->readNextNPC());
             thread->setNextNPC(thread->readNextNPC() + sizeof(MachInst));
             assert(thread->readNextPC() != thread->readNextNPC());
-#else
-            thread->setNextPC(thread->readNextPC() + sizeof(MachInst));
-#endif
         }
     }
 
diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh
index eae24014b..787259c96 100644
--- a/src/cpu/simple/base.hh
+++ b/src/cpu/simple/base.hh
@@ -33,6 +33,7 @@
 #ifndef __CPU_SIMPLE_BASE_HH__
 #define __CPU_SIMPLE_BASE_HH__
 
+#include "arch/predecoder.hh"
 #include "base/statistics.hh"
 #include "config/full_system.hh"
 #include "cpu/base.hh"
@@ -63,6 +64,10 @@ class Process;
 class RemoteGDB;
 class GDBListener;
 
+namespace TheISA
+{
+    class Predecoder;
+}
 class ThreadContext;
 class Checkpoint;
 
@@ -74,7 +79,6 @@ namespace Trace {
 class BaseSimpleCPU : public BaseCPU
 {
   protected:
-    typedef TheISA::MachInst MachInst;
     typedef TheISA::MiscReg MiscReg;
     typedef TheISA::FloatReg FloatReg;
     typedef TheISA::FloatRegBits FloatRegBits;
@@ -122,7 +126,10 @@ class BaseSimpleCPU : public BaseCPU
 #endif
 
     // current instruction
-    MachInst inst;
+    TheISA::MachInst inst;
+
+    // The predecoder
+    TheISA::Predecoder predecoder;
 
     // Static data storage
     TheISA::LargestRead dataReg;
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index 2e602648a..45da7c3eb 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -398,6 +398,16 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
 #ifndef DOXYGEN_SHOULD_SKIP_THIS
 template
 Fault
+TimingSimpleCPU::write(Twin32_t data, Addr addr,
+                       unsigned flags, uint64_t *res);
+
+template
+Fault
+TimingSimpleCPU::write(Twin64_t data, Addr addr,
+                       unsigned flags, uint64_t *res);
+
+template
+Fault
 TimingSimpleCPU::write(uint64_t data, Addr addr,
                        unsigned flags, uint64_t *res);
 
diff --git a/src/cpu/simple_thread.cc b/src/cpu/simple_thread.cc
index 13d0e2e29..39f31782b 100644
--- a/src/cpu/simple_thread.cc
+++ b/src/cpu/simple_thread.cc
@@ -305,7 +305,7 @@ void
 SimpleThread::delVirtPort(VirtualPort *vp)
 {
     if (vp != virtPort) {
-        delete vp->getPeer();
+        vp->removeConn();
         delete vp;
     }
 }
diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh
index 416c8ab56..a58ac85d6 100644
--- a/src/cpu/static_inst.hh
+++ b/src/cpu/static_inst.hh
@@ -35,6 +35,7 @@
 #include <string>
 
 #include "arch/isa_traits.hh"
+#include "arch/utility.hh"
 #include "sim/faults.hh"
 #include "base/bitfield.hh"
 #include "base/hashmap.hh"
@@ -439,9 +440,6 @@ class StaticInst : public StaticInstBase
     //This is defined as inline below.
     static StaticInstPtr decode(ExtMachInst mach_inst);
 
-    /// Return opcode of machine instruction
-    uint32_t getOpcode() { return bits(machInst, 31, 26);}
-
     /// Return name of machine instruction
     std::string getName() { return mnemonic; }
 };
@@ -474,7 +472,7 @@ class StaticInstPtr : public RefCountingPtr<StaticInst>
 
     /// Construct directly from machine instruction.
     /// Calls StaticInst::decode().
-    StaticInstPtr(TheISA::ExtMachInst mach_inst)
+    explicit StaticInstPtr(TheISA::ExtMachInst mach_inst)
         : RefCountingPtr<StaticInst>(StaticInst::decode(mach_inst))
     {
     }
diff --git a/src/dev/SConscript b/src/dev/SConscript
index 1ec83de4b..ea529b536 100644
--- a/src/dev/SConscript
+++ b/src/dev/SConscript
@@ -40,7 +40,7 @@ if env['FULL_SYSTEM']:
     Source('etherlink.cc')
     Source('etherpkt.cc')
     Source('ethertap.cc')	
-    #Source('i8254xGBe.cc')
+    Source('i8254xGBe.cc')
     Source('ide_ctrl.cc')
     Source('ide_disk.cc')
     Source('io_device.cc')
diff --git a/src/dev/i8254xGBe.cc b/src/dev/i8254xGBe.cc
index 7fc68f4e7..c38a9e873 100644
--- a/src/dev/i8254xGBe.cc
+++ b/src/dev/i8254xGBe.cc
@@ -35,7 +35,13 @@
  * other MACs with slight modifications.
  */
 
+
+/*
+ * @todo There are really multiple DMA engines; we should implement them.
+ */
+
 #include "base/inet.hh"
+#include "base/trace.hh"
 #include "dev/i8254xGBe.hh"
 #include "mem/packet.hh"
 #include "mem/packet_access.hh"
@@ -43,32 +49,38 @@
 #include "sim/stats.hh"
 #include "sim/system.hh"
 
+#include <algorithm>
+
 using namespace iGbReg;
+using namespace Net;
 
 IGbE::IGbE(Params *p)
-    : PciDev(p), etherInt(NULL)
+    : PciDev(p), etherInt(NULL),  useFlowControl(p->use_flow_control),
+      rxFifo(p->rx_fifo_size), txFifo(p->tx_fifo_size), rxTick(false),
+      txTick(false), rdtrEvent(this), radvEvent(this), tadvEvent(this),
+      tidvEvent(this), tickEvent(this), interEvent(this),
+      rxDescCache(this, name()+".RxDesc", p->rx_desc_cache_size),
+      txDescCache(this, name()+".TxDesc", p->tx_desc_cache_size), clock(p->clock)
 {
     // Initialized internal registers per Intel documentation
-    regs.tctl.reg       = 0;
-    regs.rctl.reg       = 0;
-    regs.ctrl.reg       = 0;
-    regs.ctrl.fd        = 1;
-    regs.ctrl.lrst      = 1;
-    regs.ctrl.speed     = 2;
-    regs.ctrl.frcspd    = 1;
-    regs.sts.reg        = 0;
-    regs.eecd.reg       = 0;
-    regs.eecd.fwe       = 1;
-    regs.eecd.ee_type   = 1;
-    regs.eerd.reg       = 0;
-    regs.icd.reg        = 0;
-    regs.imc.reg        = 0;
-    regs.rctl.reg       = 0;
-    regs.tctl.reg       = 0;
-    regs.manc.reg       = 0;
-
-    regs.pba.rxa        = 0x30;
-    regs.pba.txa        = 0x10;
+    // All registers are initialized to 0 by the per-register constructors
+    regs.ctrl.fd(1);
+    regs.ctrl.lrst(1);
+    regs.ctrl.speed(2);
+    regs.ctrl.frcspd(1);
+    regs.sts.speed(3); // Say we're 1000Mbps
+    regs.sts.fd(1); // full duplex
+    regs.sts.lu(1); // link up
+    regs.eecd.fwe(1);
+    regs.eecd.ee_type(1);
+    regs.imr = 0;
+    regs.iam = 0;
+    regs.rxdctl.gran(1);
+    regs.rxdctl.wthresh(1);
+    regs.fcrth(1);
+
+    regs.pba.rxa(0x30);
+    regs.pba.txa(0x10);
 
     eeOpBits            = 0;
     eeAddrBits          = 0;
@@ -78,8 +90,21 @@ IGbE::IGbE(Params *p)
     // clear all 64 16 bit words of the eeprom
     memset(&flash, 0, EEPROM_SIZE*2);
 
+    // Set the MAC address
+    memcpy(flash, p->hardware_address.bytes(), ETH_ADDR_LEN);
+    for (int x = 0; x < ETH_ADDR_LEN/2; x++)
+        flash[x] = htobe(flash[x]);
+
+    uint16_t csum = 0;
+    for (int x = 0; x < EEPROM_SIZE; x++)
+        csum += htobe(flash[x]);
+
+
     // Magic happy checksum value
-    flash[0] = 0xBABA;
+    flash[EEPROM_SIZE-1] = htobe((uint16_t)(EEPROM_CSUM - csum));
+
+    rxFifo.clear();
+    txFifo.clear();
 }
 
 
@@ -114,7 +139,7 @@ IGbE::read(PacketPtr pkt)
     // Only 32bit accesses allowed
     assert(pkt->getSize() == 4);
 
-    //DPRINTF(Ethernet, "Read device register %#X\n", daddr);
+    DPRINTF(Ethernet, "Read device register %#X\n", daddr);
 
     pkt->allocate();
 
@@ -124,47 +149,125 @@ IGbE::read(PacketPtr pkt)
 
 
     switch (daddr) {
-      case CTRL:
-       pkt->set<uint32_t>(regs.ctrl.reg);
-       break;
-      case STATUS:
-       pkt->set<uint32_t>(regs.sts.reg);
-       break;
-      case EECD:
-       pkt->set<uint32_t>(regs.eecd.reg);
-       break;
-      case EERD:
-       pkt->set<uint32_t>(regs.eerd.reg);
-       break;
-      case ICR:
-       pkt->set<uint32_t>(regs.icd.reg);
-       break;
-      case IMC:
-       pkt->set<uint32_t>(regs.imc.reg);
-       break;
-      case RCTL:
-       pkt->set<uint32_t>(regs.rctl.reg);
-       break;
-      case TCTL:
-       pkt->set<uint32_t>(regs.tctl.reg);
-       break;
-      case PBA:
-       pkt->set<uint32_t>(regs.pba.reg);
-       break;
-      case WUC:
-      case LEDCTL:
-       pkt->set<uint32_t>(0); // We don't care, so just return 0
-       break;
-      case MANC:
-       pkt->set<uint32_t>(regs.manc.reg);
-       break;
+      case REG_CTRL:
+        pkt->set<uint32_t>(regs.ctrl());
+        break;
+      case REG_STATUS:
+        pkt->set<uint32_t>(regs.sts());
+        break;
+      case REG_EECD:
+        pkt->set<uint32_t>(regs.eecd());
+        break;
+      case REG_EERD:
+        pkt->set<uint32_t>(regs.eerd());
+        break;
+      case REG_CTRL_EXT:
+        pkt->set<uint32_t>(regs.ctrl_ext());
+        break;
+      case REG_MDIC:
+        pkt->set<uint32_t>(regs.mdic());
+        break;
+      case REG_ICR:
+        DPRINTF(Ethernet, "Reading ICR. ICR=%#x IMR=%#x IAM=%#x IAME=%d\n", regs.icr(),
+                regs.imr, regs.iam, regs.ctrl_ext.iame());
+        pkt->set<uint32_t>(regs.icr());
+        if (regs.icr.int_assert() || regs.imr == 0) {
+            regs.icr = regs.icr() & ~mask(30);
+            DPRINTF(Ethernet, "Cleared ICR. ICR=%#x\n", regs.icr());
+        }
+        if (regs.ctrl_ext.iame() && regs.icr.int_assert())
+            regs.imr &= ~regs.iam;
+        chkInterrupt();
+        break;
+      case REG_ITR:
+        pkt->set<uint32_t>(regs.itr());
+        break;
+      case REG_RCTL:
+        pkt->set<uint32_t>(regs.rctl());
+        break;
+      case REG_FCTTV:
+        pkt->set<uint32_t>(regs.fcttv());
+        break;
+      case REG_TCTL:
+        pkt->set<uint32_t>(regs.tctl());
+        break;
+      case REG_PBA:
+        pkt->set<uint32_t>(regs.pba());
+        break;
+      case REG_WUC:
+      case REG_LEDCTL:
+        pkt->set<uint32_t>(0); // We don't care, so just return 0
+        break;
+      case REG_FCRTL:
+        pkt->set<uint32_t>(regs.fcrtl());
+        break;
+      case REG_FCRTH:
+        pkt->set<uint32_t>(regs.fcrth());
+        break;
+      case REG_RDBAL:
+        pkt->set<uint32_t>(regs.rdba.rdbal());
+        break;
+      case REG_RDBAH:
+        pkt->set<uint32_t>(regs.rdba.rdbah());
+        break;
+      case REG_RDLEN:
+        pkt->set<uint32_t>(regs.rdlen());
+        break;
+      case REG_RDH:
+        pkt->set<uint32_t>(regs.rdh());
+        break;
+      case REG_RDT:
+        pkt->set<uint32_t>(regs.rdt());
+        break;
+      case REG_RDTR:
+        pkt->set<uint32_t>(regs.rdtr());
+        if (regs.rdtr.fpd()) {
+            rxDescCache.writeback(0);
+            postInterrupt(IT_RXT);
+            regs.rdtr.fpd(0);
+        }
+        break;
+      case REG_RADV:
+        pkt->set<uint32_t>(regs.radv());
+        break;
+      case REG_TDBAL:
+        pkt->set<uint32_t>(regs.tdba.tdbal());
+        break;
+      case REG_TDBAH:
+        pkt->set<uint32_t>(regs.tdba.tdbah());
+        break;
+      case REG_TDLEN:
+        pkt->set<uint32_t>(regs.tdlen());
+        break;
+      case REG_TDH:
+        pkt->set<uint32_t>(regs.tdh());
+        break;
+      case REG_TDT:
+        pkt->set<uint32_t>(regs.tdt());
+        break;
+      case REG_TIDV:
+        pkt->set<uint32_t>(regs.tidv());
+        break;
+      case REG_TXDCTL:
+        pkt->set<uint32_t>(regs.txdctl());
+        break;
+      case REG_TADV:
+        pkt->set<uint32_t>(regs.tadv());
+        break;
+      case REG_RXCSUM:
+        pkt->set<uint32_t>(regs.rxcsum());
+        break;
+      case REG_MANC:
+        pkt->set<uint32_t>(regs.manc());
+        break;
       default:
-       if (!(daddr >= VFTA && daddr < (VFTA + VLAN_FILTER_TABLE_SIZE)*4) &&
-           !(daddr >= RAL && daddr < (RAL + RCV_ADDRESS_TABLE_SIZE)*4) &&
-           !(daddr >= MTA && daddr < (MTA + MULTICAST_TABLE_SIZE)*4))
-           pkt->set<uint32_t>(0);
-       else
-           panic("Read request to unknown register number: %#x\n", daddr);
+        if (!(daddr >= REG_VFTA && daddr < (REG_VFTA + VLAN_FILTER_TABLE_SIZE*4)) &&
+            !(daddr >= REG_RAL && daddr < (REG_RAL + RCV_ADDRESS_TABLE_SIZE*8)) &&
+            !(daddr >= REG_MTA && daddr < (REG_MTA + MULTICAST_TABLE_SIZE*4)) &&
+            !(daddr >= REG_CRCERRS && daddr < (REG_CRCERRS + STATS_REGS_SIZE)))
+            panic("Read request to unknown register number: %#x\n", daddr);
+        else
+            pkt->set<uint32_t>(0);
     };
 
     pkt->result = Packet::Success;
@@ -187,100 +290,253 @@ IGbE::write(PacketPtr pkt)
     // Only 32bit accesses allowed
     assert(pkt->getSize() == sizeof(uint32_t));
 
-    //DPRINTF(Ethernet, "Wrote device register %#X value %#X\n", daddr, pkt->get<uint32_t>());
+    DPRINTF(Ethernet, "Wrote device register %#X value %#X\n", daddr, pkt->get<uint32_t>());
 
     ///
     /// Handle write of register here
     ///
     uint32_t val = pkt->get<uint32_t>();
 
+    Regs::RCTL oldrctl;
+    Regs::TCTL oldtctl;
+
     switch (daddr) {
-      case CTRL:
-       regs.ctrl.reg = val;
-       break;
-      case STATUS:
-       regs.sts.reg = val;
-       break;
-      case EECD:
-       int oldClk;
-       oldClk = regs.eecd.sk;
-       regs.eecd.reg = val;
-       // See if this is a eeprom access and emulate accordingly
-       if (!oldClk && regs.eecd.sk) {
-           if (eeOpBits < 8) {
-               eeOpcode = eeOpcode << 1 | regs.eecd.din;
-               eeOpBits++;
-           } else if (eeAddrBits < 8 && eeOpcode == EEPROM_READ_OPCODE_SPI) {
-               eeAddr = eeAddr << 1 | regs.eecd.din;
-               eeAddrBits++;
-           } else if (eeDataBits < 16 && eeOpcode == EEPROM_READ_OPCODE_SPI) {
-               assert(eeAddr>>1 < EEPROM_SIZE);
-               DPRINTF(EthernetEEPROM, "EEPROM bit read: %d word: %#X\n",
-                       flash[eeAddr>>1] >> eeDataBits & 0x1, flash[eeAddr>>1]);
-               regs.eecd.dout = (flash[eeAddr>>1] >> (15-eeDataBits)) & 0x1;
-               eeDataBits++;
-           } else if (eeDataBits < 8 && eeOpcode == EEPROM_RDSR_OPCODE_SPI) {
-               regs.eecd.dout = 0;
-               eeDataBits++;
-           } else
-               panic("What's going on with eeprom interface? opcode:"
-                      " %#x:%d addr: %#x:%d, data: %d\n", (uint32_t)eeOpcode,
-                      (uint32_t)eeOpBits, (uint32_t)eeAddr,
-                      (uint32_t)eeAddrBits, (uint32_t)eeDataBits);
-
-           // Reset everything for the next command
-           if ((eeDataBits == 16 && eeOpcode == EEPROM_READ_OPCODE_SPI) ||
+      case REG_CTRL:
+        regs.ctrl = val;
+        if (regs.ctrl.tfce())
+            warn("TX Flow control enabled, should implement\n");
+        if (regs.ctrl.rfce())
+            warn("RX Flow control enabled, should implement\n");
+        break;
+      case REG_CTRL_EXT:
+        regs.ctrl_ext = val;
+        break;
+      case REG_STATUS:
+        regs.sts = val;
+        break;
+      case REG_EECD:
+        int oldClk;
+        oldClk = regs.eecd.sk();
+        regs.eecd = val;
+        // See if this is a eeprom access and emulate accordingly
+        if (!oldClk && regs.eecd.sk()) {
+            if (eeOpBits < 8) {
+                eeOpcode = eeOpcode << 1 | regs.eecd.din();
+                eeOpBits++;
+            } else if (eeAddrBits < 8 && eeOpcode == EEPROM_READ_OPCODE_SPI) {
+                eeAddr = eeAddr << 1 | regs.eecd.din();
+                eeAddrBits++;
+            } else if (eeDataBits < 16 && eeOpcode == EEPROM_READ_OPCODE_SPI) {
+                assert(eeAddr>>1 < EEPROM_SIZE);
+                DPRINTF(EthernetEEPROM, "EEPROM bit read: %d word: %#X\n",
+                        flash[eeAddr>>1] >> eeDataBits & 0x1, flash[eeAddr>>1]);
+                regs.eecd.dout((flash[eeAddr>>1] >> (15-eeDataBits)) & 0x1);
+                eeDataBits++;
+            } else if (eeDataBits < 8 && eeOpcode == EEPROM_RDSR_OPCODE_SPI) {
+                regs.eecd.dout(0);
+                eeDataBits++;
+            } else
+                panic("What's going on with eeprom interface? opcode:"
+                       " %#x:%d addr: %#x:%d, data: %d\n", (uint32_t)eeOpcode,
+                       (uint32_t)eeOpBits, (uint32_t)eeAddr,
+                       (uint32_t)eeAddrBits, (uint32_t)eeDataBits);
+
+            // Reset everything for the next command
+            if ((eeDataBits == 16 && eeOpcode == EEPROM_READ_OPCODE_SPI) ||
                (eeDataBits == 8 && eeOpcode == EEPROM_RDSR_OPCODE_SPI)) {
-               eeOpBits = 0;
-               eeAddrBits = 0;
-               eeDataBits = 0;
+                eeOpBits = 0;
+                eeAddrBits = 0;
+                eeDataBits = 0;
                eeOpcode = 0;
-               eeAddr = 0;
-           }
+                eeAddr = 0;
+            }
 
            DPRINTF(EthernetEEPROM, "EEPROM: opcode: %#X:%d addr: %#X:%d\n",
-                   (uint32_t)eeOpcode, (uint32_t) eeOpBits,
-                   (uint32_t)eeAddr>>1, (uint32_t)eeAddrBits);
+                    (uint32_t)eeOpcode, (uint32_t) eeOpBits,
+                    (uint32_t)eeAddr>>1, (uint32_t)eeAddrBits);
            if (eeOpBits == 8 && !(eeOpcode == EEPROM_READ_OPCODE_SPI ||
-                                  eeOpcode == EEPROM_RDSR_OPCODE_SPI ))
-               panic("Unknown eeprom opcode: %#X:%d\n", (uint32_t)eeOpcode,
-                       (uint32_t)eeOpBits);
-
-
-       }
-       // If driver requests eeprom access, immediately give it to it
-       regs.eecd.ee_gnt = regs.eecd.ee_req;
-       break;
-      case EERD:
-       regs.eerd.reg = val;
-       break;
-      case ICR:
-       regs.icd.reg = val;
-       break;
-      case IMC:
-       regs.imc.reg = val;
-       break;
-      case RCTL:
-       regs.rctl.reg = val;
-       break;
-      case TCTL:
-       regs.tctl.reg = val;
-       break;
-      case PBA:
-       regs.pba.rxa = val;
-       regs.pba.txa = 64 - regs.pba.rxa;
-       break;
-      case WUC:
-      case LEDCTL:
-       ; // We don't care, so don't store anything
-       break;
-      case MANC:
-       regs.manc.reg = val;
-       break;
+                                   eeOpcode == EEPROM_RDSR_OPCODE_SPI ))
+                panic("Unknown eeprom opcode: %#X:%d\n", (uint32_t)eeOpcode,
+                        (uint32_t)eeOpBits);
+
+
+        }
+        // If driver requests eeprom access, immediately give it to it
+        regs.eecd.ee_gnt(regs.eecd.ee_req());
+        break;
+      case REG_EERD:
+        regs.eerd = val;
+        break;
+      case REG_MDIC:
+        regs.mdic = val;
+        if (regs.mdic.i())
+            panic("No support for interrupt on mdic complete\n");
+        if (regs.mdic.phyadd() != 1)
+            panic("No support for reading anything but phy\n");
+        DPRINTF(Ethernet, "%s phy address %x\n", regs.mdic.op() == 1 ? "Writing"
+                : "Reading", regs.mdic.regadd());
+        switch (regs.mdic.regadd()) {
+            case PHY_PSTATUS:
+                regs.mdic.data(0x796D); // link up
+                break;
+            case PHY_PID:
+                regs.mdic.data(0x02A8);
+                break;
+            case PHY_EPID:
+                regs.mdic.data(0x0380);
+                break;
+            case PHY_GSTATUS:
+                regs.mdic.data(0x7C00);
+                break;
+            case PHY_EPSTATUS:
+                regs.mdic.data(0x3000);
+                break;
+            case PHY_AGC:
+                regs.mdic.data(0x180); // some random length
+                break;
+            default:
+                regs.mdic.data(0);
+        }
+        regs.mdic.r(1);
+        break;
+      case REG_ICR:
+        DPRINTF(Ethernet, "Writing ICR. ICR=%#x IMR=%#x IAM=%#x IAME=%d\n", regs.icr(),
+                regs.imr, regs.iam, regs.ctrl_ext.iame());
+        if (regs.ctrl_ext.iame())
+            regs.imr &= ~regs.iam;
+        regs.icr = ~bits(val,30,0) & regs.icr();
+        chkInterrupt();
+        break;
+      case REG_ITR:
+        regs.itr = val;
+        break;
+      case REG_ICS:
+        postInterrupt((IntTypes)val);
+        break;
+       case REG_IMS:
+        regs.imr |= val;
+        chkInterrupt();
+        break;
+      case REG_IMC:
+        regs.imr &= ~val;
+        chkInterrupt();
+        break;
+      case REG_IAM:
+        regs.iam = val;
+        break;
+      case REG_RCTL:
+        oldrctl = regs.rctl;
+        regs.rctl = val;
+        if (regs.rctl.rst()) {
+            rxDescCache.reset();
+            rxFifo.clear();
+            regs.rctl.rst(0);
+        }
+        if (regs.rctl.en())
+            rxTick = true;
+        restartClock();
+        break;
+      case REG_FCTTV:
+        regs.fcttv = val;
+        break;
+      case REG_TCTL:
+        // Snapshot the previous TCTL *before* overwriting it; otherwise the
+        // disabled -> enabled transition tested below can never be seen.
+        oldtctl = regs.tctl;
+        regs.tctl = val;
+        if (regs.tctl.en())
+           txTick = true;
+        restartClock();
+        if (regs.tctl.en() && !oldtctl.en())
+            txDescCache.reset();
+        break;
+      case REG_PBA:
+        regs.pba.rxa(val);
+        regs.pba.txa(64 - regs.pba.rxa());
+        break;
+      case REG_WUC:
+      case REG_LEDCTL:
+      case REG_FCAL:
+      case REG_FCAH:
+      case REG_FCT:
+      case REG_VET:
+      case REG_AIFS:
+      case REG_TIPG:
+        ; // We don't care, so don't store anything
+        break;
+      case REG_FCRTL:
+        regs.fcrtl = val;
+        break;
+      case REG_FCRTH:
+        regs.fcrth = val;
+        break;
+      case REG_RDBAL:
+        regs.rdba.rdbal( val & ~mask(4));
+        rxDescCache.areaChanged();
+        break;
+      case REG_RDBAH:
+        regs.rdba.rdbah(val);
+        rxDescCache.areaChanged();
+        break;
+      case REG_RDLEN:
+        regs.rdlen = val & ~mask(7);
+        rxDescCache.areaChanged();
+        break;
+      case REG_RDH:
+        regs.rdh = val;
+        rxDescCache.areaChanged();
+        break;
+      case REG_RDT:
+        regs.rdt = val;
+        rxTick = true;
+        restartClock();
+        break;
+      case REG_RDTR:
+        regs.rdtr = val;
+        break;
+      case REG_RADV:
+        regs.radv = val;
+        break;
+      case REG_TDBAL:
+        regs.tdba.tdbal( val & ~mask(4));
+        txDescCache.areaChanged();
+        break;
+      case REG_TDBAH:
+        regs.tdba.tdbah(val);
+        txDescCache.areaChanged();
+        break;
+      case REG_TDLEN:
+        regs.tdlen = val & ~mask(7);
+        txDescCache.areaChanged();
+        break;
+      case REG_TDH:
+        regs.tdh = val;
+        txDescCache.areaChanged();
+        break;
+      case REG_TDT:
+        regs.tdt = val;
+        txTick = true;
+        restartClock();
+        break;
+      case REG_TIDV:
+        regs.tidv = val;
+        break;
+      case REG_TXDCTL:
+        regs.txdctl = val;
+        break;
+      case REG_TADV:
+        regs.tadv = val;
+        break;
+      case REG_RXCSUM:
+        regs.rxcsum = val;
+        break;
+      case REG_MANC:
+        regs.manc = val;
+        break;
       default:
-       if (!(daddr >= VFTA && daddr < (VFTA + VLAN_FILTER_TABLE_SIZE)*4) &&
-           !(daddr >= RAL && daddr < (RAL + RCV_ADDRESS_TABLE_SIZE)*4) &&
-           !(daddr >= MTA && daddr < (MTA + MULTICAST_TABLE_SIZE)*4))
+       if (!(daddr >= REG_VFTA && daddr < (REG_VFTA + VLAN_FILTER_TABLE_SIZE*4)) &&
+           !(daddr >= REG_RAL && daddr < (REG_RAL + RCV_ADDRESS_TABLE_SIZE*8)) &&
+           !(daddr >= REG_MTA && daddr < (REG_MTA + MULTICAST_TABLE_SIZE*4)))
            panic("Write request to unknown register number: %#x\n", daddr);
     };
 
@@ -288,18 +544,658 @@ IGbE::write(PacketPtr pkt)
     return pioDelay;
 }
 
+void
+IGbE::postInterrupt(IntTypes t, bool now)
+{
+    assert(t);
+
+    // This cause is already latched in ICR; nothing further to do.
+    if (t & regs.icr())
+        return;
+
+    // Sample whether an unmasked cause was pending *before* latching t.
+    const bool unmaskedPending = (regs.icr() & regs.imr) != 0;
+    regs.icr = regs.icr() | t;
+    if (unmaskedPending) {
+        if (!interEvent.scheduled())
+            interEvent.schedule(curTick + Clock::Int::ns * 256 *
+                    regs.itr.interval());
+        return;
+    }
+    if (regs.itr.interval() == 0 || now) {
+        if (interEvent.scheduled())
+            interEvent.deschedule();
+        cpuPostInt();
+        return;
+    }
+    DPRINTF(EthernetIntr, "EINT: Scheduling timer interrupt for %d ticks\n",
+            Clock::Int::ns * 256 * regs.itr.interval());
+    assert(!interEvent.scheduled());
+    interEvent.schedule(curTick + Clock::Int::ns * 256 * regs.itr.interval());
+}
+
+void
+IGbE::cpuPostInt()
+{
+    if (rdtrEvent.scheduled()) {    // pending RDTR timer: set RXT0, cancel it
+        regs.icr.rxt0(1);
+        rdtrEvent.deschedule();
+    }
+    if (radvEvent.scheduled()) {    // pending RADV timer: set RXT0, cancel it
+        regs.icr.rxt0(1);
+        radvEvent.deschedule();
+    }
+    if (tadvEvent.scheduled()) {    // pending TADV timer: set TXDW, cancel it
+        regs.icr.txdw(1);
+        tadvEvent.deschedule();
+    }
+    if (tidvEvent.scheduled()) {    // pending TIDV timer: set TXDW, cancel it
+        regs.icr.txdw(1);
+        tidvEvent.deschedule();
+    }
+    // Mark the interrupt as asserted in ICR, then signal it via intrPost().
+    regs.icr.int_assert(1);
+    DPRINTF(EthernetIntr, "EINT: Posting interrupt to CPU now. Vector %#x\n",
+            regs.icr());
+    intrPost();
+}
+
+void
+IGbE::cpuClearInt()
+{
+    if (!regs.icr.int_assert())
+        return;     // interrupt is not currently asserted; nothing to clear
+    regs.icr.int_assert(0);
+    DPRINTF(EthernetIntr, "EINT: Clearing interrupt to CPU now. Vector %#x\n",
+            regs.icr());
+    intrClear();
+}
+
+void
+IGbE::chkInterrupt()
+{
+    // With no unmasked cause pending, cancel any throttled interrupt and
+    // drop the CPU interrupt if it is currently asserted.
+    if (!(regs.icr() & regs.imr)) {
+        if (interEvent.scheduled())
+            interEvent.deschedule();
+        if (regs.icr.int_assert())
+            cpuClearInt();
+    }
+
+    // Re-test after the possible clear above; stop here if still idle.
+    if (!(regs.icr() & regs.imr))
+        return;
+
+    // An unmasked cause is pending: deliver now, or start the throttle timer.
+    if (regs.itr.interval() == 0)
+        cpuPostInt();
+    else if (!interEvent.scheduled())
+        interEvent.schedule(curTick + Clock::Int::ns * 256 * regs.itr.interval());
+}
+
+
+IGbE::RxDescCache::RxDescCache(IGbE *i, const std::string n, int s)
+    : DescCache<RxDesc>(i, n, s), pktDone(false), pktEvent(this)
+    // Starts with no completed packet; i, n and s go to DescCache<RxDesc>.
+{
+}
 
 bool
-IGbE::ethRxPkt(EthPacketPtr packet)
+IGbE::RxDescCache::writePacket(EthPacketPtr packet)
 {
-    panic("Need to implemenet\n");
+    // Only packets smaller than the configured descriptor size are handled
+    DPRINTF(EthernetDesc, "Packet Length: %d Desc Size: %d\n",
+            packet->length, igbe->regs.rctl.descSize());
+    assert(packet->length < igbe->regs.rctl.descSize());
+    // Return false (no DMA started) if no unused descriptor is cached.
+    if (!unusedCache.size())
+        return false;
+    // Remember the packet; pktComplete() reads pktPtr to fill the descriptor.
+    pktPtr = packet;
+    // DMA the data to the head descriptor's buffer, handing over pktEvent.
+    igbe->dmaWrite(igbe->platform->pciToDma(unusedCache.front()->buf),
+            packet->length, &pktEvent, packet->data);
+    return true;
+}
+
+void
+IGbE::RxDescCache::pktComplete()
+{
+    // Presumably run via pktEvent once the DMA begun in writePacket() is done:
+    // fills in the head unused descriptor and moves it to the used list.
+    assert(unusedCache.size());
+    RxDesc *desc = unusedCache.front();
+    uint16_t crcfixup = igbe->regs.rctl.secrc() ? 0 : 4 ;
+    desc->len = htole((uint16_t)(pktPtr->length + crcfixup));
+    DPRINTF(EthernetDesc, "pktPtr->length: %d stripcrc offset: %d value written: %d %d\n",
+            pktPtr->length, crcfixup,
+            htole((uint16_t)(pktPtr->length + crcfixup)),
+            (uint16_t)(pktPtr->length + crcfixup));
+
+    // no support for anything but starting at 0
+    assert(igbe->regs.rxcsum.pcss() == 0);
+
+    DPRINTF(EthernetDesc, "RxDesc: Packet written to memory updating Descriptor\n");
+
+    uint8_t status = RXDS_DD | RXDS_EOP;
+    uint8_t err = 0;
+    IpPtr ip(pktPtr);
+    if (ip) {
+        if (igbe->regs.rxcsum.ipofld()) {
+            DPRINTF(EthernetDesc, "RxDesc: Checking IP checksum\n");
+            status |= RXDS_IPCS;
+            desc->csum = htole(cksum(ip));
+            if (cksum(ip) != 0) {
+                err |= RXDE_IPE;
+                DPRINTF(EthernetDesc, "RxDesc: Checksum is bad!!\n");
+            }
+        }
+        TcpPtr tcp(ip);
+        if (tcp && igbe->regs.rxcsum.tuofld()) {
+            DPRINTF(EthernetDesc, "RxDesc: Checking TCP checksum\n");
+            status |= RXDS_TCPCS;
+            desc->csum = htole(cksum(tcp));
+            if (cksum(tcp) != 0) {
+                DPRINTF(EthernetDesc, "RxDesc: Checksum is bad!!\n");
+                err |= RXDE_TCPE;
+            }
+        }
+        // Same offload check for UDP: verify the UDP header, not the TCP one.
+        UdpPtr udp(ip);
+        if (udp && igbe->regs.rxcsum.tuofld()) {
+            DPRINTF(EthernetDesc, "RxDesc: Checking UDP checksum\n");
+            status |= RXDS_UDPCS;
+            desc->csum = htole(cksum(udp));
+            if (cksum(udp) != 0) {
+                DPRINTF(EthernetDesc, "RxDesc: Checksum is bad!!\n");
+                err |= RXDE_TCPE;
+            }
+        }
+    } // if ip
+
+    desc->status = htole(status);
+    desc->errors = htole(err);
+
+    // No vlan support at this point... just set it to 0
+    desc->vlan = 0;
+
+    // Deal with the rx timer interrupts
+    if (igbe->regs.rdtr.delay()) {
+        DPRINTF(EthernetSM, "RXS: Scheduling DTR for %d\n",
+                igbe->regs.rdtr.delay() * igbe->intClock());
+        if (igbe->rdtrEvent.scheduled())
+            igbe->rdtrEvent.reschedule(curTick + igbe->regs.rdtr.delay() *
+                    igbe->intClock());
+        else
+            igbe->rdtrEvent.schedule(curTick + igbe->regs.rdtr.delay() *
+                    igbe->intClock());
+    }
+
+    if (igbe->regs.radv.idv() && igbe->regs.rdtr.delay()) {
+        DPRINTF(EthernetSM, "RXS: Scheduling ADV for %d\n",
+                igbe->regs.radv.idv() * igbe->intClock());
+        if (!igbe->radvEvent.scheduled())
+            igbe->radvEvent.schedule(curTick + igbe->regs.radv.idv() *
+                    igbe->intClock());
+    }
+
+    // if neither radv or rdtr, maybe itr is set...
+    if (!igbe->regs.rdtr.delay()) {
+        DPRINTF(EthernetSM, "RXS: Receive interrupt delay disabled, posting IT_RXT\n");
+        igbe->postInterrupt(IT_RXT);
+    }
+
+    // If the packet is small enough, interrupt appropriately
+    // I wonder if this is delayed or not?!
+    if (pktPtr->length <= igbe->regs.rsrpd.idv())
+        igbe->postInterrupt(IT_SRPD);
+
+    DPRINTF(EthernetDesc, "RxDesc: Processing of this descriptor complete\n");
+    unusedCache.pop_front();
+    usedCache.push_back(desc);
+    pktPtr = NULL;
+    enableSm();
+    pktDone = true;
+}
+
+void
+IGbE::RxDescCache::enableSm()
+{
+    igbe->rxTick = true;        // tick() runs rxStateMachine() while this is set
+    igbe->restartClock();       // schedules a tick if none is pending
+}
+
+bool
+IGbE::RxDescCache::packetDone()
+{
+    // One-shot query: report whether a packet finished since the last call,
+    // clearing the flag so the same completion is not reported twice.
+    const bool finished = pktDone;
+    pktDone = false;
+    return finished;
+}
+
+///////////////////////////////////// IGbE::TxDesc /////////////////////////////////
+
+IGbE::TxDescCache::TxDescCache(IGbE *i, const std::string n, int s)
+    : DescCache<TxDesc>(i,n, s), pktDone(false), isTcp(false), pktWaiting(false),
+      hLen(0), pktEvent(this)
+    // Starts idle: no packet done or waiting, no bytes gathered (hLen == 0).
+{
+}
+
+int
+IGbE::TxDescCache::getPacketSize()
+{
+    assert(unusedCache.size());
+
+    DPRINTF(EthernetDesc, "TxDesc: Starting processing of descriptor\n");
+
+    // Retire any context descriptors at the head of the cache; only their
+    // TCP-vs-UDP flag is remembered (pktComplete() uses it for checksums).
+    for (;;) {
+        if (!unusedCache.size())
+            return -1;  // cache drained without reaching a packet descriptor
+
+        TxDesc *head = unusedCache.front();
+        if (!TxdOp::isContext(head))
+            break;
+
+        DPRINTF(EthernetDesc, "TxDesc: Got context descriptor type... skipping\n");
+        isTcp = TxdOp::tcp(head) ? true : false;
+
+        // make sure it's ipv4
+        assert(TxdOp::ip(head));
+
+        TxdOp::setDd(head);
+        unusedCache.pop_front();
+        usedCache.push_back(head);
+    }
+
+    DPRINTF(EthernetDesc, "TxDesc: Next TX packet is %d bytes\n",
+            TxdOp::getLen(unusedCache.front()));
+
+    return TxdOp::getLen(unusedCache.front());
+}
+
+void
+IGbE::TxDescCache::getPacketData(EthPacketPtr p)
+{
+    // Start the DMA read of the head descriptor's buffer into packet p.
+    assert(unusedCache.size());
+    TxDesc *head = unusedCache.front();
+
+    // Only non-empty legacy or data descriptors are accepted here.
+    assert((TxdOp::isLegacy(head) || TxdOp::isData(head)) &&
+           TxdOp::getLen(head));
+
+    // Record the in-flight packet; pktComplete() asserts and uses pktPtr.
+    pktPtr = p;
+    pktWaiting = true;
+
+    // The data lands hLen bytes into the packet buffer.
+    DPRINTF(EthernetDesc, "TxDesc: Starting DMA of packet\n");
+    igbe->dmaRead(igbe->platform->pciToDma(TxdOp::getBuf(head)),
+            TxdOp::getLen(head), &pktEvent, p->data + hLen);
+}
+
+void
+IGbE::TxDescCache::pktComplete()
+{
+    // Finishes the head descriptor after its packet DMA (see getPacketData()).
+    TxDesc *desc;
+    assert(unusedCache.size());
+    assert(pktPtr);
+
+    DPRINTF(EthernetDesc, "TxDesc: DMA of packet complete\n");
+
+
+    desc = unusedCache.front();
+    assert((TxdOp::isLegacy(desc) || TxdOp::isData(desc)) && TxdOp::getLen(desc));
+
+    DPRINTF(EthernetDesc, "TxDescriptor data d1: %#llx d2: %#llx\n", desc->d1, desc->d2);
+    // No EOP: retire this descriptor, remember its length in hLen, and return.
+    if (!TxdOp::eop(desc)) {
+        assert(hLen == 0);
+        hLen = TxdOp::getLen(desc);
+        unusedCache.pop_front();
+        usedCache.push_back(desc);
+        pktDone = true;
+        pktWaiting = false;
+        pktPtr = NULL;
+
+        DPRINTF(EthernetDesc, "TxDesc: Partial Packet Descriptor Done\n");
+        return;
+    }
+
+    // Set the length of the data in the EtherPacket
+    pktPtr->length = TxdOp::getLen(desc) + hLen;
+
+    // no support for vlans
+    assert(!TxdOp::vle(desc));
+
+    // we always report status
+    assert(TxdOp::rs(desc));
+
+    // we only support single packet descriptors at this point
+    assert(TxdOp::eop(desc));
+
+    // set that this packet is done
+    TxdOp::setDd(desc);
+
+    DPRINTF(EthernetDesc, "TxDescriptor data d1: %#llx d2: %#llx\n", desc->d1, desc->d2);
+
+    // Checksums are only offloaded for new descriptor types
+    if (TxdOp::isData(desc) && ( TxdOp::ixsm(desc) || TxdOp::txsm(desc)) ) {
+        DPRINTF(EthernetDesc, "TxDesc: Calculating checksums for packet\n");
+        IpPtr ip(pktPtr);
+        if (TxdOp::ixsm(desc)) {
+            ip->sum(0);
+            ip->sum(cksum(ip));
+            DPRINTF(EthernetDesc, "TxDesc: Calculated IP checksum\n");
+        }
+       if (TxdOp::txsm(desc)) {
+           if (isTcp) {
+                TcpPtr tcp(ip);
+                tcp->sum(0);
+                tcp->sum(cksum(tcp));
+                DPRINTF(EthernetDesc, "TxDesc: Calculated TCP checksum\n");
+           } else {
+                UdpPtr udp(ip);
+                udp->sum(0);
+                udp->sum(cksum(udp));
+                DPRINTF(EthernetDesc, "TxDesc: Calculated UDP checksum\n");
+           }
+        }
+    }
+
+    if (TxdOp::ide(desc)) {
+        // Deal with the tx timer interrupts
+        DPRINTF(EthernetDesc, "TxDesc: Descriptor had IDE set\n");
+        if (igbe->regs.tidv.idv()) {
+            DPRINTF(EthernetDesc, "TxDesc: setting tidv\n");
+            if (igbe->tidvEvent.scheduled())
+                igbe->tidvEvent.reschedule(curTick + igbe->regs.tidv.idv() *
+                        igbe->intClock());
+            else
+                igbe->tidvEvent.schedule(curTick + igbe->regs.tidv.idv() *
+                        igbe->intClock());
+        }
+
+        if (igbe->regs.tadv.idv() && igbe->regs.tidv.idv()) {
+            DPRINTF(EthernetDesc, "TxDesc: setting tadv\n");
+            if (!igbe->tadvEvent.scheduled())
+                igbe->tadvEvent.schedule(curTick + igbe->regs.tadv.idv() *
+                        igbe->intClock());
+        }
+    }
+
+    // Retire the descriptor: move it to the used list and reset the
+    // per-packet state (pktDone, pktWaiting, pktPtr, hLen).
+    unusedCache.pop_front();
+    usedCache.push_back(desc);
+    pktDone = true;
+    pktWaiting = false;
+    pktPtr = NULL;
+
+    hLen = 0;
+    DPRINTF(EthernetDesc, "TxDesc: Descriptor Done\n");
+    // NOTE(review): test is WTHRESH >= used but the message says used > WTHRESH.
+    if (igbe->regs.txdctl.wthresh() == 0) {
+        DPRINTF(EthernetDesc, "TxDesc: WTHRESH == 0, writing back descriptor\n");
+        writeback(0);
+    } else if (igbe->regs.txdctl.wthresh() >= usedCache.size()) {
+        DPRINTF(EthernetDesc, "TxDesc: used > WTHRESH, writing back descriptor\n");
+        writeback((igbe->cacheBlockSize()-1)>>4);
+    }
+
+}
+
+bool
+IGbE::TxDescCache::packetAvailable()
+{
+    // One-shot query: report whether a descriptor finished since the last
+    // call, clearing the flag so the same completion is not reported twice.
+    const bool finished = pktDone;
+    pktDone = false;
+    return finished;
+}
+
+void
+IGbE::TxDescCache::enableSm()
+{
+    igbe->txTick = true;        // tick() runs txStateMachine() while this is set
+    igbe->restartClock();       // schedules a tick if none is pending
+}
+
+
+
+
+///////////////////////////////////// IGbE /////////////////////////////////
+
+void
+IGbE::restartClock()
+{   // Arm tickEvent only if it is idle and the RX or TX side wants ticks.
+    if (!tickEvent.scheduled() && (rxTick || txTick))
+        tickEvent.schedule((curTick/cycles(1)) * cycles(1) + cycles(1));
 }
 
 
 void
+IGbE::txStateMachine()
+{
+    if (!regs.tctl.en()) {
+        txTick = false;
+        DPRINTF(EthernetSM, "TXS: TX disabled, stopping ticking\n");
+        return;
+    }
+
+    // If we have a packet available and its length is not 0 (meaning it's not
+    // a multidescriptor packet) put it in the fifo, otherwise on the next
+    // iteration we'll get the rest of the data
+    if (txPacket && txDescCache.packetAvailable() && txPacket->length) {
+        bool success;
+        DPRINTF(EthernetSM, "TXS: packet placed in TX FIFO\n");
+        success = txFifo.push(txPacket);
+        assert(success);
+        txPacket = NULL;
+        return;
+    }
+
+    // Only support descriptor granularity
+    assert(regs.txdctl.gran());
+    if (regs.txdctl.lwthresh() && txDescCache.descLeft() < (regs.txdctl.lwthresh() * 8)) {
+        DPRINTF(EthernetSM, "TXS: LWTHRESH caused posting of TXDLOW\n");
+        postInterrupt(IT_TXDLOW);
+    }
+    // Make sure there is a packet buffer to assemble the next packet into.
+    if (!txPacket) {
+        txPacket = new EthPacketData(16384);
+    }
+    // Only start on another descriptor when no packet DMA is reported waiting.
+    if (!txDescCache.packetWaiting()) {
+        if (txDescCache.descLeft() == 0) {
+            DPRINTF(EthernetSM, "TXS: No descriptors left in ring, forcing "
+                    "writeback stopping ticking and posting TXQE\n");
+            txDescCache.writeback(0);
+            txTick = false;
+            postInterrupt(IT_TXQE, true);
+        }
+        // NOTE(review): the ring-empty case above does not return; execution
+        // continues with the cache check below. Confirm this is intended.
+        if (!(txDescCache.descUnused())) {
+            DPRINTF(EthernetSM, "TXS: No descriptors available in cache, fetching and stopping ticking\n");
+            txTick = false;
+            txDescCache.fetchDescriptors();
+            return;
+        }
+        // getPacketSize() returns -1 when only context descriptors were cached.
+        int size;
+        size = txDescCache.getPacketSize();
+        if (size > 0 && txFifo.avail() > size) {
+            DPRINTF(EthernetSM, "TXS: Reserving %d bytes in FIFO and begining "
+                    "DMA of next packet\n", size);
+            txFifo.reserve(size);
+            txDescCache.getPacketData(txPacket);
+        } else {
+            DPRINTF(EthernetSM, "TXS: No packets to get, writing back used descriptors\n");
+            txDescCache.writeback(0);
+        }
+
+        return;
+    }
+}
+
+bool
+IGbE::ethRxPkt(EthPacketPtr pkt)
+{
+    DPRINTF(Ethernet, "RxFIFO: Receiving pcakte from wire\n");
+    if (!regs.rctl.en()) {
+        // Receiver disabled: drop the packet but still return true.
+        DPRINTF(Ethernet, "RxFIFO: RX not enabled, dropping\n");
+        return true;
+    }
+
+    // Wake the RX state machine (rxTick is set, so only the clock is checked).
+    rxTick = true;
+    if (!tickEvent.scheduled()) {
+        DPRINTF(EthernetSM, "RXS: received packet into fifo, starting ticking\n");
+        restartClock();
+    }
+
+    if (rxFifo.push(pkt))
+        return true;
+    DPRINTF(Ethernet, "RxFIFO: Packet won't fit in fifo... dropped\n");
+    postInterrupt(IT_RXO, true);
+    return false;
+}
+
+
+void
+IGbE::rxStateMachine()
+{
+    if (!regs.rctl.en()) {
+        rxTick = false;
+        DPRINTF(EthernetSM, "RXS: RX disabled, stopping ticking\n");
+        return;
+    }
+
+    // If the packet is done check for interrupts/descriptors/etc
+    if (rxDescCache.packetDone()) {
+        DPRINTF(EthernetSM, "RXS: Packet completed DMA to memory\n");
+        int descLeft = rxDescCache.descLeft();
+        switch (regs.rctl.rdmts()) {    // cases deliberately fall through
+            case 2: if (descLeft > .125 * regs.rdlen()) break;  // else fall through
+            case 1: if (descLeft > .250 * regs.rdlen()) break;  // else fall through
+            case 0: if (descLeft > .500 * regs.rdlen())  break;
+                DPRINTF(Ethernet, "RXS: Interrupting (RXDMT) because of descriptors left\n");
+                postInterrupt(IT_RXDMT);
+                break;
+        }
+
+        if (descLeft == 0) {
+            DPRINTF(EthernetSM, "RXS: No descriptors left in ring, forcing writeback\n");
+            rxDescCache.writeback(0);
+            DPRINTF(EthernetSM, "RXS: No descriptors left, stopping ticking\n");
+            rxTick = false;
+        }
+
+        // only support descriptor granularities
+        assert(regs.rxdctl.gran());
+
+        if (regs.rxdctl.wthresh() >= rxDescCache.descUsed()) {
+            DPRINTF(EthernetSM, "RXS: Writing back because WTHRESH >= descUsed\n");
+            if (regs.rxdctl.wthresh() < (cacheBlockSize()>>4))
+                rxDescCache.writeback(regs.rxdctl.wthresh()-1);
+            else
+                rxDescCache.writeback((cacheBlockSize()-1)>>4);
+        }
+
+        if ((rxDescCache.descUnused() < regs.rxdctl.pthresh()) &&
+             ((rxDescCache.descLeft() - rxDescCache.descUnused()) > regs.rxdctl.hthresh())) {
+            DPRINTF(EthernetSM, "RXS: Fetching descriptors because descUnused < PTHRESH\n");
+            rxDescCache.fetchDescriptors();
+        }
+
+        if (rxDescCache.descUnused() == 0) {
+            DPRINTF(EthernetSM, "RXS: No descriptors available in cache, stopping ticking\n");
+            rxTick = false;
+            DPRINTF(EthernetSM, "RXS: Fetching descriptors because none available\n");
+            rxDescCache.fetchDescriptors();
+        }
+        return;
+    }
+    // No completion to handle: try to start DMA of the next received packet.
+    if (!rxDescCache.descUnused()) {
+        DPRINTF(EthernetSM, "RXS: No descriptors available in cache, stopping ticking\n");
+        rxTick = false;
+        DPRINTF(EthernetSM, "RXS: No descriptors available, fetching\n");
+        rxDescCache.fetchDescriptors();
+        return;
+    }
+
+    if (rxFifo.empty()) {
+        DPRINTF(EthernetSM, "RXS: RxFIFO empty, stopping ticking\n");
+        rxTick = false;
+        return;
+    }
+    // Hand the packet at the head of the FIFO to the descriptor cache.
+    EthPacketPtr pkt;
+    pkt = rxFifo.front();
+
+    DPRINTF(EthernetSM, "RXS: Writing packet into memory\n");
+    if (!rxDescCache.writePacket(pkt)) {
+        return;
+    }
+
+    DPRINTF(EthernetSM, "RXS: Removing packet from FIFO\n");
+    rxFifo.pop();
+    DPRINTF(EthernetSM, "RXS: stopping ticking until packet DMA completes\n");
+    rxTick = false;
+}
+
+void
+IGbE::txWire()
+{
+    // Nothing is queued for the wire.
+    if (txFifo.empty())
+        return;
+
+    // Keep the TX side ticking while packets remain queued.
+    txTick = true;
+    // Dequeue the head packet only once the interface has accepted it.
+    if (!etherInt->sendPacket(txFifo.front()))
+        return;
+    DPRINTF(Ethernet, "TxFIFO: Successful transmit, bytes in fifo: %d\n",
+            txFifo.avail());
+    txFifo.pop();
+}
+
+void
+IGbE::tick()
+{
+    DPRINTF(EthernetSM, "IGbE: -------------- Cycle --------------\n");
+    // Run whichever state machines currently want ticks.
+    if (rxTick)
+        rxStateMachine();
+
+    if (txTick) {
+        txStateMachine();
+        txWire();
+    }
+    // The calls above may change the flags; keep ticking only if one is set.
+    if (rxTick || txTick)
+        tickEvent.schedule(curTick + cycles(1));
+}
+
+void
 IGbE::ethTxDone()
 {
-    panic("Need to implemenet\n");
+    // Re-enable TX ticking and restart the clock if no tick is pending
+    txTick = true;
+    restartClock();
+    DPRINTF(Ethernet, "TxFIFO: Transmission complete\n");
 }
 
 void
@@ -355,6 +1251,14 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(IGbE)
     Param<uint32_t> pci_func;
     Param<Tick> pio_latency;
     Param<Tick> config_latency;
+    Param<std::string> hardware_address;
+    Param<bool> use_flow_control;
+    Param<int> rx_fifo_size;
+    Param<int> tx_fifo_size;
+    Param<int> rx_desc_cache_size;
+    Param<int> tx_desc_cache_size;
+    Param<Tick> clock;
+
 
 END_DECLARE_SIM_OBJECT_PARAMS(IGbE)
 
@@ -367,7 +1271,14 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(IGbE)
     INIT_PARAM(pci_dev, "PCI device number"),
     INIT_PARAM(pci_func, "PCI function code"),
     INIT_PARAM_DFLT(pio_latency, "Programmed IO latency in bus cycles", 1),
-    INIT_PARAM(config_latency, "Number of cycles for a config read or write")
+    INIT_PARAM(config_latency, "Number of cycles for a config read or write"),
+    INIT_PARAM(hardware_address, "Ethernet Hardware Address"),
+    INIT_PARAM(use_flow_control,"Should the device use xon/off packets"),
+    INIT_PARAM(rx_fifo_size,"Size of the RX FIFO"),
+    INIT_PARAM(tx_fifo_size,"Size of the TX FIFO"),
+    INIT_PARAM(rx_desc_cache_size,"Size of the RX descriptor cache"),
+    INIT_PARAM(tx_desc_cache_size,"Size of the TX descriptor cache"),
+    INIT_PARAM(clock,"Clock rate for the device to tick at")
 
 END_INIT_SIM_OBJECT_PARAMS(IGbE)
 
@@ -385,6 +1296,14 @@ CREATE_SIM_OBJECT(IGbE)
     params->functionNum = pci_func;
     params->pio_delay = pio_latency;
     params->config_delay = config_latency;
+    params->hardware_address = hardware_address;
+    params->use_flow_control = use_flow_control;
+    params->rx_fifo_size = rx_fifo_size;
+    params->tx_fifo_size = tx_fifo_size;
+    params->rx_desc_cache_size = rx_desc_cache_size;
+    params->tx_desc_cache_size = tx_desc_cache_size;
+    params->clock = clock;
+
 
     return new IGbE(params);
 }
diff --git a/src/dev/i8254xGBe.hh b/src/dev/i8254xGBe.hh
index ce4007263..a2b9f38d5 100644
--- a/src/dev/i8254xGBe.hh
+++ b/src/dev/i8254xGBe.hh
@@ -35,6 +35,9 @@
 #ifndef __DEV_I8254XGBE_HH__
 #define __DEV_I8254XGBE_HH__
 
+#include <deque>
+#include <string>
+
 #include "base/inet.hh"
 #include "base/statistics.hh"
 #include "dev/etherint.hh"
@@ -50,22 +53,451 @@ class IGbE : public PciDev
 {
   private:
     IGbEInt *etherInt;
+
+    // device registers
     iGbReg::Regs regs;
+
+    // eeprom data, status and control bits
     int eeOpBits, eeAddrBits, eeDataBits;
     uint8_t eeOpcode, eeAddr;
-
     uint16_t flash[iGbReg::EEPROM_SIZE];
 
+    // cached parameters from params struct
+    Tick tickRate;
+    bool useFlowControl;
+
+    // packet fifos
+    PacketFifo rxFifo;
+    PacketFifo txFifo;
+
+    // Packet that we are currently putting into the txFifo
+    EthPacketPtr txPacket;
+
+    // Should the Rx/Tx state machines tick?
+    bool rxTick;
+    bool txTick;
+
+    // RDTR timer expired: write back used RX descriptors (mask 0), post IT_RXT now
+    void rdtrProcess() { rxDescCache.writeback(0); postInterrupt(iGbReg::IT_RXT, true); }
+    //friend class EventWrapper<IGbE, &IGbE::rdtrProcess>;
+    EventWrapper<IGbE, &IGbE::rdtrProcess> rdtrEvent;
+
+    // RADV timer expired: write back used RX descriptors (mask 0), post IT_RXT now
+    void radvProcess() { rxDescCache.writeback(0); postInterrupt(iGbReg::IT_RXT, true); }
+    //friend class EventWrapper<IGbE, &IGbE::radvProcess>;
+    EventWrapper<IGbE, &IGbE::radvProcess> radvEvent;
+
+    // TADV timer expired: post IT_TXDW, ignoring the interrupt limiting timer
+    void tadvProcess() { postInterrupt(iGbReg::IT_TXDW, true); }
+    //friend class EventWrapper<IGbE, &IGbE::tadvProcess>;
+    EventWrapper<IGbE, &IGbE::tadvProcess> tadvEvent;
+
+    // TIDV timer expired: post IT_TXDW, ignoring the interrupt limiting timer
+    void tidvProcess() { postInterrupt(iGbReg::IT_TXDW, true); };
+    //friend class EventWrapper<IGbE, &IGbE::tidvProcess>;
+    EventWrapper<IGbE, &IGbE::tidvProcess> tidvEvent;
+
+    // Main event to tick the device
+    void tick();
+    //friend class EventWrapper<IGbE, &IGbE::tick>;
+    EventWrapper<IGbE, &IGbE::tick> tickEvent;
+
+
+    void rxStateMachine();
+    void txStateMachine();
+    void txWire();
+
+    /** Write an interrupt into the interrupt pending register and check mask
+     * and interrupt limit timer before sending interrupt to CPU
+     * @param t the type of interrupt we are posting
+     * @param now should we ignore the interrupt limiting timer
+     */
+    void postInterrupt(iGbReg::IntTypes t, bool now = false);
+
+    /** Check and see if changes to the mask register have caused an interrupt
+     * to need to be sent or perhaps removed an interrupt cause.
+     */
+    void chkInterrupt();
+
+    /** Send an interrupt to the cpu
+     */
+    void cpuPostInt();
+    // Event to moderate interrupts
+    EventWrapper<IGbE, &IGbE::cpuPostInt> interEvent;
+
+    /** Clear the interrupt line to the cpu
+     */
+    void cpuClearInt();
+
+    Tick intClock() { return Clock::Int::ns * 1024; } // presumably 1.024us in ticks - confirm
+
+    void restartClock();
+
+    template<class T>
+    class DescCache
+    {
+      protected:
+        virtual Addr descBase() const = 0;
+        virtual long descHead() const = 0;
+        virtual long descTail() const = 0;
+        virtual long descLen() const = 0;
+        virtual void updateHead(long h) = 0;
+        virtual void enableSm() = 0;
+        virtual void intAfterWb() const {}
+
+        std::deque<T*> usedCache;
+        std::deque<T*> unusedCache;
+
+        T *fetchBuf;
+        T *wbBuf;
+
+        // Pointer to the device we cache for
+        IGbE *igbe;
+
+        // Name of this descriptor cache
+        std::string _name;
+
+        // How far we've cached
+        int cachePnt;
+
+        // The size of the descriptor cache
+        int size;
+
+        // How many descriptors we are currently fetching
+        int curFetching;
+
+        // How many descriptors we are currently writing back
+        int wbOut;
+
+        // Set if we wrote back to the end of the descriptor ring and are going
+        // to have to wrap and write more
+        bool moreToWb;
+
+        // What the alignment is of the next descriptor writeback
+        Addr wbAlignment;
+
+        /** The packet that is currently being DMAed to memory, if any
+         */
+        EthPacketPtr pktPtr;
+
+      public:
+        /** Create an empty cache of s descriptors named n for device i. The
+         * writeback flags start cleared so they never hold garbage. */
+        DescCache(IGbE *i, const std::string n, int s)
+            : igbe(i), _name(n), cachePnt(0), size(s), curFetching(0), wbOut(0),
+              moreToWb(false), wbAlignment(0), pktPtr(NULL),
+              fetchEvent(this), wbEvent(this)
+        { fetchBuf = new T[size]; wbBuf = new T[size]; }
+
+        /** Free the cached descriptors and the fetch/writeback staging
+         * buffers that the constructor allocated with new[]. */
+        virtual ~DescCache()
+        { reset(); delete[] fetchBuf; delete[] wbBuf; }
+
+        std::string name() { return _name; } // returns a copy of _name
+
+        /** If the address/len/head change when we've got descriptors that are
+         * dirty that is very bad. This function checks that we don't: it
+         * panics if either usedCache or unusedCache is non-empty.
+         */
+        void areaChanged()
+        {
+            if (usedCache.size() > 0 || unusedCache.size() > 0)
+                panic("Descriptor Address, Length or Head changed. Bad\n");
+        }
+
+        void writeback(Addr aMask)
+        {
+            int curHead = descHead();
+            int max_to_wb = usedCache.size();
+            // Start a DMA of used descriptors back to the ring at descHead()
+            DPRINTF(EthernetDesc, "Writing back descriptors head: %d tail: "
+                    "%d len: %d cachePnt: %d max_to_wb: %d descleft: %d\n",
+                    curHead, descTail(), descLen(), cachePnt, max_to_wb,
+                    descLeft());
+
+            // Check if this writeback is less restrictive than the previous
+            // and if so setup another one immediately following it
+            if (wbOut && (aMask < wbAlignment)) {
+                moreToWb = true;
+                wbAlignment = aMask;
+                DPRINTF(EthernetDesc, "Writing back already in process, returning\n");
+                return;
+            }
+            // NOTE(review): the two stores below also run while a writeback is
+            // in flight (wbOut != 0, aMask >= wbAlignment) -- confirm intended
+            moreToWb = false;
+            wbAlignment = aMask;
+            // Stop at the end of the ring; wbComplete() re-issues the remainder
+            if (max_to_wb + curHead > descLen()) {
+                max_to_wb = descLen() - curHead;
+                moreToWb = true;
+                // this is by definition aligned correctly
+            } else if (aMask != 0) {
+                // align the wb point to the mask
+                max_to_wb = max_to_wb & ~aMask;
+            }
+
+            DPRINTF(EthernetDesc, "Writing back %d descriptors\n", max_to_wb);
+
+            if (max_to_wb <= 0 || wbOut)
+                return;
+            // wbOut != 0 marks a writeback in flight until wbComplete() clears it
+            wbOut = max_to_wb;
+            // Stage copies in wbBuf, then free the originals from usedCache
+            for (int x = 0; x < wbOut; x++)
+                memcpy(&wbBuf[x], usedCache[x], sizeof(T));
+
+            for (int x = 0; x < wbOut; x++) {
+                assert(usedCache.size());
+                delete usedCache[0];
+                usedCache.pop_front();
+            };
+
+            // DMA the staged descriptors to ring slot curHead, passing wbEvent
+            assert(wbOut);
+            igbe->dmaWrite(igbe->platform->pciToDma(descBase() + curHead * sizeof(T)),
+                    wbOut * sizeof(T), &wbEvent, (uint8_t*)wbBuf);
+        }
+
+        /** Fetch a chunk of descriptors into the descriptor cache.
+         * Calls fetchComplete when the memory system returns the data
+         */
+        void fetchDescriptors()
+        {
+            // Tail behind cachePnt means the ring wrapped: fetch to the ring's
+            // end. (size_t is unsigned, so a "< 0" test here can never fire.)
+            size_t max_to_fetch = (descTail() >= cachePnt) ?
+                descTail() - cachePnt : descLen() - cachePnt;
+            // Never fetch more than the cache still has room for
+            max_to_fetch = std::min(max_to_fetch, (size - usedCache.size() -
+                        unusedCache.size()));
+            DPRINTF(EthernetDesc, "Fetching descriptors head: %d tail: "
+                    "%d len: %d cachePnt: %d max_to_fetch: %d descleft: %d\n",
+                    descHead(), descTail(), descLen(), cachePnt,
+                    max_to_fetch, descLeft());
+
+            // Nothing to do
+            if (max_to_fetch == 0 || curFetching)
+                return;
+
+            // So we don't have two descriptor fetches going on at once
+            curFetching = max_to_fetch;
+
+            DPRINTF(EthernetDesc, "Fetching descriptors at %#x (%#x), size: %#x\n",
+                    descBase() + cachePnt * sizeof(T),
+                    igbe->platform->pciToDma(descBase() + cachePnt * sizeof(T)),
+                    curFetching * sizeof(T));
+
+            assert(curFetching);
+            igbe->dmaRead(igbe->platform->pciToDma(descBase() + cachePnt * sizeof(T)),
+                    curFetching * sizeof(T), &fetchEvent, (uint8_t*)fetchBuf);
+        }
+
+
+        /** Called by event when dma to read descriptors is completed:
+         * moves the fetched descriptors into unusedCache, advances cachePnt. */
+        void fetchComplete()
+        {
+            T *newDesc;
+            for (int x = 0; x < curFetching; x++) {
+                newDesc = new T;
+                memcpy(newDesc, &fetchBuf[x], sizeof(T));
+                unusedCache.push_back(newDesc);
+            }
+
+#ifndef NDEBUG
+            int oldCp = cachePnt;
+#endif
+
+            // Valid ring indices are 0..descLen()-1, so a fetch that ended on
+            // the last descriptor must wrap cachePnt back to 0 (hence >=).
+            cachePnt += curFetching;
+            if (cachePnt >= descLen())
+                cachePnt -= descLen();
+            curFetching = 0;
+
+            DPRINTF(EthernetDesc, "Fetching complete cachePnt %d -> %d\n",
+                    oldCp, cachePnt);
+            // enableSm() is supplied by the Rx/Tx subclasses
+            enableSm();
+        }
+
+        EventWrapper<DescCache, &DescCache::fetchComplete> fetchEvent;
+
+        /** Called by event when dma to writeback descriptors is completed:
+         * advances the ring head, chains a pending writeback, calls intAfterWb */
+        void wbComplete()
+        {
+            long  curHead = descHead();
+#ifndef NDEBUG
+            long oldHead = curHead;
+#endif
+            // Advance past the descriptors just written and free the wb slot
+            curHead += wbOut;
+            wbOut = 0;
+            // Reaching descLen() is one past the last ring slot: wrap to start
+            if (curHead >= descLen())
+                curHead -= descLen();
+
+            // Update the head
+            updateHead(curHead);
+
+            DPRINTF(EthernetDesc, "Writeback complete curHead %d -> %d\n",
+                    oldHead, curHead);
+
+            // If we still have more to wb, call wb now
+            if (moreToWb) {
+                DPRINTF(EthernetDesc, "Writeback has more todo\n");
+                writeback(wbAlignment);
+            }
+            intAfterWb();
+        }
+
+
+        EventWrapper<DescCache, &DescCache::wbComplete> wbEvent;
+
+        /* Return the number of descriptors left in the ring, so the device has
+         * a way to figure out if it needs to interrupt.
+         */
+        int descLeft() const
+        {
+            int left = unusedCache.size();
+            // cachePnt past the tail: count to the ring's end plus start..tail
+            if (cachePnt > descTail())
+                left += (descLen() - cachePnt + descTail());
+            else
+                left += (descTail() - cachePnt);
+            return left;
+        }
+
+        /* Return the number of descriptors used and not written back
+         * (the current size of usedCache). */
+        int descUsed() const { return usedCache.size(); }
+
+        /* Return the number of cached descriptors not yet used (unusedCache). */
+        int descUnused() const {return unusedCache.size(); }
+
+        /* Get into a state where the descriptor address/head/etc could be
+         * changed: frees and forgets every cached descriptor. */
+        void reset()
+        {
+            DPRINTF(EthernetDesc, "Reseting descriptor cache\n");
+
+            // Drain each deque from the front, deleting as we go, instead of
+            // indexing with a signed int that gets compared against size().
+            for (; !usedCache.empty(); usedCache.pop_front())
+                delete usedCache.front();
+            for (; !unusedCache.empty(); unusedCache.pop_front())
+                delete unusedCache.front();
+        }
+
+     };
+
+
+    class RxDescCache : public DescCache<iGbReg::RxDesc>
+    {
+      protected:
+        virtual Addr descBase() const { return igbe->regs.rdba(); }
+        virtual long descHead() const { return igbe->regs.rdh(); }
+        virtual long descLen() const { return igbe->regs.rdlen() >> 4; }
+        virtual long descTail() const { return igbe->regs.rdt(); }
+        virtual void updateHead(long h) { igbe->regs.rdh(h); }
+        virtual void enableSm();
+        // NOTE(review): ">> 4" in descLen presumably turns bytes into 16-byte descriptors
+        bool pktDone;
+
+      public:
+        RxDescCache(IGbE *i, std::string n, int s);
+
+        /** Write the given packet into the buffer(s) pointed to by the
+         * descriptor and update the book keeping. Should only be called when
+         * there are no dma's pending.
+         * @param packet ethernet packet to write
+         * @return if the packet could be written (there was a free descriptor)
+         */
+        bool writePacket(EthPacketPtr packet);
+        /** Called by event when dma to write packet is completed
+         */
+        void pktComplete();
+
+        /** Check if the dma on the packet has completed.
+         * NOTE(review): presumably reports pktDone; body not visible - confirm.
+         */
+        bool packetDone();
+        // Event object bound to pktComplete()
+        EventWrapper<RxDescCache, &RxDescCache::pktComplete> pktEvent;
+
+    };
+    friend class RxDescCache;
+
+    RxDescCache rxDescCache;
+
+    class TxDescCache  : public DescCache<iGbReg::TxDesc>
+    {
+      protected:
+        virtual Addr descBase() const { return igbe->regs.tdba(); }
+        virtual long descHead() const { return igbe->regs.tdh(); }
+        virtual long descTail() const { return igbe->regs.tdt(); }
+        virtual long descLen() const { return igbe->regs.tdlen() >> 4; }
+        virtual void updateHead(long h) { igbe->regs.tdh(h); }
+        virtual void enableSm();
+        virtual void intAfterWb() const { igbe->postInterrupt(iGbReg::IT_TXDW);}
+        // intAfterWb() is called at the end of wbComplete(); here it posts IT_TXDW
+        bool pktDone;
+        bool isTcp;
+        bool pktWaiting;
+        int hLen;
+
+      public:
+        TxDescCache(IGbE *i, std::string n, int s);
+
+        /** Tell the cache to DMA a packet from main memory into its buffer and
+         * return the size the of the packet to reserve space in tx fifo.
+         * @return size of the packet
+         */
+        int getPacketSize();
+        void getPacketData(EthPacketPtr p);
+
+        /** Ask if the packet has been transfered so the state machine can give
+         * it to the fifo.
+         * @return packet available in descriptor cache
+         */
+        bool packetAvailable();
+
+        /** Ask if we are still waiting for the packet to be transfered.
+         * @return packet still in transit.
+         */
+        bool packetWaiting() { return pktWaiting; }
+
+        /** Called by event when the packet dma completes (for TX presumably
+         * the dma from main memory, not a write - confirm). */
+        void pktComplete();
+        EventWrapper<TxDescCache, &TxDescCache::pktComplete> pktEvent;
+
+    };
+    friend class TxDescCache;
+
+    TxDescCache txDescCache;
 
   public:
     struct Params : public PciDev::Params
     {
-        ;
+        Net::EthAddr hardware_address;
+        bool use_flow_control;
+        int rx_fifo_size;
+        int tx_fifo_size;
+        int rx_desc_cache_size;
+        int tx_desc_cache_size;
+        Tick clock;
     };
 
     IGbE(Params *params);
     ~IGbE() {;}
 
+    Tick clock;  // ticks per device cycle; the multiplier used by cycles()
+    inline Tick cycles(int numCycles) const { return numCycles * clock; }
+
     virtual Tick read(PacketPtr pkt);
     virtual Tick write(PacketPtr pkt);
 
@@ -76,6 +508,7 @@ class IGbE : public PciDev
 
     void setEthInt(IGbEInt *i) { assert(!etherInt); etherInt = i; }
 
+
     const Params *params() const {return (const Params *)_params; }
 
     virtual void serialize(std::ostream &os);
diff --git a/src/dev/i8254xGBe_defs.hh b/src/dev/i8254xGBe_defs.hh
index ae0925356..8538c155b 100644
--- a/src/dev/i8254xGBe_defs.hh
+++ b/src/dev/i8254xGBe_defs.hh
@@ -31,433 +31,570 @@
 /* @file
  * Register and structure descriptions for Intel's 8254x line of gigabit ethernet controllers.
  */
+#include "base/bitfield.hh"
 
 namespace iGbReg {
 
-const uint32_t CTRL     = 0x00000; //*
-const uint32_t STATUS   = 0x00008; //*
-const uint32_t EECD     = 0x00010; //*
-const uint32_t EERD     = 0x00014; //*
-const uint32_t CTRL_EXT = 0x00018;
-const uint32_t PBA      = 0x01000;
-const uint32_t ICR      = 0x000C0; //*
-const uint32_t ITR      = 0x000C4;
-const uint32_t ICS      = 0x000C8;
-const uint32_t IMS      = 0x000D0;
-const uint32_t IMC      = 0x000D8; //*
-const uint32_t RCTL     = 0x00100; //*
-const uint32_t RDBAL    = 0x02800;
-const uint32_t RDBAH    = 0x02804;
-const uint32_t RDLEN    = 0x02808;
-const uint32_t RDH      = 0x02810;
-const uint32_t RDT      = 0x02818;
-const uint32_t RDTR     = 0x02820;
-const uint32_t RADV     = 0x0282C;
-const uint32_t RSRPD    = 0x02C00;
-const uint32_t TCTL     = 0x00400; //*
-const uint32_t TDBAL    = 0x03800;
-const uint32_t TDBAH    = 0x03804;
-const uint32_t TDLEN    = 0x03808;
-const uint32_t TDH      = 0x03810;
-const uint32_t THT      = 0x03818;
-const uint32_t TIDV     = 0x03820;
-const uint32_t TXDMAC   = 0x03000;
-const uint32_t TXDCTL   = 0x03828;
-const uint32_t TADV     = 0x0282C;
-const uint32_t TSPMT    = 0x03830;
-const uint32_t RXDCTL   = 0x02828;
-const uint32_t RXCSUM   = 0x05000;
-const uint32_t MANC     = 0x05820;//*
+
+// Registers used by the Intel GbE NIC
+const uint32_t REG_CTRL     = 0x00000;
+const uint32_t REG_STATUS   = 0x00008;
+const uint32_t REG_EECD     = 0x00010;
+const uint32_t REG_EERD     = 0x00014;
+const uint32_t REG_CTRL_EXT = 0x00018;
+const uint32_t REG_MDIC     = 0x00020;
+const uint32_t REG_FCAL     = 0x00028;
+const uint32_t REG_FCAH     = 0x0002C;
+const uint32_t REG_FCT      = 0x00030;
+const uint32_t REG_VET      = 0x00038;
+const uint32_t REG_PBA      = 0x01000;
+const uint32_t REG_ICR      = 0x000C0;
+const uint32_t REG_ITR      = 0x000C4;
+const uint32_t REG_ICS      = 0x000C8;
+const uint32_t REG_IMS      = 0x000D0;
+const uint32_t REG_IMC      = 0x000D8;
+const uint32_t REG_IAM      = 0x000E0;
+const uint32_t REG_RCTL     = 0x00100;
+const uint32_t REG_FCTTV    = 0x00170;
+const uint32_t REG_TIPG     = 0x00410;
+const uint32_t REG_AIFS     = 0x00458;
+const uint32_t REG_LEDCTL   = 0x00e00;
+const uint32_t REG_FCRTL    = 0x02160;
+const uint32_t REG_FCRTH    = 0x02168;
+const uint32_t REG_RDBAL    = 0x02800;
+const uint32_t REG_RDBAH    = 0x02804;
+const uint32_t REG_RDLEN    = 0x02808;
+const uint32_t REG_RDH      = 0x02810;
+const uint32_t REG_RDT      = 0x02818;
+const uint32_t REG_RDTR     = 0x02820;
+const uint32_t REG_RXDCTL   = 0x02828;
+const uint32_t REG_RADV     = 0x0282C;
+const uint32_t REG_TCTL     = 0x00400;
+const uint32_t REG_TDBAL    = 0x03800;
+const uint32_t REG_TDBAH    = 0x03804;
+const uint32_t REG_TDLEN    = 0x03808;
+const uint32_t REG_TDH      = 0x03810;
+const uint32_t REG_TDT      = 0x03818;
+const uint32_t REG_TIDV     = 0x03820;
+const uint32_t REG_TXDCTL   = 0x03828;
+const uint32_t REG_TADV     = 0x0382C;
+const uint32_t REG_CRCERRS  = 0x04000;
+const uint32_t REG_RXCSUM   = 0x05000;
+const uint32_t REG_MTA      = 0x05200;
+const uint32_t REG_RAL      = 0x05400;
+const uint32_t REG_RAH      = 0x05404;
+const uint32_t REG_VFTA     = 0x05600;
+
+const uint32_t REG_WUC      = 0x05800;
+const uint32_t REG_MANC     = 0x05820;
 
 const uint8_t EEPROM_READ_OPCODE_SPI    = 0x03;
 const uint8_t EEPROM_RDSR_OPCODE_SPI    = 0x05;
 const uint8_t EEPROM_SIZE               = 64;
+const uint16_t EEPROM_CSUM              = 0xBABA;
+
+const uint8_t VLAN_FILTER_TABLE_SIZE    = 128;
+const uint8_t RCV_ADDRESS_TABLE_SIZE    = 16;
+const uint8_t MULTICAST_TABLE_SIZE      = 128;
+const uint32_t STATS_REGS_SIZE           = 0x124;
+
+
+// Registers that are accessed in the PHY
+const uint8_t PHY_PSTATUS       = 0x1;
+const uint8_t PHY_PID           = 0x2;
+const uint8_t PHY_EPID          = 0x3;
+const uint8_t PHY_GSTATUS       = 10;
+const uint8_t PHY_EPSTATUS      = 15;
+const uint8_t PHY_AGC           = 18;
+
+// Receive Descriptor Status Flags
+const uint8_t RXDS_PIF         = 0x80;
+const uint8_t RXDS_IPCS        = 0x40;
+const uint8_t RXDS_TCPCS       = 0x20;
+const uint8_t RXDS_UDPCS       = 0x10;
+const uint8_t RXDS_VP          = 0x08;
+const uint8_t RXDS_IXSM        = 0x04;
+const uint8_t RXDS_EOP         = 0x02;
+const uint8_t RXDS_DD          = 0x01;
 
+// Receive Descriptor Error Flags
+const uint8_t RXDE_RXE         = 0x80;
+const uint8_t RXDE_IPE         = 0x40;
+const uint8_t RXDE_TCPE        = 0x20;
+const uint8_t RXDE_SEQ         = 0x04;
+const uint8_t RXDE_SE          = 0x02;
+const uint8_t RXDE_CE          = 0x01;
+
+// Interrupt types
+enum IntTypes
+{
+    IT_NONE    = 0x00000, //dummy value
+    IT_TXDW    = 0x00001,
+    IT_TXQE    = 0x00002,
+    IT_LSC     = 0x00004,
+    IT_RXSEQ   = 0x00008,
+    IT_RXDMT   = 0x00010,
+    IT_RXO     = 0x00040,
+    IT_RXT     = 0x00080,
+    IT_MADC    = 0x00200,
+    IT_RXCFG   = 0x00400,
+    IT_GPI0    = 0x02000,
+    IT_GPI1    = 0x04000,
+    IT_TXDLOW  = 0x08000,
+    IT_SRPD    = 0x10000,
+    IT_ACK     = 0x20000
+};
+
+// Receive Descriptor struct
 struct RxDesc {
     Addr buf;
     uint16_t len;
     uint16_t csum;
-    union {
-        uint8_t status;
-        struct { // these may be in the worng order
-            uint8_t dd:1;    // descriptor done (hw is done when 1)
-            uint8_t eop:1;   // end of packet
-            uint8_t xism:1;  // ignore checksum
-            uint8_t vp:1;    // packet is vlan packet
-            uint8_t rsv:1;   // reserved
-            uint8_t tcpcs:1; // TCP checksum done
-            uint8_t ipcs:1;  // IP checksum done
-            uint8_t pif:1;   // passed in-exact filter
-        } st;
-    };
-    union {
-        uint8_t errors;
-        struct {
-            uint8_t ce:1;   // crc error or alignment error
-            uint8_t se:1;   // symbol error
-            uint8_t seq:1;  // sequence error
-            uint8_t rsv:1;  // reserved
-            uint8_t cxe:1;  // carrier extension error
-            uint8_t tcpe:1; // tcp checksum error
-            uint8_t ipe:1;  // ip checksum error
-            uint8_t rxe:1;  // PX data error
-        } er;
-    };
-    union {
-        uint16_t special;
-        struct {
-            uint16_t vlan:12; //vlan id
-            uint16_t cfi:1;   // canocial form id
-            uint16_t pri:3;   // user priority
-        } sp;
-    };
+    uint8_t status;
+    uint8_t errors;
+    uint16_t vlan;
 };
 
-union TxDesc {
-    uint8_t data[16];
-    struct {
-        Addr buf;
-        uint16_t len;
-        uint8_t  cso;
-        union {
-            uint8_t command;
-            struct {
-                uint8_t eop:1;  // end of packet
-                uint8_t ifcs:1; // insert crc
-                uint8_t ic:1;   // insert checksum
-                uint8_t rs:1;   // report status
-                uint8_t rps:1;  // report packet sent
-                uint8_t dext:1; // extension
-                uint8_t vle:1;  // vlan enable
-                uint8_t ide:1;  // interrupt delay enable
-            } cmd;
-        };
-        union {
-            uint8_t status:4;
-            struct {
-                uint8_t dd:1; // descriptor done
-                uint8_t ec:1; // excess collisions
-                uint8_t lc:1; // late collision
-                uint8_t tu:1; // transmit underrun
-            } st;
-        };
-        uint8_t reserved:4;
-        uint8_t css;
-        union {
-            uint16_t special;
-            struct {
-                uint16_t vlan:12; //vlan id
-                uint16_t cfi:1;   // canocial form id
-                uint16_t pri:3;   // user priority
-            } sp;
-        };
-    } legacy;
-
-    // Type 0000 descriptor
-    struct {
-        uint8_t ipcss;
-        uint8_t ipcso;
-        uint16_t ipcse;
-        uint8_t tucss;
-        uint8_t tucso;
-        uint16_t tucse;
-        uint32_t paylen:20;
-        uint8_t dtype:4;
-        union {
-            uint8_t tucommand;
-            struct {
-                uint8_t tcp:1;  // tcp/udp
-                uint8_t ip:1;   // ip ipv4/ipv6
-                uint8_t tse:1;  // tcp segment enbale
-                uint8_t rs:1;   // report status
-                uint8_t rsv0:1; // reserved
-                uint8_t dext:1; // descriptor extension
-                uint8_t rsv1:1; // reserved
-                uint8_t ide:1;  // interrupt delay enable
-            } tucmd;
-        };
-        union {
-            uint8_t status:4;
-            struct {
-                uint8_t dd:1;
-                uint8_t rsvd:3;
-            } sta;
-        };
-        uint8_t reserved:4;
-        uint8_t hdrlen;
-        uint16_t mss;
-    } t0;
-
-    // Type 0001 descriptor
-    struct {
-        Addr buf;
-        uint32_t dtalen:20;
-        uint8_t dtype:4;
-        union {
-            uint8_t dcommand;
-            struct {
-                uint8_t eop:1;  // end of packet
-                uint8_t ifcs:1; // insert crc
-                uint8_t tse:1;  // segmentation enable
-                uint8_t rs:1;   // report status
-                uint8_t rps:1;  // report packet sent
-                uint8_t dext:1; // extension
-                uint8_t vle:1;  // vlan enable
-                uint8_t ide:1;  // interrupt delay enable
-            } dcmd;
-        };
-        union {
-            uint8_t status:4;
-            struct {
-                uint8_t dd:1; // descriptor done
-                uint8_t ec:1; // excess collisions
-                uint8_t lc:1; // late collision
-                uint8_t tu:1; // transmit underrun
-            } sta;
-        };
-        union {
-            uint8_t pktopts;
-            struct {
-                uint8_t ixsm:1; // insert ip checksum
-                uint8_t txsm:1; // insert tcp checksum
-            };
-        };
-        union {
-            uint16_t special;
-            struct {
-                uint16_t vlan:12; //vlan id
-                uint16_t cfi:1;   // canocial form id
-                uint16_t pri:3;   // user priority
-            } sp;
-        };
-    } t1;
-
-    // Junk to test descriptor type!
-    struct {
-        uint64_t junk;
-        uint32_t junk1:20;
-        uint8_t dtype;
-        uint8_t junk2:5;
-        uint8_t dext:1;
-        uint8_t junk3:2;
-        uint8_t junk4:4;
-        uint32_t junk5;
-    } type;
+struct TxDesc {
+    uint64_t d1;
+    uint64_t d2;
 };
 
+namespace TxdOp { // decoders for the raw d1/d2 words of a TxDesc; inline as this is a header
+const uint8_t TXD_CNXT = 0x0; // DTYP value of a context descriptor
+const uint8_t TXD_DATA = 0x1; // DTYP value of a data descriptor (must differ from TXD_CNXT)
+// Descriptor kind: d2 bit 29 clear means legacy, otherwise d2 bits 23:20 hold the type.
+inline bool isLegacy(TxDesc *d) { return !bits(d->d2,29,29); }
+inline uint8_t getType(TxDesc *d) { return bits(d->d2, 23,20); }
+inline bool isContext(TxDesc *d) { return !isLegacy(d) && getType(d) == TXD_CNXT; }
+inline bool isData(TxDesc *d) { return !isLegacy(d) && getType(d) == TXD_DATA; }
+// Buffer address is d1; length is 16 bits wide for legacy and 20 bits otherwise.
+inline Addr getBuf(TxDesc *d) { assert(isLegacy(d) || isData(d)); return d->d1; }
+inline Addr getLen(TxDesc *d) { if (isLegacy(d)) return bits(d->d2,15,0); else return bits(d->d2, 19,0); }
+inline void setDd(TxDesc *d)
+{
+    replaceBits(d->d2, 35, 32, ULL(1)); // d2 bits 35:32 become 0x1
+}
+// Command flags from d2 bits 31:24; the asserts reject the wrong descriptor kind.
+inline bool ide(TxDesc *d)  { return bits(d->d2, 31,31); }
+inline bool vle(TxDesc *d)  { assert(isLegacy(d) || isData(d)); return bits(d->d2, 30,30); }
+inline bool rs(TxDesc *d)   { return bits(d->d2, 27,27); }
+inline bool ic(TxDesc *d)   { assert(isLegacy(d) || isData(d)); return isLegacy(d) && bits(d->d2, 26,26); }
+inline bool tse(TxDesc *d)  { return (isData(d) || isContext(d)) && bits(d->d2, 26,26); }
+inline bool ifcs(TxDesc *d) { assert(isLegacy(d) || isData(d)); return bits(d->d2, 25,25); }
+inline bool eop(TxDesc *d)  { assert(isLegacy(d) || isData(d)); return bits(d->d2, 24,24); }
+inline bool ip(TxDesc *d)   { assert(isContext(d)); return bits(d->d2, 25,25); }
+inline bool tcp(TxDesc *d)  { assert(isContext(d)); return bits(d->d2, 24,24); }
+// Legacy-only fields: d2 bits 23:16 and 47:40.
+inline uint8_t getCso(TxDesc *d) { assert(isLegacy(d)); return bits(d->d2, 23,16); }
+inline uint8_t getCss(TxDesc *d) { assert(isLegacy(d)); return bits(d->d2, 47,40); }
+// Data-descriptor option bits; both return false for any other descriptor kind.
+inline bool ixsm(TxDesc *d)  { return isData(d) && bits(d->d2, 40,40); }
+inline bool txsm(TxDesc *d)  { return isData(d) && bits(d->d2, 41,41); }
+// Context-descriptor fields; each asserts that d really is a context descriptor.
+inline int tucse(TxDesc *d) { assert(isContext(d)); return bits(d->d1,63,48); }
+inline int tucso(TxDesc *d) { assert(isContext(d)); return bits(d->d1,47,40); }
+inline int tucss(TxDesc *d) { assert(isContext(d)); return bits(d->d1,39,32); }
+inline int ipcse(TxDesc *d) { assert(isContext(d)); return bits(d->d1,31,16); }
+inline int ipcso(TxDesc *d) { assert(isContext(d)); return bits(d->d1,15,8); }
+inline int ipcss(TxDesc *d) { assert(isContext(d)); return bits(d->d1,7,0); }
+inline int mss(TxDesc *d) { assert(isContext(d)); return bits(d->d2,63,48); }
+inline int hdrlen(TxDesc *d) { assert(isContext(d)); return bits(d->d2,47,40); }
+} // namespace TxdOp
+
+
+#define ADD_FIELD32(NAME, OFFSET, BITS) \
+    inline uint32_t NAME() { return bits(_data, OFFSET+BITS-1, OFFSET); } \
+    inline void NAME(uint32_t d) { replaceBits(_data, OFFSET+BITS-1, OFFSET,d); }
+// Both macros emit a getter NAME() and setter NAME(d) for BITS bits at OFFSET of _data
+#define ADD_FIELD64(NAME, OFFSET, BITS) \
+    inline uint64_t NAME() { return bits(_data, OFFSET+BITS-1, OFFSET); } \
+    inline void NAME(uint64_t d) { replaceBits(_data, OFFSET+BITS-1, OFFSET,d); }
+
 struct Regs {
-    union {  // 0x0000 CTRL Register
-       uint32_t reg;
-       struct {
-           uint8_t fd:1;      // full duplex
-           uint8_t bem:1;     // big endian mode
-           uint8_t pcipr:1;   // PCI priority
-           uint8_t lrst:1;    // link reset
-           uint8_t tme:1;     // test mode enable
-           uint8_t asde:1;    // Auto-speed detection
-           uint8_t slu:1;     // Set link up
-           uint8_t ilos:1;    // invert los-of-signal
-           uint8_t speed:2;   // speed selection bits
-           uint8_t be32:1;    // big endian mode 32
-           uint8_t frcspd:1;  // force speed
-           uint8_t frcdpx:1;  // force duplex
-           uint8_t duden:1;   // dock/undock enable
-           uint8_t dudpol:1;  // dock/undock polarity
-           uint8_t fphyrst:1; // force phy reset
-           uint8_t extlen:1;  // external link status enable
-           uint8_t rsvd:1;    // reserved
-           uint8_t sdp0d:1;   // software controlled pin data
-           uint8_t sdp1d:1;   // software controlled pin data
-           uint8_t sdp2d:1;   // software controlled pin data
-           uint8_t sdp3d:1;   // software controlled pin data
-           uint8_t sdp0i:1;   // software controlled pin dir
-           uint8_t sdp1i:1;   // software controlled pin dir
-           uint8_t sdp2i:1;   // software controlled pin dir
-           uint8_t sdp3i:1;   // software controlled pin dir
-           uint8_t rst:1;     // reset
-           uint8_t rfce:1;    // receive flow control enable
-           uint8_t tfce:1;    // transmit flow control enable
-           uint8_t rte:1;     // routing tag enable
-           uint8_t vme:1;     // vlan enable
-           uint8_t phyrst:1;  // phy reset
-       } ;
-    } ctrl;
-
-    union { // 0x0008 STATUS
-        uint32_t reg;
-        struct {
-            uint8_t fd:1;      // full duplex
-            uint8_t lu:1;      // link up
-            uint8_t func:2;    // function id
-            uint8_t txoff:1;   // transmission paused
-            uint8_t tbimode:1; // tbi mode
-            uint8_t speed:2;   // link speed
-            uint8_t asdv:2;    // auto speed detection value
-            uint8_t mtxckok:1; // mtx clock running ok
-            uint8_t pci66:1;   // In 66Mhz pci slot
-            uint8_t bus64:1;   // in 64 bit slot
-            uint8_t pcix:1;    // Pci mode
-            uint8_t pcixspd:1; // pci x speed
-            uint8_t reserved;  // reserved
-        } ;
-    } sts;
-
-    union { // 0x0010 EECD
-        uint32_t reg;
-        struct {
-            uint8_t sk:1;      // clack input to the eeprom
-            uint8_t cs:1;      // chip select to eeprom
-            uint8_t din:1;     // data input to eeprom
-            uint8_t dout:1;    // data output bit
-            uint8_t fwe:2;     // flash write enable
-            uint8_t ee_req:1;  // request eeprom access
-            uint8_t ee_gnt:1;  // grant eeprom access
-            uint8_t ee_pres:1; // eeprom present
-            uint8_t ee_size:1; // eeprom size
-            uint8_t ee_sz1:1;  // eeprom size
-            uint8_t rsvd:2;    // reserved
-            uint8_t ee_type:1; // type of eeprom
-        } ;
-    } eecd;
-
-    union { // 0x0014 EERD
-        uint32_t reg;
-        struct {
-            uint8_t start:1;  // start read
-            uint8_t done:1;   // done read
-            uint16_t addr:14; // address
-            uint16_t data;    // data
-        };
-    } eerd;
-
-    union { // 0x00C0 ICR
-        uint32_t reg;
-        struct {
-            uint8_t txdw:1;   // tx descr witten back
-            uint8_t txqe:1;   // tx queue empty
-            uint8_t lsc:1;    // link status change
-            uint8_t rxseq:1;  // rcv sequence error
-            uint8_t rxdmt0:1; // rcv descriptor min thresh
-            uint8_t rsvd1:1;  // reserved
-            uint8_t rxo:1;    // receive overrunn
-            uint8_t rxt0:1;   // receiver timer interrupt
-            uint8_t rsvd2:1;  // reserved
-            uint8_t mdac:1;   // mdi/o access complete
-            uint8_t rxcfg:1;  // recv /c/ ordered sets
-            uint8_t rsvd3:1;  // reserved
-            uint8_t phyint:1; // phy interrupt
-            uint8_t gpi1:1;   // gpi int 1
-            uint8_t gpi2:1;   // gpi int 2
-            uint8_t txdlow:1; // transmit desc low thresh
-            uint8_t srpd:1;   // small receive packet detected
-            uint16_t rsvd4:15; // reserved
-        } ;
-    } icd;
-
-    union { // 0x00C0 IMC
-        uint32_t reg;
-        struct {
-            uint8_t txdw:1;   // tx descr witten back
-            uint8_t txqe:1;   // tx queue empty
-            uint8_t lsc:1;    // link status change
-            uint8_t rxseq:1;  // rcv sequence error
-            uint8_t rxdmt0:1; // rcv descriptor min thresh
-            uint8_t rsvd1:1;  // reserved
-            uint8_t rxo:1;    // receive overrunn
-            uint8_t rxt0:1;   // receiver timer interrupt
-            uint8_t rsvd2:1;  // reserved
-            uint8_t mdac:1;   // mdi/o access complete
-            uint8_t rxcfg:1;  // recv /c/ ordered sets
-            uint8_t rsvd3:1;  // reserved
-            uint8_t phyint:1; // phy interrupt
-            uint8_t gpi1:1;   // gpi int 1
-            uint8_t gpi2:1;   // gpi int 2
-            uint8_t txdlow:1; // transmit desc low thresh
-            uint8_t srpd:1;   // small receive packet detected
-            uint16_t rsvd4:15; // reserved
-        } ;
-    } imc;
-
-    union { // 0x0100 RCTL
-        uint32_t reg;
-        struct {
-            uint8_t rst:1;   // Reset
-            uint8_t en:1;    // Enable
-            uint8_t sbp:1;   // Store bad packets
-            uint8_t upe:1;   // Unicast Promiscuous enabled
-            uint8_t mpe:1;   // Multicast promiscuous enabled
-            uint8_t lpe:1;   // long packet reception enabled
-            uint8_t lbm:2;   //
-            uint8_t rdmts:2; //
-            uint8_t rsvd:2;  //
-            uint8_t mo:2;    //
-            uint8_t mdr:1;   //
-            uint8_t bam:1;   //
-            uint8_t bsize:2; //
-            uint8_t vpe:1;   //
-            uint8_t cfien:1; //
-            uint8_t cfi:1;   //
-            uint8_t rsvd2:1; //
-            uint8_t dpf:1;   // discard pause frames
-            uint8_t pmcf:1;  // pass mac control  frames
-            uint8_t rsvd3:1; // reserved
-            uint8_t bsex:1;  // buffer size extension
-            uint8_t secrc:1; // strip ethernet crc from incoming packet
-            uint8_t rsvd1:5;  // reserved
-        } ;
-    } rctl;
-
-    union { // 0x0400 TCTL
-        uint32_t reg;
-        struct {
-            uint8_t rst:1;    // Reset
-            uint8_t en:1;     // Enable
-            uint8_t bce:1;    // busy check enable
-            uint8_t psp:1;    // pad short packets
-            uint8_t ct:8;     // collision threshold
-            uint16_t cold:10; // collision distance
-            uint8_t swxoff:1; // software xoff transmission
-            uint8_t pbe:1;    // packet burst enable
-            uint8_t rtlc:1;   // retransmit late collisions
-            uint8_t nrtu:1;   // on underrun no TX
-            uint8_t mulr:1;   // multiple request
-            uint8_t rsvd:5;   // reserved
-        } ;
-    } tctl;
-
-    union { // 0x5820 MANC
-        uint32_t reg;
-        struct {
-            uint8_t smbus:1;    // SMBus enabled #####
-            uint8_t asf:1;      // ASF enabled #####
-            uint8_t ronforce:1; // reset of force
-            uint8_t rsvd:5;     // reserved
-            uint8_t rmcp1:1;    // rcmp1 filtering
-            uint8_t rmcp2:1;    // rcmp2 filtering
-            uint8_t ipv4:1;     // enable ipv4
-            uint8_t ipv6:1;     // enable ipv6
-            uint8_t snap:1;     // accept snap
-            uint8_t arp:1;      // filter arp #####
-            uint8_t neighbor:1; // neighbor discovery
-            uint8_t arp_resp:1; // arp response
-            uint8_t tcorst:1;   // tco reset happened
-            uint8_t rcvtco:1;   // receive tco enabled ######
-            uint8_t blkphyrst:1;// block phy resets ########
-            uint8_t rcvall:1;   // receive all
-            uint8_t macaddrfltr:1; // mac address filtering ######
-            uint8_t mng2host:1; // mng2 host packets #######
-            uint8_t ipaddrfltr:1; // ip address filtering
-            uint8_t xsumfilter:1; // checksum filtering
-            uint8_t brfilter:1; // broadcast filtering
-            uint8_t smbreq:1;   // smb request
-            uint8_t smbgnt:1;   // smb grant
-            uint8_t smbclkin:1; // smbclkin
-            uint8_t smbdatain:1; // smbdatain
-            uint8_t smbdataout:1; // smb data out
-            uint8_t smbclkout:1; // smb clock out
-            uint8_t rsvd2:2;
-        };
-    } manc;
+    template<class T>
+    struct Reg {
+        T _data;
+        T operator()() { return _data; }
+        const Reg<T> &operator=(T d) { _data = d; return *this;}
+        bool operator==(T d) { return d == _data; }
+        void operator()(T d) { _data = d; }
+        Reg() { _data = 0; }
+    };
+
+    struct CTRL : public Reg<uint32_t> { // 0x0000 CTRL Register
+        using Reg<uint32_t>::operator=;
+        ADD_FIELD32(fd,0,1);       // full duplex
+        ADD_FIELD32(bem,1,1);      // big endian mode
+        ADD_FIELD32(pcipr,2,1);    // PCI priority
+        ADD_FIELD32(lrst,3,1);     // link reset
+        ADD_FIELD32(tme,4,1);      // test mode enable
+        ADD_FIELD32(asde,5,1);     // Auto-speed detection
+        ADD_FIELD32(slu,6,1);      // Set link up
+        ADD_FIELD32(ilos,7,1);     // invert loss-of-signal
+        ADD_FIELD32(speed,8,2);    // speed selection bits
+        ADD_FIELD32(be32,10,1);    // big endian mode 32
+        ADD_FIELD32(frcspd,11,1);  // force speed
+        ADD_FIELD32(frcdpx,12,1);  // force duplex
+        ADD_FIELD32(duden,13,1);   // dock/undock enable
+        ADD_FIELD32(dudpol,14,1);  // dock/undock polarity
+        ADD_FIELD32(fphyrst,15,1); // force phy reset
+        ADD_FIELD32(extlen,16,1);  // external link status enable
+        ADD_FIELD32(rsvd,17,1);    // reserved
+        ADD_FIELD32(sdp0d,18,1);   // software controlled pin data
+        ADD_FIELD32(sdp1d,19,1);   // software controlled pin data
+        ADD_FIELD32(sdp2d,20,1);   // software controlled pin data
+        ADD_FIELD32(sdp3d,21,1);   // software controlled pin data
+        ADD_FIELD32(sdp0i,22,1);   // software controlled pin dir
+        ADD_FIELD32(sdp1i,23,1);   // software controlled pin dir
+        ADD_FIELD32(sdp2i,24,1);   // software controlled pin dir
+        ADD_FIELD32(sdp3i,25,1);   // software controlled pin dir
+        ADD_FIELD32(rst,26,1);     // reset
+        ADD_FIELD32(rfce,27,1);    // receive flow control enable
+        ADD_FIELD32(tfce,28,1);    // transmit flow control enable
+        ADD_FIELD32(rte,29,1);     // routing tag enable
+        ADD_FIELD32(vme,30,1);     // vlan enable
+        ADD_FIELD32(phyrst,31,1);  // phy reset
+    };
+    CTRL ctrl;
+
+    struct STATUS : public Reg<uint32_t> { // 0x0008 STATUS Register
+        using Reg<uint32_t>::operator=;
+        ADD_FIELD32(fd,0,1);       // full duplex
+        ADD_FIELD32(lu,1,1);       // link up
+        ADD_FIELD32(func,2,2);     // function id
+        ADD_FIELD32(txoff,4,1);    // transmission paused
+        ADD_FIELD32(tbimode,5,1);  // tbi mode
+        ADD_FIELD32(speed,6,2);    // link speed
+        ADD_FIELD32(asdv,8,2);     // auto speed detection value
+        ADD_FIELD32(mtxckok,10,1); // mtx clock running ok
+        ADD_FIELD32(pci66,11,1);   // In 66Mhz pci slot
+        ADD_FIELD32(bus64,12,1);   // in 64 bit slot
+        ADD_FIELD32(pcix,13,1);    // Pci mode
+        ADD_FIELD32(pcixspd,14,2); // pci x speed
+    };
+    STATUS sts;
+
+    struct EECD : public Reg<uint32_t> { // 0x0010 EECD Register
+        using Reg<uint32_t>::operator=;
+        ADD_FIELD32(sk,0,1);       // clock input to the eeprom
+        ADD_FIELD32(cs,1,1);       // chip select to eeprom
+        ADD_FIELD32(din,2,1);      // data input to eeprom
+        ADD_FIELD32(dout,3,1);     // data output bit
+        ADD_FIELD32(fwe,4,2);      // flash write enable
+        ADD_FIELD32(ee_req,6,1);   // request eeprom access
+        ADD_FIELD32(ee_gnt,7,1);   // grant eeprom access
+        ADD_FIELD32(ee_pres,8,1);  // eeprom present
+        ADD_FIELD32(ee_size,9,1);  // eeprom size
+        ADD_FIELD32(ee_sz1,10,1);  // eeprom size
+        ADD_FIELD32(rsvd,11,2);    // reserved
+        ADD_FIELD32(ee_type,13,1); // type of eeprom
+    } ;
+    EECD eecd;
+
+    struct EERD : public Reg<uint32_t> { // 0x0014 EERD Register
+        using Reg<uint32_t>::operator=;
+        ADD_FIELD32(start,0,1);  // start read
+        ADD_FIELD32(done,4,1);   // done read
+        ADD_FIELD32(addr,8,8);   // address
+        ADD_FIELD32(data,16,16); // data
+    };
+    EERD eerd;
+
+    struct CTRL_EXT : public Reg<uint32_t> { // 0x0018 CTRL_EXT Register
+        using Reg<uint32_t>::operator=;
+        ADD_FIELD32(gpi_en,0,4);      // enable interrupts from gpio
+        ADD_FIELD32(phyint,5,1);      // reads the phy internal int status
+        ADD_FIELD32(sdp2_data,6,1);   // data from gpio sdp
+        ADD_FIELD32(spd3_data,7,1);   // data from gpio sdp
+        ADD_FIELD32(spd2_iodir,10,1); // direction of sdp2
+        ADD_FIELD32(spd3_iodir,11,1); // direction of sdp3
+        ADD_FIELD32(asdchk,12,1);     // initiate auto-speed-detection
+        ADD_FIELD32(eerst,13,1);      // reset the eeprom
+        ADD_FIELD32(spd_byps,15,1);   // bypass speed select
+        ADD_FIELD32(ro_dis,17,1);     // disable relaxed memory ordering
+        ADD_FIELD32(vreg,21,1);       // power down the voltage regulator
+        ADD_FIELD32(link_mode,22,2);  // interface to talk to the link
+        ADD_FIELD32(iame, 27,1);      // interrupt acknowledge auto-mask ??
+        ADD_FIELD32(drv_loaded, 28,1);// driver is loaded and in charge of device
+        ADD_FIELD32(timer_clr, 29,1); // clear interrupt timers after IMS clear ??
+    };
+    CTRL_EXT ctrl_ext;
+
+    struct MDIC : public Reg<uint32_t> { // 0x0020 MDIC Register
+        using Reg<uint32_t>::operator=;
+        ADD_FIELD32(data,0,16);   // data
+        ADD_FIELD32(regadd,16,5); // register address
+        ADD_FIELD32(phyadd,21,5); // phy addresses
+        ADD_FIELD32(op,26,2);     // opcode
+        ADD_FIELD32(r,28,1);      // ready
+        ADD_FIELD32(i,29,1);      // interrupt
+        ADD_FIELD32(e,30,1);      // error
+    };
+    MDIC mdic;
+
+    struct ICR : public Reg<uint32_t> { // 0x00C0 ICR Register
+        using Reg<uint32_t>::operator=;
+        ADD_FIELD32(txdw,0,1)   // tx descr written back
+        ADD_FIELD32(txqe,1,1)   // tx queue empty
+        ADD_FIELD32(lsc,2,1)    // link status change
+        ADD_FIELD32(rxseq,3,1)  // rcv sequence error
+        ADD_FIELD32(rxdmt0,4,1) // rcv descriptor min thresh
+        ADD_FIELD32(rsvd1,5,1)  // reserved
+        ADD_FIELD32(rxo,6,1)    // receive overrun
+        ADD_FIELD32(rxt0,7,1)   // receiver timer interrupt
+        ADD_FIELD32(mdac,9,1)   // mdi/o access complete
+        ADD_FIELD32(rxcfg,10,1)  // recv /c/ ordered sets
+        ADD_FIELD32(phyint,12,1) // phy interrupt
+        ADD_FIELD32(gpi1,13,1)   // gpi int 1
+        ADD_FIELD32(gpi2,14,1)   // gpi int 2
+        ADD_FIELD32(txdlow,15,1) // transmit desc low thresh
+        ADD_FIELD32(srpd,16,1)   // small receive packet detected
+        ADD_FIELD32(ack,17,1);    // receive ack frame
+        ADD_FIELD32(int_assert, 31,1); // interrupt caused a system interrupt
+    };
+    ICR icr;
+
+    uint32_t imr; // register that contains the current interrupt mask
+
+    struct ITR : public Reg<uint32_t> { // 0x00C4 ITR Register
+        using Reg<uint32_t>::operator=;
+        ADD_FIELD32(interval, 0,16); // minimum inter-interrupt interval
+                                     // specified in 256ns increments
+    };
+    ITR itr;
+
+    // When CTRL_EXT.IAME and ICR.INT_ASSERT are both 1, an ICR read or write
+    // causes the IAM register contents to be written into the IMC,
+    // automatically clearing all interrupts that have a bit set in the IAM
+    uint32_t iam;
+
+    struct RCTL : public Reg<uint32_t> { // 0x0100 RCTL Register
+        using Reg<uint32_t>::operator=;
+        ADD_FIELD32(rst,0,1);   // Reset
+        ADD_FIELD32(en,1,1);    // Enable
+        ADD_FIELD32(sbp,2,1);   // Store bad packets
+        ADD_FIELD32(upe,3,1);   // Unicast Promiscuous enabled
+        ADD_FIELD32(mpe,4,1);   // Multicast promiscuous enabled
+        ADD_FIELD32(lpe,5,1);   // long packet reception enabled
+        ADD_FIELD32(lbm,6,2);   //
+        ADD_FIELD32(rdmts,8,2); //
+        ADD_FIELD32(mo,12,2);    //
+        ADD_FIELD32(mdr,14,1);   //
+        ADD_FIELD32(bam,15,1);   //
+        ADD_FIELD32(bsize,16,2); //
+        ADD_FIELD32(vfe,18,1);   //
+        ADD_FIELD32(cfien,19,1); //
+        ADD_FIELD32(cfi,20,1);   //
+        ADD_FIELD32(dpf,22,1);   // discard pause frames
+        ADD_FIELD32(pmcf,23,1);  // pass mac control  frames
+        ADD_FIELD32(bsex,25,1);  // buffer size extension
+        ADD_FIELD32(secrc,26,1); // strip ethernet crc from incoming packet
+        int descSize()
+        {
+            switch(bsize()) {
+                case 0: return bsex() == 0 ? 2048 : -1;
+                case 1: return bsex() == 0 ? 1024 : 16384;
+                case 2: return bsex() == 0 ? 512 : 8192;
+                case 3: return bsex() == 0 ? 256 : 4096;
+                default:
+                        return -1;
+            }
+        }
+    };
+    RCTL rctl;
+
+    struct FCTTV : public Reg<uint32_t> { // 0x0170 FCTTV
+        using Reg<uint32_t>::operator=;
+        ADD_FIELD32(ttv,0,16);    // Transmit Timer Value
+    };
+    FCTTV fcttv;
+
+    struct TCTL : public Reg<uint32_t> { // 0x0400 TCTL Register
+        using Reg<uint32_t>::operator=;
+        ADD_FIELD32(rst,0,1);    // Reset
+        ADD_FIELD32(en,1,1);     // Enable
+        ADD_FIELD32(bce,2,1);    // busy check enable
+        ADD_FIELD32(psp,3,1);    // pad short packets
+        ADD_FIELD32(ct,4,8);     // collision threshold
+        ADD_FIELD32(cold,12,10); // collision distance
+        ADD_FIELD32(swxoff,22,1); // software xoff transmission
+        ADD_FIELD32(pbe,23,1);    // packet burst enable
+        ADD_FIELD32(rtlc,24,1);   // retransmit late collisions
+        ADD_FIELD32(nrtu,25,1);   // on underrun no TX
+        ADD_FIELD32(mulr,26,1);   // multiple request
+    };
+    TCTL tctl;
+
+    struct PBA : public Reg<uint32_t> { // 0x1000 PBA Register
+        using Reg<uint32_t>::operator=;
+        ADD_FIELD32(rxa,0,16);
+        ADD_FIELD32(txa,16,16);
+    };
+    PBA pba;
+
+    struct FCRTL : public Reg<uint32_t> { // 0x2160 FCRTL Register
+        using Reg<uint32_t>::operator=;
+        ADD_FIELD32(rtl,3,28); // make this bigger than the spec so we can have
+                               // a larger buffer
+        ADD_FIELD32(xone, 31,1);
+    };
+    FCRTL fcrtl;
+
+    struct FCRTH : public Reg<uint32_t> { // 0x2168 FCRTH Register
+        using Reg<uint32_t>::operator=;
+        ADD_FIELD32(rth,3,13); // make this bigger than the spec so we can have
+                               //a larger buffer
+        ADD_FIELD32(xfce, 31,1);
+    };
+    FCRTH fcrth;
+
+    struct RDBA : public Reg<uint64_t> { // 0x2800 RDBA Register
+        using Reg<uint64_t>::operator=;
+        ADD_FIELD64(rdbal,0,32); // base address of rx descriptor ring
+        ADD_FIELD64(rdbah,32,32); // base address of rx descriptor ring
+    };
+    RDBA rdba;
+
+    struct RDLEN : public Reg<uint32_t> { // 0x2808 RDLEN Register
+        using Reg<uint32_t>::operator=;
+        ADD_FIELD32(len,7,13); // number of bytes in the descriptor buffer
+    };
+    RDLEN rdlen;
+
+    struct RDH : public Reg<uint32_t> { // 0x2810 RDH Register
+        using Reg<uint32_t>::operator=;
+        ADD_FIELD32(rdh,0,16); // head of the descriptor ring
+    };
+    RDH rdh;
+
+    struct RDT : public Reg<uint32_t> { // 0x2818 RDT Register
+        using Reg<uint32_t>::operator=;
+        ADD_FIELD32(rdt,0,16); // tail of the descriptor ring
+    };
+    RDT rdt;
+
+    struct RDTR : public Reg<uint32_t> { // 0x2820 RDTR Register
+        using Reg<uint32_t>::operator=;
+        ADD_FIELD32(delay,0,16); // receive delay timer
+        ADD_FIELD32(fpd, 31,1);   // flush partial descriptor block ??
+    };
+    RDTR rdtr;
+
+    struct RXDCTL : public Reg<uint32_t> { // 0x2828 RXDCTL Register
+        using Reg<uint32_t>::operator=;
+        ADD_FIELD32(pthresh,0,6);   // prefetch threshold, less than this
+                                    // consider prefetch
+        ADD_FIELD32(hthresh,8,6);   // number of descriptors in host mem to
+                                    // consider prefetch
+        ADD_FIELD32(wthresh,16,6);  // writeback threshold
+        ADD_FIELD32(gran,24,1);     // granularity 0 = desc, 1 = cacheline
+    };
+    RXDCTL rxdctl;
+
+    struct RADV : public Reg<uint32_t> { // 0x282C RADV Register
+        using Reg<uint32_t>::operator=;
+        ADD_FIELD32(idv,0,16); // absolute interrupt delay
+    };
+    RADV radv;
+
+    struct RSRPD : public Reg<uint32_t> { // 0x2C00 RSRPD Register
+        using Reg<uint32_t>::operator=;
+        ADD_FIELD32(idv,0,12); // size to interrupt on small packets
+    };
+    RSRPD rsrpd;
+
+    struct TDBA : public Reg<uint64_t> { // 0x3800 TDBA Register
+        using Reg<uint64_t>::operator=;
+        ADD_FIELD64(tdbal,0,32); // base address of transmit descriptor ring
+        ADD_FIELD64(tdbah,32,32); // base address of transmit descriptor ring
+    };
+    TDBA tdba;
+
+    struct TDLEN : public Reg<uint32_t> { // 0x3808 TDLEN Register
+        using Reg<uint32_t>::operator=;
+        ADD_FIELD32(len,7,13); // number of bytes in the descriptor buffer
+    };
+    TDLEN tdlen;
+
+    struct TDH : public Reg<uint32_t> { // 0x3810 TDH Register
+        using Reg<uint32_t>::operator=;
+        ADD_FIELD32(tdh,0,16); // head of the descriptor ring
+    };
+    TDH tdh;
+
+    struct TDT : public Reg<uint32_t> { // 0x3818 TDT Register
+        using Reg<uint32_t>::operator=;
+        ADD_FIELD32(tdt,0,16); // tail of the descriptor ring
+    };
+    TDT tdt;
+
+    struct TIDV : public Reg<uint32_t> { // 0x3820 TIDV Register
+        using Reg<uint32_t>::operator=;
+        ADD_FIELD32(idv,0,16); // interrupt delay
+    };
+    TIDV tidv;
+
+    struct TXDCTL : public Reg<uint32_t> { // 0x3828 TXDCTL Register
+        using Reg<uint32_t>::operator=;
+        ADD_FIELD32(pthresh, 0,6);  // if number of descriptors controller has is
+                                    // below this number, a prefetch is considered
+        ADD_FIELD32(hthresh,8,8);   // number of valid descriptors in host memory
+                                    // before a prefetch is considered
+        ADD_FIELD32(wthresh,16,6);  // number of descriptors to keep until
+                                    // writeback is considered
+        ADD_FIELD32(gran, 24,1);    // granularity of above values (0 = cacheline,
+                                    // 1 == descriptor)
+        ADD_FIELD32(lwthresh,25,7); // xmit descriptor low thresh, interrupt
+                                    // below this level
+    };
+    TXDCTL txdctl;
+
+    struct TADV : public Reg<uint32_t> { // 0x382C TADV Register
+        using Reg<uint32_t>::operator=;
+        ADD_FIELD32(idv,0,16); // absolute interrupt delay
+    };
+    TADV tadv;
+
+    struct RXCSUM : public Reg<uint32_t> { // 0x5000 RXCSUM Register
+        using Reg<uint32_t>::operator=;
+        ADD_FIELD32(pcss,0,8);
+        ADD_FIELD32(ipofld,8,1);
+        ADD_FIELD32(tuofld,9,1);
+    };
+    RXCSUM rxcsum;
+
+    struct MANC : public Reg<uint32_t> { // 0x5820 MANC Register
+        using Reg<uint32_t>::operator=;
+        ADD_FIELD32(smbus,0,1);    // SMBus enabled #####
+        ADD_FIELD32(asf,1,1);      // ASF enabled #####
+        ADD_FIELD32(ronforce,2,1); // reset on force
+        ADD_FIELD32(rsvd,3,5);     // reserved
+        ADD_FIELD32(rmcp1,8,1);    // rmcp1 filtering
+        ADD_FIELD32(rmcp2,9,1);    // rmcp2 filtering
+        ADD_FIELD32(ipv4,10,1);     // enable ipv4
+        ADD_FIELD32(ipv6,11,1);     // enable ipv6
+        ADD_FIELD32(snap,12,1);     // accept snap
+        ADD_FIELD32(arp,13,1);      // filter arp #####
+        ADD_FIELD32(neighbor,14,1); // neighbor discovery
+        ADD_FIELD32(arp_resp,15,1); // arp response
+        ADD_FIELD32(tcorst,16,1);   // tco reset happened
+        ADD_FIELD32(rcvtco,17,1);   // receive tco enabled ######
+        ADD_FIELD32(blkphyrst,18,1);// block phy resets ########
+        ADD_FIELD32(rcvall,19,1);   // receive all
+        ADD_FIELD32(macaddrfltr,20,1); // mac address filtering ######
+        ADD_FIELD32(mng2host,21,1); // mng2 host packets #######
+        ADD_FIELD32(ipaddrfltr,22,1); // ip address filtering
+        ADD_FIELD32(xsumfilter,23,1); // checksum filtering
+        ADD_FIELD32(brfilter,24,1); // broadcast filtering
+        ADD_FIELD32(smbreq,25,1);   // smb request
+        ADD_FIELD32(smbgnt,26,1);   // smb grant
+        ADD_FIELD32(smbclkin,27,1); // smbclkin
+        ADD_FIELD32(smbdatain,28,1); // smbdatain
+        ADD_FIELD32(smbdataout,29,1); // smb data out
+        ADD_FIELD32(smbclkout,30,1); // smb clock out
+    };
+    MANC manc;
 };
 
 }; // iGbReg namespace
diff --git a/src/dev/io_device.hh b/src/dev/io_device.hh
index 902cde909..cd7a5296a 100644
--- a/src/dev/io_device.hh
+++ b/src/dev/io_device.hh
@@ -132,6 +132,7 @@ class DmaPort : public Port
 
     bool dmaPending() { return pendingCount > 0; }
 
+    int cacheBlockSize() { return peerBlockSize(); }
     unsigned int drain(Event *de);
 };
 
@@ -261,13 +262,17 @@ class DmaDevice : public PioDevice
                            addr, size, event, data);
     }
 
-    void dmaRead(Addr addr, int size, Event *event, uint8_t *data = NULL)
-    { dmaPort->dmaAction(MemCmd::ReadReq, addr, size, event, data); }
+    void dmaRead(Addr addr, int size, Event *event, uint8_t *data)
+    {
+        dmaPort->dmaAction(MemCmd::ReadReq, addr, size, event, data);
+    }
 
     bool dmaPending() { return dmaPort->dmaPending(); }
 
     virtual unsigned int drain(Event *de);
 
+    int cacheBlockSize() { return dmaPort->cacheBlockSize(); }
+
     virtual Port *getPort(const std::string &if_name, int idx = -1)
     {
         if (if_name == "pio") {
diff --git a/src/dev/sparc/iob.cc b/src/dev/sparc/iob.cc
index 6bd40b631..e686e51f7 100644
--- a/src/dev/sparc/iob.cc
+++ b/src/dev/sparc/iob.cc
@@ -192,6 +192,8 @@ Iob::writeIob(PacketPtr pkt)
             data = pkt->get<uint64_t>();
             intMan[index].cpu = bits(data,12,8);
             intMan[index].vector = bits(data,5,0);
+            DPRINTF(Iob, "Wrote IntMan %d cpu %d, vec %d\n", index,
+                    intMan[index].cpu, intMan[index].vector);
             return;
         }
 
@@ -201,11 +203,14 @@ Iob::writeIob(PacketPtr pkt)
             intCtl[index].mask = bits(data,2,2);
             if (bits(data,1,1))
                 intCtl[index].pend = false;
+            DPRINTF(Iob, "Wrote IntCtl %d pend %d cleared %d\n", index,
+                    intCtl[index].pend, bits(data,2,2));
             return;
         }
 
         if (accessAddr == JIntVecAddr) {
             jIntVec = bits(pkt->get<uint64_t>(), 5,0);
+            DPRINTF(Iob, "Wrote jIntVec %d\n", jIntVec);
             return;
         }
 
@@ -237,11 +242,15 @@ Iob::writeJBus(PacketPtr pkt)
             index = (accessAddr - JIntBusyAddr) >> 3;
             data = pkt->get<uint64_t>();
             jIntBusy[index].busy = bits(data,5,5);
+            DPRINTF(Iob, "Wrote jIntBusy index %d busy: %d\n", index,
+                    jIntBusy[index].busy);
             return;
         }
         if (accessAddr == JIntABusyAddr) {
             data = pkt->get<uint64_t>();
             jIntBusy[cpuid].busy = bits(data,5,5);
+            DPRINTF(Iob, "Wrote jIntBusy index %d busy: %d\n", cpuid,
+                    jIntBusy[cpuid].busy);
             return;
         };
 
@@ -256,6 +265,8 @@ Iob::receiveDeviceInterrupt(DeviceId devid)
         return;
     intCtl[devid].mask = true;
     intCtl[devid].pend = true;
+    DPRINTF(Iob, "Receiving Device interrupt: %d for cpu %d vec %d\n",
+            devid, intMan[devid].cpu, intMan[devid].vector);
     ic->post(intMan[devid].cpu, SparcISA::IT_INT_VEC, intMan[devid].vector);
 }
 
@@ -269,6 +280,8 @@ Iob::generateIpi(Type type, int cpu_id, int vector)
 
     switch (type) {
       case 0: // interrupt
+        DPRINTF(Iob, "Generating interrupt because of I/O write to cpu: %d vec %d\n",
+                cpu_id, vector);
         ic->post(cpu_id, SparcISA::IT_INT_VEC, vector);
         break;
       case 1: // reset
@@ -279,9 +292,11 @@ Iob::generateIpi(Type type, int cpu_id, int vector)
         sys->threadContexts[cpu_id]->activate();
         break;
       case 2: // idle -- this means stop executing and don't wake on interrupts
+        DPRINTF(Iob, "Idling CPU because of I/O write cpu: %d\n", cpu_id);
         sys->threadContexts[cpu_id]->halt();
         break;
       case 3: // resume
+        DPRINTF(Iob, "Resuming CPU because of I/O write cpu: %d\n", cpu_id);
         sys->threadContexts[cpu_id]->activate();
         break;
       default:
@@ -297,6 +312,9 @@ Iob::receiveJBusInterrupt(int cpu_id, int source, uint64_t d0, uint64_t d1)
     if (jIntBusy[cpu_id].busy)
         return false;
 
+    DPRINTF(Iob, "Receiving jBus interrupt: %d for cpu %d vec %d\n",
+            source, cpu_id, jIntVec);
+
     jIntBusy[cpu_id].busy = true;
     jIntBusy[cpu_id].source = source;
     jBusData0[cpu_id] = d0;
diff --git a/src/mem/bus.cc b/src/mem/bus.cc
index 4988df3c5..6e6ba2380 100644
--- a/src/mem/bus.cc
+++ b/src/mem/bus.cc
@@ -171,8 +171,12 @@ Bus::recvTiming(PacketPtr pkt)
     }
 
     short dest = pkt->getDest();
+
+    // Make sure to clear the snoop commit flag so it doesn't think an
+    // access has been handled twice.
     if (dest == Packet::Broadcast) {
         port = findPort(pkt->getAddr(), pkt->getSrc());
+        pkt->flags &= ~SNOOP_COMMIT;
         if (timingSnoop(pkt, port ? port : interfaces[pkt->getSrc()])) {
             bool success;
 
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 9368e7648..ca965859e 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -595,8 +595,13 @@ Cache<TagStore,Coherence>::access(PacketPtr &pkt)
         //We are determining prefetches on access stream, call prefetcher
         prefetcher->handleMiss(pkt, curTick);
     }
+
+    Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
+
     if (!pkt->req->isUncacheable()) {
-        blk = handleAccess(pkt, lat, writebacks);
+        if (!missQueue->findMSHR(blk_addr)) {
+            blk = handleAccess(pkt, lat, writebacks);
+        }
     } else {
         size = pkt->getSize();
     }
diff --git a/src/mem/cache/miss/miss_queue.cc b/src/mem/cache/miss/miss_queue.cc
index 25b8fcbeb..24ca9cfa2 100644
--- a/src/mem/cache/miss/miss_queue.cc
+++ b/src/mem/cache/miss/miss_queue.cc
@@ -599,6 +599,7 @@ MissQueue::handleResponse(PacketPtr &pkt, Tick time)
             MemCmd cmd = mshr->getTarget()->cmd;
             mshr->pkt->setDest(Packet::Broadcast);
             mshr->pkt->result = Packet::Unknown;
+            mshr->pkt->req = mshr->getTarget()->req;
             mq.markPending(mshr, cmd);
             mshr->order = order++;
             cache->setMasterRequest(Request_MSHR, time);
diff --git a/src/python/m5/objects/Ethernet.py b/src/python/m5/objects/Ethernet.py
index a52e35511..bfe30950c 100644
--- a/src/python/m5/objects/Ethernet.py
+++ b/src/python/m5/objects/Ethernet.py
@@ -67,7 +67,14 @@ if build_env['ALPHA_TLASER']:
 
 class IGbE(PciDevice):
     type = 'IGbE'
-    hardware_address = Param.EthernetAddr(NextEthernetAddr, "Ethernet Hardware Address")
+    hardware_address = Param.String("Ethernet Hardware Address")
+    use_flow_control = Param.Bool(False, "Should we use xon/xoff flow contorl (UNIMPLMENTD)")
+    rx_fifo_size = Param.MemorySize('384kB', "Size of the rx FIFO")
+    tx_fifo_size = Param.MemorySize('384kB', "Size of the tx FIFO")
+    rx_desc_cache_size = Param.Int(64, "Number of enteries in the rx descriptor cache")
+    tx_desc_cache_size = Param.Int(64, "Number of enteries in the rx descriptor cache")
+    clock = Param.Clock('500MHz', "Clock speed of the device")
+
 
 class IGbEPciData(PciConfigData):
     VendorID = 0x8086