21 files changed, 971 insertions, 91 deletions
diff --git a/src/arch/x86/floatregfile.cc b/src/arch/x86/floatregfile.cc
index 93f2d4fe1..1c49ea9c6 100644
--- a/src/arch/x86/floatregfile.cc
+++ b/src/arch/x86/floatregfile.cc
@@ -86,6 +86,7 @@
  */
 
 #include "arch/x86/floatregfile.hh"
+#include "base/trace.hh"
 #include "sim/serialize.hh"
 
 #include <string.h>
@@ -111,22 +112,28 @@ void FloatRegFile::clear()
 
 FloatReg FloatRegFile::readReg(int floatReg, int width)
 {
-    return d[floatReg];
+    FloatReg reg = d[floatReg];
+    DPRINTF(X86, "Reading %f from register %d.\n", reg, floatReg);
+    return reg;
 }
 
 FloatRegBits FloatRegFile::readRegBits(int floatReg, int width)
 {
-    return q[floatReg];
+    FloatRegBits reg = q[floatReg];
+    DPRINTF(X86, "Reading %#x from register %d.\n", reg, floatReg);
+    return reg;
 }
 
 Fault FloatRegFile::setReg(int floatReg, const FloatReg &val, int width)
 {
+    DPRINTF(X86, "Writing %f to register %d.\n", val, floatReg);
     d[floatReg] = val;
     return NoFault;
 }
 
 Fault FloatRegFile::setRegBits(int floatReg, const FloatRegBits &val, int width)
 {
+    DPRINTF(X86, "Writing bits %#x to register %d.\n", val, floatReg);
     q[floatReg] = val;
     return NoFault;
 }
diff --git a/src/arch/x86/floatregfile.hh b/src/arch/x86/floatregfile.hh
index 282cac796..14dda443f 100644
--- a/src/arch/x86/floatregfile.hh
+++ b/src/arch/x86/floatregfile.hh
@@ -101,7 +101,7 @@ namespace X86ISA
     std::string getFloatRegName(RegIndex);
 
     //Each 128 bit xmm register is broken into two effective 64 bit registers.
-    const int NumFloatArchRegs = NumMMXRegs + 2 * NumXMMRegs;
+    const int NumFloatArchRegs = NumMMXRegs + 2 * NumXMMRegs + NumMicroFpRegs;
     const int NumFloatRegs = NumFloatArchRegs;
 
     class FloatRegFile
diff --git a/src/arch/x86/floatregs.hh b/src/arch/x86/floatregs.hh
new file mode 100644
index 000000000..b9d6a5c43
--- /dev/null
+++ b/src/arch/x86/floatregs.hh
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 2007 The Hewlett-Packard Development Company
+ * All rights reserved.
+ *
+ * Redistribution and use of this software in source and binary forms,
+ * with or without modification, are permitted provided that the
+ * following conditions are met:
+ *
+ * The software must be used only for Non-Commercial Use which means any
+ * use which is NOT directed to receiving any direct monetary
+ * compensation for, or commercial advantage from such use.  Illustrative
+ * examples of non-commercial use are academic research, personal study,
+ * teaching, education and corporate research & development.
+ * Illustrative examples of commercial use are distributing products for
+ * commercial advantage and providing services using the software for
+ * commercial advantage.
+ *
+ * If you wish to use this software or functionality therein that may be
+ * covered by patents for commercial use, please contact:
+ *     Director of Intellectual Property Licensing
+ *     Office of Strategy and Technology
+ *     Hewlett-Packard Company
+ *     1501 Page Mill Road
+ *     Palo Alto, California  94304
+ *
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.  Redistributions
+ * in binary form must reproduce the above copyright notice, this list of
+ * conditions and the following disclaimer in the documentation and/or
+ * other materials provided with the distribution.  Neither the name of
+ * the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.  No right of
+ * sublicense is granted herewith.  Derivatives of the software and
+ * output created using the software may be prepared, but only for
+ * Non-Commercial Uses.  Derivatives of the software may be shared with
+ * others provided: (i) the others agree to abide by the list of
+ * conditions herein which includes the Non-Commercial Use restrictions;
+ * and (ii) such Derivatives of the software include the above copyright
+ * notice to acknowledge the contribution from this software where
+ * applicable, this list of conditions and the disclaimer below.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Gabe Black
+ */
+
+#ifndef __ARCH_X86_FLOATREGS_HH__
+#define __ARCH_X86_FLOATREGS_HH__
+
+#include "arch/x86/x86_traits.hh"
+#include "base/bitunion.hh"
+
+namespace X86ISA
+{
+    enum FloatRegIndex
+    {
+        // MMX/X87 registers
+        FLOATREG_MMX_BASE,
+        FLOATREG_FPR_BASE = FLOATREG_MMX_BASE,
+        FLOATREG_MMX0 = FLOATREG_MMX_BASE,
+        FLOATREG_MMX1,
+        FLOATREG_MMX2,
+        FLOATREG_MMX3,
+        FLOATREG_MMX4,
+        FLOATREG_MMX5,
+        FLOATREG_MMX6,
+        FLOATREG_MMX7,
+
+        FLOATREG_FPR0 = FLOATREG_FPR_BASE,
+        FLOATREG_FPR1,
+        FLOATREG_FPR2,
+        FLOATREG_FPR3,
+        FLOATREG_FPR4,
+        FLOATREG_FPR5,
+        FLOATREG_FPR6,
+        FLOATREG_FPR7,
+
+        FLOATREG_XMM_BASE = FLOATREG_MMX_BASE + NumMMXRegs,
+        FLOATREG_XMM0_LOW = FLOATREG_XMM_BASE,
+        FLOATREG_XMM0_HIGH,
+        FLOATREG_XMM1_LOW,
+        FLOATREG_XMM1_HIGH,
+        FLOATREG_XMM2_LOW,
+        FLOATREG_XMM2_HIGH,
+        FLOATREG_XMM3_LOW,
+        FLOATREG_XMM3_HIGH,
+        FLOATREG_XMM4_LOW,
+        FLOATREG_XMM4_HIGH,
+        FLOATREG_XMM5_LOW,
+        FLOATREG_XMM5_HIGH,
+        FLOATREG_XMM6_LOW,
+        FLOATREG_XMM6_HIGH,
+        FLOATREG_XMM7_LOW,
+        FLOATREG_XMM7_HIGH,
+        FLOATREG_XMM8_LOW,
+        FLOATREG_XMM8_HIGH,
+        FLOATREG_XMM9_LOW,
+        FLOATREG_XMM9_HIGH,
+        FLOATREG_XMM10_LOW,
+        FLOATREG_XMM10_HIGH,
+        FLOATREG_XMM11_LOW,
+        FLOATREG_XMM11_HIGH,
+        FLOATREG_XMM12_LOW,
+        FLOATREG_XMM12_HIGH,
+        FLOATREG_XMM13_LOW,
+        FLOATREG_XMM13_HIGH,
+        FLOATREG_XMM14_LOW,
+        FLOATREG_XMM14_HIGH,
+        FLOATREG_XMM15_LOW,
+        FLOATREG_XMM15_HIGH,
+
+        FLOATREG_MICROFP_BASE = FLOATREG_XMM_BASE + 2 * NumXMMRegs,
+        FLOATREG_MICROFP0 = FLOATREG_MICROFP_BASE,
+        FLOATREG_MICROFP1,
+        FLOATREG_MICROFP2,
+        FLOATREG_MICROFP3,
+        FLOATREG_MICROFP4,
+        FLOATREG_MICROFP5,
+        FLOATREG_MICROFP6,
+        FLOATREG_MICROFP7,
+
+        NUM_FLOATREGS = FLOATREG_MICROFP_BASE + NumMicroFpRegs
+    };
+
+    static inline FloatRegIndex
+    FLOATREG_MMX(int index)
+    {
+        return (FloatRegIndex)(FLOATREG_MMX_BASE + index);
+    }
+
+    static inline FloatRegIndex
+    FLOATREG_FPR(int index)
+    {
+        return (FloatRegIndex)(FLOATREG_FPR_BASE + index);
+    }
+
+    static inline FloatRegIndex
+    FLOATREG_XMM_LOW(int index)
+    {
+        return (FloatRegIndex)(FLOATREG_XMM_BASE + 2 * index);
+    }
+
+    static inline FloatRegIndex
+    FLOATREG_XMM_HIGH(int index)
+    {
+        return (FloatRegIndex)(FLOATREG_XMM_BASE + 2 * index + 1);
+    }
+
+    static inline FloatRegIndex
+    FLOATREG_MICROFP(int index)
+    {
+        return (FloatRegIndex)(FLOATREG_MICROFP_BASE + index);
+    }
+};
+
+#endif // __ARCH_X86_FLOATREGS_HH__
diff --git a/src/arch/x86/insts/static_inst.cc b/src/arch/x86/insts/static_inst.cc
index 948a74bc1..4f6ec5390 100644
--- a/src/arch/x86/insts/static_inst.cc
+++ b/src/arch/x86/insts/static_inst.cc
@@ -192,7 +192,19 @@ namespace X86ISA
             }
             ccprintf(os, suffix);
         } else if (reg < Ctrl_Base_DepTag) {
-            ccprintf(os, "%%f%d", reg - FP_Base_DepTag);
+            int fpindex = reg - FP_Base_DepTag;
+            if(fpindex < NumMMXRegs) {
+                ccprintf(os, "%%mmx%d", reg - FP_Base_DepTag);
+                return;
+            }
+            fpindex -= NumMMXRegs;
+            if(fpindex < NumXMMRegs) {
+                ccprintf(os, "%%xmm%d_%s", fpindex / 2,
+                        (fpindex % 2) ? "high": "low");
+                return;
+            }
+            fpindex -= NumXMMRegs;
+            ccprintf(os, "%%ufp%d", fpindex);
         } else {
             switch (reg - Ctrl_Base_DepTag) {
               default:
diff --git a/src/arch/x86/isa/decoder/one_byte_opcodes.isa b/src/arch/x86/isa/decoder/one_byte_opcodes.isa
index d8db47063..ecb92947f 100644
--- a/src/arch/x86/isa/decoder/one_byte_opcodes.isa
+++ b/src/arch/x86/isa/decoder/one_byte_opcodes.isa
@@ -504,28 +504,30 @@
                 {{"Tried to execute the rep/repe prefix!"}});
             0x4: hlt();
             0x5: cmc();
-            //0x6: group3_Eb();
-            0x6: decode MODRM_REG {
-                0x0: Inst::TEST(Eb,Iz);
-                0x1: Inst::TEST(Eb,Iz);
-                0x2: Inst::NOT(Eb);
-                0x3: Inst::NEG(Eb);
-                0x4: Inst::MUL_B(Eb);
-                0x5: Inst::IMUL_B(Eb);
-                //This should be Eb, but it access the entire word value ax.
-                0x6: Inst::DIV_B(Ew);
-                0x7: idiv_Eb();
-            }
-            //0x7: group3_Ev();
-            0x7: decode MODRM_REG {
-                0x0: Inst::TEST(Ev,Iz);
-                0x1: Inst::TEST(Ev,Iz);
-                0x2: Inst::NOT(Ev);
-                0x3: Inst::NEG(Ev);
-                0x4: Inst::MUL(Ev);
-                0x5: Inst::IMUL(Ev);
-                0x6: Inst::DIV(Ev);
-                0x7: idiv_Ev();
+            format Inst {
+                //0x6: group3_Eb();
+                0x6: decode MODRM_REG {
+                    0x0: TEST(Eb,Iz);
+                    0x1: TEST(Eb,Iz);
+                    0x2: NOT(Eb);
+                    0x3: NEG(Eb);
+                    0x4: MUL_B(Eb);
+                    0x5: IMUL_B(Eb);
+                    //This should be Eb, but it access the entire word value ax.
+                    0x6: DIV_B(Ew);
+                    0x7: IDIV(Eb);
+                }
+                //0x7: group3_Ev();
+                0x7: decode MODRM_REG {
+                    0x0: TEST(Ev,Iz);
+                    0x1: TEST(Ev,Iz);
+                    0x2: NOT(Ev);
+                    0x3: NEG(Ev);
+                    0x4: MUL(Ev);
+                    0x5: IMUL(Ev);
+                    0x6: DIV(Ev);
+                    0x7: IDIV(Ev);
+                }
             }
         }
         0x1F: decode OPCODE_OP_BOTTOM3 {
diff --git a/src/arch/x86/isa/decoder/two_byte_opcodes.isa b/src/arch/x86/isa/decoder/two_byte_opcodes.isa
index 6d5a04e2d..fc5729540 100644
--- a/src/arch/x86/isa/decoder/two_byte_opcodes.isa
+++ b/src/arch/x86/isa/decoder/two_byte_opcodes.isa
@@ -201,8 +201,10 @@
                 }
                 // repne (0xF2)
                 0x8: decode OPCODE_OP_BOTTOM3 {
-                    0x2: cvtsi2sd_Vq_Ed();
-                    0x4: cvttsd2si_Gd_Wq();
+                    // The size of the V operand should be q, not dp
+                    0x2: Inst::CVTSI2SD(Vdp,Edp);
+                    // The size of the W operand should be q, not dp
+                    0x4: Inst::CVTTSD2SI(Gdp,Wdp);
                     0x5: cvtsd2si_Gd_Wq();
                     default: Inst::UD2();
                 }
@@ -276,7 +278,9 @@
                     0x4: andpd_Vo_Wo();
                     0x5: andnpd_Vo_Wo();
                     0x6: orpd_Vo_Wo();
-                    0x7: xorpd_Vo_Wo();
+                    //This really should be type o, but it works on q sized
+                    //chunks at a time.
+                    0x7: Inst::XORPD(Vq,Wq);
                     default: Inst::UD2();
                 }
                 // repne (0xF2)
@@ -322,12 +326,12 @@
                 }
                 // repne (0xF2)
                 0x8: decode OPCODE_OP_BOTTOM3 {
-                    0x0: addsd_Vq_Wq();
-                    0x1: mulsd_Vq_Wq();
+                    0x0: Inst::ADDSD(Vq,Wq);
+                    0x1: Inst::MULSD(Vq,Wq);
                     0x2: cvtsd2ss_Vd_Wq();
-                    0x4: subsd_Vq_Wq();
+                    0x4: Inst::SUBSD(Vq,Wq);
                     0x5: minsd_Vq_Wq();
-                    0x6: divsd_Vq_Wq();
+                    0x6: Inst::DIVSD(Vq,Wq);
                     0x7: maxsd_Vq_Wq();
                     default: Inst::UD2();
                 }
diff --git a/src/arch/x86/isa/insts/arithmetic/multiply_and_divide.py b/src/arch/x86/isa/insts/arithmetic/multiply_and_divide.py
index a865e163b..f498a10e0 100644
--- a/src/arch/x86/isa/insts/arithmetic/multiply_and_divide.py
+++ b/src/arch/x86/isa/insts/arithmetic/multiply_and_divide.py
@@ -83,8 +83,12 @@ def macroop MUL_B_P
 
 def macroop MUL_R
 {
-    muleh rdx, rax, reg
+    # We need to store the result of the multiplication in a temporary
+    # and then move it later because reg may be rdx. If it is, we can't
+    # clobber its old value yet.
+    muleh t1, rax, reg
     mulel rax, rax, reg
+    mov rdx, rdx, t1
 };
 
 def macroop MUL_M
@@ -130,8 +134,9 @@ def macroop IMUL_B_P
 
 def macroop IMUL_R
 {
-    muleh rdx, rax, reg
+    muleh t1, rax, reg
     mulel rax, rax, reg
+    mov rdx, rdx, t1
 };
 
 def macroop IMUL_M
@@ -225,8 +230,9 @@ def macroop DIV_B_P
 
 def macroop DIV_R
 {
-    divr rdx, rax, reg
+    divr t1, rax, reg
     divq rax, rax, reg
+    mov rdx, rdx, t1
 };
 
 def macroop DIV_M
@@ -243,6 +249,32 @@ def macroop DIV_P
     divr rdx, rax, t1
     divq rax, rax, t1
 };
+
+#
+# Signed division
+#
+
+def macroop IDIV_R
+{
+    divr t1, rax, reg
+    divq rax, rax, reg
+    mov rdx, rdx, t1
+};
+
+def macroop IDIV_M
+{
+    ld t1, seg, sib, disp
+    divr rdx, rax, t1
+    divq rax, rax, t1
+};
+
+def macroop IDIV_P
+{
+    rdip t7
+    ld t1, seg, riprel, disp
+    divr rdx, rax, t1
+    divq rax, rax, t1
+};
 '''
 #let {{
 #    class IDIV(Inst):
diff --git a/src/arch/x86/isa/insts/data_conversion/sign_extension.py b/src/arch/x86/isa/insts/data_conversion/sign_extension.py
index 0bdd4036c..9a7c226af 100644
--- a/src/arch/x86/isa/insts/data_conversion/sign_extension.py
+++ b/src/arch/x86/isa/insts/data_conversion/sign_extension.py
@@ -62,6 +62,6 @@ def macroop CQO_R_R {
     # A shift might be slower than, for example, an explicit sign extension,
     # so it might be worthwhile to try to find an alternative.
     mov regm, regm, reg
-    sra regm, regm, "env.dataSize * 8 - 1"
+    srai regm, regm, "env.dataSize * 8 - 1"
 };
 '''
diff --git a/src/arch/x86/isa/insts/sse/__init__.py b/src/arch/x86/isa/insts/sse/__init__.py
new file mode 100644
index 000000000..976e2dd52
--- /dev/null
+++ b/src/arch/x86/isa/insts/sse/__init__.py
@@ -0,0 +1,67 @@
+# Copyright (c) 2007 The Hewlett-Packard Development Company
+# All rights reserved.
+#
+# Redistribution and use of this software in source and binary forms,
+# with or without modification, are permitted provided that the
+# following conditions are met:
+#
+# The software must be used only for Non-Commercial Use which means any
+# use which is NOT directed to receiving any direct monetary
+# compensation for, or commercial advantage from such use.  Illustrative
+# examples of non-commercial use are academic research, personal study,
+# teaching, education and corporate research & development.
+# Illustrative examples of commercial use are distributing products for
+# commercial advantage and providing services using the software for
+# commercial advantage.
+#
+# If you wish to use this software or functionality therein that may be
+# covered by patents for commercial use, please contact:
+#     Director of Intellectual Property Licensing
+#     Office of Strategy and Technology
+#     Hewlett-Packard Company
+#     1501 Page Mill Road
+#     Palo Alto, California  94304
+#
+# Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.  Redistributions
+# in binary form must reproduce the above copyright notice, this list of
+# conditions and the following disclaimer in the documentation and/or
+# other materials provided with the distribution.  Neither the name of
+# the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.  No right of
+# sublicense is granted herewith.  Derivatives of the software and
+# output created using the software may be prepared, but only for
+# Non-Commercial Uses.  Derivatives of the software may be shared with
+# others provided: (i) the others agree to abide by the list of
+# conditions herein which includes the Non-Commercial Use restrictions;
+# and (ii) such Derivatives of the software include the above copyright
+# notice to acknowledge the contribution from this software where
+# applicable, this list of conditions and the disclaimer below.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Gabe Black
+
+categories = ["move",
+              "convert",
+              "add_and_subtract",
+              "multiply_and_divide",
+              "logical"]
+
+microcode = '''
+# SSE instructions
+'''
+for category in categories:
+    exec "import %s as cat" % category
+    microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/sse/add_and_subtract.py b/src/arch/x86/isa/insts/sse/add_and_subtract.py
new file mode 100644
index 000000000..5e8919106
--- /dev/null
+++ b/src/arch/x86/isa/insts/sse/add_and_subtract.py
@@ -0,0 +1,86 @@
+# Copyright (c) 2007 The Hewlett-Packard Development Company
+# All rights reserved.
+#
+# Redistribution and use of this software in source and binary forms,
+# with or without modification, are permitted provided that the
+# following conditions are met:
+#
+# The software must be used only for Non-Commercial Use which means any
+# use which is NOT directed to receiving any direct monetary
+# compensation for, or commercial advantage from such use.  Illustrative
+# examples of non-commercial use are academic research, personal study,
+# teaching, education and corporate research & development.
+# Illustrative examples of commercial use are distributing products for
+# commercial advantage and providing services using the software for
+# commercial advantage.
+#
+# If you wish to use this software or functionality therein that may be
+# covered by patents for commercial use, please contact:
+#     Director of Intellectual Property Licensing
+#     Office of Strategy and Technology
+#     Hewlett-Packard Company
+#     1501 Page Mill Road
+#     Palo Alto, California  94304
+#
+# Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.  Redistributions
+# in binary form must reproduce the above copyright notice, this list of
+# conditions and the following disclaimer in the documentation and/or
+# other materials provided with the distribution.  Neither the name of
+# the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.  No right of
+# sublicense is granted herewith.  Derivatives of the software and
+# output created using the software may be prepared, but only for
+# Non-Commercial Uses.  Derivatives of the software may be shared with
+# others provided: (i) the others agree to abide by the list of
+# conditions herein which includes the Non-Commercial Use restrictions;
+# and (ii) such Derivatives of the software include the above copyright
+# notice to acknowledge the contribution from this software where
+# applicable, this list of conditions and the disclaimer below.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Gabe Black
+
+microcode = '''
+def macroop ADDSD_R_R {
+    addfp xmml, xmml, xmmlm
+};
+
+def macroop ADDSD_R_M {
+    ldfp ufp1, seg, sib, disp
+    addfp xmml, xmml, ufp1
+};
+
+def macroop ADDSD_R_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, disp
+    addfp xmml, xmml, ufp1
+};
+
+def macroop SUBSD_R_R {
+    subfp xmml, xmml, xmmlm
+};
+
+def macroop SUBSD_R_M {
+    ldfp ufp1, seg, sib, disp
+    subfp xmml, xmml, ufp1
+};
+
+def macroop SUBSD_R_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, disp
+    subfp xmml, xmml, ufp1
+};
+'''
diff --git a/src/arch/x86/isa/insts/sse/convert.py b/src/arch/x86/isa/insts/sse/convert.py
new file mode 100644
index 000000000..070df84cc
--- /dev/null
+++ b/src/arch/x86/isa/insts/sse/convert.py
@@ -0,0 +1,86 @@
+# Copyright (c) 2007 The Hewlett-Packard Development Company
+# All rights reserved.
+#
+# Redistribution and use of this software in source and binary forms,
+# with or without modification, are permitted provided that the
+# following conditions are met:
+#
+# The software must be used only for Non-Commercial Use which means any
+# use which is NOT directed to receiving any direct monetary
+# compensation for, or commercial advantage from such use.  Illustrative
+# examples of non-commercial use are academic research, personal study,
+# teaching, education and corporate research & development.
+# Illustrative examples of commercial use are distributing products for
+# commercial advantage and providing services using the software for
+# commercial advantage.
+#
+# If you wish to use this software or functionality therein that may be
+# covered by patents for commercial use, please contact:
+#     Director of Intellectual Property Licensing
+#     Office of Strategy and Technology
+#     Hewlett-Packard Company
+#     1501 Page Mill Road
+#     Palo Alto, California  94304
+#
+# Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.  Redistributions
+# in binary form must reproduce the above copyright notice, this list of
+# conditions and the following disclaimer in the documentation and/or
+# other materials provided with the distribution.  Neither the name of
+# the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.  No right of
+# sublicense is granted herewith.  Derivatives of the software and
+# output created using the software may be prepared, but only for
+# Non-Commercial Uses.  Derivatives of the software may be shared with
+# others provided: (i) the others agree to abide by the list of
+# conditions herein which includes the Non-Commercial Use restrictions;
+# and (ii) such Derivatives of the software include the above copyright
+# notice to acknowledge the contribution from this software where
+# applicable, this list of conditions and the disclaimer below.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Gabe Black
+
+microcode = '''
+def macroop CVTSI2SD_R_R {
+    cvtf_i2d xmml, regm
+};
+
+def macroop CVTSI2SD_R_M {
+    ld t1, seg, sib, disp
+    cvtf_i2d xmml, t1
+};
+
+def macroop CVTSI2SD_R_P {
+    rdip t7
+    ld t1, seg, riprel, disp
+    cvtf_i2d xmml, t1
+};
+
+def macroop CVTTSD2SI_R_R {
+    cvtf_d2i reg, xmmlm
+};
+
+def macroop CVTTSD2SI_R_M {
+    ldfp ufp1, seg, sib, disp
+    cvtf_d2i reg, ufp1
+};
+
+def macroop CVTTSD2SI_R_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, disp
+    cvtf_d2i reg, ufp1
+};
+'''
diff --git a/src/arch/x86/isa/insts/sse/logical.py b/src/arch/x86/isa/insts/sse/logical.py
new file mode 100644
index 000000000..a2dbec40a
--- /dev/null
+++ b/src/arch/x86/isa/insts/sse/logical.py
@@ -0,0 +1,76 @@
+# Copyright (c) 2007 The Hewlett-Packard Development Company
+# All rights reserved.
+#
+# Redistribution and use of this software in source and binary forms,
+# with or without modification, are permitted provided that the
+# following conditions are met:
+#
+# The software must be used only for Non-Commercial Use which means any
+# use which is NOT directed to receiving any direct monetary
+# compensation for, or commercial advantage from such use.  Illustrative
+# examples of non-commercial use are academic research, personal study,
+# teaching, education and corporate research & development.
+# Illustrative examples of commercial use are distributing products for
+# commercial advantage and providing services using the software for
+# commercial advantage.
+#
+# If you wish to use this software or functionality therein that may be
+# covered by patents for commercial use, please contact:
+#     Director of Intellectual Property Licensing
+#     Office of Strategy and Technology
+#     Hewlett-Packard Company
+#     1501 Page Mill Road
+#     Palo Alto, California  94304
+#
+# Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.  Redistributions
+# in binary form must reproduce the above copyright notice, this list of
+# conditions and the following disclaimer in the documentation and/or
+# other materials provided with the distribution.  Neither the name of
+# the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.  No right of
+# sublicense is granted herewith.  Derivatives of the software and
+# output created using the software may be prepared, but only for
+# Non-Commercial Uses.  Derivatives of the software may be shared with
+# others provided: (i) the others agree to abide by the list of
+# conditions herein which includes the Non-Commercial Use restrictions;
+# and (ii) such Derivatives of the software include the above copyright
+# notice to acknowledge the contribution from this software where
+# applicable, this list of conditions and the disclaimer below.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Gabe Black
+
+microcode = '''
+def macroop XORPD_R_R {
+    xorfp xmml, xmml, xmmlm
+    xorfp xmmh, xmmh, xmmhm
+};
+
+def macroop XORPD_R_M {
+    ldfp ufp1, seg, sib, disp
+    ldfp ufp2, seg, sib, "DISPLACEMENT + 8"
+    xorfp xmml, xmml, ufp1
+    xorfp xmmh, xmmh, ufp2
+};
+
+def macroop XORPD_R_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, disp
+    ldfp ufp2, seg, riprel, "DISPLACEMENT + 8"
+    xorfp xmml, xmml, ufp1
+    xorfp xmmh, xmmh, ufp2
+};
+'''
diff --git a/src/arch/x86/isa/insts/sse/multiply_and_divide.py b/src/arch/x86/isa/insts/sse/multiply_and_divide.py
new file mode 100644
index 000000000..d555c6e1a
--- /dev/null
+++ b/src/arch/x86/isa/insts/sse/multiply_and_divide.py
@@ -0,0 +1,86 @@
+# Copyright (c) 2007 The Hewlett-Packard Development Company
+# All rights reserved.
+#
+# Redistribution and use of this software in source and binary forms,
+# with or without modification, are permitted provided that the
+# following conditions are met:
+#
+# The software must be used only for Non-Commercial Use which means any
+# use which is NOT directed to receiving any direct monetary
+# compensation for, or commercial advantage from such use.  Illustrative
+# examples of non-commercial use are academic research, personal study,
+# teaching, education and corporate research & development.
+# Illustrative examples of commercial use are distributing products for
+# commercial advantage and providing services using the software for
+# commercial advantage.
+#
+# If you wish to use this software or functionality therein that may be
+# covered by patents for commercial use, please contact:
+#     Director of Intellectual Property Licensing
+#     Office of Strategy and Technology
+#     Hewlett-Packard Company
+#     1501 Page Mill Road
+#     Palo Alto, California  94304
+#
+# Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.  Redistributions
+# in binary form must reproduce the above copyright notice, this list of
+# conditions and the following disclaimer in the documentation and/or
+# other materials provided with the distribution.  Neither the name of
+# the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.  No right of
+# sublicense is granted herewith.  Derivatives of the software and
+# output created using the software may be prepared, but only for
+# Non-Commercial Uses.  Derivatives of the software may be shared with
+# others provided: (i) the others agree to abide by the list of
+# conditions herein which includes the Non-Commercial Use restrictions;
+# and (ii) such Derivatives of the software include the above copyright
+# notice to acknowledge the contribution from this software where
+# applicable, this list of conditions and the disclaimer below.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Gabe Black
+
+microcode = '''
+def macroop MULSD_R_R {
+    mulfp xmml, xmml, xmmlm
+};
+
+def macroop MULSD_R_M {
+    ldfp ufp1, seg, sib, disp
+    mulfp xmml, xmml, ufp1
+};
+
+def macroop MULSD_R_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, disp
+    mulfp xmml, xmml, ufp1
+};
+
+def macroop DIVSD_R_R {
+    divfp xmml, xmml, xmmlm
+};
+
+def macroop DIVSD_R_M {
+    ldfp ufp1, seg, sib, disp
+    divfp xmml, xmml, ufp1
+};
+
+def macroop DIVSD_R_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, disp
+    divfp xmml, xmml, ufp1
+};
+'''
diff --git a/src/arch/x86/isa/macroop.isa b/src/arch/x86/isa/macroop.isa
index 3f33c8cfe..c9c33f981 100644
--- a/src/arch/x86/isa/macroop.isa
+++ b/src/arch/x86/isa/macroop.isa
@@ -221,6 +221,11 @@ let {{
                 self.dataSize = 1
             elif self.size == 'd':
                 self.dataSize = 4
+            #This is for "double plus" which is normally a double word unless
+            #the REX W bit is set, in which case it's a quad word. It's used
+            #for some SSE instructions.
+            elif self.size == 'dp':
+                self.dataSize = "(REX_W ? 8 : 4)"
             elif self.size == 'q':
                 self.dataSize = 8
             elif self.size == 'v':
@@ -251,7 +256,7 @@ let {{
             if not self.size:
                 self.size = size
             else:
-                if self.size is not size:
+                if self.size != size:
                     raise Exception, "Conflicting register sizes %s and %s!" %\
                         (self.size, size)
 }};
diff --git a/src/arch/x86/isa/microasm.isa b/src/arch/x86/isa/microasm.isa
index dcc20c708..579909506 100644
--- a/src/arch/x86/isa/microasm.isa
+++ b/src/arch/x86/isa/microasm.isa
@@ -75,6 +75,8 @@ let {{
     # Add in symbols for the microcode registers
     for num in range(15):
         assembler.symbols["t%d" % num] = "NUM_INTREGS+%d" % num
+    for num in range(7):
+        assembler.symbols["ufp%d" % num] = "FLOATREG_MICROFP(%d)" % num
     # Add in symbols for the segment descriptor registers
     for letter in ("C", "D", "E", "F", "G", "S"):
         assembler.symbols["%ss" % letter.lower()] = "SEGMENT_REG_%sS" % letter
diff --git a/src/arch/x86/isa/microops/regop.isa b/src/arch/x86/isa/microops/regop.isa
index a0477dab7..dca6d7377 100644
--- a/src/arch/x86/isa/microops/regop.isa
+++ b/src/arch/x86/isa/microops/regop.isa
@@ -453,7 +453,7 @@ let {{
             '''
 
     class Mulel(FlagRegOp):
-        code = 'DestReg = merge(DestReg, psrc1 * op2, dataSize)'
+        code = 'DestReg = merge(DestReg, psrc1 * op2, dataSize);'
 
     class Muleh(FlagRegOp):
         code = '''
@@ -488,9 +488,12 @@ let {{
         code = 'DestReg = merge(SrcReg1, op2, dataSize)'
         else_code = 'DestReg=DestReg;'
 
+    class Xorfp(RegOp):
+        code = 'FpDestReg.uqw = FpSrcReg1.uqw ^ FpSrcReg2.uqw;'
+
     class Movfp(CondRegOp):
-        code = 'FpDestReg = FpSrcReg2'
-        else_code = 'FpDestReg = FpDestReg;'
+        code = 'FpDestReg.uqw = FpSrcReg2.uqw;'
+        else_code = 'FpDestReg.uqw = FpDestReg.uqw;'
 
     # Shift instructions
 
@@ -636,4 +639,37 @@ let {{
 
     class Zext(RegOp):
         code = 'DestReg = bits(psrc1, imm8-1, 0);'
+
+    # Conversion microops
+    class ConvOp(RegOp):
+        abstract = True
+        def __init__(self, dest, src1):
+            super(ConvOp, self).__init__(dest, src1, "NUM_INTREGS")
+
+    #FIXME This needs to always use 32 bits unless REX.W is present
+    class cvtf_i2d(ConvOp):
+        code = 'FpDestReg = psrc1;'
+
+    class cvtf_i2d_hi(ConvOp):
+        code = 'FpDestReg = bits(SrcReg1, 63, 32);'
+
+    class cvtf_d2i(ConvOp):
+        code = '''
+        int64_t intSrcReg1 = static_cast<int64_t>(FpSrcReg1);
+        DestReg = merge(DestReg, intSrcReg1, dataSize);
+        '''
+
+    # These need to consider size at some point. They'll always use doubles
+    # for the moment.
+    class addfp(RegOp):
+        code = 'FpDestReg = FpSrcReg1 + FpSrcReg2;'
+
+    class mulfp(RegOp):
+        code = 'FpDestReg = FpSrcReg1 * FpSrcReg2;'
+
+    class divfp(RegOp):
+        code = 'FpDestReg = FpSrcReg1 / FpSrcReg2;'
+
+    class subfp(RegOp):
+        code = 'FpDestReg = FpSrcReg1 - FpSrcReg2;'
 }};
diff --git a/src/arch/x86/x86_traits.hh b/src/arch/x86/x86_traits.hh
index 381a60a63..aa5b959d1 100644
--- a/src/arch/x86/x86_traits.hh
+++ b/src/arch/x86/x86_traits.hh
@@ -65,6 +65,7 @@ namespace X86ISA
 
     const int NumMMXRegs = 8;
     const int NumXMMRegs = 16;
+    const int NumMicroFpRegs = 8;
 
     const int NumCRegs = 16;
     const int NumDRegs = 8;
diff --git a/src/cpu/nativetrace.cc b/src/cpu/nativetrace.cc
index 0db61af2c..7152602fe 100644
--- a/src/cpu/nativetrace.cc
+++ b/src/cpu/nativetrace.cc
@@ -84,6 +84,19 @@ NativeTrace::checkR11Reg(const char * name, uint64_t &mVal, uint64_t &nVal)
     return true;
 }
 
+bool
+NativeTrace::checkXMM(int num, uint64_t mXmmBuf[], uint64_t nXmmBuf[])
+{
+    if (mXmmBuf[num * 2]     != nXmmBuf[num * 2] ||
+        mXmmBuf[num * 2 + 1] != nXmmBuf[num * 2 + 1]) {
+        DPRINTFN("Register xmm%d should be 0x%016x%016x but is 0x%016x%016x.\n",
+                num, nXmmBuf[num * 2 + 1], nXmmBuf[num * 2],
+                     mXmmBuf[num * 2 + 1], mXmmBuf[num * 2]);
+        return false;
+    }
+    return true;
+}
+
 void
 Trace::NativeTraceRecord::dump()
 {
@@ -127,6 +140,22 @@ Trace::NativeTrace::check(ThreadContext * tc, bool isSyscall)
     checkReg("r14", mState.r14, nState.r14);
     checkReg("r15", mState.r15, nState.r15);
     checkReg("rip", mState.rip, nState.rip);
+    checkXMM(0, mState.xmm, nState.xmm);
+    checkXMM(1, mState.xmm, nState.xmm);
+    checkXMM(2, mState.xmm, nState.xmm);
+    checkXMM(3, mState.xmm, nState.xmm);
+    checkXMM(4, mState.xmm, nState.xmm);
+    checkXMM(5, mState.xmm, nState.xmm);
+    checkXMM(6, mState.xmm, nState.xmm);
+    checkXMM(7, mState.xmm, nState.xmm);
+    checkXMM(8, mState.xmm, nState.xmm);
+    checkXMM(9, mState.xmm, nState.xmm);
+    checkXMM(10, mState.xmm, nState.xmm);
+    checkXMM(11, mState.xmm, nState.xmm);
+    checkXMM(12, mState.xmm, nState.xmm);
+    checkXMM(13, mState.xmm, nState.xmm);
+    checkXMM(14, mState.xmm, nState.xmm);
+    checkXMM(15, mState.xmm, nState.xmm);
 #if THE_ISA == SPARC_ISA
     /*for(int f = 0; f <= 62; f+=2)
     {
diff --git a/src/cpu/nativetrace.hh b/src/cpu/nativetrace.hh
index 6fd624211..ab038c4c3 100644
--- a/src/cpu/nativetrace.hh
+++ b/src/cpu/nativetrace.hh
@@ -37,6 +37,7 @@
 #include "sim/host.hh"
 #include "sim/insttracer.hh"
 #include "arch/x86/intregs.hh"
+#include "arch/x86/floatregs.hh"
 
 class ThreadContext;
 
@@ -91,6 +92,9 @@ class NativeTrace : public InstTracer
         uint64_t r14;
         uint64_t r15;
         uint64_t rip;
+        //This should be expanded to 16 if x87 registers are considered
+        uint64_t mmx[8];
+        uint64_t xmm[32];
 
         void update(int fd)
         {
@@ -121,6 +125,11 @@ class NativeTrace : public InstTracer
             r14 = TheISA::gtoh(r14);
             r15 = TheISA::gtoh(r15);
             rip = TheISA::gtoh(rip);
+            //This should be expanded if x87 registers are considered
+            for (int i = 0; i < 8; i++)
+                mmx[i] = TheISA::gtoh(mmx[i]);
+            for (int i = 0; i < 32; i++)
+                xmm[i] = TheISA::gtoh(xmm[i]);
         }
 
         void update(ThreadContext * tc)
@@ -142,6 +151,11 @@ class NativeTrace : public InstTracer
             r14 = tc->readIntReg(X86ISA::INTREG_R14);
             r15 = tc->readIntReg(X86ISA::INTREG_R15);
             rip = tc->readNextPC();
+            //This should be expanded if x87 registers are considered
+            for (int i = 0; i < 8; i++)
+                mmx[i] = tc->readFloatRegBits(X86ISA::FLOATREG_MMX(i));
+            for (int i = 0; i < 32; i++)
+                xmm[i] = tc->readFloatRegBits(X86ISA::FLOATREG_XMM_BASE + i);
         }
 
     };
@@ -171,6 +185,9 @@ class NativeTrace : public InstTracer
     bool
     checkR11Reg(const char * regName, uint64_t &, uint64_t &);
 
+    bool
+    checkXMM(int num, uint64_t mXmmBuf[], uint64_t nXmmBuf[]);
+
     NativeTrace(const Params *p);
 
     NativeTraceRecord *
diff --git a/util/statetrace/arch/tracechild_amd64.cc b/util/statetrace/arch/tracechild_amd64.cc
index 112ee793e..2809bf4c8 100644
--- a/util/statetrace/arch/tracechild_amd64.cc
+++ b/util/statetrace/arch/tracechild_amd64.cc
@@ -52,83 +52,219 @@ char * AMD64TraceChild::regNames[numregs] = {
                 //PC
                 "rip",
                 //Flags
-                "eflags"};
+                "eflags",
+                //MMX
+                "mmx0_0", "mmx0_1",
+                "mmx1_0", "mmx1_1",
+                "mmx2_0", "mmx2_1",
+                "mmx3_0", "mmx3_1",
+                "mmx4_0", "mmx4_1",
+                "mmx5_0", "mmx5_1",
+                "mmx6_0", "mmx6_1",
+                "mmx7_0", "mmx7_1",
+                //XMM
+                "xmm0_0",  "xmm0_1",  "xmm0_2",  "xmm0_3",
+                "xmm1_0",  "xmm1_1",  "xmm1_2",  "xmm1_3",
+                "xmm2_0",  "xmm2_1",  "xmm2_2",  "xmm2_3",
+                "xmm3_0",  "xmm3_1",  "xmm3_2",  "xmm3_3",
+                "xmm4_0",  "xmm4_1",  "xmm4_2",  "xmm4_3",
+                "xmm5_0",  "xmm5_1",  "xmm5_2",  "xmm5_3",
+                "xmm6_0",  "xmm6_1",  "xmm6_2",  "xmm6_3",
+                "xmm7_0",  "xmm7_1",  "xmm7_2",  "xmm7_3",
+                "xmm8_0",  "xmm8_1",  "xmm8_2",  "xmm8_3",
+                "xmm9_0",  "xmm9_1",  "xmm9_2",  "xmm9_3",
+                "xmm10_0", "xmm10_1", "xmm10_2", "xmm10_3",
+                "xmm11_0", "xmm11_1", "xmm11_2", "xmm11_3",
+                "xmm12_0", "xmm12_1", "xmm12_2", "xmm12_3",
+                "xmm13_0", "xmm13_1", "xmm13_2", "xmm13_3",
+                "xmm14_0", "xmm14_1", "xmm14_2", "xmm14_3",
+                "xmm15_0", "xmm15_1", "xmm15_2", "xmm15_3"};
 
 bool AMD64TraceChild::sendState(int socket)
 {
-    uint64_t regVal = 0;
+    uint64_t regVal64 = 0;
+    uint32_t regVal32 = 0;
     for(int x = 0; x <= R15; x++)
     {
-        regVal = getRegVal(x);
-        if(write(socket, &regVal, sizeof(regVal)) == -1)
+        regVal64 = getRegVal(x);
+        if(write(socket, &regVal64, sizeof(regVal64)) == -1)
         {
             cerr << "Write failed! " << strerror(errno) << endl;
             tracing = false;
             return false;
         }
     }
-    regVal = getRegVal(RIP);
-    if(write(socket, &regVal, sizeof(regVal)) == -1)
+    regVal64 = getRegVal(RIP);
+    if(write(socket, &regVal64, sizeof(regVal64)) == -1)
     {
         cerr << "Write failed! " << strerror(errno) << endl;
         tracing = false;
         return false;
     }
+    for(int x = MMX0_0; x <= MMX7_1; x++)
+    {
+        regVal32 = getRegVal(x);
+        if(write(socket, &regVal32, sizeof(regVal32)) == -1)
+        {
+            cerr << "Write failed! " << strerror(errno) << endl;
+            tracing = false;
+            return false;
+        }
+    }
+    for(int x = XMM0_0; x <= XMM15_3; x++)
+    {
+        regVal32 = getRegVal(x);
+        if(write(socket, &regVal32, sizeof(regVal32)) == -1)
+        {
+            cerr << "Write failed! " << strerror(errno) << endl;
+            tracing = false;
+            return false;
+        }
+    }
     return true;
 }
 
-int64_t AMD64TraceChild::getRegs(user_regs_struct & myregs, int num)
+int64_t AMD64TraceChild::getRegs(user_regs_struct & myregs,
+        user_fpregs_struct & myfpregs, int num)
 {
-        assert(num < numregs && num >= 0);
-        switch(num)
-        {
-                //GPRs
-                case RAX: return myregs.rax;
-                case RBX: return myregs.rbx;
-                case RCX: return myregs.rcx;
-                case RDX: return myregs.rdx;
-                //Index registers
-                case RSI: return myregs.rsi;
-                case RDI: return myregs.rdi;
-                //Base pointer and stack pointer
-                case RBP: return myregs.rbp;
-                case RSP: return myregs.rsp;
-                //New 64 bit mode registers
-                case R8: return myregs.r8;
-                case R9: return myregs.r9;
-                case R10: return myregs.r10;
-                case R11: return myregs.r11;
-                case R12: return myregs.r12;
-                case R13: return myregs.r13;
-                case R14: return myregs.r14;
-                case R15: return myregs.r15;
-                //Segmentation registers
-                case CS: return myregs.cs;
-                case DS: return myregs.ds;
-                case ES: return myregs.es;
-                case FS: return myregs.fs;
-                case GS: return myregs.gs;
-                case SS: return myregs.ss;
-                case FS_BASE: return myregs.fs_base;
-                case GS_BASE: return myregs.gs_base;
-                //PC
-                case RIP: return myregs.rip;
-                //Flags
-                case EFLAGS: return myregs.eflags;
-                default:
-                        assert(0);
-                        return 0;
-        }
+    assert(num < numregs && num >= 0);
+    switch(num)
+    {
+        //GPRs
+        case RAX: return myregs.rax;
+        case RBX: return myregs.rbx;
+        case RCX: return myregs.rcx;
+        case RDX: return myregs.rdx;
+        //Index registers
+        case RSI: return myregs.rsi;
+        case RDI: return myregs.rdi;
+        //Base pointer and stack pointer
+        case RBP: return myregs.rbp;
+        case RSP: return myregs.rsp;
+        //New 64 bit mode registers
+        case R8: return myregs.r8;
+        case R9: return myregs.r9;
+        case R10: return myregs.r10;
+        case R11: return myregs.r11;
+        case R12: return myregs.r12;
+        case R13: return myregs.r13;
+        case R14: return myregs.r14;
+        case R15: return myregs.r15;
+        //Segmentation registers
+        case CS: return myregs.cs;
+        case DS: return myregs.ds;
+        case ES: return myregs.es;
+        case FS: return myregs.fs;
+        case GS: return myregs.gs;
+        case SS: return myregs.ss;
+        case FS_BASE: return myregs.fs_base;
+        case GS_BASE: return myregs.gs_base;
+        //PC
+        case RIP: return myregs.rip;
+        //Flags
+        case EFLAGS: return myregs.eflags;
+        //MMX
+        case MMX0_0: return myfpregs.st_space[0];
+        case MMX0_1: return myfpregs.st_space[1];
+        case MMX1_0: return myfpregs.st_space[2];
+        case MMX1_1: return myfpregs.st_space[3];
+        case MMX2_0: return myfpregs.st_space[4];
+        case MMX2_1: return myfpregs.st_space[5];
+        case MMX3_0: return myfpregs.st_space[6];
+        case MMX3_1: return myfpregs.st_space[7];
+        case MMX4_0: return myfpregs.st_space[8];
+        case MMX4_1: return myfpregs.st_space[9];
+        case MMX5_0: return myfpregs.st_space[10];
+        case MMX5_1: return myfpregs.st_space[11];
+        case MMX6_0: return myfpregs.st_space[12];
+        case MMX6_1: return myfpregs.st_space[13];
+        case MMX7_0: return myfpregs.st_space[14];
+        case MMX7_1: return myfpregs.st_space[15];
+        //XMM
+        case XMM0_0: return myfpregs.xmm_space[0];
+        case XMM0_1: return myfpregs.xmm_space[1];
+        case XMM0_2: return myfpregs.xmm_space[2];
+        case XMM0_3: return myfpregs.xmm_space[3];
+        case XMM1_0: return myfpregs.xmm_space[4];
+        case XMM1_1: return myfpregs.xmm_space[5];
+        case XMM1_2: return myfpregs.xmm_space[6];
+        case XMM1_3: return myfpregs.xmm_space[7];
+        case XMM2_0: return myfpregs.xmm_space[8];
+        case XMM2_1: return myfpregs.xmm_space[9];
+        case XMM2_2: return myfpregs.xmm_space[10];
+        case XMM2_3: return myfpregs.xmm_space[11];
+        case XMM3_0: return myfpregs.xmm_space[12];
+        case XMM3_1: return myfpregs.xmm_space[13];
+        case XMM3_2: return myfpregs.xmm_space[14];
+        case XMM3_3: return myfpregs.xmm_space[15];
+        case XMM4_0: return myfpregs.xmm_space[16];
+        case XMM4_1: return myfpregs.xmm_space[17];
+        case XMM4_2: return myfpregs.xmm_space[18];
+        case XMM4_3: return myfpregs.xmm_space[19];
+        case XMM5_0: return myfpregs.xmm_space[20];
+        case XMM5_1: return myfpregs.xmm_space[21];
+        case XMM5_2: return myfpregs.xmm_space[22];
+        case XMM5_3: return myfpregs.xmm_space[23];
+        case XMM6_0: return myfpregs.xmm_space[24];
+        case XMM6_1: return myfpregs.xmm_space[25];
+        case XMM6_2: return myfpregs.xmm_space[26];
+        case XMM6_3: return myfpregs.xmm_space[27];
+        case XMM7_0: return myfpregs.xmm_space[28];
+        case XMM7_1: return myfpregs.xmm_space[29];
+        case XMM7_2: return myfpregs.xmm_space[30];
+        case XMM7_3: return myfpregs.xmm_space[31];
+        case XMM8_0: return myfpregs.xmm_space[32];
+        case XMM8_1: return myfpregs.xmm_space[33];
+        case XMM8_2: return myfpregs.xmm_space[34];
+        case XMM8_3: return myfpregs.xmm_space[35];
+        case XMM9_0: return myfpregs.xmm_space[36];
+        case XMM9_1: return myfpregs.xmm_space[37];
+        case XMM9_2: return myfpregs.xmm_space[38];
+        case XMM9_3: return myfpregs.xmm_space[39];
+        case XMM10_0: return myfpregs.xmm_space[40];
+        case XMM10_1: return myfpregs.xmm_space[41];
+        case XMM10_2: return myfpregs.xmm_space[42];
+        case XMM10_3: return myfpregs.xmm_space[43];
+        case XMM11_0: return myfpregs.xmm_space[44];
+        case XMM11_1: return myfpregs.xmm_space[45];
+        case XMM11_2: return myfpregs.xmm_space[46];
+        case XMM11_3: return myfpregs.xmm_space[47];
+        case XMM12_0: return myfpregs.xmm_space[48];
+        case XMM12_1: return myfpregs.xmm_space[49];
+        case XMM12_2: return myfpregs.xmm_space[50];
+        case XMM12_3: return myfpregs.xmm_space[51];
+        case XMM13_0: return myfpregs.xmm_space[52];
+        case XMM13_1: return myfpregs.xmm_space[53];
+        case XMM13_2: return myfpregs.xmm_space[54];
+        case XMM13_3: return myfpregs.xmm_space[55];
+        case XMM14_0: return myfpregs.xmm_space[56];
+        case XMM14_1: return myfpregs.xmm_space[57];
+        case XMM14_2: return myfpregs.xmm_space[58];
+        case XMM14_3: return myfpregs.xmm_space[59];
+        case XMM15_0: return myfpregs.xmm_space[60];
+        case XMM15_1: return myfpregs.xmm_space[61];
+        case XMM15_2: return myfpregs.xmm_space[62];
+        case XMM15_3: return myfpregs.xmm_space[63];
+        default:
+                assert(0);
+                return 0;
+    }
 }
 
 bool AMD64TraceChild::update(int pid)
 {
     oldregs = regs;
+    oldfpregs = fpregs;
     if(ptrace(PTRACE_GETREGS, pid, 0, &regs) != 0)
     {
         cerr << "update: " << strerror(errno) << endl;
         return false;
     }
+    if(ptrace(PTRACE_GETFPREGS, pid, 0, &fpregs) != 0)
+    {
+        cerr << "update: " << strerror(errno) << endl;
+        return false;
+    }
     for(unsigned int x = 0; x < numregs; x++)
         regDiffSinceUpdate[x] = (getRegVal(x) != getOldRegVal(x));
     return true;
@@ -142,12 +278,12 @@ AMD64TraceChild::AMD64TraceChild()
 
 int64_t AMD64TraceChild::getRegVal(int num)
 {
-        return getRegs(regs, num);
+        return getRegs(regs, fpregs, num);
 }
 
 int64_t AMD64TraceChild::getOldRegVal(int num)
 {
-        return getRegs(oldregs, num);
+        return getRegs(oldregs, oldfpregs, num);
 }
 
 char * AMD64TraceChild::printReg(int num)
diff --git a/util/statetrace/arch/tracechild_amd64.hh b/util/statetrace/arch/tracechild_amd64.hh
index e7457f677..1ab11d767 100644
--- a/util/statetrace/arch/tracechild_amd64.hh
+++ b/util/statetrace/arch/tracechild_amd64.hh
@@ -58,14 +58,43 @@ class AMD64TraceChild : public TraceChild
         RIP,
         //Flags
         EFLAGS,
+        //MMX
+        MMX0_0, MMX0_1,
+        MMX1_0, MMX1_1,
+        MMX2_0, MMX2_1,
+        MMX3_0, MMX3_1,
+        MMX4_0, MMX4_1,
+        MMX5_0, MMX5_1,
+        MMX6_0, MMX6_1,
+        MMX7_0, MMX7_1,
+        //XMM
+        XMM0_0,  XMM0_1,  XMM0_2,  XMM0_3,
+        XMM1_0,  XMM1_1,  XMM1_2,  XMM1_3,
+        XMM2_0,  XMM2_1,  XMM2_2,  XMM2_3,
+        XMM3_0,  XMM3_1,  XMM3_2,  XMM3_3,
+        XMM4_0,  XMM4_1,  XMM4_2,  XMM4_3,
+        XMM5_0,  XMM5_1,  XMM5_2,  XMM5_3,
+        XMM6_0,  XMM6_1,  XMM6_2,  XMM6_3,
+        XMM7_0,  XMM7_1,  XMM7_2,  XMM7_3,
+        XMM8_0,  XMM8_1,  XMM8_2,  XMM8_3,
+        XMM9_0,  XMM9_1,  XMM9_2,  XMM9_3,
+        XMM10_0, XMM10_1, XMM10_2, XMM10_3,
+        XMM11_0, XMM11_1, XMM11_2, XMM11_3,
+        XMM12_0, XMM12_1, XMM12_2, XMM12_3,
+        XMM13_0, XMM13_1, XMM13_2, XMM13_3,
+        XMM14_0, XMM14_1, XMM14_2, XMM14_3,
+        XMM15_0, XMM15_1, XMM15_2, XMM15_3,
         numregs
     };
   private:
     char printBuffer [256];
     static char * regNames[numregs];
-    int64_t getRegs(user_regs_struct & myregs, int num);
+    int64_t getRegs(user_regs_struct & myregs,
+            user_fpregs_struct &myfpregs,int num);
     user_regs_struct regs;
     user_regs_struct oldregs;
+    user_fpregs_struct fpregs;
+    user_fpregs_struct oldfpregs;
     bool regDiffSinceUpdate[numregs];
 
     uint64_t findSyscall();