13 files changed, 388 insertions, 99 deletions
diff --git a/src/arch/x86/insts/static_inst.hh b/src/arch/x86/insts/static_inst.hh
index 22139fc77..e5c333e75 100644
--- a/src/arch/x86/insts/static_inst.hh
+++ b/src/arch/x86/insts/static_inst.hh
@@ -140,6 +140,27 @@ namespace X86ISA
                 panic("Tried to pick with unrecognized size %d.\n", size);
             }
         }
+
+        /**
+         * Return the signed field of 'from' matching an operand size given
+         * in bytes (1, 2, 4 or 8); any other size panics.
+         */
+        inline int64_t signedPick(uint64_t from, int idx, int size) const
+        {
+            X86IntReg fields = from;
+            DPRINTF(X86, "Picking with size %d\n", size);
+            // Bit 6 of the source register index selects bits 15:8 instead.
+            if (_srcRegIdx[idx] & (1 << 6))
+                return fields.SH;
+            switch (size) {
+              case 1: return fields.SL;
+              case 2: return fields.SX;
+              case 4: return fields.SE;
+              case 8: return fields.SR;
+              default:
+                panic("Tried to pick with unrecognized size %d.\n", size);
+            }
+        }
     };
 }
 
diff --git a/src/arch/x86/intregfile.hh b/src/arch/x86/intregfile.hh
index be6242a41..b4d256a04 100644
--- a/src/arch/x86/intregfile.hh
+++ b/src/arch/x86/intregfile.hh
@@ -105,7 +105,8 @@ namespace X86ISA
 
     const int NumIntArchRegs = NUM_INTREGS;
     const int NumIntRegs =
-        NumIntArchRegs + NumMicroIntRegs + NumPseudoIntRegs;
+        NumIntArchRegs + NumMicroIntRegs +
+        NumPseudoIntRegs + NumImplicitIntRegs;
 
     class IntRegFile
     {
diff --git a/src/arch/x86/intregs.hh b/src/arch/x86/intregs.hh
index bfec7d041..6f252392e 100644
--- a/src/arch/x86/intregs.hh
+++ b/src/arch/x86/intregs.hh
@@ -58,16 +58,22 @@
 #ifndef __ARCH_X86_INTREGS_HH__
 #define __ARCH_X86_INTREGS_HH__
 
+#include "arch/x86/x86_traits.hh"
 #include "base/bitunion.hh"
 
 namespace X86ISA
 {
     BitUnion64(X86IntReg)
         Bitfield<63,0> R;
+        SignedBitfield<63,0> SR;
         Bitfield<31,0> E;
+        SignedBitfield<31,0> SE;
         Bitfield<15,0> X;
+        SignedBitfield<15,0> SX;
         Bitfield<15,8> H;
+        SignedBitfield<15,8> SH;
         Bitfield<7, 0> L;
+        SignedBitfield<7, 0> SL;
     EndBitUnion(X86IntReg)
 
     enum IntRegIndex
@@ -158,6 +164,31 @@ namespace X86ISA
 
         NUM_INTREGS
     };
+
+    // Register index of microcode integer register number 'index'.
+    inline static IntRegIndex INTREG_MICRO(int index)
+    {
+        return static_cast<IntRegIndex>(NUM_INTREGS + index);
+    }
+
+    // Register index of pseudo integer register number 'index'.
+    inline static IntRegIndex INTREG_PSEUDO(int index)
+    {
+        return static_cast<IntRegIndex>(NUM_INTREGS + NumMicroIntRegs + index);
+    }
+
+    // Implicit registers are numbered right after the pseudo registers.
+    inline static IntRegIndex INTREG_IMPLICIT(int index)
+    {
+        return INTREG_PSEUDO(NumPseudoIntRegs + index);
+    }
+
+    // Sets foldBit in index when (index & 0x1C) == 4; else index is unchanged.
+    inline static IntRegIndex INTREG_FOLDED(int index, int foldBit)
+    {
+        const int fold = ((index & 0x1C) == 4) ? foldBit : 0;
+        return static_cast<IntRegIndex>(fold | index);
+    }
 };
 
 #endif // __ARCH_X86_INTREGS_HH__
diff --git a/src/arch/x86/isa/decoder/two_byte_opcodes.isa b/src/arch/x86/isa/decoder/two_byte_opcodes.isa
index fc5729540..eae994706 100644
--- a/src/arch/x86/isa/decoder/two_byte_opcodes.isa
+++ b/src/arch/x86/isa/decoder/two_byte_opcodes.isa
@@ -196,7 +196,7 @@
                     0x3: movntpd_Mo_Vo();
                     0x4: cvttpd2pi_Pq_Wo();
                     0x5: cvtpd2pi_Pq_Wo();
-                    0x6: ucomisd_Vq_Wq();
+                    0x6: Inst::UCOMISD(Vq,Wq);
                     0x7: comisd_Vq_Wq();
                 }
                 // repne (0xF2)
@@ -283,12 +283,14 @@
                     0x7: Inst::XORPD(Vq,Wq);
                     default: Inst::UD2();
                 }
-                // repne (0xF2)
-                0x8: decode OPCODE_OP_BOTTOM3 {
-                    0x1: sqrtsd_Vq_Wq();
-                    default: Inst::UD2();
+                format Inst {
+                    // repne (0xF2)
+                    0x8: decode OPCODE_OP_BOTTOM3 {
+                        0x1: SQRTSD(Vq,Wq);
+                        default: UD2();
+                    }
+                    default: UD2();
                 }
-                default: Inst::UD2();
             }
             0x0B: decode LEGACY_DECODEVAL {
                 // no prefix
diff --git a/src/arch/x86/isa/includes.isa b/src/arch/x86/isa/includes.isa
index f9e1e971c..9629a54e3 100644
--- a/src/arch/x86/isa/includes.isa
+++ b/src/arch/x86/isa/includes.isa
@@ -143,6 +143,7 @@ output exec {{
 #include <limits>
 
 #include <cmath>
+#include "arch/x86/miscregs.hh"
 #include "base/bigint.hh"
 #include "cpu/base.hh"
 #include "cpu/exetrace.hh"
diff --git a/src/arch/x86/isa/insts/arithmetic/multiply_and_divide.py b/src/arch/x86/isa/insts/arithmetic/multiply_and_divide.py
index f498a10e0..197de5fb3 100644
--- a/src/arch/x86/isa/insts/arithmetic/multiply_and_divide.py
+++ b/src/arch/x86/isa/insts/arithmetic/multiply_and_divide.py
@@ -61,20 +61,29 @@ microcode = '''
 
 def macroop MUL_B_R
 {
-    mul1u rax, rax, reg, dataSize="2"
+    mul1u rax, reg
+    mulel rax
+    # Really ah
+    muleh rsi, flags=(OF,CF)
 };
 
 def macroop MUL_B_M
 {
     ld t1, seg, sib, disp
-    mul1u rax, rax, t1, dataSize="2"
+    mul1u rax, t1
+    mulel rax
+    # Really ah
+    muleh rsi, flags=(OF,CF)
 };
 
 def macroop MUL_B_P
 {
     rdip t7
     ld t1, seg, riprel, disp
-    mul1u rax, rax, t1, dataSize="2"
+    mul1u rax, t1
+    mulel rax
+    # Really ah
+    muleh rsi, flags=(OF,CF)
 };
 
 #
@@ -83,27 +92,26 @@ def macroop MUL_B_P
 
 def macroop MUL_R
 {
-    # We need to store the result of the multiplication in a temporary
-    # and then move it later because reg may be rdx. If it is, we can't
-    # clobber its old value yet.
-    muleh t1, rax, reg
-    mulel rax, rax, reg
-    mov rdx, rdx, t1
+    mul1u rax, reg
+    mulel rax
+    muleh rdx, flags=(OF,CF)
 };
 
 def macroop MUL_M
 {
     ld t1, seg, sib, disp
-    muleh rdx, rax, t1
-    mulel rax, rax, t1
+    mul1u rax, t1
+    mulel rax
+    muleh rdx, flags=(OF,CF)
 };
 
 def macroop MUL_P
 {
     rdip t7
     ld t1, seg, riprel, disp
-    muleh rdx, rax, t1
-    mulel rax, rax, t1
+    mul1u rax, t1
+    mulel rax
+    muleh rdx, flags=(OF,CF)
 };
 
 #
@@ -112,20 +120,29 @@ def macroop MUL_P
 
 def macroop IMUL_B_R
 {
-    mul1s rax, rax, reg, dataSize="2"
+    mul1s rax, reg
+    mulel rax
+    # Really ah
+    muleh rsi, flags=(OF,CF)
 };
 
 def macroop IMUL_B_M
 {
     ld t1, seg, sib, disp
-    mul1s rax, rax, t1, dataSize="2"
+    mul1s rax, t1
+    mulel rax
+    # Really ah
+    muleh rsi, flags=(OF,CF)
 };
 
 def macroop IMUL_B_P
 {
     rdip t7
     ld t1, seg, riprel, disp
-    mul1s rax, rax, t1, dataSize="2"
+    mul1s rax, t1
+    mulel rax
+    # Really ah
+    muleh rsi, flags=(OF,CF)
 };
 
 #
@@ -134,47 +151,50 @@ def macroop IMUL_B_P
 
 def macroop IMUL_R
 {
-    muleh t1, rax, reg
-    mulel rax, rax, reg
-    mov rdx, rdx, t1
+    mul1s rax, reg
+    mulel rax
+    muleh rdx, flags=(OF,CF)
 };
 
 def macroop IMUL_M
 {
     ld t1, seg, sib, disp
-    muleh rdx, rax, t1
-    mulel rax, rax, t1
+    mul1s rax, t1
+    mulel rax
+    muleh rdx, flags=(OF,CF)
 };
 
 def macroop IMUL_P
 {
     rdip t7
     ld t1, seg, riprel, disp
-    muleh rdx, rax, t1
-    mulel rax, rax, t1
+    mul1s rax, t1
+    mulel rax
+    muleh rdx, flags=(OF,CF)
 };
 
-#
-# Two operand signed multiply. These should set the CF and OF flags if the
-# result is too large for the destination register
-#
-
 def macroop IMUL_R_R
 {
-    mulel reg, reg, regm
+    mul1s reg, regm
+    mulel reg
+    muleh t0, flags=(CF,OF)
 };
 
 def macroop IMUL_R_M
 {
     ld t1, seg, sib, disp
-    mulel reg, reg, t1
+    mul1s reg, t1
+    mulel reg
+    muleh t0, flags=(CF,OF)
 };
 
 def macroop IMUL_R_P
 {
     rdip t7
     ld t1, seg, riprel, disp
-    mulel reg, reg, t1
+    mul1s reg, t1
+    mulel reg
+    muleh t0, flags=(CF,OF)
 };
 
 #
@@ -184,14 +204,18 @@ def macroop IMUL_R_P
 def macroop IMUL_R_R_I
 {
     limm t1, imm
-    mulel reg, regm, t1
+    mul1s regm, t1
+    mulel reg
+    muleh t0, flags=(OF,CF)
 };
 
 def macroop IMUL_R_M_I
 {
     limm t1, imm
     ld t2, seg, sib, disp
-    mulel reg, t2, t1
+    mul1s t2, t1
+    mulel reg
+    muleh t0, flags=(OF,CF)
 };
 
 def macroop IMUL_R_P_I
@@ -199,7 +223,9 @@ def macroop IMUL_R_P_I
     rdip t7
     limm t1, imm
     ld t2, seg, riprel
-    mulel reg, t2, t1
+    mul1s t2, t1
+    mulel reg
+    muleh t0, flags=(OF,CF)
 };
 
 #
diff --git a/src/arch/x86/isa/insts/sse/__init__.py b/src/arch/x86/isa/insts/sse/__init__.py
index 976e2dd52..7985a124f 100644
--- a/src/arch/x86/isa/insts/sse/__init__.py
+++ b/src/arch/x86/isa/insts/sse/__init__.py
@@ -57,7 +57,9 @@ categories = ["move",
               "convert",
               "add_and_subtract",
               "multiply_and_divide",
-              "logical"]
+              "logical",
+              "compare",
+              "square_root"]
 
 microcode = '''
 # SSE instructions
diff --git a/src/arch/x86/isa/insts/sse/compare.py b/src/arch/x86/isa/insts/sse/compare.py
new file mode 100644
index 000000000..8721dffa7
--- /dev/null
+++ b/src/arch/x86/isa/insts/sse/compare.py
@@ -0,0 +1,71 @@
+# Copyright (c) 2007 The Hewlett-Packard Development Company
+# All rights reserved.
+#
+# Redistribution and use of this software in source and binary forms,
+# with or without modification, are permitted provided that the
+# following conditions are met:
+#
+# The software must be used only for Non-Commercial Use which means any
+# use which is NOT directed to receiving any direct monetary
+# compensation for, or commercial advantage from such use.  Illustrative
+# examples of non-commercial use are academic research, personal study,
+# teaching, education and corporate research & development.
+# Illustrative examples of commercial use are distributing products for
+# commercial advantage and providing services using the software for
+# commercial advantage.
+#
+# If you wish to use this software or functionality therein that may be
+# covered by patents for commercial use, please contact:
+#     Director of Intellectual Property Licensing
+#     Office of Strategy and Technology
+#     Hewlett-Packard Company
+#     1501 Page Mill Road
+#     Palo Alto, California  94304
+#
+# Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.  Redistributions
+# in binary form must reproduce the above copyright notice, this list of
+# conditions and the following disclaimer in the documentation and/or
+# other materials provided with the distribution.  Neither the name of
+# the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.  No right of
+# sublicense is granted herewith.  Derivatives of the software and
+# output created using the software may be prepared, but only for
+# Non-Commercial Uses.  Derivatives of the software may be shared with
+# others provided: (i) the others agree to abide by the list of
+# conditions herein which includes the Non-Commercial Use restrictions;
+# and (ii) such Derivatives of the software include the above copyright
+# notice to acknowledge the contribution from this software where
+# applicable, this list of conditions and the disclaimer below.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Gabe Black
+
+microcode = '''
+def macroop UCOMISD_R_R {
+    compfp xmml, xmmlm
+};
+
+def macroop UCOMISD_R_M {
+    ldfp ufp1, seg, sib, disp
+    compfp xmml, ufp1
+};
+
+def macroop UCOMISD_R_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, disp
+    compfp xmml, ufp1
+};
+'''
diff --git a/src/arch/x86/isa/insts/sse/square_root.py b/src/arch/x86/isa/insts/sse/square_root.py
new file mode 100644
index 000000000..473292386
--- /dev/null
+++ b/src/arch/x86/isa/insts/sse/square_root.py
@@ -0,0 +1,71 @@
+# Copyright (c) 2007 The Hewlett-Packard Development Company
+# All rights reserved.
+#
+# Redistribution and use of this software in source and binary forms,
+# with or without modification, are permitted provided that the
+# following conditions are met:
+#
+# The software must be used only for Non-Commercial Use which means any
+# use which is NOT directed to receiving any direct monetary
+# compensation for, or commercial advantage from such use.  Illustrative
+# examples of non-commercial use are academic research, personal study,
+# teaching, education and corporate research & development.
+# Illustrative examples of commercial use are distributing products for
+# commercial advantage and providing services using the software for
+# commercial advantage.
+#
+# If you wish to use this software or functionality therein that may be
+# covered by patents for commercial use, please contact:
+#     Director of Intellectual Property Licensing
+#     Office of Strategy and Technology
+#     Hewlett-Packard Company
+#     1501 Page Mill Road
+#     Palo Alto, California  94304
+#
+# Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.  Redistributions
+# in binary form must reproduce the above copyright notice, this list of
+# conditions and the following disclaimer in the documentation and/or
+# other materials provided with the distribution.  Neither the name of
+# the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.  No right of
+# sublicense is granted herewith.  Derivatives of the software and
+# output created using the software may be prepared, but only for
+# Non-Commercial Uses.  Derivatives of the software may be shared with
+# others provided: (i) the others agree to abide by the list of
+# conditions herein which includes the Non-Commercial Use restrictions;
+# and (ii) such Derivatives of the software include the above copyright
+# notice to acknowledge the contribution from this software where
+# applicable, this list of conditions and the disclaimer below.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Gabe Black
+
+microcode = '''
+def macroop SQRTSD_R_R {
+    sqrtfp xmml, xmml, xmmlm
+};
+
+def macroop SQRTSD_R_M {
+    ldfp ufp1, seg, sib, disp
+    sqrtfp xmml, xmml, ufp1
+};
+
+def macroop SQRTSD_R_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, disp
+    sqrtfp xmml, xmml, ufp1
+};
+'''
diff --git a/src/arch/x86/isa/microops/regop.isa b/src/arch/x86/isa/microops/regop.isa
index dca6d7377..7ce9dc27f 100644
--- a/src/arch/x86/isa/microops/regop.isa
+++ b/src/arch/x86/isa/microops/regop.isa
@@ -258,13 +258,18 @@ let {{
 
             # If op2 is used anywhere, make register and immediate versions
             # of this code.
-            matcher = re.compile("op2(?P<typeQual>\\.\\w+)?")
-            if matcher.search(allCode):
+            matcher = re.compile("(?<!\\w)(?P<prefix>s?)op2(?P<typeQual>\\.\\w+)?")
+            match = matcher.search(allCode)
+            if match:
+                # Substitute per match rather than from the first match only,
+                # so "sop2" still becomes "spsrc2" after a plain "op2".
+                src2_name = lambda m: "%spsrc2%s" % (
+                    m.group("prefix"), m.group("typeQual") or "")
                 self.buildCppClasses(name, Name, suffix,
-                        matcher.sub("psrc2", code),
-                        matcher.sub("psrc2", flag_code),
-                        matcher.sub("psrc2", cond_check),
-                        matcher.sub("psrc2", else_code))
+                        matcher.sub(src2_name, code),
+                        matcher.sub(src2_name, flag_code),
+                        matcher.sub(src2_name, cond_check),
+                        matcher.sub(src2_name, else_code))
                 self.buildCppClasses(name + "i", Name, suffix + "Imm",
                         matcher.sub("imm8", code),
                         matcher.sub("imm8", flag_code),
@@ -283,10 +288,17 @@ let {{
             # compute it.
             matcher = re.compile("(?<!\w)psrc1(?!\w)")
             if matcher.search(allCode):
-                code = "IntReg psrc1 = pick(SrcReg1, 0, dataSize);" + code
+                code = "uint64_t psrc1 = pick(SrcReg1, 0, dataSize);" + code
             matcher = re.compile("(?<!\w)psrc2(?!\w)")
             if matcher.search(allCode):
-                code = "IntReg psrc2 = pick(SrcReg2, 1, dataSize);" + code
+                code = "uint64_t psrc2 = pick(SrcReg2, 1, dataSize);" + code
+            # Also make available versions which do sign extension
+            matcher = re.compile("(?<!\w)spsrc1(?!\w)")
+            if matcher.search(allCode):
+                code = "int64_t spsrc1 = signedPick(SrcReg1, 0, dataSize);" + code
+            matcher = re.compile("(?<!\w)spsrc2(?!\w)")
+            if matcher.search(allCode):
+                code = "int64_t spsrc2 = signedPick(SrcReg2, 1, dataSize);" + code
 
             base = "X86ISA::RegOp"
 
@@ -409,6 +421,18 @@ let {{
         abstract = True
         cond_check = "checkCondition(ccFlagBits)"
 
+    class RdRegOp(RegOp):
+        abstract = True
+        def __init__(self, dest, src1=None, dataSize="env.dataSize"):
+            if not src1:
+                src1 = dest
+            super(RdRegOp, self).__init__(dest, src1, "NUM_INTREGS", None, dataSize)
+
+    class WrRegOp(RegOp):
+        abstract = True
+        def __init__(self, src1, src2, flags=None, dataSize="env.dataSize"):
+            super(WrRegOp, self).__init__("NUM_INTREGS", src1, src2, flags, dataSize)
+
     class Add(FlagRegOp):
         code = 'DestReg = merge(DestReg, psrc1 + op2, dataSize);'
 
@@ -436,39 +460,53 @@ let {{
     class Xor(LogicRegOp):
         code = 'DestReg = merge(DestReg, psrc1 ^ op2, dataSize)'
 
-    class Mul1s(FlagRegOp):
-        code = '''
-            int signPos = (dataSize * 8) / 2 - 1;
-            IntReg srcVal1 = psrc1 | (-bits(psrc1, signPos) << signPos);
-            IntReg srcVal2 = op2 | (-bits(psrc1, signPos) << signPos);
-            DestReg = merge(DestReg, srcVal1 * srcVal2, dataSize)
-            '''
-
-    class Mul1u(FlagRegOp):
+    class Mul1s(WrRegOp):
         code = '''
+            ProdLow = psrc1 * op2;
             int halfSize = (dataSize * 8) / 2;
-            IntReg srcVal1 = psrc1 & mask(halfSize);
-            IntReg srcVal2 = op2 & mask(halfSize);
-            DestReg = merge(DestReg, srcVal1 * srcVal2, dataSize)
+            int64_t spsrc1_h = spsrc1 >> halfSize;
+            int64_t spsrc1_l = spsrc1 & mask(halfSize);
+            int64_t spsrc2_h = sop2 >> halfSize;
+            int64_t spsrc2_l = sop2 & mask(halfSize);
+            ProdHi = ((spsrc1_l * spsrc2_h + spsrc1_h * spsrc2_l +
+                      ((spsrc1_l * spsrc2_l) >> halfSize)) >> halfSize) +
+                     spsrc1_h * spsrc2_h;
             '''
 
-    class Mulel(FlagRegOp):
-        code = 'DestReg = merge(DestReg, psrc1 * op2, dataSize);'
-
-    class Muleh(FlagRegOp):
+    class Mul1u(WrRegOp):
         code = '''
+            ProdLow = psrc1 * op2;
             int halfSize = (dataSize * 8) / 2;
             uint64_t psrc1_h = psrc1 >> halfSize;
             uint64_t psrc1_l = psrc1 & mask(halfSize);
             uint64_t psrc2_h = op2 >> halfSize;
             uint64_t psrc2_l = op2 & mask(halfSize);
-            uint64_t result =
-                ((psrc1_l * psrc2_h) >> halfSize) +
-                ((psrc1_h * psrc2_l) >> halfSize) +
-                psrc1_h * psrc2_h;
-            DestReg = merge(DestReg, result, dataSize);
+            ProdHi = ((psrc1_l * psrc2_h + psrc1_h * psrc2_l +
+                      ((psrc1_l * psrc2_l) >> halfSize)) >> halfSize) +
+                     psrc1_h * psrc2_h;
             '''
 
+    class Mulel(RdRegOp):
+        code = 'DestReg = merge(SrcReg1, ProdLow, dataSize);'
+
+    # Mul1s and Mul1u above rely on right shifts sign extending signed values
+    # and zero extending unsigned ones. The unsigned case is guaranteed, but
+    # right shifting a negative signed value is implementation-defined in
+    # C and C++ (before C++20). On x86 and under likely most compilers the
+    # "right thing" happens, but this isn't a guarantee.
+    class Muleh(RdRegOp):
+        def __init__(self, dest, src1=None, flags=None, dataSize="env.dataSize"):
+            if not src1:
+                src1 = dest
+            super(RdRegOp, self).__init__(dest, src1, "NUM_INTREGS", flags, dataSize)
+        code = 'DestReg = merge(SrcReg1, ProdHi, dataSize);'
+        flag_code = '''
+            if (ProdHi)
+                ccFlagBits = ccFlagBits | (ext & (CFBit | OFBit | ECFBit));
+            else
+                ccFlagBits = ccFlagBits & ~(ext & (CFBit | OFBit | ECFBit));
+        '''
+
     class Div1(FlagRegOp):
         code = '''
             int halfSize = (dataSize * 8) / 2;
@@ -488,13 +526,6 @@ let {{
         code = 'DestReg = merge(SrcReg1, op2, dataSize)'
         else_code = 'DestReg=DestReg;'
 
-    class Xorfp(RegOp):
-        code = 'FpDestReg.uqw = FpSrcReg1.uqw ^ FpSrcReg2.uqw;'
-
-    class Movfp(CondRegOp):
-        code = 'FpDestReg.uqw = FpSrcReg2.uqw;'
-        else_code = 'FpDestReg.uqw = FpDestReg.uqw;'
-
     # Shift instructions
 
     class Sll(FlagRegOp):
@@ -589,11 +620,6 @@ let {{
                 DestReg = DestReg;
             '''
 
-    class WrRegOp(RegOp):
-        abstract = True
-        def __init__(self, src1, src2, flags=None, dataSize="env.dataSize"):
-            super(WrRegOp, self).__init__("NUM_INTREGS", src1, src2, flags, dataSize)
-
     class Wrip(WrRegOp, CondRegOp):
         code = 'RIP = psrc1 + op2'
         else_code="RIP = RIP;"
@@ -605,11 +631,6 @@ let {{
     class Wruflags(WrRegOp):
         code = 'ccFlagBits = psrc1 ^ op2'
 
-    class RdRegOp(RegOp):
-        abstract = True
-        def __init__(self, dest, src1 = "NUM_INTREGS", dataSize="env.dataSize"):
-            super(RdRegOp, self).__init__(dest, src1, "NUM_INTREGS", None, dataSize)
-
     class Rdip(RdRegOp):
         code = 'DestReg = RIP'
 
@@ -640,6 +661,36 @@ let {{
     class Zext(RegOp):
         code = 'DestReg = bits(psrc1, imm8-1, 0);'
 
+    class Compfp(WrRegOp):
+        # This class sets the condition codes in rflags according to the
+        # rules for comparing floating point.
+        code = '''
+            //               ZF PF CF
+            // Unordered      1  1  1
+            // Greater than   0  0  0
+            // Less than      0  0  1
+            // Equal          1  0  0
+            //           OF = SF = AF = 0
+            ccFlagBits = ccFlagBits & ~(OFBit | SFBit | AFBit |
+                                        ZFBit | PFBit | CFBit);
+            if (isnan(FpSrcReg1) || isnan(FpSrcReg2))
+                ccFlagBits = ccFlagBits | (ZFBit | PFBit | CFBit);
+            else if(FpSrcReg1 < FpSrcReg2)
+                ccFlagBits = ccFlagBits | CFBit;
+            else if(FpSrcReg1 == FpSrcReg2)
+                ccFlagBits = ccFlagBits | ZFBit;
+        '''
+
+    class Xorfp(RegOp):
+        code = 'FpDestReg.uqw = FpSrcReg1.uqw ^ FpSrcReg2.uqw;'
+
+    class Sqrtfp(RegOp):
+        code = 'FpDestReg = sqrt(FpSrcReg2);'
+
+    class Movfp(CondRegOp):
+        code = 'FpDestReg.uqw = FpSrcReg2.uqw;'
+        else_code = 'FpDestReg.uqw = FpDestReg.uqw;'
+
     # Conversion microops
     class ConvOp(RegOp):
         abstract = True
@@ -648,7 +699,7 @@ let {{
 
     #FIXME This needs to always use 32 bits unless REX.W is present
     class cvtf_i2d(ConvOp):
-        code = 'FpDestReg = psrc1;'
+        code = 'FpDestReg = spsrc1;'
 
     class cvtf_i2d_hi(ConvOp):
         code = 'FpDestReg = bits(SrcReg1, 63, 32);'
diff --git a/src/arch/x86/isa/operands.isa b/src/arch/x86/isa/operands.isa
index 347322752..fae1aa5ca 100644
--- a/src/arch/x86/isa/operands.isa
+++ b/src/arch/x86/isa/operands.isa
@@ -95,13 +95,17 @@ def operand_types {{
 }};
 
 def operands {{
-        'SrcReg1':       ('IntReg', 'uqw', '(((src1 & 0x1C) == 4 ? foldOBit : 0) | src1)', 'IsInteger', 1),
-        'SrcReg2':       ('IntReg', 'uqw', '(((src2 & 0x1C) == 4 ? foldOBit : 0) | src2)', 'IsInteger', 2),
-        'Index':         ('IntReg', 'uqw', '(((index & 0x1C) == 4 ? foldABit : 0) | index)', 'IsInteger', 3),
-        'Base':          ('IntReg', 'uqw', '(((base & 0x1C) == 4 ? foldABit : 0) | base)', 'IsInteger', 4),
-        'DestReg':       ('IntReg', 'uqw', '(((dest & 0x1C) == 4 ? foldOBit : 0) | dest)', 'IsInteger', 5),
-        'Data':          ('IntReg', 'uqw', '(((data & 0x1C) == 4 ? foldOBit : 0) | data)', 'IsInteger', 6),
-        'rax':           ('IntReg', 'uqw', '(INTREG_RAX)', 'IsInteger', 7),
+        'SrcReg1':       ('IntReg', 'uqw', 'INTREG_FOLDED(src1, foldOBit)', 'IsInteger', 1),
+        'SrcReg2':       ('IntReg', 'uqw', 'INTREG_FOLDED(src2, foldOBit)', 'IsInteger', 2),
+        'Index':         ('IntReg', 'uqw', 'INTREG_FOLDED(index, foldABit)', 'IsInteger', 3),
+        'Base':          ('IntReg', 'uqw', 'INTREG_FOLDED(base, foldABit)', 'IsInteger', 4),
+        'DestReg':       ('IntReg', 'uqw', 'INTREG_FOLDED(dest, foldOBit)', 'IsInteger', 5),
+        'Data':          ('IntReg', 'uqw', 'INTREG_FOLDED(data, foldOBit)', 'IsInteger', 6),
+        'ProdLow':       ('IntReg', 'uqw', 'INTREG_IMPLICIT(0)', 'IsInteger', 7),
+        'ProdHi':        ('IntReg', 'uqw', 'INTREG_IMPLICIT(1)', 'IsInteger', 8),
+        'Quotient':      ('IntReg', 'uqw', 'INTREG_IMPLICIT(2)', 'IsInteger', 9),
+        'Remainder':     ('IntReg', 'uqw', 'INTREG_IMPLICIT(3)', 'IsInteger', 10),
+        'rax':           ('IntReg', 'uqw', '(INTREG_RAX)', 'IsInteger', 11),
         'FpSrcReg1':     ('FloatReg', 'df', 'src1', 'IsFloating', 20),
         'FpSrcReg2':     ('FloatReg', 'df', 'src2', 'IsFloating', 21),
         'FpDestReg':     ('FloatReg', 'df', 'dest', 'IsFloating', 22),
@@ -109,7 +113,7 @@ def operands {{
         'RIP':           ('NPC', 'uqw', None, (None, None, 'IsControl'), 50),
         'uIP':           ('UPC', 'uqw', None, (None, None, 'IsControl'), 51),
         'nuIP':          ('NUPC', 'uqw', None, (None, None, 'IsControl'), 52),
-        'ccFlagBits':    ('IntReg', 'uqw', 'NUM_INTREGS + NumMicroIntRegs', None, 60),
+        'ccFlagBits':    ('IntReg', 'uqw', 'INTREG_PSEUDO(0)', None, 60),
         'SegBase':       ('ControlReg', 'uqw', 'MISCREG_SEG_BASE(segment)', (None, None, ['IsSerializeAfter','IsSerializing','IsNonSpeculative']), 70),
         'Mem':           ('Mem', 'uqw', None, ('IsMemRef', 'IsLoad', 'IsStore'), 100)
 }};
diff --git a/src/arch/x86/isa_traits.hh b/src/arch/x86/isa_traits.hh
index 466422ced..602c99390 100644
--- a/src/arch/x86/isa_traits.hh
+++ b/src/arch/x86/isa_traits.hh
@@ -81,8 +81,9 @@ namespace X86ISA
 
     // These enumerate all the registers for dependence tracking.
     enum DependenceTags {
-        //There are 16 microcode registers at the moment
-        FP_Base_DepTag = 1 << 7,
+        //There are 16 microcode registers at the moment. This is an
+        //unusually large constant to make sure there isn't overflow.
+        FP_Base_DepTag = 128,
         Ctrl_Base_DepTag =
             FP_Base_DepTag +
             //mmx/x87 registers
diff --git a/src/arch/x86/x86_traits.hh b/src/arch/x86/x86_traits.hh
index aa5b959d1..9ea8eaef4 100644
--- a/src/arch/x86/x86_traits.hh
+++ b/src/arch/x86/x86_traits.hh
@@ -61,7 +61,14 @@
 namespace X86ISA
 {
     const int NumMicroIntRegs = 16;
+
     const int NumPseudoIntRegs = 1;
+    //Pseudo register 1: the condition code bits of the rflags register.
+    const int NumImplicitIntRegs = 4;
+    //Implicit register 1: the lower part of the result of multiplication.
+    //Implicit register 2: the upper part of the result of multiplication.
+    //Implicit register 3: the quotient from division.
+    //Implicit register 4: the remainder from division.
 
     const int NumMMXRegs = 8;
     const int NumXMMRegs = 16;