11 files changed, 189 insertions, 177 deletions
diff --git a/src/arch/x86/emulenv.cc b/src/arch/x86/emulenv.cc
index 8e4600a14..7e4a9fc55 100644
--- a/src/arch/x86/emulenv.cc
+++ b/src/arch/x86/emulenv.cc
@@ -118,8 +118,7 @@ void EmulEnv::doModRM(const ExtMachInst & machInst)
     //Figure out what segment to use. This won't be entirely accurate since
     //the presence of a displacement is supposed to make the instruction
     //default to the data segment.
-    if ((base != INTREG_RBP && base != INTREG_RSP) ||
-            0/*Has an immediate offset*/) {
+    if ((base != INTREG_RBP && base != INTREG_RSP) || machInst.dispSize) {
         seg = SEGMENT_REG_DS;
         //Handle any segment override that might have been in the instruction
         int segFromInst = machInst.legacy.seg;
diff --git a/src/arch/x86/insts/microregop.cc b/src/arch/x86/insts/microregop.cc
index 2edd3ba87..5982dff7a 100644
--- a/src/arch/x86/insts/microregop.cc
+++ b/src/arch/x86/insts/microregop.cc
@@ -78,7 +78,7 @@ namespace X86ISA
             if(subtract)
                 flags ^= (flagMask & (ECFBit | CFBit));
         }
-        if(flagMask & PFBit && findParity(dataSize*8, _dest))
+        if(flagMask & PFBit && !findParity(8, _dest))
             flags |= PFBit;
         if(flagMask & AFBit)
         {
diff --git a/src/arch/x86/isa/decoder/one_byte_opcodes.isa b/src/arch/x86/isa/decoder/one_byte_opcodes.isa
index 84d18441d..f365ed4b0 100644
--- a/src/arch/x86/isa/decoder/one_byte_opcodes.isa
+++ b/src/arch/x86/isa/decoder/one_byte_opcodes.isa
@@ -330,14 +330,8 @@
             //The 64 bit versions of both of these should be illegal only
             //if CPUID says it isn't supported. For now, we'll just assume
             //that it's supported.
-            0x6: decode MODE_SUBMODE {
-                0x0: SAHF_64();
-                default: SAHF();
-            }
-            0x7: decode MODE_SUBMODE {
-                0x0: LAHF_64();
-                default: LAHF();
-            }
+            0x6: SAHF();
+            0x7: LAHF();
         }
         0x14: decode OPCODE_OP_BOTTOM3 {
             0x0: MOV(rAb, Ob);
diff --git a/src/arch/x86/isa/insts/general_purpose/arithmetic/multiply_and_divide.py b/src/arch/x86/isa/insts/general_purpose/arithmetic/multiply_and_divide.py
index 19d1c7789..dbc803350 100644
--- a/src/arch/x86/isa/insts/general_purpose/arithmetic/multiply_and_divide.py
+++ b/src/arch/x86/isa/insts/general_purpose/arithmetic/multiply_and_divide.py
@@ -61,29 +61,26 @@ microcode = '''
 
 def macroop MUL_B_R
 {
-    mul1u rax, reg
+    mul1u rax, reg, flags=(OF,CF)
     mulel rax
-    # Really ah
-    muleh rsi, flags=(OF,CF)
+    muleh ah
 };
 
 def macroop MUL_B_M
 {
     ld t1, seg, sib, disp
-    mul1u rax, t1
+    mul1u rax, t1, flags=(OF,CF)
     mulel rax
-    # Really ah
-    muleh rsi, flags=(OF,CF)
+    muleh ah
 };
 
 def macroop MUL_B_P
 {
     rdip t7
     ld t1, seg, riprel, disp
-    mul1u rax, t1
+    mul1u rax, t1, flags=(OF,CF)
     mulel rax
-    # Really ah
-    muleh rsi, flags=(OF,CF)
+    muleh ah
 };
 
 #
@@ -92,26 +89,26 @@ def macroop MUL_B_P
 
 def macroop MUL_R
 {
-    mul1u rax, reg
+    mul1u rax, reg, flags=(OF,CF)
     mulel rax
-    muleh rdx, flags=(OF,CF)
+    muleh rdx
 };
 
 def macroop MUL_M
 {
     ld t1, seg, sib, disp
-    mul1u rax, t1
+    mul1u rax, t1, flags=(OF,CF)
     mulel rax
-    muleh rdx, flags=(OF,CF)
+    muleh rdx
 };
 
 def macroop MUL_P
 {
     rdip t7
     ld t1, seg, riprel, disp
-    mul1u rax, t1
+    mul1u rax, t1, flags=(OF,CF)
     mulel rax
-    muleh rdx, flags=(OF,CF)
+    muleh rdx
 };
 
 #
@@ -120,29 +117,26 @@ def macroop MUL_P
 
 def macroop IMUL_B_R
 {
-    mul1s rax, reg
+    mul1s rax, reg, flags=(OF,CF)
     mulel rax
-    # Really ah
-    muleh rsi, flags=(OF,CF)
+    muleh ah
 };
 
 def macroop IMUL_B_M
 {
     ld t1, seg, sib, disp
-    mul1s rax, t1
+    mul1s rax, t1, flags=(OF,CF)
     mulel rax
-    # Really ah
-    muleh rsi, flags=(OF,CF)
+    muleh ah
 };
 
 def macroop IMUL_B_P
 {
     rdip t7
     ld t1, seg, riprel, disp
-    mul1s rax, t1
+    mul1s rax, t1, flags=(OF,CF)
     mulel rax
-    # Really ah
-    muleh rsi, flags=(OF,CF)
+    muleh ah
 };
 
 #
@@ -151,50 +145,50 @@ def macroop IMUL_B_P
 
 def macroop IMUL_R
 {
-    mul1s rax, reg
+    mul1s rax, reg, flags=(OF,CF)
     mulel rax
-    muleh rdx, flags=(OF,CF)
+    muleh rdx
 };
 
 def macroop IMUL_M
 {
     ld t1, seg, sib, disp
-    mul1s rax, t1
+    mul1s rax, t1, flags=(OF,CF)
     mulel rax
-    muleh rdx, flags=(OF,CF)
+    muleh rdx
 };
 
 def macroop IMUL_P
 {
     rdip t7
     ld t1, seg, riprel, disp
-    mul1s rax, t1
+    mul1s rax, t1, flags=(OF,CF)
     mulel rax
-    muleh rdx, flags=(OF,CF)
+    muleh rdx
 };
 
 def macroop IMUL_R_R
 {
-    mul1s reg, regm
+    mul1s reg, regm, flags=(OF,CF)
     mulel reg
-    muleh t0, flags=(CF,OF)
+    muleh t0
 };
 
 def macroop IMUL_R_M
 {
     ld t1, seg, sib, disp
-    mul1s reg, t1
+    mul1s reg, t1, flags=(CF,OF)
     mulel reg
-    muleh t0, flags=(CF,OF)
+    muleh t0
 };
 
 def macroop IMUL_R_P
 {
     rdip t7
     ld t1, seg, riprel, disp
-    mul1s reg, t1
+    mul1s reg, t1, flags=(CF,OF)
     mulel reg
-    muleh t0, flags=(CF,OF)
+    muleh t0
 };
 
 #
@@ -204,18 +198,18 @@ def macroop IMUL_R_P
 def macroop IMUL_R_R_I
 {
     limm t1, imm
-    mul1s regm, t1
+    mul1s regm, t1, flags=(OF,CF)
     mulel reg
-    muleh t0, flags=(OF,CF)
+    muleh t0
 };
 
 def macroop IMUL_R_M_I
 {
     limm t1, imm
     ld t2, seg, sib, disp
-    mul1s t2, t1
+    mul1s t2, t1, flags=(OF,CF)
     mulel reg
-    muleh t0, flags=(OF,CF)
+    muleh t0
 };
 
 def macroop IMUL_R_P_I
@@ -223,9 +217,9 @@ def macroop IMUL_R_P_I
     rdip t7
     limm t1, imm
     ld t2, seg, riprel
-    mul1s t2, t1
+    mul1s t2, t1, flags=(OF,CF)
     mulel reg
-    muleh t0, flags=(OF,CF)
+    muleh t0
 };
 
 #
@@ -235,7 +229,7 @@ def macroop IMUL_R_P_I
 def macroop DIV_B_R
 {
     # Do the initial part of the division
-    div1 rsi, reg, dataSize=1
+    div1 ah, reg, dataSize=1
 
     #These are split out so we can initialize the number of bits in the
     #second register
@@ -250,7 +244,7 @@ divLoopTop:
 
     #Unload the answer
     divq rax, dataSize=1
-    divr rsi, dataSize=1
+    divr ah, dataSize=1
 };
 
 def macroop DIV_B_M
@@ -258,7 +252,7 @@ def macroop DIV_B_M
     ld t2, seg, sib, disp
 
     # Do the initial part of the division
-    div1 rsi, t2, dataSize=1
+    div1 ah, t2, dataSize=1
 
     #These are split out so we can initialize the number of bits in the
     #second register
@@ -273,7 +267,7 @@ divLoopTop:
 
     #Unload the answer
     divq rax, dataSize=1
-    divr rsi, dataSize=1
+    divr ah, dataSize=1
 };
 
 def macroop DIV_B_P
@@ -282,7 +276,7 @@ def macroop DIV_B_P
     ld t2, seg, riprel, disp
 
     # Do the initial part of the division
-    div1 rsi, t2, dataSize=1
+    div1 ah, t2, dataSize=1
 
     #These are split out so we can initialize the number of bits in the
     #second register
@@ -297,7 +291,7 @@ divLoopTop:
 
     #Unload the answer
     divq rax, dataSize=1
-    divr rsi, dataSize=1
+    divr ah, dataSize=1
 };
 
 #
@@ -390,7 +384,7 @@ def macroop IDIV_B_R
     # Negate dividend
     sub t1, t0, rax, flags=(ECF,), dataSize=1
     ruflag t4, 3
-    sub t2, t0, rsi, dataSize=1
+    sub t2, t0, ah, dataSize=1
     sub t2, t2, t4
 
     #Find the sign of the divisor
@@ -404,11 +398,11 @@ def macroop IDIV_B_R
 
     #Find the sign of the dividend
     #FIXME!!! This depends on shifts setting the carry flag correctly.
-    slli t0, rsi, 1, flags=(ECF,), dataSize=1
+    slli t0, ah, 1, flags=(ECF,), dataSize=1
 
     # Put the dividend's absolute value into t1 and t2
     mov t1, t1, rax, flags=(nCECF,), dataSize=1
-    mov t2, t2, rsi, flags=(nCECF,), dataSize=1
+    mov t2, t2, ah, flags=(nCECF,), dataSize=1
 
     # Do the initial part of the division
     div1 t2, t3, dataSize=1
@@ -435,10 +429,10 @@ divLoopTop:
 
     # Negate the remainder
     sub t4, t0, t6, dataSize=1
-    # If the dividend was negitive, put the negated remainder in rsi.
-    mov rsi, rsi, t4, (CECF,), dataSize=1
-    # Otherwise put the regular remainder in rsi.
-    mov rsi, rsi, t6, (nCECF,), dataSize=1
+    # If the dividend was negative, put the negated remainder in ah.
+    mov ah, ah, t4, (CECF,), dataSize=1
+    # Otherwise put the regular remainder in ah.
+    mov ah, ah, t6, (nCECF,), dataSize=1
 
     # Negate the quotient.
     sub t4, t0, t5, dataSize=1
@@ -461,7 +455,7 @@ def macroop IDIV_B_M
     # Negate dividend
     sub t1, t0, rax, flags=(ECF,), dataSize=1
     ruflag t4, 3
-    sub t2, t0, rsi, dataSize=1
+    sub t2, t0, ah, dataSize=1
     sub t2, t2, t4
 
     ld t3, seg, sib, disp
@@ -477,11 +471,11 @@ def macroop IDIV_B_M
 
     #Find the sign of the dividend
     #FIXME!!! This depends on shifts setting the carry flag correctly.
-    slli t0, rsi, 1, flags=(ECF,), dataSize=1
+    slli t0, ah, 1, flags=(ECF,), dataSize=1
 
     # Put the dividend's absolute value into t1 and t2
     mov t1, t1, rax, flags=(nCECF,), dataSize=1
-    mov t2, t2, rsi, flags=(nCECF,), dataSize=1
+    mov t2, t2, ah, flags=(nCECF,), dataSize=1
 
     # Do the initial part of the division
     div1 t2, t3, dataSize=1
@@ -508,10 +502,10 @@ divLoopTop:
 
     # Negate the remainder
     sub t4, t0, t6, dataSize=1
-    # If the dividend was negitive, put the negated remainder in rsi.
-    mov rsi, rsi, t4, (CECF,), dataSize=1
-    # Otherwise put the regular remainder in rsi.
-    mov rsi, rsi, t6, (nCECF,), dataSize=1
+    # If the dividend was negative, put the negated remainder in ah.
+    mov ah, ah, t4, (CECF,), dataSize=1
+    # Otherwise put the regular remainder in ah.
+    mov ah, ah, t6, (nCECF,), dataSize=1
 
     # Negate the quotient.
     sub t4, t0, t5, dataSize=1
@@ -534,7 +528,7 @@ def macroop IDIV_B_P
     # Negate dividend
     sub t1, t0, rax, flags=(ECF,), dataSize=1
     ruflag t4, 3
-    sub t2, t0, rsi, dataSize=1
+    sub t2, t0, ah, dataSize=1
     sub t2, t2, t4
 
     rdip t7
@@ -551,11 +545,11 @@ def macroop IDIV_B_P
 
     #Find the sign of the dividend
     #FIXME!!! This depends on shifts setting the carry flag correctly.
-    slli t0, rsi, 1, flags=(ECF,), dataSize=1
+    slli t0, ah, 1, flags=(ECF,), dataSize=1
 
     # Put the dividend's absolute value into t1 and t2
     mov t1, t1, rax, flags=(nCECF,), dataSize=1
-    mov t2, t2, rsi, flags=(nCECF,), dataSize=1
+    mov t2, t2, ah, flags=(nCECF,), dataSize=1
 
     # Do the initial part of the division
     div1 t2, t3, dataSize=1
@@ -582,10 +576,10 @@ divLoopTop:
 
     # Negate the remainder
     sub t4, t0, t6, dataSize=1
-    # If the dividend was negitive, put the negated remainder in rsi.
-    mov rsi, rsi, t4, (CECF,), dataSize=1
-    # Otherwise put the regular remainder in rsi.
-    mov rsi, rsi, t6, (nCECF,), dataSize=1
+    # If the dividend was negative, put the negated remainder in ah.
+    mov ah, ah, t4, (CECF,), dataSize=1
+    # Otherwise put the regular remainder in ah.
+    mov ah, ah, t6, (nCECF,), dataSize=1
 
     # Negate the quotient.
     sub t4, t0, t5, dataSize=1
diff --git a/src/arch/x86/isa/insts/general_purpose/flags/load_and_store.py b/src/arch/x86/isa/insts/general_purpose/flags/load_and_store.py
index 0915bf819..01908ca7b 100644
--- a/src/arch/x86/isa/insts/general_purpose/flags/load_and_store.py
+++ b/src/arch/x86/isa/insts/general_purpose/flags/load_and_store.py
@@ -55,26 +55,10 @@
 
 microcode = '''
 def macroop SAHF {
-    # This will fold to ah since this never executes in 64 bit mode.
-    ruflags rsp, dataSize=1
-};
-
-# This is allows the instruction to write to ah in 64 bit mode.
-def macroop SAHF_64 {
-    ruflags t1
-    slli t1, t1, 8
-    mov t1, t1, rax, dataSize=1
-    mov rax, rax, t1, dataSize=2
+    ruflags ah, dataSize=1
 };
 
 def macroop LAHF {
-    # This will fold to ah since this never executes in 64 bit mode.
-    wruflags rsp, t0, dataSize=1
-};
-
-# This is allows the instruction to read from ah in 64 bit mode.
-def macroop LAHF_64 {
-    srli t1, rax, 8, dataSize=2
-    wruflags t1, t0, dataSize=1
+    wruflags ah, t0, dataSize=1
 };
 '''
diff --git a/src/arch/x86/isa/insts/general_purpose/rotate_and_shift/rotate.py b/src/arch/x86/isa/insts/general_purpose/rotate_and_shift/rotate.py
index b5ae9560e..3be954768 100644
--- a/src/arch/x86/isa/insts/general_purpose/rotate_and_shift/rotate.py
+++ b/src/arch/x86/isa/insts/general_purpose/rotate_and_shift/rotate.py
@@ -56,13 +56,13 @@
 microcode = '''
 def macroop ROL_R_I
 {
-    roli reg, reg, imm
+    roli reg, reg, imm, flags=(OF,CF)
 };
 
 def macroop ROL_M_I
 {
     ldst t1, seg, sib, disp
-    roli t1, t1, imm
+    roli t1, t1, imm, flags=(OF,CF)
     st t1, seg, sib, disp
 };
 
@@ -70,19 +70,19 @@ def macroop ROL_P_I
 {
     rdip t7
     ldst t1, seg, riprel, disp
-    roli t1, t1, imm
+    roli t1, t1, imm, flags=(OF,CF)
     st t1, seg, riprel, disp
 };
 
 def macroop ROL_1_R
 {
-    roli reg, reg, 1
+    roli reg, reg, 1, flags=(OF,CF)
 };
 
 def macroop ROL_1_M
 {
     ldst t1, seg, sib, disp
-    roli t1, t1, 1
+    roli t1, t1, 1, flags=(OF,CF)
     st t1, seg, sib, disp
 };
 
@@ -90,19 +90,19 @@ def macroop ROL_1_P
 {
     rdip t7
     ldst t1, seg, riprel, disp
-    roli t1, t1, 1
+    roli t1, t1, 1, flags=(OF,CF)
     st t1, seg, riprel, disp
 };
 
 def macroop ROL_R_R
 {
-    rol reg, reg, regm
+    rol reg, reg, regm, flags=(OF,CF)
 };
 
 def macroop ROL_M_R
 {
     ldst t1, seg, sib, disp
-    rol t1, t1, reg
+    rol t1, t1, reg, flags=(OF,CF)
     st t1, seg, sib, disp
 };
 
@@ -110,19 +110,19 @@ def macroop ROL_P_R
 {
     rdip t7
     ldst t1, seg, riprel, disp
-    rol t1, t1, reg
+    rol t1, t1, reg, flags=(OF,CF)
     st t1, seg, riprel, disp
 };
 
 def macroop ROR_R_I
 {
-    rori reg, reg, imm
+    rori reg, reg, imm, flags=(OF,CF)
 };
 
 def macroop ROR_M_I
 {
     ldst t1, seg, sib, disp
-    rori t1, t1, imm
+    rori t1, t1, imm, flags=(OF,CF)
     st t1, seg, sib, disp
 };
 
@@ -130,19 +130,19 @@ def macroop ROR_P_I
 {
     rdip t7
     ldst t1, seg, riprel, disp
-    rori t1, t1, imm
+    rori t1, t1, imm, flags=(OF,CF)
     st t1, seg, riprel, disp
 };
 
 def macroop ROR_1_R
 {
-    rori reg, reg, 1
+    rori reg, reg, 1, flags=(OF,CF)
 };
 
 def macroop ROR_1_M
 {
     ldst t1, seg, sib, disp
-    rori t1, t1, 1
+    rori t1, t1, 1, flags=(OF,CF)
     st t1, seg, sib, disp
 };
 
@@ -150,19 +150,19 @@ def macroop ROR_1_P
 {
     rdip t7
     ldst t1, seg, riprel, disp
-    rori t1, t1, 1
+    rori t1, t1, 1, flags=(OF,CF)
     st t1, seg, riprel, disp
 };
 
 def macroop ROR_R_R
 {
-    ror reg, reg, regm
+    ror reg, reg, regm, flags=(OF,CF)
 };
 
 def macroop ROR_M_R
 {
     ldst t1, seg, sib, disp
-    ror t1, t1, reg
+    ror t1, t1, reg, flags=(OF,CF)
     st t1, seg, sib, disp
 };
 
@@ -170,19 +170,19 @@ def macroop ROR_P_R
 {
     rdip t7
     ldst t1, seg, riprel, disp
-    ror t1, t1, reg
+    ror t1, t1, reg, flags=(OF,CF)
     st t1, seg, riprel, disp
 };
 
 def macroop RCL_R_I
 {
-    rcli reg, reg, imm
+    rcli reg, reg, imm, flags=(OF,CF)
 };
 
 def macroop RCL_M_I
 {
     ldst t1, seg, sib, disp
-    rcli t1, t1, imm
+    rcli t1, t1, imm, flags=(OF,CF)
     st t1, seg, sib, disp
 };
 
@@ -190,19 +190,19 @@ def macroop RCL_P_I
 {
     rdip t7
     ldst t1, seg, riprel, disp
-    rcli t1, t1, imm
+    rcli t1, t1, imm, flags=(OF,CF)
     st t1, seg, riprel, disp
 };
 
 def macroop RCL_1_R
 {
-    rcli reg, reg, 1
+    rcli reg, reg, 1, flags=(OF,CF)
 };
 
 def macroop RCL_1_M
 {
     ldst t1, seg, sib, disp
-    rcli t1, t1, 1
+    rcli t1, t1, 1, flags=(OF,CF)
     st t1, seg, sib, disp
 };
 
@@ -210,19 +210,19 @@ def macroop RCL_1_P
 {
     rdip t7
     ldst t1, seg, riprel, disp
-    rcli t1, t1, 1
+    rcli t1, t1, 1, flags=(OF,CF)
     st t1, seg, riprel, disp
 };
 
 def macroop RCL_R_R
 {
-    rcl reg, reg, regm
+    rcl reg, reg, regm, flags=(OF,CF)
 };
 
 def macroop RCL_M_R
 {
     ldst t1, seg, sib, disp
-    rcl t1, t1, reg
+    rcl t1, t1, reg, flags=(OF,CF)
     st t1, seg, sib, disp
 };
 
@@ -230,19 +230,19 @@ def macroop RCL_P_R
 {
     rdip t7
     ldst t1, seg, riprel, disp
-    rcl t1, t1, reg
+    rcl t1, t1, reg, flags=(OF,CF)
     st t1, seg, riprel, disp
 };
 
 def macroop RCR_R_I
 {
-    rcri reg, reg, imm
+    rcri reg, reg, imm, flags=(OF,CF)
 };
 
 def macroop RCR_M_I
 {
     ldst t1, seg, sib, disp
-    rcri t1, t1, imm
+    rcri t1, t1, imm, flags=(OF,CF)
     st t1, seg, sib, disp
 };
 
@@ -250,19 +250,19 @@ def macroop RCR_P_I
 {
     rdip t7
     ldst t1, seg, riprel, disp
-    rcri t1, t1, imm
+    rcri t1, t1, imm, flags=(OF,CF)
     st t1, seg, riprel, disp
 };
 
 def macroop RCR_1_R
 {
-    rcri reg, reg, 1
+    rcri reg, reg, 1, flags=(OF,CF)
 };
 
 def macroop RCR_1_M
 {
     ldst t1, seg, sib, disp
-    rcri t1, t1, 1
+    rcri t1, t1, 1, flags=(OF,CF)
     st t1, seg, sib, disp
 };
 
@@ -270,19 +270,19 @@ def macroop RCR_1_P
 {
     rdip t7
     ldst t1, seg, riprel, disp
-    rcri t1, t1, 1
+    rcri t1, t1, 1, flags=(OF,CF)
     st t1, seg, riprel, disp
 };
 
 def macroop RCR_R_R
 {
-    rcr reg, reg, regm
+    rcr reg, reg, regm, flags=(OF,CF)
 };
 
 def macroop RCR_M_R
 {
     ldst t1, seg, sib, disp
-    rcr t1, t1, reg
+    rcr t1, t1, reg, flags=(OF,CF)
     st t1, seg, sib, disp
 };
 
@@ -290,7 +290,7 @@ def macroop RCR_P_R
 {
     rdip t7
     ldst t1, seg, riprel, disp
-    rcr t1, t1, reg
+    rcr t1, t1, reg, flags=(OF,CF)
     st t1, seg, riprel, disp
 };
 '''
diff --git a/src/arch/x86/isa/microasm.isa b/src/arch/x86/isa/microasm.isa
index 0cc72bf7b..c6f5e9cdd 100644
--- a/src/arch/x86/isa/microasm.isa
+++ b/src/arch/x86/isa/microasm.isa
@@ -151,6 +151,10 @@ let {{
         assembler.symbols["r%s" % reg] = \
             regIdx("INTREG_R%s" % reg.upper())
 
+    for reg in ('ah', 'bh', 'ch', 'dh'):
+        assembler.symbols[reg] = \
+            regIdx("INTREG_FOLDED(INTREG_%s, IntFoldBit)" % reg.upper())
+
     for reg in range(16):
         assembler.symbols["cr%d" % reg] = regIdx("MISCREG_CR%d" % reg)
 
diff --git a/src/arch/x86/isa/microops/regop.isa b/src/arch/x86/isa/microops/regop.isa
index 698216139..dc6819886 100644
--- a/src/arch/x86/isa/microops/regop.isa
+++ b/src/arch/x86/isa/microops/regop.isa
@@ -525,18 +525,25 @@ let {{
             uint64_t hiResult;
             uint64_t psrc1_h = psrc1 / shifter;
             uint64_t psrc1_l = psrc1 & mask(halfSize);
-            uint64_t psrc2_h = op2 / shifter;
+            uint64_t psrc2_h = (op2 / shifter) & mask(halfSize);
             uint64_t psrc2_l = op2 & mask(halfSize);
             hiResult = ((psrc1_l * psrc2_h + psrc1_h * psrc2_l +
                         ((psrc1_l * psrc2_l) / shifter)) /shifter) +
                        psrc1_h * psrc2_h;
-            if (spsrc1 < 0)
+            if (bits(psrc1, dataSize * 8 - 1))
                 hiResult -= op2;
-            int64_t bigSop2 = sop2;
-            if (bigSop2 < 0)
+            if (bits(op2, dataSize * 8 - 1))
                 hiResult -= psrc1;
             ProdHi = hiResult;
             '''
+        flag_code = '''
+            if ((-ProdHi & mask(dataSize * 8)) !=
+                    bits(ProdLow, dataSize * 8 - 1)) {
+                ccFlagBits = ccFlagBits | (ext & (CFBit | OFBit | ECFBit));
+            } else {
+                ccFlagBits = ccFlagBits & ~(ext & (CFBit | OFBit | ECFBit));
+            }
+        '''
 
     class Mul1u(WrRegOp):
         code = '''
@@ -545,12 +552,19 @@ let {{
             uint64_t shifter = (1ULL << halfSize);
             uint64_t psrc1_h = psrc1 / shifter;
             uint64_t psrc1_l = psrc1 & mask(halfSize);
-            uint64_t psrc2_h = op2 / shifter;
+            uint64_t psrc2_h = (op2 / shifter) & mask(halfSize);
             uint64_t psrc2_l = op2 & mask(halfSize);
             ProdHi = ((psrc1_l * psrc2_h + psrc1_h * psrc2_l +
                       ((psrc1_l * psrc2_l) / shifter)) / shifter) +
                      psrc1_h * psrc2_h;
             '''
+        flag_code = '''
+            if (ProdHi) {
+                ccFlagBits = ccFlagBits | (ext & (CFBit | OFBit | ECFBit));
+            } else {
+                ccFlagBits = ccFlagBits & ~(ext & (CFBit | OFBit | ECFBit));
+            }
+        '''
 
     class Mulel(RdRegOp):
         code = 'DestReg = merge(SrcReg1, ProdLow, dataSize);'
@@ -562,12 +576,6 @@ let {{
             super(RdRegOp, self).__init__(dest, src1, \
                     "InstRegIndex(NUM_INTREGS)", flags, dataSize)
         code = 'DestReg = merge(SrcReg1, ProdHi, dataSize);'
-        flag_code = '''
-            if (ProdHi)
-                ccFlagBits = ccFlagBits | (ext & (CFBit | OFBit | ECFBit));
-            else
-                ccFlagBits = ccFlagBits & ~(ext & (CFBit | OFBit | ECFBit));
-        '''
 
     # One or two bit divide
     class Div1(WrRegOp):
@@ -631,7 +639,7 @@ let {{
 
     class Mov(CondRegOp):
         code = 'DestReg = merge(SrcReg1, op2, dataSize)'
-        else_code = 'DestReg=DestReg;'
+        else_code = 'DestReg = merge(DestReg, DestReg, dataSize);'
 
     # Shift instructions
 
@@ -648,8 +656,10 @@ let {{
                 ccFlagBits = ccFlagBits & ~(ext & (CFBit | ECFBit | OFBit));
                 int CFBits = 0;
                 //Figure out if we -would- set the CF bits if requested.
-                if (bits(SrcReg1, dataSize * 8 - shiftAmt))
+                if (shiftAmt <= dataSize * 8 &&
+                        bits(SrcReg1, dataSize * 8 - shiftAmt)) {
                     CFBits = 1;
+                }
                 //If some combination of the CF bits need to be set, set them.
                 if ((ext & (CFBit | ECFBit)) && CFBits)
                     ccFlagBits = ccFlagBits | (ext & (CFBit | ECFBit));
@@ -678,8 +688,11 @@ let {{
                 //worry about setting them.
                 ccFlagBits = ccFlagBits & ~(ext & (CFBit | ECFBit | OFBit));
                 //If some combination of the CF bits need to be set, set them.
-                if ((ext & (CFBit | ECFBit)) && bits(SrcReg1, shiftAmt - 1))
+                if ((ext & (CFBit | ECFBit)) && 
+                        shiftAmt <= dataSize * 8 &&
+                        bits(SrcReg1, shiftAmt - 1)) {
                     ccFlagBits = ccFlagBits | (ext & (CFBit | ECFBit));
+                }
                 //Figure out what the OF bit should be.
                 if ((ext & OFBit) && bits(SrcReg1, dataSize * 8 - 1))
                     ccFlagBits = ccFlagBits | OFBit;
@@ -695,7 +708,7 @@ let {{
             // Because what happens to the bits shift -in- on a right shift
             // is not defined in the C/C++ standard, we have to sign extend
             // them manually to be sure.
-            uint64_t arithMask =
+            uint64_t arithMask = (shiftAmt == 0) ? 0 :
                 -bits(psrc1, dataSize * 8 - 1) << (dataSize * 8 - shiftAmt);
             DestReg = merge(DestReg, (psrc1 >> shiftAmt) | arithMask, dataSize);
             '''
@@ -706,8 +719,12 @@ let {{
                 //worry about setting them.
                 ccFlagBits = ccFlagBits & ~(ext & (CFBit | ECFBit | OFBit));
                 //If some combination of the CF bits need to be set, set them.
-                if ((ext & (CFBit | ECFBit)) && bits(SrcReg1, shiftAmt - 1))
+                uint8_t effectiveShift =
+                    (shiftAmt <= dataSize * 8) ? shiftAmt : (dataSize * 8);
+                if ((ext & (CFBit | ECFBit)) &&
+                        bits(SrcReg1, effectiveShift - 1)) {
                     ccFlagBits = ccFlagBits | (ext & (CFBit | ECFBit));
+                }
                 //Use the regular mechanisms to calculate the other flags.
                 ccFlagBits = genFlags(ccFlagBits, ext & ~(CFBit | ECFBit | OFBit),
                         DestReg, psrc1, op2);
@@ -718,14 +735,15 @@ let {{
         code = '''
             uint8_t shiftAmt =
                 (op2 & ((dataSize == 8) ? mask(6) : mask(5)));
-            if(shiftAmt)
+            uint8_t realShiftAmt = shiftAmt % (dataSize * 8);
+            if(realShiftAmt)
             {
-                uint64_t top = psrc1 << (dataSize * 8 - shiftAmt);
-                uint64_t bottom = bits(psrc1, dataSize * 8, shiftAmt);
+                uint64_t top = psrc1 << (dataSize * 8 - realShiftAmt);
+                uint64_t bottom = bits(psrc1, dataSize * 8, realShiftAmt);
                 DestReg = merge(DestReg, top | bottom, dataSize);
             }
             else
-                DestReg = DestReg;
+                DestReg = merge(DestReg, DestReg, dataSize);
             '''
         flag_code = '''
             // If the shift amount is zero, no flags should be modified.
@@ -752,31 +770,37 @@ let {{
         code = '''
             uint8_t shiftAmt =
                 (op2 & ((dataSize == 8) ? mask(6) : mask(5)));
-            if(shiftAmt)
+            uint8_t realShiftAmt = shiftAmt % (dataSize * 8 + 1);
+            if(realShiftAmt)
             {
                 CCFlagBits flags = ccFlagBits;
-                uint64_t top = flags.cf << (dataSize * 8 - shiftAmt);
-                if(shiftAmt > 1)
-                    top |= psrc1 << (dataSize * 8 - shiftAmt - 1);
-                uint64_t bottom = bits(psrc1, dataSize * 8, shiftAmt);
+                uint64_t top = flags.cf << (dataSize * 8 - realShiftAmt);
+                if (realShiftAmt > 1)
+                    top |= psrc1 << (dataSize * 8 - realShiftAmt + 1);
+                uint64_t bottom = bits(psrc1, dataSize * 8 - 1, realShiftAmt);
                 DestReg = merge(DestReg, top | bottom, dataSize);
             }
             else
-                DestReg = DestReg;
+                DestReg = merge(DestReg, DestReg, dataSize);
             '''
         flag_code = '''
             // If the shift amount is zero, no flags should be modified.
             if (shiftAmt) {
+                int origCFBit = (ccFlagBits & CFBit) ? 1 : 0;
                 //Zero out any flags we might modify. This way we only have to
                 //worry about setting them.
                 ccFlagBits = ccFlagBits & ~(ext & (CFBit | ECFBit | OFBit));
                 //Figure out what the OF bit should be.
-                if ((ext & OFBit) && ((ccFlagBits & CFBit) ^
-                                      bits(SrcReg1, dataSize * 8 - 1)))
+                if ((ext & OFBit) && (origCFBit ^
+                                      bits(SrcReg1, dataSize * 8 - 1))) {
                     ccFlagBits = ccFlagBits | OFBit;
+                }
                 //If some combination of the CF bits need to be set, set them.
-                if ((ext & (CFBit | ECFBit)) && bits(SrcReg1, shiftAmt - 1))
+                if ((ext & (CFBit | ECFBit)) &&
+                        ((realShiftAmt == 0) ? origCFBit :
+                         bits(SrcReg1, realShiftAmt - 1))) {
                     ccFlagBits = ccFlagBits | (ext & (CFBit | ECFBit));
+                }
                 //Use the regular mechanisms to calculate the other flags.
                 ccFlagBits = genFlags(ccFlagBits, ext & ~(CFBit | ECFBit | OFBit),
                         DestReg, psrc1, op2);
@@ -787,15 +811,16 @@ let {{
         code = '''
             uint8_t shiftAmt =
                 (op2 & ((dataSize == 8) ? mask(6) : mask(5)));
-            if(shiftAmt)
+            uint8_t realShiftAmt = shiftAmt % (dataSize * 8);
+            if(realShiftAmt)
             {
-                uint64_t top = psrc1 << shiftAmt;
+                uint64_t top = psrc1 << realShiftAmt;
                 uint64_t bottom =
-                    bits(psrc1, dataSize * 8 - 1, dataSize * 8 - shiftAmt);
+                    bits(psrc1, dataSize * 8 - 1, dataSize * 8 - realShiftAmt);
                 DestReg = merge(DestReg, top | bottom, dataSize);
             }
             else
-                DestReg = DestReg;
+                DestReg = merge(DestReg, DestReg, dataSize);
             '''
         flag_code = '''
             // If the shift amount is zero, no flags should be modified.
@@ -822,30 +847,33 @@ let {{
         code = '''
             uint8_t shiftAmt =
                 (op2 & ((dataSize == 8) ? mask(6) : mask(5)));
-            if(shiftAmt)
+            uint8_t realShiftAmt = shiftAmt % (dataSize * 8 + 1);
+            if(realShiftAmt)
             {
                 CCFlagBits flags = ccFlagBits;
-                uint64_t top = psrc1 << shiftAmt;
-                uint64_t bottom = flags.cf << (shiftAmt - 1);
+                uint64_t top = psrc1 << realShiftAmt;
+                uint64_t bottom = flags.cf << (realShiftAmt - 1);
                 if(shiftAmt > 1)
                     bottom |=
                         bits(psrc1, dataSize * 8 - 1,
-                                   dataSize * 8 - shiftAmt + 1);
+                                   dataSize * 8 - realShiftAmt + 1);
                 DestReg = merge(DestReg, top | bottom, dataSize);
             }
             else
-                DestReg = DestReg;
+                DestReg = merge(DestReg, DestReg, dataSize);
             '''
         flag_code = '''
             // If the shift amount is zero, no flags should be modified.
             if (shiftAmt) {
+                int origCFBit = (ccFlagBits & CFBit) ? 1 : 0;
                 //Zero out any flags we might modify. This way we only have to
                 //worry about setting them.
                 ccFlagBits = ccFlagBits & ~(ext & (CFBit | ECFBit | OFBit));
                 int msb = bits(DestReg, dataSize * 8 - 1);
-                int CFBits = bits(SrcReg1, dataSize * 8 - shiftAmt);
+                int CFBits = bits(SrcReg1, dataSize * 8 - realShiftAmt);
                 //If some combination of the CF bits need to be set, set them.
-                if ((ext & (CFBit | ECFBit)) && CFBits)
+                if ((ext & (CFBit | ECFBit)) &&
+                        ((realShiftAmt == 0) ? origCFBit : CFBits))
                     ccFlagBits = ccFlagBits | (ext & (CFBit | ECFBit));
                 //Figure out what the OF bit should be.
                 if ((ext & OFBit) && (msb ^ CFBits))
diff --git a/src/arch/x86/predecoder.cc b/src/arch/x86/predecoder.cc
index 24276f06c..f537f92af 100644
--- a/src/arch/x86/predecoder.cc
+++ b/src/arch/x86/predecoder.cc
@@ -77,6 +77,7 @@ namespace X86ISA
         immediateCollected = 0;
         emi.immediate = 0;
         emi.displacement = 0;
+        emi.dispSize = 0;
 
         emi.modRM = 0;
         emi.sib = 0;
@@ -383,6 +384,8 @@ namespace X86ISA
                 emiIsReady = true;
                 nextState = ResetState;
             }
+
+            emi.dispSize = displacementSize;
         }
         else
             nextState = DisplacementState;
diff --git a/src/arch/x86/types.hh b/src/arch/x86/types.hh
index bdf3a814e..956ec3216 100644
--- a/src/arch/x86/types.hh
+++ b/src/arch/x86/types.hh
@@ -175,6 +175,8 @@ namespace X86ISA
         uint8_t addrSize;
         //The effective stack size.
         uint8_t stackSize;
+        //The size of the displacement
+        uint8_t dispSize;
 
         //Mode information
         OperatingMode mode;
@@ -187,12 +189,13 @@ namespace X86ISA
                      "op = {\n\t\tnum = %d,\n\t\top = %#x,\n\t\t"
                            "prefixA = %#x,\n\t\tprefixB = %#x\n\t},\n\t"
                      "modRM = %#x,\n\tsib = %#x,\n\t"
-                     "immediate = %#x,\n\tdisplacement = %#x\n}\n",
+                     "immediate = %#x,\n\tdisplacement = %#x,\n\t"
+                     "dispSize = %d\n}\n",
                      (uint8_t)emi.legacy, (uint8_t)emi.rex,
                      emi.opcode.num, (uint8_t)emi.opcode.op,
                      emi.opcode.prefixA, emi.opcode.prefixB,
                      (uint8_t)emi.modRM, (uint8_t)emi.sib,
-                     emi.immediate, emi.displacement);
+                     emi.immediate, emi.displacement, emi.dispSize);
         return os;
     }
 
@@ -227,6 +230,8 @@ namespace X86ISA
             return false;
         if(emi1.stackSize != emi2.stackSize)
             return false;
+        if(emi1.dispSize != emi2.dispSize)
+            return false;
         return true;
     }
 
diff --git a/src/arch/x86/utility.hh b/src/arch/x86/utility.hh
index d305e2599..4388dd416 100644
--- a/src/arch/x86/utility.hh
+++ b/src/arch/x86/utility.hh
@@ -82,7 +82,8 @@ namespace __hash_namespace {
                     ((uint64_t)emi.opcode.op)) ^
                     emi.immediate ^ emi.displacement ^
                     emi.mode ^
-                    emi.opSize ^ emi.addrSize ^ emi.stackSize;
+                    emi.opSize ^ emi.addrSize ^
+                    emi.stackSize ^ emi.dispSize;
         };
     };
 }