X86: Total overhaul of the division instructions and microops.

--HG-- extra : convert_revision : 303ea45f69f7805361ad877fe6bb43fbc3dfd7a6
author: Gabe Black <gblack@eecs.umich.edu> 2007-09-13 16:34:46 -0700
committer: Gabe Black <gblack@eecs.umich.edu> 2007-09-13 16:34:46 -0700
commit: f7b6230d99e102f3a6195687fed0617005a70424 (patch)
tree: 360dd76c0b7d7f159e816af90ab8fb3e62f3c36c /src/arch/x86/isa/insts
parent: 0f57b407a3df68f93e73e0635569d7bf5dd151b0 (diff)
download: gem5-f7b6230d99e102f3a6195687fed0617005a70424.tar.xz
1 files changed, 552 insertions, 27 deletions
diff --git a/src/arch/x86/isa/insts/arithmetic/multiply_and_divide.py b/src/arch/x86/isa/insts/arithmetic/multiply_and_divide.py
index 197de5fb3..a9b53acac 100644
--- a/src/arch/x86/isa/insts/arithmetic/multiply_and_divide.py
+++ b/src/arch/x86/isa/insts/arithmetic/multiply_and_divide.py
@@ -234,20 +234,70 @@ def macroop IMUL_R_P_I
 
 def macroop DIV_B_R
 {
-    div1 rax, rax, reg
+    # Do the initial part of the division
+    div1 rsi, reg, dataSize=1
+
+    #These are split out so we can initialize the number of bits in the
+    #second register
+    div2i t1, rax, 8, dataSize=1
+    div2 t1, rax, t1, dataSize=1
+
+    #Loop until we're out of bits to shift in
+divLoopTop:
+    div2 t1, rax, t1, dataSize=1
+    div2 t1, rax, t1, flags=(EZF,), dataSize=1
+    bri t0, label("divLoopTop"), flags=(nCEZF,)
+
+    #Unload the answer
+    divq rax, dataSize=1
+    divr rsi, dataSize=1
 };
 
 def macroop DIV_B_M
 {
-    ld t1, seg, sib, disp
-    div1 rax, rax, t1
+    ld t2, seg, sib, disp
+
+    # Do the initial part of the division
+    div1 rsi, t2, dataSize=1
+
+    #These are split out so we can initialize the number of bits in the
+    #second register
+    div2i t1, rax, 8, dataSize=1
+    div2 t1, rax, t1, dataSize=1
+
+    #Loop until we're out of bits to shift in
+divLoopTop:
+    div2 t1, rax, t1, dataSize=1
+    div2 t1, rax, t1, flags=(EZF,), dataSize=1
+    bri t0, label("divLoopTop"), flags=(nCEZF,)
+
+    #Unload the answer
+    divq rax, dataSize=1
+    divr rsi, dataSize=1
 };
 
 def macroop DIV_B_P
 {
     rdip t7
-    ld t1, seg, riprel, disp
-    div1 rax, rax, t1
+    ld t2, seg, riprel, disp
+
+    # Do the initial part of the division
+    div1 rsi, t2, dataSize=1
+
+    #These are split out so we can initialize the number of bits in the
+    #second register
+    div2i t1, rax, 8, dataSize=1
+    div2 t1, rax, t1, dataSize=1
+
+    #Loop until we're out of bits to shift in
+divLoopTop:
+    div2 t1, rax, t1, dataSize=1
+    div2 t1, rax, t1, flags=(EZF,), dataSize=1
+    bri t0, label("divLoopTop"), flags=(nCEZF,)
+
+    #Unload the answer
+    divq rax, dataSize=1
+    divr rsi, dataSize=1
 };
 
 #
@@ -256,24 +306,301 @@ def macroop DIV_B_P
 
 def macroop DIV_R
 {
-    divr t1, rax, reg
-    divq rax, rax, reg
-    mov rdx, rdx, t1
+    # Do the initial part of the division
+    div1 rdx, reg
+
+    #These are split out so we can initialize the number of bits in the
+    #second register
+    div2i t1, rax, "env.dataSize * 8"
+    div2 t1, rax, t1
+
+    #Loop until we're out of bits to shift in
+    #The amount of unrolling here could stand some tuning
+divLoopTop:
+    div2 t1, rax, t1
+    div2 t1, rax, t1
+    div2 t1, rax, t1
+    div2 t1, rax, t1, flags=(EZF,)
+    bri t0, label("divLoopTop"), flags=(nCEZF,)
+
+    #Unload the answer
+    divq rax
+    divr rdx
 };
 
 def macroop DIV_M
 {
-    ld t1, seg, sib, disp
-    divr rdx, rax, t1
-    divq rax, rax, t1
+    ld t2, seg, sib, disp
+
+    # Do the initial part of the division
+    div1 rdx, t2
+
+    #These are split out so we can initialize the number of bits in the
+    #second register
+    div2i t1, rax, "env.dataSize * 8"
+    div2 t1, rax, t1
+
+    #Loop until we're out of bits to shift in
+    #The amount of unrolling here could stand some tuning
+divLoopTop:
+    div2 t1, rax, t1
+    div2 t1, rax, t1
+    div2 t1, rax, t1
+    div2 t1, rax, t1, flags=(EZF,)
+    bri t0, label("divLoopTop"), flags=(nCEZF,)
+
+    #Unload the answer
+    divq rax
+    divr rdx
 };
 
 def macroop DIV_P
 {
     rdip t7
-    ld t1, seg, riprel, disp
-    divr rdx, rax, t1
-    divq rax, rax, t1
+    ld t2, seg, riprel, disp
+
+    # Do the initial part of the division
+    div1 rdx, t2
+
+    #These are split out so we can initialize the number of bits in the
+    #second register
+    div2i t1, rax, "env.dataSize * 8"
+    div2 t1, rax, t1
+
+    #Loop until we're out of bits to shift in
+    #The amount of unrolling here could stand some tuning
+divLoopTop:
+    div2 t1, rax, t1
+    div2 t1, rax, t1
+    div2 t1, rax, t1
+    div2 t1, rax, t1, flags=(EZF,)
+    bri t0, label("divLoopTop"), flags=(nCEZF,)
+
+    #Unload the answer
+    divq rax
+    divr rdx
+};
+
+#
+# One byte version of signed division
+#
+
+def macroop IDIV_B_R
+{
+    # Negate dividend
+    sub t1, t0, rax, flags=(ECF,), dataSize=1
+    ruflag t4, 3
+    sub t2, t0, rsi, dataSize=1
+    sub t2, t2, t4
+
+    #Find the sign of the divisor
+    #FIXME!!! This depends on shifts setting the carry flag correctly.
+    slli t0, reg, 1, flags=(ECF,), dataSize=1
+
+    # Negate divisor
+    sub t3, t0, reg, dataSize=1
+    # Put the divisor's absolute value into t3
+    mov t3, t3, reg, flags=(nCECF,), dataSize=1
+
+    #Find the sign of the dividend
+    #FIXME!!! This depends on shifts setting the carry flag correctly.
+    slli t0, rsi, 1, flags=(ECF,), dataSize=1
+
+    # Put the dividend's absolute value into t1 and t2
+    mov t1, t1, rax, flags=(nCECF,), dataSize=1
+    mov t2, t2, rsi, flags=(nCECF,), dataSize=1
+
+    # Do the initial part of the division
+    div1 t2, t3, dataSize=1
+
+    #These are split out so we can initialize the number of bits in the
+    #second register
+    div2i t4, t1, 8, dataSize=1
+    div2 t4, t1, t4, dataSize=1
+
+    #Loop until we're out of bits to shift in
+divLoopTop:
+    div2 t4, t1, t4, dataSize=1
+    div2 t4, t1, t4, flags=(EZF,), dataSize=1
+    bri t0, label("divLoopTop"), flags=(nCEZF,)
+
+    #Unload the answer
+    divq t5, dataSize=1
+    divr t6, dataSize=1
+
+    # Fix up signs. The sign of the dividend is still lying around in ECF.
+    # The sign of the remainder, ah, is the same as the dividend. The sign
+    # of the quotient is negated if the signs of the divisor and dividend
+    # were different.
+
+    # Negate the remainder
+    sub t4, t0, t6, dataSize=1
+    # If the dividend was negative, put the negated remainder in rsi.
+    mov rsi, rsi, t4, (CECF,), dataSize=1
+    # Otherwise put the regular remainder in rsi.
+    mov rsi, rsi, t6, (nCECF,), dataSize=1
+
+    # Negate the quotient.
+    sub t4, t0, t5, dataSize=1
+    # If the dividend was negative, start using the negated quotient
+    mov t5, t5, t4, (CECF,), dataSize=1
+
+    # Check the divisor's sign (FIXME? t3 already holds its absolute value)
+    slli t0, t3, 1, flags=(ECF,), dataSize=1
+
+    # Negate the (possibly already negated) quotient
+    sub t4, t0, t5, dataSize=1
+    # If the divisor was negative, put the negated quotient in rax.
+    mov rax, rax, t4, (CECF,), dataSize=1
+    # Otherwise put the one that wasn't negated (at least here) in rax.
+    mov rax, rax, t5, (nCECF,), dataSize=1
+};
+
+def macroop IDIV_B_M
+{
+    # Negate dividend
+    sub t1, t0, rax, flags=(ECF,), dataSize=1
+    ruflag t4, 3
+    sub t2, t0, rsi, dataSize=1
+    sub t2, t2, t4
+
+    ld t3, seg, sib, disp
+
+    #Find the sign of the divisor
+    #FIXME!!! This depends on shifts setting the carry flag correctly.
+    slli t0, t3, 1, flags=(ECF,), dataSize=1
+
+    # Negate divisor
+    sub t4, t0, t3, dataSize=1
+    # Put the divisor's absolute value into t3
+    mov t3, t3, t4, flags=(CECF,), dataSize=1
+
+    #Find the sign of the dividend
+    #FIXME!!! This depends on shifts setting the carry flag correctly.
+    slli t0, rsi, 1, flags=(ECF,), dataSize=1
+
+    # Put the dividend's absolute value into t1 and t2
+    mov t1, t1, rax, flags=(nCECF,), dataSize=1
+    mov t2, t2, rsi, flags=(nCECF,), dataSize=1
+
+    # Do the initial part of the division
+    div1 t2, t3, dataSize=1
+
+    #These are split out so we can initialize the number of bits in the
+    #second register
+    div2i t4, t1, 8, dataSize=1
+    div2 t4, t1, t4, dataSize=1
+
+    #Loop until we're out of bits to shift in
+divLoopTop:
+    div2 t4, t1, t4, dataSize=1
+    div2 t4, t1, t4, flags=(EZF,), dataSize=1
+    bri t0, label("divLoopTop"), flags=(nCEZF,)
+
+    #Unload the answer
+    divq t5, dataSize=1
+    divr t6, dataSize=1
+
+    # Fix up signs. The sign of the dividend is still lying around in ECF.
+    # The sign of the remainder, ah, is the same as the dividend. The sign
+    # of the quotient is negated if the signs of the divisor and dividend
+    # were different.
+
+    # Negate the remainder
+    sub t4, t0, t6, dataSize=1
+    # If the dividend was negative, put the negated remainder in rsi.
+    mov rsi, rsi, t4, (CECF,), dataSize=1
+    # Otherwise put the regular remainder in rsi.
+    mov rsi, rsi, t6, (nCECF,), dataSize=1
+
+    # Negate the quotient.
+    sub t4, t0, t5, dataSize=1
+    # If the dividend was negative, start using the negated quotient
+    mov t5, t5, t4, (CECF,), dataSize=1
+
+    # Check the divisor's sign (FIXME? t3 already holds its absolute value)
+    slli t0, t3, 1, flags=(ECF,), dataSize=1
+
+    # Negate the (possibly already negated) quotient
+    sub t4, t0, t5, dataSize=1
+    # If the divisor was negative, put the negated quotient in rax.
+    mov rax, rax, t4, (CECF,), dataSize=1
+    # Otherwise put the one that wasn't negated (at least here) in rax.
+    mov rax, rax, t5, (nCECF,), dataSize=1
+};
+
+def macroop IDIV_B_P
+{
+    # Negate dividend
+    sub t1, t0, rax, flags=(ECF,), dataSize=1
+    ruflag t4, 3
+    sub t2, t0, rsi, dataSize=1
+    sub t2, t2, t4
+
+    rdip t7
+    ld t3, seg, riprel, disp
+
+    #Find the sign of the divisor
+    #FIXME!!! This depends on shifts setting the carry flag correctly.
+    slli t0, t3, 1, flags=(ECF,), dataSize=1
+
+    # Negate divisor
+    sub t4, t0, t3, dataSize=1
+    # Put the divisor's absolute value into t3
+    mov t3, t3, t4, flags=(CECF,), dataSize=1
+
+    #Find the sign of the dividend
+    #FIXME!!! This depends on shifts setting the carry flag correctly.
+    slli t0, rsi, 1, flags=(ECF,), dataSize=1
+
+    # Put the dividend's absolute value into t1 and t2
+    mov t1, t1, rax, flags=(nCECF,), dataSize=1
+    mov t2, t2, rsi, flags=(nCECF,), dataSize=1
+
+    # Do the initial part of the division
+    div1 t2, t3, dataSize=1
+
+    #These are split out so we can initialize the number of bits in the
+    #second register
+    div2i t4, t1, 8, dataSize=1
+    div2 t4, t1, t4, dataSize=1
+
+    #Loop until we're out of bits to shift in
+divLoopTop:
+    div2 t4, t1, t4, dataSize=1
+    div2 t4, t1, t4, flags=(EZF,), dataSize=1
+    bri t0, label("divLoopTop"), flags=(nCEZF,)
+
+    #Unload the answer
+    divq t5, dataSize=1
+    divr t6, dataSize=1
+
+    # Fix up signs. The sign of the dividend is still lying around in ECF.
+    # The sign of the remainder, ah, is the same as the dividend. The sign
+    # of the quotient is negated if the signs of the divisor and dividend
+    # were different.
+
+    # Negate the remainder
+    sub t4, t0, t6, dataSize=1
+    # If the dividend was negative, put the negated remainder in rsi.
+    mov rsi, rsi, t4, (CECF,), dataSize=1
+    # Otherwise put the regular remainder in rsi.
+    mov rsi, rsi, t6, (nCECF,), dataSize=1
+
+    # Negate the quotient.
+    sub t4, t0, t5, dataSize=1
+    # If the dividend was negative, start using the negated quotient
+    mov t5, t5, t4, (CECF,), dataSize=1
+
+    # Check the divisor's sign (FIXME? t3 already holds its absolute value)
+    slli t0, t3, 1, flags=(ECF,), dataSize=1
+
+    # Negate the (possibly already negated) quotient
+    sub t4, t0, t5, dataSize=1
+    # If the divisor was negative, put the negated quotient in rax.
+    mov rax, rax, t4, (CECF,), dataSize=1
+    # Otherwise put the one that wasn't negated (at least here) in rax.
+    mov rax, rax, t5, (nCECF,), dataSize=1
 };
 
 #
@@ -282,27 +609,225 @@ def macroop DIV_P
 
 def macroop IDIV_R
 {
-    divr t1, rax, reg
-    divq rax, rax, reg
-    mov rdx, rdx, t1
+    # Negate dividend
+    sub t1, t0, rax, flags=(ECF,)
+    ruflag t4, 3
+    sub t2, t0, rdx
+    sub t2, t2, t4
+
+    #Find the sign of the divisor
+    #FIXME!!! This depends on shifts setting the carry flag correctly.
+    slli t0, reg, 1, flags=(ECF,)
+
+    # Negate divisor
+    sub t3, t0, reg
+    # Put the divisor's absolute value into t3
+    mov t3, t3, reg, flags=(nCECF,)
+
+    #Find the sign of the dividend
+    #FIXME!!! This depends on shifts setting the carry flag correctly.
+    slli t0, rdx, 1, flags=(ECF,)
+
+    # Put the dividend's absolute value into t1 and t2
+    mov t1, t1, rax, flags=(nCECF,)
+    mov t2, t2, rdx, flags=(nCECF,)
+
+    # Do the initial part of the division
+    div1 t2, t3
+
+    #These are split out so we can initialize the number of bits in the
+    #second register
+    div2i t4, t1, "env.dataSize * 8"
+    div2 t4, t1, t4
+
+    #Loop until we're out of bits to shift in
+divLoopTop:
+    div2 t4, t1, t4
+    div2 t4, t1, t4
+    div2 t4, t1, t4
+    div2 t4, t1, t4, flags=(EZF,)
+    bri t0, label("divLoopTop"), flags=(nCEZF,)
+
+    #Unload the answer
+    divq t5
+    divr t6
+
+    # Fix up signs. The sign of the dividend is still lying around in ECF.
+    # The sign of the remainder, rdx, is the same as the dividend. The sign
+    # of the quotient is negated if the signs of the divisor and dividend
+    # were different.
+
+    # Negate the remainder
+    sub t4, t0, t6
+    # If the dividend was negative, put the negated remainder in rdx.
+    mov rdx, rdx, t4, (CECF,)
+    # Otherwise put the regular remainder in rdx.
+    mov rdx, rdx, t6, (nCECF,)
+
+    # Negate the quotient.
+    sub t4, t0, t5
+    # If the dividend was negative, start using the negated quotient
+    mov t5, t5, t4, (CECF,)
+
+    # Check the divisor's sign (FIXME? t3 already holds its absolute value)
+    slli t0, t3, 1, flags=(ECF,)
+
+    # Negate the (possibly already negated) quotient
+    sub t4, t0, t5
+    # If the divisor was negative, put the negated quotient in rax.
+    mov rax, rax, t4, (CECF,)
+    # Otherwise put the one that wasn't negated (at least here) in rax.
+    mov rax, rax, t5, (nCECF,)
 };
 
 def macroop IDIV_M
 {
-    ld t1, seg, sib, disp
-    divr rdx, rax, t1
-    divq rax, rax, t1
+    # Negate dividend
+    sub t1, t0, rax, flags=(ECF,)
+    ruflag t4, 3
+    sub t2, t0, rdx
+    sub t2, t2, t4
+
+    ld t3, seg, sib, disp
+
+    #Find the sign of the divisor
+    #FIXME!!! This depends on shifts setting the carry flag correctly.
+    slli t0, t3, 1, flags=(ECF,)
+
+    # Negate divisor
+    sub t4, t0, t3
+    # Put the divisor's absolute value into t3
+    mov t3, t3, t4, flags=(CECF,)
+
+    #Find the sign of the dividend
+    #FIXME!!! This depends on shifts setting the carry flag correctly.
+    slli t0, rdx, 1, flags=(ECF,)
+
+    # Put the dividend's absolute value into t1 and t2
+    mov t1, t1, rax, flags=(nCECF,)
+    mov t2, t2, rdx, flags=(nCECF,)
+
+    # Do the initial part of the division
+    div1 t2, t3
+
+    #These are split out so we can initialize the number of bits in the
+    #second register
+    div2i t4, t1, "env.dataSize * 8"
+    div2 t4, t1, t4
+
+    #Loop until we're out of bits to shift in
+divLoopTop:
+    div2 t4, t1, t4
+    div2 t4, t1, t4
+    div2 t4, t1, t4
+    div2 t4, t1, t4, flags=(EZF,)
+    bri t0, label("divLoopTop"), flags=(nCEZF,)
+
+    #Unload the answer
+    divq t5
+    divr t6
+
+    # Fix up signs. The sign of the dividend is still lying around in ECF.
+    # The sign of the remainder, rdx, is the same as the dividend. The sign
+    # of the quotient is negated if the signs of the divisor and dividend
+    # were different.
+
+    # Negate the remainder
+    sub t4, t0, t6
+    # If the dividend was negative, put the negated remainder in rdx.
+    mov rdx, rdx, t4, (CECF,)
+    # Otherwise put the regular remainder in rdx.
+    mov rdx, rdx, t6, (nCECF,)
+
+    # Negate the quotient.
+    sub t4, t0, t5
+    # If the dividend was negative, start using the negated quotient
+    mov t5, t5, t4, (CECF,)
+
+    # Check the divisor's sign (FIXME? t3 already holds its absolute value)
+    slli t0, t3, 1, flags=(ECF,)
+
+    # Negate the (possibly already negated) quotient
+    sub t4, t0, t5
+    # If the divisor was negative, put the negated quotient in rax.
+    mov rax, rax, t4, (CECF,)
+    # Otherwise put the one that wasn't negated (at least here) in rax.
+    mov rax, rax, t5, (nCECF,)
 };
 
 def macroop IDIV_P
 {
+    # Negate dividend
+    sub t1, t0, rax, flags=(ECF,)
+    ruflag t4, 3
+    sub t2, t0, rdx
+    sub t2, t2, t4
+
     rdip t7
-    ld t1, seg, riprel, disp
-    divr rdx, rax, t1
-    divq rax, rax, t1
+    ld t3, seg, riprel, disp
+
+    #Find the sign of the divisor
+    #FIXME!!! This depends on shifts setting the carry flag correctly.
+    slli t0, t3, 1, flags=(ECF,)
+
+    # Negate divisor
+    sub t4, t0, t3
+    # Put the divisor's absolute value into t3
+    mov t3, t3, t4, flags=(CECF,)
+
+    #Find the sign of the dividend
+    #FIXME!!! This depends on shifts setting the carry flag correctly.
+    slli t0, rdx, 1, flags=(ECF,)
+
+    # Put the dividend's absolute value into t1 and t2
+    mov t1, t1, rax, flags=(nCECF,)
+    mov t2, t2, rdx, flags=(nCECF,)
+
+    # Do the initial part of the division
+    div1 t2, t3
+
+    #These are split out so we can initialize the number of bits in the
+    #second register
+    div2i t4, t1, "env.dataSize * 8"
+    div2 t4, t1, t4
+
+    #Loop until we're out of bits to shift in
+divLoopTop:
+    div2 t4, t1, t4
+    div2 t4, t1, t4
+    div2 t4, t1, t4
+    div2 t4, t1, t4, flags=(EZF,)
+    bri t0, label("divLoopTop"), flags=(nCEZF,)
+
+    #Unload the answer
+    divq t5
+    divr t6
+
+    # Fix up signs. The sign of the dividend is still lying around in ECF.
+    # The sign of the remainder, rdx, is the same as the dividend. The sign
+    # of the quotient is negated if the signs of the divisor and dividend
+    # were different.
+
+    # Negate the remainder
+    sub t4, t0, t6
+    # If the dividend was negative, put the negated remainder in rdx.
+    mov rdx, rdx, t4, (CECF,)
+    # Otherwise put the regular remainder in rdx.
+    mov rdx, rdx, t6, (nCECF,)
+
+    # Negate the quotient.
+    sub t4, t0, t5
+    # If the dividend was negative, start using the negated quotient
+    mov t5, t5, t4, (CECF,)
+
+    # Check the divisor's sign (FIXME? t3 already holds its absolute value)
+    slli t0, t3, 1, flags=(ECF,)
+
+    # Negate the (possibly already negated) quotient
+    sub t4, t0, t5
+    # If the divisor was negative, put the negated quotient in rax.
+    mov rax, rax, t4, (CECF,)
+    # Otherwise put the one that wasn't negated (at least here) in rax.
+    mov rax, rax, t5, (nCECF,)
 };
 '''
-#let {{
-#    class IDIV(Inst):
-#	"GenFault ${new UnimpInstFault}"
-#}};
author	Gabe Black <gblack@eecs.umich.edu>	2007-09-13 16:34:46 -0700
committer	Gabe Black <gblack@eecs.umich.edu>	2007-09-13 16:34:46 -0700
commit	f7b6230d99e102f3a6195687fed0617005a70424 (patch)
tree	360dd76c0b7d7f159e816af90ab8fb3e62f3c36c /src/arch/x86/isa/insts
parent	0f57b407a3df68f93e73e0635569d7bf5dd151b0 (diff)
download	gem5-f7b6230d99e102f3a6195687fed0617005a70424.tar.xz