From e4c01713562f51847537c5724bc629ce4bdcf3bc Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Thu, 6 Sep 2007 16:27:28 -0700 Subject: X86: Rework the multiplication microops so that they work like they would in the patent. --HG-- extra : convert_revision : 6fcf5dee440288d8bf92f6c5c2f97ef019975536 --- src/arch/x86/intregfile.hh | 3 +- src/arch/x86/intregs.hh | 26 ++++++ .../isa/insts/arithmetic/multiply_and_divide.py | 94 ++++++++++++++-------- src/arch/x86/isa/microops/regop.isa | 93 +++++++++++---------- src/arch/x86/isa/operands.isa | 20 +++-- src/arch/x86/isa_traits.hh | 5 +- src/arch/x86/x86_traits.hh | 7 ++ 7 files changed, 155 insertions(+), 93 deletions(-) (limited to 'src') diff --git a/src/arch/x86/intregfile.hh b/src/arch/x86/intregfile.hh index be6242a41..b4d256a04 100644 --- a/src/arch/x86/intregfile.hh +++ b/src/arch/x86/intregfile.hh @@ -105,7 +105,8 @@ namespace X86ISA const int NumIntArchRegs = NUM_INTREGS; const int NumIntRegs = - NumIntArchRegs + NumMicroIntRegs + NumPseudoIntRegs; + NumIntArchRegs + NumMicroIntRegs + + NumPseudoIntRegs + NumImplicitIntRegs; class IntRegFile { diff --git a/src/arch/x86/intregs.hh b/src/arch/x86/intregs.hh index dbbb9f97e..6f252392e 100644 --- a/src/arch/x86/intregs.hh +++ b/src/arch/x86/intregs.hh @@ -58,6 +58,7 @@ #ifndef __ARCH_X86_INTREGS_HH__ #define __ARCH_X86_INTREGS_HH__ +#include "arch/x86/x86_traits.hh" #include "base/bitunion.hh" namespace X86ISA @@ -163,6 +164,31 @@ namespace X86ISA NUM_INTREGS }; + + inline static IntRegIndex + INTREG_MICRO(int index) + { + return (IntRegIndex)(NUM_INTREGS + index); + } + + inline static IntRegIndex + INTREG_PSEUDO(int index) + { + return (IntRegIndex)(NUM_INTREGS + NumMicroIntRegs + index); + } + + inline static IntRegIndex + INTREG_IMPLICIT(int index) + { + return (IntRegIndex)(NUM_INTREGS + NumMicroIntRegs + + NumPseudoIntRegs + index); + } + + inline static IntRegIndex + INTREG_FOLDED(int index, int foldBit) + { + return (IntRegIndex)(((index & 0x1C) == 4 ? foldBit : 0) | index); + } }; #endif // __ARCH_X86_INTREGS_HH__ diff --git a/src/arch/x86/isa/insts/arithmetic/multiply_and_divide.py b/src/arch/x86/isa/insts/arithmetic/multiply_and_divide.py index 5f75b8868..197de5fb3 100644 --- a/src/arch/x86/isa/insts/arithmetic/multiply_and_divide.py +++ b/src/arch/x86/isa/insts/arithmetic/multiply_and_divide.py @@ -61,20 +61,29 @@ microcode = ''' def macroop MUL_B_R { - mul1u rax, rax, reg, dataSize="2" + mul1u rax, reg + mulel rax + # Really ah + muleh rsi, flags=(OF,CF) }; def macroop MUL_B_M { ld t1, seg, sib, disp - mul1u rax, rax, t1, dataSize="2" + mul1u rax, t1 + mulel rax + # Really ah + muleh rsi, flags=(OF,CF) }; def macroop MUL_B_P { rdip t7 ld t1, seg, riprel, disp - mul1u rax, rax, t1, dataSize="2" + mul1u rax, t1 + mulel rax + # Really ah + muleh rsi, flags=(OF,CF) }; # @@ -83,27 +92,26 @@ def macroop MUL_B_P def macroop MUL_R { - # We need to store the result of the multiplication in a temporary - # and then move it later because reg may be rdx. If it is, we can't - # clobber its old value yet. - muleh t1, rax, reg - mulel rax, rax, reg - mov rdx, rdx, t1 + mul1u rax, reg + mulel rax + muleh rdx, flags=(OF,CF) }; def macroop MUL_M { ld t1, seg, sib, disp - muleh rdx, rax, t1 - mulel rax, rax, t1 + mul1u rax, t1 + mulel rax + muleh rdx, flags=(OF,CF) }; def macroop MUL_P { rdip t7 ld t1, seg, riprel, disp - muleh rdx, rax, t1 - mulel rax, rax, t1 + mul1u rax, t1 + mulel rax + muleh rdx, flags=(OF,CF) }; # @@ -112,20 +120,29 @@ def macroop MUL_P def macroop IMUL_B_R { - mul1s rax, rax, reg, dataSize="2" + mul1s rax, reg + mulel rax + # Really ah + muleh rsi, flags=(OF,CF) }; def macroop IMUL_B_M { ld t1, seg, sib, disp - mul1s rax, rax, t1, dataSize="2" + mul1s rax, t1 + mulel rax + # Really ah + muleh rsi, flags=(OF,CF) }; def macroop IMUL_B_P { rdip t7 ld t1, seg, riprel, disp - mul1s rax, rax, t1, dataSize="2" + mul1s rax, t1 + mulel rax + # Really ah + muleh rsi, flags=(OF,CF) }; # @@ -134,47 +151,50 @@ def macroop IMUL_B_P def macroop IMUL_R { - mulehs t1, rax, reg - mulel rax, rax, reg - mov rdx, rdx, t1 + mul1s rax, reg + mulel rax + muleh rdx, flags=(OF,CF) }; def macroop IMUL_M { ld t1, seg, sib, disp - mulehs rdx, rax, t1 - mulel rax, rax, t1 + mul1s rax, t1 + mulel rax + muleh rdx, flags=(OF,CF) }; def macroop IMUL_P { rdip t7 ld t1, seg, riprel, disp - mulehs rdx, rax, t1 - mulel rax, rax, t1 + mul1s rax, t1 + mulel rax + muleh rdx, flags=(OF,CF) }; -# -# Two operand signed multiply. These should set the CF and OF flags if the -# result is too large for the destination register -# - def macroop IMUL_R_R { - mulel reg, reg, regm + mul1s reg, regm + mulel reg + muleh t0, flags=(CF,OF) }; def macroop IMUL_R_M { ld t1, seg, sib, disp - mulel reg, reg, t1 + mul1s reg, t1 + mulel reg + muleh t0, flags=(CF,OF) }; def macroop IMUL_R_P { rdip t7 ld t1, seg, riprel, disp - mulel reg, reg, t1 + mul1s reg, t1 + mulel reg + muleh t0, flags=(CF,OF) }; # @@ -184,14 +204,18 @@ def macroop IMUL_R_P def macroop IMUL_R_R_I { limm t1, imm - mulel reg, regm, t1 + mul1s regm, t1 + mulel reg + muleh t0, flags=(OF,CF) }; def macroop IMUL_R_M_I { limm t1, imm ld t2, seg, sib, disp - mulel reg, t2, t1 + mul1s t2, t1 + mulel reg + muleh t0, flags=(OF,CF) }; def macroop IMUL_R_P_I @@ -199,7 +223,9 @@ def macroop IMUL_R_P_I rdip t7 limm t1, imm ld t2, seg, riprel - mulel reg, t2, t1 + mul1s t2, t1 + mulel reg + muleh t0, flags=(OF,CF) }; # diff --git a/src/arch/x86/isa/microops/regop.isa b/src/arch/x86/isa/microops/regop.isa index 28689c84b..7ce9dc27f 100644 --- a/src/arch/x86/isa/microops/regop.isa +++ b/src/arch/x86/isa/microops/regop.isa @@ -421,6 +421,18 @@ let {{ abstract = True cond_check = "checkCondition(ccFlagBits)" + class RdRegOp(RegOp): + abstract = True + def __init__(self, dest, src1=None, dataSize="env.dataSize"): + if not src1: + src1 = dest + super(RdRegOp, self).__init__(dest, src1, "NUM_INTREGS", None, dataSize) + + class WrRegOp(RegOp): + abstract = True + def __init__(self, src1, src2, flags=None, dataSize="env.dataSize"): + super(WrRegOp, self).__init__("NUM_INTREGS", src1, src2, flags, dataSize) + class Add(FlagRegOp): code = 'DestReg = merge(DestReg, psrc1 + op2, dataSize);' @@ -448,57 +460,52 @@ let {{ class Xor(LogicRegOp): code = 'DestReg = merge(DestReg, psrc1 ^ op2, dataSize)' - class Mul1s(FlagRegOp): + class Mul1s(WrRegOp): code = ''' - int signPos = (dataSize * 8) / 2 - 1; - IntReg srcVal1 = psrc1 | (-bits(psrc1, signPos) << signPos); - IntReg srcVal2 = op2 | (-bits(psrc1, signPos) << signPos); - DestReg = merge(DestReg, srcVal1 * srcVal2, dataSize) + ProdLow = psrc1 * op2; + int halfSize = (dataSize * 8) / 2; + int64_t spsrc1_h = spsrc1 >> halfSize; + int64_t spsrc1_l = spsrc1 & mask(halfSize); + int64_t spsrc2_h = sop2 >> halfSize; + int64_t spsrc2_l = sop2 & mask(halfSize); + ProdHi = ((spsrc1_l * spsrc2_h + spsrc1_h * spsrc2_l + + ((spsrc1_l * spsrc2_l) >> halfSize)) >> halfSize) + + spsrc1_h * spsrc2_h; ''' - class Mul1u(FlagRegOp): + class Mul1u(WrRegOp): code = ''' + ProdLow = psrc1 * op2; int halfSize = (dataSize * 8) / 2; - IntReg srcVal1 = psrc1 & mask(halfSize); - IntReg srcVal2 = op2 & mask(halfSize); - DestReg = merge(DestReg, srcVal1 * srcVal2, dataSize) + uint64_t psrc1_h = psrc1 >> halfSize; + uint64_t psrc1_l = psrc1 & mask(halfSize); + uint64_t psrc2_h = op2 >> halfSize; + uint64_t psrc2_l = op2 & mask(halfSize); + ProdHi = ((psrc1_l * psrc2_h + psrc1_h * psrc2_l + + ((psrc1_l * psrc2_l) >> halfSize)) >> halfSize) + + psrc1_h * psrc2_h; ''' - class Mulel(FlagRegOp): - code = 'DestReg = merge(DestReg, psrc1 * op2, dataSize);' + class Mulel(RdRegOp): + code = 'DestReg = merge(SrcReg1, ProdLow, dataSize);' # Neither of these is quite correct because it assumes that right shifting # a signed or unsigned value does sign or zero extension respectively. # The C standard says that what happens on a right shift with a 1 in the # MSB position is undefined. On x86 and under likely most compilers the # "right thing" happens, but this isn't a guarantee. - class Muleh(FlagRegOp): - code = ''' - int halfSize = (dataSize * 8) / 2; - uint64_t psrc1_h = psrc1 >> halfSize; - uint64_t psrc1_l = psrc1 & mask(halfSize); - uint64_t psrc2_h = op2 >> halfSize; - uint64_t psrc2_l = op2 & mask(halfSize); - uint64_t result = - ((psrc1_l * psrc2_h + psrc1_h * psrc2_l + - ((psrc1_l * psrc2_l) >> halfSize)) >> halfSize) + - psrc1_h * psrc2_h; - DestReg = merge(DestReg, result, dataSize); - ''' - - class Mulehs(FlagRegOp): - code = ''' - int halfSize = (dataSize * 8) / 2; - int64_t spsrc1_h = spsrc1 >> halfSize; - int64_t spsrc1_l = spsrc1 & mask(halfSize); - int64_t spsrc2_h = sop2 >> halfSize; - int64_t spsrc2_l = sop2 & mask(halfSize); - int64_t result = - ((spsrc1_l * spsrc2_h + spsrc1_h * spsrc2_l + - ((spsrc1_l * spsrc2_l) >> halfSize)) >> halfSize) + - spsrc1_h * spsrc2_h; - DestReg = merge(DestReg, result, dataSize); - ''' + class Muleh(RdRegOp): + def __init__(self, dest, src1=None, flags=None, dataSize="env.dataSize"): + if not src1: + src1 = dest + super(RdRegOp, self).__init__(dest, src1, "NUM_INTREGS", flags, dataSize) + code = 'DestReg = merge(SrcReg1, ProdHi, dataSize);' + flag_code = ''' + if (ProdHi) + ccFlagBits = ccFlagBits | (ext & (CFBit | OFBit | ECFBit)); + else + ccFlagBits = ccFlagBits & ~(ext & (CFBit | OFBit | ECFBit)); + ''' class Div1(FlagRegOp): code = ''' @@ -613,11 +620,6 @@ let {{ DestReg = DestReg; ''' - class WrRegOp(RegOp): - abstract = True - def __init__(self, src1, src2, flags=None, dataSize="env.dataSize"): - super(WrRegOp, self).__init__("NUM_INTREGS", src1, src2, flags, dataSize) - class Wrip(WrRegOp, CondRegOp): code = 'RIP = psrc1 + op2' else_code="RIP = RIP;" @@ -629,11 +631,6 @@ let {{ class Wruflags(WrRegOp): code = 'ccFlagBits = psrc1 ^ op2' - class RdRegOp(RegOp): - abstract = True - def __init__(self, dest, src1 = "NUM_INTREGS", dataSize="env.dataSize"): - super(RdRegOp, self).__init__(dest, src1, "NUM_INTREGS", None, dataSize) - class Rdip(RdRegOp): code = 'DestReg = RIP' diff --git a/src/arch/x86/isa/operands.isa b/src/arch/x86/isa/operands.isa index 347322752..fae1aa5ca 100644 --- a/src/arch/x86/isa/operands.isa +++ b/src/arch/x86/isa/operands.isa @@ -95,13 +95,17 @@ def operand_types {{ }}; def operands {{ - 'SrcReg1': ('IntReg', 'uqw', '(((src1 & 0x1C) == 4 ? foldOBit : 0) | src1)', 'IsInteger', 1), - 'SrcReg2': ('IntReg', 'uqw', '(((src2 & 0x1C) == 4 ? foldOBit : 0) | src2)', 'IsInteger', 2), - 'Index': ('IntReg', 'uqw', '(((index & 0x1C) == 4 ? foldABit : 0) | index)', 'IsInteger', 3), - 'Base': ('IntReg', 'uqw', '(((base & 0x1C) == 4 ? foldABit : 0) | base)', 'IsInteger', 4), - 'DestReg': ('IntReg', 'uqw', '(((dest & 0x1C) == 4 ? foldOBit : 0) | dest)', 'IsInteger', 5), - 'Data': ('IntReg', 'uqw', '(((data & 0x1C) == 4 ? foldOBit : 0) | data)', 'IsInteger', 6), - 'rax': ('IntReg', 'uqw', '(INTREG_RAX)', 'IsInteger', 7), + 'SrcReg1': ('IntReg', 'uqw', 'INTREG_FOLDED(src1, foldOBit)', 'IsInteger', 1), + 'SrcReg2': ('IntReg', 'uqw', 'INTREG_FOLDED(src2, foldOBit)', 'IsInteger', 2), + 'Index': ('IntReg', 'uqw', 'INTREG_FOLDED(index, foldABit)', 'IsInteger', 3), + 'Base': ('IntReg', 'uqw', 'INTREG_FOLDED(base, foldABit)', 'IsInteger', 4), + 'DestReg': ('IntReg', 'uqw', 'INTREG_FOLDED(dest, foldOBit)', 'IsInteger', 5), + 'Data': ('IntReg', 'uqw', 'INTREG_FOLDED(data, foldOBit)', 'IsInteger', 6), + 'ProdLow': ('IntReg', 'uqw', 'INTREG_IMPLICIT(0)', 'IsInteger', 7), + 'ProdHi': ('IntReg', 'uqw', 'INTREG_IMPLICIT(1)', 'IsInteger', 8), + 'Quotient': ('IntReg', 'uqw', 'INTREG_IMPLICIT(2)', 'IsInteger', 9), + 'Remainder': ('IntReg', 'uqw', 'INTREG_IMPLICIT(3)', 'IsInteger', 10), + 'rax': ('IntReg', 'uqw', '(INTREG_RAX)', 'IsInteger', 11), 'FpSrcReg1': ('FloatReg', 'df', 'src1', 'IsFloating', 20), 'FpSrcReg2': ('FloatReg', 'df', 'src2', 'IsFloating', 21), 'FpDestReg': ('FloatReg', 'df', 'dest', 'IsFloating', 22), @@ -109,7 +113,7 @@ def operands {{ 'RIP': ('NPC', 'uqw', None, (None, None, 'IsControl'), 50), 'uIP': ('UPC', 'uqw', None, (None, None, 'IsControl'), 51), 'nuIP': ('NUPC', 'uqw', None, (None, None, 'IsControl'), 52), - 'ccFlagBits': ('IntReg', 'uqw', 'NUM_INTREGS + NumMicroIntRegs', None, 60), + 'ccFlagBits': ('IntReg', 'uqw', 'INTREG_PSEUDO(0)', None, 60), 'SegBase': ('ControlReg', 'uqw', 'MISCREG_SEG_BASE(segment)', (None, None, ['IsSerializeAfter','IsSerializing','IsNonSpeculative']), 70), 'Mem': ('Mem', 'uqw', None, ('IsMemRef', 'IsLoad', 'IsStore'), 100) }}; diff --git a/src/arch/x86/isa_traits.hh b/src/arch/x86/isa_traits.hh index 466422ced..602c99390 100644 --- a/src/arch/x86/isa_traits.hh +++ b/src/arch/x86/isa_traits.hh @@ -81,8 +81,9 @@ namespace X86ISA // These enumerate all the registers for dependence tracking. enum DependenceTags { - //There are 16 microcode registers at the moment - FP_Base_DepTag = 1 << 7, + //There are 16 microcode registers at the moment. This is an + //unusually large constant to make sure there isn't overflow. + FP_Base_DepTag = 128, Ctrl_Base_DepTag = FP_Base_DepTag + //mmx/x87 registers diff --git a/src/arch/x86/x86_traits.hh b/src/arch/x86/x86_traits.hh index aa5b959d1..9ea8eaef4 100644 --- a/src/arch/x86/x86_traits.hh +++ b/src/arch/x86/x86_traits.hh @@ -61,7 +61,14 @@ namespace X86ISA { const int NumMicroIntRegs = 16; + const int NumPseudoIntRegs = 1; + //1. The condition code bits of the rflags register. + const int NumImplicitIntRegs = 4; + //1. The lower part of the result of multiplication. + //2. The upper part of the result of multiplication. + //3. The quotient from division + //4. The remainder from division const int NumMMXRegs = 8; const int NumXMMRegs = 16; -- cgit v1.2.3