From c1ec4c4f8c22864e6e6c0d5a6d833f413d3a58d7 Mon Sep 17 00:00:00 2001
From: Gabe Black
Date: Mon, 15 May 2017 19:39:51 -0700
Subject: x86: Fix the multiplication microops.

If the operands were 64 bit, an intermediate calculation could lose a
carry bit. This change rearranges that intermediate calculation if the
operand width is large, and reworks the microop implementation in
general in an attempt to make it easier to understand.

Change-Id: Ib36333f3f2695a33cd9623e43682de22ebd2e7ea
Reviewed-on: https://gem5-review.googlesource.com/3381
Reviewed-by: Jason Lowe-Power
Reviewed-by: Anthony Gutierrez
Maintainer: Anthony Gutierrez
---
 src/arch/x86/isa/microops/regop.isa | 80 +++++++++++++++++++++++++++----------
 1 file changed, 58 insertions(+), 22 deletions(-)

(limited to 'src/arch/x86')

diff --git a/src/arch/x86/isa/microops/regop.isa b/src/arch/x86/isa/microops/regop.isa
index ef0c4cb18..dc5f0affe 100644
--- a/src/arch/x86/isa/microops/regop.isa
+++ b/src/arch/x86/isa/microops/regop.isa
@@ -546,23 +546,42 @@ let {{
     class Mul1s(WrRegOp):
         op_class = 'IntMultOp'
+        # Multiply two values Aa and Bb where Aa = A << p + a, then correct for
+        # negative operands.
+        # Aa * Bb
+        # = (A << p + a) * (B << p + b)
+        # = (A * B) << 2p + (A * b + a * B) << p + a * b
         code = '''
             ProdLow = psrc1 * op2;
-            int halfSize = (dataSize * 8) / 2;
-            uint64_t shifter = (ULL(1) << halfSize);
-            uint64_t hiResult;
-            uint64_t psrc1_h = psrc1 / shifter;
-            uint64_t psrc1_l = psrc1 & mask(halfSize);
-            uint64_t psrc2_h = (op2 / shifter) & mask(halfSize);
-            uint64_t psrc2_l = op2 & mask(halfSize);
-            hiResult = ((psrc1_l * psrc2_h + psrc1_h * psrc2_l +
-                       ((psrc1_l * psrc2_l) / shifter)) /shifter) +
-                       psrc1_h * psrc2_h;
+
+            int p = (dataSize * 8) / 2;
+            uint64_t A = bits(psrc1, 2 * p - 1, p);
+            uint64_t a = bits(psrc1, p - 1, 0);
+            uint64_t B = bits(op2, 2 * p - 1, p);
+            uint64_t b = bits(op2, p - 1, 0);
+
+            uint64_t c1, c2; // Carry between place values.
+            uint64_t ab = a * b, Ab = A * b, aB = a * B, AB = A * B;
+
+            c1 = ab >> p;
+
+            // Be careful to avoid overflow if p is large.
+            if (p == 32) {
+                c2 = (c1 >> 1) + (Ab >> 1) + (aB >> 1);
+                c2 += ((c1 & 0x1) + (Ab & 0x1) + (aB & 0x1)) >> 1;
+                c2 >>= (p - 1);
+            } else {
+                c2 = (c1 + Ab + aB) >> p;
+            }
+
+            uint64_t hi = AB + c2;
+
             if (bits(psrc1, dataSize * 8 - 1))
-                hiResult -= op2;
+                hi -= op2;
             if (bits(op2, dataSize * 8 - 1))
-                hiResult -= psrc1;
-            ProdHi = hiResult;
+                hi -= psrc1;
+
+            ProdHi = hi;
         '''
         flag_code = '''
             if ((-ProdHi & mask(dataSize * 8)) !=
@@ -578,17 +597,34 @@ let {{
     class Mul1u(WrRegOp):
         op_class = 'IntMultOp'
+        # Multiply two values Aa and Bb where Aa = A << p + a.
+        # Aa * Bb
+        # = (A << p + a) * (B << p + b)
+        # = (A * B) << 2p + (A * b + a * B) << p + a * b
         code = '''
             ProdLow = psrc1 * op2;
-            int halfSize = (dataSize * 8) / 2;
-            uint64_t shifter = (ULL(1) << halfSize);
-            uint64_t psrc1_h = psrc1 / shifter;
-            uint64_t psrc1_l = psrc1 & mask(halfSize);
-            uint64_t psrc2_h = (op2 / shifter) & mask(halfSize);
-            uint64_t psrc2_l = op2 & mask(halfSize);
-            ProdHi = ((psrc1_l * psrc2_h + psrc1_h * psrc2_l +
-                      ((psrc1_l * psrc2_l) / shifter)) / shifter) +
-                     psrc1_h * psrc2_h;
+
+            int p = (dataSize * 8) / 2;
+            uint64_t A = bits(psrc1, 2 * p - 1, p);
+            uint64_t a = bits(psrc1, p - 1, 0);
+            uint64_t B = bits(op2, 2 * p - 1, p);
+            uint64_t b = bits(op2, p - 1, 0);
+
+            uint64_t c1, c2; // Carry between place values.
+            uint64_t ab = a * b, Ab = A * b, aB = a * B, AB = A * B;
+
+            c1 = ab >> p;
+
+            // Be careful to avoid overflow if p is large.
+            if (p == 32) {
+                c2 = (c1 >> 1) + (Ab >> 1) + (aB >> 1);
+                c2 += ((c1 & 0x1) + (Ab & 0x1) + (aB & 0x1)) >> 1;
+                c2 >>= (p - 1);
+            } else {
+                c2 = (c1 + Ab + aB) >> p;
+            }
+
+            ProdHi = AB + c2;
         '''
         flag_code = '''
            if (ProdHi) {
--
cgit v1.2.3
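
For reference, the decomposition the patch introduces can be checked outside gem5 with a small standalone C++ sketch of the 64-bit (p == 32) case. This is only an illustration: the helper names mulhi_u64/mulhi_s64 and the checks in main() are not part of the patch, which embeds the same logic directly in the Mul1u/Mul1s microop code strings.

#include <cassert>
#include <cstdint>

// Split x = (A << 32) + a and y = (B << 32) + b, so
// x * y = (A*B << 64) + ((A*b + a*B) << 32) + a*b, and the high 64 bits
// of the product are A*B plus the carry propagating out of the middle terms.
static uint64_t mulhi_u64(uint64_t x, uint64_t y)
{
    const int p = 32;
    uint64_t A = x >> p, a = x & 0xffffffffULL;
    uint64_t B = y >> p, b = y & 0xffffffffULL;

    uint64_t ab = a * b, Ab = A * b, aB = a * B, AB = A * B;

    uint64_t c1 = ab >> p; // carry out of the low 64 bits of the product

    // c1 + Ab + aB can exceed 64 bits, so halve each term before summing
    // and then fold the dropped low bits back in, as the patch does for
    // the p == 32 case.
    uint64_t c2 = (c1 >> 1) + (Ab >> 1) + (aB >> 1);
    c2 += ((c1 & 0x1) + (Ab & 0x1) + (aB & 0x1)) >> 1;
    c2 >>= (p - 1);

    return AB + c2;
}

// Signed variant: take the unsigned high half, then subtract the other
// operand once for each negative input (two's complement correction),
// mirroring the fixup at the end of Mul1s.
static int64_t mulhi_s64(int64_t x, int64_t y)
{
    uint64_t hi = mulhi_u64((uint64_t)x, (uint64_t)y);
    if (x < 0)
        hi -= (uint64_t)y;
    if (y < 0)
        hi -= (uint64_t)x;
    return (int64_t)hi;
}

int main()
{
    // (2^64 - 1)^2 has high half 0xfffffffffffffffe; the old single-shift
    // formulation lost a carry on inputs like these.
    assert(mulhi_u64(~0ULL, ~0ULL) == 0xfffffffffffffffeULL);
    // -1 * -1 = 1, so the signed high half is 0.
    assert(mulhi_s64(-1, -1) == 0);
    return 0;
}

The essential point is the c2 computation: with 64-bit operands the three terms c1, Ab, and aB can sum past 64 bits, so each is shifted right by one before adding and the discarded low bits are re-added, which recovers the carry the old "/ shifter" formulation could drop.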