diff options
Diffstat (limited to 'src/arch')
-rw-r--r-- | src/arch/x86/isa/microops/regop.isa | 80 |
1 files changed, 58 insertions, 22 deletions
diff --git a/src/arch/x86/isa/microops/regop.isa b/src/arch/x86/isa/microops/regop.isa index ef0c4cb18..dc5f0affe 100644 --- a/src/arch/x86/isa/microops/regop.isa +++ b/src/arch/x86/isa/microops/regop.isa @@ -546,23 +546,42 @@ let {{ class Mul1s(WrRegOp): op_class = 'IntMultOp' + # Multiply two values Aa and Bb where Aa = (A << p) + a, then correct for + # negative operands. + # Aa * Bb + # = ((A << p) + a) * ((B << p) + b) + # = ((A * B) << 2p) + ((A * b + a * B) << p) + a * b code = ''' ProdLow = psrc1 * op2; - int halfSize = (dataSize * 8) / 2; - uint64_t shifter = (ULL(1) << halfSize); - uint64_t hiResult; - uint64_t psrc1_h = psrc1 / shifter; - uint64_t psrc1_l = psrc1 & mask(halfSize); - uint64_t psrc2_h = (op2 / shifter) & mask(halfSize); - uint64_t psrc2_l = op2 & mask(halfSize); - hiResult = ((psrc1_l * psrc2_h + psrc1_h * psrc2_l + - ((psrc1_l * psrc2_l) / shifter)) /shifter) + - psrc1_h * psrc2_h; + + int p = (dataSize * 8) / 2; + uint64_t A = bits(psrc1, 2 * p - 1, p); + uint64_t a = bits(psrc1, p - 1, 0); + uint64_t B = bits<uint64_t>(op2, 2 * p - 1, p); + uint64_t b = bits<uint64_t>(op2, p - 1, 0); + + uint64_t c1, c2; // Carry between place values. + uint64_t ab = a * b, Ab = A * b, aB = a * B, AB = A * B; + + c1 = ab >> p; + + // Be careful to avoid overflow if p is large. + if (p == 32) { + c2 = (c1 >> 1) + (Ab >> 1) + (aB >> 1); + c2 += ((c1 & 0x1) + (Ab & 0x1) + (aB & 0x1)) >> 1; + c2 >>= (p - 1); + } else { + c2 = (c1 + Ab + aB) >> p; + } + + uint64_t hi = AB + c2; + if (bits(psrc1, dataSize * 8 - 1)) - hiResult -= op2; + hi -= op2; if (bits(op2, dataSize * 8 - 1)) - hiResult -= psrc1; - ProdHi = hiResult; + hi -= psrc1; + + ProdHi = hi; ''' flag_code = ''' if ((-ProdHi & mask(dataSize * 8)) != @@ -578,17 +597,34 @@ let {{ class Mul1u(WrRegOp): op_class = 'IntMultOp' + # Multiply two values Aa and Bb where Aa = (A << p) + a. 
+ # Aa * Bb + # = ((A << p) + a) * ((B << p) + b) + # = ((A * B) << 2p) + ((A * b + a * B) << p) + a * b code = ''' ProdLow = psrc1 * op2; - int halfSize = (dataSize * 8) / 2; - uint64_t shifter = (ULL(1) << halfSize); - uint64_t psrc1_h = psrc1 / shifter; - uint64_t psrc1_l = psrc1 & mask(halfSize); - uint64_t psrc2_h = (op2 / shifter) & mask(halfSize); - uint64_t psrc2_l = op2 & mask(halfSize); - ProdHi = ((psrc1_l * psrc2_h + psrc1_h * psrc2_l + - ((psrc1_l * psrc2_l) / shifter)) / shifter) + - psrc1_h * psrc2_h; + + int p = (dataSize * 8) / 2; + uint64_t A = bits(psrc1, 2 * p - 1, p); + uint64_t a = bits(psrc1, p - 1, 0); + uint64_t B = bits<uint64_t>(op2, 2 * p - 1, p); + uint64_t b = bits<uint64_t>(op2, p - 1, 0); + + uint64_t c1, c2; // Carry between place values. + uint64_t ab = a * b, Ab = A * b, aB = a * B, AB = A * B; + + c1 = ab >> p; + + // Be careful to avoid overflow if p is large. + if (p == 32) { + c2 = (c1 >> 1) + (Ab >> 1) + (aB >> 1); + c2 += ((c1 & 0x1) + (Ab & 0x1) + (aB & 0x1)) >> 1; + c2 >>= (p - 1); + } else { + c2 = (c1 + Ab + aB) >> p; + } + + ProdHi = AB + c2; ''' flag_code = ''' if (ProdHi) { |