summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Gabe Black <gabeblack@google.com> 2017-05-15 19:39:51 -0700
committer: Anthony Gutierrez <anthony.gutierrez@amd.com> 2017-05-16 20:02:03 +0000
commit: c1ec4c4f8c22864e6e6c0d5a6d833f413d3a58d7 (patch)
tree: 671a151f495a9f277a336dae0cc0e2e3992d1050 /src
parent: 05c486c5ebf95cffc7435c16c45120a28c42b2fb (diff)
download: gem5-c1ec4c4f8c22864e6e6c0d5a6d833f413d3a58d7.tar.xz
x86: Fix the multiplication microops.
If the operands were 64 bits wide, an intermediate calculation could lose a carry bit. This change rearranges that intermediate calculation if the operand width is large, and reworks the microop implementation in general in an attempt to make it easier to understand.

Change-Id: Ib36333f3f2695a33cd9623e43682de22ebd2e7ea
Reviewed-on: https://gem5-review.googlesource.com/3381
Reviewed-by: Jason Lowe-Power <jason@lowepower.com>
Reviewed-by: Anthony Gutierrez <anthony.gutierrez@amd.com>
Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com>
Diffstat (limited to 'src')
-rw-r--r-- src/arch/x86/isa/microops/regop.isa 80
1 files changed, 58 insertions, 22 deletions
diff --git a/src/arch/x86/isa/microops/regop.isa b/src/arch/x86/isa/microops/regop.isa
index ef0c4cb18..dc5f0affe 100644
--- a/src/arch/x86/isa/microops/regop.isa
+++ b/src/arch/x86/isa/microops/regop.isa
@@ -546,23 +546,42 @@ let {{
class Mul1s(WrRegOp):
op_class = 'IntMultOp'
+ # Multiply two values Aa and Bb where Aa = A << p + a, then correct for
+ # negative operands.
+ # Aa * Bb
+ # = (A << p + a) * (B << p + b)
+ # = (A * B) << 2p + (A * b + a * B) << p + a * b
code = '''
ProdLow = psrc1 * op2;
- int halfSize = (dataSize * 8) / 2;
- uint64_t shifter = (ULL(1) << halfSize);
- uint64_t hiResult;
- uint64_t psrc1_h = psrc1 / shifter;
- uint64_t psrc1_l = psrc1 & mask(halfSize);
- uint64_t psrc2_h = (op2 / shifter) & mask(halfSize);
- uint64_t psrc2_l = op2 & mask(halfSize);
- hiResult = ((psrc1_l * psrc2_h + psrc1_h * psrc2_l +
- ((psrc1_l * psrc2_l) / shifter)) /shifter) +
- psrc1_h * psrc2_h;
+
+ int p = (dataSize * 8) / 2;
+ uint64_t A = bits(psrc1, 2 * p - 1, p);
+ uint64_t a = bits(psrc1, p - 1, 0);
+ uint64_t B = bits<uint64_t>(op2, 2 * p - 1, p);
+ uint64_t b = bits<uint64_t>(op2, p - 1, 0);
+
+ uint64_t c1, c2; // Carry between place values.
+ uint64_t ab = a * b, Ab = A * b, aB = a * B, AB = A * B;
+
+ c1 = ab >> p;
+
+ // Be careful to avoid overflow if p is large.
+ if (p == 32) {
+ c2 = (c1 >> 1) + (Ab >> 1) + (aB >> 1);
+ c2 += ((c1 & 0x1) + (Ab & 0x1) + (aB & 0x1)) >> 1;
+ c2 >>= (p - 1);
+ } else {
+ c2 = (c1 + Ab + aB) >> p;
+ }
+
+ uint64_t hi = AB + c2;
+
if (bits(psrc1, dataSize * 8 - 1))
- hiResult -= op2;
+ hi -= op2;
if (bits(op2, dataSize * 8 - 1))
- hiResult -= psrc1;
- ProdHi = hiResult;
+ hi -= psrc1;
+
+ ProdHi = hi;
'''
flag_code = '''
if ((-ProdHi & mask(dataSize * 8)) !=
@@ -578,17 +597,34 @@ let {{
class Mul1u(WrRegOp):
op_class = 'IntMultOp'
+ # Multiply two values Aa and Bb where Aa = A << p + a.
+ # Aa * Bb
+ # = (A << p + a) * (B << p + b)
+ # = (A * B) << 2p + (A * b + a * B) << p + a * b
code = '''
ProdLow = psrc1 * op2;
- int halfSize = (dataSize * 8) / 2;
- uint64_t shifter = (ULL(1) << halfSize);
- uint64_t psrc1_h = psrc1 / shifter;
- uint64_t psrc1_l = psrc1 & mask(halfSize);
- uint64_t psrc2_h = (op2 / shifter) & mask(halfSize);
- uint64_t psrc2_l = op2 & mask(halfSize);
- ProdHi = ((psrc1_l * psrc2_h + psrc1_h * psrc2_l +
- ((psrc1_l * psrc2_l) / shifter)) / shifter) +
- psrc1_h * psrc2_h;
+
+ int p = (dataSize * 8) / 2;
+ uint64_t A = bits(psrc1, 2 * p - 1, p);
+ uint64_t a = bits(psrc1, p - 1, 0);
+ uint64_t B = bits<uint64_t>(op2, 2 * p - 1, p);
+ uint64_t b = bits<uint64_t>(op2, p - 1, 0);
+
+ uint64_t c1, c2; // Carry between place values.
+ uint64_t ab = a * b, Ab = A * b, aB = a * B, AB = A * B;
+
+ c1 = ab >> p;
+
+ // Be careful to avoid overflow if p is large.
+ if (p == 32) {
+ c2 = (c1 >> 1) + (Ab >> 1) + (aB >> 1);
+ c2 += ((c1 & 0x1) + (Ab & 0x1) + (aB & 0x1)) >> 1;
+ c2 >>= (p - 1);
+ } else {
+ c2 = (c1 + Ab + aB) >> p;
+ }
+
+ ProdHi = AB + c2;
'''
flag_code = '''
if (ProdHi) {