summaryrefslogtreecommitdiff
path: root/src/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'src/arch/x86')
-rw-r--r--src/arch/x86/isa/microops/regop.isa80
1 files changed, 58 insertions, 22 deletions
diff --git a/src/arch/x86/isa/microops/regop.isa b/src/arch/x86/isa/microops/regop.isa
index ef0c4cb18..dc5f0affe 100644
--- a/src/arch/x86/isa/microops/regop.isa
+++ b/src/arch/x86/isa/microops/regop.isa
@@ -546,23 +546,42 @@ let {{
class Mul1s(WrRegOp):
op_class = 'IntMultOp'
+ # Multiply two values Aa and Bb, where Aa = (A << p) + a and
+ # Bb = (B << p) + b, then correct for negative operands.
+ # Aa * Bb
+ # = ((A << p) + a) * ((B << p) + b)
+ # = ((A * B) << 2p) + ((A * b + a * B) << p) + a * b
code = '''
ProdLow = psrc1 * op2;
- int halfSize = (dataSize * 8) / 2;
- uint64_t shifter = (ULL(1) << halfSize);
- uint64_t hiResult;
- uint64_t psrc1_h = psrc1 / shifter;
- uint64_t psrc1_l = psrc1 & mask(halfSize);
- uint64_t psrc2_h = (op2 / shifter) & mask(halfSize);
- uint64_t psrc2_l = op2 & mask(halfSize);
- hiResult = ((psrc1_l * psrc2_h + psrc1_h * psrc2_l +
- ((psrc1_l * psrc2_l) / shifter)) /shifter) +
- psrc1_h * psrc2_h;
+
+ int p = (dataSize * 8) / 2;
+ uint64_t A = bits(psrc1, 2 * p - 1, p);
+ uint64_t a = bits(psrc1, p - 1, 0);
+ uint64_t B = bits<uint64_t>(op2, 2 * p - 1, p);
+ uint64_t b = bits<uint64_t>(op2, p - 1, 0);
+
+ uint64_t c1, c2; // Carry between place values.
+ uint64_t ab = a * b, Ab = A * b, aB = a * B, AB = A * B;
+
+ c1 = ab >> p;
+
+ // Be careful to avoid overflow if p is large.
+ if (p == 32) {
+ c2 = (c1 >> 1) + (Ab >> 1) + (aB >> 1);
+ c2 += ((c1 & 0x1) + (Ab & 0x1) + (aB & 0x1)) >> 1;
+ c2 >>= (p - 1);
+ } else {
+ c2 = (c1 + Ab + aB) >> p;
+ }
+
+ uint64_t hi = AB + c2;
+
if (bits(psrc1, dataSize * 8 - 1))
- hiResult -= op2;
+ hi -= op2;
if (bits(op2, dataSize * 8 - 1))
- hiResult -= psrc1;
- ProdHi = hiResult;
+ hi -= psrc1;
+
+ ProdHi = hi;
'''
flag_code = '''
if ((-ProdHi & mask(dataSize * 8)) !=
@@ -578,17 +597,34 @@ let {{
class Mul1u(WrRegOp):
op_class = 'IntMultOp'
+ # Multiply Aa and Bb, where Aa = (A << p) + a and Bb = (B << p) + b.
+ # Aa * Bb
+ # = ((A << p) + a) * ((B << p) + b)
+ # = ((A * B) << 2p) + ((A * b + a * B) << p) + a * b
code = '''
ProdLow = psrc1 * op2;
- int halfSize = (dataSize * 8) / 2;
- uint64_t shifter = (ULL(1) << halfSize);
- uint64_t psrc1_h = psrc1 / shifter;
- uint64_t psrc1_l = psrc1 & mask(halfSize);
- uint64_t psrc2_h = (op2 / shifter) & mask(halfSize);
- uint64_t psrc2_l = op2 & mask(halfSize);
- ProdHi = ((psrc1_l * psrc2_h + psrc1_h * psrc2_l +
- ((psrc1_l * psrc2_l) / shifter)) / shifter) +
- psrc1_h * psrc2_h;
+
+ int p = (dataSize * 8) / 2;
+ uint64_t A = bits(psrc1, 2 * p - 1, p);
+ uint64_t a = bits(psrc1, p - 1, 0);
+ uint64_t B = bits<uint64_t>(op2, 2 * p - 1, p);
+ uint64_t b = bits<uint64_t>(op2, p - 1, 0);
+
+ uint64_t c1, c2; // Carry between place values.
+ uint64_t ab = a * b, Ab = A * b, aB = a * B, AB = A * B;
+
+ c1 = ab >> p;
+
+ // Be careful to avoid overflow if p is large.
+ if (p == 32) {
+ c2 = (c1 >> 1) + (Ab >> 1) + (aB >> 1);
+ c2 += ((c1 & 0x1) + (Ab & 0x1) + (aB & 0x1)) >> 1;
+ c2 >>= (p - 1);
+ } else {
+ c2 = (c1 + Ab + aB) >> p;
+ }
+
+ ProdHi = AB + c2;
'''
flag_code = '''
if (ProdHi) {