Diffstat (limited to 'src/arch/arm/insts/vfp.cc')
 src/arch/arm/insts/vfp.cc | 484
 1 file changed, 253 insertions(+), 231 deletions(-)
diff --git a/src/arch/arm/insts/vfp.cc b/src/arch/arm/insts/vfp.cc
index ca0f58226..03fdc83fa 100644
--- a/src/arch/arm/insts/vfp.cc
+++ b/src/arch/arm/insts/vfp.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010 ARM Limited
+ * Copyright (c) 2010-2013 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -46,6 +46,37 @@
*/
std::string
+FpCondCompRegOp::generateDisassembly(
+ Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printMnemonic(ss, "", false);
+ printReg(ss, op1);
+ ccprintf(ss, ", ");
+ printReg(ss, op2);
+ ccprintf(ss, ", #%d", defCc);
+ ccprintf(ss, ", ");
+ printCondition(ss, condCode, true);
+ return ss.str();
+}
+
+std::string
+FpCondSelOp::generateDisassembly(
+ Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printMnemonic(ss, "", false);
+ printReg(ss, dest);
+ ccprintf(ss, ", ");
+ printReg(ss, op1);
+ ccprintf(ss, ", ");
+ printReg(ss, op2);
+ ccprintf(ss, ", ");
+ printCondition(ss, condCode, true);
+ return ss.str();
+}
+
+std::string
FpRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
std::stringstream ss;
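Note: the two disassemblers added above cover the conditional-compare and conditional-select forms. Assuming printMnemonic/printReg/printCondition behave as elsewhere in this file, the emitted text is shaped like the following (mnemonics and register names are illustrative only, not taken from this diff):

    fccmp   s1, s2, #13, ge    // FpCondCompRegOp: op1, op2, default NZCV, cond
    fcsel   d0, d1, d2, eq     // FpCondSelOp: dest, op1, op2, cond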
@@ -92,6 +123,21 @@ FpRegRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
}
std::string
+FpRegRegRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printMnemonic(ss);
+ printReg(ss, dest + FP_Reg_Base);
+ ss << ", ";
+ printReg(ss, op1 + FP_Reg_Base);
+ ss << ", ";
+ printReg(ss, op2 + FP_Reg_Base);
+ ss << ", ";
+ printReg(ss, op3 + FP_Reg_Base);
+ return ss.str();
+}
+
+std::string
FpRegRegRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
std::stringstream ss;
@@ -131,24 +177,25 @@ prepFpState(uint32_t rMode)
}
void
-finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush)
+finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush, FPSCR mask)
{
int exceptions = fetestexcept(FeAllExceptions);
bool underflow = false;
- if (exceptions & FeInvalid) {
+ if ((exceptions & FeInvalid) && mask.ioc) {
fpscr.ioc = 1;
}
- if (exceptions & FeDivByZero) {
+ if ((exceptions & FeDivByZero) && mask.dzc) {
fpscr.dzc = 1;
}
- if (exceptions & FeOverflow) {
+ if ((exceptions & FeOverflow) && mask.ofc) {
fpscr.ofc = 1;
}
if (exceptions & FeUnderflow) {
underflow = true;
- fpscr.ufc = 1;
+ if (mask.ufc)
+ fpscr.ufc = 1;
}
- if ((exceptions & FeInexact) && !(underflow && flush)) {
+ if ((exceptions & FeInexact) && !(underflow && flush) && mask.ixc) {
fpscr.ixc = 1;
}
fesetround(state);
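Note: the new mask argument lets callers suppress latching of individual cumulative exception flags. A minimal standalone model of the gating above, assuming the FE_* macros are the host exceptions behind gem5's FeInvalid and friends, with an illustrative struct standing in for the FPSCR bitfields:

    #include <cfenv>

    // Illustrative stand-in for the FPSCR exception bitfields.
    struct ExcFlags { bool ioc = 0, dzc = 0, ofc = 0, ufc = 0, ixc = 0; };

    void latchFlags(ExcFlags &fpscr, const ExcFlags &mask, bool flush)
    {
        int ex = std::fetestexcept(FE_ALL_EXCEPT);
        bool underflow = false;
        if ((ex & FE_INVALID)   && mask.ioc) fpscr.ioc = true;
        if ((ex & FE_DIVBYZERO) && mask.dzc) fpscr.dzc = true;
        if ((ex & FE_OVERFLOW)  && mask.ofc) fpscr.ofc = true;
        if (ex & FE_UNDERFLOW) {
            underflow = true;
            if (mask.ufc) fpscr.ufc = true;
        }
        // As above: inexact is not latched when a flushed underflow fired.
        if ((ex & FE_INEXACT) && !(underflow && flush) && mask.ixc)
            fpscr.ixc = true;
    }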
@@ -329,19 +376,33 @@ fixFpSFpDDest(FPSCR fpscr, float val)
return mid;
}
-uint16_t
-vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan,
- uint32_t rMode, bool ahp, float op)
+static inline uint16_t
+vcvtFpFpH(FPSCR &fpscr, bool flush, bool defaultNan,
+ uint32_t rMode, bool ahp, uint64_t opBits, bool isDouble)
{
- uint32_t opBits = fpToBits(op);
+ uint32_t mWidth;
+ uint32_t eWidth;
+ uint32_t eHalfRange;
+ uint32_t sBitPos;
+
+ if (isDouble) {
+ mWidth = 52;
+ eWidth = 11;
+ } else {
+ mWidth = 23;
+ eWidth = 8;
+ }
+ sBitPos = eWidth + mWidth;
+ eHalfRange = (1 << (eWidth-1)) - 1;
+
// Extract the operand.
- bool neg = bits(opBits, 31);
- uint32_t exponent = bits(opBits, 30, 23);
- uint32_t oldMantissa = bits(opBits, 22, 0);
- uint32_t mantissa = oldMantissa >> (23 - 10);
+ bool neg = bits(opBits, sBitPos);
+ uint32_t exponent = bits(opBits, sBitPos-1, mWidth);
+ uint64_t oldMantissa = bits(opBits, mWidth-1, 0);
+ uint32_t mantissa = oldMantissa >> (mWidth - 10);
// Do the conversion.
- uint32_t extra = oldMantissa & mask(23 - 10);
- if (exponent == 0xff) {
+ uint64_t extra = oldMantissa & mask(mWidth - 10);
+ if (exponent == mask(eWidth)) {
if (oldMantissa != 0) {
// Nans.
if (bits(mantissa, 9) == 0) {
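Note: the old single-precision-only converter becomes vcvtFpFpH, parameterized by the source format's mantissa and exponent widths. The derived constants are plain IEEE-754 facts; a standalone sanity check mirroring the two isDouble branches:

    #include <cassert>
    #include <cstdint>

    int main()
    {
        const struct { uint32_t mWidth, eWidth; } fmts[] = {{23, 8}, {52, 11}};
        for (auto f : fmts) {
            uint32_t sBitPos    = f.eWidth + f.mWidth;        // sign-bit index
            uint32_t eHalfRange = (1u << (f.eWidth - 1)) - 1; // exponent bias
            assert(sBitPos    == (f.mWidth == 23 ? 31u  : 63u));
            assert(eHalfRange == (f.mWidth == 23 ? 127u : 1023u));
        }
        // Rebiasing to half precision subtracts (eHalfRange - 15), 15 being
        // the half-precision bias: 112 for single, 1008 for double.
        return 0;
    }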
@@ -379,7 +440,6 @@ vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan,
if (exponent == 0) {
// Denormalized.
-
// If flush to zero is on, this shouldn't happen.
assert(!flush);
@@ -407,13 +467,13 @@ vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan,
// We need to track the dropped bits differently since
// more can be dropped by denormalizing.
- bool topOne = bits(extra, 12);
- bool restZeros = bits(extra, 11, 0) == 0;
+ bool topOne = bits(extra, mWidth - 10 - 1);
+ bool restZeros = bits(extra, mWidth - 10 - 2, 0) == 0;
- if (exponent <= (127 - 15)) {
+ if (exponent <= (eHalfRange - 15)) {
// The result is too small. Denormalize.
mantissa |= (1 << 10);
- while (mantissa && exponent <= (127 - 15)) {
+ while (mantissa && exponent <= (eHalfRange - 15)) {
restZeros = restZeros && !topOne;
topOne = bits(mantissa, 0);
mantissa = mantissa >> 1;
@@ -424,7 +484,7 @@ vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan,
exponent = 0;
} else {
// Change bias.
- exponent -= (127 - 15);
+ exponent -= (eHalfRange - 15);
}
if (exponent == 0 && (inexact || fpscr.ufe)) {
@@ -488,155 +548,115 @@ vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan,
return result;
}
-float
-vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op)
+uint16_t
+vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan,
+ uint32_t rMode, bool ahp, float op)
{
- float junk = 0.0;
+ uint64_t opBits = fpToBits(op);
+ return vcvtFpFpH(fpscr, flush, defaultNan, rMode, ahp, opBits, false);
+}
+
+uint16_t
+vcvtFpDFpH(FPSCR &fpscr, bool flush, bool defaultNan,
+ uint32_t rMode, bool ahp, double op)
+{
+ uint64_t opBits = fpToBits(op);
+ return vcvtFpFpH(fpscr, flush, defaultNan, rMode, ahp, opBits, true);
+}
+
+static inline uint64_t
+vcvtFpHFp(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op, bool isDouble)
+{
+ uint32_t mWidth;
+ uint32_t eWidth;
+ uint32_t eHalfRange;
+ uint32_t sBitPos;
+
+ if (isDouble) {
+ mWidth = 52;
+ eWidth = 11;
+ } else {
+ mWidth = 23;
+ eWidth = 8;
+ }
+ sBitPos = eWidth + mWidth;
+ eHalfRange = (1 << (eWidth-1)) - 1;
+
// Extract the bitfields.
bool neg = bits(op, 15);
uint32_t exponent = bits(op, 14, 10);
- uint32_t mantissa = bits(op, 9, 0);
+ uint64_t mantissa = bits(op, 9, 0);
// Do the conversion.
if (exponent == 0) {
if (mantissa != 0) {
// Normalize the value.
- exponent = exponent + (127 - 15) + 1;
+ exponent = exponent + (eHalfRange - 15) + 1;
while (mantissa < (1 << 10)) {
mantissa = mantissa << 1;
exponent--;
}
}
- mantissa = mantissa << (23 - 10);
+ mantissa = mantissa << (mWidth - 10);
} else if (exponent == 0x1f && !ahp) {
// Infinities and nans.
- exponent = 0xff;
+ exponent = mask(eWidth);
if (mantissa != 0) {
// Nans.
- mantissa = mantissa << (23 - 10);
- if (bits(mantissa, 22) == 0) {
+ mantissa = mantissa << (mWidth - 10);
+ if (bits(mantissa, mWidth-1) == 0) {
// Signalling nan.
fpscr.ioc = 1;
- mantissa |= (1 << 22);
+ mantissa |= (((uint64_t) 1) << (mWidth-1));
}
if (defaultNan) {
- mantissa &= ~mask(22);
+ mantissa &= ~mask(mWidth-1);
neg = false;
}
}
} else {
- exponent = exponent + (127 - 15);
- mantissa = mantissa << (23 - 10);
+ exponent = exponent + (eHalfRange - 15);
+ mantissa = mantissa << (mWidth - 10);
}
// Reassemble the result.
- uint32_t result = bits(mantissa, 22, 0);
- replaceBits(result, 30, 23, exponent);
- if (neg)
- result |= (1 << 31);
+ uint64_t result = bits(mantissa, mWidth-1, 0);
+ replaceBits(result, sBitPos-1, mWidth, exponent);
+ if (neg) {
+ result |= (((uint64_t) 1) << sBitPos);
+ }
+ return result;
+}
+
+double
+vcvtFpHFpD(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op)
+{
+ double junk = 0.0;
+ uint64_t result;
+
+ result = vcvtFpHFp(fpscr, defaultNan, ahp, op, true);
return bitsToFp(result, junk);
}
-uint64_t
-vfpFpSToFixed(float val, bool isSigned, bool half,
- uint8_t imm, bool rzero)
+float
+vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op)
{
- int rmode = rzero ? FeRoundZero : fegetround();
- __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode));
- fesetround(FeRoundNearest);
- val = val * powf(2.0, imm);
- __asm__ __volatile__("" : "=m" (val) : "m" (val));
- fesetround(rmode);
- feclearexcept(FeAllExceptions);
- __asm__ __volatile__("" : "=m" (val) : "m" (val));
- float origVal = val;
- val = rintf(val);
- int fpType = std::fpclassify(val);
- if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
- if (fpType == FP_NAN) {
- feraiseexcept(FeInvalid);
- }
- val = 0.0;
- } else if (origVal != val) {
- switch (rmode) {
- case FeRoundNearest:
- if (origVal - val > 0.5)
- val += 1.0;
- else if (val - origVal > 0.5)
- val -= 1.0;
- break;
- case FeRoundDown:
- if (origVal < val)
- val -= 1.0;
- break;
- case FeRoundUpward:
- if (origVal > val)
- val += 1.0;
- break;
- }
- feraiseexcept(FeInexact);
- }
+ float junk = 0.0;
+ uint64_t result;
- if (isSigned) {
- if (half) {
- if ((double)val < (int16_t)(1 << 15)) {
- feraiseexcept(FeInvalid);
- feclearexcept(FeInexact);
- return (int16_t)(1 << 15);
- }
- if ((double)val > (int16_t)mask(15)) {
- feraiseexcept(FeInvalid);
- feclearexcept(FeInexact);
- return (int16_t)mask(15);
- }
- return (int16_t)val;
- } else {
- if ((double)val < (int32_t)(1 << 31)) {
- feraiseexcept(FeInvalid);
- feclearexcept(FeInexact);
- return (int32_t)(1 << 31);
- }
- if ((double)val > (int32_t)mask(31)) {
- feraiseexcept(FeInvalid);
- feclearexcept(FeInexact);
- return (int32_t)mask(31);
- }
- return (int32_t)val;
- }
- } else {
- if (half) {
- if ((double)val < 0) {
- feraiseexcept(FeInvalid);
- feclearexcept(FeInexact);
- return 0;
- }
- if ((double)val > (mask(16))) {
- feraiseexcept(FeInvalid);
- feclearexcept(FeInexact);
- return mask(16);
- }
- return (uint16_t)val;
- } else {
- if ((double)val < 0) {
- feraiseexcept(FeInvalid);
- feclearexcept(FeInexact);
- return 0;
- }
- if ((double)val > (mask(32))) {
- feraiseexcept(FeInvalid);
- feclearexcept(FeInexact);
- return mask(32);
- }
- return (uint32_t)val;
- }
- }
+ result = vcvtFpHFp(fpscr, defaultNan, ahp, op, false);
+ return bitsToFp(result, junk);
}
float
vfpUFixedToFpS(bool flush, bool defaultNan,
- uint32_t val, bool half, uint8_t imm)
+ uint64_t val, uint8_t width, uint8_t imm)
{
fesetround(FeRoundNearest);
- if (half)
+ if (width == 16)
val = (uint16_t)val;
+ else if (width == 32)
+ val = (uint32_t)val;
+ else if (width != 64)
+ panic("Unsupported width %d", width);
float scale = powf(2.0, imm);
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
feclearexcept(FeAllExceptions);
@@ -646,11 +666,16 @@ vfpUFixedToFpS(bool flush, bool defaultNan,
float
vfpSFixedToFpS(bool flush, bool defaultNan,
- int32_t val, bool half, uint8_t imm)
+ int64_t val, uint8_t width, uint8_t imm)
{
fesetround(FeRoundNearest);
- if (half)
+ if (width == 16)
val = sext<16>(val & mask(16));
+ else if (width == 32)
+ val = sext<32>(val & mask(32));
+ else if (width != 64)
+ panic("Unsupported width %d", width);
+
float scale = powf(2.0, imm);
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
feclearexcept(FeAllExceptions);
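Note: the fixed-point helpers trade the old bool half for an explicit width, with 64-bit values passing through untouched. A hedged sketch of the sign-extension step; maskBits/sext model the assumed behaviour of gem5's mask()/sext<N>() helpers from base/bitfield.hh:

    #include <cstdint>
    #include <cstdio>

    static uint64_t maskBits(unsigned n) { return n >= 64 ? ~0ULL : (1ULL << n) - 1; }

    template <unsigned N>
    static int64_t sext(uint64_t v)
    {
        const uint64_t sign = 1ULL << (N - 1);          // weight of the sign bit
        return static_cast<int64_t>((v ^ sign) - sign);
    }

    int main()
    {
        int64_t val = 0xFFFF;  // a 16-bit -1 arriving in a 64-bit operand
        std::printf("%lld\n", (long long)sext<16>(val & maskBits(16)));  // -1
        std::printf("%lld\n", (long long)sext<32>(0x80000000ULL));       // -2147483648
        return 0;
    }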
@@ -658,106 +683,19 @@ vfpSFixedToFpS(bool flush, bool defaultNan,
return fixDivDest(flush, defaultNan, val / scale, (float)val, scale);
}
-uint64_t
-vfpFpDToFixed(double val, bool isSigned, bool half,
- uint8_t imm, bool rzero)
-{
- int rmode = rzero ? FeRoundZero : fegetround();
- fesetround(FeRoundNearest);
- val = val * pow(2.0, imm);
- __asm__ __volatile__("" : "=m" (val) : "m" (val));
- fesetround(rmode);
- feclearexcept(FeAllExceptions);
- __asm__ __volatile__("" : "=m" (val) : "m" (val));
- double origVal = val;
- val = rint(val);
- int fpType = std::fpclassify(val);
- if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
- if (fpType == FP_NAN) {
- feraiseexcept(FeInvalid);
- }
- val = 0.0;
- } else if (origVal != val) {
- switch (rmode) {
- case FeRoundNearest:
- if (origVal - val > 0.5)
- val += 1.0;
- else if (val - origVal > 0.5)
- val -= 1.0;
- break;
- case FeRoundDown:
- if (origVal < val)
- val -= 1.0;
- break;
- case FeRoundUpward:
- if (origVal > val)
- val += 1.0;
- break;
- }
- feraiseexcept(FeInexact);
- }
- if (isSigned) {
- if (half) {
- if (val < (int16_t)(1 << 15)) {
- feraiseexcept(FeInvalid);
- feclearexcept(FeInexact);
- return (int16_t)(1 << 15);
- }
- if (val > (int16_t)mask(15)) {
- feraiseexcept(FeInvalid);
- feclearexcept(FeInexact);
- return (int16_t)mask(15);
- }
- return (int16_t)val;
- } else {
- if (val < (int32_t)(1 << 31)) {
- feraiseexcept(FeInvalid);
- feclearexcept(FeInexact);
- return (int32_t)(1 << 31);
- }
- if (val > (int32_t)mask(31)) {
- feraiseexcept(FeInvalid);
- feclearexcept(FeInexact);
- return (int32_t)mask(31);
- }
- return (int32_t)val;
- }
- } else {
- if (half) {
- if (val < 0) {
- feraiseexcept(FeInvalid);
- feclearexcept(FeInexact);
- return 0;
- }
- if (val > mask(16)) {
- feraiseexcept(FeInvalid);
- feclearexcept(FeInexact);
- return mask(16);
- }
- return (uint16_t)val;
- } else {
- if (val < 0) {
- feraiseexcept(FeInvalid);
- feclearexcept(FeInexact);
- return 0;
- }
- if (val > mask(32)) {
- feraiseexcept(FeInvalid);
- feclearexcept(FeInexact);
- return mask(32);
- }
- return (uint32_t)val;
- }
- }
-}
double
vfpUFixedToFpD(bool flush, bool defaultNan,
- uint32_t val, bool half, uint8_t imm)
+ uint64_t val, uint8_t width, uint8_t imm)
{
fesetround(FeRoundNearest);
- if (half)
+ if (width == 16)
val = (uint16_t)val;
+ else if (width == 32)
+ val = (uint32_t)val;
+ else if (width != 64)
+ panic("Unsupported width %d", width);
+
double scale = pow(2.0, imm);
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
feclearexcept(FeAllExceptions);
@@ -767,11 +705,16 @@ vfpUFixedToFpD(bool flush, bool defaultNan,
double
vfpSFixedToFpD(bool flush, bool defaultNan,
- int32_t val, bool half, uint8_t imm)
+ int64_t val, uint8_t width, uint8_t imm)
{
fesetround(FeRoundNearest);
- if (half)
+ if (width == 16)
val = sext<16>(val & mask(16));
+ else if (width == 32)
+ val = sext<32>(val & mask(32));
+ else if (width != 64)
+ panic("Unsupported width %d", width);
+
double scale = pow(2.0, imm);
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
feclearexcept(FeAllExceptions);
@@ -976,6 +919,85 @@ template
double FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan,
double op1, double op2) const;
+// @TODO remove this function when we've finished switching all FMA code to use the new FPLIB
+template <class fpType>
+fpType
+FpOp::ternaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType op3,
+ fpType (*func)(fpType, fpType, fpType),
+ bool flush, bool defaultNan, uint32_t rMode) const
+{
+ const bool single = (sizeof(fpType) == sizeof(float));
+ fpType junk = 0.0;
+
+ if (flush && (flushToZero(op1, op2) || flushToZero(op3)))
+ fpscr.idc = 1;
+ VfpSavedState state = prepFpState(rMode);
+ __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (op3), "=m" (state)
+ : "m" (op1), "m" (op2), "m" (op3), "m" (state));
+ fpType dest = func(op1, op2, op3);
+ __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));
+
+ int fpClass = std::fpclassify(dest);
+ // Get NAN behavior right. This varies between x86 and ARM.
+ if (fpClass == FP_NAN) {
+ const uint64_t qnan =
+ single ? 0x7fc00000 : ULL(0x7ff8000000000000);
+ const bool nan1 = std::isnan(op1);
+ const bool nan2 = std::isnan(op2);
+ const bool nan3 = std::isnan(op3);
+ const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
+ const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
+ const bool signal3 = nan3 && ((fpToBits(op3) & qnan) != qnan);
+ if ((!nan1 && !nan2 && !nan3) || (defaultNan == 1)) {
+ dest = bitsToFp(qnan, junk);
+ } else if (signal1) {
+ dest = bitsToFp(fpToBits(op1) | qnan, junk);
+ } else if (signal2) {
+ dest = bitsToFp(fpToBits(op2) | qnan, junk);
+ } else if (signal3) {
+ dest = bitsToFp(fpToBits(op3) | qnan, junk);
+ } else if (nan1) {
+ dest = op1;
+ } else if (nan2) {
+ dest = op2;
+ } else if (nan3) {
+ dest = op3;
+ }
+ } else if (flush && flushToZero(dest)) {
+ feraiseexcept(FeUnderflow);
+ } else if ((
+ (single && (dest == bitsToFp(0x00800000, junk) ||
+ dest == bitsToFp(0x80800000, junk))) ||
+ (!single &&
+ (dest == bitsToFp(ULL(0x0010000000000000), junk) ||
+ dest == bitsToFp(ULL(0x8010000000000000), junk)))
+ ) && rMode != VfpRoundZero) {
+ /*
+ * Correct for the fact that underflow is detected -before- rounding
+ * in ARM and -after- rounding in x86.
+ */
+ fesetround(FeRoundZero);
+ __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (op3)
+ : "m" (op1), "m" (op2), "m" (op3));
+ fpType temp = func(op1, op2, op3);
+ __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
+ if (flush && flushToZero(temp)) {
+ dest = temp;
+ }
+ }
+ finishVfp(fpscr, state, flush);
+ return dest;
+}
+
+template
+float FpOp::ternaryOp(FPSCR &fpscr, float op1, float op2, float op3,
+ float (*func)(float, float, float),
+ bool flush, bool defaultNan, uint32_t rMode) const;
+template
+double FpOp::ternaryOp(FPSCR &fpscr, double op1, double op2, double op3,
+ double (*func)(double, double, double),
+ bool flush, bool defaultNan, uint32_t rMode) const;
+
template <class fpType>
fpType
FpOp::binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
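Note: ternaryOp mirrors binaryOp but threads a third operand through the NaN handling and the underflow re-check. The quiet-bit test it relies on, shown standalone (fpToBits is re-declared locally for illustration; the constants match the single-precision branch above):

    #include <cmath>
    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    static uint32_t fpToBits(float f)
    {
        uint32_t b;
        std::memcpy(&b, &f, sizeof(b));
        return b;
    }

    int main()
    {
        const uint32_t qnan = 0x7fc00000;   // single-precision quiet-NaN pattern
        uint32_t snanBits = 0x7f800001;     // exponent all-ones, quiet bit clear
        float snan;
        std::memcpy(&snan, &snanBits, sizeof(snan));
        // A NaN whose quiet bit is clear is signalling...
        bool signalling = std::isnan(snan) && ((fpToBits(snan) & qnan) != qnan);
        // ...and is propagated quietened, as in the signal1/2/3 branches.
        std::printf("signalling=%d quietened=0x%08x\n",
                    signalling, (unsigned)(fpToBits(snan) | qnan));
        return 0;
    }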