From 237c0617a0c095e35169c3f4e48e93eaf4ada527 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Wed, 2 Jun 2010 12:58:16 -0500 Subject: ARM: Implement conversion to/from half precision. --- src/arch/arm/insts/vfp.hh | 217 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 217 insertions(+) (limited to 'src/arch/arm/insts') diff --git a/src/arch/arm/insts/vfp.hh b/src/arch/arm/insts/vfp.hh index 259bf9c11..37553a5dc 100644 --- a/src/arch/arm/insts/vfp.hh +++ b/src/arch/arm/insts/vfp.hh @@ -396,6 +396,223 @@ fixFpSFpDDest(FPSCR fpscr, float val) return mid; } +static inline float +vcvtFpSFpH(FPSCR &fpscr, float op, float dest, bool top) +{ + float junk = 0.0; + uint32_t destBits = fpToBits(dest); + uint32_t opBits = fpToBits(op); + // Extract the operand. + bool neg = bits(opBits, 31); + uint32_t exponent = bits(opBits, 30, 23); + uint32_t oldMantissa = bits(opBits, 22, 0); + uint32_t mantissa = oldMantissa >> (23 - 10); + // Do the conversion. + uint32_t extra = oldMantissa & mask(23 - 10); + if (exponent == 0xff) { + if (oldMantissa != 0) { + // Nans. + if (bits(mantissa, 9) == 0) { + // Signalling nan. + fpscr.ioc = 1; + } + if (fpscr.ahp) { + mantissa = 0; + exponent = 0; + fpscr.ioc = 1; + } else if (fpscr.dn) { + mantissa = (1 << 9); + exponent = 0x1f; + neg = false; + } else { + exponent = 0x1f; + mantissa |= (1 << 9); + } + } else { + // Infinities. + exponent = 0x1F; + if (fpscr.ahp) { + fpscr.ioc = 1; + mantissa = 0x3ff; + } else { + mantissa = 0; + } + } + } else if (exponent == 0 && oldMantissa == 0) { + // Zero, don't need to do anything. + } else { + // Normalized or denormalized numbers. + + bool inexact = (extra != 0); + + if (exponent == 0) { + // Denormalized. + + // If flush to zero is on, this shouldn't happen. + assert(fpscr.fz == 0); + + // Check for underflow + if (inexact || fpscr.ufe) + fpscr.ufc = 1; + + // Handle rounding. + unsigned mode = fpscr.rMode; + if ((mode == VfpRoundUpward && !neg && extra) || + (mode == VfpRoundDown && neg && extra) || + (mode == VfpRoundNearest && + (extra > (1 << 9) || + (extra == (1 << 9) && bits(mantissa, 0))))) { + mantissa++; + } + + // See if the number became normalized after rounding. + if (mantissa == (1 << 10)) { + mantissa = 0; + exponent = 1; + } + } else { + // Normalized. + + // We need to track the dropped bits differently since + // more can be dropped by denormalizing. + bool topOne = bits(extra, 12); + bool restZeros = bits(extra, 11, 0) == 0; + + if (exponent <= (127 - 15)) { + // The result is too small. Denormalize. + mantissa |= (1 << 10); + while (mantissa && exponent <= (127 - 15)) { + restZeros = restZeros && !topOne; + topOne = bits(mantissa, 0); + mantissa = mantissa >> 1; + exponent++; + } + if (topOne || !restZeros) + inexact = true; + exponent = 0; + } else { + // Change bias. + exponent -= (127 - 15); + } + + if (exponent == 0 && (inexact || fpscr.ufe)) { + // Underflow + fpscr.ufc = 1; + } + + // Handle rounding. + unsigned mode = fpscr.rMode; + bool nonZero = topOne || !restZeros; + if ((mode == VfpRoundUpward && !neg && nonZero) || + (mode == VfpRoundDown && neg && nonZero) || + (mode == VfpRoundNearest && topOne && + (!restZeros || bits(mantissa, 0)))) { + mantissa++; + } + + // See if we rounded up and need to bump the exponent. + if (mantissa == (1 << 10)) { + mantissa = 0; + exponent++; + } + + // Deal with overflow + if (fpscr.ahp) { + if (exponent >= 0x20) { + exponent = 0x1f; + mantissa = 0x3ff; + fpscr.ioc = 1; + // Supress inexact exception. + inexact = false; + } + } else { + if (exponent >= 0x1f) { + if ((mode == VfpRoundNearest) || + (mode == VfpRoundUpward && !neg) || + (mode == VfpRoundDown && neg)) { + // Overflow to infinity. + exponent = 0x1f; + mantissa = 0; + } else { + // Overflow to max normal. + exponent = 0x1e; + mantissa = 0x3ff; + } + fpscr.ofc = 1; + inexact = true; + } + } + } + + if (inexact) { + fpscr.ixc = 1; + } + } + // Reassemble and install the result. + uint32_t result = bits(mantissa, 9, 0); + replaceBits(result, 14, 10, exponent); + if (neg) + result |= (1 << 15); + if (top) + replaceBits(destBits, 31, 16, result); + else + replaceBits(destBits, 15, 0, result); + return bitsToFp(destBits, junk); +} + +static inline float +vcvtFpHFpS(FPSCR &fpscr, float op, bool top) +{ + float junk = 0.0; + uint32_t opBits = fpToBits(op); + // Extract the operand. + if (top) + opBits = bits(opBits, 31, 16); + else + opBits = bits(opBits, 15, 0); + // Extract the bitfields. + bool neg = bits(opBits, 15); + uint32_t exponent = bits(opBits, 14, 10); + uint32_t mantissa = bits(opBits, 9, 0); + // Do the conversion. + if (exponent == 0) { + if (mantissa != 0) { + // Normalize the value. + exponent = exponent + (127 - 15) + 1; + while (mantissa < (1 << 10)) { + mantissa = mantissa << 1; + exponent--; + } + } + mantissa = mantissa << (23 - 10); + } else if (exponent == 0x1f && !fpscr.ahp) { + // Infinities and nans. + exponent = 0xff; + if (mantissa != 0) { + // Nans. + mantissa = mantissa << (23 - 10); + if (bits(mantissa, 22) == 0) { + // Signalling nan. + fpscr.ioc = 1; + mantissa |= (1 << 22); + } + if (fpscr.dn) { + mantissa &= ~mask(22); + neg = false; + } + } + } else { + exponent = exponent + (127 - 15); + mantissa = mantissa << (23 - 10); + } + // Reassemble the result. + uint32_t result = bits(mantissa, 22, 0); + replaceBits(result, 30, 23, exponent); + if (neg) + result |= (1 << 31); + return bitsToFp(result, junk); +} + static inline double makeDouble(uint32_t low, uint32_t high) { -- cgit v1.2.3