From 237c0617a0c095e35169c3f4e48e93eaf4ada527 Mon Sep 17 00:00:00 2001
From: Gabe Black
Date: Wed, 2 Jun 2010 12:58:16 -0500
Subject: ARM: Implement conversion to/from half precision.

Add vcvtFpSFpH() and vcvtFpHFpS() to convert between single and half
precision, and hook the four vcvtb/vcvtt variants up in the decoder in
place of the previous WarnUnimplemented stub.

Changes relative to the first version of this patch:
 - VcvtFpSFpHT was built from vcvtFpHTFpSCode, so the "top" single to
   half instruction executed the half to single conversion. It now uses
   vcvtFpSFpHTCode.
 - Single precision denormal operands no longer have their truncated
   mantissa copied into the half precision result; they round to zero
   or to the smallest denormal.
 - The denormalizing loop keeps shifting after the mantissa reaches
   zero so the rounding bit is not left stale.
 - The sign bit of the single precision result is set with an unsigned
   shift.
---
 src/arch/arm/insts/vfp.hh       | 226 ++++++++++++++++++++++++++++++++++++++++
 src/arch/arm/isa/formats/fp.isa |  19 +++-
 src/arch/arm/isa/insts/fp.isa   |  72 +++++++++++++
 3 files changed, 315 insertions(+), 2 deletions(-)

diff --git a/src/arch/arm/insts/vfp.hh b/src/arch/arm/insts/vfp.hh
index 259bf9c11..37553a5dc 100644
--- a/src/arch/arm/insts/vfp.hh
+++ b/src/arch/arm/insts/vfp.hh
@@ -396,6 +396,232 @@ fixFpSFpDDest(FPSCR fpscr, float val)
     return mid;
 }
 
+// Convert the single precision value in op to half precision and insert
+// it into the top or bottom 16 bits of dest (selected by top), leaving
+// the other half of dest unchanged. Exception flags are accumulated in
+// fpscr; fpscr.ahp, fpscr.dn and fpscr.rMode control the conversion.
+static inline float
+vcvtFpSFpH(FPSCR &fpscr, float op, float dest, bool top)
+{
+    float junk = 0.0;
+    uint32_t destBits = fpToBits(dest);
+    uint32_t opBits = fpToBits(op);
+    // Extract the operand.
+    bool neg = bits(opBits, 31);
+    uint32_t exponent = bits(opBits, 30, 23);
+    uint32_t oldMantissa = bits(opBits, 22, 0);
+    uint32_t mantissa = oldMantissa >> (23 - 10);
+    // Do the conversion.
+    uint32_t extra = oldMantissa & mask(23 - 10);
+    if (exponent == 0xff) {
+        if (oldMantissa != 0) {
+            // Nans.
+            if (bits(mantissa, 9) == 0) {
+                // Signalling nan.
+                fpscr.ioc = 1;
+            }
+            if (fpscr.ahp) {
+                mantissa = 0;
+                exponent = 0;
+                fpscr.ioc = 1;
+            } else if (fpscr.dn) {
+                mantissa = (1 << 9);
+                exponent = 0x1f;
+                neg = false;
+            } else {
+                exponent = 0x1f;
+                mantissa |= (1 << 9);
+            }
+        } else {
+            // Infinities.
+            exponent = 0x1F;
+            if (fpscr.ahp) {
+                fpscr.ioc = 1;
+                mantissa = 0x3ff;
+            } else {
+                mantissa = 0;
+            }
+        }
+    } else if (exponent == 0 && oldMantissa == 0) {
+        // Zero, don't need to do anything.
+    } else {
+        // Normalized or denormalized numbers.
+
+        bool inexact = (extra != 0);
+
+        if (exponent == 0) {
+            // Denormalized.
+
+            // If flush to zero is on, this shouldn't happen.
+            assert(fpscr.fz == 0);
+
+            // A single precision denormal is smaller than 2^-126, far
+            // below half of the smallest half precision denormal
+            // (2^-24). None of the operand's bits survive, so the
+            // truncated mantissa must not be reused: the result is
+            // always inexact and always underflows.
+            inexact = true;
+            fpscr.ufc = 1;
+
+            // Handle rounding. Only a directed rounding mode pointing
+            // away from zero can produce something other than zero,
+            // and that is the smallest denormal.
+            unsigned mode = fpscr.rMode;
+            if ((mode == VfpRoundUpward && !neg) ||
+                (mode == VfpRoundDown && neg)) {
+                mantissa = 1;
+            } else {
+                mantissa = 0;
+            }
+            // The exponent field is already zero.
+        } else {
+            // Normalized.
+
+            // We need to track the dropped bits differently since
+            // more can be dropped by denormalizing.
+            bool topOne = bits(extra, 12);
+            bool restZeros = bits(extra, 11, 0) == 0;
+
+            if (exponent <= (127 - 15)) {
+                // The result is too small. Denormalize.
+                mantissa |= (1 << 10);
+                // Keep shifting even once the mantissa is empty so
+                // topOne ends up describing the real half ULP bit.
+                while (exponent <= (127 - 15)) {
+                    restZeros = restZeros && !topOne;
+                    topOne = bits(mantissa, 0);
+                    mantissa = mantissa >> 1;
+                    exponent++;
+                }
+                if (topOne || !restZeros)
+                    inexact = true;
+                exponent = 0;
+            } else {
+                // Change bias.
+                exponent -= (127 - 15);
+            }
+
+            if (exponent == 0 && (inexact || fpscr.ufe)) {
+                // Underflow
+                fpscr.ufc = 1;
+            }
+
+            // Handle rounding.
+            unsigned mode = fpscr.rMode;
+            bool nonZero = topOne || !restZeros;
+            if ((mode == VfpRoundUpward && !neg && nonZero) ||
+                (mode == VfpRoundDown && neg && nonZero) ||
+                (mode == VfpRoundNearest && topOne &&
+                 (!restZeros || bits(mantissa, 0)))) {
+                mantissa++;
+            }
+
+            // See if we rounded up and need to bump the exponent.
+            if (mantissa == (1 << 10)) {
+                mantissa = 0;
+                exponent++;
+            }
+
+            // Deal with overflow
+            if (fpscr.ahp) {
+                if (exponent >= 0x20) {
+                    exponent = 0x1f;
+                    mantissa = 0x3ff;
+                    fpscr.ioc = 1;
+                    // Suppress inexact exception.
+                    inexact = false;
+                }
+            } else {
+                if (exponent >= 0x1f) {
+                    if ((mode == VfpRoundNearest) ||
+                        (mode == VfpRoundUpward && !neg) ||
+                        (mode == VfpRoundDown && neg)) {
+                        // Overflow to infinity.
+                        exponent = 0x1f;
+                        mantissa = 0;
+                    } else {
+                        // Overflow to max normal.
+                        exponent = 0x1e;
+                        mantissa = 0x3ff;
+                    }
+                    fpscr.ofc = 1;
+                    inexact = true;
+                }
+            }
+        }
+
+        if (inexact) {
+            fpscr.ixc = 1;
+        }
+    }
+    // Reassemble and install the result.
+    uint32_t result = bits(mantissa, 9, 0);
+    replaceBits(result, 14, 10, exponent);
+    if (neg)
+        result |= (1 << 15);
+    if (top)
+        replaceBits(destBits, 31, 16, result);
+    else
+        replaceBits(destBits, 15, 0, result);
+    return bitsToFp(destBits, junk);
+}
+
+// Convert the half precision value held in the top or bottom 16 bits of
+// op (selected by top) to single precision. fpscr.ioc is set for
+// signalling nans; fpscr.ahp and fpscr.dn control nan/infinity handling.
+static inline float
+vcvtFpHFpS(FPSCR &fpscr, float op, bool top)
+{
+    float junk = 0.0;
+    uint32_t opBits = fpToBits(op);
+    // Extract the operand.
+    if (top)
+        opBits = bits(opBits, 31, 16);
+    else
+        opBits = bits(opBits, 15, 0);
+    // Extract the bitfields.
+    bool neg = bits(opBits, 15);
+    uint32_t exponent = bits(opBits, 14, 10);
+    uint32_t mantissa = bits(opBits, 9, 0);
+    // Do the conversion.
+    if (exponent == 0) {
+        if (mantissa != 0) {
+            // Normalize the value.
+            exponent = exponent + (127 - 15) + 1;
+            while (mantissa < (1 << 10)) {
+                mantissa = mantissa << 1;
+                exponent--;
+            }
+        }
+        mantissa = mantissa << (23 - 10);
+    } else if (exponent == 0x1f && !fpscr.ahp) {
+        // Infinities and nans.
+        exponent = 0xff;
+        if (mantissa != 0) {
+            // Nans.
+            mantissa = mantissa << (23 - 10);
+            if (bits(mantissa, 22) == 0) {
+                // Signalling nan.
+                fpscr.ioc = 1;
+                mantissa |= (1 << 22);
+            }
+            if (fpscr.dn) {
+                mantissa &= ~mask(22);
+                neg = false;
+            }
+        }
+    } else {
+        exponent = exponent + (127 - 15);
+        mantissa = mantissa << (23 - 10);
+    }
+    // Reassemble the result.
+    uint32_t result = bits(mantissa, 22, 0);
+    replaceBits(result, 30, 23, exponent);
+    if (neg)
+        result |= (1u << 31);
+    return bitsToFp(result, junk);
+}
+
 static inline double
 makeDouble(uint32_t low, uint32_t high)
 {
diff --git a/src/arch/arm/isa/formats/fp.isa b/src/arch/arm/isa/formats/fp.isa
index d509fc28a..03e574648 100644
--- a/src/arch/arm/isa/formats/fp.isa
+++ b/src/arch/arm/isa/formats/fp.isa
@@ -655,8 +655,23 @@ let {{
                 }
               case 0x2:
               case 0x3:
-                // Between half and single precision.
-                return new WarnUnimplemented("vcvtb, vcvtt", machInst);
+                {
+                    const bool toHalf = bits(machInst, 16);
+                    const bool top = bits(machInst, 7);
+                    if (top) {
+                        if (toHalf) {
+                            return new VcvtFpSFpHT(machInst, vd, vm);
+                        } else {
+                            return new VcvtFpHTFpS(machInst, vd, vm);
+                        }
+                    } else {
+                        if (toHalf) {
+                            return new VcvtFpSFpHB(machInst, vd, vm);
+                        } else {
+                            return new VcvtFpHBFpS(machInst, vd, vm);
+                        }
+                    }
+                }
               case 0x4:
                 if (single) {
                     if (e) {
diff --git a/src/arch/arm/isa/insts/fp.isa b/src/arch/arm/isa/insts/fp.isa
index bee63d671..c4682b66c 100644
--- a/src/arch/arm/isa/insts/fp.isa
+++ b/src/arch/arm/isa/insts/fp.isa
@@ -912,6 +912,78 @@ let {{
     decoder_output += FpRegRegOpConstructor.subst(vcvtFpDFpSIop);
     exec_output += PredOpExecute.subst(vcvtFpDFpSIop);
 
+    # NOTE(review): unlike vcvtFpHBFpSCode below, this flushes FpOp1 as if
+    # it were a single precision value even though it holds packed half
+    # precision data -- confirm vfpFlushToZero is really wanted here.
+    vcvtFpHTFpSCode = '''
+        FPSCR fpscr = Fpscr;
+        vfpFlushToZero(fpscr, FpOp1);
+        VfpSavedState state = prepFpState(fpscr.rMode);
+        __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
+        FpDest = vcvtFpHFpS(fpscr, FpOp1, true);
+        __asm__ __volatile__("" :: "m" (FpDest));
+        finishVfp(fpscr, state);
+        Fpscr = fpscr;
+    '''
+    vcvtFpHTFpSIop = InstObjParams("vcvtt", "VcvtFpHTFpS", "FpRegRegOp",
+                                   { "code": vcvtFpHTFpSCode,
+                                     "predicate_test": predicateTest }, [])
+    header_output += FpRegRegOpDeclare.subst(vcvtFpHTFpSIop);
+    decoder_output += FpRegRegOpConstructor.subst(vcvtFpHTFpSIop);
+    exec_output += PredOpExecute.subst(vcvtFpHTFpSIop);
+
+    vcvtFpHBFpSCode = '''
+        FPSCR fpscr = Fpscr;
+        VfpSavedState state = prepFpState(fpscr.rMode);
+        __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
+        FpDest = vcvtFpHFpS(fpscr, FpOp1, false);
+        __asm__ __volatile__("" :: "m" (FpDest));
+        finishVfp(fpscr, state);
+        Fpscr = fpscr;
+    '''
+    vcvtFpHBFpSIop = InstObjParams("vcvtb", "VcvtFpHBFpS", "FpRegRegOp",
+                                   { "code": vcvtFpHBFpSCode,
+                                     "predicate_test": predicateTest }, [])
+    header_output += FpRegRegOpDeclare.subst(vcvtFpHBFpSIop);
+    decoder_output += FpRegRegOpConstructor.subst(vcvtFpHBFpSIop);
+    exec_output += PredOpExecute.subst(vcvtFpHBFpSIop);
+
+    vcvtFpSFpHTCode = '''
+        FPSCR fpscr = Fpscr;
+        vfpFlushToZero(fpscr, FpOp1);
+        VfpSavedState state = prepFpState(fpscr.rMode);
+        __asm__ __volatile__("" : "=m" (FpOp1), "=m" (FpDest)
+                                : "m" (FpOp1), "m" (FpDest));
+        FpDest = vcvtFpSFpH(fpscr, FpOp1, FpDest, true);
+        __asm__ __volatile__("" :: "m" (FpDest));
+        finishVfp(fpscr, state);
+        Fpscr = fpscr;
+    '''
+    vcvtFpSFpHTIop = InstObjParams("vcvtt", "VcvtFpSFpHT", "FpRegRegOp",
+                                   { "code": vcvtFpSFpHTCode,
+                                     "predicate_test": predicateTest }, [])
+    header_output += FpRegRegOpDeclare.subst(vcvtFpSFpHTIop);
+    decoder_output += FpRegRegOpConstructor.subst(vcvtFpSFpHTIop);
+    exec_output += PredOpExecute.subst(vcvtFpSFpHTIop);
+
+    vcvtFpSFpHBCode = '''
+        FPSCR fpscr = Fpscr;
+        vfpFlushToZero(fpscr, FpOp1);
+        VfpSavedState state = prepFpState(fpscr.rMode);
+        __asm__ __volatile__("" : "=m" (FpOp1), "=m" (FpDest)
+                                : "m" (FpOp1), "m" (FpDest));
+        FpDest = vcvtFpSFpH(fpscr, FpOp1, FpDest, false);
+        __asm__ __volatile__("" :: "m" (FpDest));
+        finishVfp(fpscr, state);
+        Fpscr = fpscr;
+    '''
+    vcvtFpSFpHBIop = InstObjParams("vcvtb", "VcvtFpSFpHB", "FpRegRegOp",
+                                   { "code": vcvtFpSFpHBCode,
+                                     "predicate_test": predicateTest }, [])
+    header_output += FpRegRegOpDeclare.subst(vcvtFpSFpHBIop);
+    decoder_output += FpRegRegOpConstructor.subst(vcvtFpSFpHBIop);
+    exec_output += PredOpExecute.subst(vcvtFpSFpHBIop);
+
     vcmpSCode = '''
         FPSCR fpscr = Fpscr;
         vfpFlushToZero(fpscr, FpDest, FpOp1);
-- 
cgit v1.2.3