From 8466999aefe1da1eade680af3b4d1336d4e04e7f Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Wed, 2 Jun 2010 12:58:15 -0500 Subject: ARM: Implement flush to zero mode for VFP, and clean up some corner cases. --- src/arch/arm/insts/vfp.hh | 89 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 84 insertions(+), 5 deletions(-) (limited to 'src/arch/arm/insts/vfp.hh') diff --git a/src/arch/arm/insts/vfp.hh b/src/arch/arm/insts/vfp.hh index 5a0ecf828..b0fc8b6dc 100644 --- a/src/arch/arm/insts/vfp.hh +++ b/src/arch/arm/insts/vfp.hh @@ -43,6 +43,7 @@ #include "arch/arm/insts/misc.hh" #include "arch/arm/miscregs.hh" #include +#include enum VfpMicroMode { VfpNotAMicroop, @@ -101,6 +102,26 @@ enum VfpRoundingMode VfpRoundZero = 3 }; +template +static inline void +vfpFlushToZero(uint32_t &_fpscr, fpType &op) +{ + FPSCR fpscr = _fpscr; + if (fpscr.fz == 1 && (std::fpclassify(op) == FP_SUBNORMAL)) { + fpscr.idc = 1; + op = 0; + } + _fpscr = fpscr; +} + +template +static inline void +vfpFlushToZero(uint32_t &fpscr, fpType &op1, fpType &op2) +{ + vfpFlushToZero(fpscr, op1); + vfpFlushToZero(fpscr, op2); +} + static inline uint64_t vfpFpSToFixed(float val, bool isSigned, bool half, uint8_t imm) { @@ -108,24 +129,41 @@ vfpFpSToFixed(float val, bool isSigned, bool half, uint8_t imm) val = val * powf(2.0, imm); __asm__ __volatile__("" : "=m" (val) : "m" (val)); feclearexcept(FeAllExceptions); + __asm__ __volatile__("" : "=m" (val) : "m" (val)); + float origVal = val; + val = rintf(val); + int fpType = std::fpclassify(val); + if (fpType == FP_SUBNORMAL || fpType == FP_NAN) { + if (fpType == FP_NAN) { + feraiseexcept(FeInvalid); + } + val = 0.0; + } else if (origVal != val) { + feraiseexcept(FeInexact); + } + if (isSigned) { if (half) { if ((double)val < (int16_t)(1 << 15)) { feraiseexcept(FeInvalid); + feclearexcept(FeInexact); return (int16_t)(1 << 15); } if ((double)val > (int16_t)mask(15)) { feraiseexcept(FeInvalid); + feclearexcept(FeInexact); return (int16_t)mask(15); } return (int16_t)val; } else { if ((double)val < (int32_t)(1 << 31)) { feraiseexcept(FeInvalid); + feclearexcept(FeInexact); return (int32_t)(1 << 31); } if ((double)val > (int32_t)mask(31)) { feraiseexcept(FeInvalid); + feclearexcept(FeInexact); return (int32_t)mask(31); } return (int32_t)val; @@ -134,20 +172,24 @@ vfpFpSToFixed(float val, bool isSigned, bool half, uint8_t imm) if (half) { if ((double)val < 0) { feraiseexcept(FeInvalid); + feclearexcept(FeInexact); return 0; } if ((double)val > (mask(16))) { feraiseexcept(FeInvalid); + feclearexcept(FeInexact); return mask(16); } return (uint16_t)val; } else { if ((double)val < 0) { feraiseexcept(FeInvalid); + feclearexcept(FeInexact); return 0; } if ((double)val > (mask(32))) { feraiseexcept(FeInvalid); + feclearexcept(FeInexact); return mask(32); } return (uint32_t)val; @@ -161,7 +203,11 @@ vfpUFixedToFpS(uint32_t val, bool half, uint8_t imm) fesetround(FeRoundNearest); if (half) val = (uint16_t)val; - return val / powf(2.0, imm); + float scale = powf(2.0, imm); + __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); + feclearexcept(FeAllExceptions); + __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); + return val / scale; } static inline float @@ -170,34 +216,55 @@ vfpSFixedToFpS(int32_t val, bool half, uint8_t imm) fesetround(FeRoundNearest); if (half) val = sext<16>(val & mask(16)); - return val / powf(2.0, imm); + float scale = powf(2.0, imm); + __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); + feclearexcept(FeAllExceptions); + __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); + return val / scale; } static inline uint64_t vfpFpDToFixed(double val, bool isSigned, bool half, uint8_t imm) { - fesetround(FeRoundZero); + fesetround(FeRoundNearest); val = val * pow(2.0, imm); __asm__ __volatile__("" : "=m" (val) : "m" (val)); + fesetround(FeRoundZero); feclearexcept(FeAllExceptions); + __asm__ __volatile__("" : "=m" (val) : "m" (val)); + double origVal = val; + val = rint(val); + int fpType = std::fpclassify(val); + if (fpType == FP_SUBNORMAL || fpType == FP_NAN) { + if (fpType == FP_NAN) { + feraiseexcept(FeInvalid); + } + val = 0.0; + } else if (origVal != val) { + feraiseexcept(FeInexact); + } if (isSigned) { if (half) { if (val < (int16_t)(1 << 15)) { feraiseexcept(FeInvalid); + feclearexcept(FeInexact); return (int16_t)(1 << 15); } if (val > (int16_t)mask(15)) { feraiseexcept(FeInvalid); + feclearexcept(FeInexact); return (int16_t)mask(15); } return (int16_t)val; } else { if (val < (int32_t)(1 << 31)) { feraiseexcept(FeInvalid); + feclearexcept(FeInexact); return (int32_t)(1 << 31); } if (val > (int32_t)mask(31)) { feraiseexcept(FeInvalid); + feclearexcept(FeInexact); return (int32_t)mask(31); } return (int32_t)val; @@ -206,20 +273,24 @@ vfpFpDToFixed(double val, bool isSigned, bool half, uint8_t imm) if (half) { if (val < 0) { feraiseexcept(FeInvalid); + feclearexcept(FeInexact); return 0; } if (val > mask(16)) { feraiseexcept(FeInvalid); + feclearexcept(FeInexact); return mask(16); } return (uint16_t)val; } else { if (val < 0) { feraiseexcept(FeInvalid); + feclearexcept(FeInexact); return 0; } if (val > mask(32)) { feraiseexcept(FeInvalid); + feclearexcept(FeInexact); return mask(32); } return (uint32_t)val; @@ -233,7 +304,11 @@ vfpUFixedToFpD(uint32_t val, bool half, uint8_t imm) fesetround(FeRoundNearest); if (half) val = (uint16_t)val; - return val / pow(2.0, imm); + double scale = pow(2.0, imm); + __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); + feclearexcept(FeAllExceptions); + __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); + return val / scale; } static inline double @@ -242,7 +317,11 @@ vfpSFixedToFpD(int32_t val, bool half, uint8_t imm) fesetround(FeRoundNearest); if (half) val = sext<16>(val & mask(16)); - return val / pow(2.0, imm); + double scale = pow(2.0, imm); + __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); + feclearexcept(FeAllExceptions); + __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); + return val / scale; } typedef int VfpSavedState; -- cgit v1.2.3