From 347ab6c7045bc800c2c00239b37981571e3893dc Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Wed, 2 Jun 2010 12:58:15 -0500 Subject: ARM: Compensate for ARM's underflow coming from -before- rounding, but x86's after. --- src/arch/arm/insts/vfp.hh | 118 +++++++++++++++++++++++++++++++++++++++--- src/arch/arm/isa/insts/fp.isa | 34 ++++++------ 2 files changed, 127 insertions(+), 25 deletions(-) diff --git a/src/arch/arm/insts/vfp.hh b/src/arch/arm/insts/vfp.hh index e32aac721..11ae8ed96 100644 --- a/src/arch/arm/insts/vfp.hh +++ b/src/arch/arm/insts/vfp.hh @@ -173,6 +173,30 @@ bitsToFp(uint64_t bits, double junk) return val.fp; } +template +static inline fpType +fixDest(FPSCR fpscr, fpType val, fpType op1) +{ + int fpClass = std::fpclassify(val); + fpType junk = 0.0; + if (fpClass == FP_NAN) { + const bool single = (sizeof(val) == sizeof(float)); + const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000); + const bool nan = std::isnan(op1); + if (!nan || (fpscr.dn == 1)) { + val = bitsToFp(qnan, junk); + } else if (nan) { + val = bitsToFp(fpToBits(op1) | qnan, junk); + } + } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) { + // Turn val into a zero with the correct sign; + uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1); + val = bitsToFp(fpToBits(val) & bitMask, junk); + feraiseexcept(FeUnderflow); + } + return val; +} + template static inline fpType fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2) @@ -206,6 +230,84 @@ fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2) return val; } +template +static inline fpType +fixMultDest(FPSCR fpscr, fpType val, fpType op1, fpType op2) +{ + fpType mid = fixDest(fpscr, val, op1, op2); + const bool single = (sizeof(fpType) == sizeof(float)); + const fpType junk = 0.0; + if ((single && (val == bitsToFp(0x00800000, junk) || + val == bitsToFp(0x80800000, junk))) || + (!single && (val == bitsToFp(ULL(0x0010000000000000), junk) || + val == bitsToFp(ULL(0x8010000000000000), junk))) + ) { + __asm__ __volatile__("" : "=m" (op1) : "m" (op1)); + fesetround(FeRoundZero); + fpType temp = 0.0; + __asm__ __volatile__("" : "=m" (temp) : "m" (temp)); + temp = op1 * op2; + if (!std::isnormal(temp)) { + feraiseexcept(FeUnderflow); + } + __asm__ __volatile__("" :: "m" (temp)); + } + return mid; +} + +template +static inline fpType +fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2) +{ + fpType mid = fixDest(fpscr, val, op1, op2); + const bool single = (sizeof(fpType) == sizeof(float)); + const fpType junk = 0.0; + if ((single && (val == bitsToFp(0x00800000, junk) || + val == bitsToFp(0x80800000, junk))) || + (!single && (val == bitsToFp(ULL(0x0010000000000000), junk) || + val == bitsToFp(ULL(0x8010000000000000), junk))) + ) { + __asm__ __volatile__("" : "=m" (op1) : "m" (op1)); + fesetround(FeRoundZero); + fpType temp = 0.0; + __asm__ __volatile__("" : "=m" (temp) : "m" (temp)); + temp = op1 / op2; + if (!std::isnormal(temp)) { + feraiseexcept(FeUnderflow); + } + __asm__ __volatile__("" :: "m" (temp)); + } + return mid; +} + +static inline float +fixFpDFpSDest(FPSCR fpscr, double val) +{ + const float junk = 0.0; + float op1 = 0.0; + if (std::isnan(val)) { + uint64_t valBits = fpToBits(val); + uint32_t op1Bits = bits(valBits, 50, 29) | + (mask(9) << 22) | + (bits(valBits, 63) << 31); + op1 = bitsToFp(op1Bits, junk); + } + float mid = fixDest(fpscr, (float)val, op1); + if (mid == bitsToFp(0x00800000, junk) || + mid == bitsToFp(0x80800000, junk)) { + __asm__ __volatile__("" : "=m" (val) : "m" (val)); + fesetround(FeRoundZero); + float temp = 0.0; + __asm__ __volatile__("" : "=m" (temp) : "m" (temp)); + temp = val; + if (!std::isnormal(temp)) { + feraiseexcept(FeUnderflow); + } + __asm__ __volatile__("" :: "m" (temp)); + } + return mid; +} + static inline uint64_t vfpFpSToFixed(float val, bool isSigned, bool half, uint8_t imm) { @@ -282,7 +384,7 @@ vfpFpSToFixed(float val, bool isSigned, bool half, uint8_t imm) } static inline float -vfpUFixedToFpS(uint32_t val, bool half, uint8_t imm) +vfpUFixedToFpS(FPSCR fpscr, uint32_t val, bool half, uint8_t imm) { fesetround(FeRoundNearest); if (half) @@ -291,11 +393,11 @@ vfpUFixedToFpS(uint32_t val, bool half, uint8_t imm) __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); feclearexcept(FeAllExceptions); __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); - return val / scale; + return fixDivDest(fpscr, val / scale, (float)val, scale); } static inline float -vfpSFixedToFpS(int32_t val, bool half, uint8_t imm) +vfpSFixedToFpS(FPSCR fpscr, int32_t val, bool half, uint8_t imm) { fesetround(FeRoundNearest); if (half) @@ -304,7 +406,7 @@ vfpSFixedToFpS(int32_t val, bool half, uint8_t imm) __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); feclearexcept(FeAllExceptions); __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); - return val / scale; + return fixDivDest(fpscr, val / scale, (float)val, scale); } static inline uint64_t @@ -383,7 +485,7 @@ vfpFpDToFixed(double val, bool isSigned, bool half, uint8_t imm) } static inline double -vfpUFixedToFpD(uint32_t val, bool half, uint8_t imm) +vfpUFixedToFpD(FPSCR fpscr, uint32_t val, bool half, uint8_t imm) { fesetround(FeRoundNearest); if (half) @@ -392,11 +494,11 @@ vfpUFixedToFpD(uint32_t val, bool half, uint8_t imm) __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); feclearexcept(FeAllExceptions); __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); - return val / scale; + return fixDivDest(fpscr, val / scale, (double)val, scale); } static inline double -vfpSFixedToFpD(int32_t val, bool half, uint8_t imm) +vfpSFixedToFpD(FPSCR fpscr, int32_t val, bool half, uint8_t imm) { fesetround(FeRoundNearest); if (half) @@ -405,7 +507,7 @@ vfpSFixedToFpD(int32_t val, bool half, uint8_t imm) __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); feclearexcept(FeAllExceptions); __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); - return val / scale; + return fixDivDest(fpscr, val / scale, (double)val, scale); } typedef int VfpSavedState; diff --git a/src/arch/arm/isa/insts/fp.isa b/src/arch/arm/isa/insts/fp.isa index 4f693f46a..026e332d4 100644 --- a/src/arch/arm/isa/insts/fp.isa +++ b/src/arch/arm/isa/insts/fp.isa @@ -386,7 +386,7 @@ let {{ vfpFlushToZero(Fpscr, FpOp1, FpOp2); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest = fixDest(Fpscr, FpOp1 * FpOp2, FpOp1, FpOp2); + FpDest = fixMultDest(Fpscr, FpOp1 * FpOp2, FpOp1, FpOp2); __asm__ __volatile__("" :: "m" (FpDest)); Fpscr = setVfpFpscr(Fpscr, state); if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) { @@ -407,7 +407,7 @@ let {{ vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp)); - cDest.fp = fixDest(Fpscr, cOp1.fp * cOp2.fp, cOp1.fp, cOp2.fp); + cDest.fp = fixMultDest(Fpscr, cOp1.fp * cOp2.fp, cOp1.fp, cOp2.fp); __asm__ __volatile__("" :: "m" (cDest.fp)); Fpscr = setVfpFpscr(Fpscr, state); if ((isinf(cOp1.fp) && cOp2.fp == 0) || @@ -683,7 +683,7 @@ let {{ mid = NAN; } vfpFlushToZero(Fpscr, FpDest, mid); - FpDest = fixDest(Fpscr, FpDest - mid, FpDest, mid); + FpDest = fixDest(Fpscr, FpDest - mid, FpDest, -mid); __asm__ __volatile__("" :: "m" (FpDest)); Fpscr = setVfpFpscr(Fpscr, state); ''' @@ -707,7 +707,7 @@ let {{ (isinf(cOp2.fp) && cOp1.fp == 0)) { mid = NAN; } - cDest.fp = fixDest(Fpscr, cDest.fp - mid, cDest.fp, mid); + cDest.fp = fixDest(Fpscr, cDest.fp - mid, cDest.fp, -mid); vfpFlushToZero(Fpscr, cDest.fp, mid); __asm__ __volatile__("" :: "m" (cDest.fp)); Fpscr = setVfpFpscr(Fpscr, state); @@ -730,7 +730,7 @@ let {{ mid = NAN; } vfpFlushToZero(Fpscr, FpDest, mid); - FpDest = fixDest(Fpscr, -FpDest - mid, FpDest, mid); + FpDest = fixDest(Fpscr, -FpDest - mid, -FpDest, -mid); __asm__ __volatile__("" :: "m" (FpDest)); Fpscr = setVfpFpscr(Fpscr, state); ''' @@ -755,7 +755,7 @@ let {{ mid = NAN; } vfpFlushToZero(Fpscr, cDest.fp, mid); - cDest.fp = fixDest(Fpscr, -cDest.fp - mid, cDest.fp, mid); + cDest.fp = fixDest(Fpscr, -cDest.fp - mid, -cDest.fp, -mid); __asm__ __volatile__("" :: "m" (cDest.fp)); Fpscr = setVfpFpscr(Fpscr, state); FpDestP0.uw = cDest.bits; @@ -777,7 +777,7 @@ let {{ mid = NAN; } vfpFlushToZero(Fpscr, FpDest, mid); - FpDest = fixDest(Fpscr, -FpDest + mid, FpDest, mid); + FpDest = fixDest(Fpscr, -FpDest + mid, -FpDest, mid); __asm__ __volatile__("" :: "m" (FpDest)); Fpscr = setVfpFpscr(Fpscr, state); ''' @@ -802,7 +802,7 @@ let {{ mid = NAN; } vfpFlushToZero(Fpscr, cDest.fp, mid); - cDest.fp = fixDest(Fpscr, -cDest.fp + mid, cDest.fp, mid); + cDest.fp = fixDest(Fpscr, -cDest.fp + mid, -cDest.fp, mid); __asm__ __volatile__("" :: "m" (cDest.fp)); Fpscr = setVfpFpscr(Fpscr, state); FpDestP0.uw = cDest.bits; @@ -1089,7 +1089,7 @@ let {{ vfpFlushToZero(Fpscr, cOp1.fp); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp)); - FpDest = cOp1.fp; + FpDest = fixFpDFpSDest(Fpscr, cOp1.fp); __asm__ __volatile__("" :: "m" (FpDest)); Fpscr = setVfpFpscr(Fpscr, state); ''' @@ -1267,7 +1267,7 @@ let {{ vcvtSFixedFpSCode = ''' VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (FpOp1.sw) : "m" (FpOp1.sw)); - FpDest = vfpSFixedToFpS(FpOp1.sw, false, imm); + FpDest = vfpSFixedToFpS(Fpscr, FpOp1.sw, false, imm); __asm__ __volatile__("" :: "m" (FpDest)); Fpscr = setVfpFpscr(Fpscr, state); ''' @@ -1283,7 +1283,7 @@ let {{ uint64_t mid = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (mid) : "m" (mid)); - cDest.fp = vfpSFixedToFpD(mid, false, imm); + cDest.fp = vfpSFixedToFpD(Fpscr, mid, false, imm); __asm__ __volatile__("" :: "m" (cDest.fp)); Fpscr = setVfpFpscr(Fpscr, state); FpDestP0.uw = cDest.bits; @@ -1299,7 +1299,7 @@ let {{ vcvtUFixedFpSCode = ''' VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (FpOp1.uw) : "m" (FpOp1.uw)); - FpDest = vfpUFixedToFpS(FpOp1.uw, false, imm); + FpDest = vfpUFixedToFpS(Fpscr, FpOp1.uw, false, imm); __asm__ __volatile__("" :: "m" (FpDest)); Fpscr = setVfpFpscr(Fpscr, state); ''' @@ -1315,7 +1315,7 @@ let {{ uint64_t mid = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (mid) : "m" (mid)); - cDest.fp = vfpUFixedToFpD(mid, false, imm); + cDest.fp = vfpUFixedToFpD(Fpscr, mid, false, imm); __asm__ __volatile__("" :: "m" (cDest.fp)); Fpscr = setVfpFpscr(Fpscr, state); FpDestP0.uw = cDest.bits; @@ -1403,7 +1403,7 @@ let {{ vcvtSHFixedFpSCode = ''' VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (FpOp1.sh) : "m" (FpOp1.sh)); - FpDest = vfpSFixedToFpS(FpOp1.sh, true, imm); + FpDest = vfpSFixedToFpS(Fpscr, FpOp1.sh, true, imm); __asm__ __volatile__("" :: "m" (FpDest)); Fpscr = setVfpFpscr(Fpscr, state); ''' @@ -1420,7 +1420,7 @@ let {{ uint64_t mid = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (mid) : "m" (mid)); - cDest.fp = vfpSFixedToFpD(mid, true, imm); + cDest.fp = vfpSFixedToFpD(Fpscr, mid, true, imm); __asm__ __volatile__("" :: "m" (cDest.fp)); Fpscr = setVfpFpscr(Fpscr, state); FpDestP0.uw = cDest.bits; @@ -1437,7 +1437,7 @@ let {{ vcvtUHFixedFpSCode = ''' VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (FpOp1.uh) : "m" (FpOp1.uh)); - FpDest = vfpUFixedToFpS(FpOp1.uh, true, imm); + FpDest = vfpUFixedToFpS(Fpscr, FpOp1.uh, true, imm); __asm__ __volatile__("" :: "m" (FpDest)); Fpscr = setVfpFpscr(Fpscr, state); ''' @@ -1454,7 +1454,7 @@ let {{ uint64_t mid = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (mid) : "m" (mid)); - cDest.fp = vfpUFixedToFpD(mid, true, imm); + cDest.fp = vfpUFixedToFpD(Fpscr, mid, true, imm); __asm__ __volatile__("" :: "m" (cDest.fp)); Fpscr = setVfpFpscr(Fpscr, state); FpDestP0.uw = cDest.bits; -- cgit v1.2.3