diff options
-rw-r--r-- | src/arch/arm/insts/vfp.hh | 75 | ||||
-rw-r--r-- | src/arch/arm/isa/insts/fp.isa | 60 |
2 files changed, 135 insertions, 0 deletions
diff --git a/src/arch/arm/insts/vfp.hh b/src/arch/arm/insts/vfp.hh index 77e104a13..465384304 100644 --- a/src/arch/arm/insts/vfp.hh +++ b/src/arch/arm/insts/vfp.hh @@ -41,6 +41,8 @@ #define __ARCH_ARM_INSTS_VFP_HH__ #include "arch/arm/insts/misc.hh" +#include "arch/arm/miscregs.hh" +#include <fenv.h> enum VfpMicroMode { VfpNotAMicroop, @@ -73,6 +75,79 @@ setVfpMicroFlags(VfpMicroMode mode, T &flags) } } +/* Host <fenv.h> floating point exception flags under local names. */ +enum FeExceptionBit +{ + FeDivByZero = FE_DIVBYZERO, + FeInexact = FE_INEXACT, + FeInvalid = FE_INVALID, + FeOverflow = FE_OVERFLOW, + FeUnderflow = FE_UNDERFLOW, + FeAllExceptions = FE_ALL_EXCEPT +}; + +/* Host <fenv.h> rounding mode constants under local names. */ +enum FeRoundingMode +{ + FeRoundDown = FE_DOWNWARD, + FeRoundNearest = FE_TONEAREST, + FeRoundZero = FE_TOWARDZERO, + FeRoundUpward = FE_UPWARD +}; + +/* Values that prepVfpFpscr compares against fpscr.rMode. */ +enum VfpRoundingMode +{ + VfpRoundNearest = 0, + VfpRoundUpward = 1, + VfpRoundDown = 2, + VfpRoundZero = 3 +}; + +/* Host rounding mode returned by prepVfpFpscr and handed back to setVfpFpscr. */ +typedef int VfpSavedState; + +/* Clears all host floating point exception flags, selects the host rounding mode that corresponds to fpscr.rMode, and returns the host rounding mode that was in effect on entry so the caller can restore it later. */ +static inline VfpSavedState +prepVfpFpscr(FPSCR fpscr) +{ + int roundingMode = fegetround(); + feclearexcept(FeAllExceptions); + switch (fpscr.rMode) { + case VfpRoundNearest: + fesetround(FeRoundNearest); + break; + case VfpRoundUpward: + fesetround(FeRoundUpward); + break; + case VfpRoundDown: + fesetround(FeRoundDown); + break; + case VfpRoundZero: + fesetround(FeRoundZero); + break; + } + return roundingMode; +} + +/* Sets the ioc, dzc, ofc, ufc and ixc fields of fpscr for each host exception that is currently raised (fields are only ever set here, never cleared), restores the host rounding mode from state, and returns the updated fpscr. */ +static inline FPSCR +setVfpFpscr(FPSCR fpscr, VfpSavedState state) +{ + int exceptions = fetestexcept(FeAllExceptions); + if (exceptions & FeInvalid) { + fpscr.ioc = 1; + } + if (exceptions & FeDivByZero) { + fpscr.dzc = 1; + } + if (exceptions & FeOverflow) { + fpscr.ofc = 1; + } + if (exceptions & FeUnderflow) { + fpscr.ufc = 1; + } + if (exceptions & FeInexact) { + fpscr.ixc = 1; + } + fesetround(state); + return fpscr; +} + class VfpMacroOp : public PredMacroOp { public: diff --git a/src/arch/arm/isa/insts/fp.isa b/src/arch/arm/isa/insts/fp.isa index 0c1ce626b..1a8f25c5a 100644 --- a/src/arch/arm/isa/insts/fp.isa +++ b/src/arch/arm/isa/insts/fp.isa @@ 
-376,7 +376,9 @@ let {{ exec_output += PredOpExecute.subst(vmov2Core2RegIop); vmulSCode = ''' + VfpSavedState state = prepVfpFpscr(Fpscr); FpDest = FpOp1 * FpOp2; + Fpscr = setVfpFpscr(Fpscr, state); if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) { FpDest = NAN; } @@ -392,7 +394,9 @@ let {{ IntDoubleUnion cOp1, cOp2, cDest; cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32)); + VfpSavedState state = prepVfpFpscr(Fpscr); cDest.fp = cOp1.fp * cOp2.fp; + Fpscr = setVfpFpscr(Fpscr, state); if ((isinf(cOp1.fp) && cOp2.fp == 0) || (isinf(cOp2.fp) && cOp1.fp == 0)) { cDest.fp = NAN; @@ -456,7 +460,9 @@ let {{ exec_output += PredOpExecute.subst(vabsDIop); vaddSCode = ''' + VfpSavedState state = prepVfpFpscr(Fpscr); FpDest = FpOp1 + FpOp2; + Fpscr = setVfpFpscr(Fpscr, state); ''' vaddSIop = InstObjParams("vadds", "VaddS", "VfpRegRegRegOp", { "code": vaddSCode, @@ -469,7 +475,9 @@ let {{ IntDoubleUnion cOp1, cOp2, cDest; cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32)); + VfpSavedState state = prepVfpFpscr(Fpscr); cDest.fp = cOp1.fp + cOp2.fp; + Fpscr = setVfpFpscr(Fpscr, state); FpDestP0.uw = cDest.bits; FpDestP1.uw = cDest.bits >> 32; ''' @@ -481,7 +489,9 @@ let {{ exec_output += PredOpExecute.subst(vaddDIop); vsubSCode = ''' + VfpSavedState state = prepVfpFpscr(Fpscr); FpDest = FpOp1 - FpOp2; + Fpscr = setVfpFpscr(Fpscr, state) ''' vsubSIop = InstObjParams("vsubs", "VsubS", "VfpRegRegRegOp", { "code": vsubSCode, @@ -494,7 +504,9 @@ let {{ IntDoubleUnion cOp1, cOp2, cDest; cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32)); + VfpSavedState state = prepVfpFpscr(Fpscr); cDest.fp = cOp1.fp - cOp2.fp; + Fpscr = setVfpFpscr(Fpscr, state); FpDestP0.uw = cDest.bits; FpDestP1.uw = cDest.bits >> 32; ''' @@ -506,7 +518,9 
@@ let {{ exec_output += PredOpExecute.subst(vsubDIop); vdivSCode = ''' + VfpSavedState state = prepVfpFpscr(Fpscr); FpDest = FpOp1 / FpOp2; + Fpscr = setVfpFpscr(Fpscr, state); ''' vdivSIop = InstObjParams("vdivs", "VdivS", "VfpRegRegRegOp", { "code": vdivSCode, @@ -519,7 +533,9 @@ let {{ IntDoubleUnion cOp1, cOp2, cDest; cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32)); + VfpSavedState state = prepVfpFpscr(Fpscr); cDest.fp = cOp1.fp / cOp2.fp; + Fpscr = setVfpFpscr(Fpscr, state); FpDestP0.uw = cDest.bits; FpDestP1.uw = cDest.bits >> 32; ''' @@ -531,7 +547,9 @@ let {{ exec_output += PredOpExecute.subst(vdivDIop); vsqrtSCode = ''' + VfpSavedState state = prepVfpFpscr(Fpscr); FpDest = sqrtf(FpOp1); + Fpscr = setVfpFpscr(Fpscr, state); if (FpOp1 < 0) { FpDest = NAN; } @@ -546,7 +564,9 @@ let {{ vsqrtDCode = ''' IntDoubleUnion cOp1, cDest; cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); + VfpSavedState state = prepVfpFpscr(Fpscr); cDest.fp = sqrt(cOp1.fp); + Fpscr = setVfpFpscr(Fpscr, state); if (cOp1.fp < 0) { cDest.fp = NAN; } @@ -561,11 +581,13 @@ let {{ exec_output += PredOpExecute.subst(vsqrtDIop); vmlaSCode = ''' + VfpSavedState state = prepVfpFpscr(Fpscr); float mid = FpOp1 * FpOp2; if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) { mid = NAN; } FpDest = FpDest + mid; + Fpscr = setVfpFpscr(Fpscr, state); ''' vmlaSIop = InstObjParams("vmlas", "VmlaS", "VfpRegRegRegOp", { "code": vmlaSCode, @@ -579,12 +601,14 @@ let {{ cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32)); cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32)); + VfpSavedState state = prepVfpFpscr(Fpscr); double mid = cOp1.fp * cOp2.fp; if ((isinf(cOp1.fp) && cOp2.fp == 0) || (isinf(cOp2.fp) && cOp1.fp == 0)) { mid = NAN; } cDest.fp = cDest.fp + mid; + Fpscr = setVfpFpscr(Fpscr, state); 
FpDestP0.uw = cDest.bits; FpDestP1.uw = cDest.bits >> 32; ''' @@ -596,11 +620,13 @@ let {{ exec_output += PredOpExecute.subst(vmlaDIop); vmlsSCode = ''' + VfpSavedState state = prepVfpFpscr(Fpscr); float mid = FpOp1 * FpOp2; if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) { mid = NAN; } FpDest = FpDest - mid; + Fpscr = setVfpFpscr(Fpscr, state); ''' vmlsSIop = InstObjParams("vmlss", "VmlsS", "VfpRegRegRegOp", { "code": vmlsSCode, @@ -614,12 +640,14 @@ let {{ cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32)); cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32)); + VfpSavedState state = prepVfpFpscr(Fpscr); double mid = cOp1.fp * cOp2.fp; if ((isinf(cOp1.fp) && cOp2.fp == 0) || (isinf(cOp2.fp) && cOp1.fp == 0)) { mid = NAN; } cDest.fp = cDest.fp - mid; + Fpscr = setVfpFpscr(Fpscr, state); FpDestP0.uw = cDest.bits; FpDestP1.uw = cDest.bits >> 32; ''' @@ -631,11 +659,13 @@ let {{ exec_output += PredOpExecute.subst(vmlsDIop); vnmlaSCode = ''' + VfpSavedState state = prepVfpFpscr(Fpscr); float mid = FpOp1 * FpOp2; if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) { mid = NAN; } FpDest = -FpDest - mid; + Fpscr = setVfpFpscr(Fpscr, state); ''' vnmlaSIop = InstObjParams("vnmlas", "VnmlaS", "VfpRegRegRegOp", { "code": vnmlaSCode, @@ -649,12 +679,14 @@ let {{ cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32)); cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32)); + VfpSavedState state = prepVfpFpscr(Fpscr); double mid = cOp1.fp * cOp2.fp; if ((isinf(cOp1.fp) && cOp2.fp == 0) || (isinf(cOp2.fp) && cOp1.fp == 0)) { mid = NAN; } cDest.fp = -cDest.fp - mid; + Fpscr = setVfpFpscr(Fpscr, state); FpDestP0.uw = cDest.bits; FpDestP1.uw = cDest.bits >> 32; ''' @@ -666,11 +698,13 @@ let {{ exec_output += PredOpExecute.subst(vnmlaDIop); vnmlsSCode = ''' + 
VfpSavedState state = prepVfpFpscr(Fpscr); float mid = FpOp1 * FpOp2; if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) { mid = NAN; } FpDest = -FpDest + mid; + Fpscr = setVfpFpscr(Fpscr, state); ''' vnmlsSIop = InstObjParams("vnmlss", "VnmlsS", "VfpRegRegRegOp", { "code": vnmlsSCode, @@ -684,12 +718,14 @@ let {{ cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32)); cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32)); + VfpSavedState state = prepVfpFpscr(Fpscr); double mid = cOp1.fp * cOp2.fp; if ((isinf(cOp1.fp) && cOp2.fp == 0) || (isinf(cOp2.fp) && cOp1.fp == 0)) { mid = NAN; } cDest.fp = -cDest.fp + mid; + Fpscr = setVfpFpscr(Fpscr, state); FpDestP0.uw = cDest.bits; FpDestP1.uw = cDest.bits >> 32; ''' @@ -701,11 +737,13 @@ let {{ exec_output += PredOpExecute.subst(vnmlsDIop); vnmulSCode = ''' + VfpSavedState state = prepVfpFpscr(Fpscr); float mid = FpOp1 * FpOp2; if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) { mid = NAN; } FpDest = -mid; + Fpscr = setVfpFpscr(Fpscr, state); ''' vnmulSIop = InstObjParams("vnmuls", "VnmulS", "VfpRegRegRegOp", { "code": vnmulSCode, @@ -719,12 +757,14 @@ let {{ cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32)); cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32)); + VfpSavedState state = prepVfpFpscr(Fpscr); double mid = cOp1.fp * cOp2.fp; if ((isinf(cOp1.fp) && cOp2.fp == 0) || (isinf(cOp2.fp) && cOp1.fp == 0)) { mid = NAN; } cDest.fp = -mid; + Fpscr = setVfpFpscr(Fpscr, state); FpDestP0.uw = cDest.bits; FpDestP1.uw = cDest.bits >> 32; ''' @@ -736,7 +776,9 @@ let {{ exec_output += PredOpExecute.subst(vnmulDIop); vcvtUIntFpSCode = ''' + VfpSavedState state = prepVfpFpscr(Fpscr); FpDest = FpOp1.uw; + Fpscr = setVfpFpscr(Fpscr, state); ''' vcvtUIntFpSIop = InstObjParams("vcvt", "VcvtUIntFpS", 
"VfpRegRegOp", { "code": vcvtUIntFpSCode, @@ -747,7 +789,9 @@ let {{ vcvtUIntFpDCode = ''' IntDoubleUnion cDest; + VfpSavedState state = prepVfpFpscr(Fpscr); cDest.fp = (uint64_t)FpOp1P0.uw; + Fpscr = setVfpFpscr(Fpscr, state); FpDestP0.uw = cDest.bits; FpDestP1.uw = cDest.bits >> 32; ''' @@ -759,7 +803,9 @@ let {{ exec_output += PredOpExecute.subst(vcvtUIntFpDIop); vcvtSIntFpSCode = ''' + VfpSavedState state = prepVfpFpscr(Fpscr); FpDest = FpOp1.sw; + Fpscr = setVfpFpscr(Fpscr, state); ''' vcvtSIntFpSIop = InstObjParams("vcvt", "VcvtSIntFpS", "VfpRegRegOp", { "code": vcvtSIntFpSCode, @@ -770,7 +816,9 @@ let {{ vcvtSIntFpDCode = ''' IntDoubleUnion cDest; + VfpSavedState state = prepVfpFpscr(Fpscr); cDest.fp = FpOp1P0.sw; + Fpscr = setVfpFpscr(Fpscr, state); FpDestP0.uw = cDest.bits; FpDestP1.uw = cDest.bits >> 32; ''' @@ -782,7 +830,9 @@ let {{ exec_output += PredOpExecute.subst(vcvtSIntFpDIop); vcvtFpUIntSCode = ''' + VfpSavedState state = prepVfpFpscr(Fpscr); FpDest.uw = FpOp1; + Fpscr = setVfpFpscr(Fpscr, state); ''' vcvtFpUIntSIop = InstObjParams("vcvt", "VcvtFpUIntS", "VfpRegRegOp", { "code": vcvtFpUIntSCode, @@ -794,7 +844,9 @@ let {{ vcvtFpUIntDCode = ''' IntDoubleUnion cOp1; cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); + VfpSavedState state = prepVfpFpscr(Fpscr); uint64_t result = cOp1.fp; + Fpscr = setVfpFpscr(Fpscr, state); FpDestP0.uw = result; ''' vcvtFpUIntDIop = InstObjParams("vcvt", "VcvtFpUIntD", "VfpRegRegOp", @@ -805,7 +857,9 @@ let {{ exec_output += PredOpExecute.subst(vcvtFpUIntDIop); vcvtFpSIntSCode = ''' + VfpSavedState state = prepVfpFpscr(Fpscr); FpDest.sw = FpOp1; + Fpscr = setVfpFpscr(Fpscr, state); ''' vcvtFpSIntSIop = InstObjParams("vcvt", "VcvtFpSIntS", "VfpRegRegOp", { "code": vcvtFpSIntSCode, @@ -817,7 +871,9 @@ let {{ vcvtFpSIntDCode = ''' IntDoubleUnion cOp1; cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); + VfpSavedState state = prepVfpFpscr(Fpscr); int64_t result = cOp1.fp; + Fpscr = 
setVfpFpscr(Fpscr, state); FpDestP0.uw = result; ''' vcvtFpSIntDIop = InstObjParams("vcvt", "VcvtFpSIntD", "VfpRegRegOp", @@ -829,7 +885,9 @@ let {{ vcvtFpSFpDCode = ''' IntDoubleUnion cDest; + VfpSavedState state = prepVfpFpscr(Fpscr); cDest.fp = FpOp1; + Fpscr = setVfpFpscr(Fpscr, state); FpDestP0.uw = cDest.bits; FpDestP1.uw = cDest.bits >> 32; ''' @@ -843,7 +901,9 @@ let {{ vcvtFpDFpSCode = ''' IntDoubleUnion cOp1; cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); + VfpSavedState state = prepVfpFpscr(Fpscr); FpDest = cOp1.fp; + Fpscr = setVfpFpscr(Fpscr, state); ''' vcvtFpDFpSIop = InstObjParams("vcvt", "VcvtFpDFpS", "VfpRegRegOp", { "code": vcvtFpDFpSCode, |