summaryrefslogtreecommitdiff
path: root/src/arch/arm
diff options
context:
space:
mode:
authorGabe Black <gblack@eecs.umich.edu>2010-06-02 12:58:16 -0500
committerGabe Black <gblack@eecs.umich.edu>2010-06-02 12:58:16 -0500
commit237c0617a0c095e35169c3f4e48e93eaf4ada527 (patch)
treeb4b3c805611a04dc8bcdc923e0133c071374a4b2 /src/arch/arm
parent04e196f4223b5dfd61782edaaac27166a2bfcf3c (diff)
downloadgem5-237c0617a0c095e35169c3f4e48e93eaf4ada527.tar.xz
ARM: Implement conversion to/from half precision.
Diffstat (limited to 'src/arch/arm')
-rw-r--r--src/arch/arm/insts/vfp.hh217
-rw-r--r--src/arch/arm/isa/formats/fp.isa19
-rw-r--r--src/arch/arm/isa/insts/fp.isa69
3 files changed, 303 insertions, 2 deletions
diff --git a/src/arch/arm/insts/vfp.hh b/src/arch/arm/insts/vfp.hh
index 259bf9c11..37553a5dc 100644
--- a/src/arch/arm/insts/vfp.hh
+++ b/src/arch/arm/insts/vfp.hh
@@ -396,6 +396,223 @@ fixFpSFpDDest(FPSCR fpscr, float val)
return mid;
}
+static inline float
+vcvtFpSFpH(FPSCR &fpscr, float op, float dest, bool top)
+{
+ float junk = 0.0;
+ uint32_t destBits = fpToBits(dest);
+ uint32_t opBits = fpToBits(op);
+ // Extract the operand.
+ bool neg = bits(opBits, 31);
+ uint32_t exponent = bits(opBits, 30, 23);
+ uint32_t oldMantissa = bits(opBits, 22, 0);
+ uint32_t mantissa = oldMantissa >> (23 - 10);
+ // Do the conversion.
+ uint32_t extra = oldMantissa & mask(23 - 10);
+ if (exponent == 0xff) {
+ if (oldMantissa != 0) {
+ // Nans.
+ if (bits(mantissa, 9) == 0) {
+ // Signalling nan.
+ fpscr.ioc = 1;
+ }
+ if (fpscr.ahp) {
+ mantissa = 0;
+ exponent = 0;
+ fpscr.ioc = 1;
+ } else if (fpscr.dn) {
+ mantissa = (1 << 9);
+ exponent = 0x1f;
+ neg = false;
+ } else {
+ exponent = 0x1f;
+ mantissa |= (1 << 9);
+ }
+ } else {
+ // Infinities.
+ exponent = 0x1F;
+ if (fpscr.ahp) {
+ fpscr.ioc = 1;
+ mantissa = 0x3ff;
+ } else {
+ mantissa = 0;
+ }
+ }
+ } else if (exponent == 0 && oldMantissa == 0) {
+ // Zero, don't need to do anything.
+ } else {
+ // Normalized or denormalized numbers.
+
+ bool inexact = (extra != 0);
+
+ if (exponent == 0) {
+ // Denormalized.
+
+ // If flush to zero is on, this shouldn't happen.
+ assert(fpscr.fz == 0);
+
+ // Check for underflow
+ if (inexact || fpscr.ufe)
+ fpscr.ufc = 1;
+
+ // Handle rounding.
+ unsigned mode = fpscr.rMode;
+ if ((mode == VfpRoundUpward && !neg && extra) ||
+ (mode == VfpRoundDown && neg && extra) ||
+ (mode == VfpRoundNearest &&
+ (extra > (1 << 9) ||
+ (extra == (1 << 9) && bits(mantissa, 0))))) {
+ mantissa++;
+ }
+
+ // See if the number became normalized after rounding.
+ if (mantissa == (1 << 10)) {
+ mantissa = 0;
+ exponent = 1;
+ }
+ } else {
+ // Normalized.
+
+ // We need to track the dropped bits differently since
+ // more can be dropped by denormalizing.
+ bool topOne = bits(extra, 12);
+ bool restZeros = bits(extra, 11, 0) == 0;
+
+ if (exponent <= (127 - 15)) {
+ // The result is too small. Denormalize.
+ mantissa |= (1 << 10);
+ while (mantissa && exponent <= (127 - 15)) {
+ restZeros = restZeros && !topOne;
+ topOne = bits(mantissa, 0);
+ mantissa = mantissa >> 1;
+ exponent++;
+ }
+ if (topOne || !restZeros)
+ inexact = true;
+ exponent = 0;
+ } else {
+ // Change bias.
+ exponent -= (127 - 15);
+ }
+
+ if (exponent == 0 && (inexact || fpscr.ufe)) {
+ // Underflow
+ fpscr.ufc = 1;
+ }
+
+ // Handle rounding.
+ unsigned mode = fpscr.rMode;
+ bool nonZero = topOne || !restZeros;
+ if ((mode == VfpRoundUpward && !neg && nonZero) ||
+ (mode == VfpRoundDown && neg && nonZero) ||
+ (mode == VfpRoundNearest && topOne &&
+ (!restZeros || bits(mantissa, 0)))) {
+ mantissa++;
+ }
+
+ // See if we rounded up and need to bump the exponent.
+ if (mantissa == (1 << 10)) {
+ mantissa = 0;
+ exponent++;
+ }
+
+ // Deal with overflow
+ if (fpscr.ahp) {
+ if (exponent >= 0x20) {
+ exponent = 0x1f;
+ mantissa = 0x3ff;
+ fpscr.ioc = 1;
+ // Supress inexact exception.
+ inexact = false;
+ }
+ } else {
+ if (exponent >= 0x1f) {
+ if ((mode == VfpRoundNearest) ||
+ (mode == VfpRoundUpward && !neg) ||
+ (mode == VfpRoundDown && neg)) {
+ // Overflow to infinity.
+ exponent = 0x1f;
+ mantissa = 0;
+ } else {
+ // Overflow to max normal.
+ exponent = 0x1e;
+ mantissa = 0x3ff;
+ }
+ fpscr.ofc = 1;
+ inexact = true;
+ }
+ }
+ }
+
+ if (inexact) {
+ fpscr.ixc = 1;
+ }
+ }
+ // Reassemble and install the result.
+ uint32_t result = bits(mantissa, 9, 0);
+ replaceBits(result, 14, 10, exponent);
+ if (neg)
+ result |= (1 << 15);
+ if (top)
+ replaceBits(destBits, 31, 16, result);
+ else
+ replaceBits(destBits, 15, 0, result);
+ return bitsToFp(destBits, junk);
+}
+
+static inline float
+vcvtFpHFpS(FPSCR &fpscr, float op, bool top)
+{
+ float junk = 0.0;
+ uint32_t opBits = fpToBits(op);
+ // Extract the operand.
+ if (top)
+ opBits = bits(opBits, 31, 16);
+ else
+ opBits = bits(opBits, 15, 0);
+ // Extract the bitfields.
+ bool neg = bits(opBits, 15);
+ uint32_t exponent = bits(opBits, 14, 10);
+ uint32_t mantissa = bits(opBits, 9, 0);
+ // Do the conversion.
+ if (exponent == 0) {
+ if (mantissa != 0) {
+ // Normalize the value.
+ exponent = exponent + (127 - 15) + 1;
+ while (mantissa < (1 << 10)) {
+ mantissa = mantissa << 1;
+ exponent--;
+ }
+ }
+ mantissa = mantissa << (23 - 10);
+ } else if (exponent == 0x1f && !fpscr.ahp) {
+ // Infinities and nans.
+ exponent = 0xff;
+ if (mantissa != 0) {
+ // Nans.
+ mantissa = mantissa << (23 - 10);
+ if (bits(mantissa, 22) == 0) {
+ // Signalling nan.
+ fpscr.ioc = 1;
+ mantissa |= (1 << 22);
+ }
+ if (fpscr.dn) {
+ mantissa &= ~mask(22);
+ neg = false;
+ }
+ }
+ } else {
+ exponent = exponent + (127 - 15);
+ mantissa = mantissa << (23 - 10);
+ }
+ // Reassemble the result.
+ uint32_t result = bits(mantissa, 22, 0);
+ replaceBits(result, 30, 23, exponent);
+ if (neg)
+ result |= (1 << 31);
+ return bitsToFp(result, junk);
+}
+
static inline double
makeDouble(uint32_t low, uint32_t high)
{
diff --git a/src/arch/arm/isa/formats/fp.isa b/src/arch/arm/isa/formats/fp.isa
index d509fc28a..03e574648 100644
--- a/src/arch/arm/isa/formats/fp.isa
+++ b/src/arch/arm/isa/formats/fp.isa
@@ -655,8 +655,23 @@ let {{
}
case 0x2:
case 0x3:
- // Between half and single precision.
- return new WarnUnimplemented("vcvtb, vcvtt", machInst);
+ {
+ const bool toHalf = bits(machInst, 16);
+ const bool top = bits(machInst, 7);
+ if (top) {
+ if (toHalf) {
+ return new VcvtFpSFpHT(machInst, vd, vm);
+ } else {
+ return new VcvtFpHTFpS(machInst, vd, vm);
+ }
+ } else {
+ if (toHalf) {
+ return new VcvtFpSFpHB(machInst, vd, vm);
+ } else {
+ return new VcvtFpHBFpS(machInst, vd, vm);
+ }
+ }
+ }
case 0x4:
if (single) {
if (e) {
diff --git a/src/arch/arm/isa/insts/fp.isa b/src/arch/arm/isa/insts/fp.isa
index bee63d671..c4682b66c 100644
--- a/src/arch/arm/isa/insts/fp.isa
+++ b/src/arch/arm/isa/insts/fp.isa
@@ -912,6 +912,75 @@ let {{
decoder_output += FpRegRegOpConstructor.subst(vcvtFpDFpSIop);
exec_output += PredOpExecute.subst(vcvtFpDFpSIop);
+ vcvtFpHTFpSCode = '''
+ FPSCR fpscr = Fpscr;
+ vfpFlushToZero(fpscr, FpOp1);
+ VfpSavedState state = prepFpState(fpscr.rMode);
+ __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
+ FpDest = vcvtFpHFpS(fpscr, FpOp1, true);
+ __asm__ __volatile__("" :: "m" (FpDest));
+ finishVfp(fpscr, state);
+ Fpscr = fpscr;
+ '''
+ vcvtFpHTFpSIop = InstObjParams("vcvtt", "VcvtFpHTFpS", "FpRegRegOp",
+ { "code": vcvtFpHTFpSCode,
+ "predicate_test": predicateTest }, [])
+ header_output += FpRegRegOpDeclare.subst(vcvtFpHTFpSIop);
+ decoder_output += FpRegRegOpConstructor.subst(vcvtFpHTFpSIop);
+ exec_output += PredOpExecute.subst(vcvtFpHTFpSIop);
+
+ vcvtFpHBFpSCode = '''
+ FPSCR fpscr = Fpscr;
+ VfpSavedState state = prepFpState(fpscr.rMode);
+ __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
+ FpDest = vcvtFpHFpS(fpscr, FpOp1, false);
+ __asm__ __volatile__("" :: "m" (FpDest));
+ finishVfp(fpscr, state);
+ Fpscr = fpscr;
+ '''
+ vcvtFpHBFpSIop = InstObjParams("vcvtb", "VcvtFpHBFpS", "FpRegRegOp",
+ { "code": vcvtFpHBFpSCode,
+ "predicate_test": predicateTest }, [])
+ header_output += FpRegRegOpDeclare.subst(vcvtFpHBFpSIop);
+ decoder_output += FpRegRegOpConstructor.subst(vcvtFpHBFpSIop);
+ exec_output += PredOpExecute.subst(vcvtFpHBFpSIop);
+
+ vcvtFpSFpHTCode = '''
+ FPSCR fpscr = Fpscr;
+ vfpFlushToZero(fpscr, FpOp1);
+ VfpSavedState state = prepFpState(fpscr.rMode);
+ __asm__ __volatile__("" : "=m" (FpOp1), "=m" (FpDest)
+ : "m" (FpOp1), "m" (FpDest));
+ FpDest = vcvtFpSFpH(fpscr, FpOp1, FpDest, true);
+ __asm__ __volatile__("" :: "m" (FpDest));
+ finishVfp(fpscr, state);
+ Fpscr = fpscr;
+ '''
+ vcvtFpSFpHTIop = InstObjParams("vcvtt", "VcvtFpSFpHT", "FpRegRegOp",
+ { "code": vcvtFpHTFpSCode,
+ "predicate_test": predicateTest }, [])
+ header_output += FpRegRegOpDeclare.subst(vcvtFpSFpHTIop);
+ decoder_output += FpRegRegOpConstructor.subst(vcvtFpSFpHTIop);
+ exec_output += PredOpExecute.subst(vcvtFpSFpHTIop);
+
+ vcvtFpSFpHBCode = '''
+ FPSCR fpscr = Fpscr;
+ vfpFlushToZero(fpscr, FpOp1);
+ VfpSavedState state = prepFpState(fpscr.rMode);
+ __asm__ __volatile__("" : "=m" (FpOp1), "=m" (FpDest)
+ : "m" (FpOp1), "m" (FpDest));
+ FpDest = vcvtFpSFpH(fpscr, FpOp1, FpDest, false);
+ __asm__ __volatile__("" :: "m" (FpDest));
+ finishVfp(fpscr, state);
+ Fpscr = fpscr;
+ '''
+ vcvtFpSFpHBIop = InstObjParams("vcvtb", "VcvtFpSFpHB", "FpRegRegOp",
+ { "code": vcvtFpSFpHBCode,
+ "predicate_test": predicateTest }, [])
+ header_output += FpRegRegOpDeclare.subst(vcvtFpSFpHBIop);
+ decoder_output += FpRegRegOpConstructor.subst(vcvtFpSFpHBIop);
+ exec_output += PredOpExecute.subst(vcvtFpSFpHBIop);
+
vcmpSCode = '''
FPSCR fpscr = Fpscr;
vfpFlushToZero(fpscr, FpDest, FpOp1);