summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Gabe Black <gblack@eecs.umich.edu> 2010-06-02 12:58:15 -0500
committer Gabe Black <gblack@eecs.umich.edu> 2010-06-02 12:58:15 -0500
commit aa05e5401c37c7e60f28e13f8e6de5c5f74e904d (patch)
tree 29f57669d184808df45280d9a7928899803a7ca3
parent 86a1093992e686cac81db2555c477e2b4cad0c63 (diff)
download gem5-aa05e5401c37c7e60f28e13f8e6de5c5f74e904d.tar.xz
ARM: Implement the floating/fixed point VCVT instructions.
-rw-r--r-- src/arch/arm/insts/vfp.hh 144
-rw-r--r-- src/arch/arm/isa/formats/fp.isa 84
-rw-r--r-- src/arch/arm/isa/insts/fp.isa 239
3 files changed, 463 insertions, 4 deletions
diff --git a/src/arch/arm/insts/vfp.hh b/src/arch/arm/insts/vfp.hh
index 465384304..ceeaaa3cd 100644
--- a/src/arch/arm/insts/vfp.hh
+++ b/src/arch/arm/insts/vfp.hh
@@ -101,6 +101,150 @@ enum VfpRoundingMode
VfpRoundZero = 3
};
+/**
+ * Convert a single precision value to a fixed-point integer.
+ *
+ * The value is scaled by 2^imm with the rounding mode set to FeRoundZero,
+ * the exception flags are cleared, and the scaled value is truncated to
+ * the selected integer width. A result that does not fit saturates to the
+ * nearest bound and raises FeInvalid; a NaN raises FeInvalid and gives 0.
+ *
+ * @param val      Value to convert.
+ * @param isSigned True for a signed result, false for unsigned.
+ * @param half     True for a 16 bit result, false for 32 bit.
+ * @param imm      Exponent of the power of two scale factor.
+ * @return The converted value. Signed results are sign extended by the
+ *         implicit conversion to the 64 bit return type.
+ */
+static inline uint64_t
+vfpFpSToFixed(float val, bool isSigned, bool half, uint8_t imm)
+{
+    fesetround(FeRoundZero);
+    val = val * powf(2.0, imm);
+    // Keep the scaled value in memory across this point; presumably this
+    // stops the multiply from moving past the flag clear below.
+    __asm__ __volatile__("" : "=m" (val) : "m" (val));
+    feclearexcept(FeAllExceptions);
+    // NaN fails every ordered comparison, so it would slip through the
+    // range checks and reach an undefined float to integer cast.
+    if (val != val) {
+        feraiseexcept(FeInvalid);
+        return 0;
+    }
+    // Range check the truncated value in double precision. Truncating
+    // first keeps inputs such as -0.5 or 32767.5, whose integer part is in
+    // range, from being reported as invalid. Using double keeps the 32 bit
+    // limits exact; they are not representable as a float.
+    const double whole = trunc((double)val);
+    if (isSigned) {
+        if (half) {
+            if (whole < (int16_t)(1 << 15)) {
+                feraiseexcept(FeInvalid);
+                return (int16_t)(1 << 15);
+            }
+            if (whole > (int16_t)mask(15)) {
+                feraiseexcept(FeInvalid);
+                return (int16_t)mask(15);
+            }
+            return (int16_t)val;
+        } else {
+            // Shift an unsigned one: 1 << 31 overflows a signed int.
+            if (whole < (int32_t)(1U << 31)) {
+                feraiseexcept(FeInvalid);
+                return (int32_t)(1U << 31);
+            }
+            if (whole > (int32_t)mask(31)) {
+                feraiseexcept(FeInvalid);
+                return (int32_t)mask(31);
+            }
+            return (int32_t)val;
+        }
+    } else {
+        if (half) {
+            if (whole < 0) {
+                feraiseexcept(FeInvalid);
+                return 0;
+            }
+            if (whole > mask(16)) {
+                feraiseexcept(FeInvalid);
+                return mask(16);
+            }
+            return (uint16_t)val;
+        } else {
+            if (whole < 0) {
+                feraiseexcept(FeInvalid);
+                return 0;
+            }
+            if (whole > mask(32)) {
+                feraiseexcept(FeInvalid);
+                return mask(32);
+            }
+            return (uint32_t)val;
+        }
+    }
+}
+
+/**
+ * Convert an unsigned fixed-point integer to single precision.
+ *
+ * Sets the rounding mode to FeRoundNearest and divides the integer by
+ * 2^imm.
+ *
+ * @param val  Fixed-point input.
+ * @param half True to use only the low 16 bits of val.
+ * @param imm  Exponent of the power of two divisor.
+ * @return val (or its low half word) divided by 2^imm.
+ */
+static inline float
+vfpUFixedToFpS(uint32_t val, bool half, uint8_t imm)
+{
+    fesetround(FeRoundNearest);
+    // A half word conversion discards everything above bit 15.
+    const uint32_t fixed = half ? (uint16_t)val : val;
+    const float scale = powf(2.0, imm);
+    return fixed / scale;
+}
+
+/**
+ * Convert a signed fixed-point integer to single precision.
+ *
+ * Sets the rounding mode to FeRoundNearest and divides the integer by
+ * 2^imm.
+ *
+ * @param val  Fixed-point input.
+ * @param half True to use only the low 16 bits of val, sign extended.
+ * @param imm  Exponent of the power of two divisor.
+ * @return val (or its sign extended low half word) divided by 2^imm.
+ */
+static inline float
+vfpSFixedToFpS(int32_t val, bool half, uint8_t imm)
+{
+    fesetround(FeRoundNearest);
+    int32_t fixed = val;
+    if (half) {
+        // Keep the low half word and propagate its sign bit.
+        fixed = sext<16>(val & mask(16));
+    }
+    const float scale = powf(2.0, imm);
+    return fixed / scale;
+}
+
+/**
+ * Convert a double precision value to a fixed-point integer.
+ *
+ * The value is scaled by 2^imm with the rounding mode set to FeRoundZero,
+ * the exception flags are cleared, and the scaled value is truncated to
+ * the selected integer width. A result that does not fit saturates to the
+ * nearest bound and raises FeInvalid; a NaN raises FeInvalid and gives 0.
+ *
+ * @param val      Value to convert.
+ * @param isSigned True for a signed result, false for unsigned.
+ * @param half     True for a 16 bit result, false for 32 bit.
+ * @param imm      Exponent of the power of two scale factor.
+ * @return The converted value. Signed results are sign extended by the
+ *         implicit conversion to the 64 bit return type.
+ */
+static inline uint64_t
+vfpFpDToFixed(double val, bool isSigned, bool half, uint8_t imm)
+{
+    fesetround(FeRoundZero);
+    val = val * pow(2.0, imm);
+    // Keep the scaled value in memory across this point; presumably this
+    // stops the multiply from moving past the flag clear below.
+    __asm__ __volatile__("" : "=m" (val) : "m" (val));
+    feclearexcept(FeAllExceptions);
+    // NaN fails every ordered comparison, so it would slip through the
+    // range checks and reach an undefined float to integer cast.
+    if (val != val) {
+        feraiseexcept(FeInvalid);
+        return 0;
+    }
+    // Range check the truncated value so that inputs such as -0.5 or
+    // 32767.5, whose integer part is in range, are not reported as invalid.
+    const double whole = trunc(val);
+    if (isSigned) {
+        if (half) {
+            if (whole < (int16_t)(1 << 15)) {
+                feraiseexcept(FeInvalid);
+                return (int16_t)(1 << 15);
+            }
+            if (whole > (int16_t)mask(15)) {
+                feraiseexcept(FeInvalid);
+                return (int16_t)mask(15);
+            }
+            return (int16_t)val;
+        } else {
+            // Shift an unsigned one: 1 << 31 overflows a signed int.
+            if (whole < (int32_t)(1U << 31)) {
+                feraiseexcept(FeInvalid);
+                return (int32_t)(1U << 31);
+            }
+            if (whole > (int32_t)mask(31)) {
+                feraiseexcept(FeInvalid);
+                return (int32_t)mask(31);
+            }
+            return (int32_t)val;
+        }
+    } else {
+        if (half) {
+            if (whole < 0) {
+                feraiseexcept(FeInvalid);
+                return 0;
+            }
+            if (whole > mask(16)) {
+                feraiseexcept(FeInvalid);
+                return mask(16);
+            }
+            return (uint16_t)val;
+        } else {
+            if (whole < 0) {
+                feraiseexcept(FeInvalid);
+                return 0;
+            }
+            if (whole > mask(32)) {
+                feraiseexcept(FeInvalid);
+                return mask(32);
+            }
+            return (uint32_t)val;
+        }
+    }
+}
+
+/**
+ * Convert an unsigned fixed-point integer to double precision.
+ *
+ * Sets the rounding mode to FeRoundNearest and divides the integer by
+ * 2^imm.
+ *
+ * @param val  Fixed-point input.
+ * @param half True to use only the low 16 bits of val.
+ * @param imm  Exponent of the power of two divisor.
+ * @return val (or its low half word) divided by 2^imm.
+ */
+static inline double
+vfpUFixedToFpD(uint32_t val, bool half, uint8_t imm)
+{
+    fesetround(FeRoundNearest);
+    // A half word conversion discards everything above bit 15.
+    const uint32_t fixed = half ? (uint16_t)val : val;
+    const double scale = pow(2.0, imm);
+    return fixed / scale;
+}
+
+/**
+ * Convert a signed fixed-point integer to double precision.
+ *
+ * Sets the rounding mode to FeRoundNearest and divides the integer by
+ * 2^imm.
+ *
+ * @param val  Fixed-point input.
+ * @param half True to use only the low 16 bits of val, sign extended.
+ * @param imm  Exponent of the power of two divisor.
+ * @return val (or its sign extended low half word) divided by 2^imm.
+ */
+static inline double
+vfpSFixedToFpD(int32_t val, bool half, uint8_t imm)
+{
+    fesetround(FeRoundNearest);
+    int32_t fixed = val;
+    if (half) {
+        // Keep the low half word and propagate its sign bit.
+        fixed = sext<16>(val & mask(16));
+    }
+    const double scale = pow(2.0, imm);
+    return fixed / scale;
+}
+
typedef int VfpSavedState;
static inline VfpSavedState
diff --git a/src/arch/arm/isa/formats/fp.isa b/src/arch/arm/isa/formats/fp.isa
index e553b180d..3d40caf9e 100644
--- a/src/arch/arm/isa/formats/fp.isa
+++ b/src/arch/arm/isa/formats/fp.isa
@@ -683,9 +683,47 @@ let {{
}
}
case 0xa:
+ {
+ const bool half = (bits(machInst, 7) == 0);
+ const uint32_t imm = bits(machInst, 5) |
+ (bits(machInst, 3, 0) << 1);
+ const uint32_t size =
+ (bits(machInst, 7) == 0 ? 16 : 32) - imm;
+ if (single) {
+ if (half) {
+ return new VcvtSHFixedFpS(machInst, vd, vd, size);
+ } else {
+ return new VcvtSFixedFpS(machInst, vd, vd, size);
+ }
+ } else {
+ if (half) {
+ return new VcvtSHFixedFpD(machInst, vd, vd, size);
+ } else {
+ return new VcvtSFixedFpD(machInst, vd, vd, size);
+ }
+ }
+ }
case 0xb:
- // Between FP and fixed point.
- return new WarnUnimplemented("vcvt", machInst);
+ {
+ const bool half = (bits(machInst, 7) == 0);
+ const uint32_t imm = bits(machInst, 5) |
+ (bits(machInst, 3, 0) << 1);
+ const uint32_t size =
+ (bits(machInst, 7) == 0 ? 16 : 32) - imm;
+ if (single) {
+ if (half) {
+ return new VcvtUHFixedFpS(machInst, vd, vd, size);
+ } else {
+ return new VcvtUFixedFpS(machInst, vd, vd, size);
+ }
+ } else {
+ if (half) {
+ return new VcvtUHFixedFpD(machInst, vd, vd, size);
+ } else {
+ return new VcvtUFixedFpD(machInst, vd, vd, size);
+ }
+ }
+ }
case 0xc:
if (single) {
return new VcvtFpUIntS(machInst, vd, vm);
@@ -703,9 +741,47 @@ let {{
return new VcvtFpSIntD(machInst, vd, vm);
}
case 0xe:
+ {
+ const bool half = (bits(machInst, 7) == 0);
+ const uint32_t imm = bits(machInst, 5) |
+ (bits(machInst, 3, 0) << 1);
+ const uint32_t size =
+ (bits(machInst, 7) == 0 ? 16 : 32) - imm;
+ if (single) {
+ if (half) {
+ return new VcvtFpSHFixedS(machInst, vd, vd, size);
+ } else {
+ return new VcvtFpSFixedS(machInst, vd, vd, size);
+ }
+ } else {
+ if (half) {
+ return new VcvtFpSHFixedD(machInst, vd, vd, size);
+ } else {
+ return new VcvtFpSFixedD(machInst, vd, vd, size);
+ }
+ }
+ }
case 0xf:
- // Between FP and fixed point.
- return new WarnUnimplemented("vcvt", machInst);
+ {
+ const bool half = (bits(machInst, 7) == 0);
+ const uint32_t imm = bits(machInst, 5) |
+ (bits(machInst, 3, 0) << 1);
+ const uint32_t size =
+ (bits(machInst, 7) == 0 ? 16 : 32) - imm;
+ if (single) {
+ if (half) {
+ return new VcvtFpUHFixedS(machInst, vd, vd, size);
+ } else {
+ return new VcvtFpUFixedS(machInst, vd, vd, size);
+ }
+ } else {
+ if (half) {
+ return new VcvtFpUHFixedD(machInst, vd, vd, size);
+ } else {
+ return new VcvtFpUFixedD(machInst, vd, vd, size);
+ }
+ }
+ }
}
break;
}
diff --git a/src/arch/arm/isa/insts/fp.isa b/src/arch/arm/isa/insts/fp.isa
index 1a8f25c5a..db1c5bf6b 100644
--- a/src/arch/arm/isa/insts/fp.isa
+++ b/src/arch/arm/isa/insts/fp.isa
@@ -997,3 +997,242 @@ let {{
decoder_output += VfpRegImmOpConstructor.subst(vcmpZeroDIop);
exec_output += PredOpExecute.subst(vcmpZeroDIop);
}};
+
+let {{
+
+    # VCVT between floating point and fixed point.
+    #
+    # Each definition below pairs a C++ code snippet with an InstObjParams
+    # built on VfpRegRegImmOp, then appends the declaration, constructor and
+    # execute templates to the three output strings. Naming: "FpXFixed" is
+    # floating point to fixed point, "XFixedFp" is the reverse; S/U select
+    # signed/unsigned, an H after that selects the half word form, and the
+    # trailing S/D selects single/double precision.
+    #
+    # The boolean passed to vfpSFixedToFp*/vfpUFixedToFp* is the helper's
+    # "half" parameter, and the last two booleans passed to vfpFp*ToFixed
+    # are "isSigned" and "half".
+
+    header_output = ""
+    decoder_output = ""
+    exec_output = ""
+
+    # Single precision -> signed word.
+    vcvtFpSFixedSCode = '''
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        FpDest.sw = vfpFpSToFixed(FpOp1, true, false, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+    '''
+    vcvtFpSFixedSIop = InstObjParams("vcvt", "VcvtFpSFixedS", "VfpRegRegImmOp",
+                                     { "code": vcvtFpSFixedSCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtFpSFixedSIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtFpSFixedSIop);
+    exec_output += PredOpExecute.subst(vcvtFpSFixedSIop);
+
+    # Double precision -> signed word. The 64 bit operand is assembled from
+    # its two 32 bit halves and the 64 bit result is split back the same way.
+    vcvtFpSFixedDCode = '''
+        IntDoubleUnion cOp1;
+        cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        uint64_t mid = vfpFpDToFixed(cOp1.fp, true, false, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+        FpDestP0.uw = mid;
+        FpDestP1.uw = mid >> 32;
+    '''
+    vcvtFpSFixedDIop = InstObjParams("vcvt", "VcvtFpSFixedD", "VfpRegRegImmOp",
+                                     { "code": vcvtFpSFixedDCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtFpSFixedDIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtFpSFixedDIop);
+    exec_output += PredOpExecute.subst(vcvtFpSFixedDIop);
+
+    # Single precision -> unsigned word.
+    vcvtFpUFixedSCode = '''
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        FpDest.uw = vfpFpSToFixed(FpOp1, false, false, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+    '''
+    vcvtFpUFixedSIop = InstObjParams("vcvt", "VcvtFpUFixedS", "VfpRegRegImmOp",
+                                     { "code": vcvtFpUFixedSCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtFpUFixedSIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtFpUFixedSIop);
+    exec_output += PredOpExecute.subst(vcvtFpUFixedSIop);
+
+    # Double precision -> unsigned word.
+    vcvtFpUFixedDCode = '''
+        IntDoubleUnion cOp1;
+        cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        uint64_t mid = vfpFpDToFixed(cOp1.fp, false, false, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+        FpDestP0.uw = mid;
+        FpDestP1.uw = mid >> 32;
+    '''
+    vcvtFpUFixedDIop = InstObjParams("vcvt", "VcvtFpUFixedD", "VfpRegRegImmOp",
+                                     { "code": vcvtFpUFixedDCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtFpUFixedDIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtFpUFixedDIop);
+    exec_output += PredOpExecute.subst(vcvtFpUFixedDIop);
+
+    # Signed word -> single precision. "half" is false: this is the word
+    # sized form, the half word form is VcvtSHFixedFpS further down.
+    vcvtSFixedFpSCode = '''
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        FpDest = vfpSFixedToFpS(FpOp1.sw, false, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+    '''
+    vcvtSFixedFpSIop = InstObjParams("vcvt", "VcvtSFixedFpS", "VfpRegRegImmOp",
+                                     { "code": vcvtSFixedFpSCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtSFixedFpSIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtSFixedFpSIop);
+    exec_output += PredOpExecute.subst(vcvtSFixedFpSIop);
+
+    # Signed word -> double precision. "half" is false: this is the word
+    # sized form, the half word form is VcvtSHFixedFpD further down. The
+    # helper takes a 32 bit integer, so only the low word of mid is used.
+    vcvtSFixedFpDCode = '''
+        IntDoubleUnion cDest;
+        uint64_t mid = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        cDest.fp = vfpSFixedToFpD(mid, false, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+        FpDestP0.uw = cDest.bits;
+        FpDestP1.uw = cDest.bits >> 32;
+    '''
+    vcvtSFixedFpDIop = InstObjParams("vcvt", "VcvtSFixedFpD", "VfpRegRegImmOp",
+                                     { "code": vcvtSFixedFpDCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtSFixedFpDIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtSFixedFpDIop);
+    exec_output += PredOpExecute.subst(vcvtSFixedFpDIop);
+
+    # Unsigned word -> single precision.
+    vcvtUFixedFpSCode = '''
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        FpDest = vfpUFixedToFpS(FpOp1.uw, false, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+    '''
+    vcvtUFixedFpSIop = InstObjParams("vcvt", "VcvtUFixedFpS", "VfpRegRegImmOp",
+                                     { "code": vcvtUFixedFpSCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtUFixedFpSIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtUFixedFpSIop);
+    exec_output += PredOpExecute.subst(vcvtUFixedFpSIop);
+
+    # Unsigned word -> double precision. The helper takes a 32 bit integer,
+    # so only the low word of mid is used.
+    vcvtUFixedFpDCode = '''
+        IntDoubleUnion cDest;
+        uint64_t mid = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        cDest.fp = vfpUFixedToFpD(mid, false, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+        FpDestP0.uw = cDest.bits;
+        FpDestP1.uw = cDest.bits >> 32;
+    '''
+    vcvtUFixedFpDIop = InstObjParams("vcvt", "VcvtUFixedFpD", "VfpRegRegImmOp",
+                                     { "code": vcvtUFixedFpDCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtUFixedFpDIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtUFixedFpDIop);
+    exec_output += PredOpExecute.subst(vcvtUFixedFpDIop);
+
+    # Single precision -> signed half word.
+    vcvtFpSHFixedSCode = '''
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        FpDest.sh = vfpFpSToFixed(FpOp1, true, true, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+    '''
+    vcvtFpSHFixedSIop = InstObjParams("vcvt", "VcvtFpSHFixedS",
+                                      "VfpRegRegImmOp",
+                                      { "code": vcvtFpSHFixedSCode,
+                                        "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtFpSHFixedSIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtFpSHFixedSIop);
+    exec_output += PredOpExecute.subst(vcvtFpSHFixedSIop);
+
+    # Double precision -> signed half word.
+    vcvtFpSHFixedDCode = '''
+        IntDoubleUnion cOp1;
+        cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        uint64_t result = vfpFpDToFixed(cOp1.fp, true, true, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+        FpDestP0.uw = result;
+        FpDestP1.uw = result >> 32;
+    '''
+    vcvtFpSHFixedDIop = InstObjParams("vcvt", "VcvtFpSHFixedD",
+                                      "VfpRegRegImmOp",
+                                      { "code": vcvtFpSHFixedDCode,
+                                        "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtFpSHFixedDIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtFpSHFixedDIop);
+    exec_output += PredOpExecute.subst(vcvtFpSHFixedDIop);
+
+    # Single precision -> unsigned half word.
+    vcvtFpUHFixedSCode = '''
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        FpDest.uh = vfpFpSToFixed(FpOp1, false, true, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+    '''
+    vcvtFpUHFixedSIop = InstObjParams("vcvt", "VcvtFpUHFixedS",
+                                      "VfpRegRegImmOp",
+                                      { "code": vcvtFpUHFixedSCode,
+                                        "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtFpUHFixedSIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtFpUHFixedSIop);
+    exec_output += PredOpExecute.subst(vcvtFpUHFixedSIop);
+
+    # Double precision -> unsigned half word.
+    vcvtFpUHFixedDCode = '''
+        IntDoubleUnion cOp1;
+        cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        uint64_t mid = vfpFpDToFixed(cOp1.fp, false, true, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+        FpDestP0.uw = mid;
+        FpDestP1.uw = mid >> 32;
+    '''
+    vcvtFpUHFixedDIop = InstObjParams("vcvt", "VcvtFpUHFixedD",
+                                      "VfpRegRegImmOp",
+                                      { "code": vcvtFpUHFixedDCode,
+                                        "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtFpUHFixedDIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtFpUHFixedDIop);
+    exec_output += PredOpExecute.subst(vcvtFpUHFixedDIop);
+
+    # Signed half word -> single precision.
+    vcvtSHFixedFpSCode = '''
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        FpDest = vfpSFixedToFpS(FpOp1.sh, true, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+    '''
+    vcvtSHFixedFpSIop = InstObjParams("vcvt", "VcvtSHFixedFpS",
+                                      "VfpRegRegImmOp",
+                                      { "code": vcvtSHFixedFpSCode,
+                                        "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtSHFixedFpSIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtSHFixedFpSIop);
+    exec_output += PredOpExecute.subst(vcvtSHFixedFpSIop);
+
+    # Signed half word -> double precision.
+    vcvtSHFixedFpDCode = '''
+        IntDoubleUnion cDest;
+        uint64_t mid = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        cDest.fp = vfpSFixedToFpD(mid, true, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+        FpDestP0.uw = cDest.bits;
+        FpDestP1.uw = cDest.bits >> 32;
+    '''
+    vcvtSHFixedFpDIop = InstObjParams("vcvt", "VcvtSHFixedFpD",
+                                      "VfpRegRegImmOp",
+                                      { "code": vcvtSHFixedFpDCode,
+                                        "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtSHFixedFpDIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtSHFixedFpDIop);
+    exec_output += PredOpExecute.subst(vcvtSHFixedFpDIop);
+
+    # Unsigned half word -> single precision.
+    vcvtUHFixedFpSCode = '''
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        FpDest = vfpUFixedToFpS(FpOp1.uh, true, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+    '''
+    vcvtUHFixedFpSIop = InstObjParams("vcvt", "VcvtUHFixedFpS",
+                                      "VfpRegRegImmOp",
+                                      { "code": vcvtUHFixedFpSCode,
+                                        "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtUHFixedFpSIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtUHFixedFpSIop);
+    exec_output += PredOpExecute.subst(vcvtUHFixedFpSIop);
+
+    # Unsigned half word -> double precision.
+    vcvtUHFixedFpDCode = '''
+        IntDoubleUnion cDest;
+        uint64_t mid = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        cDest.fp = vfpUFixedToFpD(mid, true, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+        FpDestP0.uw = cDest.bits;
+        FpDestP1.uw = cDest.bits >> 32;
+    '''
+    vcvtUHFixedFpDIop = InstObjParams("vcvt", "VcvtUHFixedFpD",
+                                      "VfpRegRegImmOp",
+                                      { "code": vcvtUHFixedFpDCode,
+                                        "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtUHFixedFpDIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtUHFixedFpDIop);
+    exec_output += PredOpExecute.subst(vcvtUHFixedFpDIop);
+}};