summary refs log tree commit diff
diff options
context:
space:
mode:
author Gabe Black <gblack@eecs.umich.edu> 2010-06-02 12:58:14 -0500
committer Gabe Black <gblack@eecs.umich.edu> 2010-06-02 12:58:14 -0500
commit 80fa3a7ccfd930b87c9702f33e0f8461c1eb9e5b (patch)
tree 1cd8731595cd7205854ce7810646b4083016de93
parent 3111a6216924643c332cd8ae3ebdd66fcbbf2c0f (diff)
download gem5-80fa3a7ccfd930b87c9702f33e0f8461c1eb9e5b.tar.xz
ARM: Implement the VFP negated multiplies.
-rw-r--r-- src/arch/arm/isa/formats/fp.isa 64
-rw-r--r-- src/arch/arm/isa/insts/fp.isa 105
2 files changed, 167 insertions, 2 deletions
diff --git a/src/arch/arm/isa/formats/fp.isa b/src/arch/arm/isa/formats/fp.isa
index 9bb062a2e..850f761d7 100644
--- a/src/arch/arm/isa/formats/fp.isa
+++ b/src/arch/arm/isa/formats/fp.isa
@@ -524,6 +524,48 @@ let {{
(IntRegIndex)vn, (IntRegIndex)vm);
}
}
+ case 0x1:
+ if (bits(machInst, 6) == 1) {
+ uint32_t vd;
+ uint32_t vm;
+ uint32_t vn;
+ if (bits(machInst, 8) == 0) {
+ vd = bits(machInst, 22) | (bits(machInst, 15, 12) << 1);
+ vm = bits(machInst, 5) | (bits(machInst, 3, 0) << 1);
+ vn = bits(machInst, 7) | (bits(machInst, 19, 16) << 1);
+ return new VnmlaS(machInst, (IntRegIndex)vd,
+ (IntRegIndex)vn, (IntRegIndex)vm);
+ } else {
+ vd = (bits(machInst, 22) << 5) |
+ (bits(machInst, 15, 12) << 1);
+ vm = (bits(machInst, 5) << 5) |
+ (bits(machInst, 3, 0) << 1);
+ vn = (bits(machInst, 7) << 5) |
+ (bits(machInst, 19, 16) << 1);
+ return new VnmlaD(machInst, (IntRegIndex)vd,
+ (IntRegIndex)vn, (IntRegIndex)vm);
+ }
+ } else {
+ uint32_t vd;
+ uint32_t vm;
+ uint32_t vn;
+ if (bits(machInst, 8) == 0) {
+ vd = bits(machInst, 22) | (bits(machInst, 15, 12) << 1);
+ vm = bits(machInst, 5) | (bits(machInst, 3, 0) << 1);
+ vn = bits(machInst, 7) | (bits(machInst, 19, 16) << 1);
+ return new VnmlsS(machInst, (IntRegIndex)vd,
+ (IntRegIndex)vn, (IntRegIndex)vm);
+ } else {
+ vd = (bits(machInst, 22) << 5) |
+ (bits(machInst, 15, 12) << 1);
+ vm = (bits(machInst, 5) << 5) |
+ (bits(machInst, 3, 0) << 1);
+ vn = (bits(machInst, 7) << 5) |
+ (bits(machInst, 19, 16) << 1);
+ return new VnmlsD(machInst, (IntRegIndex)vd,
+ (IntRegIndex)vn, (IntRegIndex)vm);
+ }
+ }
case 0x2:
if ((opc3 & 0x1) == 0) {
uint32_t vd;
@@ -545,9 +587,27 @@ let {{
return new VmulD(machInst, (IntRegIndex)vd,
(IntRegIndex)vn, (IntRegIndex)vm);
}
+ } else {
+ uint32_t vd;
+ uint32_t vm;
+ uint32_t vn;
+ if (bits(machInst, 8) == 0) {
+ vd = bits(machInst, 22) | (bits(machInst, 15, 12) << 1);
+ vm = bits(machInst, 5) | (bits(machInst, 3, 0) << 1);
+ vn = bits(machInst, 7) | (bits(machInst, 19, 16) << 1);
+ return new VnmulS(machInst, (IntRegIndex)vd,
+ (IntRegIndex)vn, (IntRegIndex)vm);
+ } else {
+ vd = (bits(machInst, 22) << 5) |
+ (bits(machInst, 15, 12) << 1);
+ vm = (bits(machInst, 5) << 5) |
+ (bits(machInst, 3, 0) << 1);
+ vn = (bits(machInst, 7) << 5) |
+ (bits(machInst, 19, 16) << 1);
+ return new VnmulD(machInst, (IntRegIndex)vd,
+ (IntRegIndex)vn, (IntRegIndex)vm);
+ }
}
- case 0x1:
- return new WarnUnimplemented("vnmla, vnmls, vnmul", machInst);
case 0x3:
if ((opc3 & 0x1) == 0) {
uint32_t vd;
diff --git a/src/arch/arm/isa/insts/fp.isa b/src/arch/arm/isa/insts/fp.isa
index 58c2cafa7..d40b00176 100644
--- a/src/arch/arm/isa/insts/fp.isa
+++ b/src/arch/arm/isa/insts/fp.isa
@@ -481,4 +481,109 @@ let {{
header_output += RegRegRegOpDeclare.subst(vmlsDIop);
decoder_output += RegRegRegOpConstructor.subst(vmlsDIop);
exec_output += PredOpExecute.subst(vmlsDIop);
+
+ vnmlaSCode = '''
+ float mid = FpOp1 * FpOp2;
+ if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) {
+ mid = NAN;
+ }
+ FpDest = -FpDest - mid;
+ '''
+ vnmlaSIop = InstObjParams("vnmlas", "VnmlaS", "RegRegRegOp",
+ { "code": vnmlaSCode,
+ "predicate_test": predicateTest }, [])
+ header_output += RegRegRegOpDeclare.subst(vnmlaSIop);
+ decoder_output += RegRegRegOpConstructor.subst(vnmlaSIop);
+ exec_output += PredOpExecute.subst(vnmlaSIop);
+
+ vnmlaDCode = '''
+ IntDoubleUnion cOp1, cOp2, cDest;
+ cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+ cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32));
+ cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32));
+ double mid = cOp1.fp * cOp2.fp;
+ if ((isinf(cOp1.fp) && cOp2.fp == 0) ||
+ (isinf(cOp2.fp) && cOp1.fp == 0)) {
+ mid = NAN;
+ }
+ cDest.fp = -cDest.fp - mid;
+ FpDestP0.uw = cDest.bits;
+ FpDestP1.uw = cDest.bits >> 32;
+ '''
+ vnmlaDIop = InstObjParams("vnmlad", "VnmlaD", "RegRegRegOp",
+ { "code": vnmlaDCode,
+ "predicate_test": predicateTest }, [])
+ header_output += RegRegRegOpDeclare.subst(vnmlaDIop);
+ decoder_output += RegRegRegOpConstructor.subst(vnmlaDIop);
+ exec_output += PredOpExecute.subst(vnmlaDIop);
+
+ vnmlsSCode = '''
+ float mid = FpOp1 * FpOp2;
+ if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) {
+ mid = NAN;
+ }
+ FpDest = -FpDest + mid;
+ '''
+ vnmlsSIop = InstObjParams("vnmlss", "VnmlsS", "RegRegRegOp",
+ { "code": vnmlsSCode,
+ "predicate_test": predicateTest }, [])
+ header_output += RegRegRegOpDeclare.subst(vnmlsSIop);
+ decoder_output += RegRegRegOpConstructor.subst(vnmlsSIop);
+ exec_output += PredOpExecute.subst(vnmlsSIop);
+
+ vnmlsDCode = '''
+ IntDoubleUnion cOp1, cOp2, cDest;
+ cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+ cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32));
+ cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32));
+ double mid = cOp1.fp * cOp2.fp;
+ if ((isinf(cOp1.fp) && cOp2.fp == 0) ||
+ (isinf(cOp2.fp) && cOp1.fp == 0)) {
+ mid = NAN;
+ }
+ cDest.fp = -cDest.fp + mid;
+ FpDestP0.uw = cDest.bits;
+ FpDestP1.uw = cDest.bits >> 32;
+ '''
+ vnmlsDIop = InstObjParams("vnmlsd", "VnmlsD", "RegRegRegOp",
+ { "code": vnmlsDCode,
+ "predicate_test": predicateTest }, [])
+ header_output += RegRegRegOpDeclare.subst(vnmlsDIop);
+ decoder_output += RegRegRegOpConstructor.subst(vnmlsDIop);
+ exec_output += PredOpExecute.subst(vnmlsDIop);
+
+ vnmulSCode = '''
+ float mid = FpOp1 * FpOp2;
+ if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) {
+ mid = NAN;
+ }
+ FpDest = -mid;
+ '''
+ vnmulSIop = InstObjParams("vnmuls", "VnmulS", "RegRegRegOp",
+ { "code": vnmulSCode,
+ "predicate_test": predicateTest }, [])
+ header_output += RegRegRegOpDeclare.subst(vnmulSIop);
+ decoder_output += RegRegRegOpConstructor.subst(vnmulSIop);
+ exec_output += PredOpExecute.subst(vnmulSIop);
+
+ vnmulDCode = '''
+ IntDoubleUnion cOp1, cOp2, cDest;
+ cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+ cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32));
+ cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32));
+ double mid = cOp1.fp * cOp2.fp;
+ if ((isinf(cOp1.fp) && cOp2.fp == 0) ||
+ (isinf(cOp2.fp) && cOp1.fp == 0)) {
+ mid = NAN;
+ }
+ cDest.fp = -mid;
+ FpDestP0.uw = cDest.bits;
+ FpDestP1.uw = cDest.bits >> 32;
+ '''
+ vnmulDIop = InstObjParams("vnmuld", "VnmulD", "RegRegRegOp",
+ { "code": vnmulDCode,
+ "predicate_test": predicateTest }, [])
+ header_output += RegRegRegOpDeclare.subst(vnmulDIop);
+ decoder_output += RegRegRegOpConstructor.subst(vnmulDIop);
+ exec_output += PredOpExecute.subst(vnmulDIop);
}};