summary refs log tree commit diff
diff options
context:
space:
mode:
author Gabe Black <gblack@eecs.umich.edu> 2010-06-02 12:58:15 -0500
committer Gabe Black <gblack@eecs.umich.edu> 2010-06-02 12:58:15 -0500
commit 49b7088b9101dfabd236c9cf76b700fade70c265 (patch)
tree c15733883b0f259e67a6d11c98cde1386c9dd79c
parent 23ba9c7b965ebf2a54a8d399809eb400fc6fe6db (diff)
download gem5-49b7088b9101dfabd236c9cf76b700fade70c265.tar.xz
ARM: Implement the VCMPE instruction.
-rw-r--r-- src/arch/arm/isa/formats/fp.isa 26
-rw-r--r-- src/arch/arm/isa/insts/fp.isa 138
2 files changed, 158 insertions, 6 deletions
diff --git a/src/arch/arm/isa/formats/fp.isa b/src/arch/arm/isa/formats/fp.isa
index 2b999f751..83f541584 100644
--- a/src/arch/arm/isa/formats/fp.isa
+++ b/src/arch/arm/isa/formats/fp.isa
@@ -482,6 +482,8 @@ let {{
const uint32_t opc3 = bits(machInst, 7, 6);
//const uint32_t opc4 = bits(machInst, 3, 0);
const bool single = (bits(machInst, 8) == 0);
+ // Used to select between vcmp and vcmpe.
+ const bool e = (bits(machInst, 7) == 1);
IntRegIndex vd;
IntRegIndex vm;
IntRegIndex vn;
@@ -641,15 +643,31 @@ let {{
return new WarnUnimplemented("vcvtb, vcvtt", machInst);
case 0x4:
if (single) {
- return new VcmpS(machInst, vd, vm);
+ if (e) {
+ return new VcmpeS(machInst, vd, vm);
+ } else {
+ return new VcmpS(machInst, vd, vm);
+ }
} else {
- return new VcmpD(machInst, vd, vm);
+ if (e) {
+ return new VcmpeD(machInst, vd, vm);
+ } else {
+ return new VcmpD(machInst, vd, vm);
+ }
}
case 0x5:
if (single) {
- return new VcmpZeroS(machInst, vd, 0);
+ if (e) {
+ return new VcmpeZeroS(machInst, vd, 0);
+ } else {
+ return new VcmpZeroS(machInst, vd, 0);
+ }
} else {
- return new VcmpZeroD(machInst, vd, 0);
+ if (e) {
+ return new VcmpeZeroD(machInst, vd, 0);
+ } else {
+ return new VcmpZeroD(machInst, vd, 0);
+ }
}
case 0x7:
if (opc3 == 0x3) {
diff --git a/src/arch/arm/isa/insts/fp.isa b/src/arch/arm/isa/insts/fp.isa
index 045f516ce..0abae6a20 100644
--- a/src/arch/arm/isa/insts/fp.isa
+++ b/src/arch/arm/isa/insts/fp.isa
@@ -1101,8 +1101,8 @@ let {{
exec_output += PredOpExecute.subst(vcvtFpDFpSIop);
vcmpSCode = '''
- FPSCR fpscr = Fpscr;
vfpFlushToZero(Fpscr, FpDest, FpOp1);
+ FPSCR fpscr = Fpscr;
if (FpDest == FpOp1) {
fpscr.n = 0; fpscr.z = 1; fpscr.c = 1; fpscr.v = 0;
} else if (FpDest < FpOp1) {
@@ -1110,6 +1110,20 @@ let {{
} else if (FpDest > FpOp1) {
fpscr.n = 0; fpscr.z = 0; fpscr.c = 1; fpscr.v = 0;
} else {
+ const uint32_t qnan = 0x7fc00000;
+ union
+ {
+ float fp;
+ uint32_t bits;
+ } cvtr;
+ cvtr.fp = FpDest;
+ const bool nan1 = std::isnan(FpDest);
+ const bool signal1 = nan1 && ((cvtr.bits & qnan) != qnan);
+ cvtr.fp = FpOp1;
+ const bool nan2 = std::isnan(FpOp1);
+ const bool signal2 = nan2 && ((cvtr.bits & qnan) != qnan);
+ if (signal1 || signal2)
+ fpscr.ioc = 1;
fpscr.n = 0; fpscr.z = 0; fpscr.c = 1; fpscr.v = 1;
}
Fpscr = fpscr;
@@ -1134,6 +1148,13 @@ let {{
} else if (cDest.fp > cOp1.fp) {
fpscr.n = 0; fpscr.z = 0; fpscr.c = 1; fpscr.v = 0;
} else {
+ const uint64_t qnan = ULL(0x7ff8000000000000);
+ const bool nan1 = std::isnan(cDest.fp);
+ const bool signal1 = nan1 && ((cDest.bits & qnan) != qnan);
+ const bool nan2 = std::isnan(cOp1.fp);
+ const bool signal2 = nan2 && ((cOp1.bits & qnan) != qnan);
+ if (signal1 || signal2)
+ fpscr.ioc = 1;
fpscr.n = 0; fpscr.z = 0; fpscr.c = 1; fpscr.v = 1;
}
Fpscr = fpscr;
@@ -1146,8 +1167,10 @@ let {{
exec_output += PredOpExecute.subst(vcmpDIop);
vcmpZeroSCode = '''
- FPSCR fpscr = Fpscr;
vfpFlushToZero(Fpscr, FpDest);
+ FPSCR fpscr = Fpscr;
+ // This only handles imm == 0 for now.
+ assert(imm == 0);
if (FpDest == imm) {
fpscr.n = 0; fpscr.z = 1; fpscr.c = 1; fpscr.v = 0;
} else if (FpDest < imm) {
@@ -1155,6 +1178,17 @@ let {{
} else if (FpDest > imm) {
fpscr.n = 0; fpscr.z = 0; fpscr.c = 1; fpscr.v = 0;
} else {
+ const uint32_t qnan = 0x7fc00000;
+ union
+ {
+ float fp;
+ uint32_t bits;
+ } cvtr;
+ cvtr.fp = FpDest;
+ const bool nan = std::isnan(FpDest);
+ const bool signal = nan && ((cvtr.bits & qnan) != qnan);
+ if (signal)
+ fpscr.ioc = 1;
fpscr.n = 0; fpscr.z = 0; fpscr.c = 1; fpscr.v = 1;
}
Fpscr = fpscr;
@@ -1168,6 +1202,8 @@ let {{
vcmpZeroDCode = '''
IntDoubleUnion cDest;
+ // This only handles imm == 0 for now.
+ assert(imm == 0);
cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32));
vfpFlushToZero(Fpscr, cDest.fp);
FPSCR fpscr = Fpscr;
@@ -1178,6 +1214,11 @@ let {{
} else if (cDest.fp > imm) {
fpscr.n = 0; fpscr.z = 0; fpscr.c = 1; fpscr.v = 0;
} else {
+ const uint64_t qnan = ULL(0x7ff8000000000000);
+ const bool nan = std::isnan(cDest.fp);
+ const bool signal = nan && ((cDest.bits & qnan) != qnan);
+ if (signal)
+ fpscr.ioc = 1;
fpscr.n = 0; fpscr.z = 0; fpscr.c = 1; fpscr.v = 1;
}
Fpscr = fpscr;
@@ -1188,6 +1229,99 @@ let {{
header_output += VfpRegImmOpDeclare.subst(vcmpZeroDIop);
decoder_output += VfpRegImmOpConstructor.subst(vcmpZeroDIop);
exec_output += PredOpExecute.subst(vcmpZeroDIop);
+
+ vcmpeSCode = '''
+ vfpFlushToZero(Fpscr, FpDest, FpOp1);
+ FPSCR fpscr = Fpscr;
+ if (FpDest == FpOp1) {
+ fpscr.n = 0; fpscr.z = 1; fpscr.c = 1; fpscr.v = 0;
+ } else if (FpDest < FpOp1) {
+ fpscr.n = 1; fpscr.z = 0; fpscr.c = 0; fpscr.v = 0;
+ } else if (FpDest > FpOp1) {
+ fpscr.n = 0; fpscr.z = 0; fpscr.c = 1; fpscr.v = 0;
+ } else {
+ fpscr.ioc = 1;
+ fpscr.n = 0; fpscr.z = 0; fpscr.c = 1; fpscr.v = 1;
+ }
+ Fpscr = fpscr;
+ '''
+ vcmpeSIop = InstObjParams("vcmpes", "VcmpeS", "VfpRegRegOp",
+ { "code": vcmpeSCode,
+ "predicate_test": predicateTest }, [])
+ header_output += VfpRegRegOpDeclare.subst(vcmpeSIop);
+ decoder_output += VfpRegRegOpConstructor.subst(vcmpeSIop);
+ exec_output += PredOpExecute.subst(vcmpeSIop);
+
+ vcmpeDCode = '''
+ IntDoubleUnion cOp1, cDest;
+ cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32));
+ cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+ vfpFlushToZero(Fpscr, cDest.fp, cOp1.fp);
+ FPSCR fpscr = Fpscr;
+ if (cDest.fp == cOp1.fp) {
+ fpscr.n = 0; fpscr.z = 1; fpscr.c = 1; fpscr.v = 0;
+ } else if (cDest.fp < cOp1.fp) {
+ fpscr.n = 1; fpscr.z = 0; fpscr.c = 0; fpscr.v = 0;
+ } else if (cDest.fp > cOp1.fp) {
+ fpscr.n = 0; fpscr.z = 0; fpscr.c = 1; fpscr.v = 0;
+ } else {
+ fpscr.ioc = 1;
+ fpscr.n = 0; fpscr.z = 0; fpscr.c = 1; fpscr.v = 1;
+ }
+ Fpscr = fpscr;
+ '''
+ vcmpeDIop = InstObjParams("vcmped", "VcmpeD", "VfpRegRegOp",
+ { "code": vcmpeDCode,
+ "predicate_test": predicateTest }, [])
+ header_output += VfpRegRegOpDeclare.subst(vcmpeDIop);
+ decoder_output += VfpRegRegOpConstructor.subst(vcmpeDIop);
+ exec_output += PredOpExecute.subst(vcmpeDIop);
+
+ vcmpeZeroSCode = '''
+ vfpFlushToZero(Fpscr, FpDest);
+ FPSCR fpscr = Fpscr;
+ if (FpDest == imm) {
+ fpscr.n = 0; fpscr.z = 1; fpscr.c = 1; fpscr.v = 0;
+ } else if (FpDest < imm) {
+ fpscr.n = 1; fpscr.z = 0; fpscr.c = 0; fpscr.v = 0;
+ } else if (FpDest > imm) {
+ fpscr.n = 0; fpscr.z = 0; fpscr.c = 1; fpscr.v = 0;
+ } else {
+ fpscr.ioc = 1;
+ fpscr.n = 0; fpscr.z = 0; fpscr.c = 1; fpscr.v = 1;
+ }
+ Fpscr = fpscr;
+ '''
+ vcmpeZeroSIop = InstObjParams("vcmpeZeros", "VcmpeZeroS", "VfpRegImmOp",
+ { "code": vcmpeZeroSCode,
+ "predicate_test": predicateTest }, [])
+ header_output += VfpRegImmOpDeclare.subst(vcmpeZeroSIop);
+ decoder_output += VfpRegImmOpConstructor.subst(vcmpeZeroSIop);
+ exec_output += PredOpExecute.subst(vcmpeZeroSIop);
+
+ vcmpeZeroDCode = '''
+ IntDoubleUnion cDest;
+ cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32));
+ vfpFlushToZero(Fpscr, cDest.fp);
+ FPSCR fpscr = Fpscr;
+ if (cDest.fp == imm) {
+ fpscr.n = 0; fpscr.z = 1; fpscr.c = 1; fpscr.v = 0;
+ } else if (cDest.fp < imm) {
+ fpscr.n = 1; fpscr.z = 0; fpscr.c = 0; fpscr.v = 0;
+ } else if (cDest.fp > imm) {
+ fpscr.n = 0; fpscr.z = 0; fpscr.c = 1; fpscr.v = 0;
+ } else {
+ fpscr.ioc = 1;
+ fpscr.n = 0; fpscr.z = 0; fpscr.c = 1; fpscr.v = 1;
+ }
+ Fpscr = fpscr;
+ '''
+ vcmpeZeroDIop = InstObjParams("vcmpeZerod", "VcmpeZeroD", "VfpRegImmOp",
+ { "code": vcmpeZeroDCode,
+ "predicate_test": predicateTest }, [])
+ header_output += VfpRegImmOpDeclare.subst(vcmpeZeroDIop);
+ decoder_output += VfpRegImmOpConstructor.subst(vcmpeZeroDIop);
+ exec_output += PredOpExecute.subst(vcmpeZeroDIop);
}};
let {{