summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/arch/arm/insts/vfp.cc 13
-rw-r--r-- src/arch/arm/insts/vfp.hh 5
-rw-r--r-- src/arch/arm/isa/formats/fp.isa 42
-rw-r--r-- src/arch/arm/isa/insts/neon.isa 80
4 files changed, 88 insertions, 52 deletions
diff --git a/src/arch/arm/insts/vfp.cc b/src/arch/arm/insts/vfp.cc
index f72fba675..7056f8491 100644
--- a/src/arch/arm/insts/vfp.cc
+++ b/src/arch/arm/insts/vfp.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010-2013 ARM Limited
+ * Copyright (c) 2010-2013, 2019 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -891,6 +891,17 @@ unsignedRecipEstimate(uint32_t op)
}
}
+FPSCR
+fpStandardFPSCRValue(const FPSCR &fpscr) // FPSCR copy with fixed dn/fz
+{
+ FPSCR new_fpscr(0); // every field not assigned below stays zero
+ new_fpscr.ahp = fpscr.ahp; // carried over from the caller's FPSCR
+ new_fpscr.dn = 1; // forced on, whatever the caller's FPSCR holds
+ new_fpscr.fz = 1; // forced on, whatever the caller's FPSCR holds
+ new_fpscr.fz16 = fpscr.fz16; // carried over from the caller's FPSCR
+ return new_fpscr;
+}
+
template <class fpType>
fpType
FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan,
diff --git a/src/arch/arm/insts/vfp.hh b/src/arch/arm/insts/vfp.hh
index ac20643b8..d7a072408 100644
--- a/src/arch/arm/insts/vfp.hh
+++ b/src/arch/arm/insts/vfp.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010-2013 ARM Limited
+ * Copyright (c) 2010-2013, 2019 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -447,6 +447,9 @@ uint32_t unsignedRSqrtEstimate(uint32_t op);
float fpRecipEstimate(FPSCR &fpscr, float op);
uint32_t unsignedRecipEstimate(uint32_t op);
+FPSCR
+fpStandardFPSCRValue(const FPSCR &fpscr);
+
class VfpMacroOp : public PredMacroOp
{
public:
diff --git a/src/arch/arm/isa/formats/fp.isa b/src/arch/arm/isa/formats/fp.isa
index 1bb6bc89d..e730833db 100644
--- a/src/arch/arm/isa/formats/fp.isa
+++ b/src/arch/arm/isa/formats/fp.isa
@@ -742,7 +742,23 @@ let {{
case 0xf:
if (o1) {
if (u) {
- return new Unknown(machInst);
+ if (bits(size, 1) == 0) {
+ if (q) {
+ return new VmaxnmQFp<uint32_t>(
+ machInst, vd, vn, vm);
+ } else {
+ return new VmaxnmDFp<uint32_t>(
+ machInst, vd, vn, vm);
+ }
+ } else {
+ if (q) {
+ return new VminnmQFp<uint32_t>(
+ machInst, vd, vn, vm);
+ } else {
+ return new VminnmDFp<uint32_t>(
+ machInst, vd, vn, vm);
+ }
+ }
} else {
if (bits(size, 1) == 0) {
if (q) {
@@ -762,29 +778,37 @@ let {{
if (u) {
if (bits(size, 1) == 0) {
if (q) {
- return new VpmaxQFp<float>(machInst, vd, vn, vm);
+ return new VpmaxQFp<uint32_t>(
+ machInst, vd, vn, vm);
} else {
- return new VpmaxDFp<float>(machInst, vd, vn, vm);
+ return new VpmaxDFp<uint32_t>(
+ machInst, vd, vn, vm);
}
} else {
if (q) {
- return new VpminQFp<float>(machInst, vd, vn, vm);
+ return new VpminQFp<uint32_t>(
+ machInst, vd, vn, vm);
} else {
- return new VpminDFp<float>(machInst, vd, vn, vm);
+ return new VpminDFp<uint32_t>(
+ machInst, vd, vn, vm);
}
}
} else {
if (bits(size, 1) == 0) {
if (q) {
- return new VmaxQFp<float>(machInst, vd, vn, vm);
+ return new VmaxQFp<uint32_t>(
+ machInst, vd, vn, vm);
} else {
- return new VmaxDFp<float>(machInst, vd, vn, vm);
+ return new VmaxDFp<uint32_t>(
+ machInst, vd, vn, vm);
}
} else {
if (q) {
- return new VminQFp<float>(machInst, vd, vn, vm);
+ return new VminQFp<uint32_t>(
+ machInst, vd, vn, vm);
} else {
- return new VminDFp<float>(machInst, vd, vn, vm);
+ return new VminDFp<uint32_t>(
+ machInst, vd, vn, vm);
}
}
}
diff --git a/src/arch/arm/isa/insts/neon.isa b/src/arch/arm/isa/insts/neon.isa
index 1e0c1164f..bfebd103d 100644
--- a/src/arch/arm/isa/insts/neon.isa
+++ b/src/arch/arm/isa/insts/neon.isa
@@ -1,6 +1,6 @@
// -*- mode:c++ -*-
-// Copyright (c) 2010-2011, 2015 ARM Limited
+// Copyright (c) 2010-2011, 2015, 2019 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -1132,7 +1132,8 @@ let {{
allTypes = unsignedTypes + signedTypes
def threeEqualRegInst(name, Name, opClass, types, rCount, op,
- readDest=False, pairwise=False):
+ readDest=False, pairwise=False,
+ standardFpcsr=False):
global header_output, exec_output
eWalkCode = simdEnabledCheckCode + '''
RegVect srcReg1, srcReg2, destReg;
@@ -1147,6 +1148,10 @@ let {{
destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
''' % { "reg" : reg }
readDestCode = ''
+ if standardFpcsr:
+ eWalkCode += '''
+ FPSCR fpscr = fpStandardFPSCRValue((FPSCR)FpscrExc);
+ '''
if readDest:
readDestCode = 'destElem = gtoh(destReg.elements[i]);'
if pairwise:
@@ -1175,6 +1180,10 @@ let {{
destReg.elements[i] = htog(destElem);
}
''' % { "op" : op, "readDest" : readDestCode }
+ if standardFpcsr:
+ eWalkCode += '''
+ FpscrExc = fpscr;
+ '''
for reg in range(rCount):
eWalkCode += '''
FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
@@ -2649,45 +2658,34 @@ let {{
threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
"SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
- vmaxfpCode = '''
- FPSCR fpscr = (FPSCR) FpscrExc;
- bool done;
- destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
- if (!done) {
- destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMax<float>,
- true, true, VfpRoundNearest);
- } else if (flushToZero(srcReg1, srcReg2)) {
- fpscr.idc = 1;
- }
- FpscrExc = fpscr;
- '''
- threeEqualRegInstFp("vmax", "VmaxDFp", "SimdFloatCmpOp", ("float",), 2, vmaxfpCode)
- threeEqualRegInstFp("vmax", "VmaxQFp", "SimdFloatCmpOp", ("float",), 4, vmaxfpCode)
-
- vminfpCode = '''
- FPSCR fpscr = (FPSCR) FpscrExc;
- bool done;
- destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
- if (!done) {
- destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMin<float>,
- true, true, VfpRoundNearest);
- } else if (flushToZero(srcReg1, srcReg2)) {
- fpscr.idc = 1;
- }
- FpscrExc = fpscr;
- '''
- threeEqualRegInstFp("vmin", "VminDFp", "SimdFloatCmpOp", ("float",), 2, vminfpCode)
- threeEqualRegInstFp("vmin", "VminQFp", "SimdFloatCmpOp", ("float",), 4, vminfpCode)
-
- threeEqualRegInstFp("vpmax", "VpmaxDFp", "SimdFloatCmpOp", ("float",),
- 2, vmaxfpCode, pairwise=True)
- threeEqualRegInstFp("vpmax", "VpmaxQFp", "SimdFloatCmpOp", ("float",),
- 4, vmaxfpCode, pairwise=True)
-
- threeEqualRegInstFp("vpmin", "VpminDFp", "SimdFloatCmpOp", ("float",),
- 2, vminfpCode, pairwise=True)
- threeEqualRegInstFp("vpmin", "VpminQFp", "SimdFloatCmpOp", ("float",),
- 4, vminfpCode, pairwise=True)
+ vMinMaxFpCode = '''
+ destElem = fplib%s<Element>(srcElem1, srcElem2, fpscr);
+ '''
+ vMinMaxInsts = [
+ ("vmax", "VmaxDFp", 2, "Max", False, ),
+ ("vmax", "VmaxQFp", 4, "Max", False, ),
+ ("vmaxnm", "VmaxnmDFp", 2, "MaxNum", False, ),
+ ("vmaxnm", "VmaxnmQFp", 4, "MaxNum", False, ),
+ ("vpmax", "VpmaxDFp", 2, "Max", True, ),
+ ("vpmax", "VpmaxQFp", 4, "Max", True, ),
+ ("vmin", "VminDFp", 2, "Min", False, ),
+ ("vmin", "VminQFp", 4, "Min", False, ),
+ ("vminnm", "VminnmDFp", 2, "MinNum", False, ),
+ ("vminnm", "VminnmQFp", 4, "MinNum", False, ),
+ ("vpmin", "VpminDFp", 2, "Min", True, ),
+ ("vpmin", "VpminQFp", 4, "Min", True, ),
+ ]
+ for name, Name, rCount, op, pairwise in vMinMaxInsts:
+ threeEqualRegInst(
+ name,
+ Name,
+ "SimdFloatCmpOp",
+ ("uint32_t",),
+ rCount,
+ vMinMaxFpCode % op,
+ pairwise=pairwise,
+ standardFpcsr=True,
+ )
vaddfpCode = '''
FPSCR fpscr = (FPSCR) FpscrExc;