diff options
Diffstat (limited to 'src/arch/arm')
-rw-r--r-- | src/arch/arm/insts/vfp.cc | 13 | ||||
-rw-r--r-- | src/arch/arm/insts/vfp.hh | 5 | ||||
-rw-r--r-- | src/arch/arm/isa/formats/fp.isa | 42 | ||||
-rw-r--r-- | src/arch/arm/isa/insts/neon.isa | 80 |
4 files changed, 88 insertions, 52 deletions
diff --git a/src/arch/arm/insts/vfp.cc b/src/arch/arm/insts/vfp.cc index f72fba675..7056f8491 100644 --- a/src/arch/arm/insts/vfp.cc +++ b/src/arch/arm/insts/vfp.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2013 ARM Limited + * Copyright (c) 2010-2013, 2019 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -891,6 +891,17 @@ unsignedRecipEstimate(uint32_t op) } } +FPSCR +fpStandardFPSCRValue(const FPSCR &fpscr) +{ + FPSCR new_fpscr(0); + new_fpscr.ahp = fpscr.ahp; + new_fpscr.dn = 1; + new_fpscr.fz = 1; + new_fpscr.fz16 = fpscr.fz16; + return new_fpscr; +}; + template <class fpType> fpType FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan, diff --git a/src/arch/arm/insts/vfp.hh b/src/arch/arm/insts/vfp.hh index ac20643b8..d7a072408 100644 --- a/src/arch/arm/insts/vfp.hh +++ b/src/arch/arm/insts/vfp.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2013 ARM Limited + * Copyright (c) 2010-2013, 2019 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -447,6 +447,9 @@ uint32_t unsignedRSqrtEstimate(uint32_t op); float fpRecipEstimate(FPSCR &fpscr, float op); uint32_t unsignedRecipEstimate(uint32_t op); +FPSCR +fpStandardFPSCRValue(const FPSCR &fpscr); + class VfpMacroOp : public PredMacroOp { public: diff --git a/src/arch/arm/isa/formats/fp.isa b/src/arch/arm/isa/formats/fp.isa index 1bb6bc89d..e730833db 100644 --- a/src/arch/arm/isa/formats/fp.isa +++ b/src/arch/arm/isa/formats/fp.isa @@ -742,7 +742,23 @@ let {{ case 0xf: if (o1) { if (u) { - return new Unknown(machInst); + if (bits(size, 1) == 0) { + if (q) { + return new VmaxnmQFp<uint32_t>( + machInst, vd, vn, vm); + } else { + return new VmaxnmDFp<uint32_t>( + machInst, vd, vn, vm); + } + } else { + if (q) { + return new VminnmQFp<uint32_t>( + machInst, vd, vn, vm); + } else { + return new VminnmDFp<uint32_t>( + machInst, vd, vn, vm); + } + } } else { if (bits(size, 1) == 0) { if (q) { @@ -762,29 +778,37 @@ let {{ if (u) { if (bits(size, 1) == 0) { if (q) { - return new VpmaxQFp<float>(machInst, vd, vn, vm); + return new VpmaxQFp<uint32_t>( + machInst, vd, vn, vm); } else { - return new VpmaxDFp<float>(machInst, vd, vn, vm); + return new VpmaxDFp<uint32_t>( + machInst, vd, vn, vm); } } else { if (q) { - return new VpminQFp<float>(machInst, vd, vn, vm); + return new VpminQFp<uint32_t>( + machInst, vd, vn, vm); } else { - return new VpminDFp<float>(machInst, vd, vn, vm); + return new VpminDFp<uint32_t>( + machInst, vd, vn, vm); } } } else { if (bits(size, 1) == 0) { if (q) { - return new VmaxQFp<float>(machInst, vd, vn, vm); + return new VmaxQFp<uint32_t>( + machInst, vd, vn, vm); } else { - return new VmaxDFp<float>(machInst, vd, vn, vm); + return new VmaxDFp<uint32_t>( + machInst, vd, vn, vm); } } else { if (q) { - return new VminQFp<float>(machInst, vd, vn, vm); + return new VminQFp<uint32_t>( + machInst, vd, vn, vm); } else { - return new VminDFp<float>(machInst, vd, vn, vm); + return new VminDFp<uint32_t>( + machInst, vd, vn, vm); } } } diff --git a/src/arch/arm/isa/insts/neon.isa b/src/arch/arm/isa/insts/neon.isa index 1e0c1164f..bfebd103d 100644 --- a/src/arch/arm/isa/insts/neon.isa +++ b/src/arch/arm/isa/insts/neon.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010-2011, 2015 ARM Limited +// Copyright (c) 2010-2011, 2015, 2019 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -1132,7 +1132,8 @@ let {{ allTypes = unsignedTypes + signedTypes def threeEqualRegInst(name, Name, opClass, types, rCount, op, - readDest=False, pairwise=False): + readDest=False, pairwise=False, + standardFpcsr=False): global header_output, exec_output eWalkCode = simdEnabledCheckCode + ''' RegVect srcReg1, srcReg2, destReg; @@ -1147,6 +1148,10 @@ let {{ destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); ''' % { "reg" : reg } readDestCode = '' + if standardFpcsr: + eWalkCode += ''' + FPSCR fpscr = fpStandardFPSCRValue((FPSCR)FpscrExc); + ''' if readDest: readDestCode = 'destElem = gtoh(destReg.elements[i]);' if pairwise: @@ -1175,6 +1180,10 @@ let {{ destReg.elements[i] = htog(destElem); } ''' % { "op" : op, "readDest" : readDestCode } + if standardFpcsr: + eWalkCode += ''' + FpscrExc = fpscr; + ''' for reg in range(rCount): eWalkCode += ''' FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); @@ -2649,45 +2658,34 @@ let {{ threeEqualRegInst("vqrdmulh", "VqrdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode) - vmaxfpCode = ''' - FPSCR fpscr = (FPSCR) FpscrExc; - bool done; - destReg = processNans(fpscr, done, true, srcReg1, srcReg2); - if (!done) { - destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMax<float>, - true, true, VfpRoundNearest); - } else if (flushToZero(srcReg1, srcReg2)) { - fpscr.idc = 1; - } - FpscrExc = fpscr; - ''' - threeEqualRegInstFp("vmax", "VmaxDFp", "SimdFloatCmpOp", ("float",), 2, vmaxfpCode) - threeEqualRegInstFp("vmax", "VmaxQFp", "SimdFloatCmpOp", ("float",), 4, vmaxfpCode) - - vminfpCode = ''' - FPSCR fpscr = (FPSCR) FpscrExc; - bool done; - destReg = processNans(fpscr, done, true, srcReg1, srcReg2); - if (!done) { - destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMin<float>, - true, true, VfpRoundNearest); - } else if (flushToZero(srcReg1, srcReg2)) { - fpscr.idc = 1; - } - FpscrExc = fpscr; - ''' - threeEqualRegInstFp("vmin", "VminDFp", "SimdFloatCmpOp", ("float",), 2, vminfpCode) - threeEqualRegInstFp("vmin", "VminQFp", "SimdFloatCmpOp", ("float",), 4, vminfpCode) - - threeEqualRegInstFp("vpmax", "VpmaxDFp", "SimdFloatCmpOp", ("float",), - 2, vmaxfpCode, pairwise=True) - threeEqualRegInstFp("vpmax", "VpmaxQFp", "SimdFloatCmpOp", ("float",), - 4, vmaxfpCode, pairwise=True) - - threeEqualRegInstFp("vpmin", "VpminDFp", "SimdFloatCmpOp", ("float",), - 2, vminfpCode, pairwise=True) - threeEqualRegInstFp("vpmin", "VpminQFp", "SimdFloatCmpOp", ("float",), - 4, vminfpCode, pairwise=True) + vMinMaxFpCode = ''' + destElem = fplib%s<Element>(srcElem1, srcElem2, fpscr); + ''' + vMinMaxInsts = [ + ("vmax", "VmaxDFp", 2, "Max", False, ), + ("vmax", "VmaxQFp", 4, "Max", False, ), + ("vmaxnm", "VmaxnmDFp", 2, "MaxNum", False, ), + ("vmaxnm", "VmaxnmQFp", 4, "MaxNum", False, ), + ("vpmax", "VpmaxDFp", 2, "Max", True, ), + ("vpmax", "VpmaxQFp", 4, "Max", True, ), + ("vmin", "VminDFp", 2, "Min", False, ), + ("vmin", "VminQFp", 4, "Min", False, ), + ("vminnm", "VminnmDFp", 2, "MinNum", False, ), + ("vminnm", "VminnmQFp", 4, "MinNum", False, ), + ("vpmin", "VpminDFp", 2, "Min", True, ), + ("vpmin", "VpminQFp", 4, "Min", True, ), + ] + for name, Name, rCount, op, pairwise in vMinMaxInsts: + threeEqualRegInst( + name, + Name, + "SimdFloatCmpOp", + ("uint32_t",), + rCount, + vMinMaxFpCode % op, + pairwise=pairwise, + standardFpcsr=True, + ) vaddfpCode = ''' FPSCR fpscr = (FPSCR) FpscrExc; |