summaryrefslogtreecommitdiff
path: root/src/arch/arm/isa/insts/fp64.isa
diff options
context:
space:
mode:
authorARM gem5 Developers <none@none>2014-01-24 15:29:34 -0600
committerARM gem5 Developers <none@none>2014-01-24 15:29:34 -0600
commit612f8f074fa1099cf70faf495d46cc647762a031 (patch)
treebd1e99c43bf15292395eadd4b7ae3f5c823545c3 /src/arch/arm/isa/insts/fp64.isa
parentf3585c841e964c98911784a187fc4f081a02a0a6 (diff)
downloadgem5-612f8f074fa1099cf70faf495d46cc647762a031.tar.xz
arm: Add support for ARMv8 (AArch64 & AArch32)
Note: AArch64 and AArch32 interworking is not supported. If you use an AArch64 kernel you are restricted to AArch64 user-mode binaries. This will be addressed in a later patch. Note: Virtualization is only supported in AArch32 mode. This will also be fixed in a later patch. Contributors: Giacomo Gabrielli (TrustZone, LPAE, system-level AArch64, AArch64 NEON, validation) Thomas Grocutt (AArch32 Virtualization, AArch64 FP, validation) Mbou Eyole (AArch64 NEON, validation) Ali Saidi (AArch64 Linux support, code integration, validation) Edmund Grimley-Evans (AArch64 FP) William Wang (AArch64 Linux support) Rene De Jong (AArch64 Linux support, performance opt.) Matt Horsnell (AArch64 MP, validation) Matt Evans (device models, code integration, validation) Chris Adeniyi-Jones (AArch64 syscall-emulation) Prakash Ramrakhyani (validation) Dam Sunwoo (validation) Chander Sudanthi (validation) Stephan Diestelhorst (validation) Andreas Hansson (code integration, performance opt.) Eric Van Hensbergen (performance opt.) Gabe Black
Diffstat (limited to 'src/arch/arm/isa/insts/fp64.isa')
-rw-r--r--src/arch/arm/isa/insts/fp64.isa811
1 files changed, 811 insertions, 0 deletions
diff --git a/src/arch/arm/isa/insts/fp64.isa b/src/arch/arm/isa/insts/fp64.isa
new file mode 100644
index 000000000..95dec5062
--- /dev/null
+++ b/src/arch/arm/isa/insts/fp64.isa
@@ -0,0 +1,811 @@
+// -*- mode:c++ -*-
+
+// Copyright (c) 2012-2013 ARM Limited
+// All rights reserved
+//
+// The license below extends only to copyright in the software and shall
+// not be construed as granting a license to any other intellectual
+// property including but not limited to intellectual property relating
+// to a hardware implementation of the functionality of the software
+// licensed hereunder. You may use the software subject to the license
+// terms below provided that you ensure that this notice is replicated
+// unmodified and in its entirety in all distributions of the software,
+// modified or unmodified, in source code or in binary form.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Thomas Grocutt
+// Edmund Grimley Evans
+
+let {{
+
+ header_output = ""
+ decoder_output = ""
+ exec_output = ""
+
+ fmovImmSCode = vfp64EnabledCheckCode + '''
+ AA64FpDestP0_uw = bits(imm, 31, 0);
+ AA64FpDestP1_uw = 0;
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ '''
+ fmovImmSIop = InstObjParams("fmov", "FmovImmS", "FpRegImmOp",
+ { "code": fmovImmSCode,
+ "op_class": "SimdFloatMiscOp" }, [])
+ header_output += FpRegImmOpDeclare.subst(fmovImmSIop);
+ decoder_output += FpRegImmOpConstructor.subst(fmovImmSIop);
+ exec_output += BasicExecute.subst(fmovImmSIop);
+
+ fmovImmDCode = vfp64EnabledCheckCode + '''
+ AA64FpDestP0_uw = bits(imm, 31, 0);
+ AA64FpDestP1_uw = bits(imm, 63, 32);
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ '''
+ fmovImmDIop = InstObjParams("fmov", "FmovImmD", "FpRegImmOp",
+ { "code": fmovImmDCode,
+ "op_class": "SimdFloatMiscOp" }, [])
+ header_output += FpRegImmOpDeclare.subst(fmovImmDIop);
+ decoder_output += AA64FpRegImmOpConstructor.subst(fmovImmDIop);
+ exec_output += BasicExecute.subst(fmovImmDIop);
+
+ fmovRegSCode = vfp64EnabledCheckCode + '''
+ AA64FpDestP0_uw = AA64FpOp1P0_uw;
+ AA64FpDestP1_uw = 0;
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ '''
+ fmovRegSIop = InstObjParams("fmov", "FmovRegS", "FpRegRegOp",
+ { "code": fmovRegSCode,
+ "op_class": "SimdFloatMiscOp" }, [])
+ header_output += FpRegRegOpDeclare.subst(fmovRegSIop);
+ decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegSIop);
+ exec_output += BasicExecute.subst(fmovRegSIop);
+
+ fmovRegDCode = vfp64EnabledCheckCode + '''
+ AA64FpDestP0_uw = AA64FpOp1P0_uw;
+ AA64FpDestP1_uw = AA64FpOp1P1_uw;
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ '''
+ fmovRegDIop = InstObjParams("fmov", "FmovRegD", "FpRegRegOp",
+ { "code": fmovRegDCode,
+ "op_class": "SimdFloatMiscOp" }, [])
+ header_output += FpRegRegOpDeclare.subst(fmovRegDIop);
+ decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegDIop);
+ exec_output += BasicExecute.subst(fmovRegDIop);
+
+ fmovCoreRegWCode = vfp64EnabledCheckCode + '''
+ AA64FpDestP0_uw = WOp1_uw;
+ AA64FpDestP1_uw = 0;
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ '''
+ fmovCoreRegWIop = InstObjParams("fmov", "FmovCoreRegW", "FpRegRegOp",
+ { "code": fmovCoreRegWCode,
+ "op_class": "SimdFloatMiscOp" }, [])
+ header_output += FpRegRegOpDeclare.subst(fmovCoreRegWIop);
+ decoder_output += AA64FpRegRegOpConstructor.subst(fmovCoreRegWIop);
+ exec_output += BasicExecute.subst(fmovCoreRegWIop);
+
+ fmovCoreRegXCode = vfp64EnabledCheckCode + '''
+ AA64FpDestP0_uw = XOp1_ud;
+ AA64FpDestP1_uw = XOp1_ud >> 32;
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ '''
+ fmovCoreRegXIop = InstObjParams("fmov", "FmovCoreRegX", "FpRegRegOp",
+ { "code": fmovCoreRegXCode,
+ "op_class": "SimdFloatMiscOp" }, [])
+ header_output += FpRegRegOpDeclare.subst(fmovCoreRegXIop);
+ decoder_output += AA64FpRegRegOpConstructor.subst(fmovCoreRegXIop);
+ exec_output += BasicExecute.subst(fmovCoreRegXIop);
+
+ fmovUCoreRegXCode = vfp64EnabledCheckCode + '''
+ AA64FpDestP2_uw = XOp1_ud;
+ AA64FpDestP3_uw = XOp1_ud >> 32;
+ '''
+ fmovUCoreRegXIop = InstObjParams("fmov", "FmovUCoreRegX", "FpRegRegOp",
+ { "code": fmovUCoreRegXCode,
+ "op_class": "SimdFloatMiscOp" }, [])
+ header_output += FpRegRegOpDeclare.subst(fmovUCoreRegXIop);
+ decoder_output += AA64FpRegRegOpConstructor.subst(fmovUCoreRegXIop);
+ exec_output += BasicExecute.subst(fmovUCoreRegXIop);
+
+ fmovRegCoreWCode = vfp64EnabledCheckCode + '''
+ WDest = AA64FpOp1P0_uw;
+ '''
+ fmovRegCoreWIop = InstObjParams("fmov", "FmovRegCoreW", "FpRegRegOp",
+ { "code": fmovRegCoreWCode,
+ "op_class": "SimdFloatMiscOp" }, [])
+ header_output += FpRegRegOpDeclare.subst(fmovRegCoreWIop);
+ decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegCoreWIop);
+ exec_output += BasicExecute.subst(fmovRegCoreWIop);
+
+ fmovRegCoreXCode = vfp64EnabledCheckCode + '''
+ XDest = ( ((uint64_t) AA64FpOp1P1_uw) << 32) | AA64FpOp1P0_uw;
+ '''
+ fmovRegCoreXIop = InstObjParams("fmov", "FmovRegCoreX", "FpRegRegOp",
+ { "code": fmovRegCoreXCode,
+ "op_class": "SimdFloatMiscOp" }, [])
+ header_output += FpRegRegOpDeclare.subst(fmovRegCoreXIop);
+ decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegCoreXIop);
+ exec_output += BasicExecute.subst(fmovRegCoreXIop);
+
+ fmovURegCoreXCode = vfp64EnabledCheckCode + '''
+ XDest = ( ((uint64_t) AA64FpOp1P3_uw) << 32) | AA64FpOp1P2_uw;
+ '''
+ fmovURegCoreXIop = InstObjParams("fmov", "FmovURegCoreX", "FpRegRegOp",
+ { "code": fmovURegCoreXCode,
+ "op_class": "SimdFloatMiscOp" }, [])
+ header_output += FpRegRegOpDeclare.subst(fmovURegCoreXIop);
+ decoder_output += AA64FpRegRegOpConstructor.subst(fmovURegCoreXIop);
+ exec_output += BasicExecute.subst(fmovURegCoreXIop);
+}};
+
+let {{
+
+ header_output = ""
+ decoder_output = ""
+ exec_output = ""
+
+ singleIntConvCode = vfp64EnabledCheckCode + '''
+ FPSCR fpscr = (FPSCR) FpscrExc;
+ uint32_t cOp1 = AA64FpOp1P0_uw;
+ uint32_t cDest = %(op)s;
+ AA64FpDestP0_uw = cDest;
+ AA64FpDestP1_uw = 0;
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ FpscrExc = fpscr;
+ '''
+
+ singleIntConvCode2 = vfp64EnabledCheckCode + '''
+ FPSCR fpscr = (FPSCR) FpscrExc;
+ uint32_t cOp1 = AA64FpOp1P0_uw;
+ uint32_t cOp2 = AA64FpOp2P0_uw;
+ uint32_t cDest = %(op)s;
+ AA64FpDestP0_uw = cDest;
+ AA64FpDestP1_uw = 0;
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ FpscrExc = fpscr;
+ '''
+
+ singleBinOp = "binaryOp(fpscr, AA64FpOp1P0, AA64FpOp2P0," + \
+ "%(func)s, fpscr.fz, fpscr.dn, fpscr.rMode)"
+ singleUnaryOp = "unaryOp(fpscr, AA64FpOp1P0, %(func)s, fpscr.fz, fpscr.rMode)"
+
+ doubleIntConvCode = vfp64EnabledCheckCode + '''
+ FPSCR fpscr = (FPSCR) FpscrExc;
+ uint64_t cOp1 = ((uint64_t) AA64FpOp1P1_uw) << 32 | AA64FpOp1P0_uw;
+ uint64_t cDest = %(op)s;
+ AA64FpDestP0_uw = cDest & 0xFFFFFFFF;
+ AA64FpDestP1_uw = cDest >> 32;
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ FpscrExc = fpscr;
+ '''
+
+ doubleIntConvCode2 = vfp64EnabledCheckCode + '''
+ FPSCR fpscr = (FPSCR) FpscrExc;
+ uint64_t cOp1 = ((uint64_t) AA64FpOp1P1_uw) << 32 | AA64FpOp1P0_uw;
+ uint64_t cOp2 = ((uint64_t) AA64FpOp2P1_uw) << 32 | AA64FpOp2P0_uw;
+ uint64_t cDest = %(op)s;
+ AA64FpDestP0_uw = cDest & 0xFFFFFFFF;
+ AA64FpDestP1_uw = cDest >> 32;
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ FpscrExc = fpscr;
+ '''
+
+ doubleBinOp = '''
+ binaryOp(fpscr, dbl(AA64FpOp1P0_uw, AA64FpOp1P1_uw),
+ dbl(AA64FpOp2P0_uw, AA64FpOp2P1_uw),
+ %(func)s, fpscr.fz, fpscr.dn, fpscr.rMode);
+ '''
+ doubleUnaryOp = '''
+ unaryOp(fpscr, dbl(AA64FpOp1P0_uw, AA64FpOp1P1_uw), %(func)s,
+ fpscr.fz, fpscr.rMode)
+ '''
+
+ def buildTernaryFpOp(name, opClass, sOp, dOp):
+ global header_output, decoder_output, exec_output
+ for isDouble in True, False:
+ code = vfp64EnabledCheckCode + '''
+ FPSCR fpscr = (FPSCR) FpscrExc;
+ '''
+ if isDouble:
+ code += '''
+ uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32;
+ uint64_t cOp2 = AA64FpOp2P0_uw | (uint64_t)AA64FpOp2P1_uw << 32;
+ uint64_t cOp3 = AA64FpOp3P0_uw | (uint64_t)AA64FpOp3P1_uw << 32;
+ uint64_t cDest;
+ ''' "cDest = " + dOp + ";" + '''
+ AA64FpDestP0_uw = cDest;
+ AA64FpDestP1_uw = cDest >> 32;
+ '''
+ else:
+ code += '''
+ uint32_t cOp1 = AA64FpOp1P0_uw;
+ uint32_t cOp2 = AA64FpOp2P0_uw;
+ uint32_t cOp3 = AA64FpOp3P0_uw;
+ uint32_t cDest;
+ ''' "cDest = " + sOp + ";" + '''
+ AA64FpDestP0_uw = cDest;
+ AA64FpDestP1_uw = 0;
+ '''
+ code += '''
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ FpscrExc = fpscr;
+ '''
+
+ iop = InstObjParams(name.lower(), name + ("D" if isDouble else "S"),
+ "FpRegRegRegRegOp",
+ { "code": code, "op_class": opClass }, [])
+
+ header_output += AA64FpRegRegRegRegOpDeclare.subst(iop)
+ decoder_output += AA64FpRegRegRegRegOpConstructor.subst(iop)
+ exec_output += BasicExecute.subst(iop)
+
+ buildTernaryFpOp("FMAdd", "SimdFloatMultAccOp",
+ "fplibMulAdd<uint32_t>(cOp3, cOp1, cOp2, fpscr)",
+ "fplibMulAdd<uint64_t>(cOp3, cOp1, cOp2, fpscr)" )
+ buildTernaryFpOp("FMSub", "SimdFloatMultAccOp",
+ "fplibMulAdd<uint32_t>(cOp3, fplibNeg<uint32_t>(cOp1), cOp2, fpscr)",
+ "fplibMulAdd<uint64_t>(cOp3, fplibNeg<uint64_t>(cOp1), cOp2, fpscr)" )
+ buildTernaryFpOp("FNMAdd", "SimdFloatMultAccOp",
+ "fplibMulAdd<uint32_t>(fplibNeg<uint32_t>(cOp3), fplibNeg<uint32_t>(cOp1), cOp2, fpscr)",
+ "fplibMulAdd<uint64_t>(fplibNeg<uint64_t>(cOp3), fplibNeg<uint64_t>(cOp1), cOp2, fpscr)" )
+ buildTernaryFpOp("FNMSub", "SimdFloatMultAccOp",
+ "fplibMulAdd<uint32_t>(fplibNeg<uint32_t>(cOp3), cOp1, cOp2, fpscr)",
+ "fplibMulAdd<uint64_t>(fplibNeg<uint64_t>(cOp3), cOp1, cOp2, fpscr)" )
+
+ def buildBinFpOp(name, Name, base, opClass, singleOp, doubleOp):
+ global header_output, decoder_output, exec_output
+
+ code = singleIntConvCode2 % { "op": singleOp }
+ sIop = InstObjParams(name, Name + "S", base,
+ { "code": code,
+ "op_class": opClass }, [])
+
+ code = doubleIntConvCode2 % { "op": doubleOp }
+ dIop = InstObjParams(name, Name + "D", base,
+ { "code": code,
+ "op_class": opClass }, [])
+
+ declareTempl = eval( base + "Declare");
+ constructorTempl = eval("AA64" + base + "Constructor");
+
+ for iop in sIop, dIop:
+ header_output += declareTempl.subst(iop)
+ decoder_output += constructorTempl.subst(iop)
+ exec_output += BasicExecute.subst(iop)
+
+ buildBinFpOp("fadd", "FAdd", "FpRegRegRegOp", "SimdFloatAddOp",
+ "fplibAdd<uint32_t>(cOp1, cOp2, fpscr)",
+ "fplibAdd<uint64_t>(cOp1, cOp2, fpscr)")
+ buildBinFpOp("fsub", "FSub", "FpRegRegRegOp", "SimdFloatAddOp",
+ "fplibSub<uint32_t>(cOp1, cOp2, fpscr)",
+ "fplibSub<uint64_t>(cOp1, cOp2, fpscr)")
+ buildBinFpOp("fdiv", "FDiv", "FpRegRegRegOp", "SimdFloatDivOp",
+ "fplibDiv<uint32_t>(cOp1, cOp2, fpscr)",
+ "fplibDiv<uint64_t>(cOp1, cOp2, fpscr)")
+ buildBinFpOp("fmul", "FMul", "FpRegRegRegOp", "SimdFloatMultOp",
+ "fplibMul<uint32_t>(cOp1, cOp2, fpscr)",
+ "fplibMul<uint64_t>(cOp1, cOp2, fpscr)")
+ buildBinFpOp("fnmul", "FNMul", "FpRegRegRegOp", "SimdFloatMultOp",
+ "fplibNeg<uint32_t>(fplibMul<uint32_t>(cOp1, cOp2, fpscr))",
+ "fplibNeg<uint64_t>(fplibMul<uint64_t>(cOp1, cOp2, fpscr))")
+ buildBinFpOp("fmin", "FMin", "FpRegRegRegOp", "SimdFloatCmpOp",
+ "fplibMin<uint32_t>(cOp1, cOp2, fpscr)",
+ "fplibMin<uint64_t>(cOp1, cOp2, fpscr)")
+ buildBinFpOp("fmax", "FMax", "FpRegRegRegOp", "SimdFloatCmpOp",
+ "fplibMax<uint32_t>(cOp1, cOp2, fpscr)",
+ "fplibMax<uint64_t>(cOp1, cOp2, fpscr)")
+ buildBinFpOp("fminnm", "FMinNM", "FpRegRegRegOp", "SimdFloatCmpOp",
+ "fplibMinNum<uint32_t>(cOp1, cOp2, fpscr)",
+ "fplibMinNum<uint64_t>(cOp1, cOp2, fpscr)")
+ buildBinFpOp("fmaxnm", "FMaxNM", "FpRegRegRegOp", "SimdFloatCmpOp",
+ "fplibMaxNum<uint32_t>(cOp1, cOp2, fpscr)",
+ "fplibMaxNum<uint64_t>(cOp1, cOp2, fpscr)")
+
+ def buildUnaryFpOp(name, Name, base, opClass, singleOp, doubleOp = None):
+ if doubleOp is None:
+ doubleOp = singleOp
+ global header_output, decoder_output, exec_output
+
+ code = singleIntConvCode % { "op": singleOp }
+ sIop = InstObjParams(name, Name + "S", base,
+ { "code": code,
+ "op_class": opClass }, [])
+ code = doubleIntConvCode % { "op": doubleOp }
+ dIop = InstObjParams(name, Name + "D", base,
+ { "code": code,
+ "op_class": opClass }, [])
+
+ declareTempl = eval( base + "Declare");
+ constructorTempl = eval("AA64" + base + "Constructor");
+
+ for iop in sIop, dIop:
+ header_output += declareTempl.subst(iop)
+ decoder_output += constructorTempl.subst(iop)
+ exec_output += BasicExecute.subst(iop)
+
+ buildUnaryFpOp("fsqrt", "FSqrt", "FpRegRegOp", "SimdFloatSqrtOp",
+ "fplibSqrt<uint32_t>(cOp1, fpscr)", "fplibSqrt<uint64_t>(cOp1, fpscr)")
+
+ def buildSimpleUnaryFpOp(name, Name, base, opClass, singleOp,
+ doubleOp = None, isIntConv = True):
+ if doubleOp is None:
+ doubleOp = singleOp
+ global header_output, decoder_output, exec_output
+
+ if isIntConv:
+ sCode = singleIntConvCode
+ dCode = doubleIntConvCode
+ else:
+ sCode = singleCode
+ dCode = doubleCode
+
+ for code, op, suffix in [[sCode, singleOp, "S"],
+ [dCode, doubleOp, "D"]]:
+ iop = InstObjParams(name, Name + suffix, base,
+ { "code": code % { "op": op },
+ "op_class": opClass }, [])
+
+ declareTempl = eval( base + "Declare");
+ constructorTempl = eval("AA64" + base + "Constructor");
+
+ header_output += declareTempl.subst(iop)
+ decoder_output += constructorTempl.subst(iop)
+ exec_output += BasicExecute.subst(iop)
+
+ buildSimpleUnaryFpOp("fneg", "FNeg", "FpRegRegOp", "SimdFloatMiscOp",
+ "fplibNeg<uint32_t>(cOp1)", "fplibNeg<uint64_t>(cOp1)")
+ buildSimpleUnaryFpOp("fabs", "FAbs", "FpRegRegOp", "SimdFloatMiscOp",
+ "fplibAbs<uint32_t>(cOp1)", "fplibAbs<uint64_t>(cOp1)")
+ buildSimpleUnaryFpOp("frintn", "FRIntN", "FpRegRegOp", "SimdFloatMiscOp",
+ "fplibRoundInt<uint32_t>(cOp1, FPRounding_TIEEVEN, false, fpscr)",
+ "fplibRoundInt<uint64_t>(cOp1, FPRounding_TIEEVEN, false, fpscr)")
+ buildSimpleUnaryFpOp("frintp", "FRIntP", "FpRegRegOp", "SimdFloatMiscOp",
+ "fplibRoundInt<uint32_t>(cOp1, FPRounding_POSINF, false, fpscr)",
+ "fplibRoundInt<uint64_t>(cOp1, FPRounding_POSINF, false, fpscr)")
+ buildSimpleUnaryFpOp("frintm", "FRIntM", "FpRegRegOp", "SimdFloatMiscOp",
+ "fplibRoundInt<uint32_t>(cOp1, FPRounding_NEGINF, false, fpscr)",
+ "fplibRoundInt<uint64_t>(cOp1, FPRounding_NEGINF, false, fpscr)")
+ buildSimpleUnaryFpOp("frintz", "FRIntZ", "FpRegRegOp", "SimdFloatMiscOp",
+ "fplibRoundInt<uint32_t>(cOp1, FPRounding_ZERO, false, fpscr)",
+ "fplibRoundInt<uint64_t>(cOp1, FPRounding_ZERO, false, fpscr)")
+ buildSimpleUnaryFpOp("frinta", "FRIntA", "FpRegRegOp", "SimdFloatMiscOp",
+ "fplibRoundInt<uint32_t>(cOp1, FPRounding_TIEAWAY, false, fpscr)",
+ "fplibRoundInt<uint64_t>(cOp1, FPRounding_TIEAWAY, false, fpscr)")
+ buildSimpleUnaryFpOp("frinti", "FRIntI", "FpRegRegOp", "SimdFloatMiscOp",
+ "fplibRoundInt<uint32_t>(cOp1, FPCRRounding(fpscr), false, fpscr)",
+ "fplibRoundInt<uint64_t>(cOp1, FPCRRounding(fpscr), false, fpscr)")
+ buildSimpleUnaryFpOp("frintx", "FRIntX", "FpRegRegOp", "SimdFloatMiscOp",
+ "fplibRoundInt<uint32_t>(cOp1, FPCRRounding(fpscr), true, fpscr)",
+ "fplibRoundInt<uint64_t>(cOp1, FPCRRounding(fpscr), true, fpscr)")
+}};
+
+let {{
+
+ header_output = ""
+ decoder_output = ""
+ exec_output = ""
+
+ # Creates the integer to floating point instructions, including variants for
+ # signed/unsigned, float/double, etc
+ for regL, regOpL, width in [["W", "w", 32],
+ ["X", "d", 64]]:
+ for isDouble in True, False:
+ for us, usCode in [["U", "uint%d_t cSrc = %sOp1_u%s;" %(width, regL, regOpL)],
+ ["S", "int%d_t cSrc = %sOp1_u%s;" %(width, regL, regOpL)]]:
+ fcvtIntFpDCode = vfp64EnabledCheckCode + '''
+ FPSCR fpscr = (FPSCR) FpscrExc;
+ %s
+ ''' %(usCode)
+
+ if isDouble:
+ fcvtIntFpDCode += '''
+ uint64_t cDest = fplibFixedToFP<uint64_t>(cSrc, 0,
+ %s, FPCRRounding(fpscr), fpscr);
+ AA64FpDestP0_uw = cDest;
+ AA64FpDestP1_uw = cDest >> 32;
+ ''' % ("true" if us == "U" else "false")
+ else:
+ fcvtIntFpDCode += '''
+ uint32_t cDest = fplibFixedToFP<uint32_t>(cSrc, 0,
+ %s, FPCRRounding(fpscr), fpscr);
+ AA64FpDestP0_uw = cDest;
+ AA64FpDestP1_uw = 0;
+ ''' % ("true" if us == "U" else "false")
+ fcvtIntFpDCode += '''
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ FpscrExc = fpscr;
+ '''
+
+ instName = "Fcvt%s%sIntFp%s" %(regL, us, "D" if isDouble else "S")
+ mnem = "%scvtf" %(us.lower())
+ fcvtIntFpDIop = InstObjParams(mnem, instName, "FpRegRegOp",
+ { "code": fcvtIntFpDCode,
+ "op_class": "SimdFloatCvtOp" }, [])
+ header_output += FpRegRegOpDeclare.subst(fcvtIntFpDIop);
+ decoder_output += AA64FpRegRegOpConstructor.subst(fcvtIntFpDIop);
+ exec_output += BasicExecute.subst(fcvtIntFpDIop);
+
+ # Generates the floating point to integer conversion instructions in various
+ # variants, eg signed/unsigned
+ def buildFpCvtIntOp(isDouble, isSigned, isXReg):
+ global header_output, decoder_output, exec_output
+
+ for rmode, roundingMode in [["N", "FPRounding_TIEEVEN"],
+ ["P", "FPRounding_POSINF"],
+ ["M", "FPRounding_NEGINF"],
+ ["Z", "FPRounding_ZERO"],
+ ["A", "FPRounding_TIEAWAY"]]:
+ fcvtFpIntCode = vfp64EnabledCheckCode + '''
+ FPSCR fpscr = (FPSCR) FpscrExc;'''
+ if isDouble:
+ fcvtFpIntCode += '''
+ uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32;
+ '''
+ else:
+ fcvtFpIntCode += "uint32_t cOp1 = AA64FpOp1P0_uw;"
+
+ fcvtFpIntCode += '''
+ %sDest = fplibFPToFixed<uint%s_t, uint%s_t>(cOp1, 0, %s, %s, fpscr);
+ FpscrExc = fpscr;
+ ''' %("X" if isXReg else "W",
+ "64" if isDouble else "32",
+ "64" if isXReg else "32",
+ "false" if isSigned else "true",
+ roundingMode)
+
+ instName = "FcvtFp%sInt%s%s%s" %("S" if isSigned else "U",
+ "X" if isXReg else "W",
+ "D" if isDouble else "S", rmode)
+ mnem = "fcvt%s%s" %(rmode, "s" if isSigned else "u")
+ fcvtFpIntIop = InstObjParams(mnem, instName, "FpRegRegOp",
+ { "code": fcvtFpIntCode,
+ "op_class": "SimdFloatCvtOp" }, [])
+ header_output += FpRegRegOpDeclare.subst(fcvtFpIntIop);
+ decoder_output += FpRegRegOpConstructor.subst(fcvtFpIntIop);
+ exec_output += BasicExecute.subst(fcvtFpIntIop);
+
+ # Now actually do the building with the different variants
+ for isDouble in True, False:
+ for isSigned in True, False:
+ for isXReg in True, False:
+ buildFpCvtIntOp(isDouble, isSigned, isXReg)
+
+ fcvtFpSFpDCode = vfp64EnabledCheckCode + '''
+ FPSCR fpscr = (FPSCR) FpscrExc;
+ uint64_t cDest = fplibConvert<uint32_t, uint64_t>(AA64FpOp1P0_uw,
+ FPCRRounding(fpscr), fpscr);
+ AA64FpDestP0_uw = cDest;
+ AA64FpDestP1_uw = cDest >> 32;
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ FpscrExc = fpscr;
+ '''
+ fcvtFpSFpDIop = InstObjParams("fcvt", "FCvtFpSFpD", "FpRegRegOp",
+ { "code": fcvtFpSFpDCode,
+ "op_class": "SimdFloatCvtOp" }, [])
+ header_output += FpRegRegOpDeclare.subst(fcvtFpSFpDIop);
+ decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpSFpDIop);
+ exec_output += BasicExecute.subst(fcvtFpSFpDIop);
+
+ fcvtFpDFpSCode = vfp64EnabledCheckCode + '''
+ FPSCR fpscr = (FPSCR) FpscrExc;
+ uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32;
+ AA64FpDestP0_uw = fplibConvert<uint64_t, uint32_t>(cOp1,
+ FPCRRounding(fpscr), fpscr);
+ AA64FpDestP1_uw = 0;
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ FpscrExc = fpscr;
+ '''
+ fcvtFpDFpSIop = InstObjParams("fcvt", "FcvtFpDFpS", "FpRegRegOp",
+ {"code": fcvtFpDFpSCode,
+ "op_class": "SimdFloatCvtOp" }, [])
+ header_output += FpRegRegOpDeclare.subst(fcvtFpDFpSIop);
+ decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpDFpSIop);
+ exec_output += BasicExecute.subst(fcvtFpDFpSIop);
+
+ # Half precision to single or double precision conversion
+ for isDouble in True, False:
+ code = vfp64EnabledCheckCode + '''
+ FPSCR fpscr = (FPSCR) FpscrExc;
+ %s cDest = fplibConvert<uint16_t, uint%s_t>(AA64FpOp1P0_uw,
+ FPCRRounding(fpscr), fpscr);
+ ''' % ("uint64_t" if isDouble else "uint32_t",
+ "64" if isDouble else "32")
+ if isDouble:
+ code += '''
+ AA64FpDestP0_uw = cDest;
+ AA64FpDestP1_uw = cDest >> 32;
+ '''
+ else:
+ code += '''
+ AA64FpDestP0_uw = cDest;
+ AA64FpDestP1_uw = 0;
+ '''
+ code += '''
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ FpscrExc = fpscr;
+ '''
+
+ instName = "FcvtFpHFp%s" %("D" if isDouble else "S")
+ fcvtFpHFpIop = InstObjParams("fcvt", instName, "FpRegRegOp",
+ { "code": code,
+ "op_class": "SimdFloatCvtOp" }, [])
+ header_output += FpRegRegOpDeclare.subst(fcvtFpHFpIop);
+ decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpHFpIop);
+ exec_output += BasicExecute.subst(fcvtFpHFpIop);
+
+ # single or double precision to Half precision conversion
+ for isDouble in True, False:
+ code = vfp64EnabledCheckCode + '''
+ FPSCR fpscr = (FPSCR) FpscrExc;
+ %s;
+ AA64FpDestP0_uw = fplibConvert<uint%s_t, uint16_t>(cOp1,
+ FPCRRounding(fpscr), fpscr);
+ AA64FpDestP1_uw = 0;
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ FpscrExc = fpscr;
+ ''' % ("uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32"
+ if isDouble else "uint32_t cOp1 = AA64FpOp1P0_uw",
+ "64" if isDouble else "32")
+
+ instName = "FcvtFp%sFpH" %("D" if isDouble else "S")
+ fcvtFpFpHIop = InstObjParams("fcvt", instName, "FpRegRegOp",
+ { "code": code,
+ "op_class": "SimdFloatCvtOp" }, [])
+ header_output += FpRegRegOpDeclare.subst(fcvtFpFpHIop);
+ decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpFpHIop);
+ exec_output += BasicExecute.subst(fcvtFpFpHIop);
+
+ # Build the various versions of the floating point compare instructions
+ def buildFCmpOp(isQuiet, isDouble, isImm):
+ global header_output, decoder_output, exec_output
+
+ fcmpCode = vfp64EnabledCheckCode + '''
+ FPSCR fpscr = (FPSCR) FpscrExc;
+ %s cOp1 = %s;
+ ''' % ("uint64_t" if isDouble else "uint32_t",
+ "AA64FpDestP0_uw | (uint64_t)AA64FpDestP1_uw << 32"
+ if isDouble else "AA64FpDestP0_uw")
+ if isImm:
+ fcmpCode += '''
+ %s cOp2 = imm;
+ ''' % ("uint64_t" if isDouble else "uint32_t")
+ else:
+ fcmpCode += '''
+ %s cOp2 = %s;
+ ''' % ("uint64_t" if isDouble else "uint32_t",
+ "AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32"
+ if isDouble else "AA64FpOp1P0_uw")
+ fcmpCode += '''
+ int cc = fplibCompare<uint%s_t>(cOp1, cOp2, %s, fpscr);
+ CondCodesNZ = cc >> 2 & 3;
+ CondCodesC = cc >> 1 & 1;
+ CondCodesV = cc & 1;
+ FpCondCodes = fpscr & FpCondCodesMask;
+ FpscrExc = fpscr;
+ ''' % ("64" if isDouble else "32", "false" if isQuiet else "true")
+
+ typeName = "Imm" if isImm else "Reg"
+ instName = "FCmp%s%s%s" %("" if isQuiet else "E", typeName,
+ "D" if isDouble else "S")
+ fcmpIop = InstObjParams("fcmp%s" %("" if isQuiet else "e"), instName,
+ "FpReg%sOp" %(typeName),
+ {"code": fcmpCode,
+ "op_class": "SimdFloatCmpOp"}, [])
+
+ declareTemp = eval("FpReg%sOpDeclare" %(typeName));
+ constructorTemp = eval("AA64FpReg%sOpConstructor" %(typeName));
+ header_output += declareTemp.subst(fcmpIop);
+ decoder_output += constructorTemp.subst(fcmpIop);
+ exec_output += BasicExecute.subst(fcmpIop);
+
+ for isQuiet in True, False:
+ for isDouble in True, False:
+ for isImm in True, False:
+ buildFCmpOp(isQuiet, isDouble, isImm)
+
+ # Build the various versions of the conditional floating point compare
+ # instructions
+ def buildFCCmpOp(isQuiet, isDouble):
+ global header_output, decoder_output, exec_output
+
+ fccmpCode = vfp64EnabledCheckCode + '''
+ FPSCR fpscr = (FPSCR) FpscrExc;
+ if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) {
+ %s cOp1 = %s;
+ %s cOp2 = %s;
+ int cc = fplibCompare<uint%s_t>(cOp1, cOp2, %s, fpscr);
+ CondCodesNZ = cc >> 2 & 3;
+ CondCodesC = cc >> 1 & 1;
+ CondCodesV = cc & 1;
+ } else {
+ CondCodesNZ = (defCc >> 2) & 0x3;
+ CondCodesC = (defCc >> 1) & 0x1;
+ CondCodesV = defCc & 0x1;
+ }
+ FpCondCodes = fpscr & FpCondCodesMask;
+ FpscrExc = fpscr;
+ ''' % ("uint64_t" if isDouble else "uint32_t",
+ "AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32"
+ if isDouble else "AA64FpOp1P0_uw",
+ "uint64_t" if isDouble else "uint32_t",
+ "AA64FpOp2P0_uw | (uint64_t)AA64FpOp2P1_uw << 32"
+ if isDouble else "AA64FpOp2P0_uw",
+ "64" if isDouble else "32", "false" if isQuiet else "true")
+
+ instName = "FCCmp%sReg%s" %("" if isQuiet else "E",
+ "D" if isDouble else "S")
+ fccmpIop = InstObjParams("fccmp%s" %("" if isQuiet else "e"),
+ instName, "FpCondCompRegOp",
+ {"code": fccmpCode,
+ "op_class": "SimdFloatCmpOp"}, [])
+ header_output += DataXCondCompRegDeclare.subst(fccmpIop);
+ decoder_output += DataXCondCompRegConstructor.subst(fccmpIop);
+ exec_output += BasicExecute.subst(fccmpIop);
+
+ for isQuiet in True, False:
+ for isDouble in True, False:
+ buildFCCmpOp(isQuiet, isDouble)
+
+}};
+
+let {{
+
+ header_output = ""
+ decoder_output = ""
+ exec_output = ""
+
+ # Generates the variants of the floating to fixed point instructions
+ def buildFpCvtFixedOp(isSigned, isDouble, isXReg):
+ global header_output, decoder_output, exec_output
+
+ fcvtFpFixedCode = vfp64EnabledCheckCode + '''
+ FPSCR fpscr = (FPSCR) FpscrExc;
+ '''
+ if isDouble:
+ fcvtFpFixedCode += '''
+ uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32;
+ '''
+ else:
+ fcvtFpFixedCode += "uint32_t cOp1 = AA64FpOp1P0_uw;"
+ fcvtFpFixedCode += '''
+ %sDest = fplibFPToFixed<uint%s_t, uint%s_t>(cOp1, 64 - imm, %s,
+ FPRounding_ZERO, fpscr);
+ FpscrExc = fpscr;
+ ''' %("X" if isXReg else "W",
+ "64" if isDouble else "32",
+ "64" if isXReg else "32",
+ "false" if isSigned else "true")
+
+ instName = "FcvtFp%sFixed%s%s" %("S" if isSigned else "U",
+ "D" if isDouble else "S",
+ "X" if isXReg else "W")
+ mnem = "fcvtz%s" %("s" if isSigned else "u")
+ fcvtFpFixedIop = InstObjParams(mnem, instName, "FpRegRegImmOp",
+ { "code": fcvtFpFixedCode,
+ "op_class": "SimdFloatCvtOp" }, [])
+ header_output += FpRegRegImmOpDeclare.subst(fcvtFpFixedIop);
+ decoder_output += AA64FpRegRegImmOpConstructor.subst(fcvtFpFixedIop);
+ exec_output += BasicExecute.subst(fcvtFpFixedIop);
+
+ # Generates the variants of the fixed to floating point instructions
+ def buildFixedCvtFpOp(isSigned, isDouble, isXReg):
+ global header_output, decoder_output, exec_output
+
+ srcRegType = "X" if isXReg else "W"
+ fcvtFixedFpCode = vfp64EnabledCheckCode + '''
+ FPSCR fpscr = (FPSCR) FpscrExc;
+ %s result = fplibFixedToFP<uint%s_t>((%s%s_t)%sOp1, 64 - imm,
+ %s, FPCRRounding(fpscr), fpscr);
+ ''' %("uint64_t" if isDouble else "uint32_t",
+ "64" if isDouble else "32",
+ "int" if isSigned else "uint", "64" if isXReg else "32",
+ srcRegType,
+ "false" if isSigned else "true")
+ if isDouble:
+ fcvtFixedFpCode += '''
+ AA64FpDestP0_uw = result;
+ AA64FpDestP1_uw = result >> 32;
+ '''
+ else:
+ fcvtFixedFpCode += '''
+ AA64FpDestP0_uw = result;
+ AA64FpDestP1_uw = 0;
+ '''
+ fcvtFixedFpCode += '''
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ FpscrExc = fpscr;
+ '''
+
+ instName = "Fcvt%sFixedFp%s%s" %("S" if isSigned else "U",
+ "D" if isDouble else "S",
+ srcRegType)
+ mnem = "%scvtf" %("s" if isSigned else "u")
+ fcvtFixedFpIop = InstObjParams(mnem, instName, "FpRegRegImmOp",
+ { "code": fcvtFixedFpCode,
+ "op_class": "SimdFloatCvtOp" }, [])
+ header_output += FpRegRegImmOpDeclare.subst(fcvtFixedFpIop);
+ decoder_output += FpRegRegImmOpConstructor.subst(fcvtFixedFpIop);
+ exec_output += BasicExecute.subst(fcvtFixedFpIop);
+
+ # loop over the variants building the instructions for each
+ for isXReg in True, False:
+ for isDouble in True, False:
+ for isSigned in True, False:
+ buildFpCvtFixedOp(isSigned, isDouble, isXReg)
+ buildFixedCvtFpOp(isSigned, isDouble, isXReg)
+}};
+
+let {{
+
+ header_output = ""
+ decoder_output = ""
+ exec_output = ""
+
+ for isDouble in True, False:
+ code = '''
+ if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) {
+ AA64FpDestP0_uw = AA64FpOp1P0_uw;
+ '''
+ if isDouble:
+ code += '''
+ AA64FpDestP1_uw = AA64FpOp1P1_uw;
+ } else {
+ AA64FpDestP0_uw = AA64FpOp2P0_uw;
+ AA64FpDestP1_uw = AA64FpOp2P1_uw;
+ }
+ '''
+ else:
+ code += '''
+ } else {
+ AA64FpDestP0_uw = AA64FpOp2P0_uw;
+ }
+ AA64FpDestP1_uw = 0;
+ '''
+ code += '''
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ '''
+
+ iop = InstObjParams("fcsel", "FCSel%s" %("D" if isDouble else "S"),
+ "FpCondSelOp", code)
+ header_output += DataXCondSelDeclare.subst(iop)
+ decoder_output += DataXCondSelConstructor.subst(iop)
+ exec_output += BasicExecute.subst(iop)
+}};