From 352d666fa1e9b5ae960127c95d19cf63c8ff0df7 Mon Sep 17 00:00:00 2001 From: Edmund Grimley Evans Date: Thu, 28 Jun 2018 14:32:01 +0100 Subject: arch-arm: Add FP16 support introduced by Armv8.2-A This changeset adds support for FP/SIMD instructions with half-precision floating-point operands. Change-Id: I4957f111c9c5e5d6a3747fe9d169d394d642fee8 Signed-off-by: Giacomo Gabrielli Reviewed-on: https://gem5-review.googlesource.com/13084 Reviewed-by: Andreas Sandberg Maintainer: Andreas Sandberg --- src/arch/arm/isa/insts/fp64.isa | 162 ++++++++++++++++++++++++++++---------- src/arch/arm/isa/insts/neon64.isa | 4 +- 2 files changed, 123 insertions(+), 43 deletions(-) (limited to 'src/arch/arm/isa/insts') diff --git a/src/arch/arm/isa/insts/fp64.isa b/src/arch/arm/isa/insts/fp64.isa index a5e1085de..6c0c6b808 100644 --- a/src/arch/arm/isa/insts/fp64.isa +++ b/src/arch/arm/isa/insts/fp64.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2012-2013, 2016 ARM Limited +// Copyright (c) 2012-2013, 2016-2018 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -172,6 +172,34 @@ let {{ decoder_output = "" exec_output = "" + halfIntConvCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + uint16_t cOp1 = AA64FpOp1P0_uw; + uint16_t cDest = %(op)s; + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + halfIntConvCode2 = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + uint16_t cOp1 = AA64FpOp1P0_uw; + uint16_t cOp2 = AA64FpOp2P0_uw; + uint16_t cDest = %(op)s; + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + halfBinOp = "binaryOp(fpscr, AA64FpOp1P0, AA64FpOp2P0," + \ + "%(func)s, fpscr.fz, fpscr.dn, fpscr.rMode)" + halfUnaryOp = "unaryOp(fpscr, AA64FpOp1P0," + \ + "%(func)s, fpscr.fz, fpscr.rMode)" + singleIntConvCode = vfp64EnabledCheckCode + ''' FPSCR fpscr = (FPSCR) FpscrExc; uint32_t cOp1 = AA64FpOp1P0_uw; @@ -232,23 +260,23 @@ let {{ fpscr.fz, fpscr.rMode) ''' - def buildTernaryFpOp(name, opClass, sOp, dOp): + def buildTernaryFpOp(name, opClass, hOp, sOp, dOp): global header_output, decoder_output, exec_output - for isDouble in True, False: + for suffix in "D", "S", "H": code = vfp64EnabledCheckCode + ''' FPSCR fpscr = (FPSCR) FpscrExc; ''' - if isDouble: + if suffix == "H": code += ''' - uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32; - uint64_t cOp2 = AA64FpOp2P0_uw | (uint64_t)AA64FpOp2P1_uw << 32; - uint64_t cOp3 = AA64FpOp3P0_uw | (uint64_t)AA64FpOp3P1_uw << 32; - uint64_t cDest; - ''' "cDest = " + dOp + ";" + ''' + uint16_t cOp1 = AA64FpOp1P0_uw; + uint16_t cOp2 = AA64FpOp2P0_uw; + uint16_t cOp3 = AA64FpOp3P0_uw; + uint16_t cDest; + ''' "cDest = " + hOp + ";" + ''' AA64FpDestP0_uw = cDest; - AA64FpDestP1_uw = cDest >> 32; + AA64FpDestP1_uw = 0; ''' - else: + elif suffix == "S": code += ''' uint32_t cOp1 = AA64FpOp1P0_uw; uint32_t cOp2 = AA64FpOp2P0_uw; @@ -258,13 +286,23 @@ let {{ AA64FpDestP0_uw = cDest; AA64FpDestP1_uw = 0; ''' + elif suffix == "D": + code += ''' + uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32; + uint64_t cOp2 = AA64FpOp2P0_uw | (uint64_t)AA64FpOp2P1_uw << 32; + uint64_t cOp3 = AA64FpOp3P0_uw | (uint64_t)AA64FpOp3P1_uw << 32; + uint64_t cDest; + ''' "cDest = " + dOp + ";" + ''' + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = cDest >> 32; + ''' code += ''' AA64FpDestP2_uw = 0; AA64FpDestP3_uw = 0; FpscrExc = fpscr; ''' - iop = InstObjParams(name.lower(), name + ("D" if isDouble else "S"), + iop = InstObjParams(name.lower(), name + suffix, "FpRegRegRegRegOp", { "code": code, "op_class": opClass }, []) @@ -273,21 +311,33 @@ let {{ exec_output += BasicExecute.subst(iop) buildTernaryFpOp("FMAdd", "FloatMultAccOp", + "fplibMulAdd(cOp3, cOp1, cOp2, fpscr)", "fplibMulAdd(cOp3, cOp1, cOp2, fpscr)", "fplibMulAdd(cOp3, cOp1, cOp2, fpscr)" ) buildTernaryFpOp("FMSub", "FloatMultAccOp", - "fplibMulAdd(cOp3, fplibNeg(cOp1), cOp2, fpscr)", - "fplibMulAdd(cOp3, fplibNeg(cOp1), cOp2, fpscr)" ) + "fplibMulAdd(cOp3, fplibNeg(cOp1), cOp2, fpscr)", + "fplibMulAdd(cOp3, fplibNeg(cOp1), cOp2, fpscr)", + "fplibMulAdd(cOp3, fplibNeg(cOp1), cOp2, fpscr)" ) buildTernaryFpOp("FNMAdd", "FloatMultAccOp", - "fplibMulAdd(fplibNeg(cOp3), fplibNeg(cOp1), cOp2, fpscr)", - "fplibMulAdd(fplibNeg(cOp3), fplibNeg(cOp1), cOp2, fpscr)" ) + "fplibMulAdd(fplibNeg(cOp3), " + + "fplibNeg(cOp1), cOp2, fpscr)", + "fplibMulAdd(fplibNeg(cOp3), " + + "fplibNeg(cOp1), cOp2, fpscr)", + "fplibMulAdd(fplibNeg(cOp3), " + + "fplibNeg(cOp1), cOp2, fpscr)" ) buildTernaryFpOp("FNMSub", "FloatMultAccOp", - "fplibMulAdd(fplibNeg(cOp3), cOp1, cOp2, fpscr)", - "fplibMulAdd(fplibNeg(cOp3), cOp1, cOp2, fpscr)" ) + "fplibMulAdd(fplibNeg(cOp3), cOp1, cOp2, fpscr)", + "fplibMulAdd(fplibNeg(cOp3), cOp1, cOp2, fpscr)", + "fplibMulAdd(fplibNeg(cOp3), cOp1, cOp2, fpscr)" ) - def buildBinFpOp(name, Name, base, opClass, singleOp, doubleOp): + def buildBinFpOp(name, Name, base, opClass, halfOp, singleOp, doubleOp): global header_output, decoder_output, exec_output + code = halfIntConvCode2 % { "op": halfOp } + hIop = InstObjParams(name, Name + "H", base, + { "code": code, + "op_class": opClass }, []) + code = singleIntConvCode2 % { "op": singleOp } sIop = InstObjParams(name, Name + "S", base, { "code": code, @@ -301,44 +351,58 @@ let {{ declareTempl = eval( base + "Declare"); constructorTempl = eval("AA64" + base + "Constructor"); - for iop in sIop, dIop: + for iop in hIop, sIop, dIop: header_output += declareTempl.subst(iop) decoder_output += constructorTempl.subst(iop) exec_output += BasicExecute.subst(iop) buildBinFpOp("fadd", "FAdd", "FpRegRegRegOp", "FloatAddOp", + "fplibAdd(cOp1, cOp2, fpscr)", "fplibAdd(cOp1, cOp2, fpscr)", "fplibAdd(cOp1, cOp2, fpscr)") buildBinFpOp("fsub", "FSub", "FpRegRegRegOp", "FloatAddOp", + "fplibSub(cOp1, cOp2, fpscr)", "fplibSub(cOp1, cOp2, fpscr)", "fplibSub(cOp1, cOp2, fpscr)") buildBinFpOp("fdiv", "FDiv", "FpRegRegRegOp", "FloatDivOp", + "fplibDiv(cOp1, cOp2, fpscr)", "fplibDiv(cOp1, cOp2, fpscr)", "fplibDiv(cOp1, cOp2, fpscr)") buildBinFpOp("fmul", "FMul", "FpRegRegRegOp", "FloatMultOp", + "fplibMul(cOp1, cOp2, fpscr)", "fplibMul(cOp1, cOp2, fpscr)", "fplibMul(cOp1, cOp2, fpscr)") buildBinFpOp("fnmul", "FNMul", "FpRegRegRegOp", "FloatMultOp", + "fplibNeg(fplibMul(cOp1, cOp2, fpscr))", "fplibNeg(fplibMul(cOp1, cOp2, fpscr))", "fplibNeg(fplibMul(cOp1, cOp2, fpscr))") buildBinFpOp("fmin", "FMin", "FpRegRegRegOp", "FloatCmpOp", + "fplibMin(cOp1, cOp2, fpscr)", "fplibMin(cOp1, cOp2, fpscr)", "fplibMin(cOp1, cOp2, fpscr)") buildBinFpOp("fmax", "FMax", "FpRegRegRegOp", "FloatCmpOp", + "fplibMax(cOp1, cOp2, fpscr)", "fplibMax(cOp1, cOp2, fpscr)", "fplibMax(cOp1, cOp2, fpscr)") buildBinFpOp("fminnm", "FMinNM", "FpRegRegRegOp", "FloatCmpOp", + "fplibMinNum(cOp1, cOp2, fpscr)", "fplibMinNum(cOp1, cOp2, fpscr)", "fplibMinNum(cOp1, cOp2, fpscr)") buildBinFpOp("fmaxnm", "FMaxNM", "FpRegRegRegOp", "FloatCmpOp", + "fplibMaxNum(cOp1, cOp2, fpscr)", "fplibMaxNum(cOp1, cOp2, fpscr)", "fplibMaxNum(cOp1, cOp2, fpscr)") - def buildUnaryFpOp(name, Name, base, opClass, singleOp, doubleOp = None): + def buildUnaryFpOp(name, Name, base, opClass, + halfOp, singleOp, doubleOp = None): if doubleOp is None: doubleOp = singleOp global header_output, decoder_output, exec_output + code = halfIntConvCode % { "op": halfOp } + hIop = InstObjParams(name, Name + "H", base, + { "code": code, + "op_class": opClass }, []) code = singleIntConvCode % { "op": singleOp } sIop = InstObjParams(name, Name + "S", base, { "code": code, @@ -351,28 +415,33 @@ let {{ declareTempl = eval( base + "Declare"); constructorTempl = eval("AA64" + base + "Constructor"); - for iop in sIop, dIop: + for iop in hIop, sIop, dIop: header_output += declareTempl.subst(iop) decoder_output += constructorTempl.subst(iop) exec_output += BasicExecute.subst(iop) buildUnaryFpOp("fsqrt", "FSqrt", "FpRegRegOp", "FloatSqrtOp", - "fplibSqrt(cOp1, fpscr)", "fplibSqrt(cOp1, fpscr)") + "fplibSqrt(cOp1, fpscr)", + "fplibSqrt(cOp1, fpscr)", + "fplibSqrt(cOp1, fpscr)") - def buildSimpleUnaryFpOp(name, Name, base, opClass, singleOp, + def buildSimpleUnaryFpOp(name, Name, base, opClass, halfOp, singleOp, doubleOp = None, isIntConv = True): if doubleOp is None: doubleOp = singleOp global header_output, decoder_output, exec_output if isIntConv: + hCode = halfIntConvCode sCode = singleIntConvCode dCode = doubleIntConvCode else: + hCode = halfCode sCode = singleCode dCode = doubleCode - for code, op, suffix in [[sCode, singleOp, "S"], + for code, op, suffix in [[hCode, halfOp, "H"], + [sCode, singleOp, "S"], [dCode, doubleOp, "D"]]: iop = InstObjParams(name, Name + suffix, base, { "code": code % { "op": op }, @@ -386,30 +455,41 @@ let {{ exec_output += BasicExecute.subst(iop) buildSimpleUnaryFpOp("fneg", "FNeg", "FpRegRegOp", "FloatMiscOp", - "fplibNeg(cOp1)", "fplibNeg(cOp1)") + "fplibNeg(cOp1)", + "fplibNeg(cOp1)", + "fplibNeg(cOp1)") buildSimpleUnaryFpOp("fabs", "FAbs", "FpRegRegOp", "FloatMiscOp", - "fplibAbs(cOp1)", "fplibAbs(cOp1)") + "fplibAbs(cOp1)", + "fplibAbs(cOp1)", + "fplibAbs(cOp1)") buildSimpleUnaryFpOp("frintn", "FRIntN", "FpRegRegOp", "FloatMiscOp", - "fplibRoundInt(cOp1, FPRounding_TIEEVEN, false, fpscr)", - "fplibRoundInt(cOp1, FPRounding_TIEEVEN, false, fpscr)") + "fplibRoundInt(cOp1, FPRounding_TIEEVEN, false, fpscr)", + "fplibRoundInt(cOp1, FPRounding_TIEEVEN, false, fpscr)", + "fplibRoundInt(cOp1, FPRounding_TIEEVEN, false, fpscr)") buildSimpleUnaryFpOp("frintp", "FRIntP", "FpRegRegOp", "FloatMiscOp", - "fplibRoundInt(cOp1, FPRounding_POSINF, false, fpscr)", - "fplibRoundInt(cOp1, FPRounding_POSINF, false, fpscr)") + "fplibRoundInt(cOp1, FPRounding_POSINF, false, fpscr)", + "fplibRoundInt(cOp1, FPRounding_POSINF, false, fpscr)", + "fplibRoundInt(cOp1, FPRounding_POSINF, false, fpscr)") buildSimpleUnaryFpOp("frintm", "FRIntM", "FpRegRegOp", "FloatMiscOp", - "fplibRoundInt(cOp1, FPRounding_NEGINF, false, fpscr)", - "fplibRoundInt(cOp1, FPRounding_NEGINF, false, fpscr)") + "fplibRoundInt(cOp1, FPRounding_NEGINF, false, fpscr)", + "fplibRoundInt(cOp1, FPRounding_NEGINF, false, fpscr)", + "fplibRoundInt(cOp1, FPRounding_NEGINF, false, fpscr)") buildSimpleUnaryFpOp("frintz", "FRIntZ", "FpRegRegOp", "FloatMiscOp", - "fplibRoundInt(cOp1, FPRounding_ZERO, false, fpscr)", - "fplibRoundInt(cOp1, FPRounding_ZERO, false, fpscr)") + "fplibRoundInt(cOp1, FPRounding_ZERO, false, fpscr)", + "fplibRoundInt(cOp1, FPRounding_ZERO, false, fpscr)", + "fplibRoundInt(cOp1, FPRounding_ZERO, false, fpscr)") buildSimpleUnaryFpOp("frinta", "FRIntA", "FpRegRegOp", "FloatMiscOp", - "fplibRoundInt(cOp1, FPRounding_TIEAWAY, false, fpscr)", - "fplibRoundInt(cOp1, FPRounding_TIEAWAY, false, fpscr)") + "fplibRoundInt(cOp1, FPRounding_TIEAWAY, false, fpscr)", + "fplibRoundInt(cOp1, FPRounding_TIEAWAY, false, fpscr)", + "fplibRoundInt(cOp1, FPRounding_TIEAWAY, false, fpscr)") buildSimpleUnaryFpOp("frinti", "FRIntI", "FpRegRegOp", "FloatMiscOp", - "fplibRoundInt(cOp1, FPCRRounding(fpscr), false, fpscr)", - "fplibRoundInt(cOp1, FPCRRounding(fpscr), false, fpscr)") + "fplibRoundInt(cOp1, FPCRRounding(fpscr), false, fpscr)", + "fplibRoundInt(cOp1, FPCRRounding(fpscr), false, fpscr)", + "fplibRoundInt(cOp1, FPCRRounding(fpscr), false, fpscr)") buildSimpleUnaryFpOp("frintx", "FRIntX", "FpRegRegOp", "FloatMiscOp", - "fplibRoundInt(cOp1, FPCRRounding(fpscr), true, fpscr)", - "fplibRoundInt(cOp1, FPCRRounding(fpscr), true, fpscr)") + "fplibRoundInt(cOp1, FPCRRounding(fpscr), true, fpscr)", + "fplibRoundInt(cOp1, FPCRRounding(fpscr), true, fpscr)", + "fplibRoundInt(cOp1, FPCRRounding(fpscr), true, fpscr)") }}; let {{ diff --git a/src/arch/arm/isa/insts/neon64.isa b/src/arch/arm/isa/insts/neon64.isa index 4897e7c91..eb130dbbd 100644 --- a/src/arch/arm/isa/insts/neon64.isa +++ b/src/arch/arm/isa/insts/neon64.isa @@ -1,6 +1,6 @@ // -*- mode: c++ -*- -// Copyright (c) 2012-2013, 2015-2016 ARM Limited +// Copyright (c) 2012-2013, 2015-2018 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -45,7 +45,7 @@ let {{ decoders = { 'Generic' : {} } # FP types (FP operations always work with unsigned representations) - floatTypes = ("uint32_t", "uint64_t") + floatTypes = ("uint16_t", "uint32_t", "uint64_t") smallFloatTypes = ("uint32_t",) def threeEqualRegInstX(name, Name, opClass, types, rCount, op, -- cgit v1.2.3