From 3111a6216924643c332cd8ae3ebdd66fcbbf2c0f Mon Sep 17 00:00:00 2001
From: Gabe Black
Date: Wed, 2 Jun 2010 12:58:14 -0500
Subject: ARM: Implement the VFP versions of VMLA and VMLS.

---
 src/arch/arm/isa/formats/fp.isa | 42 ++++++++++++++++++++++++-
 src/arch/arm/isa/insts/fp.isa   | 70 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 111 insertions(+), 1 deletion(-)

(limited to 'src/arch')

Reviewer notes (not part of the commit message; this area before the first
"diff --git" line is skipped by patch tools):

 * formats/fp.isa: the "case 0x0" arm, which used to return
   WarnUnimplemented("vmla, vmls", machInst), now decodes four instructions.
   Bit 6 of machInst selects VmlaS/VmlaD (0) versus VmlsS/VmlsD (1); bit 8
   selects the S class (0) versus the D class (1). Every path through the new
   code returns, so control never falls through into "case 0x2".
 * insts/fp.isa: each *Code snippet computes mid = op1 * op2, overrides mid
   with NAN when one operand is infinite and the other compares equal to
   zero, and then adds (vmla) or subtracts (vmls) mid to/from the
   destination. The D variants build 64-bit values from the P0/P1 32-bit
   halves and write the result back as two halves.
 * NOTE(review): leading whitespace on context and added lines was
   reconstructed from a whitespace-collapsed copy of this patch. Apply with
   whitespace tolerance (e.g. "git apply --ignore-whitespace" or "patch -l")
   and confirm the indentation against the tree.

diff --git a/src/arch/arm/isa/formats/fp.isa b/src/arch/arm/isa/formats/fp.isa
index 2cca96bea..9bb062a2e 100644
--- a/src/arch/arm/isa/formats/fp.isa
+++ b/src/arch/arm/isa/formats/fp.isa
@@ -483,7 +483,47 @@ let {{
         //const uint32_t opc4 = bits(machInst, 3, 0);
         switch (opc1 & 0xb /* 1011 */) {
           case 0x0:
-            return new WarnUnimplemented("vmla, vmls", machInst);
+            if (bits(machInst, 6) == 0) {
+                uint32_t vd;
+                uint32_t vm;
+                uint32_t vn;
+                if (bits(machInst, 8) == 0) {
+                    vd = bits(machInst, 22) | (bits(machInst, 15, 12) << 1);
+                    vm = bits(machInst, 5) | (bits(machInst, 3, 0) << 1);
+                    vn = bits(machInst, 7) | (bits(machInst, 19, 16) << 1);
+                    return new VmlaS(machInst, (IntRegIndex)vd,
+                                     (IntRegIndex)vn, (IntRegIndex)vm);
+                } else {
+                    vd = (bits(machInst, 22) << 5) |
+                         (bits(machInst, 15, 12) << 1);
+                    vm = (bits(machInst, 5) << 5) |
+                         (bits(machInst, 3, 0) << 1);
+                    vn = (bits(machInst, 7) << 5) |
+                         (bits(machInst, 19, 16) << 1);
+                    return new VmlaD(machInst, (IntRegIndex)vd,
+                                     (IntRegIndex)vn, (IntRegIndex)vm);
+                }
+            } else {
+                uint32_t vd;
+                uint32_t vm;
+                uint32_t vn;
+                if (bits(machInst, 8) == 0) {
+                    vd = bits(machInst, 22) | (bits(machInst, 15, 12) << 1);
+                    vm = bits(machInst, 5) | (bits(machInst, 3, 0) << 1);
+                    vn = bits(machInst, 7) | (bits(machInst, 19, 16) << 1);
+                    return new VmlsS(machInst, (IntRegIndex)vd,
+                                     (IntRegIndex)vn, (IntRegIndex)vm);
+                } else {
+                    vd = (bits(machInst, 22) << 5) |
+                         (bits(machInst, 15, 12) << 1);
+                    vm = (bits(machInst, 5) << 5) |
+                         (bits(machInst, 3, 0) << 1);
+                    vn = (bits(machInst, 7) << 5) |
+                         (bits(machInst, 19, 16) << 1);
+                    return new VmlsD(machInst, (IntRegIndex)vd,
+                                     (IntRegIndex)vn, (IntRegIndex)vm);
+                }
+            }
           case 0x2:
             if ((opc3 & 0x1) == 0) {
                 uint32_t vd;
diff --git a/src/arch/arm/isa/insts/fp.isa b/src/arch/arm/isa/insts/fp.isa
index 99efcec32..58c2cafa7 100644
--- a/src/arch/arm/isa/insts/fp.isa
+++ b/src/arch/arm/isa/insts/fp.isa
@@ -411,4 +411,74 @@ let {{
     header_output += RegRegOpDeclare.subst(vsqrtDIop);
     decoder_output += RegRegOpConstructor.subst(vsqrtDIop);
     exec_output += PredOpExecute.subst(vsqrtDIop);
+
+    vmlaSCode = '''
+        float mid = FpOp1 * FpOp2;
+        if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) {
+            mid = NAN;
+        }
+        FpDest = FpDest + mid;
+    '''
+    vmlaSIop = InstObjParams("vmlas", "VmlaS", "RegRegRegOp",
+                             { "code": vmlaSCode,
+                               "predicate_test": predicateTest }, [])
+    header_output += RegRegRegOpDeclare.subst(vmlaSIop);
+    decoder_output += RegRegRegOpConstructor.subst(vmlaSIop);
+    exec_output += PredOpExecute.subst(vmlaSIop);
+
+    vmlaDCode = '''
+        IntDoubleUnion cOp1, cOp2, cDest;
+        cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32));
+        cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32));
+        double mid = cOp1.fp * cOp2.fp;
+        if ((isinf(cOp1.fp) && cOp2.fp == 0) ||
+            (isinf(cOp2.fp) && cOp1.fp == 0)) {
+            mid = NAN;
+        }
+        cDest.fp = cDest.fp + mid;
+        FpDestP0.uw = cDest.bits;
+        FpDestP1.uw = cDest.bits >> 32;
+    '''
+    vmlaDIop = InstObjParams("vmlad", "VmlaD", "RegRegRegOp",
+                             { "code": vmlaDCode,
+                               "predicate_test": predicateTest }, [])
+    header_output += RegRegRegOpDeclare.subst(vmlaDIop);
+    decoder_output += RegRegRegOpConstructor.subst(vmlaDIop);
+    exec_output += PredOpExecute.subst(vmlaDIop);
+
+    vmlsSCode = '''
+        float mid = FpOp1 * FpOp2;
+        if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) {
+            mid = NAN;
+        }
+        FpDest = FpDest - mid;
+    '''
+    vmlsSIop = InstObjParams("vmlss", "VmlsS", "RegRegRegOp",
+                             { "code": vmlsSCode,
+                               "predicate_test": predicateTest }, [])
+    header_output += RegRegRegOpDeclare.subst(vmlsSIop);
+    decoder_output += RegRegRegOpConstructor.subst(vmlsSIop);
+    exec_output += PredOpExecute.subst(vmlsSIop);
+
+    vmlsDCode = '''
+        IntDoubleUnion cOp1, cOp2, cDest;
+        cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32));
+        cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32));
+        double mid = cOp1.fp * cOp2.fp;
+        if ((isinf(cOp1.fp) && cOp2.fp == 0) ||
+            (isinf(cOp2.fp) && cOp1.fp == 0)) {
+            mid = NAN;
+        }
+        cDest.fp = cDest.fp - mid;
+        FpDestP0.uw = cDest.bits;
+        FpDestP1.uw = cDest.bits >> 32;
+    '''
+    vmlsDIop = InstObjParams("vmlsd", "VmlsD", "RegRegRegOp",
+                             { "code": vmlsDCode,
+                               "predicate_test": predicateTest }, [])
+    header_output += RegRegRegOpDeclare.subst(vmlsDIop);
+    decoder_output += RegRegRegOpConstructor.subst(vmlsDIop);
+    exec_output += PredOpExecute.subst(vmlsDIop);
 }};
-- 
cgit v1.2.3