ARM: Implement the VFP versions of VMLA and VMLS.

author: Gabe Black <gblack@eecs.umich.edu> 2010-06-02 12:58:14 -0500
committer: Gabe Black <gblack@eecs.umich.edu> 2010-06-02 12:58:14 -0500
commit: 3111a6216924643c332cd8ae3ebdd66fcbbf2c0f (patch)
tree: 9b98dfd984995528df736a6444bce71bb6f6045f /src/arch
parent: 90d70a22cb15e6461fc7397a0f55322dc163f701 (diff)
download: gem5-3111a6216924643c332cd8ae3ebdd66fcbbf2c0f.tar.xz
2 files changed, 111 insertions, 1 deletion
diff --git a/src/arch/arm/isa/formats/fp.isa b/src/arch/arm/isa/formats/fp.isa
index 2cca96bea..9bb062a2e 100644
--- a/src/arch/arm/isa/formats/fp.isa
+++ b/src/arch/arm/isa/formats/fp.isa
@@ -483,7 +483,47 @@ let {{
         //const uint32_t opc4 = bits(machInst, 3, 0);
         switch (opc1 & 0xb /* 1011 */) {
           case 0x0:
-            return new WarnUnimplemented("vmla, vmls", machInst);
+            if (bits(machInst, 6) == 0) {
+                uint32_t vd;
+                uint32_t vm;
+                uint32_t vn;
+                if (bits(machInst, 8) == 0) {
+                    vd = bits(machInst, 22) | (bits(machInst, 15, 12) << 1);
+                    vm = bits(machInst, 5) | (bits(machInst, 3, 0) << 1);
+                    vn = bits(machInst, 7) | (bits(machInst, 19, 16) << 1);
+                    return new VmlaS(machInst, (IntRegIndex)vd,
+                            (IntRegIndex)vn, (IntRegIndex)vm);
+                } else {
+                    vd = (bits(machInst, 22) << 5) |
+                         (bits(machInst, 15, 12) << 1);
+                    vm = (bits(machInst, 5) << 5) |
+                         (bits(machInst, 3, 0) << 1);
+                    vn = (bits(machInst, 7) << 5) |
+                         (bits(machInst, 19, 16) << 1);
+                    return new VmlaD(machInst, (IntRegIndex)vd,
+                            (IntRegIndex)vn, (IntRegIndex)vm);
+                }
+            } else {
+                uint32_t vd;
+                uint32_t vm;
+                uint32_t vn;
+                if (bits(machInst, 8) == 0) {
+                    vd = bits(machInst, 22) | (bits(machInst, 15, 12) << 1);
+                    vm = bits(machInst, 5) | (bits(machInst, 3, 0) << 1);
+                    vn = bits(machInst, 7) | (bits(machInst, 19, 16) << 1);
+                    return new VmlsS(machInst, (IntRegIndex)vd,
+                            (IntRegIndex)vn, (IntRegIndex)vm);
+                } else {
+                    vd = (bits(machInst, 22) << 5) |
+                         (bits(machInst, 15, 12) << 1);
+                    vm = (bits(machInst, 5) << 5) |
+                         (bits(machInst, 3, 0) << 1);
+                    vn = (bits(machInst, 7) << 5) |
+                         (bits(machInst, 19, 16) << 1);
+                    return new VmlsD(machInst, (IntRegIndex)vd,
+                            (IntRegIndex)vn, (IntRegIndex)vm);
+                }
+            }
           case 0x2:
             if ((opc3 & 0x1) == 0) {
                 uint32_t vd;
diff --git a/src/arch/arm/isa/insts/fp.isa b/src/arch/arm/isa/insts/fp.isa
index 99efcec32..58c2cafa7 100644
--- a/src/arch/arm/isa/insts/fp.isa
+++ b/src/arch/arm/isa/insts/fp.isa
@@ -411,4 +411,74 @@ let {{
     header_output += RegRegOpDeclare.subst(vsqrtDIop);
     decoder_output += RegRegOpConstructor.subst(vsqrtDIop);
     exec_output += PredOpExecute.subst(vsqrtDIop);
+
+    vmlaSCode = '''
+        float mid = FpOp1 * FpOp2;
+        if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) {
+            mid = NAN;
+        }
+        FpDest = FpDest + mid;
+    '''
+    vmlaSIop = InstObjParams("vmlas", "VmlaS", "RegRegRegOp",
+                                     { "code": vmlaSCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += RegRegRegOpDeclare.subst(vmlaSIop);
+    decoder_output += RegRegRegOpConstructor.subst(vmlaSIop);
+    exec_output += PredOpExecute.subst(vmlaSIop);
+
+    vmlaDCode = '''
+        IntDoubleUnion cOp1, cOp2, cDest;
+        cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32));
+        cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32));
+        double mid = cOp1.fp * cOp2.fp;
+        if ((isinf(cOp1.fp) && cOp2.fp == 0) ||
+                (isinf(cOp2.fp) && cOp1.fp == 0)) {
+            mid = NAN;
+        }
+        cDest.fp = cDest.fp + mid;
+        FpDestP0.uw = cDest.bits;
+        FpDestP1.uw = cDest.bits >> 32;
+    '''
+    vmlaDIop = InstObjParams("vmlad", "VmlaD", "RegRegRegOp",
+                                     { "code": vmlaDCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += RegRegRegOpDeclare.subst(vmlaDIop);
+    decoder_output += RegRegRegOpConstructor.subst(vmlaDIop);
+    exec_output += PredOpExecute.subst(vmlaDIop);
+
+    vmlsSCode = '''
+        float mid = FpOp1 * FpOp2;
+        if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) {
+            mid = NAN;
+        }
+        FpDest = FpDest - mid;
+    '''
+    vmlsSIop = InstObjParams("vmlss", "VmlsS", "RegRegRegOp",
+                                     { "code": vmlsSCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += RegRegRegOpDeclare.subst(vmlsSIop);
+    decoder_output += RegRegRegOpConstructor.subst(vmlsSIop);
+    exec_output += PredOpExecute.subst(vmlsSIop);
+
+    vmlsDCode = '''
+        IntDoubleUnion cOp1, cOp2, cDest;
+        cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32));
+        cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32));
+        double mid = cOp1.fp * cOp2.fp;
+        if ((isinf(cOp1.fp) && cOp2.fp == 0) ||
+                (isinf(cOp2.fp) && cOp1.fp == 0)) {
+            mid = NAN;
+        }
+        cDest.fp = cDest.fp - mid;
+        FpDestP0.uw = cDest.bits;
+        FpDestP1.uw = cDest.bits >> 32;
+    '''
+    vmlsDIop = InstObjParams("vmlsd", "VmlsD", "RegRegRegOp",
+                                     { "code": vmlsDCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += RegRegRegOpDeclare.subst(vmlsDIop);
+    decoder_output += RegRegRegOpConstructor.subst(vmlsDIop);
+    exec_output += PredOpExecute.subst(vmlsDIop);
 }};
author	Gabe Black <gblack@eecs.umich.edu>	2010-06-02 12:58:14 -0500
committer	Gabe Black <gblack@eecs.umich.edu>	2010-06-02 12:58:14 -0500
commit	3111a6216924643c332cd8ae3ebdd66fcbbf2c0f (patch)
tree	9b98dfd984995528df736a6444bce71bb6f6045f /src/arch
parent	90d70a22cb15e6461fc7397a0f55322dc163f701 (diff)
download	gem5-3111a6216924643c332cd8ae3ebdd66fcbbf2c0f.tar.xz