From 279501816a5fbff6dffe2a1a7c57bd26ae50eb62 Mon Sep 17 00:00:00 2001
From: Ciro Santilli <ciro.santilli@arm.com>
Date: Tue, 30 Apr 2019 18:24:00 +0100
Subject: arch-arm: implement VMINNM and VMAXNM scalar version

ARMv8.2 16-bit versions have not yet been implemented, but placeholders
were created for them.

Refactor the nearby decoding tree to closely match the ARM spec A32 decode
table.

That piece of the tree can also be called from thumb which decodes it in
the same way, although the thumb decode table has a different terminology.

The old code matched neither the A32 nor the T32 terminology, so it is
better to at least match one of them to help verify correctness.

Change-Id: Iabbbca2932557cf6c98ce36690c385c3ddf39ed8
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/18690
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/isa/insts/fp.isa   | 60 +++++++++++++++++++++++++++++++++++++++++
 src/arch/arm/isa/insts/neon.isa | 16 +++++++++++
 2 files changed, 76 insertions(+)

(limited to 'src/arch/arm/isa/insts')
diff --git a/src/arch/arm/isa/insts/fp.isa b/src/arch/arm/isa/insts/fp.isa
index d8323c455..df4d58308 100644
--- a/src/arch/arm/isa/insts/fp.isa
+++ b/src/arch/arm/isa/insts/fp.isa
@@ -578,6 +578,66 @@ let {{
     buildBinFpOp("vmul", "Vmul", "FpRegRegRegOp", "SimdFloatMultOp", "fpMulS",
                  "fpMulD")
 
+    def buildBinOp(name, base, opClass, op):
+        '''
+        Create backported aarch64 instructions that use fplib.
+
+        Because they are backported, these instructions are unconditional.
+        '''
+        global header_output, decoder_output, exec_output
+        inst_datas = [  # (size suffix, C++ body) per generated variant
+            (
+                "s",  # operands are handed to fplib unchanged
+                '''
+                FpDest_uw = fplib%(op)s<>(FpOp1_uw, FpOp2_uw, fpscr);
+                '''
+            ),
+            (
+                "d",  # 64-bit operands are built from P0/P1 halves
+                '''
+                uint64_t op1 = ((uint64_t)FpOp1P0_uw |
+                               ((uint64_t)FpOp1P1_uw << 32));
+                uint64_t op2 = ((uint64_t)FpOp2P0_uw |
+                               ((uint64_t)FpOp2P1_uw << 32));
+                uint64_t dest = fplib%(op)s<>(op1, op2, fpscr);
+                FpDestP0_uw = dest;
+                FpDestP1_uw = dest >> 32;
+                '''
+            )
+        ]
+        Name = name[0].upper() + name[1:]  # e.g. "vminnm" -> "Vminnm"
+        declareTempl = eval(base + "Declare");  # templates resolved by name
+        constructorTempl = eval(base + "Constructor");
+        for size_suffix, code in inst_datas:  # one instruction per variant
+            code = (  # bracket the body with FPSCR load and write-back
+                '''
+                FPSCR fpscr = (FPSCR)FpscrExc;
+                ''' +
+                code +
+                '''
+                FpscrExc = fpscr;
+                '''
+            )
+            iop = InstObjParams(
+                name + size_suffix,  # e.g. "vminnms"
+                Name + size_suffix.upper(),  # e.g. "VminnmS"
+                base,
+                {
+                    "code": code % {"op": op},  # fills in fplib%(op)s
+                    "op_class": opClass
+                },
+                []
+            )
+            header_output += declareTempl.subst(iop)  # declaration
+            decoder_output += constructorTempl.subst(iop)  # constructor
+            exec_output += BasicExecute.subst(iop)  # execute method
+    ops = [  # (name, base, opClass, op) argument tuples for buildBinOp
+        ("vminnm", "FpRegRegRegOp", "SimdFloatCmpOp", "MinNum"),
+        ("vmaxnm", "FpRegRegRegOp", "SimdFloatCmpOp", "MaxNum"),
+    ]
+    for op in ops:  # each tuple is unpacked into one buildBinOp call
+        buildBinOp(*op)
+
     def buildUnaryFpOp(name, Name, base, opClass, singleOp, doubleOp = None):
         if doubleOp is None:
             doubleOp = singleOp
diff --git a/src/arch/arm/isa/insts/neon.isa b/src/arch/arm/isa/insts/neon.isa
index bfebd103d..f242451b2 100644
--- a/src/arch/arm/isa/insts/neon.isa
+++ b/src/arch/arm/isa/insts/neon.isa
@@ -58,6 +58,22 @@ output header {{
         }
     }
 
+    template <class BaseS, class BaseD>  // built for size 2 / size 3
+    StaticInstPtr
+    decodeNeonSizeSingleDouble(unsigned size,
+                         ExtMachInst machInst, IntRegIndex dest,
+                         IntRegIndex op1, IntRegIndex op2)
+    {
+        switch (size) {
+          case 2:  // size 2 selects BaseS
+            return new BaseS(machInst, dest, op1, op2);
+          case 3:  // size 3 selects BaseD
+            return new BaseD(machInst, dest, op1, op2);
+          default:  // any other size decodes to Unknown
+            return new Unknown(machInst);
+        }
+    }
+
     template <template <typename T> class Base>
     StaticInstPtr
     decodeNeonSThreeUReg(unsigned size,
-- 
cgit v1.2.3