summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Giacomo Gabrielli <giacomo.gabrielli@arm.com> 2018-10-23 13:57:05 +0100
committer Giacomo Gabrielli <giacomo.gabrielli@arm.com> 2019-05-30 16:31:57 +0000
commit 3cf4a04fceef321b5cd6ece9a4ff1814787a236d (patch)
tree fadfcbd0d8f1059f76e402e101fb7cd6cc6b75fa
parent f26f3e22b331dfdd3dca1eea11adf9ed81ef6f05 (diff)
download gem5-3cf4a04fceef321b5cd6ece9a4ff1814787a236d.tar.xz
arch-arm: Add initial support for SVE gather/scatter loads/stores
Change-Id: I891623015b47a39f61ed616f8896f32a7134c8e2
Signed-off-by: Giacomo Gabrielli <giacomo.gabrielli@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/13521
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
-rw-r--r-- src/arch/arm/insts/sve_macromem.hh 224
-rw-r--r-- src/arch/arm/isa/formats/sve_2nd_level.isa 458
-rw-r--r-- src/arch/arm/isa/includes.isa 10
-rw-r--r-- src/arch/arm/isa/insts/sve_mem.isa 434
-rw-r--r-- src/arch/arm/isa/operands.isa 35
-rw-r--r-- src/arch/arm/isa/templates/sve_mem.isa 341
-rw-r--r-- src/arch/arm/registers.hh 1
7 files changed, 1403 insertions, 100 deletions
diff --git a/src/arch/arm/insts/sve_macromem.hh b/src/arch/arm/insts/sve_macromem.hh
new file mode 100644
index 000000000..a31af9b92
--- /dev/null
+++ b/src/arch/arm/insts/sve_macromem.hh
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) 2018 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Giacomo Gabrielli
+ */
+
+#ifndef __ARCH_ARM_SVE_MACROMEM_HH__
+#define __ARCH_ARM_SVE_MACROMEM_HH__
+
+#include "arch/arm/generated/decoder.hh"
+#include "arch/arm/insts/pred_inst.hh"
+
+namespace ArmISA {
+
+/**
+ * Macroop for SVE indexed memory operations that use the "vector plus
+ * immediate" addressing form. The macroop is never executed itself; it is
+ * expanded into one transfer microop per vector element. It is instantiated
+ * with MemReadOp for loads and with MemWriteOp for stores.
+ *
+ * @tparam RegElemType Element type of the transfer register; its size
+ *                     determines how many elements (microops) there are.
+ * @tparam MemElemType Element type in memory; the immediate is multiplied
+ *                     by its size when disassembled.
+ * @tparam MicroopType Class template of the per-element transfer microop.
+ */
+template <typename RegElemType, typename MemElemType,
+          template <typename, typename> class MicroopType>
+class SveIndexedMemVI : public PredMacroOp
+{
+  protected:
+    IntRegIndex dest;   // transfer vector register
+    IntRegIndex gp;     // governing predicate register
+    IntRegIndex base;   // vector register holding the base addresses
+    uint64_t imm;       // immediate offset, in units of MemElemType
+    bool isLoad;        // true when the op class is MemReadOp
+
+  public:
+    /**
+     * Builds the microop sequence for this macroop.
+     *
+     * @param mnem      Mnemonic used for the macroop and all microops.
+     * @param machInst  Machine instruction being decoded.
+     * @param __opClass Operation class; MemReadOp selects the load flavour.
+     * @param _dest     Transfer vector register.
+     * @param _gp       Governing predicate register.
+     * @param _base     Vector register holding the base addresses.
+     * @param _imm      Immediate offset, in units of MemElemType.
+     */
+    SveIndexedMemVI(const char *mnem, ExtMachInst machInst, OpClass __opClass,
+                    IntRegIndex _dest, IntRegIndex _gp, IntRegIndex _base,
+                    uint64_t _imm)
+        : PredMacroOp(mnem, machInst, __opClass),
+          dest(_dest), gp(_gp), base(_base), imm(_imm),
+          isLoad(__opClass == MemReadOp)
+    {
+        // NOTE(review): presumably sveLen is the vector length in 128-bit
+        // units minus one, making the numerator the length in bytes --
+        // confirm against the ExtMachInst definition
+        int num_elems = ((machInst.sveLen + 1) * 16) / sizeof(RegElemType);
+
+        // One transfer microop per element, plus the source-copy microop
+        // that only loads need
+        numMicroops = num_elems;
+        if (isLoad) {
+            numMicroops++;
+        }
+
+        microOps = new StaticInstPtr[numMicroops];
+
+        StaticInstPtr *uop = microOps;
+
+        if (isLoad) {
+            // The first microop of a gather load copies the source vector
+            // register used for address calculation to an auxiliary register,
+            // with all subsequent microops reading from the latter. This is
+            // needed to properly handle cases where the source vector
+            // register is the same as the destination register
+            *uop = new ArmISAInst::SveGatherLoadCpySrcVecMicroop(
+                mnem, machInst, _base, this);
+            uop++;
+        }
+
+        for (int i = 0; i < num_elems; i++, uop++) {
+            // Loads read their bases from the auxiliary copy made above;
+            // stores read the architectural base register directly
+            *uop = new MicroopType<RegElemType, MemElemType>(
+                mnem, machInst, __opClass, _dest, _gp,
+                isLoad ? (IntRegIndex) VECREG_UREG0 : _base, _imm, i,
+                num_elems);
+        }
+
+        // The loop leaves uop one past the final microop
+        --uop;
+        (*uop)->setLastMicroop();
+        microOps[0]->setFirstMicroop();
+
+        // Every microop except the last one is flagged as delayed-commit
+        for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) {
+            (*uop)->setDelayedCommit();
+        }
+    }
+
+    /** The macroop must never be executed directly; this always panics. */
+    Fault
+    execute(ExecContext *, Trace::InstRecord *) const
+    {
+        panic("Execute method called when it shouldn't!");
+        return NoFault;
+    }
+
+    /**
+     * Renders the macroop as
+     * "<mnem> {<dest>}, <gp>[/z], [<base>{, #<imm * sizeof(MemElemType)>}]".
+     *
+     * @return The disassembly string.
+     */
+    std::string
+    generateDisassembly(Addr pc, const SymbolTable *symtab) const
+    {
+        // TODO: add suffix to transfer and base registers
+        std::stringstream ss;
+        printMnemonic(ss, "", false);
+        ccprintf(ss, "{");
+        printVecReg(ss, dest, true);
+        ccprintf(ss, "}, ");
+        printVecPredReg(ss, gp);
+        // Only loads carry the zeroing qualifier on the governing
+        // predicate; stores are written with a plain predicate operand
+        ccprintf(ss, isLoad ? "/z, [" : ", [");
+        printVecReg(ss, base, true);
+        if (imm != 0) {
+            ccprintf(ss, ", #%d", imm * sizeof(MemElemType));
+        }
+        ccprintf(ss, "]");
+        return ss.str();
+    }
+};
+
+/**
+ * Macroop for SVE indexed memory operations that use the "scalar plus
+ * vector" addressing form. The macroop is never executed itself; it is
+ * expanded into one transfer microop per vector element. It is instantiated
+ * with MemReadOp for loads and with MemWriteOp for stores.
+ *
+ * @tparam RegElemType Element type of the transfer register; its size
+ *                     determines how many elements (microops) there are.
+ * @tparam MemElemType Element type in memory.
+ * @tparam MicroopType Class template of the per-element transfer microop.
+ */
+template <typename RegElemType, typename MemElemType,
+          template <typename, typename> class MicroopType>
+class SveIndexedMemSV : public PredMacroOp
+{
+  protected:
+    IntRegIndex dest;     // transfer vector register
+    IntRegIndex gp;       // governing predicate register
+    IntRegIndex base;     // scalar base register
+    IntRegIndex offset;   // vector register holding the offsets
+
+    bool offsetIs32;      // forwarded unchanged to every transfer microop
+    bool offsetIsSigned;  // forwarded unchanged to every transfer microop
+    bool offsetIsScaled;  // forwarded unchanged to every transfer microop
+    bool isLoad;          // true when the op class is MemReadOp
+
+  public:
+    /**
+     * Builds the microop sequence for this macroop.
+     *
+     * @param mnem            Mnemonic used for the macroop and microops.
+     * @param machInst        Machine instruction being decoded.
+     * @param __opClass       Operation class; MemReadOp selects the load
+     *                        flavour.
+     * @param _dest           Transfer vector register.
+     * @param _gp             Governing predicate register.
+     * @param _base           Scalar base register.
+     * @param _offset         Vector register holding the offsets.
+     * @param _offsetIs32     Offset-width flag passed on to the microops.
+     * @param _offsetIsSigned Offset-signedness flag passed on to the
+     *                        microops.
+     * @param _offsetIsScaled Offset-scaling flag passed on to the microops.
+     */
+    SveIndexedMemSV(const char *mnem, ExtMachInst machInst, OpClass __opClass,
+                    IntRegIndex _dest, IntRegIndex _gp, IntRegIndex _base,
+                    IntRegIndex _offset, bool _offsetIs32,
+                    bool _offsetIsSigned, bool _offsetIsScaled)
+        : PredMacroOp(mnem, machInst, __opClass),
+          dest(_dest), gp(_gp), base(_base), offset(_offset),
+          offsetIs32(_offsetIs32), offsetIsSigned(_offsetIsSigned),
+          offsetIsScaled(_offsetIsScaled),
+          isLoad(__opClass == MemReadOp)
+    {
+        // NOTE(review): presumably sveLen is the vector length in 128-bit
+        // units minus one, making the numerator the length in bytes --
+        // confirm against the ExtMachInst definition
+        int num_elems = ((machInst.sveLen + 1) * 16) / sizeof(RegElemType);
+
+        // One transfer microop per element, plus the source-copy microop
+        // that only loads need
+        numMicroops = num_elems;
+        if (isLoad) {
+            numMicroops++;
+        }
+
+        microOps = new StaticInstPtr[numMicroops];
+
+        StaticInstPtr *uop = microOps;
+
+        if (isLoad) {
+            // The first microop of a gather load copies the source vector
+            // register used for address calculation to an auxiliary register,
+            // with all subsequent microops reading from the latter. This is
+            // needed to properly handle cases where the source vector
+            // register is the same as the destination register
+            *uop = new ArmISAInst::SveGatherLoadCpySrcVecMicroop(
+                mnem, machInst, _offset, this);
+            uop++;
+        }
+
+        for (int i = 0; i < num_elems; i++, uop++) {
+            // Loads read their offsets from the auxiliary copy made above;
+            // stores read the architectural offset register directly
+            *uop = new MicroopType<RegElemType, MemElemType>(
+                mnem, machInst, __opClass, _dest, _gp, _base,
+                isLoad ? (IntRegIndex) VECREG_UREG0 : _offset, _offsetIs32,
+                _offsetIsSigned, _offsetIsScaled, i, num_elems);
+        }
+
+        // The loop leaves uop one past the final microop
+        --uop;
+        (*uop)->setLastMicroop();
+        microOps[0]->setFirstMicroop();
+
+        // Every microop except the last one is flagged as delayed-commit
+        for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) {
+            (*uop)->setDelayedCommit();
+        }
+    }
+
+    /** The macroop must never be executed directly; this always panics. */
+    Fault
+    execute(ExecContext *, Trace::InstRecord *) const
+    {
+        panic("Execute method called when it shouldn't!");
+        return NoFault;
+    }
+
+    /**
+     * Renders the macroop as "<mnem> {<dest>}, <gp>[/z], [<base>, <offset>]".
+     *
+     * @return The disassembly string.
+     */
+    std::string
+    generateDisassembly(Addr pc, const SymbolTable *symtab) const
+    {
+        // TODO: add suffix to transfer and base registers
+        std::stringstream ss;
+        printMnemonic(ss, "", false);
+        ccprintf(ss, "{");
+        printVecReg(ss, dest, true);
+        ccprintf(ss, "}, ");
+        printVecPredReg(ss, gp);
+        // Only loads carry the zeroing qualifier on the governing
+        // predicate; stores are written with a plain predicate operand
+        ccprintf(ss, isLoad ? "/z, [" : ", [");
+        printIntReg(ss, base);
+        ccprintf(ss, ", ");
+        printVecReg(ss, offset, true);
+        ccprintf(ss, "]");
+        return ss.str();
+    }
+};
+
+} // namespace ArmISA
+
+#endif // __ARCH_ARM_SVE_MACROMEM_HH__
diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa b/src/arch/arm/isa/formats/sve_2nd_level.isa
index e81ab3ed7..7b2d3af49 100644
--- a/src/arch/arm/isa/formats/sve_2nd_level.isa
+++ b/src/arch/arm/isa/formats/sve_2nd_level.isa
@@ -2896,34 +2896,153 @@ namespace Aarch64
StaticInstPtr
decodeSveMemGather32(ExtMachInst machInst)
{
- // TODO: for now only LDR and LD1R are implemented
- if (bits(machInst, 22) && bits(machInst, 15)) {
- IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
- IntRegIndex rn = makeSP(
- (IntRegIndex) (uint8_t) bits(machInst, 9, 5));
- uint64_t imm = bits(machInst, 21, 16);
- IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 12, 10);
- uint8_t dtype = (bits(machInst, 24, 23) << 2) |
- bits(machInst, 14, 13);
- return decodeSveContigLoadSIInsts<SveLoadAndRepl>(
- dtype, machInst, zt, pg, rn, imm, false, true);
- } else if (bits(machInst, 24, 22) == 0x6 &&
- bits(machInst, 15, 13) == 0x0 &&
- bits(machInst, 4) == 0x0) {
- IntRegIndex pt = (IntRegIndex) (uint8_t) bits(machInst, 3, 0);
- IntRegIndex rn = makeSP(
- (IntRegIndex) (uint8_t) bits(machInst, 9, 5));
- uint64_t imm = sext<9>((bits(machInst, 21, 16) << 3) |
- bits(machInst, 12, 10));
- return new SveLdrPred(machInst, pt, rn, imm);
- } else if (bits(machInst, 24, 22) == 0x6 &&
- bits(machInst, 15, 13) == 0x2) {
- IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
- IntRegIndex rn = makeSP(
- (IntRegIndex) (uint8_t) bits(machInst, 9, 5));
- uint64_t imm = sext<9>((bits(machInst, 21, 16) << 3) |
- bits(machInst, 12, 10));
- return new SveLdrVec(machInst, zt, rn, imm);
+ if (bits(machInst, 15)) {
+ if (bits(machInst, 22)) {
+ // SVE load and broadcast element
+ IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
+ IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
+ uint64_t imm = bits(machInst, 21, 16);
+ IntRegIndex pg = (IntRegIndex) (uint8_t)
+ bits(machInst, 12, 10);
+ uint8_t dtype = (bits(machInst, 24, 23) << 2) |
+ bits(machInst, 14, 13);
+ return decodeSveContigLoadSIInsts<SveLoadAndRepl>(
+ dtype, machInst, zt, pg, rn, imm, false, true);
+ } else {
+ if (bits(machInst, 21)) {
+ // SVE 32-bit gather load (vector plus immediate)
+ IntRegIndex zt = (IntRegIndex) (uint8_t)
+ bits(machInst, 4, 0);
+ IntRegIndex zn = (IntRegIndex) (uint8_t)
+ bits(machInst, 9, 5);
+ uint64_t imm = bits(machInst, 20, 16);
+ IntRegIndex pg = (IntRegIndex) (uint8_t)
+ bits(machInst, 12, 10);
+ uint8_t dtype = (bits(machInst, 24, 23) << 1) |
+ bits(machInst, 14);
+ uint8_t ff = bits(machInst, 13);
+ if (ff) {
+ return new Unknown64(machInst);
+ }
+ return decodeSveGatherLoadVIInsts(
+ dtype, machInst, zt, pg, zn, imm, true, ff);
+ } else {
+ uint8_t b14_13 = bits(machInst, 14, 13);
+ if (b14_13 == 0x2 && bits(machInst, 4) == 0) {
+ // TODO: SVE contiguous prefetch (scalar plus scalar)
+ return new Unknown64(machInst);
+ } else if (b14_13 == 0x3 && bits(machInst, 4) == 0) {
+ // TODO: SVE 32-bit gather prefetch (vector plus
+ // immediate)
+ return new Unknown64(machInst);
+ }
+ }
+ }
+ } else {
+ uint8_t b24_23 = bits(machInst, 24, 23);
+ if (b24_23 != 0x3 && bits(machInst, 21) == 0) {
+ // SVE 32-bit gather load (scalar plus 32-bit unscaled offsets)
+ IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
+ IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
+ IntRegIndex zm = (IntRegIndex) (uint8_t)
+ bits(machInst, 20, 16);
+ IntRegIndex pg = (IntRegIndex) (uint8_t)
+ bits(machInst, 12, 10);
+ uint8_t dtype = (bits(machInst, 24, 23) << 1) |
+ bits(machInst, 14);
+ uint8_t xs = bits(machInst, 22);
+ uint8_t ff = bits(machInst, 13);
+ if (ff) {
+ return new Unknown64(machInst);
+ }
+ return decodeSveGatherLoadSVInsts(
+ dtype, machInst, zt, pg, rn, zm,
+ true, true, xs, false, ff);
+ }
+ switch (b24_23) {
+ case 0x0:
+ if (bits(machInst, 21) && bits(machInst, 4) == 0) {
+ // TODO: SVE 32-bit gather prefetch (vector plus immediate)
+ break;
+ }
+ break;
+ case 0x1:
+ if (bits(machInst, 21)) {
+ // SVE 32-bit gather load halfwords (scalar plus 32-bit
+ // scaled offsets)
+ IntRegIndex zt = (IntRegIndex) (uint8_t)
+ bits(machInst, 4, 0);
+ IntRegIndex rn = (IntRegIndex) (uint8_t)
+ bits(machInst, 9, 5);
+ IntRegIndex zm = (IntRegIndex) (uint8_t)
+ bits(machInst, 20, 16);
+ IntRegIndex pg = (IntRegIndex) (uint8_t)
+ bits(machInst, 12, 10);
+ uint8_t xs = bits(machInst, 22);
+ uint8_t ff = bits(machInst, 13);
+ if (ff) {
+ return new Unknown64(machInst);
+ }
+ if (bits(machInst, 14)) {
+ return new SveIndexedMemSV<uint32_t, uint16_t,
+ SveGatherLoadSVMicroop>(
+ "ld1", machInst, MemReadOp, zt, pg, rn, zm,
+ true, xs, true);
+ } else {
+ return new SveIndexedMemSV<int32_t, int16_t,
+ SveGatherLoadSVMicroop>(
+ "ld1", machInst, MemReadOp, zt, pg, rn, zm,
+ true, xs, true);
+ }
+ }
+ break;
+ case 0x2:
+ if (bits(machInst, 21)) {
+ // SVE 32-bit gather load words (scalar plus 32-bit scaled
+ // offsets)
+ IntRegIndex zt = (IntRegIndex) (uint8_t)
+ bits(machInst, 4, 0);
+ IntRegIndex rn = (IntRegIndex) (uint8_t)
+ bits(machInst, 9, 5);
+ IntRegIndex zm = (IntRegIndex) (uint8_t)
+ bits(machInst, 20, 16);
+ IntRegIndex pg = (IntRegIndex) (uint8_t)
+ bits(machInst, 12, 10);
+ uint8_t xs = bits(machInst, 22);
+ uint8_t ff = bits(machInst, 13);
+ if (ff) {
+ return new Unknown64(machInst);
+ }
+ return new SveIndexedMemSV<uint32_t, uint32_t,
+ SveGatherLoadSVMicroop>(
+ "ld1", machInst, MemReadOp, zt, pg, rn, zm,
+ true, xs, true);
+ }
+ break;
+ case 0x3:
+ if (bits(machInst, 22) == 0 && bits(machInst, 14, 13) == 0x0 &&
+ bits(machInst, 4) == 0) {
+ // SVE load predicate register
+ IntRegIndex pt = (IntRegIndex) (uint8_t)
+ bits(machInst, 3, 0);
+ IntRegIndex rn = (IntRegIndex) (uint8_t)
+ bits(machInst, 9, 5);
+ uint64_t imm = sext<9>((bits(machInst, 21, 16) << 3) |
+ bits(machInst, 12, 10));
+ return new SveLdrPred(machInst, pt, rn, imm);
+ } else if (bits(machInst, 22) == 0 &&
+ bits(machInst, 14, 13) == 0x2) {
+ // SVE load vector register
+ IntRegIndex zt = (IntRegIndex) (uint8_t)
+ bits(machInst, 4, 0);
+ IntRegIndex rn = (IntRegIndex) (uint8_t)
+ bits(machInst, 9, 5);
+ uint64_t imm = sext<9>((bits(machInst, 21, 16) << 3) |
+ bits(machInst, 12, 10));
+ return new SveLdrVec(machInst, zt, rn, imm);
+ }
+ break;
+ }
}
return new Unknown64(machInst);
} // decodeSveMemGather32
@@ -3048,6 +3167,124 @@ namespace Aarch64
StaticInstPtr
decodeSveMemGather64(ExtMachInst machInst)
{
+ switch ((bits(machInst, 21) << 1) | bits(machInst, 15)) {
+ case 0x0:
+ {
+ // SVE 64-bit gather load (scalar plus unpacked 32-bit unscaled
+ // offsets)
+ IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
+ IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
+ IntRegIndex zm = (IntRegIndex) (uint8_t)
+ bits(machInst, 20, 16);
+ IntRegIndex pg = (IntRegIndex) (uint8_t)
+ bits(machInst, 12, 10);
+ uint8_t dtype = (bits(machInst, 24, 23) << 1) |
+ bits(machInst, 14);
+ uint8_t xs = bits(machInst, 22);
+ uint8_t ff = bits(machInst, 13);
+ if (ff) {
+ return new Unknown64(machInst);
+ }
+ return decodeSveGatherLoadSVInsts(
+ dtype, machInst, zt, pg, rn, zm,
+ false, true, xs, false, ff);
+ }
+ case 0x1:
+ if (bits(machInst, 22)) {
+ // SVE 64-bit gather load (scalar plus 64-bit unscaled offsets)
+ IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
+ IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
+ IntRegIndex zm = (IntRegIndex) (uint8_t)
+ bits(machInst, 20, 16);
+ IntRegIndex pg = (IntRegIndex) (uint8_t)
+ bits(machInst, 12, 10);
+ uint8_t dtype = (bits(machInst, 24, 23) << 1) |
+ bits(machInst, 14);
+ uint8_t ff = bits(machInst, 13);
+ if (ff) {
+ return new Unknown64(machInst);
+ }
+ return decodeSveGatherLoadSVInsts(
+ dtype, machInst, zt, pg, rn, zm,
+ false, false, false, false, ff);
+ } else {
+ if (bits(machInst, 14, 13) == 0x3 && bits(machInst, 4) == 0) {
+ // TODO: SVE 64-bit gather prefetch (vector plus immediate)
+ break;
+ }
+ }
+ break;
+ case 0x2:
+ if (bits(machInst, 24, 23) != 0x0) {
+ // SVE 64-bit gather load (scalar plus unpacked 32-bit scaled
+ // offsets)
+ IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
+ IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
+ IntRegIndex zm = (IntRegIndex) (uint8_t)
+ bits(machInst, 20, 16);
+ IntRegIndex pg = (IntRegIndex) (uint8_t)
+ bits(machInst, 12, 10);
+ uint8_t dtype = (bits(machInst, 24, 23) << 1) |
+ bits(machInst, 14);
+ uint8_t xs = bits(machInst, 22);
+ uint8_t ff = bits(machInst, 13);
+ if (ff) {
+ return new Unknown64(machInst);
+ }
+ return decodeSveGatherLoadSVInsts(
+ dtype, machInst, zt, pg, rn, zm,
+ false, true, xs, true, ff);
+ } else if (bits(machInst, 4) == 0) {
+ // TODO: SVE 64-bit gather prefetch (scalar plus unpacked
+ // 32-bit scaled offsets)
+ return new Unknown64(machInst);
+ }
+ break;
+ case 0x3:
+ if (bits(machInst, 22) == 0) {
+ // SVE 64-bit gather load (vector plus immediate)
+ IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
+ IntRegIndex zn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
+ uint64_t imm = bits(machInst, 20, 16);
+ IntRegIndex pg = (IntRegIndex) (uint8_t)
+ bits(machInst, 12, 10);
+ uint8_t dtype = (bits(machInst, 24, 23) << 1) |
+ bits(machInst, 14);
+ uint8_t ff = bits(machInst, 13);
+ if (ff) {
+ return new Unknown64(machInst);
+ }
+ return decodeSveGatherLoadVIInsts(
+ dtype, machInst, zt, pg, zn, imm, false, ff);
+ } else {
+ if (bits(machInst, 24, 23) != 0x0) {
+ // SVE 64-bit gather load (scalar plus 64-bit scaled
+ // offsets)
+ IntRegIndex zt = (IntRegIndex) (uint8_t)
+ bits(machInst, 4, 0);
+ IntRegIndex rn = (IntRegIndex) (uint8_t)
+ bits(machInst, 9, 5);
+ IntRegIndex zm = (IntRegIndex) (uint8_t)
+ bits(machInst, 20, 16);
+ IntRegIndex pg = (IntRegIndex) (uint8_t)
+ bits(machInst, 12, 10);
+ uint8_t dtype = (bits(machInst, 24, 23) << 1) |
+ bits(machInst, 14);
+ uint8_t ff = bits(machInst, 13);
+ if (ff) {
+ return new Unknown64(machInst);
+ }
+ return decodeSveGatherLoadSVInsts(
+ dtype, machInst, zt, pg, rn, zm,
+ false, false, false, true, ff);
+ } else if (bits(machInst, 4) == 0) {
+ // TODO: SVE 64-bit gather prefetch (scalar plus 64-bit
+ // scaled offsets)
+ break;
+ }
+ }
+ break;
+ }
return new Unknown64(machInst);
} // decodeSveMemGather64
@@ -3087,36 +3324,12 @@ namespace Aarch64
} // decodeSveContigNTStoreSS
StaticInstPtr
- decodeSveScatterStore64SV32U(ExtMachInst machInst)
- {
- return new Unknown64(machInst);
- } // decodeSveScatterStore64SV32U
-
- StaticInstPtr
- decodeSveScatterStore64SV64U(ExtMachInst machInst)
- {
- return new Unknown64(machInst);
- } // decodeSveScatterStore64SV64U
-
- StaticInstPtr
decodeSveContigNTStoreSI(ExtMachInst machInst)
{
return new Unknown64(machInst);
} // decodeSveContigNTStoreSI
StaticInstPtr
- decodeSveScatterStore64VI(ExtMachInst machInst)
- {
- return new Unknown64(machInst);
- } // decodeSveScatterStore64VI
-
- StaticInstPtr
- decodeSveScatterStore32SV32S(ExtMachInst machInst)
- {
- return new Unknown64(machInst);
- } // decodeSveScatterStore32SV32S
-
- StaticInstPtr
decodeSveStoreStructsSS(ExtMachInst machInst)
{
return new Unknown64(machInst);
@@ -3129,30 +3342,6 @@ namespace Aarch64
} // decodeSveStoreStructsSI
StaticInstPtr
- decodeSveScatterStore32SV32U(ExtMachInst machInst)
- {
- return new Unknown64(machInst);
- } // decodeSveScatterStore32SV32U
-
- StaticInstPtr
- decodeSveScatterStore32VI(ExtMachInst machInst)
- {
- return new Unknown64(machInst);
- } // decodeSveScatterStore32VI
-
- StaticInstPtr
- decodeSveScatterStore64SV32S(ExtMachInst machInst)
- {
- return new Unknown64(machInst);
- } // decodeSveScatterStore64SV32S
-
- StaticInstPtr
- decodeSveScatterStore64SV64S(ExtMachInst machInst)
- {
- return new Unknown64(machInst);
- } // decodeSveScatterStore64SV64S
-
- StaticInstPtr
decodeSveMemStore(ExtMachInst machInst)
{
switch (bits(machInst, 15, 13)) {
@@ -3186,37 +3375,118 @@ namespace Aarch64
}
case 0x4:
case 0x6:
- switch (bits(machInst, 22, 21)) {
- case 0x0:
- return decodeSveScatterStore64SV32U(machInst);
- case 0x1:
- if (bits(machInst, 24, 23) != 0x0) {
- return decodeSveScatterStore64SV32S(machInst);
- }
- break;
- case 0x2:
- if (bits(machInst, 24, 23) != 0x3) {
- return decodeSveScatterStore32SV32U(machInst);
+ {
+ IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
+ IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
+ IntRegIndex zm = (IntRegIndex) (uint8_t)
+ bits(machInst, 20, 16);
+ IntRegIndex pg = (IntRegIndex) (uint8_t)
+ bits(machInst, 12, 10);
+ uint8_t msz = bits(machInst, 24, 23);
+ uint8_t xs = bits(machInst, 22);
+
+ switch (bits(machInst, 22, 21)) {
+ case 0x0:
+ // SVE 64-bit scatter store (scalar plus unpacked 32-bit
+ // unscaled offsets)
+ return decodeSveScatterStoreSVInsts(
+ msz, machInst, zt, pg, rn, zm,
+ false, true, xs, false);
+ case 0x1:
+ if (bits(machInst, 24, 23) != 0x0) {
+ // SVE 64-bit scatter store (scalar plus unpacked
+ // 32-bit scaled offsets)
+ return decodeSveScatterStoreSVInsts(
+ msz, machInst, zt, pg, rn, zm,
+ false, true, xs, true);
+ }
+ break;
+ case 0x2:
+ if (bits(machInst, 24, 23) != 0x3) {
+ // SVE 32-bit scatter store (scalar plus 32-bit
+ // unscaled offsets)
+ return decodeSveScatterStoreSVInsts(
+ msz, machInst, zt, pg, rn, zm,
+ true, true, xs, false);
+ }
+ break;
+ case 0x3:
+ // SVE 32-bit scatter store (scalar plus 32-bit scaled
+ // offsets)
+ return decodeSveScatterStoreSVInsts(
+ msz, machInst, zt, pg, rn, zm,
+ true, true, xs, true);
}
- break;
- case 0x3:
- return decodeSveScatterStore32SV32S(machInst);
}
break;
case 0x5:
switch (bits(machInst, 22, 21)) {
case 0x0:
- return decodeSveScatterStore64SV64U(machInst);
+ {
+ // SVE 64-bit scatter store (scalar plus 64-bit unscaled
+ // offsets)
+ IntRegIndex zt = (IntRegIndex) (uint8_t)
+ bits(machInst, 4, 0);
+ IntRegIndex rn = (IntRegIndex) (uint8_t)
+ bits(machInst, 9, 5);
+ IntRegIndex zm = (IntRegIndex) (uint8_t)
+ bits(machInst, 20, 16);
+ IntRegIndex pg = (IntRegIndex) (uint8_t)
+ bits(machInst, 12, 10);
+ uint8_t msz = bits(machInst, 24, 23);
+
+ return decodeSveScatterStoreSVInsts(
+ msz, machInst, zt, pg, rn, zm,
+ false, false, false, false);
+ }
case 0x1:
if (bits(machInst, 24, 23) != 0x0) {
- return decodeSveScatterStore64SV64S(machInst);
+ // SVE 64-bit scatter store (scalar plus 64-bit scaled
+ // offsets)
+ IntRegIndex zt = (IntRegIndex) (uint8_t)
+ bits(machInst, 4, 0);
+ IntRegIndex rn = (IntRegIndex) (uint8_t)
+ bits(machInst, 9, 5);
+ IntRegIndex zm = (IntRegIndex) (uint8_t)
+ bits(machInst, 20, 16);
+ IntRegIndex pg = (IntRegIndex) (uint8_t)
+ bits(machInst, 12, 10);
+ uint8_t msz = bits(machInst, 24, 23);
+
+ return decodeSveScatterStoreSVInsts(
+ msz, machInst, zt, pg, rn, zm,
+ false, false, false, true);
}
break;
case 0x2:
- return decodeSveScatterStore64VI(machInst);
+ {
+ // SVE 64-bit scatter store (vector plus immediate)
+ IntRegIndex zt = (IntRegIndex) (uint8_t)
+ bits(machInst, 4, 0);
+ IntRegIndex zn = (IntRegIndex) (uint8_t)
+ bits(machInst, 9, 5);
+ uint64_t imm = bits(machInst, 20, 16);
+ IntRegIndex pg = (IntRegIndex) (uint8_t)
+ bits(machInst, 12, 10);
+ uint8_t msz = bits(machInst, 24, 23);
+
+ return decodeSveScatterStoreVIInsts(
+ msz, machInst, zt, pg, zn, imm, false);
+ }
case 0x3:
if (bits(machInst, 24, 23) != 0x3) {
- return decodeSveScatterStore64VI(machInst);
+ // SVE 32-bit scatter store (vector plus immediate)
+ IntRegIndex zt = (IntRegIndex) (uint8_t)
+ bits(machInst, 4, 0);
+ IntRegIndex zn = (IntRegIndex) (uint8_t)
+ bits(machInst, 9, 5);
+ uint64_t imm = bits(machInst, 20, 16);
+ IntRegIndex pg = (IntRegIndex) (uint8_t)
+ bits(machInst, 12, 10);
+ uint8_t msz = bits(machInst, 24, 23);
+
+ return decodeSveScatterStoreVIInsts(
+ msz, machInst, zt, pg, zn, imm, true);
}
break;
}
diff --git a/src/arch/arm/isa/includes.isa b/src/arch/arm/isa/includes.isa
index 9aef8c651..f054bc862 100644
--- a/src/arch/arm/isa/includes.isa
+++ b/src/arch/arm/isa/includes.isa
@@ -1,6 +1,6 @@
// -*- mode:c++ -*-
-// Copyright (c) 2010, 2012, 2017 ARM Limited
+// Copyright (c) 2010, 2012, 2017-2018 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -68,9 +68,10 @@ output header {{
#include "arch/arm/insts/sve_mem.hh"
#include "arch/arm/insts/vfp.hh"
#include "arch/arm/isa_traits.hh"
+#include "enums/DecoderFlavour.hh"
#include "mem/packet.hh"
#include "sim/faults.hh"
-#include "enums/DecoderFlavour.hh"
+
}};
output decoder {{
@@ -80,11 +81,12 @@ output decoder {{
#include "arch/arm/decoder.hh"
#include "arch/arm/faults.hh"
+#include "arch/arm/insts/sve_macromem.hh"
#include "arch/arm/intregs.hh"
#include "arch/arm/isa_traits.hh"
#include "arch/arm/utility.hh"
-#include "base/loader/symtab.hh"
#include "base/cprintf.hh"
+#include "base/loader/symtab.hh"
#include "cpu/thread_context.hh"
using namespace ArmISA;
@@ -102,8 +104,10 @@ output exec {{
#include "base/crc.hh"
#include "cpu/base.hh"
#include "sim/pseudo_inst.hh"
+
#if defined(linux)
#include <fenv.h>
+
#endif
#include "base/cp_annotate.hh"
diff --git a/src/arch/arm/isa/insts/sve_mem.isa b/src/arch/arm/isa/insts/sve_mem.isa
index f4ca4c3c9..3102e800a 100644
--- a/src/arch/arm/isa/insts/sve_mem.isa
+++ b/src/arch/arm/isa/insts/sve_mem.isa
@@ -1,4 +1,4 @@
-// Copyright (c) 2017 ARM Limited
+// Copyright (c) 2017-2018 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -204,6 +204,288 @@ output header {{
}};
+output decoder {{
+
+ StaticInstPtr
+    decodeSveGatherLoadVIInsts(uint8_t dtype, ExtMachInst machInst,
+                               IntRegIndex zt, IntRegIndex pg, IntRegIndex zn,
+                               uint64_t imm, bool esizeIs32,
+                               bool firstFaulting)
+    {
+        // Builds the "vector plus immediate" gather load macroop for the
+        // given dtype and element size. Combinations without a case below
+        // (dtype 0x6 always; 0x4 and 0x7 with 32-bit elements) decode to
+        // Unknown64.
+        const char *mnemonic = firstFaulting ? "ldff1" : "ld1";
+
+        if (esizeIs32) {
+            // 32-bit elements
+            switch (dtype) {
+              case 0x0:
+                return new SveIndexedMemVI<int32_t, int8_t,
+                                           SveGatherLoadVIMicroop>(
+                    mnemonic, machInst, MemReadOp, zt, pg, zn, imm);
+              case 0x1:
+                return new SveIndexedMemVI<uint32_t, uint8_t,
+                                           SveGatherLoadVIMicroop>(
+                    mnemonic, machInst, MemReadOp, zt, pg, zn, imm);
+              case 0x2:
+                return new SveIndexedMemVI<int32_t, int16_t,
+                                           SveGatherLoadVIMicroop>(
+                    mnemonic, machInst, MemReadOp, zt, pg, zn, imm);
+              case 0x3:
+                return new SveIndexedMemVI<uint32_t, uint16_t,
+                                           SveGatherLoadVIMicroop>(
+                    mnemonic, machInst, MemReadOp, zt, pg, zn, imm);
+              case 0x5:
+                return new SveIndexedMemVI<uint32_t, uint32_t,
+                                           SveGatherLoadVIMicroop>(
+                    mnemonic, machInst, MemReadOp, zt, pg, zn, imm);
+              default:
+                break;
+            }
+        } else {
+            // 64-bit elements
+            switch (dtype) {
+              case 0x0:
+                return new SveIndexedMemVI<int64_t, int8_t,
+                                           SveGatherLoadVIMicroop>(
+                    mnemonic, machInst, MemReadOp, zt, pg, zn, imm);
+              case 0x1:
+                return new SveIndexedMemVI<uint64_t, uint8_t,
+                                           SveGatherLoadVIMicroop>(
+                    mnemonic, machInst, MemReadOp, zt, pg, zn, imm);
+              case 0x2:
+                return new SveIndexedMemVI<int64_t, int16_t,
+                                           SveGatherLoadVIMicroop>(
+                    mnemonic, machInst, MemReadOp, zt, pg, zn, imm);
+              case 0x3:
+                return new SveIndexedMemVI<uint64_t, uint16_t,
+                                           SveGatherLoadVIMicroop>(
+                    mnemonic, machInst, MemReadOp, zt, pg, zn, imm);
+              case 0x4:
+                return new SveIndexedMemVI<int64_t, int32_t,
+                                           SveGatherLoadVIMicroop>(
+                    mnemonic, machInst, MemReadOp, zt, pg, zn, imm);
+              case 0x5:
+                return new SveIndexedMemVI<uint64_t, uint32_t,
+                                           SveGatherLoadVIMicroop>(
+                    mnemonic, machInst, MemReadOp, zt, pg, zn, imm);
+              case 0x7:
+                return new SveIndexedMemVI<uint64_t, uint64_t,
+                                           SveGatherLoadVIMicroop>(
+                    mnemonic, machInst, MemReadOp, zt, pg, zn, imm);
+              default:
+                break;
+            }
+        }
+
+        return new Unknown64(machInst);
+    }
+
+ StaticInstPtr
+    decodeSveGatherLoadSVInsts(uint8_t dtype, ExtMachInst machInst,
+                               IntRegIndex zt, IntRegIndex pg, IntRegIndex rn,
+                               IntRegIndex zm, bool esizeIs32, bool offsetIs32,
+                               bool offsetIsSigned, bool offsetIsScaled,
+                               bool firstFaulting)
+    {
+        // Builds the "scalar plus vector" gather load macroop for the
+        // given dtype and element size; the three offset flags are passed
+        // through untouched. Combinations without a case below (dtype 0x6
+        // always; 0x4 and 0x7 with 32-bit elements) decode to Unknown64.
+        const char *mnemonic = firstFaulting ? "ldff1" : "ld1";
+
+        if (esizeIs32) {
+            // 32-bit elements
+            switch (dtype) {
+              case 0x0:
+                return new SveIndexedMemSV<int32_t, int8_t,
+                                           SveGatherLoadSVMicroop>(
+                    mnemonic, machInst, MemReadOp, zt, pg, rn, zm,
+                    offsetIs32, offsetIsSigned, offsetIsScaled);
+              case 0x1:
+                return new SveIndexedMemSV<uint32_t, uint8_t,
+                                           SveGatherLoadSVMicroop>(
+                    mnemonic, machInst, MemReadOp, zt, pg, rn, zm,
+                    offsetIs32, offsetIsSigned, offsetIsScaled);
+              case 0x2:
+                return new SveIndexedMemSV<int32_t, int16_t,
+                                           SveGatherLoadSVMicroop>(
+                    mnemonic, machInst, MemReadOp, zt, pg, rn, zm,
+                    offsetIs32, offsetIsSigned, offsetIsScaled);
+              case 0x3:
+                return new SveIndexedMemSV<uint32_t, uint16_t,
+                                           SveGatherLoadSVMicroop>(
+                    mnemonic, machInst, MemReadOp, zt, pg, rn, zm,
+                    offsetIs32, offsetIsSigned, offsetIsScaled);
+              case 0x5:
+                return new SveIndexedMemSV<uint32_t, uint32_t,
+                                           SveGatherLoadSVMicroop>(
+                    mnemonic, machInst, MemReadOp, zt, pg, rn, zm,
+                    offsetIs32, offsetIsSigned, offsetIsScaled);
+              default:
+                break;
+            }
+        } else {
+            // 64-bit elements
+            switch (dtype) {
+              case 0x0:
+                return new SveIndexedMemSV<int64_t, int8_t,
+                                           SveGatherLoadSVMicroop>(
+                    mnemonic, machInst, MemReadOp, zt, pg, rn, zm,
+                    offsetIs32, offsetIsSigned, offsetIsScaled);
+              case 0x1:
+                return new SveIndexedMemSV<uint64_t, uint8_t,
+                                           SveGatherLoadSVMicroop>(
+                    mnemonic, machInst, MemReadOp, zt, pg, rn, zm,
+                    offsetIs32, offsetIsSigned, offsetIsScaled);
+              case 0x2:
+                return new SveIndexedMemSV<int64_t, int16_t,
+                                           SveGatherLoadSVMicroop>(
+                    mnemonic, machInst, MemReadOp, zt, pg, rn, zm,
+                    offsetIs32, offsetIsSigned, offsetIsScaled);
+              case 0x3:
+                return new SveIndexedMemSV<uint64_t, uint16_t,
+                                           SveGatherLoadSVMicroop>(
+                    mnemonic, machInst, MemReadOp, zt, pg, rn, zm,
+                    offsetIs32, offsetIsSigned, offsetIsScaled);
+              case 0x4:
+                return new SveIndexedMemSV<int64_t, int32_t,
+                                           SveGatherLoadSVMicroop>(
+                    mnemonic, machInst, MemReadOp, zt, pg, rn, zm,
+                    offsetIs32, offsetIsSigned, offsetIsScaled);
+              case 0x5:
+                return new SveIndexedMemSV<uint64_t, uint32_t,
+                                           SveGatherLoadSVMicroop>(
+                    mnemonic, machInst, MemReadOp, zt, pg, rn, zm,
+                    offsetIs32, offsetIsSigned, offsetIsScaled);
+              case 0x7:
+                return new SveIndexedMemSV<uint64_t, uint64_t,
+                                           SveGatherLoadSVMicroop>(
+                    mnemonic, machInst, MemReadOp, zt, pg, rn, zm,
+                    offsetIs32, offsetIsSigned, offsetIsScaled);
+              default:
+                break;
+            }
+        }
+
+        return new Unknown64(machInst);
+    }
+
+ StaticInstPtr
+    decodeSveScatterStoreVIInsts(uint8_t msz, ExtMachInst machInst,
+                                 IntRegIndex zt, IntRegIndex pg,
+                                 IntRegIndex zn, uint64_t imm,
+                                 bool esizeIs32)
+    {
+        // Builds the "vector plus immediate" scatter store macroop for the
+        // given memory element size (msz) and register element size. A
+        // 64-bit memory element (msz 0x3) with 32-bit register elements,
+        // and any msz outside 0x0-0x3, decode to Unknown64.
+        const char *mnemonic = "st1";
+
+        if (esizeIs32) {
+            // 32-bit elements
+            switch (msz) {
+              case 0x0:
+                return new SveIndexedMemVI<uint32_t, uint8_t,
+                                           SveScatterStoreVIMicroop>(
+                    mnemonic, machInst, MemWriteOp, zt, pg, zn, imm);
+              case 0x1:
+                return new SveIndexedMemVI<uint32_t, uint16_t,
+                                           SveScatterStoreVIMicroop>(
+                    mnemonic, machInst, MemWriteOp, zt, pg, zn, imm);
+              case 0x2:
+                return new SveIndexedMemVI<uint32_t, uint32_t,
+                                           SveScatterStoreVIMicroop>(
+                    mnemonic, machInst, MemWriteOp, zt, pg, zn, imm);
+              default:
+                break;
+            }
+        } else {
+            // 64-bit elements
+            switch (msz) {
+              case 0x0:
+                return new SveIndexedMemVI<uint64_t, uint8_t,
+                                           SveScatterStoreVIMicroop>(
+                    mnemonic, machInst, MemWriteOp, zt, pg, zn, imm);
+              case 0x1:
+                return new SveIndexedMemVI<uint64_t, uint16_t,
+                                           SveScatterStoreVIMicroop>(
+                    mnemonic, machInst, MemWriteOp, zt, pg, zn, imm);
+              case 0x2:
+                return new SveIndexedMemVI<uint64_t, uint32_t,
+                                           SveScatterStoreVIMicroop>(
+                    mnemonic, machInst, MemWriteOp, zt, pg, zn, imm);
+              case 0x3:
+                return new SveIndexedMemVI<uint64_t, uint64_t,
+                                           SveScatterStoreVIMicroop>(
+                    mnemonic, machInst, MemWriteOp, zt, pg, zn, imm);
+              default:
+                break;
+            }
+        }
+
+        return new Unknown64(machInst);
+    }
+
+ StaticInstPtr
+    decodeSveScatterStoreSVInsts(uint8_t msz, ExtMachInst machInst,
+                                 IntRegIndex zt, IntRegIndex pg,
+                                 IntRegIndex rn, IntRegIndex zm,
+                                 bool esizeIs32, bool offsetIs32,
+                                 bool offsetIsSigned, bool offsetIsScaled)
+    {
+        // Builds the "scalar plus vector" scatter store macroop for the
+        // given memory element size (msz) and register element size; the
+        // three offset flags are passed through untouched. A 64-bit memory
+        // element (msz 0x3) with 32-bit register elements, and any msz
+        // outside 0x0-0x3, decode to Unknown64.
+        const char *mnemonic = "st1";
+
+        if (esizeIs32) {
+            // 32-bit elements
+            switch (msz) {
+              case 0x0:
+                return new SveIndexedMemSV<uint32_t, uint8_t,
+                                           SveScatterStoreSVMicroop>(
+                    mnemonic, machInst, MemWriteOp, zt, pg, rn, zm,
+                    offsetIs32, offsetIsSigned, offsetIsScaled);
+              case 0x1:
+                return new SveIndexedMemSV<uint32_t, uint16_t,
+                                           SveScatterStoreSVMicroop>(
+                    mnemonic, machInst, MemWriteOp, zt, pg, rn, zm,
+                    offsetIs32, offsetIsSigned, offsetIsScaled);
+              case 0x2:
+                return new SveIndexedMemSV<uint32_t, uint32_t,
+                                           SveScatterStoreSVMicroop>(
+                    mnemonic, machInst, MemWriteOp, zt, pg, rn, zm,
+                    offsetIs32, offsetIsSigned, offsetIsScaled);
+              default:
+                break;
+            }
+        } else {
+            // 64-bit elements
+            switch (msz) {
+              case 0x0:
+                return new SveIndexedMemSV<uint64_t, uint8_t,
+                                           SveScatterStoreSVMicroop>(
+                    mnemonic, machInst, MemWriteOp, zt, pg, rn, zm,
+                    offsetIs32, offsetIsSigned, offsetIsScaled);
+              case 0x1:
+                return new SveIndexedMemSV<uint64_t, uint16_t,
+                                           SveScatterStoreSVMicroop>(
+                    mnemonic, machInst, MemWriteOp, zt, pg, rn, zm,
+                    offsetIs32, offsetIsSigned, offsetIsScaled);
+              case 0x2:
+                return new SveIndexedMemSV<uint64_t, uint32_t,
+                                           SveScatterStoreSVMicroop>(
+                    mnemonic, machInst, MemWriteOp, zt, pg, rn, zm,
+                    offsetIs32, offsetIsSigned, offsetIsScaled);
+              case 0x3:
+                return new SveIndexedMemSV<uint64_t, uint64_t,
+                                           SveScatterStoreSVMicroop>(
+                    mnemonic, machInst, MemWriteOp, zt, pg, rn, zm,
+                    offsetIs32, offsetIsSigned, offsetIsScaled);
+              default:
+                break;
+            }
+        }
+
+        return new Unknown64(machInst);
+    }
+
+}};
+
+
let {{
header_output = ''
@@ -323,6 +605,31 @@ let {{
('uint64_t', 'uint64_t'),
)
+ # (RegElemType, MemElemType) pairs used as template arguments when
+ # emitting the gather-load transfer microops; signed pairs are listed
+ # alongside the unsigned ones
+ gatherLoadTplArgs = (
+ ('int32_t', 'int8_t'),
+ ('int64_t', 'int8_t'),
+ ('uint32_t', 'uint8_t'),
+ ('uint64_t', 'uint8_t'),
+ ('int32_t', 'int16_t'),
+ ('int64_t', 'int16_t'),
+ ('uint32_t', 'uint16_t'),
+ ('uint64_t', 'uint16_t'),
+ ('int64_t', 'int32_t'),
+ ('uint32_t', 'uint32_t'),
+ ('uint64_t', 'uint32_t'),
+ ('uint64_t', 'uint64_t'),
+ )
+
+ # (RegElemType, MemElemType) pairs used as template arguments when
+ # emitting the scatter-store transfer microops; only unsigned pairs
+ # are listed
+ scatterStoreTplArgs = (
+ ('uint32_t', 'uint8_t'),
+ ('uint64_t', 'uint8_t'),
+ ('uint32_t', 'uint16_t'),
+ ('uint64_t', 'uint16_t'),
+ ('uint32_t', 'uint32_t'),
+ ('uint64_t', 'uint32_t'),
+ ('uint64_t', 'uint64_t'),
+ )
+
# Generates definitions for SVE contiguous loads
def emitSveContigMemInsts(offsetIsImm):
global header_output, exec_output, decoders
@@ -437,9 +744,124 @@ let {{
'class_name': 'SveLoadAndRepl'}
exec_output += SveContigMemExecDeclare.subst(substDict)
+ # Addressing forms handled by the SVE indexed memory microop generator
+ class IndexedAddrForm:
+ # Per-element vector base plus an immediate offset
+ VEC_PLUS_IMM = 0
+ # Scalar base plus a per-element vector offset
+ SCA_PLUS_VEC = 1
+
+ # Generates definitions for the transfer microops of SVE indexed memory
+ # operations (gather loads, scatter stores)
+ def emitSveIndexedMemMicroops(indexed_addr_form):
+ # indexed_addr_form must be one of the IndexedAddrForm constants; it
+ # selects the effective-address code, the microop class names and the
+ # declaration template. Output is appended to the module-level
+ # header_output and exec_output strings.
+ assert indexed_addr_form in (IndexedAddrForm.VEC_PLUS_IMM,
+ IndexedAddrForm.SCA_PLUS_VEC)
+ global header_output, exec_output, decoders
+ tplHeader = 'template <class RegElemType, class MemElemType>'
+ tplArgs = '<RegElemType, MemElemType>'
+ # Effective address of the element handled by the microop:
+ # - vector plus immediate: base element + imm * sizeof(MemElemType)
+ # - scalar plus vector: XBase + offset element, where the offset is
+ # optionally masked to 32 bits, sign-extended from 32 bits and
+ # scaled by sizeof(MemElemType)
+ if indexed_addr_form == IndexedAddrForm.VEC_PLUS_IMM:
+ eaCode = '''
+ EA = AA64FpBase_x[elemIndex] + imm * sizeof(MemElemType)'''
+ else:
+ eaCode = '''
+ uint64_t offset = AA64FpOffset_x[elemIndex];
+ if (offsetIs32) {
+ offset &= (1ULL << 32) - 1;
+ }
+ if (offsetIsSigned) {
+ offset = sext<32>(offset);
+ }
+ if (offsetIsScaled) {
+ offset *= sizeof(MemElemType);
+ }
+ EA = XBase + offset'''
+ # Loads write the loaded value to an active destination element and
+ # zero to an inactive one
+ loadMemAccCode = '''
+ if (GpOp_x[elemIndex]) {
+ AA64FpDest_x[elemIndex] = memData;
+ } else {
+ AA64FpDest_x[elemIndex] = 0;
+ }
+ '''
+ # Stores take the data to write from the transfer register element
+ storeMemAccCode = '''
+ memData = AA64FpDest_x[elemIndex];
+ '''
+ # The execute/initiateAcc templates only access memory when the
+ # governing predicate element for this microop is set
+ predCheckCode = 'GpOp_x[elemIndex]'
+ loadIop = InstObjParams('ld1',
+ ('SveGatherLoadVIMicroop'
+ if indexed_addr_form == IndexedAddrForm.VEC_PLUS_IMM
+ else 'SveGatherLoadSVMicroop'),
+ 'MicroOp',
+ {'tpl_header': tplHeader,
+ 'tpl_args': tplArgs,
+ 'memacc_code': loadMemAccCode,
+ 'ea_code' : sveEnabledCheckCode + eaCode,
+ 'pred_check_code' : predCheckCode,
+ 'fa_code' : ''},
+ ['IsMicroop', 'IsMemRef', 'IsLoad'])
+ storeIop = InstObjParams('st1',
+ ('SveScatterStoreVIMicroop'
+ if indexed_addr_form == IndexedAddrForm.VEC_PLUS_IMM
+ else 'SveScatterStoreSVMicroop'),
+ 'MicroOp',
+ {'tpl_header': tplHeader,
+ 'tpl_args': tplArgs,
+ 'memacc_code': storeMemAccCode,
+ 'ea_code' : sveEnabledCheckCode + eaCode,
+ 'pred_check_code' : predCheckCode,
+ 'fa_code' : ''},
+ ['IsMicroop', 'IsMemRef', 'IsStore'])
+ # Class declarations: the declare template depends on the addressing
+ # form, and is used for both the load and the store microop
+ if indexed_addr_form == IndexedAddrForm.VEC_PLUS_IMM:
+ header_output += SveIndexedMemVIMicroopDeclare.subst(loadIop)
+ header_output += SveIndexedMemVIMicroopDeclare.subst(storeIop)
+ else:
+ header_output += SveIndexedMemSVMicroopDeclare.subst(loadIop)
+ header_output += SveIndexedMemSVMicroopDeclare.subst(storeIop)
+ # Method definitions: the same execute/initiateAcc/completeAcc
+ # templates serve both addressing forms
+ exec_output += (
+ SveGatherLoadMicroopExecute.subst(loadIop) +
+ SveGatherLoadMicroopInitiateAcc.subst(loadIop) +
+ SveGatherLoadMicroopCompleteAcc.subst(loadIop) +
+ SveScatterStoreMicroopExecute.subst(storeIop) +
+ SveScatterStoreMicroopInitiateAcc.subst(storeIop) +
+ SveScatterStoreMicroopCompleteAcc.subst(storeIop))
+ # One SveContigMemExecDeclare expansion per supported
+ # (RegElemType, MemElemType) pair; that template is defined outside
+ # this chunk and presumably emits explicit template instantiations
+ # (TODO confirm)
+ for args in gatherLoadTplArgs:
+ substDict = {'tpl_args': '<%s>' % ', '.join(args),
+ 'class_name': (
+ 'SveGatherLoadVIMicroop'
+ if indexed_addr_form == \
+ IndexedAddrForm.VEC_PLUS_IMM
+ else 'SveGatherLoadSVMicroop')}
+ # TODO: this should become SveMemExecDeclare
+ exec_output += SveContigMemExecDeclare.subst(substDict)
+ for args in scatterStoreTplArgs:
+ substDict = {'tpl_args': '<%s>' % ', '.join(args),
+ 'class_name': (
+ 'SveScatterStoreVIMicroop'
+ if indexed_addr_form == \
+ IndexedAddrForm.VEC_PLUS_IMM
+ else 'SveScatterStoreSVMicroop')}
+ # TODO: this should become SveMemExecDeclare
+ exec_output += SveContigMemExecDeclare.subst(substDict)
+
+ # Generates definitions for the first microop of SVE gather loads, required
+ # to propagate the source vector register to the transfer microops
+ def emitSveGatherLoadCpySrcVecMicroop():
+ global header_output, exec_output, decoders
+ # Microop body, prefixed with sveEnabledCheckCode: copies AA64FpOp1
+ # into AA64FpUreg0 one byte element at a time, for the element count
+ # returned by getCurSveVecLen<uint8_t>
+ code = sveEnabledCheckCode + '''
+ unsigned eCount = ArmStaticInst::getCurSveVecLen<uint8_t>(
+ xc->tcBase());
+ for (unsigned i = 0; i < eCount; i++) {
+ AA64FpUreg0_ub[i] = AA64FpOp1_ub[i];
+ }'''
+ # The microop is not flagged as a memory reference; it only moves
+ # register contents
+ iop = InstObjParams('ld1',
+ 'SveGatherLoadCpySrcVecMicroop',
+ 'MicroOp',
+ {'code': code},
+ ['IsMicroop'])
+ header_output += SveGatherLoadCpySrcVecMicroopDeclare.subst(iop)
+ exec_output += SveGatherLoadCpySrcVecMicroopExecute.subst(iop)
+
# LD1[S]{B,H,W,D} (scalar plus immediate)
+ # ST1[S]{B,H,W,D} (scalar plus immediate)
emitSveContigMemInsts(True)
# LD1[S]{B,H,W,D} (scalar plus scalar)
+ # ST1[S]{B,H,W,D} (scalar plus scalar)
emitSveContigMemInsts(False)
# LD1R[S]{B,H,W,D}
@@ -450,4 +872,14 @@ let {{
# LDR (vector), STR (vector)
emitSveMemFillSpill(False)
+ # LD1[S]{B,H,W,D} (vector plus immediate)
+ # ST1[S]{B,H,W,D} (vector plus immediate)
+ emitSveIndexedMemMicroops(IndexedAddrForm.VEC_PLUS_IMM)
+ # LD1[S]{B,H,W,D} (scalar plus vector)
+ # ST1[S]{B,H,W,D} (scalar plus vector)
+ emitSveIndexedMemMicroops(IndexedAddrForm.SCA_PLUS_VEC)
+
+ # Source vector copy microop for gather loads
+ emitSveGatherLoadCpySrcVecMicroop()
+
}};
diff --git a/src/arch/arm/isa/operands.isa b/src/arch/arm/isa/operands.isa
index 0a0469acc..a3b385756 100644
--- a/src/arch/arm/isa/operands.isa
+++ b/src/arch/arm/isa/operands.isa
@@ -1,5 +1,5 @@
// -*- mode:c++ -*-
-// Copyright (c) 2010-2014, 2016 ARM Limited
+// Copyright (c) 2010-2014, 2016-2018 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -541,6 +541,39 @@ def operands {{
'AA64FpDestMergeQ': vectorRegElem('0', 'tud', zeroing = True)
}),
+ # Vector register selected by the instruction's 'base' field; read
+ # per element as the base address by the vector-plus-immediate
+ # indexed memory microops
+ 'AA64FpBase': vectorReg('base',
+ {
+ 'AA64FpBaseP0': vectorRegElem('0'),
+ 'AA64FpBaseP1': vectorRegElem('1'),
+ 'AA64FpBaseP2': vectorRegElem('2'),
+ 'AA64FpBaseP3': vectorRegElem('3'),
+ 'AA64FpBaseS': vectorRegElem('0', 'sf', zeroing = True),
+ 'AA64FpBaseD': vectorRegElem('0', 'df', zeroing = True),
+ 'AA64FpBaseQ': vectorRegElem('0', 'tud', zeroing = True)
+ }),
+
+ # Vector register selected by the instruction's 'offset' field; read
+ # per element as the offset by the scalar-plus-vector indexed memory
+ # microops
+ 'AA64FpOffset': vectorReg('offset',
+ {
+ 'AA64FpOffsetP0': vectorRegElem('0'),
+ 'AA64FpOffsetP1': vectorRegElem('1'),
+ 'AA64FpOffsetP2': vectorRegElem('2'),
+ 'AA64FpOffsetP3': vectorRegElem('3'),
+ 'AA64FpOffsetS': vectorRegElem('0', 'sf', zeroing = True),
+ 'AA64FpOffsetD': vectorRegElem('0', 'df', zeroing = True),
+ 'AA64FpOffsetQ': vectorRegElem('0', 'tud', zeroing = True)
+ }),
+
+ # Vector register at the fixed index VECREG_UREG0; written by the
+ # gather-load source vector copy microop
+ 'AA64FpUreg0': vectorReg('VECREG_UREG0',
+ {
+ 'AA64FpUreg0P0': vectorRegElem('0'),
+ 'AA64FpUreg0P1': vectorRegElem('1'),
+ 'AA64FpUreg0P2': vectorRegElem('2'),
+ 'AA64FpUreg0P3': vectorRegElem('3'),
+ 'AA64FpUreg0S': vectorRegElem('0', 'sf', zeroing = True),
+ 'AA64FpUreg0D': vectorRegElem('0', 'df', zeroing = True),
+ 'AA64FpUreg0Q': vectorRegElem('0', 'tud', zeroing = True)
+ }),
+
# Predicate register operands
'GpOp': vecPredReg('gp'),
'POp1': vecPredReg('op1'),
diff --git a/src/arch/arm/isa/templates/sve_mem.isa b/src/arch/arm/isa/templates/sve_mem.isa
index 8471e44ba..2cdf2ffd4 100644
--- a/src/arch/arm/isa/templates/sve_mem.isa
+++ b/src/arch/arm/isa/templates/sve_mem.isa
@@ -1,4 +1,4 @@
-// Copyright (c) 2017 ARM Limited
+// Copyright (c) 2017-2018 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -384,3 +384,342 @@ def template SveLoadAndReplCompleteAcc {{
}
}};
+// Class declaration template for the per-element transfer microops of SVE
+// indexed memory accesses with vector-plus-immediate addressing. It is
+// expanded for both the gather-load and the scatter-store microop classes.
+def template SveIndexedMemVIMicroopDeclare {{
+ %(tpl_header)s
+ class %(class_name)s : public %(base_class)s
+ {
+ protected:
+ typedef RegElemType TPElem;
+
+ // Register indices: transfer register, governing predicate and
+ // vector of base addresses
+ IntRegIndex dest;
+ IntRegIndex gp;
+ IntRegIndex base;
+ // Immediate offset; multiplied by sizeof(MemElemType) when the
+ // address is formed and when it is disassembled
+ uint64_t imm;
+
+ // Vector element handled by this microop, and the element count used
+ // to pin the destination register
+ int elemIndex;
+ int numElems;
+
+ unsigned memAccessFlags;
+
+ public:
+ %(class_name)s(const char* mnem, ExtMachInst machInst,
+ OpClass __opClass, IntRegIndex _dest, IntRegIndex _gp,
+ IntRegIndex _base, uint64_t _imm, int _elemIndex, int _numElems)
+ : %(base_class)s(mnem, machInst, %(op_class)s),
+ dest(_dest), gp(_gp), base(_base), imm(_imm),
+ elemIndex(_elemIndex), numElems(_numElems),
+ memAccessFlags(ArmISA::TLB::AllowUnaligned |
+ ArmISA::TLB::MustBeOne)
+ {
+ %(constructor)s;
+ if (_opClass == MemReadOp && elemIndex == 0) {
+ // The first micro-op is responsible for pinning the
+ // destination register
+ _destRegIdx[0].setNumPinnedWrites(numElems - 1);
+ }
+ }
+
+ Fault execute(ExecContext *, Trace::InstRecord *) const;
+ Fault initiateAcc(ExecContext *, Trace::InstRecord *) const;
+ Fault completeAcc(PacketPtr, ExecContext *, Trace::InstRecord *) const;
+
+ // Body supplied through the fa_code substitution
+ virtual void
+ annotateFault(ArmFault *fault)
+ {
+ %(fa_code)s
+ }
+
+ // Prints "<mnemonic> {<dest>}, <gp>[/z], [<base>{, #<byte offset>}]
+ // (uop elem <n> tfer)"; "/z" is printed for loads only and the
+ // offset is omitted when imm is zero
+ std::string
+ generateDisassembly(Addr pc, const SymbolTable *symtab) const
+ {
+ // TODO: add suffix to transfer register
+ std::stringstream ss;
+ printMnemonic(ss, "", false);
+ ccprintf(ss, "{");
+ printVecReg(ss, dest, true);
+ ccprintf(ss, "}, ");
+ printVecPredReg(ss, gp);
+ if (_opClass == MemReadOp) {
+ ccprintf(ss, "/z");
+ }
+ ccprintf(ss, ", [");
+ printVecReg(ss, base, true);
+ if (imm != 0) {
+ ccprintf(ss, ", #%d", imm * sizeof(MemElemType));
+ }
+ ccprintf(ss, "] (uop elem %d tfer)", elemIndex);
+ return ss.str();
+ }
+ };
+}};
+
+// Class declaration template for the per-element transfer microops of SVE
+// indexed memory accesses with scalar-plus-vector addressing. It is
+// expanded for both the gather-load and the scatter-store microop classes.
+def template SveIndexedMemSVMicroopDeclare {{
+ %(tpl_header)s
+ class %(class_name)s : public %(base_class)s
+ {
+ protected:
+ typedef RegElemType TPElem;
+
+ // Register indices: transfer register, governing predicate, scalar
+ // base and vector of offsets
+ IntRegIndex dest;
+ IntRegIndex gp;
+ IntRegIndex base;
+ IntRegIndex offset;
+
+ // Offset handling applied when the address is formed: mask to 32
+ // bits, sign-extend from 32 bits, scale by sizeof(MemElemType)
+ bool offsetIs32;
+ bool offsetIsSigned;
+ bool offsetIsScaled;
+
+ // Vector element handled by this microop, and the element count used
+ // to pin the destination register
+ int elemIndex;
+ int numElems;
+
+ unsigned memAccessFlags;
+
+ public:
+ %(class_name)s(const char* mnem, ExtMachInst machInst,
+ OpClass __opClass, IntRegIndex _dest, IntRegIndex _gp,
+ IntRegIndex _base, IntRegIndex _offset, bool _offsetIs32,
+ bool _offsetIsSigned, bool _offsetIsScaled, int _elemIndex,
+ int _numElems)
+ : %(base_class)s(mnem, machInst, %(op_class)s),
+ dest(_dest), gp(_gp), base(_base), offset(_offset),
+ offsetIs32(_offsetIs32), offsetIsSigned(_offsetIsSigned),
+ offsetIsScaled(_offsetIsScaled), elemIndex(_elemIndex),
+ numElems(_numElems),
+ memAccessFlags(ArmISA::TLB::AllowUnaligned |
+ ArmISA::TLB::MustBeOne)
+ {
+ %(constructor)s;
+ if (_opClass == MemReadOp && elemIndex == 0) {
+ // The first micro-op is responsible for pinning the
+ // destination register
+ _destRegIdx[0].setNumPinnedWrites(numElems - 1);
+ }
+ }
+
+ Fault execute(ExecContext *, Trace::InstRecord *) const;
+ Fault initiateAcc(ExecContext *, Trace::InstRecord *) const;
+ Fault completeAcc(PacketPtr, ExecContext *, Trace::InstRecord *) const;
+
+ // Body supplied through the fa_code substitution
+ virtual void
+ annotateFault(ArmFault *fault)
+ {
+ %(fa_code)s
+ }
+
+ // Prints "<mnemonic> {<dest>}, <gp>[/z], [<base>, <offset>]
+ // (uop elem <n> tfer)"; "/z" is printed for loads only. The offset
+ // flags are not reflected in the output.
+ std::string
+ generateDisassembly(Addr pc, const SymbolTable *symtab) const
+ {
+ // TODO: add suffix to transfer and base registers
+ std::stringstream ss;
+ printMnemonic(ss, "", false);
+ ccprintf(ss, "{");
+ printVecReg(ss, dest, true);
+ ccprintf(ss, "}, ");
+ printVecPredReg(ss, gp);
+ if (_opClass == MemReadOp) {
+ ccprintf(ss, "/z");
+ }
+ ccprintf(ss, ", [");
+ printIntReg(ss, base);
+ ccprintf(ss, ", ");
+ printVecReg(ss, offset, true);
+ ccprintf(ss, "] (uop elem %d tfer)", elemIndex);
+ return ss.str();
+ }
+ };
+}};
+
+// execute() for one gather-load transfer microop: computes the element's
+// effective address, reads memory with readMemAtomic only when the
+// predicate check passes, then runs the memory-access code and writes back
+// the operands if no fault was raised.
+def template SveGatherLoadMicroopExecute {{
+ %(tpl_header)s
+ Fault %(class_name)s%(tpl_args)s::execute(ExecContext *xc,
+ Trace::InstRecord *traceData) const
+ {
+ Addr EA;
+ Fault fault = NoFault;
+ bool aarch64 M5_VAR_USED = true;
+
+ %(op_decl)s;
+ %(op_rd)s;
+ %(ea_code)s;
+
+ // Zero-initialised so the value is well defined when the predicate
+ // check fails and no read is performed (as in completeAcc)
+ MemElemType memData = 0;
+
+ if (%(pred_check_code)s) {
+ fault = readMemAtomic(xc, traceData, EA, memData,
+ this->memAccessFlags);
+ }
+
+ if (fault == NoFault) {
+ %(memacc_code)s;
+ %(op_wb)s;
+ }
+
+ return fault;
+ }
+}};
+
+// initiateAcc() for one gather-load transfer microop: computes the
+// element's effective address and starts the read with initiateMemRead
+// when the predicate check passes.
+def template SveGatherLoadMicroopInitiateAcc {{
+ %(tpl_header)s
+ Fault %(class_name)s%(tpl_args)s::initiateAcc(ExecContext *xc,
+ Trace::InstRecord *traceData) const
+ {
+ Addr EA;
+ Fault fault = NoFault;
+ bool aarch64 M5_VAR_USED = true;
+
+ %(op_src_decl)s;
+ %(op_rd)s;
+ %(ea_code)s;
+
+ // Only passed to initiateMemRead; never read in this function
+ MemElemType memData;
+
+ if (%(pred_check_code)s) {
+ fault = initiateMemRead(xc, traceData, EA, memData,
+ this->memAccessFlags);
+ } else {
+ // Inactive element: no read is started; presumably this tells
+ // the CPU model to skip the memory access (TODO confirm)
+ xc->setMemAccPredicate(false);
+ }
+
+ return fault;
+ }
+}};
+
+// completeAcc() for one gather-load transfer microop: extracts the loaded
+// value from the packet when the predicate check passes, then runs the
+// memory-access code and writes back the operands.
+def template SveGatherLoadMicroopCompleteAcc {{
+ %(tpl_header)s
+ Fault %(class_name)s%(tpl_args)s::completeAcc(PacketPtr pkt,
+ ExecContext *xc, Trace::InstRecord *traceData) const
+ {
+ Fault fault = NoFault;
+ bool aarch64 M5_VAR_USED = true;
+
+ %(op_decl)s;
+ %(op_rd)s;
+
+ // Stays zero when the predicate check fails and the packet is not
+ // read
+ MemElemType memData = 0;
+ if (%(pred_check_code)s) {
+ getMem(pkt, memData, traceData);
+ }
+
+ if (fault == NoFault) {
+ %(memacc_code)s;
+ }
+
+ if (fault == NoFault) {
+ %(op_wb)s;
+ }
+
+ return fault;
+ }
+}};
+
+// execute() for one scatter-store transfer microop: computes the element's
+// effective address, fetches the store data through the memory-access
+// code and writes it with writeMemAtomic when the predicate check passes.
+def template SveScatterStoreMicroopExecute {{
+ %(tpl_header)s
+ Fault %(class_name)s%(tpl_args)s::execute(ExecContext *xc,
+ Trace::InstRecord *traceData) const
+ {
+ Addr EA;
+ Fault fault = NoFault;
+ bool aarch64 M5_VAR_USED = true;
+
+ %(op_decl)s;
+ %(op_rd)s;
+ %(ea_code)s;
+
+ // Assigned by the memory-access code that follows, regardless of
+ // the predicate
+ MemElemType memData;
+ %(memacc_code)s;
+
+ if (%(pred_check_code)s) {
+ fault = writeMemAtomic(xc, traceData, memData, EA,
+ this->memAccessFlags, NULL);
+ }
+
+ if (fault == NoFault) {
+ %(op_wb)s;
+ }
+
+ return fault;
+ }
+}};
+
+// initiateAcc() for one scatter-store transfer microop: computes the
+// element's effective address, fetches the store data through the
+// memory-access code and starts the write with writeMemTiming when the
+// predicate check passes.
+def template SveScatterStoreMicroopInitiateAcc {{
+ %(tpl_header)s
+ Fault %(class_name)s%(tpl_args)s::initiateAcc(ExecContext *xc,
+ Trace::InstRecord *traceData) const
+ {
+ Addr EA;
+ Fault fault = NoFault;
+ bool aarch64 M5_VAR_USED = true;
+
+ %(op_decl)s;
+ %(op_rd)s;
+ %(ea_code)s;
+
+ // Assigned by the memory-access code that follows, regardless of
+ // the predicate
+ MemElemType memData;
+ %(memacc_code)s;
+
+ if (%(pred_check_code)s) {
+ fault = writeMemTiming(xc, traceData, memData, EA,
+ this->memAccessFlags, NULL);
+ } else {
+ // Inactive element: no write is started.
+ // NOTE(review): the gather-load initiateAcc template calls
+ // setMemAccPredicate(false) in the equivalent branch; confirm
+ // that setPredicate is the intended call here
+ xc->setPredicate(false);
+ }
+
+ return fault;
+ }
+}};
+
+// completeAcc() for one scatter-store transfer microop: does nothing and
+// always returns NoFault; the packet is not inspected.
+def template SveScatterStoreMicroopCompleteAcc {{
+ %(tpl_header)s
+ Fault %(class_name)s%(tpl_args)s::completeAcc(PacketPtr pkt,
+ ExecContext *xc, Trace::InstRecord *traceData) const
+ {
+ return NoFault;
+ }
+}};
+
+// Class declaration template for the gather-load source vector copy
+// microop, which only implements execute() and reuses the parent macro-op's
+// disassembly.
+def template SveGatherLoadCpySrcVecMicroopDeclare {{
+ class SveGatherLoadCpySrcVecMicroop : public MicroOp
+ {
+ protected:
+ // Index of the source vector register
+ IntRegIndex op1;
+
+ // Parent macro-op, used only for disassembly; the pointer is stored
+ // as given and never deleted by this class
+ StaticInst *macroOp;
+
+ public:
+ SveGatherLoadCpySrcVecMicroop(const char* mnem, ExtMachInst machInst,
+ IntRegIndex _op1, StaticInst *_macroOp)
+ : MicroOp(mnem, machInst, SimdAluOp), op1(_op1), macroOp(_macroOp)
+ {
+ %(constructor)s;
+ }
+
+ Fault execute(ExecContext *, Trace::InstRecord *) const;
+
+ // Prints the macro-op's disassembly followed by " (uop src vec cpy)"
+ std::string
+ generateDisassembly(Addr pc, const SymbolTable *symtab) const
+ {
+ std::stringstream ss;
+ ccprintf(ss, "%s", macroOp->disassemble(pc, symtab));
+ ccprintf(ss, " (uop src vec cpy)");
+ return ss.str();
+ }
+ };
+}};
+
+// execute() for the gather-load source vector copy microop: reads the
+// operands, runs the substituted copy code and writes back the operands if
+// no fault was raised.
+def template SveGatherLoadCpySrcVecMicroopExecute {{
+ Fault SveGatherLoadCpySrcVecMicroop::execute(ExecContext *xc,
+ Trace::InstRecord *traceData) const
+ {
+ Fault fault = NoFault;
+ %(op_decl)s;
+ %(op_rd)s;
+
+ %(code)s;
+ // Write-back is skipped if the substituted code set a fault
+ if (fault == NoFault)
+ {
+ %(op_wb)s;
+ }
+
+ return fault;
+ }
+}};
diff --git a/src/arch/arm/registers.hh b/src/arch/arm/registers.hh
index 8ee48edc0..8e6ce799b 100644
--- a/src/arch/arm/registers.hh
+++ b/src/arch/arm/registers.hh
@@ -88,6 +88,7 @@ const int NumVecSpecialRegs = 8;
const int NumIntRegs = NUM_INTREGS;
const int NumFloatRegs = NumFloatV8ArchRegs + NumFloatSpecialRegs;
const int NumVecRegs = NumVecV8ArchRegs + NumVecSpecialRegs;
+// Index of the vector register backing the AA64FpUreg0 operand, which the
+// gather-load source vector copy microop writes. Presumably the first
+// register after the NumVecV8ArchRegs architectural ones -- TODO confirm
+// that NumVecV8ArchRegs is 32.
+const int VECREG_UREG0 = 32;
const int NumVecPredRegs = 17; // P0-P15, FFR
const int PREDREG_FFR = 16;
const int NumCCRegs = NUM_CCREGS;