summaryrefslogtreecommitdiff
path: root/src/arch/arm
diff options
context:
space:
mode:
authorJavier Setoain <javier.setoain@arm.com>2018-04-04 16:53:17 +0100
committerGiacomo Gabrielli <giacomo.gabrielli@arm.com>2019-07-27 20:51:31 +0000
commit2e47c6c5ed37dc1db0ea35f51b2f7d4afc0da45e (patch)
tree9423825e5ebeb0728745819b3f2afd2a2dc99cc2 /src/arch/arm
parent46da8fb805407cdc224abe788e8c666f3b0dadd1 (diff)
downloadgem5-2e47c6c5ed37dc1db0ea35f51b2f7d4afc0da45e.tar.xz
arch-arm: Add support for SVE load/store structures
Change-Id: I4d9cde18dfc3d478eacc156de6a4a9721eb9e2ff
Signed-off-by: Giacomo Gabrielli <giacomo.gabrielli@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/13524
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
Diffstat (limited to 'src/arch/arm')
-rw-r--r--src/arch/arm/insts/sve_macromem.hh289
-rw-r--r--src/arch/arm/isa/formats/sve_2nd_level.isa50
-rw-r--r--src/arch/arm/isa/insts/sve_mem.isa468
-rw-r--r--src/arch/arm/isa/operands.isa45
-rw-r--r--src/arch/arm/isa/templates/sve_mem.isa446
-rw-r--r--src/arch/arm/registers.hh7
6 files changed, 1304 insertions, 1 deletions
diff --git a/src/arch/arm/insts/sve_macromem.hh b/src/arch/arm/insts/sve_macromem.hh
index b365dcb4b..861318122 100644
--- a/src/arch/arm/insts/sve_macromem.hh
+++ b/src/arch/arm/insts/sve_macromem.hh
@@ -45,6 +45,295 @@
namespace ArmISA {
+// Macro-op for SVE LD{2,3,4} structure loads, scalar-plus-scalar form.
+// Decomposes into numregs memory microops that fill the temporary
+// interleave registers INTRLVREG0..INTRLVREG(numregs-1), followed by
+// numregs de-interleave microops that unpack them into the architectural
+// destination registers zt, z(t+1)%32, ...
+template <typename Element,
+          template <typename> class MicroopLdMemType,
+          template <typename> class MicroopDeIntrlvType>
+class SveLdStructSS : public PredMacroOp
+{
+  protected:
+    IntRegIndex dest;    // first destination vector register (Zt)
+    IntRegIndex gp;      // governing predicate register (Pg)
+    IntRegIndex base;    // scalar base register (Xn)
+    IntRegIndex offset;  // scalar offset register (Xm)
+    uint8_t numregs;     // registers per structure (2, 3 or 4)
+
+  public:
+    SveLdStructSS(const char* mnem, ExtMachInst machInst, OpClass __opClass,
+            IntRegIndex _dest, IntRegIndex _gp, IntRegIndex _base,
+            IntRegIndex _offset, uint8_t _numregs)
+        : PredMacroOp(mnem, machInst, __opClass),
+          dest(_dest), gp(_gp), base(_base), offset(_offset), numregs(_numregs)
+    {
+        numMicroops = numregs * 2;
+
+        microOps = new StaticInstPtr[numMicroops];
+
+        // Memory microops: load interleaved data into the temporaries.
+        for (int i = 0; i < numregs; ++i) {
+            microOps[i] = new MicroopLdMemType<Element>(
+                mnem, machInst, static_cast<IntRegIndex>(INTRLVREG0 + i),
+                _gp, _base, _offset, _numregs, i);
+        }
+        // De-interleave microops: destination register list wraps mod 32.
+        for (int i = 0; i < numregs; ++i) {
+            microOps[i + numregs] = new MicroopDeIntrlvType<Element>(
+                mnem, machInst, static_cast<IntRegIndex>((_dest + i) % 32),
+                _numregs, i, this);
+        }
+
+        microOps[0]->setFirstMicroop();
+        microOps[numMicroops - 1]->setLastMicroop();
+
+        // All microops but the last commit together with the macro-op.
+        for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) {
+            (*uop)->setDelayedCommit();
+        }
+    }
+
+    // Only the microops execute; reaching this indicates a decoder bug.
+    Fault
+    execute(ExecContext *, Trace::InstRecord *) const
+    {
+        panic("Execute method called when it shouldn't!");
+        return NoFault;
+    }
+
+    // Prints e.g. "ld2b {z0.b, z1.b}, p0/z, [x0, x1]".
+    std::string
+    generateDisassembly(Addr pc, const SymbolTable *symtab) const
+    {
+        std::stringstream ss;
+        printMnemonic(ss, "", false);
+        ccprintf(ss, "{");
+        for (int i = 0; i < numregs; ++i) {
+            printVecReg(ss, (dest + i) % 32, true);
+            if (i < numregs - 1)
+                ccprintf(ss, ", ");
+        }
+        ccprintf(ss, "}, ");
+        printVecPredReg(ss, gp);
+        ccprintf(ss, "/z, [");
+        printIntReg(ss, base);
+        ccprintf(ss, ", ");
+        printIntReg(ss, offset);
+        ccprintf(ss, "]");
+        return ss.str();
+    }
+};
+
+// Macro-op for SVE ST{2,3,4} structure stores, scalar-plus-scalar form.
+// Decomposes into numregs interleave microops that pack the architectural
+// source registers into the temporaries INTRLVREG0..INTRLVREG(numregs-1),
+// followed by numregs memory microops that store them out.
+template <typename Element,
+          template <typename> class MicroopStMemType,
+          template <typename> class MicroopIntrlvType>
+class SveStStructSS : public PredMacroOp
+{
+  protected:
+    IntRegIndex dest;    // first register of the store list (Zt); a
+                         // source for stores, despite the member name
+    IntRegIndex gp;      // governing predicate register (Pg)
+    IntRegIndex base;    // scalar base register (Xn)
+    IntRegIndex offset;  // scalar offset register (Xm)
+    uint8_t numregs;     // registers per structure (2, 3 or 4)
+
+  public:
+    SveStStructSS(const char* mnem, ExtMachInst machInst, OpClass __opClass,
+            IntRegIndex _dest, IntRegIndex _gp, IntRegIndex _base,
+            IntRegIndex _offset, uint8_t _numregs)
+        : PredMacroOp(mnem, machInst, __opClass),
+          dest(_dest), gp(_gp), base(_base), offset(_offset), numregs(_numregs)
+    {
+        numMicroops = numregs * 2;
+
+        microOps = new StaticInstPtr[numMicroops];
+
+        // Interleave microops: pack the source registers into temporaries.
+        for (int i = 0; i < numregs; ++i) {
+            microOps[i] = new MicroopIntrlvType<Element>(
+                mnem, machInst, static_cast<IntRegIndex>(INTRLVREG0 + i),
+                _dest, _numregs, i, this);
+        }
+
+        // Memory microops: store the interleaved temporaries.
+        for (int i = 0; i < numregs; ++i) {
+            microOps[i + numregs] = new MicroopStMemType<Element>(
+                mnem, machInst, static_cast<IntRegIndex>(INTRLVREG0 + i),
+                _gp, _base, _offset, _numregs, i);
+        }
+
+        microOps[0]->setFirstMicroop();
+        microOps[numMicroops - 1]->setLastMicroop();
+
+        // All microops but the last commit together with the macro-op.
+        for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) {
+            (*uop)->setDelayedCommit();
+        }
+    }
+
+    // Only the microops execute; reaching this indicates a decoder bug.
+    Fault
+    execute(ExecContext *, Trace::InstRecord *) const
+    {
+        panic("Execute method called when it shouldn't!");
+        return NoFault;
+    }
+
+    // Prints e.g. "st2b {z0.b, z1.b}, p0, [x0, x1]" (no "/z": stores
+    // have no zeroing-predication qualifier).
+    std::string
+    generateDisassembly(Addr pc, const SymbolTable *symtab) const
+    {
+        std::stringstream ss;
+        printMnemonic(ss, "", false);
+        ccprintf(ss, "{");
+        for (int i = 0; i < numregs; ++i) {
+            printVecReg(ss, (dest + i) % 32, true);
+            if (i < numregs - 1)
+                ccprintf(ss, ", ");
+        }
+        ccprintf(ss, "}, ");
+        printVecPredReg(ss, gp);
+        ccprintf(ss, ", [");
+        printIntReg(ss, base);
+        ccprintf(ss, ", ");
+        printIntReg(ss, offset);
+        ccprintf(ss, "]");
+        return ss.str();
+    }
+};
+
+
+// Macro-op for SVE LD{2,3,4} structure loads, scalar-plus-immediate form.
+// Same decomposition as SveLdStructSS, but the address is Xn plus an
+// immediate in vector-length multiples (the decoder pre-multiplies the
+// encoded immediate by numregs before constructing this macro-op).
+template <typename Element,
+          template <typename> class MicroopLdMemType,
+          template <typename> class MicroopDeIntrlvType>
+class SveLdStructSI : public PredMacroOp
+{
+  protected:
+    IntRegIndex dest;  // first destination vector register (Zt)
+    IntRegIndex gp;    // governing predicate register (Pg)
+    IntRegIndex base;  // scalar base register (Xn)
+    int64_t imm;       // immediate offset, in vector-length units
+    uint8_t numregs;   // registers per structure (2, 3 or 4)
+
+  public:
+    SveLdStructSI(const char* mnem, ExtMachInst machInst, OpClass __opClass,
+            IntRegIndex _dest, IntRegIndex _gp, IntRegIndex _base,
+            int64_t _imm, uint8_t _numregs)
+        : PredMacroOp(mnem, machInst, __opClass),
+          dest(_dest), gp(_gp), base(_base), imm(_imm), numregs(_numregs)
+    {
+        numMicroops = numregs * 2;
+
+        microOps = new StaticInstPtr[numMicroops];
+
+        // Memory microops: load interleaved data into the temporaries.
+        for (int i = 0; i < numregs; ++i) {
+            microOps[i] = new MicroopLdMemType<Element>(
+                mnem, machInst, static_cast<IntRegIndex>(INTRLVREG0 + i),
+                _gp, _base, _imm, _numregs, i);
+        }
+        // De-interleave microops: destination register list wraps mod 32.
+        for (int i = 0; i < numregs; ++i) {
+            microOps[i + numregs] = new MicroopDeIntrlvType<Element>(
+                mnem, machInst, static_cast<IntRegIndex>((_dest + i) % 32),
+                _numregs, i, this);
+        }
+
+        microOps[0]->setFirstMicroop();
+        microOps[numMicroops - 1]->setLastMicroop();
+
+        // All microops but the last commit together with the macro-op.
+        for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) {
+            (*uop)->setDelayedCommit();
+        }
+    }
+
+    // Only the microops execute; reaching this indicates a decoder bug.
+    Fault
+    execute(ExecContext *, Trace::InstRecord *) const
+    {
+        panic("Execute method called when it shouldn't!");
+        return NoFault;
+    }
+
+    // Prints e.g. "ld2b {z0.b, z1.b}, p0/z, [x0, #2, MUL VL]"; the
+    // immediate is omitted when zero.
+    std::string
+    generateDisassembly(Addr pc, const SymbolTable *symtab) const
+    {
+        std::stringstream ss;
+        printMnemonic(ss, "", false);
+        ccprintf(ss, "{");
+        for (int i = 0; i < numregs; ++i) {
+            printVecReg(ss, (dest + i) % 32, true);
+            if (i < numregs - 1)
+                ccprintf(ss, ", ");
+        }
+        ccprintf(ss, "}, ");
+        printVecPredReg(ss, gp);
+        ccprintf(ss, "/z, [");
+        printIntReg(ss, base);
+        if (imm != 0) {
+            ccprintf(ss, ", #%d, MUL VL", imm);
+        }
+        ccprintf(ss, "]");
+        return ss.str();
+    }
+};
+
+// Macro-op for SVE ST{2,3,4} structure stores, scalar-plus-immediate form.
+// Same decomposition as SveStStructSS, but the address is Xn plus an
+// immediate in vector-length multiples (pre-multiplied by numregs in the
+// decoder).
+template <typename Element,
+          template <typename> class MicroopStMemType,
+          template <typename> class MicroopIntrlvType>
+class SveStStructSI : public PredMacroOp
+{
+  protected:
+    IntRegIndex dest;  // first register of the store list (Zt); a
+                       // source for stores, despite the member name
+    IntRegIndex gp;    // governing predicate register (Pg)
+    IntRegIndex base;  // scalar base register (Xn)
+    int64_t imm;       // immediate offset, in vector-length units
+    uint8_t numregs;   // registers per structure (2, 3 or 4)
+
+  public:
+    SveStStructSI(const char* mnem, ExtMachInst machInst, OpClass __opClass,
+            IntRegIndex _dest, IntRegIndex _gp, IntRegIndex _base,
+            int64_t _imm, uint8_t _numregs)
+        : PredMacroOp(mnem, machInst, __opClass),
+          dest(_dest), gp(_gp), base(_base), imm(_imm), numregs(_numregs)
+    {
+        numMicroops = numregs * 2;
+
+        microOps = new StaticInstPtr[numMicroops];
+
+        // Interleave microops: pack the source registers into temporaries.
+        for (int i = 0; i < numregs; ++i) {
+            microOps[i] = new MicroopIntrlvType<Element>(
+                mnem, machInst, static_cast<IntRegIndex>(INTRLVREG0 + i),
+                _dest, _numregs, i, this);
+        }
+
+        // Memory microops: store the interleaved temporaries.
+        for (int i = 0; i < numregs; ++i) {
+            microOps[i + numregs] = new MicroopStMemType<Element>(
+                mnem, machInst, static_cast<IntRegIndex>(INTRLVREG0 + i),
+                _gp, _base, _imm, _numregs, i);
+        }
+
+        microOps[0]->setFirstMicroop();
+        microOps[numMicroops - 1]->setLastMicroop();
+
+        // All microops but the last commit together with the macro-op.
+        for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) {
+            (*uop)->setDelayedCommit();
+        }
+    }
+
+    // Only the microops execute; reaching this indicates a decoder bug.
+    Fault
+    execute(ExecContext *, Trace::InstRecord *) const
+    {
+        panic("Execute method called when it shouldn't!");
+        return NoFault;
+    }
+
+    // Prints e.g. "st2b {z0.b, z1.b}, p0, [x0, #2, MUL VL]"; the
+    // immediate is omitted when zero.
+    std::string
+    generateDisassembly(Addr pc, const SymbolTable *symtab) const
+    {
+        std::stringstream ss;
+        printMnemonic(ss, "", false);
+        ccprintf(ss, "{");
+        for (int i = 0; i < numregs; ++i) {
+            printVecReg(ss, (dest + i) % 32, true);
+            if (i < numregs - 1)
+                ccprintf(ss, ", ");
+        }
+        ccprintf(ss, "}, ");
+        printVecPredReg(ss, gp);
+        ccprintf(ss, ", [");
+        printIntReg(ss, base);
+        if (imm != 0) {
+            ccprintf(ss, ", #%d, MUL VL", imm);
+        }
+        ccprintf(ss, "]");
+        return ss.str();
+    }
+};
+
template <typename RegElemType, typename MemElemType,
template <typename, typename> class MicroopType,
template <typename> class FirstFaultWritebackMicroopType>
diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa b/src/arch/arm/isa/formats/sve_2nd_level.isa
index 69d80e294..def17812d 100644
--- a/src/arch/arm/isa/formats/sve_2nd_level.isa
+++ b/src/arch/arm/isa/formats/sve_2nd_level.isa
@@ -3123,6 +3123,18 @@ namespace Aarch64
StaticInstPtr
decodeSveLoadStructsSS(ExtMachInst machInst)
{
+        IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
+        IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
+        IntRegIndex rm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16);
+        IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 12, 10);
+        uint8_t msz = bits(machInst, 24, 23);  // element size (B/H/W/D)
+        uint8_t num = bits(machInst, 22, 21);  // registers - 1
+
+        // num == 0 is not a structure load; Rm == 0x1f is treated as an
+        // unallocated encoding for this form.
+        if (rm != 0x1f && num != 0) {
+            num++;
+            return decodeSveStructLoadSSInsts(msz, machInst,
+                    zt, pg, rn, rm, num);
+        }
return new Unknown64(machInst);
} // decodeSveLoadStructsSS
@@ -3135,6 +3147,19 @@ namespace Aarch64
StaticInstPtr
decodeSveLoadStructsSI(ExtMachInst machInst)
{
+        IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
+        IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
+        int64_t imm = sext<4>(bits(machInst, 19, 16));
+        IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 12, 10);
+        uint8_t msz = bits(machInst, 24, 23);  // element size (B/H/W/D)
+        uint8_t num = bits(machInst, 22, 21);  // registers - 1
+
+        // num == 0 is not a structure load.
+        if (num != 0) {
+            num++;
+            // The encoded immediate is in units of num vector lengths.
+            imm *= num;
+            return decodeSveStructLoadSIInsts(msz, machInst,
+                    zt, pg, rn, imm, num);
+        }
return new Unknown64(machInst);
} // decodeSveLoadStructsSI
@@ -3331,12 +3356,37 @@ namespace Aarch64
StaticInstPtr
decodeSveStoreStructsSS(ExtMachInst machInst)
{
+        IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
+        IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
+        IntRegIndex rm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16);
+        IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 12, 10);
+        uint8_t msz = bits(machInst, 24, 23);  // element size (B/H/W/D)
+        uint8_t num = bits(machInst, 22, 21);  // registers - 1
+
+        // num == 0 is not a structure store; Rm == 0x1f is treated as an
+        // unallocated encoding for this form.
+        if (rm != 0x1f && num != 0) {
+            num++;
+            return decodeSveStructStoreSSInsts(msz, machInst,
+                    zt, pg, rn, rm, num);
+        }
return new Unknown64(machInst);
} // decodeSveStoreStructsSS
StaticInstPtr
decodeSveStoreStructsSI(ExtMachInst machInst)
{
+        IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
+        IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
+        int64_t imm = sext<4>(bits(machInst, 19, 16));
+        IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 12, 10);
+        uint8_t msz = bits(machInst, 24, 23);  // element size (B/H/W/D)
+        uint8_t num = bits(machInst, 22, 21);  // registers - 1
+
+        // num == 0 is not a structure store.
+        if (num != 0) {
+            num++;
+            // The encoded immediate is in units of num vector lengths.
+            imm *= num;
+            return decodeSveStructStoreSIInsts(msz, machInst,
+                    zt, pg, rn, imm, num);
+        }
return new Unknown64(machInst);
} // decodeSveStoreStructsSI
diff --git a/src/arch/arm/isa/insts/sve_mem.isa b/src/arch/arm/isa/insts/sve_mem.isa
index e776deb59..32a078dbd 100644
--- a/src/arch/arm/isa/insts/sve_mem.isa
+++ b/src/arch/arm/isa/insts/sve_mem.isa
@@ -204,6 +204,238 @@ output header {{
output decoder {{
+    // Build the LD{2,3,4} scalar-plus-immediate macro-op for element type
+    // etype, dispatching on the number of registers in the structure.
+    template <class etype>
+    StaticInstPtr
+    decodeSveStructLoadSIInstsByNReg(uint8_t esize, ExtMachInst machInst,
+            IntRegIndex zt, IntRegIndex pg, IntRegIndex xn,
+            int64_t imm, int numregs)
+    {
+        // Mnemonics indexed [numregs][esize]; rows 0-1 are unused since
+        // structure loads always transfer 2-4 registers.
+        static const char* nm[5][4] = {
+            { nullptr, nullptr, nullptr, nullptr},
+            { nullptr, nullptr, nullptr, nullptr},
+            { "ld2b", "ld2h", "ld2w", "ld2d" },
+            { "ld3b", "ld3h", "ld3w", "ld3d" },
+            { "ld4b", "ld4h", "ld4w", "ld4d" } };
+
+        switch (numregs) {
+          case 2:
+            return new SveLdStructSI<etype,
+                    SveLoadRegImmMicroop,
+                    SveDeIntrlv2Microop>(
+                    nm[numregs][esize], machInst, MemReadOp,
+                    zt, pg, xn, imm, numregs);
+          case 3:
+            return new SveLdStructSI<etype,
+                    SveLoadRegImmMicroop,
+                    SveDeIntrlv3Microop>(
+                    nm[numregs][esize], machInst, MemReadOp,
+                    zt, pg, xn, imm, numregs);
+          case 4:
+            return new SveLdStructSI<etype,
+                    SveLoadRegImmMicroop,
+                    SveDeIntrlv4Microop>(
+                    nm[numregs][esize], machInst, MemReadOp,
+                    zt, pg, xn, imm, numregs);
+        }
+        return new Unknown64(machInst);
+    }
+
+    // Dispatch an LD{2,3,4} (scalar plus immediate) on the msz element
+    // size field to the matching element-type specialization.
+    StaticInstPtr
+    decodeSveStructLoadSIInsts(uint8_t esize, ExtMachInst machInst,
+            IntRegIndex zt, IntRegIndex pg, IntRegIndex xn,
+            int64_t imm, int numregs)
+    {
+        switch (esize) {
+          case 0:
+            return decodeSveStructLoadSIInstsByNReg<uint8_t>(esize,
+                    machInst, zt, pg, xn, imm, numregs);
+          case 1:
+            return decodeSveStructLoadSIInstsByNReg<uint16_t>(esize,
+                    machInst, zt, pg, xn, imm, numregs);
+          case 2:
+            return decodeSveStructLoadSIInstsByNReg<uint32_t>(esize,
+                    machInst, zt, pg, xn, imm, numregs);
+          case 3:
+            return decodeSveStructLoadSIInstsByNReg<uint64_t>(esize,
+                    machInst, zt, pg, xn, imm, numregs);
+        }
+        return new Unknown64(machInst);
+    }
+
+    // Build the ST{2,3,4} scalar-plus-immediate macro-op for element type
+    // etype, dispatching on the number of registers in the structure.
+    template <class etype>
+    StaticInstPtr
+    decodeSveStructStoreSIInstsByNReg(uint8_t esize, ExtMachInst machInst,
+            IntRegIndex zt, IntRegIndex pg, IntRegIndex xn,
+            int64_t imm, int numregs)
+    {
+        // Mnemonics indexed [numregs][esize]; rows 0-1 are unused since
+        // structure stores always transfer 2-4 registers.
+        static const char* nm[5][4] = {
+            { nullptr, nullptr, nullptr, nullptr},
+            { nullptr, nullptr, nullptr, nullptr},
+            { "st2b", "st2h", "st2w", "st2d" },
+            { "st3b", "st3h", "st3w", "st3d" },
+            { "st4b", "st4h", "st4w", "st4d" } };
+
+        switch (numregs) {
+          case 2:
+            return new SveStStructSI<etype,
+                    SveStoreRegImmMicroop,
+                    SveIntrlv2Microop>(
+                    nm[numregs][esize], machInst, MemWriteOp,
+                    zt, pg, xn, imm, numregs);
+          case 3:
+            return new SveStStructSI<etype,
+                    SveStoreRegImmMicroop,
+                    SveIntrlv3Microop>(
+                    nm[numregs][esize], machInst, MemWriteOp,
+                    zt, pg, xn, imm, numregs);
+          case 4:
+            return new SveStStructSI<etype,
+                    SveStoreRegImmMicroop,
+                    SveIntrlv4Microop>(
+                    nm[numregs][esize], machInst, MemWriteOp,
+                    zt, pg, xn, imm, numregs);
+        }
+        return new Unknown64(machInst);
+    }
+
+    // Dispatch an ST{2,3,4} (scalar plus immediate) on the msz element
+    // size field to the matching element-type specialization.
+    StaticInstPtr
+    decodeSveStructStoreSIInsts(uint8_t esize, ExtMachInst machInst,
+            IntRegIndex zt, IntRegIndex pg, IntRegIndex xn,
+            int64_t imm, int numregs)
+    {
+        switch (esize) {
+          case 0:
+            return decodeSveStructStoreSIInstsByNReg<uint8_t>(esize,
+                    machInst, zt, pg, xn, imm, numregs);
+          case 1:
+            return decodeSveStructStoreSIInstsByNReg<uint16_t>(esize,
+                    machInst, zt, pg, xn, imm, numregs);
+          case 2:
+            return decodeSveStructStoreSIInstsByNReg<uint32_t>(esize,
+                    machInst, zt, pg, xn, imm, numregs);
+          case 3:
+            return decodeSveStructStoreSIInstsByNReg<uint64_t>(esize,
+                    machInst, zt, pg, xn, imm, numregs);
+        }
+        return new Unknown64(machInst);
+    }
+
+    // Build the LD{2,3,4} scalar-plus-scalar macro-op for element type
+    // etype, dispatching on the number of registers in the structure.
+    template <class etype>
+    StaticInstPtr
+    decodeSveStructLoadSSInstsByNReg(uint8_t esize, ExtMachInst machInst,
+            IntRegIndex zt, IntRegIndex pg, IntRegIndex xn,
+            IntRegIndex xm, int numregs)
+    {
+        // Mnemonics indexed [numregs][esize]; rows 0-1 are unused since
+        // structure loads always transfer 2-4 registers.
+        static const char* nm[5][4] = {
+            { nullptr, nullptr, nullptr, nullptr},
+            { nullptr, nullptr, nullptr, nullptr},
+            { "ld2b", "ld2h", "ld2w", "ld2d" },
+            { "ld3b", "ld3h", "ld3w", "ld3d" },
+            { "ld4b", "ld4h", "ld4w", "ld4d" } };
+
+        switch (numregs) {
+          case 2:
+            return new SveLdStructSS<etype,
+                    SveLoadRegRegMicroop,
+                    SveDeIntrlv2Microop>(
+                    nm[numregs][esize], machInst, MemReadOp,
+                    zt, pg, xn, xm, numregs);
+          case 3:
+            return new SveLdStructSS<etype,
+                    SveLoadRegRegMicroop,
+                    SveDeIntrlv3Microop>(
+                    nm[numregs][esize], machInst, MemReadOp,
+                    zt, pg, xn, xm, numregs);
+          case 4:
+            return new SveLdStructSS<etype,
+                    SveLoadRegRegMicroop,
+                    SveDeIntrlv4Microop>(
+                    nm[numregs][esize], machInst, MemReadOp,
+                    zt, pg, xn, xm, numregs);
+        }
+        return new Unknown64(machInst);
+    }
+
+    // Dispatch an LD{2,3,4} (scalar plus scalar) on the msz element size
+    // field to the matching element-type specialization.
+    StaticInstPtr
+    decodeSveStructLoadSSInsts(uint8_t esize, ExtMachInst machInst,
+            IntRegIndex zt, IntRegIndex pg, IntRegIndex xn,
+            IntRegIndex xm, int numregs)
+    {
+        switch (esize) {
+          case 0:
+            return decodeSveStructLoadSSInstsByNReg<uint8_t>(esize,
+                    machInst, zt, pg, xn, xm, numregs);
+          case 1:
+            return decodeSveStructLoadSSInstsByNReg<uint16_t>(esize,
+                    machInst, zt, pg, xn, xm, numregs);
+          case 2:
+            return decodeSveStructLoadSSInstsByNReg<uint32_t>(esize,
+                    machInst, zt, pg, xn, xm, numregs);
+          case 3:
+            return decodeSveStructLoadSSInstsByNReg<uint64_t>(esize,
+                    machInst, zt, pg, xn, xm, numregs);
+        }
+        return new Unknown64(machInst);
+    }
+
+    // Build the ST{2,3,4} scalar-plus-scalar macro-op for element type
+    // etype, dispatching on the number of registers in the structure.
+    template <class etype>
+    StaticInstPtr
+    decodeSveStructStoreSSInstsByNReg(uint8_t esize, ExtMachInst machInst,
+            IntRegIndex zt, IntRegIndex pg, IntRegIndex xn,
+            IntRegIndex xm, int numregs)
+    {
+        // Mnemonics indexed [numregs][esize]; rows 0-1 are unused since
+        // structure stores always transfer 2-4 registers.
+        static const char* nm[5][4] = {
+            { nullptr, nullptr, nullptr, nullptr},
+            { nullptr, nullptr, nullptr, nullptr},
+            { "st2b", "st2h", "st2w", "st2d" },
+            { "st3b", "st3h", "st3w", "st3d" },
+            { "st4b", "st4h", "st4w", "st4d" } };
+
+        switch (numregs) {
+          case 2:
+            return new SveStStructSS<etype,
+                    SveStoreRegRegMicroop,
+                    SveIntrlv2Microop>(
+                    nm[numregs][esize], machInst, MemWriteOp,
+                    zt, pg, xn, xm, numregs);
+          case 3:
+            return new SveStStructSS<etype,
+                    SveStoreRegRegMicroop,
+                    SveIntrlv3Microop>(
+                    nm[numregs][esize], machInst, MemWriteOp,
+                    zt, pg, xn, xm, numregs);
+          case 4:
+            return new SveStStructSS<etype,
+                    SveStoreRegRegMicroop,
+                    SveIntrlv4Microop>(
+                    nm[numregs][esize], machInst, MemWriteOp,
+                    zt, pg, xn, xm, numregs);
+        }
+        return new Unknown64(machInst);
+    }
+
+    // Dispatch an ST{2,3,4} (scalar plus scalar) on the msz element size
+    // field to the matching element-type specialization.
+    StaticInstPtr
+    decodeSveStructStoreSSInsts(uint8_t esize, ExtMachInst machInst,
+            IntRegIndex zt, IntRegIndex pg, IntRegIndex xn,
+            IntRegIndex xm, int numregs)
+    {
+        switch (esize) {
+          case 0:
+            return decodeSveStructStoreSSInstsByNReg<uint8_t>(esize,
+                    machInst, zt, pg, xn, xm, numregs);
+          case 1:
+            return decodeSveStructStoreSSInstsByNReg<uint16_t>(esize,
+                    machInst, zt, pg, xn, xm, numregs);
+          case 2:
+            return decodeSveStructStoreSSInstsByNReg<uint32_t>(esize,
+                    machInst, zt, pg, xn, xm, numregs);
+          case 3:
+            return decodeSveStructStoreSSInstsByNReg<uint64_t>(esize,
+                    machInst, zt, pg, xn, xm, numregs);
+        }
+        return new Unknown64(machInst);
+    }
+
StaticInstPtr
decodeSveGatherLoadVIInsts(uint8_t dtype, ExtMachInst machInst,
IntRegIndex zt, IntRegIndex pg, IntRegIndex zn,
@@ -1018,6 +1250,231 @@ let {{
header_output += SveGatherLoadCpySrcVecMicroopDeclare.subst(iop)
exec_output += SveGatherLoadCpySrcVecMicroopExecute.subst(iop)
+    # Generates the interleave microops used by SVE struct stores: each
+    # microop packs elements drawn round-robin from 2-4 architectural
+    # source vectors into one INTRLVREG temporary (chosen by regIndex).
+    def emitSveInterleaveMicroop():
+        global header_output, exec_output, decoders
+        # Interleave from two source vectors.
+        code2 = sveEnabledCheckCode + '''
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+                xc->tcBase());
+        for (unsigned int i = 0; i < eCount; ++i) {
+            unsigned int absIdx = regIndex * eCount + i;
+            unsigned int srcIdx = absIdx / numRegs;
+            unsigned int srcVec = absIdx % numRegs;
+            if (srcVec == 0)
+                AA64FpDest_x[i] = AA64FpOp1V0S_x[srcIdx];
+            else if (srcVec == 1)
+                AA64FpDest_x[i] = AA64FpOp1V1S_x[srcIdx];
+        }'''
+
+        # Interleave from three source vectors.
+        code3 = sveEnabledCheckCode + '''
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+                xc->tcBase());
+        for (unsigned int i = 0; i < eCount; ++i) {
+            unsigned int absIdx = regIndex * eCount + i;
+            unsigned int srcIdx = absIdx / numRegs;
+            unsigned int srcVec = absIdx % numRegs;
+            if (srcVec == 0)
+                AA64FpDest_x[i] = AA64FpOp1V0S_x[srcIdx];
+            else if (srcVec == 1)
+                AA64FpDest_x[i] = AA64FpOp1V1S_x[srcIdx];
+            else if (srcVec == 2)
+                AA64FpDest_x[i] = AA64FpOp1V2S_x[srcIdx];
+        }'''
+
+        # Interleave from four source vectors.
+        code4 = sveEnabledCheckCode + '''
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+                xc->tcBase());
+        for (unsigned int i = 0; i < eCount; ++i) {
+            unsigned int absIdx = regIndex * eCount + i;
+            unsigned int srcIdx = absIdx / numRegs;
+            unsigned int srcVec = absIdx % numRegs;
+            if (srcVec == 0)
+                AA64FpDest_x[i] = AA64FpOp1V0S_x[srcIdx];
+            else if (srcVec == 1)
+                AA64FpDest_x[i] = AA64FpOp1V1S_x[srcIdx];
+            else if (srcVec == 2)
+                AA64FpDest_x[i] = AA64FpOp1V2S_x[srcIdx];
+            else if (srcVec == 3)
+                AA64FpDest_x[i] = AA64FpOp1V3S_x[srcIdx];
+        }'''
+
+        iop2 = InstObjParams('intrlv',
+                'SveIntrlv2Microop',
+                'MicroOp',
+                {'code': code2},
+                ['IsMicroop'])
+        iop3 = InstObjParams('intrlv',
+                'SveIntrlv3Microop',
+                'MicroOp',
+                {'code': code3},
+                ['IsMicroop'])
+        iop4 = InstObjParams('intrlv',
+                'SveIntrlv4Microop',
+                'MicroOp',
+                {'code': code4},
+                ['IsMicroop'])
+        header_output += SveIntrlvMicroopDeclare.subst(iop2);
+        header_output += SveIntrlvMicroopDeclare.subst(iop3);
+        header_output += SveIntrlvMicroopDeclare.subst(iop4);
+        exec_output += SveIntrlvMicroopExecute.subst(iop2);
+        exec_output += SveIntrlvMicroopExecute.subst(iop3);
+        exec_output += SveIntrlvMicroopExecute.subst(iop4);
+        # Explicit template instantiations for each element type.
+        for type in ('uint8_t', 'uint16_t', 'uint32_t', 'uint64_t'):
+            for nreg in range(2,5):
+                substDict = {'targs' : type,
+                    'class_name' : 'SveIntrlv' + str(nreg) + 'Microop'}
+                exec_output += SveIntrlvMicroopExecDeclare.subst(substDict)
+
+    # Generates the de-interleave microops used by SVE struct loads: each
+    # microop extracts every numRegs-th element (starting at regIndex)
+    # from the INTRLVREG temporaries into one architectural destination.
+    # The execute template is shared with the interleave microops.
+    def emitSveDeInterleaveMicroop():
+        global header_output, exec_output, decoders
+        # De-interleave from two temporaries.
+        code2 = sveEnabledCheckCode + '''
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+                xc->tcBase());
+        for (unsigned int i = 0; i < eCount; ++i) {
+            unsigned int absIdx = (regIndex + numRegs * i);
+            unsigned int srcIdx = absIdx % eCount;
+            unsigned int srcVec = absIdx / eCount;
+            if (srcVec == 0)
+                AA64FpDest_x[i] = AA64IntrlvReg0_x[srcIdx];
+            else if(srcVec == 1)
+                AA64FpDest_x[i] = AA64IntrlvReg1_x[srcIdx];
+        }'''
+
+        # De-interleave from three temporaries.
+        code3 = sveEnabledCheckCode + '''
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+                xc->tcBase());
+        for (unsigned int i = 0; i < eCount; ++i) {
+            unsigned int absIdx = (regIndex + numRegs * i);
+            unsigned int srcIdx = absIdx % eCount;
+            unsigned int srcVec = absIdx / eCount;
+            if (srcVec == 0)
+                AA64FpDest_x[i] = AA64IntrlvReg0_x[srcIdx];
+            else if(srcVec == 1)
+                AA64FpDest_x[i] = AA64IntrlvReg1_x[srcIdx];
+            else if(srcVec == 2)
+                AA64FpDest_x[i] = AA64IntrlvReg2_x[srcIdx];
+        }'''
+
+        # De-interleave from four temporaries.
+        code4 = sveEnabledCheckCode + '''
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+                xc->tcBase());
+        for (unsigned int i = 0; i < eCount; ++i) {
+            unsigned int absIdx = (regIndex + numRegs * i);
+            unsigned int srcIdx = absIdx % eCount;
+            unsigned int srcVec = absIdx / eCount;
+            if (srcVec == 0)
+                AA64FpDest_x[i] = AA64IntrlvReg0_x[srcIdx];
+            else if(srcVec == 1)
+                AA64FpDest_x[i] = AA64IntrlvReg1_x[srcIdx];
+            else if(srcVec == 2)
+                AA64FpDest_x[i] = AA64IntrlvReg2_x[srcIdx];
+            else if(srcVec == 3)
+                AA64FpDest_x[i] = AA64IntrlvReg3_x[srcIdx];
+        }'''
+
+        iop2 = InstObjParams('deintrlv',
+                'SveDeIntrlv2Microop',
+                'MicroOp',
+                {'code': code2},
+                ['IsMicroop'])
+        iop3 = InstObjParams('deintrlv',
+                'SveDeIntrlv3Microop',
+                'MicroOp',
+                {'code': code3},
+                ['IsMicroop'])
+        iop4 = InstObjParams('deintrlv',
+                'SveDeIntrlv4Microop',
+                'MicroOp',
+                {'code': code4},
+                ['IsMicroop'])
+        header_output += SveDeIntrlvMicroopDeclare.subst(iop2);
+        header_output += SveDeIntrlvMicroopDeclare.subst(iop3);
+        header_output += SveDeIntrlvMicroopDeclare.subst(iop4);
+        exec_output += SveIntrlvMicroopExecute.subst(iop2);
+        exec_output += SveIntrlvMicroopExecute.subst(iop3);
+        exec_output += SveIntrlvMicroopExecute.subst(iop4);
+        # Explicit template instantiations for each element type.
+        for type in ('uint8_t', 'uint16_t', 'uint32_t', 'uint64_t'):
+            for nreg in range(2,5):
+                substDict = {'targs' : type,
+                    'class_name' : 'SveDeIntrlv' + str(nreg) + 'Microop'}
+                exec_output += SveIntrlvMicroopExecDeclare.subst(substDict)
+
+    # Generates definitions for SVE struct load/store microops
+    def emitSveStructMemInsts(offsetIsImm):
+        global header_output, exec_output, decoders
+        # Each microop transfers one vector's worth of data; regIndex
+        # selects which slice of the interleaved stream it accesses.
+        eaCode = SPAlignmentCheckCode + '''
+        int memAccessSize = eCount * sizeof(Element);
+        EA = memAccessSize * regIndex + XBase + '''
+        if offsetIsImm:
+            # NOTE(review): unlike the register branch below, this string
+            # has no trailing ';' -- confirm the EA template closes the
+            # statement.
+            eaCode += '((int64_t) this->imm * eCount * sizeof(Element))'
+        else:
+            eaCode += '(XOffset * sizeof(Element));'
+        # Predicate elements are consumed in interleaved order (gpIdx);
+        # inactive elements load zero.
+        loadMemAccCode = '''
+        for (int i = 0; i < eCount; i++) {
+            int gpIdx = (regIndex * eCount + i) / numRegs;
+            if (GpOp_x[gpIdx]) {
+                AA64FpDest_x[i] = memDataView[i];
+            } else {
+                AA64FpDest_x[i] = 0;
+            }
+        }
+        '''
+        # Inactive elements have their write-enable bytes cleared so the
+        # store does not touch memory for them.
+        storeMemAccCode = '''
+        for (int i = 0; i < eCount; i++) {
+            int gpIdx = (regIndex * eCount + i) / numRegs;
+            if (GpOp_x[gpIdx]) {
+                memDataView[i] = AA64FpDest_x[i];
+            } else {
+                memDataView[i] = 0;
+                for (int j = 0; j < sizeof(Element); j++) {
+                    wrEn[sizeof(Element) * i + j] = false;
+                }
+            }
+        }
+        '''
+        storeWrEnableCode = '''
+        auto wrEn = std::vector<bool>(sizeof(Element) * eCount, true);
+        '''
+        loadIop = InstObjParams('ldxx',
+            'SveLoadRegImmMicroop' if offsetIsImm else 'SveLoadRegRegMicroop',
+            'MicroOp',
+            {'targs': 'Element',
+             'memacc_code': loadMemAccCode,
+             'ea_code' : sveEnabledCheckCode + eaCode,
+             'fa_code' : ''},
+            ['IsMemRef', 'IsLoad', 'IsMicroop'])
+        storeIop = InstObjParams('stxx',
+            'SveStoreRegImmMicroop' if offsetIsImm
+                                    else 'SveStoreRegRegMicroop',
+            'MicroOp',
+            {'targs': 'Element',
+             'wren_code': storeWrEnableCode,
+             'memacc_code': storeMemAccCode,
+             'ea_code' : sveEnabledCheckCode + eaCode,
+             'fa_code' : ''},
+            ['IsMemRef', 'IsStore', 'IsMicroop'])
+        if offsetIsImm:
+            header_output += SveStructMemSIMicroopDeclare.subst(loadIop)
+            header_output += SveStructMemSIMicroopDeclare.subst(storeIop)
+        else:
+            header_output += SveStructMemSSMicroopDeclare.subst(loadIop)
+            header_output += SveStructMemSSMicroopDeclare.subst(storeIop)
+        exec_output += (
+            SveStructLoadExecute.subst(loadIop) +
+            SveStructLoadInitiateAcc.subst(loadIop) +
+            SveStructLoadCompleteAcc.subst(loadIop) +
+            SveStructStoreExecute.subst(storeIop) +
+            SveStructStoreInitiateAcc.subst(storeIop) +
+            SveStructStoreCompleteAcc.subst(storeIop))
+        # Explicit template instantiations for each element type.
+        tplArgs = ('uint8_t', 'uint16_t', 'uint32_t', 'uint64_t')
+        for type in tplArgs:
+            substDict = {'targs': type,
+                         'class_name': 'SveLoadRegImmMicroop' if offsetIsImm
+                                       else 'SveLoadRegRegMicroop'}
+            exec_output += SveStructMemExecDeclare.subst(substDict)
+            substDict['class_name'] = ('SveStoreRegImmMicroop' if offsetIsImm
+                                       else 'SveStoreRegRegMicroop')
+            exec_output += SveStructMemExecDeclare.subst(substDict)
+
# LD1[S]{B,H,W,D} (scalar plus immediate)
# ST1[S]{B,H,W,D} (scalar plus immediate)
# LDNF1[S]{B,H,W,D} (scalar plus immediate)
@@ -1030,6 +1487,13 @@ let {{
# LD1R[S]{B,H,W,D}
emitSveLoadAndRepl()
+ # LD{2,3,4}{B,H,W,D} (scalar plus immediate)
+ # ST{2,3,4}{B,H,W,D} (scalar plus immediate)
+ emitSveStructMemInsts(offsetIsImm = True)
+ # LD{2,3,4}{B,H,W,D} (scalar plus scalar)
+ # ST{2,3,4}{B,H,W,D} (scalar plus scalar)
+ emitSveStructMemInsts(offsetIsImm = False)
+
# LDR (predicate), STR (predicate)
emitSveMemFillSpill(True)
# LDR (vector), STR (vector)
@@ -1049,4 +1513,8 @@ let {{
# Source vector copy microop for gather loads
emitSveGatherLoadCpySrcVecMicroop()
+
+ # ST/LD struct de/interleave microops
+ emitSveInterleaveMicroop()
+ emitSveDeInterleaveMicroop()
}};
diff --git a/src/arch/arm/isa/operands.isa b/src/arch/arm/isa/operands.isa
index aaa64e7b0..5eae9b4b9 100644
--- a/src/arch/arm/isa/operands.isa
+++ b/src/arch/arm/isa/operands.isa
@@ -530,6 +530,51 @@ def operands {{
'AA64FpDestQV1L': vectorRegElem('0', 'tud', zeroing = True)
}),
+ # Temporary registers for SVE interleaving
+ 'AA64IntrlvReg0': vectorReg('INTRLVREG0',
+ {
+ 'AA64IntrlvReg0P0': vectorRegElem('0'),
+ 'AA64IntrlvReg0P1': vectorRegElem('1'),
+ 'AA64IntrlvReg0P2': vectorRegElem('2'),
+ 'AA64IntrlvReg0P3': vectorRegElem('3'),
+ 'AA64IntrlvReg0S': vectorRegElem('0', 'sf', zeroing = True),
+ 'AA64IntrlvReg0D': vectorRegElem('0', 'df', zeroing = True),
+ 'AA64IntrlvReg0Q': vectorRegElem('0', 'tud', zeroing = True)
+ }),
+
+ 'AA64IntrlvReg1': vectorReg('INTRLVREG1',
+ {
+ 'AA64IntrlvReg1P0': vectorRegElem('0'),
+ 'AA64IntrlvReg1P1': vectorRegElem('1'),
+ 'AA64IntrlvReg1P2': vectorRegElem('2'),
+ 'AA64IntrlvReg1P3': vectorRegElem('3'),
+ 'AA64IntrlvReg1S': vectorRegElem('0', 'sf', zeroing = True),
+ 'AA64IntrlvReg1D': vectorRegElem('0', 'df', zeroing = True),
+ 'AA64IntrlvReg1Q': vectorRegElem('0', 'tud', zeroing = True)
+ }),
+
+ 'AA64IntrlvReg2': vectorReg('INTRLVREG2',
+ {
+ 'AA64IntrlvReg2P0': vectorRegElem('0'),
+ 'AA64IntrlvReg2P1': vectorRegElem('1'),
+ 'AA64IntrlvReg2P2': vectorRegElem('2'),
+ 'AA64IntrlvReg2P3': vectorRegElem('3'),
+ 'AA64IntrlvReg2S': vectorRegElem('0', 'sf', zeroing = True),
+ 'AA64IntrlvReg2D': vectorRegElem('0', 'df', zeroing = True),
+ 'AA64IntrlvReg2Q': vectorRegElem('0', 'tud', zeroing = True)
+ }),
+
+ 'AA64IntrlvReg3': vectorReg('INTRLVREG3',
+ {
+ 'AA64IntrlvReg3P0': vectorRegElem('0'),
+ 'AA64IntrlvReg3P1': vectorRegElem('1'),
+ 'AA64IntrlvReg3P2': vectorRegElem('2'),
+ 'AA64IntrlvReg3P3': vectorRegElem('3'),
+ 'AA64IntrlvReg3S': vectorRegElem('0', 'sf', zeroing = True),
+ 'AA64IntrlvReg3D': vectorRegElem('0', 'df', zeroing = True),
+ 'AA64IntrlvReg3Q': vectorRegElem('0', 'tud', zeroing = True)
+ }),
+
'AA64FpDestMerge': vectorReg('dest',
{
'AA64FpDestMergeP0': vectorRegElem('0'),
diff --git a/src/arch/arm/isa/templates/sve_mem.isa b/src/arch/arm/isa/templates/sve_mem.isa
index 5e2e55333..dced5f4a5 100644
--- a/src/arch/arm/isa/templates/sve_mem.isa
+++ b/src/arch/arm/isa/templates/sve_mem.isa
@@ -815,3 +815,449 @@ def template SveGatherLoadCpySrcVecMicroopExecute {{
return fault;
}
}};
+
+// Microop declaration for SVE structure load/store instructions in the
+// scalar-plus-immediate addressing form (e.g. LD2B {Zt1.B, Zt2.B},
+// Pg/Z, [<Xn|SP>{, #imm, MUL VL}]).  Each microop transfers the data
+// belonging to one of the structure's registers; regIndex selects which.
+def template SveStructMemSIMicroopDeclare {{
+    template<class _Element>
+    class %(class_name)s : public %(base_class)s
+    {
+      protected:
+        typedef _Element Element;
+        typedef _Element TPElem;
+
+        IntRegIndex dest;   // destination/source vector register
+        IntRegIndex gp;     // governing predicate register
+        IntRegIndex base;   // scalar (X or SP) base register
+        int64_t imm;        // immediate offset, scaled by element size
+
+        uint8_t numRegs;    // number of registers in the structure
+        int regIndex;       // which register this microop transfers
+
+        unsigned memAccessFlags;
+
+        bool baseIsSP;
+
+      public:
+        %(class_name)s(const char* mnem, ExtMachInst machInst,
+                IntRegIndex _dest, IntRegIndex _gp, IntRegIndex _base,
+                int64_t _imm, uint8_t _numRegs, int _regIndex)
+            : %(base_class)s(mnem, machInst, %(op_class)s),
+              dest(_dest), gp(_gp), base(_base), imm(_imm),
+              numRegs(_numRegs), regIndex(_regIndex),
+              memAccessFlags(ArmISA::TLB::AllowUnaligned |
+                      ArmISA::TLB::MustBeOne)
+        {
+            %(constructor)s;
+            // The base is a scalar register and may be the stack pointer.
+            baseIsSP = isSP(_base);
+        }
+
+        Fault execute(ExecContext *, Trace::InstRecord *) const;
+        Fault initiateAcc(ExecContext *, Trace::InstRecord *) const;
+        Fault completeAcc(PacketPtr, ExecContext *, Trace::InstRecord *) const;
+
+        virtual void
+        annotateFault(ArmFault *fault)
+        {
+            %(fa_code)s
+        }
+
+        std::string
+        generateDisassembly(Addr pc, const SymbolTable *symtab) const
+        {
+            std::stringstream ss;
+            printMnemonic(ss, "", false);
+            ccprintf(ss, "{");
+            // The interleaving temporaries are not architectural
+            // registers, so give them special names.
+            switch (dest) {
+              case INTRLVREG0:
+                ccprintf(ss, "INTRLV0");
+                break;
+              case INTRLVREG1:
+                ccprintf(ss, "INTRLV1");
+                break;
+              case INTRLVREG2:
+                ccprintf(ss, "INTRLV2");
+                break;
+              case INTRLVREG3:
+                ccprintf(ss, "INTRLV3");
+                break;
+              default:
+                printVecReg(ss, dest, true);
+                break;
+            }
+            ccprintf(ss, "}, ");
+            printVecPredReg(ss, gp);
+            if (_opClass == MemReadOp) {
+                ccprintf(ss, "/z");
+            }
+            ccprintf(ss, ", [");
+            // The base is a scalar register (cf. isSP() in the
+            // constructor), so print it as an integer register, not a
+            // vector one.
+            printIntReg(ss, base);
+            if (imm != 0) {
+                ccprintf(ss, ", #%d", imm * sizeof(Element));
+            }
+            ccprintf(ss, "] (uop reg %d tfer)", regIndex);
+            return ss.str();
+        }
+    };
+}};
+
+// Explicitly instantiates the three memory-access methods of a struct
+// load/store microop class for the element type given by %(targs)s.
+def template SveStructMemExecDeclare {{
+    template
+    Fault %(class_name)s<%(targs)s>::execute(ExecContext *,
+            Trace::InstRecord *) const;
+
+    template
+    Fault %(class_name)s<%(targs)s>::initiateAcc(ExecContext *,
+            Trace::InstRecord *) const;
+
+    template
+    Fault %(class_name)s<%(targs)s>::completeAcc(PacketPtr,
+            ExecContext *, Trace::InstRecord *) const;
+}};
+
+// Atomic-mode execute() for an SVE struct-load microop: builds the
+// effective address, performs a single memory read covering this
+// microop's transfer, and writes the result back.
+def template SveStructLoadExecute {{
+    template <class Element>
+    Fault %(class_name)s<Element>::execute(ExecContext *xc,
+            Trace::InstRecord *traceData) const
+    {
+        Addr EA;
+        Fault fault = NoFault;
+        bool aarch64 M5_VAR_USED = true;
+        // Current SVE vector length, in Element-sized units.
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+                xc->tcBase());
+
+        %(op_decl)s;
+        %(op_rd)s;
+        %(ea_code)s;
+
+        // Staging buffer for the bytes read from memory; memacc_code
+        // moves them into the destination register.
+        TheISA::VecRegContainer memData;
+        auto memDataView = memData.as<Element>();
+
+        if (fault == NoFault) {
+            fault = xc->readMem(EA, memData.raw_ptr<uint8_t>(), memAccessSize,
+                    this->memAccessFlags);
+            %(memacc_code)s;
+        }
+
+        if (fault == NoFault) {
+            %(op_wb)s;
+        }
+
+        return fault;
+    }
+}};
+
+// Timing-mode first half of a struct load: computes the address and
+// issues the memory read; the data is consumed in completeAcc().
+def template SveStructLoadInitiateAcc {{
+    template <class Element>
+    Fault %(class_name)s<Element>::initiateAcc(ExecContext *xc,
+            Trace::InstRecord *traceData) const
+    {
+        Addr EA;
+        Fault fault = NoFault;
+        bool aarch64 M5_VAR_USED = true;
+        // Current SVE vector length, in Element-sized units.
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+                xc->tcBase());
+
+        // Only source operands are needed here; no writeback happens
+        // until completeAcc().
+        %(op_src_decl)s;
+        %(op_rd)s;
+
+        %(ea_code)s;
+
+        if (fault == NoFault) {
+            fault = xc->initiateMemRead(EA, memAccessSize,
+                    this->memAccessFlags);
+        }
+
+        return fault;
+    }
+}};
+
+// Timing-mode second half of a struct load: copies the returned packet
+// data into a staging buffer and writes it to the destination.
+def template SveStructLoadCompleteAcc {{
+    template <class Element>
+    Fault %(class_name)s<Element>::completeAcc(PacketPtr pkt,
+            ExecContext *xc, Trace::InstRecord *traceData) const
+    {
+        Fault fault = NoFault;
+        bool aarch64 M5_VAR_USED = true;
+        // Current SVE vector length, in Element-sized units.
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+                xc->tcBase());
+
+        %(op_decl)s;
+        %(op_rd)s;
+
+        TheISA::VecRegContainer memData;
+        auto memDataView = memData.as<Element>();
+
+        // NOTE(review): assumes pkt->getSize() never exceeds the size of
+        // the vector register container -- confirm against the request
+        // issued in initiateAcc().
+        memcpy(memData.raw_ptr<uint8_t>(), pkt->getPtr<uint8_t>(),
+                pkt->getSize());
+
+        if (fault == NoFault) {
+            %(memacc_code)s;
+        }
+
+        if (fault == NoFault) {
+            %(op_wb)s;
+        }
+
+        return fault;
+    }
+}};
+
+// Atomic-mode execute() for an SVE struct-store microop: gathers the
+// data to store into a staging buffer, builds the write-enable mask,
+// and performs a single memory write.
+def template SveStructStoreExecute {{
+    template <class Element>
+    Fault %(class_name)s<Element>::execute(ExecContext *xc,
+            Trace::InstRecord *traceData) const
+    {
+        Addr EA;
+        Fault fault = NoFault;
+        bool aarch64 M5_VAR_USED = true;
+        // Current SVE vector length, in Element-sized units.
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+                xc->tcBase());
+
+        %(op_decl)s;
+        %(op_rd)s;
+        %(ea_code)s;
+
+        // Staging buffer holding the bytes to be written.
+        TheISA::VecRegContainer memData;
+        auto memDataView = memData.as<Element>();
+
+        // Builds wrEn, the byte-enable vector passed to writeMem below.
+        %(wren_code)s;
+
+        if (fault == NoFault) {
+            %(memacc_code)s;
+        }
+
+        if (fault == NoFault) {
+            fault = xc->writeMem(memData.raw_ptr<uint8_t>(), memAccessSize, EA,
+                    this->memAccessFlags, NULL, wrEn);
+        }
+
+        if (fault == NoFault) {
+            %(op_wb)s;
+        }
+
+        return fault;
+    }
+}};
+
+// Timing-mode first half of a struct store: identical to execute()
+// except that there is no writeback; the access completes (trivially)
+// in completeAcc().
+def template SveStructStoreInitiateAcc {{
+    template <class Element>
+    Fault %(class_name)s<Element>::initiateAcc(ExecContext *xc,
+            Trace::InstRecord *traceData) const
+    {
+        Addr EA;
+        Fault fault = NoFault;
+        bool aarch64 M5_VAR_USED = true;
+        // Current SVE vector length, in Element-sized units.
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+                xc->tcBase());
+
+        %(op_decl)s;
+        %(op_rd)s;
+        %(ea_code)s;
+
+        // Staging buffer holding the bytes to be written.
+        TheISA::VecRegContainer memData;
+        auto memDataView = memData.as<Element>();
+
+        // Builds wrEn, the byte-enable vector passed to writeMem below.
+        %(wren_code)s;
+
+        if (fault == NoFault) {
+            %(memacc_code)s;
+        }
+
+        if (fault == NoFault) {
+            fault = xc->writeMem(memData.raw_ptr<uint8_t>(), memAccessSize, EA,
+                    this->memAccessFlags, NULL, wrEn);
+        }
+
+        return fault;
+    }
+}};
+
+// Stores have no data to write back: the access was fully issued in
+// initiateAcc(), so completion has nothing left to do.
+def template SveStructStoreCompleteAcc {{
+    template <class Element>
+    Fault %(class_name)s<Element>::completeAcc(PacketPtr pkt,
+            ExecContext *xc, Trace::InstRecord *traceData) const
+    {
+        return NoFault;
+    }
+}};
+
+// Microop declaration for SVE structure load/store instructions in the
+// scalar-plus-scalar addressing form (e.g. LD2B {Zt1.B, Zt2.B}, Pg/Z,
+// [<Xn|SP>, <Xm>]).  Each microop transfers the data belonging to one
+// of the structure's registers; regIndex selects which.
+def template SveStructMemSSMicroopDeclare {{
+    template <class _Element>
+    class %(class_name)s : public %(base_class)s
+    {
+      protected:
+        typedef _Element Element;
+        typedef _Element TPElem;
+
+        IntRegIndex dest;   // destination/source vector register
+        IntRegIndex gp;     // governing predicate register
+        IntRegIndex base;   // scalar (X or SP) base register
+        IntRegIndex offset; // scalar offset register
+
+        uint8_t numRegs;    // number of registers in the structure
+        int regIndex;       // which register this microop transfers
+
+        unsigned memAccessFlags;
+
+        bool baseIsSP;
+
+      public:
+        %(class_name)s(const char* mnem, ExtMachInst machInst,
+                IntRegIndex _dest, IntRegIndex _gp, IntRegIndex _base,
+                IntRegIndex _offset, uint8_t _numRegs, int _regIndex)
+            : %(base_class)s(mnem, machInst, %(op_class)s),
+              dest(_dest), gp(_gp), base(_base), offset(_offset),
+              numRegs(_numRegs), regIndex(_regIndex),
+              memAccessFlags(ArmISA::TLB::AllowUnaligned |
+                      ArmISA::TLB::MustBeOne)
+        {
+            %(constructor)s;
+            // The base is a scalar register and may be the stack pointer.
+            baseIsSP = isSP(_base);
+        }
+
+        Fault execute(ExecContext *, Trace::InstRecord *) const;
+        Fault initiateAcc(ExecContext *, Trace::InstRecord *) const;
+        Fault completeAcc(PacketPtr, ExecContext *, Trace::InstRecord *) const;
+
+        virtual void
+        annotateFault(ArmFault *fault)
+        {
+            %(fa_code)s
+        }
+
+        std::string
+        generateDisassembly(Addr pc, const SymbolTable *symtab) const
+        {
+            std::stringstream ss;
+            printMnemonic(ss, "", false);
+            ccprintf(ss, "{");
+            // The interleaving temporaries are not architectural
+            // registers, so give them special names.
+            switch (dest) {
+              case INTRLVREG0:
+                ccprintf(ss, "INTRLV0");
+                break;
+              case INTRLVREG1:
+                ccprintf(ss, "INTRLV1");
+                break;
+              case INTRLVREG2:
+                ccprintf(ss, "INTRLV2");
+                break;
+              case INTRLVREG3:
+                ccprintf(ss, "INTRLV3");
+                break;
+              default:
+                printVecReg(ss, dest, true);
+                break;
+            }
+            ccprintf(ss, "}, ");
+            printVecPredReg(ss, gp);
+            if (_opClass == MemReadOp) {
+                ccprintf(ss, "/z");
+            }
+            ccprintf(ss, ", [");
+            printIntReg(ss, base);
+            ccprintf(ss, ", ");
+            // The offset of the scalar-plus-scalar form is a scalar (X)
+            // register, so print it as an integer register, not a
+            // vector one.
+            printIntReg(ss, offset);
+            ccprintf(ss, "] (uop reg %d tfer)", regIndex);
+            return ss.str();
+        }
+    };
+}};
+
+// Declaration of the interleaving microop used by the SVE load/store
+// structure macroops; the actual data movement between the source
+// registers and the INTRLVREGn temporaries is supplied via %(code)s in
+// the execute template.  The parent macroop pointer is kept only so
+// disassembly can show the full instruction.
+def template SveIntrlvMicroopDeclare {{
+    template <class _Element>
+    class %(class_name)s: public %(base_class)s
+    {
+      protected:
+        typedef _Element Element;
+        typedef _Element TPElem;
+        IntRegIndex dest;
+        IntRegIndex op1;
+        uint8_t numRegs;
+        int regIndex;
+
+        // Non-owning pointer to the parent macroop, used only for
+        // disassembly.
+        StaticInst *macroOp;
+
+      public:
+        %(class_name)s(const char* mnem, ExtMachInst machInst,
+                IntRegIndex _dest, IntRegIndex _op1,
+                uint8_t _numRegs, int _regIndex, StaticInst *_macroOp)
+            : MicroOp(mnem, machInst, SimdAluOp),
+              dest(_dest), op1(_op1), numRegs(_numRegs), regIndex(_regIndex),
+              macroOp(_macroOp)
+        {
+            %(constructor)s;
+        }
+
+        Fault execute(ExecContext *, Trace::InstRecord *) const;
+
+        std::string
+        generateDisassembly(Addr pc, const SymbolTable *symtab) const
+        {
+            std::stringstream ss;
+            ccprintf(ss, "%s", macroOp->disassemble(pc, symtab));
+            ccprintf(ss, " (uop interleave)");
+            return ss.str();
+        }
+    };
+}};
+
+// Declaration of the deinterleaving microop used by the SVE load
+// structure macroops; the actual data movement from the INTRLVREGn
+// temporaries to the destination register is supplied via %(code)s in
+// the execute template.  The parent macroop pointer is kept only so
+// disassembly can show the full instruction.
+def template SveDeIntrlvMicroopDeclare {{
+    template <class _Element>
+    class %(class_name)s : public %(base_class)s
+    {
+      protected:
+        typedef _Element Element;
+        typedef _Element TPElem;
+        IntRegIndex dest;
+        uint8_t numRegs;
+        int regIndex;
+
+        // Non-owning pointer to the parent macroop, used only for
+        // disassembly.
+        StaticInst *macroOp;
+
+      public:
+        %(class_name)s(const char* mnem, ExtMachInst machInst,
+                IntRegIndex _dest, uint8_t _numRegs, int _regIndex,
+                StaticInst *_macroOp)
+            : MicroOp(mnem, machInst, SimdAluOp),
+              dest(_dest), numRegs(_numRegs), regIndex(_regIndex),
+              macroOp(_macroOp)
+        {
+            %(constructor)s;
+        }
+
+        Fault execute(ExecContext *, Trace::InstRecord *) const;
+
+        std::string
+        generateDisassembly(Addr pc, const SymbolTable *symtab) const
+        {
+            std::stringstream ss;
+            ccprintf(ss, "%s", macroOp->disassemble(pc, symtab));
+            ccprintf(ss, " (uop deinterleave)");
+            return ss.str();
+        }
+    };
+}};
+
+// Explicitly instantiates execute() of an interleave/deinterleave
+// microop for the element type given by %(targs)s.
+def template SveIntrlvMicroopExecDeclare {{
+    template
+    Fault %(class_name)s<%(targs)s>::execute(
+            ExecContext *, Trace::InstRecord *) const;
+}};
+
+// Execute template shared by the interleave/deinterleave microops; the
+// register shuffling itself is provided through the %(code)s
+// substitution.
+def template SveIntrlvMicroopExecute {{
+    template <class Element>
+    Fault %(class_name)s<Element>::execute(ExecContext *xc,
+            Trace::InstRecord *traceData) const
+    {
+        Fault fault = NoFault;
+        %(op_decl)s;
+        %(op_rd)s;
+
+        %(code)s;
+        if (fault == NoFault)
+        {
+            %(op_wb)s;
+        }
+
+        return fault;
+    }
+}};
diff --git a/src/arch/arm/registers.hh b/src/arch/arm/registers.hh
index 3790d9d5c..4a8e960d4 100644
--- a/src/arch/arm/registers.hh
+++ b/src/arch/arm/registers.hh
@@ -85,15 +85,20 @@ const int NumVecV7ArchRegs = 64;
const int NumVecV8ArchRegs = 32;
const int NumVecSpecialRegs = 8;
+// Extra vector registers used as temporaries by the SVE load/store
+// structure instructions to (de)interleave data; they are not visible
+// to the architecture.
+const int NumVecIntrlvRegs = 4;
const int NumIntRegs = NUM_INTREGS;
const int NumFloatRegs = NumFloatV8ArchRegs + NumFloatSpecialRegs;
-const int NumVecRegs = NumVecV8ArchRegs + NumVecSpecialRegs;
+const int NumVecRegs = NumVecV8ArchRegs + NumVecSpecialRegs + NumVecIntrlvRegs;
const int VECREG_UREG0 = 32;
const int NumVecPredRegs = 18; // P0-P15, FFR, UREG0
const int PREDREG_FFR = 16;
const int PREDREG_UREG0 = 17;
const int NumCCRegs = NUM_CCREGS;
const int NumMiscRegs = NUM_MISCREGS;
+// Indices of the interleaving temporaries; they sit directly after the
+// architectural and special vector registers.
+const int INTRLVREG0 = NumVecV8ArchRegs + NumVecSpecialRegs;
+const int INTRLVREG1 = INTRLVREG0 + 1;
+const int INTRLVREG2 = INTRLVREG0 + 2;
+const int INTRLVREG3 = INTRLVREG0 + 3;
#define ISA_HAS_CC_REGS