author     Giacomo Gabrielli <giacomo.gabrielli@arm.com>  2018-10-16 16:09:02 +0100
committer  Giacomo Gabrielli <giacomo.gabrielli@arm.com>  2019-03-14 10:42:27 +0000
commit     c4cc3145cd1eeed236b5cd3f7b2424bc0761878e (patch)
tree       b38eab6f5f389dfc53c2cf74275a83bacd2e9b18
parent     91195ae7f637d1d4879cc3bf0860147333846e75 (diff)
download   gem5-c4cc3145cd1eeed236b5cd3f7b2424bc0761878e.tar.xz
arch-arm,cpu: Add initial support for Arm SVE
This changeset adds initial support for the Arm Scalable Vector Extension
(SVE) by implementing:
- support for most data-processing instructions (no loads/stores yet);
- basic system-level support.
Additional authors:
- Javier Setoain <javier.setoain@arm.com>
- Gabor Dozsa <gabor.dozsa@arm.com>
- Giacomo Travaglini <giacomo.travaglini@arm.com>
Thanks to Pau Cabre for contributing bugfixes.
Change-Id: I1808b5ff55b401777eeb9b99c9a1129e0d527709
Signed-off-by: Giacomo Gabrielli <giacomo.gabrielli@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/13515
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
46 files changed, 11605 insertions, 61 deletions
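A note on the two length encodings this patch uses: the sve_vl and sve_vl_se parameters below are expressed in quadwords (128 bits, range 1-16), while ZCR_EL<x>.LEN and the decoder's sveLen field store that value minus one, so the vector length in bits is (LEN + 1) * 128. A minimal standalone sketch of the arithmetic (illustrative helper names, not gem5 code):

#include <cassert>

// Illustrative helpers (not part of the patch): mapping between the
// sve_vl parameter (quadwords, 1-16) and the ZCR_EL<x>.LEN encoding.
unsigned zcrLenFromQuadwords(unsigned sve_vl) {
    assert(sve_vl >= 1 && sve_vl <= 16);
    return sve_vl - 1;       // e.g. 256-bit vectors: sve_vl = 2 -> LEN = 1
}

unsigned vecLenBitsFromZcrLen(unsigned len) {
    return (len + 1) * 128;  // inverse mapping
}

int main() {
    // Mirrors the decoder initialization in the diff below:
    //   sveLen = (isa->getCurSveVecLenInBitsAtReset() >> 7) - 1;
    unsigned bits = vecLenBitsFromZcrLen(zcrLenFromQuadwords(2));
    assert(bits == 256 && ((bits >> 7) - 1) == 1);
    return 0;
}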
diff --git a/src/arch/arm/ArmISA.py b/src/arch/arm/ArmISA.py
index 70be40313..ea2927d5d 100644
--- a/src/arch/arm/ArmISA.py
+++ b/src/arch/arm/ArmISA.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2012-2013, 2015-2016, 2018 ARM Limited
+# Copyright (c) 2012-2013, 2015-2018 ARM Limited
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -41,6 +41,7 @@
 from m5.proxy import *
 from m5.SimObject import SimObject
 from m5.objects.ArmPMU import ArmPMU
+from m5.objects.ArmSystem import SveVectorLength
 from m5.objects.ISACommon import VecRegRenameMode
 
 # Enum for DecoderFlavour
@@ -115,3 +116,8 @@ class ArmISA(SimObject):
     # It is rather executed as a NOP.
     impdef_nop = Param.Bool(False,
         "Any access to a MISCREG_IMPDEF_UNIMPL register is executed as NOP")
+
+    # This is required because in SE mode a generic System SimObject is
+    # allocated, instead of an ArmSystem
+    sve_vl_se = Param.SveVectorLength(1,
+        "SVE vector length in quadwords (128-bit), SE-mode only")
diff --git a/src/arch/arm/ArmSystem.py b/src/arch/arm/ArmSystem.py
index 7ade1e695..ec30e0bf6 100644
--- a/src/arch/arm/ArmSystem.py
+++ b/src/arch/arm/ArmSystem.py
@@ -52,6 +52,8 @@ class ArmMachineType(Enum):
         'DTOnly' : -1,
     }
 
+class SveVectorLength(UInt8): min = 1; max = 16
+
 class ArmSystem(System):
     type = 'ArmSystem'
     cxx_header = "arch/arm/system.hh"
@@ -80,6 +82,10 @@ class ArmSystem(System):
         "Supported physical address range in bits when using AArch64 (ARMv8)")
     have_large_asid_64 = Param.Bool(False,
         "True if ASID is 16 bits in AArch64 (ARMv8)")
+    have_sve = Param.Bool(True,
+        "True if SVE is implemented (ARMv8)")
+    sve_vl = Param.SveVectorLength(1,
+        "SVE vector length in quadwords (128-bit)")
     semihosting = Param.ArmSemihosting(NULL,
         "Enable support for the Arm semihosting by settings this parameter")
 
diff --git a/src/arch/arm/SConscript b/src/arch/arm/SConscript
index a1063f039..58a13cd72 100644
--- a/src/arch/arm/SConscript
+++ b/src/arch/arm/SConscript
@@ -1,6 +1,6 @@
 # -*- mode:python -*-
 
-# Copyright (c) 2009, 2012-2013, 2018 ARM Limited
+# Copyright (c) 2009, 2012-2013, 2017-2018 ARM Limited
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -60,6 +60,7 @@ if env['TARGET_ISA'] == 'arm':
     Source('insts/pred_inst.cc')
     Source('insts/pseudo.cc')
     Source('insts/static_inst.cc')
+    Source('insts/sve.cc')
     Source('insts/vfp.cc')
     Source('insts/fplib.cc')
     Source('insts/crypto.cc')
diff --git a/src/arch/arm/decoder.cc b/src/arch/arm/decoder.cc
index ce039b731..4c86ee2c6 100644
--- a/src/arch/arm/decoder.cc
+++ b/src/arch/arm/decoder.cc
@@ -47,6 +47,7 @@
 #include "arch/arm/utility.hh"
 #include "base/trace.hh"
 #include "debug/Decoder.hh"
+#include "sim/full_system.hh"
 
 namespace ArmISA
 {
@@ -54,11 +55,13 @@ namespace ArmISA
 GenericISA::BasicDecodeCache Decoder::defaultCache;
 
 Decoder::Decoder(ISA* isa)
-    : data(0), fpscrLen(0), fpscrStride(0), decoderFlavour(isa
-          ? isa->decoderFlavour()
-          : Enums::Generic)
+    : data(0), fpscrLen(0), fpscrStride(0),
+      decoderFlavour(isa->decoderFlavour())
 {
     reset();
+
+    // Initialize SVE vector length
+    sveLen = (isa->getCurSveVecLenInBitsAtReset() >> 7) - 1;
 }
 
 void
@@ -157,6 +160,7 @@ Decoder::moreBytes(const PCState &pc, Addr fetchPC, MachInst inst)
     emi.aarch64 = pc.aarch64();
     emi.fpscrLen = fpscrLen;
     emi.fpscrStride = fpscrStride;
+    emi.sveLen = sveLen;
 
     const Addr alignment(pc.thumb() ?
0x1 : 0x3); emi.decoderFault = static_cast<uint8_t>( diff --git a/src/arch/arm/decoder.hh b/src/arch/arm/decoder.hh index f8748ab5e..f44a981d9 100644 --- a/src/arch/arm/decoder.hh +++ b/src/arch/arm/decoder.hh @@ -72,6 +72,12 @@ class Decoder int fpscrLen; int fpscrStride; + /** + * SVE vector length, encoded in the same format as the ZCR_EL<x>.LEN + * bitfields. + */ + int sveLen; + Enums::DecoderFlavour decoderFlavour; /// A cache of decoded instruction objects. @@ -197,6 +203,11 @@ class Decoder fpscrLen = fpscr.len; fpscrStride = fpscr.stride; } + + void setSveLen(uint8_t len) + { + sveLen = len; + } }; } // namespace ArmISA diff --git a/src/arch/arm/insts/static_inst.cc b/src/arch/arm/insts/static_inst.cc index f245cd4f0..cc0e8f3f2 100644 --- a/src/arch/arm/insts/static_inst.cc +++ b/src/arch/arm/insts/static_inst.cc @@ -44,6 +44,7 @@ #include "arch/arm/insts/static_inst.hh" #include "arch/arm/faults.hh" +#include "arch/arm/isa.hh" #include "base/condcodes.hh" #include "base/cprintf.hh" #include "base/loader/symtab.hh" @@ -292,17 +293,20 @@ ArmStaticInst::shift_carry_rs(uint32_t base, uint32_t shamt, } void -ArmStaticInst::printIntReg(std::ostream &os, RegIndex reg_idx) const +ArmStaticInst::printIntReg(std::ostream &os, RegIndex reg_idx, + uint8_t opWidth) const { + if (opWidth == 0) + opWidth = intWidth; if (aarch64) { if (reg_idx == INTREG_UREG0) ccprintf(os, "ureg0"); else if (reg_idx == INTREG_SPX) - ccprintf(os, "%s%s", (intWidth == 32) ? "w" : "", "sp"); + ccprintf(os, "%s%s", (opWidth == 32) ? "w" : "", "sp"); else if (reg_idx == INTREG_X31) - ccprintf(os, "%szr", (intWidth == 32) ? "w" : "x"); + ccprintf(os, "%szr", (opWidth == 32) ? "w" : "x"); else - ccprintf(os, "%s%d", (intWidth == 32) ? "w" : "x", reg_idx); + ccprintf(os, "%s%d", (opWidth == 32) ? "w" : "x", reg_idx); } else { switch (reg_idx) { case PCReg: @@ -341,9 +345,16 @@ ArmStaticInst::printFloatReg(std::ostream &os, RegIndex reg_idx) const } void -ArmStaticInst::printVecReg(std::ostream &os, RegIndex reg_idx) const +ArmStaticInst::printVecReg(std::ostream &os, RegIndex reg_idx, + bool isSveVecReg) const { - ccprintf(os, "v%d", reg_idx); + ccprintf(os, "%s%d", isSveVecReg ? 
"z" : "v", reg_idx); +} + +void +ArmStaticInst::printVecPredReg(std::ostream &os, RegIndex reg_idx) const +{ + ccprintf(os, "p%d", reg_idx); } void @@ -955,6 +966,55 @@ ArmStaticInst::undefinedFault64(ThreadContext *tc, return NoFault; } +Fault +ArmStaticInst::sveAccessTrap(ExceptionLevel el) const +{ + switch (el) { + case EL1: + return std::make_shared<SupervisorTrap>(machInst, 0, EC_TRAPPED_SVE); + case EL2: + return std::make_shared<HypervisorTrap>(machInst, 0, EC_TRAPPED_SVE); + case EL3: + return std::make_shared<SecureMonitorTrap>(machInst, 0, + EC_TRAPPED_SVE); + + default: + panic("Illegal EL in sveAccessTrap\n"); + } +} + +Fault +ArmStaticInst::checkSveTrap(ThreadContext *tc, CPSR cpsr) const +{ + const ExceptionLevel el = (ExceptionLevel) (uint8_t) cpsr.el; + + if (ArmSystem::haveVirtualization(tc) && el <= EL2) { + CPTR cptrEnCheck = tc->readMiscReg(MISCREG_CPTR_EL2); + if (cptrEnCheck.tz) + return sveAccessTrap(EL2); + } + + if (ArmSystem::haveSecurity(tc)) { + CPTR cptrEnCheck = tc->readMiscReg(MISCREG_CPTR_EL3); + if (!cptrEnCheck.ez) + return sveAccessTrap(EL3); + } + + return NoFault; +} + +Fault +ArmStaticInst::checkSveEnabled(ThreadContext *tc, CPSR cpsr, CPACR cpacr) const +{ + const ExceptionLevel el = (ExceptionLevel) (uint8_t) cpsr.el; + if ((el == EL0 && cpacr.zen != 0x3) || + (el == EL1 && !(cpacr.zen & 0x1))) + return sveAccessTrap(EL1); + + return checkSveTrap(tc, cpsr); +} + + static uint8_t getRestoredITBits(ThreadContext *tc, CPSR spsr) { @@ -1100,5 +1160,10 @@ ArmStaticInst::generalExceptionsToAArch64(ThreadContext *tc, !ELIs32(tc, EL2) && hcr.tge); } +unsigned +ArmStaticInst::getCurSveVecLenInBits(ThreadContext *tc) +{ + return tc->getIsaPtr()->getCurSveVecLenInBits(tc); +} } diff --git a/src/arch/arm/insts/static_inst.hh b/src/arch/arm/insts/static_inst.hh index f4f3cbb6d..9caf016dd 100644 --- a/src/arch/arm/insts/static_inst.hh +++ b/src/arch/arm/insts/static_inst.hh @@ -156,9 +156,12 @@ class ArmStaticInst : public StaticInst /// Print a register name for disassembly given the unique /// dependence tag number (FP or int). - void printIntReg(std::ostream &os, RegIndex reg_idx) const; + void printIntReg(std::ostream &os, RegIndex reg_idx, + uint8_t opWidth = 0) const; void printFloatReg(std::ostream &os, RegIndex reg_idx) const; - void printVecReg(std::ostream &os, RegIndex reg_idx) const; + void printVecReg(std::ostream &os, RegIndex reg_idx, + bool isSveVecReg = false) const; + void printVecPredReg(std::ostream &os, RegIndex reg_idx) const; void printCCReg(std::ostream &os, RegIndex reg_idx) const; void printMiscReg(std::ostream &os, RegIndex reg_idx) const; void printMnemonic(std::ostream &os, @@ -467,6 +470,23 @@ class ArmStaticInst : public StaticInst Fault undefinedFault64(ThreadContext *tc, ExceptionLevel el) const; /** + * Trap an access to SVE registers due to access control bits. + * + * @param el Target EL for the trap. + */ + Fault sveAccessTrap(ExceptionLevel el) const; + + /** + * Check an SVE access against CPTR_EL2 and CPTR_EL3. + */ + Fault checkSveTrap(ThreadContext *tc, CPSR cpsr) const; + + /** + * Check an SVE access against CPACR_EL1, CPTR_EL2, and CPTR_EL3. + */ + Fault checkSveEnabled(ThreadContext *tc, CPSR cpsr, CPACR cpacr) const; + + /** * Get the new PSTATE from a SPSR register in preparation for an * exception return. 
* @@ -520,6 +540,21 @@ class ArmStaticInst : public StaticInst { return simpleAsBytes(buf, max_size, machInst); } + + static unsigned getCurSveVecLenInBits(ThreadContext *tc); + + static unsigned + getCurSveVecLenInQWords(ThreadContext *tc) + { + return getCurSveVecLenInBits(tc) >> 6; + } + + template<typename T> + static unsigned + getCurSveVecLen(ThreadContext *tc) + { + return getCurSveVecLenInBits(tc) / (8 * sizeof(T)); + } }; } diff --git a/src/arch/arm/insts/sve.cc b/src/arch/arm/insts/sve.cc new file mode 100644 index 000000000..22ce47f93 --- /dev/null +++ b/src/arch/arm/insts/sve.cc @@ -0,0 +1,957 @@ +/* + * Copyright (c) 2017-2019 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Giacomo Gabrielli + */ + +// TODO: add support for suffixes of register specifiers in disasm strings. 
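For reference, the predicate-count patterns handled by sveDisasmPredCountImm and sveDecodePredCount near the end of this file can be exercised as below; this is a hedged usage sketch (assuming a gem5 build where sve.hh is on the include path), with expected values read straight off the decode switch for a 256-bit vector of 32-bit elements (8 elements):

#include <cassert>
#include "arch/arm/insts/sve.hh"  // declares ArmISA::sveDecodePredCount

int main() {
    using ArmISA::sveDecodePredCount;
    // 256-bit vector, 32-bit elements -> num_elems = 8.
    assert(sveDecodePredCount(0x0,  8) == 8);  // POW2: largest 2^n <= 8
    assert(sveDecodePredCount(0x7,  8) == 7);  // VL7: fits, so 7
    assert(sveDecodePredCount(0x9,  8) == 0);  // VL16: does not fit, so 0
    assert(sveDecodePredCount(0x1e, 8) == 6);  // MUL3: 8 - (8 % 3)
    assert(sveDecodePredCount(0x1f, 8) == 8);  // ALL: every element
    return 0;
}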
+ +#include "arch/arm/insts/sve.hh" + +namespace ArmISA { + +const char* +svePredTypeToStr(SvePredType pt) +{ + switch (pt) { + case SvePredType::MERGE: + return "m"; + case SvePredType::ZERO: + return "z"; + default: + return ""; + } +} + +std::string +SvePredCountPredOp::generateDisassembly(Addr pc, + const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printIntReg(ss, dest); + ccprintf(ss, ", "); + printVecPredReg(ss, gp); + ccprintf(ss, ", "); + printVecPredReg(ss, op1); + return ss.str(); +} + +std::string +SvePredCountOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + if (destIsVec) { + printVecReg(ss, dest, true); + } else { + printIntReg(ss, dest); + } + ccprintf(ss, ", "); + uint8_t opWidth = 64; + printVecPredReg(ss, gp); + ccprintf(ss, ", "); + if (srcIs32b) + opWidth = 32; + printIntReg(ss, dest, opWidth); + return ss.str(); +} + +std::string +SveIndexIIOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecReg(ss, dest, true); + ccprintf(ss, ", #%d, #%d", imm1, imm2); + return ss.str(); +} + +std::string +SveIndexIROp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecReg(ss, dest, true); + ccprintf(ss, ", #%d, ", imm1); + printIntReg(ss, op2); + return ss.str(); +} + +std::string +SveIndexRIOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecReg(ss, dest, true); + ccprintf(ss, ", "); + printIntReg(ss, op1); + ccprintf(ss, ", #%d", imm2); + return ss.str(); +} + +std::string +SveIndexRROp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecReg(ss, dest, true); + ccprintf(ss, ", "); + printIntReg(ss, op1); + ccprintf(ss, ", "); + printIntReg(ss, op2); + return ss.str(); +} + +std::string +SveWhileOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecPredReg(ss, dest); + ccprintf(ss, ", "); + uint8_t opWidth; + if (srcIs32b) + opWidth = 32; + else + opWidth = 64; + printIntReg(ss, op1, opWidth); + ccprintf(ss, ", "); + printIntReg(ss, op2, opWidth); + return ss.str(); +} + +std::string +SveCompTermOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printIntReg(ss, op1); + ccprintf(ss, ", "); + printIntReg(ss, op2); + return ss.str(); +} + +std::string +SveUnaryPredOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecReg(ss, dest, true); + ccprintf(ss, ", "); + printVecPredReg(ss, gp); + ccprintf(ss, "/m, "); + printVecReg(ss, op1, true); + return ss.str(); +} + +std::string +SveUnaryUnpredOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecReg(ss, dest, true); + ccprintf(ss, ", "); + printVecReg(ss, op1, true); + return ss.str(); +} + +std::string +SveUnaryWideImmUnpredOp::generateDisassembly(Addr pc, + const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecReg(ss, dest, true); + ccprintf(ss, ", #"); + ss << imm; + return ss.str(); +} + +std::string 
+SveUnaryWideImmPredOp::generateDisassembly(Addr pc, + const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecReg(ss, dest, true); + ccprintf(ss, ", "); + printVecPredReg(ss, gp); + ccprintf(ss, (isMerging ? "/m" : "/z")); + ccprintf(ss, ", #"); + ss << imm; + return ss.str(); +} + +std::string +SveBinImmUnpredConstrOp::generateDisassembly(Addr pc, + const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecReg(ss, dest, true); + ccprintf(ss, ", "); + printVecPredReg(ss, op1); + ccprintf(ss, ", #"); + ss << imm; + return ss.str(); +} + +std::string +SveBinImmPredOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecReg(ss, dest, true); + ccprintf(ss, ", "); + printVecPredReg(ss, gp); + ccprintf(ss, "/m, "); + printVecReg(ss, dest, true); + ccprintf(ss, ", #"); + ss << imm; + return ss.str(); +} + +std::string +SveBinWideImmUnpredOp::generateDisassembly(Addr pc, + const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecReg(ss, dest, true); + ccprintf(ss, ", "); + printVecReg(ss, dest, true); + ccprintf(ss, ", #"); + ss << imm; + return ss.str(); +} + +std::string +SveBinDestrPredOp::generateDisassembly(Addr pc, + const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecReg(ss, dest, true); + ccprintf(ss, ", "); + printVecPredReg(ss, gp); + ccprintf(ss, "/m, "); + printVecReg(ss, dest, true); + ccprintf(ss, ", "); + printVecReg(ss, op2, true); + return ss.str(); +} + +std::string +SveBinConstrPredOp::generateDisassembly(Addr pc, + const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecReg(ss, dest, true); + ccprintf(ss, ", "); + printVecPredReg(ss, gp); + if (predType == SvePredType::MERGE || predType == SvePredType::ZERO) { + ccprintf(ss, "/%s", svePredTypeToStr(predType)); + } + ccprintf(ss, ", "); + printVecReg(ss, op1, true); + ccprintf(ss, ", "); + printVecReg(ss, op2, true); + return ss.str(); +} + +std::string +SveBinUnpredOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecReg(ss, dest, true); + ccprintf(ss, ", "); + printVecReg(ss, op1, true); + ccprintf(ss, ", "); + printVecReg(ss, op2, true); + return ss.str(); +} + +std::string +SveBinIdxUnpredOp::generateDisassembly(Addr pc, + const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecReg(ss, dest, true); + ccprintf(ss, ", "); + printVecReg(ss, op1, true); + ccprintf(ss, ", "); + printVecReg(ss, op2, true); + ccprintf(ss, "["); + ss << (uint64_t)index; + ccprintf(ss, "]"); + return ss.str(); +} + +std::string +SvePredLogicalOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecReg(ss, dest, true); + ccprintf(ss, ", "); + printVecPredReg(ss, gp); + if (isSel) { + ccprintf(ss, ", "); + } else { + ccprintf(ss, "/z, "); + } + printVecPredReg(ss, op1); + ccprintf(ss, ", "); + printVecPredReg(ss, op2); + return ss.str(); +} + +std::string +SvePredBinPermOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecPredReg(ss, dest); + ccprintf(ss, ", "); + printVecPredReg(ss, op1); + ccprintf(ss, ", "); + printVecPredReg(ss, op2); + return ss.str(); +} + 
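The floating-point immediate expanders defined at the end of this file (sveExpandFpImmAddSub, sveExpandFpImmMaxMin, sveExpandFpImmMul) return raw IEEE-754 bit patterns. A standalone usage sketch (again assuming the declarations from sve.hh are available; the expected values are the constants listed in those helpers):

#include <cassert>
#include <cstdint>
#include <cstring>
#include "arch/arm/insts/sve.hh"  // declares the ArmISA::sveExpandFpImm* helpers

int main() {
    float f;
    // FADD/FSUB/FSUBR: the 1-bit immediate selects 0.5 (0) or 1.0 (1).
    uint32_t enc = static_cast<uint32_t>(ArmISA::sveExpandFpImmAddSub(0, 0x2));
    std::memcpy(&f, &enc, sizeof(f));
    assert(f == 0.5f);   // 0x3f000000
    // FMUL: the immediate selects 0.5 (0) or 2.0 (1).
    enc = static_cast<uint32_t>(ArmISA::sveExpandFpImmMul(1, 0x2));
    std::memcpy(&f, &enc, sizeof(f));
    assert(f == 2.0f);   // 0x40000000
    return 0;
}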
+std::string +SveCmpOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecPredReg(ss, dest); + ccprintf(ss, ", "); + printVecPredReg(ss, gp); + ccprintf(ss, "/z, "); + printVecReg(ss, op1, true); + ccprintf(ss, ", "); + printVecReg(ss, op2, true); + return ss.str(); +} + +std::string +SveCmpImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecPredReg(ss, dest); + ccprintf(ss, ", "); + printVecPredReg(ss, gp); + ccprintf(ss, "/z, "); + printVecReg(ss, op1, true); + ccprintf(ss, ", #"); + ss << imm; + return ss.str(); +} + +std::string +SveTerPredOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecReg(ss, dest, true); + ccprintf(ss, ", "); + printVecPredReg(ss, gp); + ccprintf(ss, "/m, "); + printVecReg(ss, op1, true); + ccprintf(ss, ", "); + printVecReg(ss, op2, true); + return ss.str(); +} + +std::string +SveTerImmUnpredOp::generateDisassembly(Addr pc, + const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecReg(ss, dest, true); + ccprintf(ss, ", "); + printVecReg(ss, dest, true); + ccprintf(ss, ", "); + printVecReg(ss, op2, true); + ccprintf(ss, ", #"); + ss << imm; + return ss.str(); +} + +std::string +SveReducOp::generateDisassembly(Addr pc, + const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printFloatReg(ss, dest); + ccprintf(ss, ", "); + printVecPredReg(ss, gp); + ccprintf(ss, ", "); + printVecReg(ss, op1, true); + return ss.str(); +} + +std::string +SveOrdReducOp::generateDisassembly(Addr pc, + const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printFloatReg(ss, dest); + ccprintf(ss, ", "); + printVecPredReg(ss, gp); + ccprintf(ss, ", "); + printFloatReg(ss, dest); + ccprintf(ss, ", "); + printVecReg(ss, op1, true); + return ss.str(); +} + +std::string +SvePtrueOp::generateDisassembly(Addr pc, + const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecPredReg(ss, dest); + if (imm != 0x1f) { + ccprintf(ss, ", "); + ss << sveDisasmPredCountImm(imm); + } + return ss.str(); +} + +std::string +SveIntCmpOp::generateDisassembly(Addr pc, + const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecPredReg(ss, dest); + ccprintf(ss, ", "); + printVecPredReg(ss, gp); + ccprintf(ss, "/z, "); + printVecReg(ss, op1, true); + ccprintf(ss, ", "); + if (op2IsWide) { + printVecReg(ss, op2, true); + } else { + printVecReg(ss, op2, true); + } + return ss.str(); +} + +std::string +SveIntCmpImmOp::generateDisassembly(Addr pc, + const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecPredReg(ss, dest); + ccprintf(ss, "/z, "); + printVecPredReg(ss, gp); + ccprintf(ss, ", "); + printVecReg(ss, op1, true); + ccprintf(ss, ", #"); + ss << imm; + return ss.str(); +} + +std::string +SveAdrOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecReg(ss, dest, true); + ccprintf(ss, ", ["); + printVecReg(ss, op1, true); + ccprintf(ss, ", "); + printVecReg(ss, op2, true); + if (offsetFormat == SveAdrOffsetUnpackedSigned) { + ccprintf(ss, ", sxtw"); + } else if (offsetFormat == SveAdrOffsetUnpackedUnsigned) { + ccprintf(ss, ", 
uxtw"); + } else if (mult != 1) { + ccprintf(ss, ", lsl"); + } + if (mult != 1) { + ss << __builtin_ctz(mult); + } + ccprintf(ss, "]"); + return ss.str(); +} + +std::string +SveElemCountOp::generateDisassembly(Addr pc, + const SymbolTable *symtab) const +{ + static const char suffix[9] = + {'\0', 'b', 'h', '\0', 'w', '\0', '\0', '\0', 'd'}; + std::stringstream ss; + ss << " " << mnemonic << suffix[esize] << " "; + if (dstIsVec) { + printVecReg(ss, dest, true); + } else { + if (dstIs32b) { + printIntReg(ss, dest, 32); + } else { + printIntReg(ss, dest, 64); + } + } + if (pattern != 0x1f) { + ccprintf(ss, ", "); + ss << sveDisasmPredCountImm(pattern); + if (imm != 1) { + ccprintf(ss, ", mul #"); + ss << std::to_string(imm); + } + } + return ss.str(); +} + +std::string +SvePartBrkOp::generateDisassembly(Addr pc, + const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecPredReg(ss, dest); + ccprintf(ss, ", "); + printVecPredReg(ss, gp); + ccprintf(ss, isMerging ? "/m, " : "/z, "); + printVecPredReg(ss, op1); + return ss.str(); +} + +std::string +SvePartBrkPropOp::generateDisassembly(Addr pc, + const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecPredReg(ss, dest); + ccprintf(ss, ", "); + printVecPredReg(ss, gp); + ccprintf(ss, "/z, "); + printVecPredReg(ss, op1); + ccprintf(ss, ", "); + printVecPredReg(ss, op2); + return ss.str(); +} + +std::string +SveSelectOp::generateDisassembly(Addr pc, + const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + if (scalar) + printIntReg(ss, dest, scalar_width); + else if (simdFp) + printFloatReg(ss, dest); + else + printVecReg(ss, dest, true); + ccprintf(ss, ", "); + printVecPredReg(ss, gp); + if (conditional) { + ccprintf(ss, ", "); + if (scalar) + printIntReg(ss, dest, scalar_width); + else + printVecReg(ss, dest, true); + } + ccprintf(ss, ", "); + printVecReg(ss, op1, true); + return ss.str(); +} + +std::string +SveUnaryPredPredOp::generateDisassembly(Addr pc, + const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecPredReg(ss, dest); + ccprintf(ss, ", "); + printVecPredReg(ss, gp); + ccprintf(ss, ", "); + printVecPredReg(ss, op1); + return ss.str(); +} + +std::string +SveTblOp::generateDisassembly(Addr pc, + const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecReg(ss, dest, true); + ccprintf(ss, ", { "); + printVecReg(ss, op1, true); + ccprintf(ss, " }, "); + printVecReg(ss, op2, true); + return ss.str(); +} + +std::string +SveUnpackOp::generateDisassembly(Addr pc, + const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecPredReg(ss, dest); + ccprintf(ss, ", "); + printVecPredReg(ss, op1); + return ss.str(); +} + +std::string +SvePredTestOp::generateDisassembly(Addr pc, + const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecPredReg(ss, gp); + ccprintf(ss, ", "); + printVecPredReg(ss, op1); + return ss.str(); +} + +std::string +SvePredUnaryWImplicitSrcOp::generateDisassembly(Addr pc, + const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecPredReg(ss, dest); + return ss.str(); +} + +std::string +SvePredUnaryWImplicitSrcPredOp::generateDisassembly(Addr pc, + const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecPredReg(ss, dest); + ccprintf(ss, 
", "); + printVecPredReg(ss, gp); + ccprintf(ss, "/z, "); + return ss.str(); +} + +std::string +SvePredUnaryWImplicitDstOp::generateDisassembly(Addr pc, + const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecPredReg(ss, op1); + return ss.str(); +} + +std::string +SveWImplicitSrcDstOp::generateDisassembly(Addr pc, + const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + return ss.str(); +} + +std::string +SveBinImmUnpredDestrOp::generateDisassembly(Addr pc, + const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecReg(ss, dest, true); + ccprintf(ss, ", "); + printVecReg(ss, dest, true); + ccprintf(ss, ", "); + printVecReg(ss, op1, true); + ccprintf(ss, ", #"); + ss << imm; + return ss.str(); +} + +std::string +SveBinImmIdxUnpredOp::generateDisassembly(Addr pc, + const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecReg(ss, dest, true); + ccprintf(ss, ", "); + printVecReg(ss, op1, true); + ccprintf(ss, "["); + ss << imm; + ccprintf(ss, "]"); + return ss.str(); +} + +std::string +SveUnarySca2VecUnpredOp::generateDisassembly(Addr pc, + const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecReg(ss, dest, true); + ccprintf(ss, ", "); + if (simdFp) { + printFloatReg(ss, op1); + } else { + printIntReg(ss, op1); + } + return ss.str(); +} + +std::string +SveDotProdIdxOp::generateDisassembly(Addr pc, + const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecReg(ss, dest, true); + ccprintf(ss, ", "); + printVecReg(ss, op1, true); + ccprintf(ss, ", "); + printVecReg(ss, op2, true); + ccprintf(ss, "["); + ccprintf(ss, "%lu", imm); + ccprintf(ss, "]"); + return ss.str(); +} + +std::string +SveDotProdOp::generateDisassembly(Addr pc, + const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecReg(ss, dest, true); + ccprintf(ss, ", "); + printVecReg(ss, op1, true); + ccprintf(ss, ", "); + printVecReg(ss, op2, true); + return ss.str(); +} + +std::string +SveComplexOp::generateDisassembly(Addr pc, + const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecPredReg(ss, dest); + ccprintf(ss, ", "); + printVecPredReg(ss, gp); + ccprintf(ss, "/m, "); + printVecPredReg(ss, op1); + ccprintf(ss, ", "); + printVecPredReg(ss, op2); + ccprintf(ss, ", #"); + const char* rotstr[4] = {"0", "90", "180", "270"}; + ccprintf(ss, rotstr[rot]); + + return ss.str(); +} + +std::string +SveComplexIdxOp::generateDisassembly(Addr pc, + const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecPredReg(ss, dest); + ccprintf(ss, ", "); + printVecPredReg(ss, op1); + ccprintf(ss, ", "); + printVecPredReg(ss, op2); + ccprintf(ss, "["); + ss << imm; + ccprintf(ss, "], #"); + const char* rotstr[4] = {"0", "90", "180", "270"}; + ccprintf(ss, rotstr[rot]); + return ss.str(); +} + +std::string +sveDisasmPredCountImm(uint8_t imm) +{ + switch (imm) { + case 0x0: + return "POW2"; + case 0x1: + case 0x2: + case 0x3: + case 0x4: + case 0x5: + case 0x6: + case 0x7: + return "VL" + std::to_string(imm); + case 0x8: + case 0x9: + case 0xa: + case 0xb: + case 0xc: + case 0xd: + return "VL" + std::to_string(1 << ((imm & 0x7) + 3)); + case 0x1d: + return "MUL4"; + case 0x1e: + return "MUL3"; + case 0x1f: + return "ALL"; + default: + return "#" + 
std::to_string(imm); + } +} + +unsigned int +sveDecodePredCount(uint8_t imm, unsigned int num_elems) +{ + assert(num_elems > 0); + + switch (imm) { + case 0x0: + // POW2 + return 1 << (31 - __builtin_clz((uint32_t) num_elems)); + case 0x1: + case 0x2: + case 0x3: + case 0x4: + case 0x5: + case 0x6: + case 0x7: + // VL1, VL2, VL3, VL4, VL5, VL6, VL7 + return (num_elems >= imm) ? imm : 0; + case 0x8: + case 0x9: + case 0xa: + case 0xb: + case 0xc: + case 0xd: + // VL8, VL16, VL32, VL64, VL128, VL256 + { + unsigned int pcount = 1 << ((imm & 0x7) + 3); + return (num_elems >= pcount) ? pcount : 0; + } + case 0x1d: + // MUL4 + return num_elems - (num_elems % 4); + case 0x1e: + // MUL3 + return num_elems - (num_elems % 3); + case 0x1f: + // ALL + return num_elems; + default: + return 0; + } +} + +uint64_t +sveExpandFpImmAddSub(uint8_t imm, uint8_t size) +{ + static constexpr uint16_t fpOne16 = 0x3c00; + static constexpr uint16_t fpPointFive16 = 0x3800; + static constexpr uint32_t fpOne32 = 0x3f800000; + static constexpr uint32_t fpPointFive32 = 0x3f000000; + static constexpr uint64_t fpOne64 = 0x3ff0000000000000; + static constexpr uint64_t fpPointFive64 = 0x3fe0000000000000; + + switch (size) { + case 0x1: + return imm ? fpOne16 : fpPointFive16; + case 0x2: + return imm ? fpOne32 : fpPointFive32; + case 0x3: + return imm ? fpOne64 : fpPointFive64; + default: + panic("Unsupported size"); + } +} + +uint64_t +sveExpandFpImmMaxMin(uint8_t imm, uint8_t size) +{ + static constexpr uint16_t fpOne16 = 0x3c00; + static constexpr uint32_t fpOne32 = 0x3f800000; + static constexpr uint64_t fpOne64 = 0x3ff0000000000000; + + switch (size) { + case 0x1: + return imm ? fpOne16 : 0x0; + case 0x2: + return imm ? fpOne32 : 0x0; + case 0x3: + return imm ? fpOne64 : 0x0; + default: + panic("Unsupported size"); + } +} + +uint64_t +sveExpandFpImmMul(uint8_t imm, uint8_t size) +{ + static constexpr uint16_t fpTwo16 = 0x4000; + static constexpr uint16_t fpPointFive16 = 0x3800; + static constexpr uint32_t fpTwo32 = 0x40000000; + static constexpr uint32_t fpPointFive32 = 0x3f000000; + static constexpr uint64_t fpTwo64 = 0x4000000000000000; + static constexpr uint64_t fpPointFive64 = 0x3fe0000000000000; + + switch (size) { + case 0x1: + return imm ? fpTwo16 : fpPointFive16; + case 0x2: + return imm ? fpTwo32 : fpPointFive32; + case 0x3: + return imm ? fpTwo64 : fpPointFive64; + default: + panic("Unsupported size"); + } +} + +} // namespace ArmISA diff --git a/src/arch/arm/insts/sve.hh b/src/arch/arm/insts/sve.hh new file mode 100644 index 000000000..f72e3ccd7 --- /dev/null +++ b/src/arch/arm/insts/sve.hh @@ -0,0 +1,883 @@ +/* + * Copyright (c) 2017-2019 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Giacomo Gabrielli + */ + +#ifndef __ARCH_ARM_INSTS_SVE_HH__ +#define __ARCH_ARM_INSTS_SVE_HH__ + +#include "arch/arm/insts/static_inst.hh" + +namespace ArmISA { + +enum class SvePredType { + NONE, + MERGE, + ZERO, + SELECT +}; + +/// Returns the specifier for the predication type `pt` as a string. 
+const char* svePredTypeToStr(SvePredType pt); + +/// Index generation instruction, immediate operands +class SveIndexIIOp : public ArmStaticInst { + protected: + IntRegIndex dest; + int8_t imm1; + int8_t imm2; + + SveIndexIIOp(const char* mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, + int8_t _imm1, int8_t _imm2) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), imm1(_imm1), imm2(_imm2) + {} + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class SveIndexIROp : public ArmStaticInst { + protected: + IntRegIndex dest; + int8_t imm1; + IntRegIndex op2; + + SveIndexIROp(const char* mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, + int8_t _imm1, IntRegIndex _op2) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), imm1(_imm1), op2(_op2) + {} + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class SveIndexRIOp : public ArmStaticInst { + protected: + IntRegIndex dest; + IntRegIndex op1; + int8_t imm2; + + SveIndexRIOp(const char* mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, + IntRegIndex _op1, int8_t _imm2) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), imm2(_imm2) + {} + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class SveIndexRROp : public ArmStaticInst { + protected: + IntRegIndex dest; + IntRegIndex op1; + IntRegIndex op2; + + SveIndexRROp(const char* mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, + IntRegIndex _op1, IntRegIndex _op2) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2) + {} + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +// Predicate count SVE instruction. +class SvePredCountOp : public ArmStaticInst { + protected: + IntRegIndex dest; + IntRegIndex gp; + bool srcIs32b; + bool destIsVec; + + SvePredCountOp(const char* mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _gp, + bool _srcIs32b = false, bool _destIsVec = false) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), gp(_gp), + srcIs32b(_srcIs32b), destIsVec(_destIsVec) + {} + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +// Predicate count SVE instruction (predicated). +class SvePredCountPredOp : public ArmStaticInst { + protected: + IntRegIndex dest; + IntRegIndex op1; + IntRegIndex gp; + + SvePredCountPredOp(const char* mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _op1, + IntRegIndex _gp) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), gp(_gp) + {} + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// While predicate generation SVE instruction. +class SveWhileOp : public ArmStaticInst { + protected: + IntRegIndex dest, op1, op2; + bool srcIs32b; + + SveWhileOp(const char* mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + bool _srcIs32b) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), srcIs32b(_srcIs32b) + {} + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// Compare and terminate loop SVE instruction. 
+class SveCompTermOp : public ArmStaticInst { + protected: + IntRegIndex op1, op2; + + SveCompTermOp(const char* mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _op1, IntRegIndex _op2) : + ArmStaticInst(mnem, _machInst, __opClass), + op1(_op1), op2(_op2) + {} + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// Unary, constructive, predicated (merging) SVE instruction. +class SveUnaryPredOp : public ArmStaticInst { + protected: + IntRegIndex dest, op1, gp; + + SveUnaryPredOp(const char* mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _gp) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), gp(_gp) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// Unary, constructive, unpredicated SVE instruction. +class SveUnaryUnpredOp : public ArmStaticInst { + protected: + IntRegIndex dest, op1; + + SveUnaryUnpredOp(const char* mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _op1) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// Unary with wide immediate, constructive, unpredicated SVE instruction. +class SveUnaryWideImmUnpredOp : public ArmStaticInst { + protected: + IntRegIndex dest; + uint64_t imm; + + SveUnaryWideImmUnpredOp(const char* mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, + uint64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// Unary with wide immediate, constructive, predicated SVE instruction. +class SveUnaryWideImmPredOp : public ArmStaticInst { + protected: + IntRegIndex dest; + uint64_t imm; + IntRegIndex gp; + + bool isMerging; + + SveUnaryWideImmPredOp(const char* mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, + uint64_t _imm, IntRegIndex _gp, bool _isMerging) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), imm(_imm), gp(_gp), isMerging(_isMerging) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// Binary with immediate, destructive, unpredicated SVE instruction. +class SveBinImmUnpredConstrOp : public ArmStaticInst { + protected: + IntRegIndex dest, op1; + uint64_t imm; + + SveBinImmUnpredConstrOp(const char* mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _op1, + uint64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// Binary with immediate, destructive, predicated (merging) SVE instruction. +class SveBinImmPredOp : public ArmStaticInst { + protected: + IntRegIndex dest, gp; + uint64_t imm; + + SveBinImmPredOp(const char* mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, uint64_t _imm, IntRegIndex _gp) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), gp(_gp), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// Binary with wide immediate, destructive, unpredicated SVE instruction. 
+class SveBinWideImmUnpredOp : public ArmStaticInst { + protected: + IntRegIndex dest; + uint64_t imm; + + SveBinWideImmUnpredOp(const char* mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, + uint64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// Binary, destructive, predicated (merging) SVE instruction. +class SveBinDestrPredOp : public ArmStaticInst { + protected: + IntRegIndex dest, op2, gp; + + SveBinDestrPredOp(const char* mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _op2, + IntRegIndex _gp) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op2(_op2), gp(_gp) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// Binary, constructive, predicated SVE instruction. +class SveBinConstrPredOp : public ArmStaticInst { + protected: + IntRegIndex dest, op1, op2, gp; + SvePredType predType; + + SveBinConstrPredOp(const char* mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _op1, + IntRegIndex _op2, IntRegIndex _gp, + SvePredType _predType) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), gp(_gp), predType(_predType) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// Binary, unpredicated SVE instruction with indexed operand +class SveBinUnpredOp : public ArmStaticInst { + protected: + IntRegIndex dest, op1, op2; + + SveBinUnpredOp(const char* mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// Binary, unpredicated SVE instruction +class SveBinIdxUnpredOp : public ArmStaticInst { + protected: + IntRegIndex dest, op1, op2; + uint8_t index; + + SveBinIdxUnpredOp(const char* mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _op1, + IntRegIndex _op2, uint8_t _index) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), index(_index) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// Predicate logical instruction. +class SvePredLogicalOp : public ArmStaticInst { + protected: + IntRegIndex dest, op1, op2, gp; + bool isSel; + + SvePredLogicalOp(const char* mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _op1, + IntRegIndex _op2, IntRegIndex _gp, bool _isSel = false) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), gp(_gp), isSel(_isSel) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// Predicate binary permute instruction. +class SvePredBinPermOp : public ArmStaticInst { + protected: + IntRegIndex dest, op1, op2; + + SvePredBinPermOp(const char* mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _op1, + IntRegIndex _op2) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// SVE compare instructions, predicated (zeroing). 
+class SveCmpOp : public ArmStaticInst { + protected: + IntRegIndex dest, gp, op1, op2; + + SveCmpOp(const char* mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + IntRegIndex _gp) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), gp(_gp), op1(_op1), op2(_op2) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// SVE compare-with-immediate instructions, predicated (zeroing). +class SveCmpImmOp : public ArmStaticInst { + protected: + IntRegIndex dest, gp, op1; + uint64_t imm; + + SveCmpImmOp(const char* mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, uint64_t _imm, + IntRegIndex _gp) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), gp(_gp), op1(_op1), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// Ternary, destructive, predicated (merging) SVE instruction. +class SveTerPredOp : public ArmStaticInst { + protected: + IntRegIndex dest, op1, op2, gp; + + SveTerPredOp(const char* mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + IntRegIndex _gp) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), gp(_gp) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// Ternary with immediate, destructive, unpredicated SVE instruction. +class SveTerImmUnpredOp : public ArmStaticInst { + protected: + IntRegIndex dest, op2; + uint64_t imm; + + SveTerImmUnpredOp(const char* mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _op2, + uint64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op2(_op2), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// SVE reductions. +class SveReducOp : public ArmStaticInst { + protected: + IntRegIndex dest, op1, gp; + + SveReducOp(const char* mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _gp) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), gp(_gp) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// SVE ordered reductions. +class SveOrdReducOp : public ArmStaticInst { + protected: + IntRegIndex dest, op1, gp; + + SveOrdReducOp(const char* mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _gp) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), gp(_gp) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// PTRUE, PTRUES. +class SvePtrueOp : public ArmStaticInst { + protected: + IntRegIndex dest; + uint8_t imm; + + SvePtrueOp(const char* mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, uint8_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// Integer compare SVE instruction. 
+class SveIntCmpOp : public ArmStaticInst { + protected: + IntRegIndex dest; + IntRegIndex op1, op2; + IntRegIndex gp; + bool op2IsWide; + + SveIntCmpOp(const char* mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + IntRegIndex _gp, bool _op2IsWide = false) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), gp(_gp), op2IsWide(_op2IsWide) + {} + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// Integer compare with immediate SVE instruction. +class SveIntCmpImmOp : public ArmStaticInst { + protected: + IntRegIndex dest; + IntRegIndex op1; + int64_t imm; + IntRegIndex gp; + + SveIntCmpImmOp(const char* mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, int64_t _imm, + IntRegIndex _gp) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), imm(_imm), gp(_gp) + {} + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// ADR. +class SveAdrOp : public ArmStaticInst { + public: + enum SveAdrOffsetFormat { + SveAdrOffsetPacked, + SveAdrOffsetUnpackedSigned, + SveAdrOffsetUnpackedUnsigned + }; + + protected: + IntRegIndex dest, op1, op2; + uint8_t mult; + SveAdrOffsetFormat offsetFormat; + + SveAdrOp(const char* mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _op1, + IntRegIndex _op2, uint8_t _mult, + SveAdrOffsetFormat _offsetFormat) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), mult(_mult), + offsetFormat(_offsetFormat) + {} + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// Element count SVE instruction. +class SveElemCountOp : public ArmStaticInst { + protected: + IntRegIndex dest; + uint8_t pattern; + uint8_t imm; + bool dstIsVec; + bool dstIs32b; + uint8_t esize; + + SveElemCountOp(const char* mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, uint8_t _pattern, uint8_t _imm, + bool _dstIsVec, bool _dstIs32b) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), pattern(_pattern), imm(_imm), dstIsVec(_dstIsVec), + dstIs32b(_dstIs32b) + {} + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// Partition break SVE instruction. +class SvePartBrkOp : public ArmStaticInst { + protected: + IntRegIndex dest; + IntRegIndex gp; + IntRegIndex op1; + bool isMerging; + + SvePartBrkOp(const char* mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _gp, IntRegIndex _op1, + bool _isMerging) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), gp(_gp), op1(_op1), isMerging(_isMerging) + {} + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// Partition break with propagation SVE instruction. +class SvePartBrkPropOp : public ArmStaticInst { + protected: + IntRegIndex dest; + IntRegIndex op1; + IntRegIndex op2; + IntRegIndex gp; + + SvePartBrkPropOp(const char* mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, + IntRegIndex _op1, IntRegIndex _op2, IntRegIndex _gp) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), gp(_gp) + {} + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// Scalar element select SVE instruction. 
+class SveSelectOp : public ArmStaticInst { + protected: + IntRegIndex dest; + IntRegIndex op1; + IntRegIndex gp; + bool conditional; + bool scalar; + bool simdFp; + size_t scalar_width; + + SveSelectOp(const char* mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, + IntRegIndex _op1, IntRegIndex _gp, + bool _conditional, bool _scalar, + bool _simdFp) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), gp(_gp), conditional(_conditional), + scalar(_scalar), simdFp(_simdFp) + {} + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// SVE unary operation on predicate (predicated) +class SveUnaryPredPredOp : public ArmStaticInst { + protected: + IntRegIndex dest; + IntRegIndex op1; + IntRegIndex gp; + + SveUnaryPredPredOp(const char* mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, + IntRegIndex _op1, IntRegIndex _gp) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), gp(_gp) + {} + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// SVE table lookup/permute using vector of element indices (TBL) +class SveTblOp : public ArmStaticInst { + protected: + IntRegIndex dest; + IntRegIndex op1; + IntRegIndex op2; + + SveTblOp(const char* mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2) + {} + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// SVE unpack and widen predicate +class SveUnpackOp : public ArmStaticInst { + protected: + IntRegIndex dest; + IntRegIndex op1; + + SveUnpackOp(const char* mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1) + {} + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// SVE predicate test +class SvePredTestOp : public ArmStaticInst { + protected: + IntRegIndex op1; + IntRegIndex gp; + + SvePredTestOp(const char* mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _op1, IntRegIndex _gp) : + ArmStaticInst(mnem, _machInst, __opClass), + op1(_op1), gp(_gp) + {} + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// SVE unary predicate instructions with implicit source operand +class SvePredUnaryWImplicitSrcOp : public ArmStaticInst { + protected: + IntRegIndex dest; + + SvePredUnaryWImplicitSrcOp(const char* mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest) + {} + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// SVE unary predicate instructions, predicated, with implicit source operand +class SvePredUnaryWImplicitSrcPredOp : public ArmStaticInst { + protected: + IntRegIndex dest; + IntRegIndex gp; + + SvePredUnaryWImplicitSrcPredOp(const char* mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, + IntRegIndex _gp) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), gp(_gp) + {} + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// SVE unary predicate instructions with implicit destination operand +class SvePredUnaryWImplicitDstOp : public ArmStaticInst { + protected: + IntRegIndex op1; + + SvePredUnaryWImplicitDstOp(const char* mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _op1) : + 
ArmStaticInst(mnem, _machInst, __opClass), + op1(_op1) + {} + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// SVE unary predicate instructions with implicit destination operand +class SveWImplicitSrcDstOp : public ArmStaticInst { + protected: + SveWImplicitSrcDstOp(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) : + ArmStaticInst(mnem, _machInst, __opClass) + {} + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// SVE vector - immediate binary operation +class SveBinImmUnpredDestrOp : public ArmStaticInst { + protected: + IntRegIndex dest; + IntRegIndex op1; + uint64_t imm; + + SveBinImmUnpredDestrOp(const char* mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _op1, + uint64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), imm(_imm) + {} + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// Binary with immediate index, destructive, unpredicated SVE instruction. +class SveBinImmIdxUnpredOp : public ArmStaticInst { + protected: + IntRegIndex dest, op1; + uint64_t imm; + + SveBinImmIdxUnpredOp(const char* mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _op1, + uint64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// Unary unpredicated scalar to vector instruction +class SveUnarySca2VecUnpredOp : public ArmStaticInst { + protected: + IntRegIndex dest, op1; + bool simdFp; + + SveUnarySca2VecUnpredOp(const char* mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _op1, + bool _simdFp) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), simdFp(_simdFp) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// SVE dot product instruction (indexed) +class SveDotProdIdxOp : public ArmStaticInst { + protected: + IntRegIndex dest, op1, op2; + uint64_t imm; + uint8_t esize; + + public: + SveDotProdIdxOp(const char* mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _op1, + IntRegIndex _op2, uint64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// SVE dot product instruction (vectors) +class SveDotProdOp : public ArmStaticInst { + protected: + IntRegIndex dest, op1, op2; + uint8_t esize; + + public: + SveDotProdOp(const char* mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _op1, + IntRegIndex _op2) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// SVE Complex Instructions (vectors) +class SveComplexOp : public ArmStaticInst { + protected: + IntRegIndex dest, op1, op2, gp; + uint8_t rot; + + public: + SveComplexOp(const char* mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _op1, + IntRegIndex _op2, IntRegIndex _gp, uint8_t _rot) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), gp(_gp), rot(_rot) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +/// SVE Complex Instructions (indexed) +class SveComplexIdxOp : public ArmStaticInst { + 
protected: + IntRegIndex dest, op1, op2; + uint8_t rot, imm; + + public: + SveComplexIdxOp(const char* mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _op1, + IntRegIndex _op2, uint8_t _rot, uint8_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), rot(_rot), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + + +/// Returns the symbolic name associated with pattern `imm` for PTRUE(S) +/// instructions. +std::string sveDisasmPredCountImm(uint8_t imm); + +/// Returns the actual number of elements active for PTRUE(S) instructions. +/// @param imm 5-bit immediate encoding the predicate pattern. +/// @param num_elems Current number of elements per vector (depending on +/// current vector length and element size). +unsigned int sveDecodePredCount(uint8_t imm, unsigned int num_elems); + +/// Expand 1-bit floating-point immediate to 0.5 or 1.0 (FADD, FSUB, FSUBR). +/// @param imm 1-bit immediate. +/// @param size Encoding of the vector element size. +/// @return Encoding of the expanded value. +uint64_t sveExpandFpImmAddSub(uint8_t imm, uint8_t size); + +/// Expand 1-bit floating-point immediate to 0.0 or 1.0 (FMAX, FMAXNM, FMIN, +/// FMINNM). +/// @param imm 1-bit immediate. +/// @param size Encoding of the vector element size. +/// @return Encoding of the expanded value. +uint64_t sveExpandFpImmMaxMin(uint8_t imm, uint8_t size); + +/// Expand 1-bit floating-point immediate to 0.5 or 2.0 (FMUL). +/// @param imm 1-bit immediate. +/// @param size Encoding of the vector element size. +/// @return Encoding of the expanded value. +uint64_t sveExpandFpImmMul(uint8_t imm, uint8_t size); + +} // namespace ArmISA + +#endif // __ARCH_ARM_INSTS_SVE_HH__ diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc index 3b10f68a4..38fbae142 100644 --- a/src/arch/arm/isa.cc +++ b/src/arch/arm/isa.cc @@ -89,12 +89,16 @@ ISA::ISA(Params *p) haveVirtualization = system->haveVirtualization(); haveLargeAsid64 = system->haveLargeAsid64(); physAddrRange = system->physAddrRange(); + haveSVE = system->haveSVE(); + sveVL = system->sveVL(); } else { highestELIs64 = true; // ArmSystem::highestELIs64 does the same haveSecurity = haveLPAE = haveVirtualization = false; haveCrypto = true; haveLargeAsid64 = false; physAddrRange = 32; // dummy value + haveSVE = true; + sveVL = p->sve_vl_se; } // Initial rename mode depends on highestEL @@ -350,6 +354,16 @@ ISA::initID64(const ArmISAParams *p) miscRegs[MISCREG_ID_DFR0] = miscRegs[MISCREG_ID_DFR0_EL1]; + // SVE + miscRegs[MISCREG_ID_AA64ZFR0_EL1] = 0; // SVEver 0 + if (haveSecurity) { + miscRegs[MISCREG_ZCR_EL3] = sveVL - 1; + } else if (haveVirtualization) { + miscRegs[MISCREG_ZCR_EL2] = sveVL - 1; + } else { + miscRegs[MISCREG_ZCR_EL1] = sveVL - 1; + } + // Enforce consistency with system-level settings... // EL3 @@ -360,6 +374,10 @@ ISA::initID64(const ArmISAParams *p) miscRegs[MISCREG_ID_AA64PFR0_EL1] = insertBits( miscRegs[MISCREG_ID_AA64PFR0_EL1], 11, 8, haveVirtualization ? 0x2 : 0x0); + // SVE + miscRegs[MISCREG_ID_AA64PFR0_EL1] = insertBits( + miscRegs[MISCREG_ID_AA64PFR0_EL1], 35, 32, + haveSVE ? 
0x1 : 0x0); // Large ASID support miscRegs[MISCREG_ID_AA64MMFR0_EL1] = insertBits( miscRegs[MISCREG_ID_AA64MMFR0_EL1], 7, 4, @@ -576,6 +594,7 @@ ISA::readMiscReg(int misc_reg, ThreadContext *tc) const uint32_t ones = (uint32_t)(-1); FPSCR fpscrMask = 0; fpscrMask.len = ones; + fpscrMask.fz16 = ones; fpscrMask.stride = ones; fpscrMask.rMode = ones; fpscrMask.fz = ones; @@ -683,6 +702,7 @@ ISA::readMiscReg(int misc_reg, ThreadContext *tc) 0x0000000000000020 | // EL1 (haveVirtualization ? 0x0000000000000200 : 0) | // EL2 (haveSecurity ? 0x0000000000002000 : 0) | // EL3 + (haveSVE ? 0x0000000100000000 : 0) | // SVE (haveGICv3CPUInterface ? 0x0000000001000000 : 0); case MISCREG_ID_AA64PFR1_EL1: return 0; // bits [63:0] RES0 (reserved for future use) @@ -757,6 +777,8 @@ ISA::setMiscReg(int misc_reg, RegVal val, ThreadContext *tc) pc.nextJazelle(cpsr.j); pc.illegalExec(cpsr.il == 1); + tc->getDecoderPtr()->setSveLen((getCurSveVecLenInBits(tc) >> 7) - 1); + // Follow slightly different semantics if a CheckerCPU object // is connected CheckerCPU *checker = tc->getCheckerCpuPtr(); @@ -807,6 +829,20 @@ ISA::setMiscReg(int misc_reg, RegVal val, ThreadContext *tc) miscRegName[misc_reg], newVal); } break; + case MISCREG_CPACR_EL1: + { + const uint32_t ones = (uint32_t)(-1); + CPACR cpacrMask = 0; + cpacrMask.tta = ones; + cpacrMask.fpen = ones; + if (haveSVE) { + cpacrMask.zen = ones; + } + newVal &= cpacrMask; + DPRINTF(MiscRegs, "Writing misc reg %s: %#x\n", + miscRegName[misc_reg], newVal); + } + break; case MISCREG_CPTR_EL2: { const uint32_t ones = (uint32_t)(-1); @@ -814,10 +850,17 @@ ISA::setMiscReg(int misc_reg, RegVal val, ThreadContext *tc) cptrMask.tcpac = ones; cptrMask.tta = ones; cptrMask.tfp = ones; + if (haveSVE) { + cptrMask.tz = ones; + } newVal &= cptrMask; cptrMask = 0; cptrMask.res1_13_12_el2 = ones; - cptrMask.res1_9_0_el2 = ones; + cptrMask.res1_7_0_el2 = ones; + if (!haveSVE) { + cptrMask.res1_8_el2 = ones; + } + cptrMask.res1_9_el2 = ones; newVal |= cptrMask; DPRINTF(MiscRegs, "Writing misc reg %s: %#x\n", miscRegName[misc_reg], newVal); @@ -830,6 +873,9 @@ ISA::setMiscReg(int misc_reg, RegVal val, ThreadContext *tc) cptrMask.tcpac = ones; cptrMask.tta = ones; cptrMask.tfp = ones; + if (haveSVE) { + cptrMask.ez = ones; + } newVal &= cptrMask; DPRINTF(MiscRegs, "Writing misc reg %s: %#x\n", miscRegName[misc_reg], newVal); @@ -860,6 +906,7 @@ ISA::setMiscReg(int misc_reg, RegVal val, ThreadContext *tc) fpscrMask.ixe = ones; fpscrMask.ide = ones; fpscrMask.len = ones; + fpscrMask.fz16 = ones; fpscrMask.stride = ones; fpscrMask.rMode = ones; fpscrMask.fz = ones; @@ -902,6 +949,7 @@ ISA::setMiscReg(int misc_reg, RegVal val, ThreadContext *tc) const uint32_t ones = (uint32_t)(-1); FPSCR fpscrMask = 0; fpscrMask.len = ones; + fpscrMask.fz16 = ones; fpscrMask.stride = ones; fpscrMask.rMode = ones; fpscrMask.fz = ones; @@ -1986,11 +2034,16 @@ ISA::setMiscReg(int misc_reg, RegVal val, ThreadContext *tc) case MISCREG_CNTVOFF_EL2 ... MISCREG_CNTPS_CVAL_EL1: getGenericTimer(tc).setMiscReg(misc_reg, newVal); break; - case MISCREG_ICC_PMR_EL1 ... MISCREG_ICC_IGRPEN1_EL3: case MISCREG_ICH_AP0R0_EL2 ... 
MISCREG_ICH_LR15_EL2: getGICv3CPUInterface(tc).setMiscReg(misc_reg, newVal); return; + case MISCREG_ZCR_EL3: + case MISCREG_ZCR_EL2: + case MISCREG_ZCR_EL1: + tc->getDecoderPtr()->setSveLen( + (getCurSveVecLenInBits(tc) >> 7) - 1); + break; } } setMiscRegNoEffect(misc_reg, newVal); @@ -2024,8 +2077,61 @@ ISA::getGICv3CPUInterface(ThreadContext *tc) return *gicv3CpuInterface.get(); } +unsigned +ISA::getCurSveVecLenInBits(ThreadContext *tc) const +{ + if (!FullSystem) { + return sveVL * 128; + } + + panic_if(!tc, + "A ThreadContext is needed to determine the SVE vector length " + "in full-system mode"); + + CPSR cpsr = miscRegs[MISCREG_CPSR]; + ExceptionLevel el = (ExceptionLevel) (uint8_t) cpsr.el; + + unsigned len = 0; + + if (el == EL1 || (el == EL0 && !ELIsInHost(tc, el))) { + len = static_cast<ZCR>(miscRegs[MISCREG_ZCR_EL1]).len; + } + + if (el == EL2 || (el == EL0 && ELIsInHost(tc, el))) { + len = static_cast<ZCR>(miscRegs[MISCREG_ZCR_EL2]).len; + } else if (haveVirtualization && !inSecureState(tc) && + (el == EL0 || el == EL1)) { + len = std::min( + len, + static_cast<unsigned>( + static_cast<ZCR>(miscRegs[MISCREG_ZCR_EL2]).len)); + } + + if (el == EL3) { + len = static_cast<ZCR>(miscRegs[MISCREG_ZCR_EL3]).len; + } else if (haveSecurity) { + len = std::min( + len, + static_cast<unsigned>( + static_cast<ZCR>(miscRegs[MISCREG_ZCR_EL3]).len)); + } + + len = std::min(len, sveVL - 1); + + return (len + 1) * 128; +} + +void +ISA::zeroSveVecRegUpperPart(VecRegContainer &vc, unsigned eCount) +{ + auto vv = vc.as<uint64_t>(); + for (int i = 2; i < eCount; ++i) { + vv[i] = 0; + } } +} // namespace ArmISA + ArmISA::ISA * ArmISAParams::create() { diff --git a/src/arch/arm/isa.hh b/src/arch/arm/isa.hh index b98610bfc..1931306f9 100644 --- a/src/arch/arm/isa.hh +++ b/src/arch/arm/isa.hh @@ -94,6 +94,10 @@ namespace ArmISA bool haveLargeAsid64; bool haveGICv3CPUInterface; uint8_t physAddrRange; + bool haveSVE; + + /** SVE vector length in quadwords */ + unsigned sveVL; /** * If true, accesses to IMPLEMENTATION DEFINED registers are treated @@ -660,6 +664,13 @@ namespace ArmISA return std::make_pair(lower, upper); } + unsigned getCurSveVecLenInBits(ThreadContext *tc) const; + + unsigned getCurSveVecLenInBitsAtReset() const { return sveVL * 128; } + + static void zeroSveVecRegUpperPart(VecRegContainer &vc, + unsigned eCount); + void serialize(CheckpointOut &cp) const { DPRINTF(Checkpoint, "Serializing Arm Misc Registers\n"); @@ -671,6 +682,8 @@ namespace ArmISA SERIALIZE_SCALAR(haveVirtualization); SERIALIZE_SCALAR(haveLargeAsid64); SERIALIZE_SCALAR(physAddrRange); + SERIALIZE_SCALAR(haveSVE); + SERIALIZE_SCALAR(sveVL); } void unserialize(CheckpointIn &cp) { @@ -685,6 +698,8 @@ namespace ArmISA UNSERIALIZE_SCALAR(haveVirtualization); UNSERIALIZE_SCALAR(haveLargeAsid64); UNSERIALIZE_SCALAR(physAddrRange); + UNSERIALIZE_SCALAR(haveSVE); + UNSERIALIZE_SCALAR(sveVL); } void startup(ThreadContext *tc); diff --git a/src/arch/arm/isa/formats/aarch64.isa b/src/arch/arm/isa/formats/aarch64.isa index 3f4e33711..15cbe90dc 100644 --- a/src/arch/arm/isa/formats/aarch64.isa +++ b/src/arch/arm/isa/formats/aarch64.isa @@ -53,6 +53,10 @@ namespace Aarch64 StaticInstPtr decodeAdvSIMD(ExtMachInst machInst); StaticInstPtr decodeAdvSIMDScalar(ExtMachInst machInst); + StaticInstPtr decodeSveInt(ExtMachInst machInst); + StaticInstPtr decodeSveFp(ExtMachInst machInst); + StaticInstPtr decodeSveMem(ExtMachInst machInst); + StaticInstPtr decodeGem5Ops(ExtMachInst machInst); } }}; @@ -2280,9 +2284,23 @@ def format Aarch64() {{ 
{ using namespace Aarch64; if (bits(machInst, 27) == 0x0) { - if (bits(machInst, 28) == 0x0) - return new Unknown64(machInst); - else if (bits(machInst, 26) == 0) + if (bits(machInst, 28) == 0x0) { + if (bits(machInst, 26, 25) != 0x2) { + return new Unknown64(machInst); + } + if (bits(machInst, 31) == 0x0) { + switch (bits(machInst, 30, 29)) { + case 0x0: + case 0x1: + case 0x2: + return decodeSveInt(machInst); + case 0x3: + return decodeSveFp(machInst); + } + } else { + return decodeSveMem(machInst); + } + } else if (bits(machInst, 26) == 0) // bit 28:26=100 return decodeDataProcImm(machInst); else diff --git a/src/arch/arm/isa/formats/formats.isa b/src/arch/arm/isa/formats/formats.isa index 25ace4be3..935500dc2 100644 --- a/src/arch/arm/isa/formats/formats.isa +++ b/src/arch/arm/isa/formats/formats.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010-2011 ARM Limited +// Copyright (c) 2010-2011, 2017 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -50,6 +50,10 @@ //Include support for decoding AArch64 NEON instructions ##include "neon64.isa" +//Include support for decoding SVE instructions (AArch64-only) +##include "sve_top_level.isa" +##include "sve_2nd_level.isa" + //Include support for predicated instructions ##include "pred.isa" diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa b/src/arch/arm/isa/formats/sve_2nd_level.isa new file mode 100644 index 000000000..3c5e01c6b --- /dev/null +++ b/src/arch/arm/isa/formats/sve_2nd_level.isa @@ -0,0 +1,2917 @@ +// Copyright (c) 2017-2019 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Giacomo Gabrielli
+
+/// @file
+/// SVE 2nd-level decoder.
+
+output decoder {{
+namespace Aarch64
+{
+
+    StaticInstPtr
+    decodeSveIntArithBinPred(ExtMachInst machInst)
+    {
+        IntRegIndex zdn = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
+        IntRegIndex zm = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
+        IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 12, 10);
+
+        switch (bits(machInst, 20, 19)) {
+          case 0x0:
+            {
+                uint8_t size = bits(machInst, 23, 22);
+                uint8_t opc = bits(machInst, 18, 16);
+                switch (opc) {
+                  case 0x0:
+                    return decodeSveBinDestrPredU<SveAddPred>(
+                        size, machInst, zdn, zm, pg);
+                  case 0x1:
+                    return decodeSveBinDestrPredU<SveSubPred>(
+                        size, machInst, zdn, zm, pg);
+                  case 0x3:
+                    return decodeSveBinDestrPredU<SveSubr>(
+                        size, machInst, zdn, zm, pg);
+                  default:
+                    return new Unknown64(machInst);
+                }
+            }
+          case 0x1:
+            {
+                uint8_t size = bits(machInst, 23, 22);
+                uint8_t u = bits(machInst, 16);
+                uint8_t opc = bits(machInst, 18, 17);
+                switch (opc) {
+                  case 0x0:
+                    return decodeSveBinDestrPred<SveSmax, SveUmax>(
+                        size, u, machInst, zdn, zm, pg);
+                  case 0x1:
+                    return decodeSveBinDestrPred<SveSmin, SveUmin>(
+                        size, u, machInst, zdn, zm, pg);
+                  case 0x2:
+                    return decodeSveBinDestrPred<SveSabd, SveUabd>(
+                        size, u, machInst, zdn, zm, pg);
+                  default:
+                    return new Unknown64(machInst);
+                }
+            }
+          case 0x2:
+            {
+                uint8_t size = bits(machInst, 23, 22);
+                uint8_t u = bits(machInst, 16);
+                uint8_t opc = bits(machInst, 18, 17);
+                switch (opc) {
+                  case 0x0:
+                    if (u == 0) {
+                        return decodeSveBinDestrPredU<SveMul>(
+                            size, machInst, zdn, zm, pg);
+                    } else {
+                        return new Unknown64(machInst);
+                    }
+                  case 0x1:
+                    return decodeSveBinDestrPred<SveSmulh, SveUmulh>(
+                        size, u, machInst, zdn, zm, pg);
+                  case 0x2:
+                    if (size == 0x2 || size == 0x3) {
+                        return decodeSveBinDestrPred<SveSdiv, SveUdiv>(
+                            size, u, machInst, zdn, zm, pg);
+                    } else {
+                        return new Unknown64(machInst);
+                    }
+                  case 0x3:
+                    if (size == 0x2 || size == 0x3) {
+                        return decodeSveBinDestrPred<SveSdivr, SveUdivr>(
+                            size, u, machInst, zdn, zm, pg);
+                    } else {
+                        return new Unknown64(machInst);
+                    }
+                }
+            }
+          case 0x3:
+            {
+                uint8_t size = bits(machInst, 23, 22);
+                uint8_t opc = bits(machInst, 18, 16);
+
+                switch (opc) {
+                  case 0x0:
+                    return decodeSveBinDestrPredU<SveOrrPred>(
+                        size, machInst, zdn, zm, pg);
+                  case 0x1:
+                    return decodeSveBinDestrPredU<SveEorPred>(
+                        size, machInst, zdn, zm, pg);
+                  case 0x2:
+                    return decodeSveBinDestrPredU<SveAndPred>(
+                        size, machInst, zdn, zm, pg);
+                  case 0x3:
+                    return decodeSveBinDestrPredU<SveBicPred>(
+                        size, machInst, zdn, zm, pg);
+                  default:
+                    return new Unknown64(machInst);
+                }
+            }
+        }
+        return new Unknown64(machInst);
+    } // decodeSveIntArithBinPred
+
+    StaticInstPtr
+    decodeSveIntReduc(ExtMachInst machInst)
+    {
+        IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
+        IntRegIndex zn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
+        IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 12, 10);
+
+        uint8_t size = bits(machInst, 23,
22); + + switch (bits(machInst, 20, 19)) { + case 0x0: + { + uint8_t u = bits(machInst, 16); + uint8_t opc = bits(machInst, 18, 17); + if (opc != 0x0 || (!u && size == 0x3)) { + return new Unknown64(machInst); + } else { + return decodeSveWideningReduc<SveSaddv, SveUaddv>( + size, u, machInst, vd, zn, pg); + } + } + case 0x1: + { + uint8_t u = bits(machInst, 16); + uint8_t opc = bits(machInst, 18, 17); + switch (opc) { + case 0x0: + return decodeSveUnaryPred<SveSmaxv, SveUmaxv>( + size, u, machInst, vd, zn, pg); + case 0x1: + return decodeSveUnaryPred<SveSminv, SveUminv>( + size, u, machInst, vd, zn, pg); + default: + return new Unknown64(machInst); + } + } + case 0x2: + { + uint8_t opc = bits(machInst, 18, 17); + uint8_t merge = bits(machInst, 16); + switch (opc) { + case 0x0: + if (merge) { + return decodeSveUnaryPredU<SveMovprfxPredM>( + size, machInst, vd /* zd */, zn, pg); + } else { + return decodeSveUnaryPredU<SveMovprfxPredZ>( + size, machInst, vd /* zd */, zn, pg); + } + default: + return new Unknown64(machInst); + } + } + case 0x3: + { + uint8_t opc = bits(machInst, 18, 16); + switch (opc) { + case 0x0: + return decodeSveUnaryPredU<SveOrv>( + size, machInst, vd, zn, pg); + case 0x1: + return decodeSveUnaryPredU<SveEorv>( + size, machInst, vd, zn, pg); + case 0x2: + return decodeSveUnaryPredU<SveAndv>( + size, machInst, vd, zn, pg); + default: + return new Unknown64(machInst); + } + } + } + return new Unknown64(machInst); + } // decodeSveIntReduc + + StaticInstPtr + decodeSveIntMulAdd(ExtMachInst machInst) + { + IntRegIndex zda = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex zn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex zm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 12, 10); + + uint8_t size = bits(machInst, 23, 22); + uint8_t opc = (bits(machInst, 15) << 1) | bits(machInst, 13); + switch (opc) { + case 0x0: + return decodeSveTerPredS<SveMla>( + size, machInst, zda, zn, zm, pg); + case 0x1: + return decodeSveTerPredS<SveMls>( + size, machInst, zda, zn, zm, pg); + case 0x2: + return decodeSveTerPredS<SveMad>( + size, machInst, zda /* zdn */, zn /* za */, zm, pg); + case 0x3: + return decodeSveTerPredS<SveMsb>( + size, machInst, zda /* zdn */, zn /* za */, zm, pg); + } + return new Unknown64(machInst); + } // decodeSveIntMulAdd + + StaticInstPtr + decodeSveShiftByImmPred0(ExtMachInst machInst) + { + IntRegIndex zdn = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 12, 10); + uint8_t imm3 = (uint8_t) bits(machInst, 7, 5); + + uint8_t tsize = (bits(machInst, 23, 22) << 2) | bits(machInst, 9, 8); + uint8_t esize = 0; + uint8_t size = 0; + + if (tsize == 0x0) { + return new Unknown64(machInst); + } else if (tsize == 0x1) { + esize = 8; + } else if ((tsize & 0x0E) == 0x2) { + esize = 16; + size = 1; + } else if ((tsize & 0x0C) == 0x4) { + esize = 32; + size = 2; + } else if ((tsize & 0x08) == 0x8) { + esize = 64; + size = 3; + } + + uint8_t opc = bits(machInst, 18, 16); + switch (opc) { + case 0x0: + { + unsigned shiftAmt = 2 * esize - ((tsize << 3) | imm3); + return decodeSveBinImmPredU<SveAsrImmPred>( + size, machInst, zdn, shiftAmt, pg); + } + case 0x01: + { + unsigned shiftAmt = 2 * esize - ((tsize << 3) | imm3); + return decodeSveBinImmPredU<SveLsrImmPred>( + size, machInst, zdn, shiftAmt, pg); + } + case 0x03: + { + unsigned shiftAmt = ((tsize << 3) | imm3) - esize; + return decodeSveBinImmPredU<SveLslImmPred>( + size, machInst, 
zdn, shiftAmt, pg); + } + case 0x04: + { + unsigned shiftAmt = 2 * esize - ((tsize << 3) | imm3); + return decodeSveBinImmPredS<SveAsrd>( + size, machInst, zdn, shiftAmt, pg); + } + } + return new Unknown64(machInst); + } // decodeSveShiftByImmPred0 + + StaticInstPtr + decodeSveShiftByVectorPred(ExtMachInst machInst) + { + IntRegIndex zdn = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex zm = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 12, 10); + uint8_t size = bits(machInst, 23, 22); + uint8_t opc = bits(machInst, 18, 16); + switch (opc) { + case 0: + return decodeSveBinDestrPredU<SveAsrPred>( + size, machInst, zdn, zm, pg); + case 1: + return decodeSveBinDestrPredU<SveLsrPred>( + size, machInst, zdn, zm, pg); + case 3: + return decodeSveBinDestrPredU<SveLslPred>( + size, machInst, zdn, zm, pg); + case 4: + return decodeSveBinDestrPredU<SveAsrr>( + size, machInst, zdn, zm, pg); + case 5: + return decodeSveBinDestrPredU<SveLsrr>( + size, machInst, zdn, zm, pg); + case 7: + return decodeSveBinDestrPredU<SveLslr>( + size, machInst, zdn, zm, pg); + } + return new Unknown64(machInst); + } // decodeSveShiftByVectorPred + + StaticInstPtr + decodeSveShiftByWideElemsPred(ExtMachInst machInst) + { + IntRegIndex zdn = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex zm = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 12, 10); + uint8_t size = bits(machInst, 23, 22); + uint8_t opc = bits(machInst, 18, 16); + switch (opc) { + case 0x0: + return decodeSveBinDestrPredU<SveAsrWidePred>( + size, machInst, zdn, zm, pg); + case 0x1: + return decodeSveBinDestrPredU<SveLsrWidePred>( + size, machInst, zdn, zm, pg); + case 0x3: + return decodeSveBinDestrPredU<SveLslWidePred>( + size, machInst, zdn, zm, pg); + } + return new Unknown64(machInst); + } // decodeSveShiftByWideElemsPred + + StaticInstPtr + decodeSveShiftByImmPred(ExtMachInst machInst) + { + uint8_t b20_19 = bits(machInst, 20, 19); + uint8_t b23_22 = bits(machInst, 23, 22); + + if (b20_19 == 0x0) { + return decodeSveShiftByImmPred0(machInst); + } else if (b20_19 == 0x2) { + return decodeSveShiftByVectorPred(machInst); + } else if (b20_19 == 0x3 && b23_22 != 0x3) { + return decodeSveShiftByWideElemsPred(machInst); + } + return new Unknown64(machInst); + } // decodeSveShiftByImmPred + + StaticInstPtr + decodeSveIntArithUnaryPred(ExtMachInst machInst) + { + IntRegIndex zd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex zn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 12, 10); + unsigned esize = bits(machInst, 23, 22); + uint8_t opg = bits(machInst, 20, 19); + uint8_t opc = bits(machInst, 18, 16); + if (opg == 0x2) { + bool unsig = static_cast<bool>(opc & 1); + switch (opc) { + case 0: + case 1: + if (esize == 0) break; + if (unsig) { + return decodeSveUnaryExtendFromBPredU<SveUxtb>( + esize, machInst, zd, zn, pg); + } else { + return decodeSveUnaryExtendFromBPredU<SveSxtb>( + esize, machInst, zd, zn, pg); + } + case 2: + case 3: + if (esize < 2) break; + if (unsig) { + return decodeSveUnaryExtendFromHPredU<SveUxth>( + esize, machInst, zd, zn, pg); + } else { + return decodeSveUnaryExtendFromHPredU<SveSxth>( + esize, machInst, zd, zn, pg); + } + case 4: + case 5: + if (esize != 3) break; + if (unsig) { + return new SveUxtw<uint32_t, uint64_t>( + machInst, zd, zn, pg); + } else { + return new SveSxtw<uint32_t, uint64_t>( + machInst, zd, zn, pg); 
+ } + case 6: + return decodeSveUnaryPredS<SveAbs>( + esize, machInst, zd, zn, pg); + case 7: + return decodeSveUnaryPredS<SveNeg>( + esize, machInst, zd, zn, pg); + } + } else if (opg == 0x3) { + switch (opc) { + case 0: + return decodeSveUnaryPredS<SveCls>( + esize, machInst, zd, zn, pg); + case 1: + return decodeSveUnaryPredS<SveClz>( + esize, machInst, zd, zn, pg); + case 2: + return decodeSveUnaryPredU<SveCnt>( + esize, machInst, zd, zn, pg); + case 3: + return decodeSveUnaryPredU<SveCnot>( + esize, machInst, zd, zn, pg); + case 4: + return decodeSveUnaryPredF<SveFabs>( + esize, machInst, zd, zn, pg); + case 5: + return decodeSveUnaryPredF<SveFneg>( + esize, machInst, zd, zn, pg); + case 6: + return decodeSveUnaryPredU<SveNot>( + esize, machInst, zd, zn, pg); + break; + } + } + return new Unknown64(machInst); + } // decodeSveIntArithUnaryPred + + StaticInstPtr + decodeSveIntArithUnpred(ExtMachInst machInst) + { + IntRegIndex zd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex zn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex zm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + uint8_t opc = (uint8_t) bits(machInst, 12, 10); + uint8_t size = (uint8_t) bits(machInst, 23, 22); + + switch (opc) { + case 0x0: + return decodeSveBinUnpredU<SveAddUnpred>(size, machInst, + zd, zn, zm); + case 0x1: + return decodeSveBinUnpredU<SveSubUnpred>(size, machInst, + zd, zn, zm); + case 0x4: + return decodeSveBinUnpredS<SveSqadd>(size, machInst, + zd, zn, zm); + case 0x5: + return decodeSveBinUnpredU<SveUqadd>(size, machInst, + zd, zn, zm); + case 0x6: + return decodeSveBinUnpredS<SveSqsub>(size, machInst, + zd, zn, zm); + case 0x7: + return decodeSveBinUnpredU<SveUqsub>(size, machInst, + zd, zn, zm); + } + + return new Unknown64(machInst); + } // decodeSveIntArithUnpred + + StaticInstPtr + decodeSveIntLogUnpred(ExtMachInst machInst) + { + IntRegIndex zd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex zn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex zm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + uint8_t opc = (uint8_t) (bits(machInst, 23, 22) << 3 + | bits(machInst, 12, 10)); + + switch (opc) { + case 0x4: + return new SveAndUnpred<uint64_t>(machInst, zd, zn, zm); + case 0xc: + return new SveOrrUnpred<uint64_t>(machInst, zd, zn, zm); + case 0x14: + return new SveEorUnpred<uint64_t>(machInst, zd, zn, zm); + case 0x1c: + return new SveBicUnpred<uint64_t>(machInst, zd, zn, zm); + } + + return new Unknown64(machInst); + } // decodeSveIntLogUnpred + + StaticInstPtr + decodeSveIndexGen(ExtMachInst machInst) + { + IntRegIndex zd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + uint8_t size = (uint8_t) bits(machInst, 23, 22); + uint8_t grp = (uint8_t) bits(machInst, 11, 10); + + switch (grp) { + case 0: + { // INDEX (immediate) + int8_t imm5 = sext<5>(bits(machInst, 9, 5)); + int8_t imm5b = sext<5>(bits(machInst, 20, 16)); + switch (size) { + case 0: + return new SveIndexII<int8_t>(machInst, + zd, imm5, imm5b); + case 1: + return new SveIndexII<int16_t>(machInst, + zd, imm5, imm5b); + case 2: + return new SveIndexII<int32_t>(machInst, + zd, imm5, imm5b); + case 3: + return new SveIndexII<int64_t>(machInst, + zd, imm5, imm5b); + } + } + case 1: + { // INDEX (scalar, immediate) + int8_t imm5 = sext<5>(bits(machInst, 20, 16)); + IntRegIndex zn = (IntRegIndex) (uint8_t) bits( + machInst, 9, 5); + switch (size) { + case 0: + return new SveIndexRI<int8_t>(machInst, + zd, zn, imm5); + case 1: + return new SveIndexRI<int16_t>(machInst, + zd, zn, 
imm5); + case 2: + return new SveIndexRI<int32_t>(machInst, + zd, zn, imm5); + case 3: + return new SveIndexRI<int64_t>(machInst, + zd, zn, imm5); + } + } + case 2: + { // INDEX (immediate, scalar) + int8_t imm5 = sext<5>(bits(machInst, 9, 5)); + IntRegIndex zm = (IntRegIndex) (uint8_t) bits( + machInst, 20, 16); + switch (size) { + case 0: + return new SveIndexIR<int8_t>(machInst, + zd, imm5, zm); + case 1: + return new SveIndexIR<int16_t>(machInst, + zd, imm5, zm); + case 2: + return new SveIndexIR<int32_t>(machInst, + zd, imm5, zm); + case 3: + return new SveIndexIR<int64_t>(machInst, + zd, imm5, zm); + } + } + case 3: + { // INDEX (scalars) + IntRegIndex zn = (IntRegIndex) (uint8_t) bits( + machInst, 9, 5); + IntRegIndex zm = (IntRegIndex) (uint8_t) bits( + machInst, 20, 16); + switch (size) { + case 0: + return new SveIndexRR<int8_t>(machInst, + zd, zn, zm); + case 1: + return new SveIndexRR<int16_t>(machInst, + zd, zn, zm); + case 2: + return new SveIndexRR<int32_t>(machInst, + zd, zn, zm); + case 3: + return new SveIndexRR<int64_t>(machInst, + zd, zn, zm); + } + } + } + return new Unknown64(machInst); + } // decodeSveIndexGen + + StaticInstPtr + decodeSveStackAlloc(ExtMachInst machInst) + { + uint8_t b23_22 = bits(machInst, 23, 22); + uint8_t b11 = bits(machInst, 11); + if ((b23_22 & 0x2) == 0x0 && b11 == 0x0) { + IntRegIndex rd = makeSP( + (IntRegIndex) (uint8_t) bits(machInst, 4, 0)); + IntRegIndex rn = makeSP( + (IntRegIndex) (uint8_t) bits(machInst, 20, 16)); + uint64_t imm = sext<6>(bits(machInst, 10, 5)); + if ((b23_22 & 0x1) == 0x0) { + return new AddvlXImm(machInst, rd, rn, imm); + } else { + return new AddplXImm(machInst, rd, rn, imm); + } + } else if (b23_22 == 0x2 && b11 == 0x0) { + IntRegIndex rd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + uint64_t imm = sext<6>(bits(machInst, 10, 5)); + if (bits(machInst, 20, 16) == 0x1f) { + return new SveRdvl(machInst, rd, imm); + } + } + return new Unknown64(machInst); + } // decodeSveStackAlloc + + StaticInstPtr + decodeSveShiftByWideElemsUnpred(ExtMachInst machInst) + { + IntRegIndex zd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex zn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex zm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + uint8_t size = bits(machInst, 23, 22); + uint8_t opc = (uint8_t) bits(machInst, 11, 10); + switch (opc) { + case 0x0: + return decodeSveBinUnpredU<SveAsrWideUnpred>( + size, machInst, zd, zn, zm); + case 0x1: + return decodeSveBinUnpredU<SveLsrWideUnpred>( + size, machInst, zd, zn, zm); + case 0x3: + return decodeSveBinUnpredU<SveLslWideUnpred>( + size, machInst, zd, zn, zm); + } + return new Unknown64(machInst); + } // decodeSveShiftByWideElemsUnpred + + StaticInstPtr + decodeSveShiftByImmUnpredB(ExtMachInst machInst) + { + IntRegIndex zd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex zn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + uint8_t imm3 = (uint8_t) bits(machInst, 18, 16); + + uint8_t tsize = (bits(machInst, 23, 22) << 2) | bits(machInst, 20, 19); + uint8_t esize = 0; + uint8_t size = 0; + if (tsize == 0x0) { + return new Unknown64(machInst); + } else if (tsize == 0x1) { + esize = 8; + } else if ((tsize & 0x0E) == 0x2) { + esize = 16; + size = 1; + } else if ((tsize & 0x0C) == 0x4) { + esize = 32; + size = 2; + } else if ((tsize & 0x08) == 0x8) { + esize = 64; + size = 3; + } + + uint8_t opc = bits(machInst, 11, 10); + switch (opc) { + case 0x00: + { + unsigned shiftAmt = 2 * esize - ((tsize << 3) | imm3); + return 
decodeSveBinImmUnpredU<SveAsrImmUnpred>( + size, machInst, zd, zn, shiftAmt); + } + case 0x01: + { + unsigned shiftAmt = 2 * esize - ((tsize << 3) | imm3); + return decodeSveBinImmUnpredU<SveLsrImmUnpred>( + size, machInst, zd, zn, shiftAmt); + } + case 0x03: + { + unsigned shiftAmt = ((tsize << 3) | imm3) - esize; + return decodeSveBinImmUnpredU<SveLslImmUnpred>( + size, machInst, zd, zn, shiftAmt); + } + } + + return new Unknown64(machInst); + } // decodeSveShiftByImmUnpredB + + StaticInstPtr + decodeSveShiftByImmUnpred(ExtMachInst machInst) + { + if (bits(machInst, 12)) { + return decodeSveShiftByImmUnpredB(machInst); + } else { + return decodeSveShiftByWideElemsUnpred(machInst); + } + return new Unknown64(machInst); + } // decodeSveShiftByImmUnpred + + StaticInstPtr + decodeSveCompVecAddr(ExtMachInst machInst) + { + IntRegIndex zd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex zn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex zm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + uint8_t mult = 1 << bits(machInst, 11, 10); + + uint8_t opc = bits(machInst, 23, 22); + + switch (opc) { + case 0x0: + return new SveAdr<uint64_t>(machInst, zd, zn, zm, mult, + SveAdr<uint64_t>::SveAdrOffsetUnpackedSigned); + case 0x1: + return new SveAdr<uint64_t>(machInst, zd, zn, zm, mult, + SveAdr<uint64_t>::SveAdrOffsetUnpackedUnsigned); + case 0x2: + return new SveAdr<uint32_t>(machInst, zd, zn, zm, mult, + SveAdr<uint32_t>::SveAdrOffsetPacked); + case 0x3: + return new SveAdr<uint64_t>(machInst, zd, zn, zm, mult, + SveAdr<uint64_t>::SveAdrOffsetPacked); + } + return new Unknown64(machInst); + } // decodeSveCompVecAddr + + StaticInstPtr + decodeSveIntMiscUnpred(ExtMachInst machInst) + { + IntRegIndex zd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex zn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + + uint8_t size = bits(machInst, 23, 22); + uint8_t opc = bits(machInst, 11, 10); + switch (opc) { + case 0x0: + // SVE floating-point trig select coefficient + { + if (size == 0) { + break; + } + IntRegIndex zm = (IntRegIndex) (uint8_t) bits(machInst, + 20, 16); + return decodeSveBinUnpredF<SveFtssel>( + size, machInst, zd, zn, zm); + } + case 0x2: + // SVE floating-point exponential accelerator + if (size == 0) { + break; + } + return decodeSveUnaryUnpredF<SveFexpa>(size, machInst, zd, zn); + case 0x3: + // SVE constructive prefix (unpredicated) + if (size == 0x0 && bits(machInst, 20, 16) == 0x0) { + return new SveMovprfxUnpred<uint64_t>(machInst, zd, zn); + } + break; + } + return new Unknown64(machInst); + } // decodeSveIntMiscUnpred + + StaticInstPtr + decodeSveElemCount(ExtMachInst machInst) + { + uint8_t opc20 = (uint8_t) bits(machInst, 20); + uint8_t b13_12 = (uint8_t) bits(machInst, 13, 12); + uint8_t opc11 = (uint8_t) bits(machInst, 11); + uint8_t opc10 = (uint8_t) bits(machInst, 10); + uint8_t opc11_10 = (uint8_t) bits(machInst, 11, 10); + if (b13_12 == 0) { + uint8_t pattern = (uint8_t) bits(machInst, 9, 5); + uint8_t imm4 = (uint8_t) bits(machInst, 19, 16) + 1; + IntRegIndex zdn = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + unsigned size = (unsigned) bits(machInst, 23, 22); + if (opc20) { + if (opc11 == 0) { + if (opc10) { + return decodeSveElemIntCountLU<SveDecv>(size, + machInst, zdn, pattern, imm4); + } else { + return decodeSveElemIntCountLU<SveIncv>(size, + machInst, zdn, pattern, imm4); + } + } + } else { + if (opc11) { + if (opc10) { + return decodeSveElemIntCountLU<SveUqdecv>(size, + machInst, zdn, pattern, imm4); + } else { + return 
decodeSveElemIntCountLS<SveSqdecv>(size, + machInst, zdn, pattern, imm4); + } + } else { + if (opc10) { + return decodeSveElemIntCountLU<SveUqincv>(size, + machInst, zdn, pattern, imm4); + } else { + return decodeSveElemIntCountLS<SveSqincv>(size, + machInst, zdn, pattern, imm4); + } + } + } + } else if (b13_12 == 3) { + uint8_t pattern = (uint8_t) bits(machInst, 9, 5); + uint8_t imm4 = (uint8_t) bits(machInst, 19, 16) + 1; + IntRegIndex rdn = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + unsigned size = (unsigned) bits(machInst, 23, 22); + switch (opc11_10) { + case 0: + if (opc20) { + return decodeSveElemIntCountS<SveSqinc>(size, + machInst, rdn, pattern, imm4); + } else { + return decodeSveElemIntCountS<SveSqinc32>(size, + machInst, rdn, pattern, imm4); + } + case 1: + if (opc20) { + return decodeSveElemIntCountU<SveUqinc>(size, + machInst, rdn, pattern, imm4); + } else { + return decodeSveElemIntCountU<SveUqinc32>(size, + machInst, rdn, pattern, imm4); + } + case 2: + if (opc20) { + return decodeSveElemIntCountS<SveSqdec>(size, + machInst, rdn, pattern, imm4); + } else { + return decodeSveElemIntCountS<SveSqdec32>(size, + machInst, rdn, pattern, imm4); + } + case 3: + if (opc20) { + return decodeSveElemIntCountU<SveUqdec>(size, + machInst, rdn, pattern, imm4); + } else { + return decodeSveElemIntCountU<SveUqdec32>(size, + machInst, rdn, pattern, imm4); + } + } + } else if (opc20 && b13_12 == 2 && !(opc11_10 & 0x2)) { + uint8_t pattern = (uint8_t) bits(machInst, 9, 5); + uint8_t imm4 = (uint8_t) bits(machInst, 19, 16) + 1; + IntRegIndex rdn = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + unsigned size = (unsigned) bits(machInst, 23, 22); + if (opc11_10 & 0x1) { + return decodeSveElemIntCountU<SveDec>(size, machInst, + rdn, pattern, imm4); + } else { + return decodeSveElemIntCountU<SveInc>(size, machInst, + rdn, pattern, imm4); + } + } else if (!opc20 && b13_12 == 2 && opc11_10 == 0) { + uint8_t pattern = (uint8_t) bits(machInst, 9, 5); + uint8_t imm4 = (uint8_t) bits(machInst, 19, 16) + 1; + IntRegIndex rd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + unsigned size = (unsigned) bits(machInst, 23, 22); + return decodeSveElemIntCountU<SveCntx>(size, machInst, + rd, pattern, imm4); + } + return new Unknown64(machInst); + } // decodeSveElemCount + + StaticInstPtr + decodeSveLogMaskImm(ExtMachInst machInst) + { + IntRegIndex zd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + bool n = bits(machInst, 17); + uint8_t immr = bits(machInst, 16, 11); + uint8_t imms = bits(machInst, 10, 5); + + // Decode bitmask + // len = MSB(n:NOT(imms)), len < 1 is undefined + uint8_t len = 0; + if (n) { + len = 6; + } else if (imms == 0x3f || imms == 0x3e) { + return new Unknown64(machInst); + } else { + len = findMsbSet(imms ^ 0x3f); + } + // Generate r, s, and size + uint64_t r = bits(immr, len - 1, 0); + uint64_t s = bits(imms, len - 1, 0); + uint8_t size = 1 << len; + if (s == size - 1) + return new Unknown64(machInst); + // Generate the pattern with s 1s, rotated by r, with size bits + uint64_t pattern = mask(s + 1); + if (r) { + pattern = (pattern >> r) | (pattern << (size - r)); + pattern &= mask(size); + } + // Replicate that to fill up the immediate + for (unsigned i = 1; i < (64 / size); i *= 2) + pattern |= (pattern << (i * size)); + uint64_t imm = pattern; + + if (bits(machInst, 19, 18) == 0x0) { + if (bits(machInst, 23, 22) == 0x3) { + return new SveDupm<uint64_t>(machInst, zd, imm); + } else { + switch (bits(machInst, 23, 22)) { + case 0x0: + return new SveOrrImm<uint64_t>(machInst, zd, 
imm); + case 0x1: + return new SveEorImm<uint64_t>(machInst, zd, imm); + case 0x2: + return new SveAndImm<uint64_t>(machInst, zd, imm); + } + } + } + + return new Unknown64(machInst); + } // decodeSveLogMaskImm + + StaticInstPtr + decodeSveIntWideImmPred(ExtMachInst machInst) + { + IntRegIndex zd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 19, 16); + uint8_t size = bits(machInst, 23, 22); + + if (bits(machInst, 15) == 0x0) { + uint64_t imm = bits(machInst, 12, 5); + uint8_t sh = bits(machInst, 13); + uint8_t m = bits(machInst, 14); + if (sh) { + if (size == 0x0) { + return new Unknown64(machInst); + } + imm <<= 8; + } + if (m) { + if (sh) { + return decodeSveWideImmPredU<SveCpyImmMerge>( + size, machInst, zd, sext<16>(imm), pg); + } else { + return decodeSveWideImmPredU<SveCpyImmMerge>( + size, machInst, zd, sext<8>(imm), pg); + } + } else { + if (sh) { + return decodeSveWideImmPredU<SveCpyImmZero>( + size, machInst, zd, sext<16>(imm), pg, + false /* isMerging */); + } else { + return decodeSveWideImmPredU<SveCpyImmZero>( + size, machInst, zd, sext<8>(imm), pg, + false /* isMerging */); + } + } + } else if (bits(machInst, 15, 13) == 0x6 && size != 0x0) { + uint64_t imm = vfp_modified_imm(bits(machInst, 12, 5), + decode_fp_data_type(size)); + return decodeSveWideImmPredF<SveFcpy>( + size, machInst, zd, imm, pg); + } + + return new Unknown64(machInst); + } // decodeSveIntWideImmPred + + StaticInstPtr + decodeSvePermExtract(ExtMachInst machInst) + { + uint8_t b23_22 = (unsigned) bits(machInst, 23, 22); + if (!b23_22) { + uint8_t position = + bits(machInst, 20, 16) << 3 | bits(machInst, 12, 10); + IntRegIndex zdn = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex zm = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + return new SveExt<uint8_t>(machInst, zdn, zm, position); + } + return new Unknown64(machInst); + } // decodeSvePermExtract + + StaticInstPtr + decodeSvePermUnpred(ExtMachInst machInst) + { + uint8_t b12_10 = bits(machInst, 12, 10); + if (b12_10 == 0x4) { + unsigned size = (unsigned) bits(machInst, 23, 22); + IntRegIndex zd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex zn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex zm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + return decodeSveBinUnpredU<SveTbl>(size, machInst, zd, zn, zm); + } else if (bits(machInst, 20, 16) == 0x0 && b12_10 == 0x6) { + uint8_t size = bits(machInst, 23, 22); + IntRegIndex rn = makeSP( + (IntRegIndex) (uint8_t) bits(machInst, 9, 5)); + IntRegIndex zd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + return decodeSveUnaryUnpredU<SveDupScalar>(size, machInst, zd, rn); + } else if (bits(machInst, 20, 16) == 0x4 && b12_10 == 0x6) { + uint8_t size = bits(machInst, 23, 22); + IntRegIndex zdn = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex rm = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + return decodeSveUnaryUnpredU<SveInsr>(size, machInst, zdn, rm); + } else if (bits(machInst, 20, 16) == 0x14 && b12_10 == 0x6) { + uint8_t size = bits(machInst, 23, 22); + IntRegIndex zdn = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + return decodeSveUnaryUnpredU<SveInsrf>(size, machInst, zdn, vm); + } else if (bits(machInst, 20, 16) == 0x18 && b12_10 == 0x6) { + uint8_t size = bits(machInst, 23, 22); + IntRegIndex zd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex zn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + return 
decodeSveUnaryUnpredU<SveRevv>(size, machInst, zd, zn); + } else if (b12_10 == 0x0 && bits(machInst, 20, 16) != 0x0) { + uint8_t imm = + bits(machInst, 23, 22) << 5 | // imm3h + bits(machInst, 20) << 4 | // imm3l + bits(machInst, 19, 16); // tsz + IntRegIndex zd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex zn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + if (imm & 0x1) { + imm >>= 1; + return new SveDupIdx<uint8_t>(machInst, zd, zn, imm); + } else if (imm & 0x2) { + imm >>= 2; + return new SveDupIdx<uint16_t>(machInst, zd, zn, imm); + } else if (imm & 0x4) { + imm >>= 3; + return new SveDupIdx<uint32_t>(machInst, zd, zn, imm); + } else if (imm & 0x8) { + imm >>= 4; + return new SveDupIdx<uint64_t>(machInst, zd, zn, imm); + } else if (imm & 0x10) { + imm >>= 5; + return new SveDupIdx<__uint128_t>(machInst, zd, zn, imm); + } + return new Unknown64(machInst); + } else if (bits(machInst, 23, 22) != 0x0 && + bits(machInst, 20, 18) == 0x4 && b12_10 == 0x6) { + unsigned size = (unsigned) bits(machInst, 23, 22); + IntRegIndex zd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex zn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + if (bits(machInst, 17)) { + if (bits(machInst, 16)) { + return decodeSveUnpackU<SveUunpkhi>(size, machInst, + zd, zn); + } else { + return decodeSveUnpackU<SveUunpklo>(size, machInst, + zd, zn); + } + } else { + if (bits(machInst, 16)) { + return decodeSveUnpackS<SveSunpkhi>(size, machInst, + zd, zn); + } else { + return decodeSveUnpackS<SveSunpklo>(size, machInst, + zd, zn); + } + } + } + return new Unknown64(machInst); + } // decodeSvePermUnpred + + StaticInstPtr + decodeSvePermPredicates(ExtMachInst machInst) + { + if (bits(machInst, 20) == 0x0 && bits(machInst, 12, 11) != 0x3 && + bits(machInst, 9) == 0x0 && bits(machInst, 4) == 0x0) { + IntRegIndex zd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex zn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex zm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + uint8_t size = bits(machInst, 23, 22); + + uint8_t opc = bits(machInst, 12, 10); + + switch (opc) { + case 0x0: + return decodeSveBinUnpredU<SveZip1Pred>(size, + machInst, zd, zn, zm); + case 0x1: + return decodeSveBinUnpredU<SveZip2Pred>(size, + machInst, zd, zn, zm); + case 0x2: + return decodeSveBinUnpredU<SveUzp1Pred>(size, + machInst, zd, zn, zm); + case 0x3: + return decodeSveBinUnpredU<SveUzp2Pred>(size, + machInst, zd, zn, zm); + case 0x4: + return decodeSveBinUnpredU<SveTrn1Pred>(size, + machInst, zd, zn, zm); + case 0x5: + return decodeSveBinUnpredU<SveTrn2Pred>(size, + machInst, zd, zn, zm); + } + } else if (bits(machInst, 23, 22) == 0x0 && + bits(machInst, 20, 17) == 0x8 && bits(machInst, 12, 9) == 0x0 + && bits(machInst, 4) == 0x0) { + IntRegIndex pd = (IntRegIndex) (uint8_t) bits(machInst, 3, 0); + IntRegIndex pn = (IntRegIndex) (uint8_t) bits(machInst, 8, 5); + if (bits(machInst, 16)) { + return new SvePunpkhi<uint8_t, uint16_t>(machInst, pd, pn); + } else { + return new SvePunpklo<uint8_t, uint16_t>(machInst, pd, pn); + } + } else if (bits(machInst, 20, 16) == 0x14 && + bits(machInst, 12, 9) == 0x00 && bits(machInst, 4) == 0) { + uint8_t size = bits(machInst, 23, 22); + IntRegIndex pd = (IntRegIndex) (uint8_t) bits(machInst, 3, 0); + IntRegIndex pn = (IntRegIndex) (uint8_t) bits(machInst, 8, 5); + return decodeSveUnaryUnpredU<SveRevp>(size, machInst, pd, pn); + } + return new Unknown64(machInst); + } // decodeSvePermPredicates + + StaticInstPtr + decodeSvePermIntlv(ExtMachInst machInst) + { 
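+        // Note (editorial): opc (bits 12:10) selects among the
+        // interleaving permutes decoded below: ZIP1/ZIP2 interleave the
+        // low/high halves of the two sources, UZP1/UZP2 concatenate their
+        // even/odd-numbered elements, and TRN1/TRN2 interleave even/odd
+        // element pairs; the remaining encodings decode to Unknown64.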
+ IntRegIndex zd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex zn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex zm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + uint8_t size = bits(machInst, 23, 22); + + uint8_t opc = bits(machInst, 12, 10); + + switch (opc) { + case 0x0: + return decodeSveBinUnpredU<SveZip1>(size, machInst, zd, zn, zm); + case 0x1: + return decodeSveBinUnpredU<SveZip2>(size, machInst, zd, zn, zm); + case 0x2: + return decodeSveBinUnpredU<SveUzp1>(size, machInst, zd, zn, zm); + case 0x3: + return decodeSveBinUnpredU<SveUzp2>(size, machInst, zd, zn, zm); + case 0x4: + return decodeSveBinUnpredU<SveTrn1>(size, machInst, zd, zn, zm); + case 0x5: + return decodeSveBinUnpredU<SveTrn2>(size, machInst, zd, zn, zm); + } + return new Unknown64(machInst); + } // decodeSvePermIntlv + + StaticInstPtr + decodeSvePermPred(ExtMachInst machInst) + { + uint8_t b13 = bits(machInst, 13); + uint8_t b23 = bits(machInst, 23); + switch (bits(machInst, 20, 16)) { + case 0x0: + if (!b13) { + uint8_t size = bits(machInst, 23, 22); + IntRegIndex pg = (IntRegIndex)(uint8_t) bits(machInst, 12, 10); + IntRegIndex vn = (IntRegIndex)(uint8_t) bits(machInst, 9, 5); + IntRegIndex zd = (IntRegIndex)(uint8_t) bits(machInst, 4, 0); + return decodeSveUnaryPredU<SveCpySimdFpScalar>(size, + machInst, zd, vn, pg); + } + break; + case 0x1: + if (!b13 && b23) { + // sve_int_perm_compact + IntRegIndex pg = (IntRegIndex)(uint8_t) bits(machInst, 12, 10); + IntRegIndex zn = (IntRegIndex)(uint8_t) bits(machInst, 9, 5); + IntRegIndex zd = (IntRegIndex)(uint8_t) bits(machInst, 4, 0); + if (bits(machInst, 22)) { + return new SveCompact<uint64_t>(machInst, zd, zn, pg); + } else { + return new SveCompact<uint32_t>(machInst, zd, zn, pg); + } + } + break; + case 0x8: + if (b13) { + uint8_t size = bits(machInst, 23, 22); + IntRegIndex pg = (IntRegIndex)(uint8_t) bits(machInst, 12, 10); + IntRegIndex rn = makeSP( + (IntRegIndex)(uint8_t) bits(machInst, 9, 5)); + IntRegIndex zd = (IntRegIndex)(uint8_t) bits(machInst, 4, 0); + return decodeSveUnaryPredU<SveCpyScalar>(size, + machInst, zd, rn, pg); + } + break; + case 0xC: + if (!b13) { + uint8_t size = bits(machInst, 23, 22); + IntRegIndex pg = (IntRegIndex)(uint8_t) bits(machInst, 12, 10); + IntRegIndex zdn = (IntRegIndex)(uint8_t) bits(machInst, 4, 0); + IntRegIndex zm = (IntRegIndex)(uint8_t) bits(machInst, 9, 5); + return decodeSveBinDestrPredU<SveSplice>(size, machInst, + zdn, zm, pg); + } + break; + } + switch (bits(machInst, 20, 17)) { + case 0x0: + if (b13) { + uint8_t AB = bits(machInst, 16); + uint8_t size = bits(machInst, 23, 22); + IntRegIndex pg = (IntRegIndex)(uint8_t) bits(machInst, 12, 10); + IntRegIndex zn = (IntRegIndex)(uint8_t) bits(machInst, 9, 5); + IntRegIndex rd = (IntRegIndex)(uint8_t) bits(machInst, 4, 0); + if (!AB) { + return decodeSveUnaryPredU<SveLasta>(size, + machInst, rd, zn, pg); + } else { + return decodeSveUnaryPredU<SveLastb>(size, + machInst, rd, zn, pg); + } + } + break; + case 0x1: + if (!b13) { + uint8_t AB = bits(machInst, 16); + uint8_t size = bits(machInst, 23, 22); + IntRegIndex pg = (IntRegIndex)(uint8_t) bits(machInst, 12, 10); + IntRegIndex zn = (IntRegIndex)(uint8_t) bits(machInst, 9, 5); + IntRegIndex vd = (IntRegIndex)(uint8_t) bits(machInst, 4, 0); + if (!AB) { + return decodeSveUnaryPredU<SveLastaf>(size, + machInst, vd, zn, pg); + } else { + return decodeSveUnaryPredU<SveLastbf>(size, + machInst, vd, zn, pg); + } + } + break; + case 0x4: + if (!b13) { + uint8_t AB = bits(machInst, 16); + 
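+                // Note (editorial): AB (bit 16) picks the LASTA (0) or
+                // LASTB (1) form: LASTB extracts the last active element,
+                // LASTA the element after it. This arm covers the variants
+                // with a general-purpose destination register.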
uint8_t size = bits(machInst, 23, 22); + IntRegIndex pg = (IntRegIndex)(uint8_t) bits(machInst, 12, 10); + IntRegIndex zm = (IntRegIndex)(uint8_t) bits(machInst, 9, 5); + IntRegIndex zdn = (IntRegIndex)(uint8_t) bits(machInst, 4, 0); + if (!AB) { + return decodeSveUnaryPredU<SveClastav>(size, + machInst, zdn, zm, pg); + } else { + return decodeSveUnaryPredU<SveClastbv>(size, + machInst, zdn, zm, pg); + } + } + break; + case 0x5: + if (!b13) { + uint8_t AB = bits(machInst, 16); + uint8_t size = bits(machInst, 23, 22); + IntRegIndex pg = (IntRegIndex)(uint8_t) bits(machInst, 12, 10); + IntRegIndex zm = (IntRegIndex)(uint8_t) bits(machInst, 9, 5); + IntRegIndex zdn = (IntRegIndex)(uint8_t) bits(machInst, 4, 0); + if (!AB) { + return decodeSveUnaryPredU<SveClastaf>(size, + machInst, zdn, zm, pg); + } else { + return decodeSveUnaryPredU<SveClastbf>(size, + machInst, zdn, zm, pg); + } + } + break; + case 0x8: + if (b13) { + uint8_t AB = bits(machInst, 16); + uint8_t size = bits(machInst, 23, 22); + IntRegIndex pg = (IntRegIndex)(uint8_t) bits(machInst, 12, 10); + IntRegIndex zm = (IntRegIndex)(uint8_t) bits(machInst, 9, 5); + IntRegIndex rdn = (IntRegIndex)(uint8_t) bits(machInst, 4, 0); + if (!AB) { + return decodeSveUnaryPredU<SveClasta>(size, + machInst, rdn, zm, pg); + } else { + return decodeSveUnaryPredU<SveClastb>(size, + machInst, rdn, zm, pg); + } + } + break; + } + if (bits(machInst, 20, 18) == 0x1 && !b13) { + unsigned size = (unsigned) bits(machInst, 23, 22); + IntRegIndex pg = (IntRegIndex)(uint8_t) bits(machInst, 12, 10); + IntRegIndex zn = (IntRegIndex)(uint8_t) bits(machInst, 9, 5); + IntRegIndex zd = (IntRegIndex)(uint8_t) bits(machInst, 4, 0); + uint8_t opc17_16 = bits(machInst, 17, 16); + switch (opc17_16) { + case 0x00: + switch (size) { + case 1: + return new SveRevb<uint16_t>(machInst, zd, zn, pg); + case 2: + return new SveRevb<uint32_t>(machInst, zd, zn, pg); + case 3: + return new SveRevb<uint64_t>(machInst, zd, zn, pg); + } + break; + case 0x01: + switch (size) { + case 2: + return new SveRevh<uint32_t>(machInst, zd, zn, pg); + case 3: + return new SveRevh<uint64_t>(machInst, zd, zn, pg); + } + break; + case 0x02: + if (size == 3) { + return new SveRevw<uint64_t>(machInst, zd, zn, pg); + } + break; + case 0x03: + return decodeSveUnaryPredU<SveRbit>( + size, machInst, zd, zn, pg); + } + } + return new Unknown64(machInst); + } // decodeSvePermPred + + StaticInstPtr + decodeSveSelVec(ExtMachInst machInst) + { + IntRegIndex zd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex zn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 13, 10); + IntRegIndex zm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + uint8_t size = bits(machInst, 23, 22); + + return decodeSveBinConstrPredU<SveSel>(size, + machInst, zd, zn, zm, pg, SvePredType::SELECT); + } // decodeSveSelVec + + StaticInstPtr + decodeSveIntCmpVec(ExtMachInst machInst) + { + uint8_t size = bits(machInst, 23, 22); + uint8_t b14 = bits(machInst, 14); + uint8_t opc = + bits(machInst, 15) << 2 | + bits(machInst, 13) << 1 | + bits(machInst, 4); + IntRegIndex pd = (IntRegIndex) (uint8_t)bits(machInst, 3, 0); + IntRegIndex pg = (IntRegIndex) (uint8_t)bits(machInst, 12, 10); + IntRegIndex zn = (IntRegIndex) (uint8_t)bits(machInst, 9, 5); + IntRegIndex zm = (IntRegIndex) (uint8_t)bits(machInst, 20, 16); + if (b14 && size != 3) { + // sve_int_cmp_1 + switch (opc) { + case 0: + return decodeSveTerPredWS<SveCmpgew>(size, + machInst, pd, zn, zm, pg); + case 1: + 
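+            // Note (editorial): CMPGT (wide), a signed compare against the
+            // 64-bit elements of Zm; cases 2 and 3 below are CMPLT/CMPLE,
+            // and cases 4..7 the unsigned CMPHS/CMPHI/CMPLO/CMPLS wide
+            // forms.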
return decodeSveTerPredWS<SveCmpgtw>(size, + machInst, pd, zn, zm, pg); + case 2: + return decodeSveTerPredWS<SveCmpltw>(size, + machInst, pd, zn, zm, pg); + case 3: + return decodeSveTerPredWS<SveCmplew>(size, + machInst, pd, zn, zm, pg); + case 4: + return decodeSveTerPredWU<SveCmphsw>(size, + machInst, pd, zn, zm, pg); + case 5: + return decodeSveTerPredWU<SveCmphiw>(size, + machInst, pd, zn, zm, pg); + case 6: + return decodeSveTerPredWU<SveCmplow>(size, + machInst, pd, zn, zm, pg); + case 7: + return decodeSveTerPredWU<SveCmplsw>(size, + machInst, pd, zn, zm, pg); + } + } else if (!b14) { + switch (opc) { + case 0: + return decodeSveTerPredU<SveCmphs>(size, + machInst, pd, zn, zm, pg); + case 1: + return decodeSveTerPredU<SveCmphi>(size, + machInst, pd, zn, zm, pg); + case 2: + if (size != 3) { + return decodeSveTerPredWU<SveCmpeqw>(size, + machInst, pd, zn, zm, pg); + } + break; + case 3: + if (size != 3) { + return decodeSveTerPredWU<SveCmpnew>(size, + machInst, pd, zn, zm, pg); + } + break; + case 4: + return decodeSveTerPredS<SveCmpge>(size, + machInst, pd, zn, zm, pg); + case 5: + return decodeSveTerPredS<SveCmpgt>(size, + machInst, pd, zn, zm, pg); + case 6: + return decodeSveTerPredU<SveCmpeq>(size, + machInst, pd, zn, zm, pg); + case 7: + return decodeSveTerPredU<SveCmpne>(size, + machInst, pd, zn, zm, pg); + } + } + return new Unknown64(machInst); + } // decodeSveIntCmpVec + + StaticInstPtr + decodeSveIntCmpUImm(ExtMachInst machInst) + { + uint8_t cmp = bits(machInst, 13) << 1 | bits(machInst, 4); + IntRegIndex pd = (IntRegIndex) (uint8_t) bits(machInst, 3, 0); + IntRegIndex zn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 12, 10); + int64_t imm = (int64_t) bits(machInst, 20, 14); + uint8_t size = bits(machInst, 23, 22); + switch (cmp) { + case 0: + return decodeSveTerImmPredU<SveCmphsi>(size, + machInst, pd, zn, imm, pg); + case 1: + return decodeSveTerImmPredU<SveCmphii>(size, + machInst, pd, zn, imm, pg); + case 2: + return decodeSveTerImmPredU<SveCmploi>(size, + machInst, pd, zn, imm, pg); + case 3: + return decodeSveTerImmPredU<SveCmplsi>(size, + machInst, pd, zn, imm, pg); + } + return new Unknown64(machInst); + } // decodeSveIntCmpUImm + + StaticInstPtr + decodeSveIntCmpSImm(ExtMachInst machInst) + { + uint8_t opc = bits(machInst, 15) << 2 | bits(machInst, 13) << 1 | + bits(machInst, 4); + IntRegIndex pd = (IntRegIndex) (uint8_t) bits(machInst, 3, 0); + IntRegIndex zn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 12, 10); + int64_t imm = sext<5>(bits(machInst, 20, 16)); + uint8_t size = bits(machInst, 23, 22); + switch (opc) { + case 0: + return decodeSveTerImmPredS<SveCmpgei>(size, + machInst, pd, zn, imm, pg); + case 1: + return decodeSveTerImmPredS<SveCmpgti>(size, + machInst, pd, zn, imm, pg); + case 2: + return decodeSveTerImmPredS<SveCmplti>(size, + machInst, pd, zn, imm, pg); + case 3: + return decodeSveTerImmPredS<SveCmplei>(size, + machInst, pd, zn, imm, pg); + case 4: + return decodeSveTerImmPredU<SveCmpeqi>(size, + machInst, pd, zn, imm, pg); + case 5: + return decodeSveTerImmPredU<SveCmpnei>(size, + machInst, pd, zn, imm, pg); + default: + return new Unknown64(machInst); + } + return new Unknown64(machInst); + } // decodeSveIntCmpSImm + + StaticInstPtr + decodeSvePredLogicalOps(ExtMachInst machInst) + { + IntRegIndex pd = (IntRegIndex) (uint8_t) bits(machInst, 3, 0); + IntRegIndex pn = (IntRegIndex) (uint8_t) bits(machInst, 8, 5); + 
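+        // Note (editorial): the 4-bit opcode formed below packs bits 23:22
+        // (op:S) together with bits 9 and 4, selecting among AND/BIC/EOR/
+        // SEL and ORR/ORN/NOR/NAND, plus their flag-setting (S) variants.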
IntRegIndex pm = (IntRegIndex) (uint8_t) bits(machInst, 19, 16); + IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 13, 10); + uint8_t opc = (bits(machInst, 23, 22) << 2) | + (bits(machInst, 9) << 1) | + bits(machInst, 4); + switch (opc) { + case 0x0: + return new SvePredAnd<uint8_t>(machInst, pd, pn, pm, pg); + case 0x1: + return new SvePredBic<uint8_t>(machInst, pd, pn, pm, pg); + case 0x2: + return new SvePredEor<uint8_t>(machInst, pd, pn, pm, pg); + case 0x3: + return new SvePredSel<uint8_t>(machInst, pd, pn, pm, pg, true); + case 0x4: + return new SvePredAnds<uint8_t>(machInst, pd, pn, pm, pg); + case 0x5: + return new SvePredBics<uint8_t>(machInst, pd, pn, pm, pg); + case 0x6: + return new SvePredEors<uint8_t>(machInst, pd, pn, pm, pg); + case 0x8: + return new SvePredOrr<uint8_t>(machInst, pd, pn, pm, pg); + case 0x9: + return new SvePredOrn<uint8_t>(machInst, pd, pn, pm, pg); + case 0xa: + return new SvePredNor<uint8_t>(machInst, pd, pn, pm, pg); + case 0xb: + return new SvePredNand<uint8_t>(machInst, pd, pn, pm, pg); + case 0xc: + return new SvePredOrrs<uint8_t>(machInst, pd, pn, pm, pg); + case 0xd: + return new SvePredOrns<uint8_t>(machInst, pd, pn, pm, pg); + case 0xe: + return new SvePredNors<uint8_t>(machInst, pd, pn, pm, pg); + case 0xf: + return new SvePredNands<uint8_t>(machInst, pd, pn, pm, pg); + } + + return new Unknown64(machInst); + } // decodeSvePredLogicalOps + + StaticInstPtr + decodeSvePropBreakFromPrevPartition(ExtMachInst machInst) + { + if (bits(machInst, 23) == 0x0 && bits(machInst, 9) == 0x0) { + uint8_t opc = (bits(machInst, 22) << 1) | bits(machInst, 4); + IntRegIndex pm = (IntRegIndex)(uint8_t) bits(machInst, 19, 16); + IntRegIndex pg = (IntRegIndex)(uint8_t) bits(machInst, 13, 10); + IntRegIndex pn = (IntRegIndex)(uint8_t) bits(machInst, 8, 5); + IntRegIndex pd = (IntRegIndex)(uint8_t) bits(machInst, 3, 0); + switch (opc) { + case 0x0: + // BRKPA + return new SveBrkpa(machInst, pd, pn, pm, pg); + case 0x1: + // BRKPB + return new SveBrkpb(machInst, pd, pn, pm, pg); + case 0x2: + // BRKPAS + return new SveBrkpas(machInst, pd, pn, pm, pg); + case 0x3: + // BRKPBS + return new SveBrkpbs(machInst, pd, pn, pm, pg); + } + } + return new Unknown64(machInst); + } // decodeSvePropBreakFromPrevPartition + + StaticInstPtr + decodeSvePartitionBreakCond(ExtMachInst machInst) + { + if (bits(machInst, 18, 16) == 0x0 && bits(machInst, 9) == 0x0) { + bool flagset = bits(machInst, 22); + bool merging = bits(machInst, 4); + IntRegIndex pg = (IntRegIndex)(uint8_t) bits(machInst, 13, 10); + IntRegIndex pn = (IntRegIndex)(uint8_t) bits(machInst, 8, 5); + IntRegIndex pd = (IntRegIndex)(uint8_t) bits(machInst, 3, 0); + if (bits(machInst, 23)) { + if (flagset) { + if (!merging) { + return new SveBrkbs(machInst, pd, pg, pn); + } + } else { + if (merging) { + return new SveBrkbm(machInst, pd, pg, pn); + } else { + return new SveBrkbz(machInst, pd, pg, pn); + } + } + } else { + if (flagset) { + if (!merging) { + return new SveBrkas(machInst, pd, pg, pn); + } + } else { + if (merging) { + return new SveBrkam(machInst, pd, pg, pn); + } else { + return new SveBrkaz(machInst, pd, pg, pn); + } + } + } + return new Unknown64(machInst); + } + return new Unknown64(machInst); + } // decodeSvePartitionBreakCond + + StaticInstPtr + decodeSvePredTest(ExtMachInst machInst) + { + if (bits(machInst, 23, 22) == 0x1 && + bits(machInst, 18, 16) == 0x0 && + bits(machInst, 9) == 0x0) { + IntRegIndex pn = (IntRegIndex) (uint8_t) bits(machInst, 8, 5); + IntRegIndex pg = (IntRegIndex) (uint8_t) 
bits(machInst, 13, 10); + return new SvePtest(machInst, pn, pg); + } + return new Unknown64(machInst); + } // decodeSvePredTest + + StaticInstPtr + decodeSvePredIteration(ExtMachInst machInst) + { + uint8_t size = bits(machInst, 23, 22); + uint8_t opc18_16 = bits(machInst, 18, 16); + uint8_t opc10_9 = bits(machInst, 10, 9); + IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 8, 5); + IntRegIndex pdn = (IntRegIndex) (uint8_t) bits(machInst, 3, 0); + if (opc18_16 == 0x1 && opc10_9 == 0x2) { + return decodeSveUnaryPredU<SvePnext>(size, + machInst, pdn, pdn, pg); + } else if (size == 0x1 && opc18_16 == 0x0 && opc10_9 == 0) { + return new SvePfirst<uint8_t>(machInst, pdn, pdn, pg); + } + return new Unknown64(machInst); + } // decodeSvePredIteration + + StaticInstPtr + decodeSveInitPred(ExtMachInst machInst) + { + IntRegIndex pd = (IntRegIndex) (uint8_t) bits(machInst, 3, 0); + unsigned size = bits(machInst, 23, 22); + uint8_t imm = bits(machInst, 9, 5); + + if (bits(machInst, 16) == 0x0) { + return decodeSvePtrue<SvePtrue>(size, machInst, pd, imm); + } else { + return decodeSvePtrue<SvePtrues>(size, machInst, pd, imm); + } + return new Unknown64(machInst); + } // decodeSveInitPred + + StaticInstPtr + decodeSveZeroPredReg(ExtMachInst machInst) + { + if (bits(machInst, 23, 22) == 0x0 && bits(machInst, 18, 16) == 0x0) { + IntRegIndex pd = (IntRegIndex) (uint8_t) bits(machInst, 3, 0); + return new SvePfalse(machInst, pd); + } + return new Unknown64(machInst); + } // decodeSveZeroPredReg + + StaticInstPtr + decodeSvePropBreakToNextPartition(ExtMachInst machInst) + { + if (bits(machInst, 23) == 0x0 && + bits(machInst, 18, 16) == 0x0 && + bits(machInst, 9) == 0x0 && + bits(machInst, 4) == 0x0) { + IntRegIndex pg = (IntRegIndex)(uint8_t) bits(machInst, 13, 10); + IntRegIndex pn = (IntRegIndex)(uint8_t) bits(machInst, 8, 5); + IntRegIndex pdm = (IntRegIndex)(uint8_t) bits(machInst, 3, 0); + if (bits(machInst, 22) == 0x0) { + return new SveBrkn(machInst, pdm, pn, pdm, pg); + } else { + return new SveBrkns(machInst, pdm, pn, pdm, pg); + } + return new Unknown64(machInst); + } + return new Unknown64(machInst); + } // decodeSvePropBreakToNextPartition + + StaticInstPtr + decodeSveReadPredFromFFRPred(ExtMachInst machInst) + { + if (bits(machInst, 23)) { + return new Unknown64(machInst); + } + IntRegIndex pd = (IntRegIndex)(uint8_t) bits(machInst, 3, 0); + IntRegIndex pg = (IntRegIndex)(uint8_t) bits(machInst, 8, 5); + if (bits(machInst, 22)) { + return new SveRdffrsPred(machInst, pd, pg); + } else { + return new SveRdffrPred(machInst, pd, pg); + } + } // decodeSveReadPredFromFFRPred + + StaticInstPtr + decodeSveReadPredFromFFRUnpred(ExtMachInst machInst) + { + if (bits(machInst, 23, 22) != 0) { + return new Unknown64(machInst); + } + IntRegIndex pd = (IntRegIndex)(uint8_t) bits(machInst, 3, 0); + return new SveRdffrUnpred(machInst, pd); + } // decodeSveReadPredFromFFRUnpred + + StaticInstPtr + decodeSvePredGen(ExtMachInst machInst) + { + uint8_t b_20_15 = (bits(machInst, 20) << 1) | bits(machInst, 15); + switch (b_20_15) { + case 0x0: + return decodeSvePredLogicalOps(machInst); + case 0x1: + return decodeSvePropBreakFromPrevPartition(machInst); + case 0x2: + if (bits(machInst, 19) == 0x0) { + return decodeSvePartitionBreakCond(machInst); + } else { + return decodeSvePropBreakToNextPartition(machInst); + } + case 0x3: + if (bits(machInst, 19) == 0x0) { + if (bits(machInst, 4, 0) == 0x0) { + return decodeSvePredTest(machInst); + } else { + break; + } + } else { + switch (bits(machInst, 13, 12)) { + case 
0x0: + if (bits(machInst, 11) == 0x0 && + bits(machInst, 4) == 0x0) { + return decodeSvePredIteration(machInst); + } else { + break; + } + case 0x1: + break; + case 0x2: + if (bits(machInst, 11, 10) == 0x0 && + bits(machInst, 4) == 0x0) { + return decodeSveInitPred(machInst); + } else if (bits(machInst, 11, 4) == 0x40) { + return decodeSveZeroPredReg(machInst); + } + break; + case 0x3: + if (bits(machInst, 11) == 0x0) { + if (bits(machInst, 16) == 0x0) { + return decodeSveReadPredFromFFRPred(machInst); + } else if (bits(machInst, 8, 4) == 0x0) { + return decodeSveReadPredFromFFRUnpred(machInst); + } + } + break; + } + } + break; + } + return new Unknown64(machInst); + } // decodeSvePredGen + + StaticInstPtr + decodeSvePredCount(ExtMachInst machInst) + { + uint8_t b19 = bits(machInst, 19); + if (b19) { + uint8_t b13_11 = bits(machInst, 13, 11); + switch (b13_11) { + case 0x0: + { + if (bits(machInst, 10, 9) != 0x0) { + return new Unknown64(machInst); + } + IntRegIndex zdn = (IntRegIndex) (uint8_t) + bits(machInst, 4, 0); + IntRegIndex pg = (IntRegIndex) (uint8_t) + bits(machInst, 8, 5); + uint8_t esize = bits(machInst, 23, 22); + if (esize == 0x0) { + return new Unknown64(machInst); + } + uint8_t opc = bits(machInst, 18, 17); + if (opc == 0x0) { + uint8_t u = bits(machInst, 16); + if (u) { + return decodeSvePredCountVU<SveUqincpv>(esize, + machInst, zdn, pg); + } else { + return decodeSvePredCountVS<SveSqincpv>(esize, + machInst, zdn, pg); + } + } else if (opc == 0x1) { + uint8_t u = bits(machInst, 16); + if (u) { + return decodeSvePredCountVU<SveUqdecpv>(esize, + machInst, zdn, pg); + } else { + return decodeSvePredCountVS<SveSqdecpv>(esize, + machInst, zdn, pg); + } + } else if (opc == 0x2) { + uint8_t d = bits(machInst, 16); + if (d) { + return decodeSvePredCountVU<SveDecpv>(esize, + machInst, zdn, pg); + } else { + return decodeSvePredCountVU<SveIncpv>(esize, + machInst, zdn, pg); + } + } + } + break; + case 0x1: + { + IntRegIndex rdn = (IntRegIndex) (uint8_t) + bits(machInst, 4, 0); + IntRegIndex pg = (IntRegIndex) (uint8_t) + bits(machInst, 8, 5); + uint8_t esize = bits(machInst, 23, 22); + uint8_t opc = bits(machInst, 18, 17); + uint8_t opc2 = bits(machInst, 10, 9); + if (opc == 0x0) { + uint8_t u = bits(machInst, 16); + if (opc2 == 0x0) { + if (u) { + return decodeSvePredCountU<SveUqincp32>(esize, + machInst, rdn, pg); + } else { + return decodeSvePredCountS<SveSqincp32>(esize, + machInst, rdn, pg); + } + } else if (opc2 == 0x2) { + if (u) { + return decodeSvePredCountU<SveUqincp64>(esize, + machInst, rdn, pg); + } else { + return decodeSvePredCountS<SveSqincp64>(esize, + machInst, rdn, pg); + } + } + } else if (opc == 0x1) { + uint8_t u = bits(machInst, 16); + if (opc2 == 0x0) { + if (u) { + return decodeSvePredCountU<SveUqdecp32>(esize, + machInst, rdn, pg); + } else { + return decodeSvePredCountS<SveSqdecp32>(esize, + machInst, rdn, pg); + } + } else if (opc2 == 0x2) { + if (u) { + return decodeSvePredCountU<SveUqdecp64>(esize, + machInst, rdn, pg); + } else { + return decodeSvePredCountS<SveSqdecp64>(esize, + machInst, rdn, pg); + } + } + } else if (opc == 0x2) { + if (opc2 == 0x0) { + if (bits(machInst, 16)) { + return decodeSvePredCountU<SveDecp>(esize, + machInst, rdn, pg); + } else { + return decodeSvePredCountU<SveIncp>(esize, + machInst, rdn, pg); + } + } + } + } + break; + case 0x2: + if (bits(machInst, 23, 22) == 0x0 && + bits(machInst, 10, 9) == 0x0 && + bits(machInst, 4, 0) == 0x0) { + uint8_t opc = bits(machInst, 18, 16); + if (opc == 0x0) { + IntRegIndex pn = 
(IntRegIndex)(uint8_t) + bits(machInst, 8, 5); + return new SveWrffr(machInst, pn); + } else if (opc == 0x4 && bits(machInst, 8, 5) == 0x0) { + return new SveSetffr(machInst); + } + } + break; + } + } else { + uint8_t opc = bits(machInst, 18, 16); + if (opc == 0 && bits(machInst, 9) == 0) { + IntRegIndex rd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex pn = (IntRegIndex) (uint8_t) bits(machInst, 8, 5); + IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 13, + 10); + uint8_t esize = bits(machInst, 23, 22); + return decodeSveUnaryPredU<SveCntp>(esize, + machInst, rd, pn, pg); + } + } + return new Unknown64(machInst); + } // decodeSvePredCount + + StaticInstPtr + decodeSveIntCmpSca(ExtMachInst machInst) + { + uint16_t b23_13_12_11_10_3_2_1_0 = (uint16_t) + (bits(machInst, 23) << 8) | (bits(machInst, 13, 10) << 4) | + bits(machInst, 3, 0); + uint8_t b10 = (uint8_t) bits(machInst, 10); + IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + if (b23_13_12_11_10_3_2_1_0 == 0x180) { + uint8_t s64b = bits(machInst, 22); + uint8_t ne = bits(machInst, 4); + if (ne) { + if (s64b) { + return new SveCtermne<uint64_t>(machInst, rn, rm); + } else { + return new SveCtermne<uint32_t>(machInst, rn, rm); + } + } else { + if (s64b) { + return new SveCtermeq<uint64_t>(machInst, rn, rm); + } else { + return new SveCtermeq<uint32_t>(machInst, rn, rm); + } + } + } else if (b10) { + IntRegIndex pd = (IntRegIndex) (uint8_t) bits(machInst, 3, 0); + uint8_t size = (uint8_t) bits(machInst, 23, 22); + uint8_t s64b = (uint8_t) bits(machInst, 12); + uint8_t opc = (uint8_t) bits(machInst, 11) << 1 | + bits(machInst, 4); + if (s64b) { + switch (opc) { + case 0: + return decodeSveBinUnpredS<SveWhilelt64>(size, + machInst, pd, rn, rm); + case 1: + return decodeSveBinUnpredS<SveWhilele64>(size, + machInst, pd, rn, rm); + case 2: + return decodeSveBinUnpredU<SveWhilelo64>(size, + machInst, pd, rn, rm); + case 3: + return decodeSveBinUnpredU<SveWhilels64>(size, + machInst, pd, rn, rm); + } + } else { + switch (opc) { + case 0: + return decodeSveBinUnpredS<SveWhilelt32>(size, + machInst, pd, rn, rm); + case 1: + return decodeSveBinUnpredS<SveWhilele32>(size, + machInst, pd, rn, rm); + case 2: + return decodeSveBinUnpredU<SveWhilelo32>(size, + machInst, pd, rn, rm); + case 3: + return decodeSveBinUnpredU<SveWhilels32>(size, + machInst, pd, rn, rm); + } + } + } + return new Unknown64(machInst); + } // decodeSveIntCmpSca + + StaticInstPtr + decodeSveIntWideImmUnpred0(ExtMachInst machInst) + { + IntRegIndex zdn = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + uint64_t imm = bits(machInst, 12, 5); + uint8_t sh = bits(machInst, 13); + uint8_t size = bits(machInst, 23, 22); + + if (sh) { + if (size == 0x0) { + return new Unknown64(machInst); + } + imm <<= 8; + } + + switch (bits(machInst, 18, 16)) { + case 0x0: + return decodeSveWideImmUnpredU<SveAddImm>( + size, machInst, zdn, imm); + case 0x1: + return decodeSveWideImmUnpredU<SveSubImm>( + size, machInst, zdn, imm); + case 0x3: + return decodeSveWideImmUnpredU<SveSubrImm>( + size, machInst, zdn, imm); + case 0x4: + return decodeSveWideImmUnpredS<SveSqaddImm>( + size, machInst, zdn, imm); + case 0x5: + return decodeSveWideImmUnpredU<SveUqaddImm>( + size, machInst, zdn, imm); + case 0x6: + return decodeSveWideImmUnpredS<SveSqsubImm>( + size, machInst, zdn, imm); + case 0x7: + return decodeSveWideImmUnpredU<SveUqsubImm>( + size, machInst, zdn, imm); + } + + return new Unknown64(machInst); 
+ } // decodeSveIntWideImmUnpred0 + + StaticInstPtr + decodeSveIntWideImmUnpred1(ExtMachInst machInst) + { + IntRegIndex zdn = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + uint64_t imm = bits(machInst, 12, 5); + uint8_t size = bits(machInst, 23, 22); + + switch (bits(machInst, 18, 16)) { + case 0x0: + return decodeSveWideImmUnpredS<SveSmaxImm>( + size, machInst, zdn, sext<8>(imm)); + case 0x1: + return decodeSveWideImmUnpredU<SveUmaxImm>( + size, machInst, zdn, imm); + case 0x2: + return decodeSveWideImmUnpredS<SveSminImm>( + size, machInst, zdn, sext<8>(imm)); + case 0x3: + return decodeSveWideImmUnpredU<SveUminImm>( + size, machInst, zdn, imm); + } + + return new Unknown64(machInst); + } // decodeSveIntWideImmUnpred1 + + StaticInstPtr + decodeSveIntWideImmUnpred2(ExtMachInst machInst) + { + IntRegIndex zdn = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + uint64_t imm = bits(machInst, 12, 5); + uint8_t size = bits(machInst, 23, 22); + + if (bits(machInst, 18, 16) == 0x0) { + return decodeSveWideImmUnpredU<SveMulImm>( + size, machInst, zdn, sext<8>(imm)); + } + + return new Unknown64(machInst); + } // decodeSveIntWideImmUnpred2 + + StaticInstPtr + decodeSveIntWideImmUnpred3(ExtMachInst machInst) + { + IntRegIndex zd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + uint64_t imm = bits(machInst, 12, 5); + uint8_t sh = bits(machInst, 13); + uint8_t size = bits(machInst, 23, 22); + + if (sh) { + if (size == 0x0) { + return new Unknown64(machInst); + } + imm <<= 8; + } + + if (bits(machInst, 18, 17) == 0x0) { + if (sh) { + return decodeSveWideImmUnpredU<SveDupImm>( + size, machInst, zd, sext<16>(imm)); + } else { + return decodeSveWideImmUnpredU<SveDupImm>( + size, machInst, zd, sext<8>(imm)); + } + } + + return new Unknown64(machInst); + } // decodeSveIntWideImmUnpred3 + + StaticInstPtr + decodeSveIntWideImmUnpred4(ExtMachInst machInst) + { + IntRegIndex zd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + uint8_t size = bits(machInst, 23, 22); + + if (bits(machInst, 18, 17) == 0x0 && size != 0x0) { + uint64_t imm = vfp_modified_imm(bits(machInst, 12, 5), + decode_fp_data_type(size)); + return decodeSveWideImmUnpredF<SveFdup>(size, machInst, zd, imm); + } + + return new Unknown64(machInst); + } // decodeSveIntWideImmUnpred4 + + StaticInstPtr + decodeSveIntWideImmUnpred(ExtMachInst machInst) + { + switch (bits(machInst, 20, 19)) { + case 0x0: + if (bits(machInst, 18, 16) != 0x2) { + return decodeSveIntWideImmUnpred0(machInst); + } + break; + case 0x1: + if (bits(machInst, 13) == 0x0) { + return decodeSveIntWideImmUnpred1(machInst); + } + break; + case 0x2: + if (bits(machInst, 13) == 0x0) { + return decodeSveIntWideImmUnpred2(machInst); + } + break; + case 0x3: + if (bits(machInst, 16) == 0x0) { + return decodeSveIntWideImmUnpred3(machInst); + } else if (bits(machInst, 13) == 0x0) { + return decodeSveIntWideImmUnpred4(machInst); + } + break; + } + return new Unknown64(machInst); + } // decodeSveIntWideImmUnpred + + StaticInstPtr + decodeSveMultiplyAddUnpred(ExtMachInst machInst) + { + IntRegIndex zda = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex zn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex zm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + uint8_t size = (uint8_t) bits(machInst, 23, 22); + + if (bits(machInst, 12, 11) != 0 || !(size & 0x2)) { + return new Unknown64(machInst); + } + + uint8_t usig = (uint8_t) bits(machInst, 10); + if (size & 0x1) { + if (usig) { + return new SveUdotv<uint16_t, uint64_t>(machInst, + zda, zn, zm); + } else { + return 
new SveSdotv<int16_t, int64_t>(machInst, + zda, zn, zm); + } + } else { + if (usig) { + return new SveUdotv<uint8_t, uint32_t>(machInst, + zda, zn, zm); + } else { + return new SveSdotv<int8_t, int32_t>(machInst, + zda, zn, zm); + } + } + + return new Unknown64(machInst); + } // decodeSveMultiplyAddUnpred + + StaticInstPtr + decodeSveMultiplyIndexed(ExtMachInst machInst) + { + IntRegIndex zda = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex zn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + + uint8_t size = (uint8_t) bits(machInst, 23, 22); + + if (bits(machInst, 12, 11) != 0 || !(size & 0x2)) { + return new Unknown64(machInst); + } + + uint8_t usig = (uint8_t) bits(machInst, 10); + if (size & 0x1) { + IntRegIndex zm = (IntRegIndex) (uint8_t) bits(machInst, 19, 16); + uint8_t i1 = (uint8_t) bits(machInst, 20); + if (usig) { + return new SveUdoti<uint16_t, uint64_t>(machInst, + zda, zn, zm, i1); + } else { + return new SveSdoti<int16_t, int64_t>(machInst, + zda, zn, zm, i1); + } + } else { + IntRegIndex zm = (IntRegIndex) (uint8_t) bits(machInst, 18, 16); + uint8_t i2 = (uint8_t) bits(machInst, 20, 19); + if (usig) { + return new SveUdoti<uint8_t, uint32_t>(machInst, + zda, zn, zm, i2); + } else { + return new SveSdoti<int8_t, int32_t>(machInst, + zda, zn, zm, i2); + } + } + return new Unknown64(machInst); + } // decodeSveMultiplyIndexed + + StaticInstPtr + decodeSveFpFastReduc(ExtMachInst machInst) + { + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex zn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 12, 10); + + uint8_t size = bits(machInst, 23, 22); + + if (size == 0x0) { + return new Unknown64(machInst); + } + + switch (bits(machInst, 18, 16)) { + case 0x0: + return decodeSveUnaryPredF<SveFaddv>(size, machInst, vd, zn, pg); + case 0x4: + return decodeSveUnaryPredF<SveFmaxnmv>(size, machInst, vd, zn, pg); + case 0x5: + return decodeSveUnaryPredF<SveFminnmv>(size, machInst, vd, zn, pg); + case 0x6: + return decodeSveUnaryPredF<SveFmaxv>(size, machInst, vd, zn, pg); + case 0x7: + return decodeSveUnaryPredF<SveFminv>(size, machInst, vd, zn, pg); + } + + return new Unknown64(machInst); + } // decodeSveFpFastReduc + + StaticInstPtr + decodeSveFpUnaryUnpred(ExtMachInst machInst) + { + IntRegIndex zd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex zn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + + uint8_t size = (uint8_t) bits(machInst, 23, 22); + if (size == 0) { + return new Unknown64(machInst); + } + uint8_t opc = (uint8_t) bits(machInst, 18, 16); + + switch (opc) { + case 0x6: + return decodeSveUnaryUnpredF<SveFrecpe>( + size, machInst, zd, zn); + case 0x7: + return decodeSveUnaryUnpredF<SveFrsqrte>( + size, machInst, zd, zn); + } + return new Unknown64(machInst); + } // decodeSveFpUnaryUnpred + + StaticInstPtr + decodeSveFpCmpZero(ExtMachInst machInst) + { + IntRegIndex pd = (IntRegIndex) (uint8_t) bits(machInst, 3, 0); + IntRegIndex zn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 12, 10); + + uint8_t size = bits(machInst, 23, 22); + if (size == 0) { + return new Unknown64(machInst); + } + uint8_t opc = (bits(machInst, 17, 16) << 1) | bits(machInst, 4); + + switch (opc) { + case 0x0: + return decodeSveCmpImmF<SveFcmgeZero>( + size, machInst, pd, zn, 0x0, pg); + case 0x1: + return decodeSveCmpImmF<SveFcmgtZero>( + size, machInst, pd, zn, 0x0, pg); + case 0x2: + return decodeSveCmpImmF<SveFcmltZero>( + size, 
machInst, pd, zn, 0x0, pg); + case 0x3: + return decodeSveCmpImmF<SveFcmleZero>( + size, machInst, pd, zn, 0x0, pg); + case 0x4: + return decodeSveCmpImmF<SveFcmeqZero>( + size, machInst, pd, zn, 0x0, pg); + case 0x6: + return decodeSveCmpImmF<SveFcmneZero>( + size, machInst, pd, zn, 0x0, pg); + } + return new Unknown64(machInst); + } // decodeSveFpCmpZero + + StaticInstPtr + decodeSveFpAccumReduc(ExtMachInst machInst) + { + uint8_t opc = bits(machInst, 18, 16); + uint8_t size = bits(machInst, 23, 22); + if (opc != 0 || size == 0) { + return new Unknown64(machInst); + } + + IntRegIndex vdn = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex zm = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 12, 10); + + return decodeSveUnaryPredF<SveFadda>(size, machInst, vdn, zm, pg); + } // decodeSveFpAccumReduc + + StaticInstPtr + decodeSveFpArithUnpred(ExtMachInst machInst) + { + IntRegIndex zd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex zn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex zm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + uint8_t size = bits(machInst, 23, 22); + if (size == 0) { + return new Unknown64(machInst); + } + uint8_t opc = (uint8_t) bits(machInst, 12, 10); + + switch (opc) { + case 0x0: + return decodeSveBinUnpredF<SveFaddUnpred>( + size, machInst, zd, zn, zm); + case 0x1: + return decodeSveBinUnpredF<SveFsubUnpred>( + size, machInst, zd, zn, zm); + case 0x2: + return decodeSveBinUnpredF<SveFmulUnpred>( + size, machInst, zd, zn, zm); + case 0x3: + return decodeSveBinUnpredF<SveFtsmul>( + size, machInst, zd, zn, zm); + case 0x6: + return decodeSveBinUnpredF<SveFrecps>( + size, machInst, zd, zn, zm); + case 0x7: + return decodeSveBinUnpredF<SveFrsqrts>( + size, machInst, zd, zn, zm); + } + return new Unknown64(machInst); + } // decodeSveFpArithUnpred + + StaticInstPtr + decodeSveFpArithPred0(ExtMachInst machInst) + { + IntRegIndex zdn = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex zm = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 12, 10); + + uint8_t size = (uint8_t) bits(machInst, 23, 22); + if (size == 0) { + return new Unknown64(machInst); + } + uint8_t opc = (uint8_t) bits(machInst, 19, 16); + + switch (opc) { + case 0x0: + return decodeSveBinDestrPredF<SveFaddPred>( + size, machInst, zdn, zm, pg); + case 0x1: + return decodeSveBinDestrPredF<SveFsubPred>( + size, machInst, zdn, zm, pg); + case 0x2: + return decodeSveBinDestrPredF<SveFmulPred>( + size, machInst, zdn, zm, pg); + case 0x3: + return decodeSveBinDestrPredF<SveFsubr>( + size, machInst, zdn, zm, pg); + case 0x4: + return decodeSveBinDestrPredF<SveFmaxnm>( + size, machInst, zdn, zm, pg); + case 0x5: + return decodeSveBinDestrPredF<SveFminnm>( + size, machInst, zdn, zm, pg); + case 0x6: + return decodeSveBinDestrPredF<SveFmax>( + size, machInst, zdn, zm, pg); + case 0x7: + return decodeSveBinDestrPredF<SveFmin>( + size, machInst, zdn, zm, pg); + case 0x8: + return decodeSveBinDestrPredF<SveFabd>( + size, machInst, zdn, zm, pg); + case 0x9: + return decodeSveBinDestrPredF<SveFscale>( + size, machInst, zdn, zm, pg); + case 0xa: + return decodeSveBinDestrPredF<SveFmulx>( + size, machInst, zdn, zm, pg); + case 0xc: + return decodeSveBinDestrPredF<SveFdivr>( + size, machInst, zdn, zm, pg); + case 0xd: + return decodeSveBinDestrPredF<SveFdiv>( + size, machInst, zdn, zm, pg); + } + return new Unknown64(machInst); + } // decodeSveFpArithPred0 + + 
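The decode helpers used above (decodeSveBinDestrPredF, decodeSveUnaryPredF, and so on) dispatch on the 2-bit size field to a concrete element-type instantiation of the given instruction template; they are defined alongside the instruction templates in sve.isa, further down in this patch. A minimal sketch of the binary, destructive, predicated FP variant, assuming the standard SVE size encoding (1 = FP16, 2 = FP32, 3 = FP64); the signature here is inferred from the call sites above rather than copied from the patch's own definition:

template <template <typename T> class Base>
StaticInstPtr
decodeSveBinDestrPredF(unsigned size, ExtMachInst machInst,
                       IntRegIndex zdn, IntRegIndex zm, IntRegIndex gp)
{
    switch (size) {
      case 1:
        return new Base<uint16_t>(machInst, zdn, zm, gp);  // FP16
      case 2:
        return new Base<uint32_t>(machInst, zdn, zm, gp);  // FP32
      case 3:
        return new Base<uint64_t>(machInst, zdn, zm, gp);  // FP64
      default:
        // size == 0 is already rejected by the callers before dispatch
        return new Unknown64(machInst);
    }
}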
StaticInstPtr + decodeSveFpTrigMAddCoeff(ExtMachInst machInst) + { + IntRegIndex zdn = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex zm = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + uint8_t imm = (uint8_t) bits(machInst, 18, 16); + + uint8_t size = (uint8_t) bits(machInst, 23, 22); + if (size == 0) { + return new Unknown64(machInst); + } + + return decodeSveTerImmUnpredF<SveFtmad>(size, machInst, zdn, zm, imm); + } // decodeSveFpTrigMAddCoeff + + StaticInstPtr + decodeSveFpArithImmPred(ExtMachInst machInst) + { + IntRegIndex zdn = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 12, 10); + uint64_t imm; + + uint8_t size = (uint8_t) bits(machInst, 23, 22); + if (size == 0) { + return new Unknown64(machInst); + } + + uint8_t opc = (uint8_t) bits(machInst, 18, 16); + + switch (opc) { + case 0x0: + imm = sveExpandFpImmAddSub((uint8_t) bits(machInst, 5), size); + return decodeSveBinImmPredF<SveFaddImm>( + size, machInst, zdn, imm, pg); + case 0x1: + imm = sveExpandFpImmAddSub((uint8_t) bits(machInst, 5), size); + return decodeSveBinImmPredF<SveFsubImm>( + size, machInst, zdn, imm, pg); + case 0x2: + imm = sveExpandFpImmMul((uint8_t) bits(machInst, 5), size); + return decodeSveBinImmPredF<SveFmulImm>( + size, machInst, zdn, imm, pg); + case 0x3: + imm = sveExpandFpImmAddSub((uint8_t) bits(machInst, 5), size); + return decodeSveBinImmPredF<SveFsubrImm>( + size, machInst, zdn, imm, pg); + case 0x4: + imm = sveExpandFpImmMaxMin((uint8_t) bits(machInst, 5), size); + return decodeSveBinImmPredF<SveFmaxnmImm>( + size, machInst, zdn, imm, pg); + case 0x5: + imm = sveExpandFpImmMaxMin((uint8_t) bits(machInst, 5), size); + return decodeSveBinImmPredF<SveFminnmImm>( + size, machInst, zdn, imm, pg); + case 0x6: + imm = sveExpandFpImmMaxMin((uint8_t) bits(machInst, 5), size); + return decodeSveBinImmPredF<SveFmaxImm>( + size, machInst, zdn, imm, pg); + case 0x7: + imm = sveExpandFpImmMaxMin((uint8_t) bits(machInst, 5), size); + return decodeSveBinImmPredF<SveFminImm>( + size, machInst, zdn, imm, pg); + } + return new Unknown64(machInst); + } // decodeSveFpArithImmPred + + StaticInstPtr + decodeSveFpArithPred(ExtMachInst machInst) + { + if (bits(machInst, 20) == 0) { + return decodeSveFpArithPred0(machInst); + } else if (bits(machInst, 19) == 0) { + return decodeSveFpTrigMAddCoeff(machInst); + } else { + return decodeSveFpArithImmPred(machInst); + } + } // decodeSveFpArithPred + + StaticInstPtr + decodeSveFpUnaryPred(ExtMachInst machInst) + { + IntRegIndex zd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex zn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 12, 10); + + uint8_t size = (uint8_t) bits(machInst, 23, 22); + if (size == 0) { + return new Unknown64(machInst); + } + + uint8_t b20_19 = bits(machInst, 20, 19); + switch (b20_19) { + case 0x0: + { + if (bits(machInst, 18, 16) == 0x5) { + return new Unknown64(machInst); + } + // SVE floating-point round to integral value + uint8_t opc = (uint8_t) bits(machInst, 18, 16); + switch (opc) { + case 0x0: + return decodeSveUnaryPredF<SveFrintn>( + size, machInst, zd, zn, pg); + case 0x1: + return decodeSveUnaryPredF<SveFrintp>( + size, machInst, zd, zn, pg); + case 0x2: + return decodeSveUnaryPredF<SveFrintm>( + size, machInst, zd, zn, pg); + case 0x3: + return decodeSveUnaryPredF<SveFrintz>( + size, machInst, zd, zn, pg); + case 0x4: + return decodeSveUnaryPredF<SveFrinta>( + size, machInst, zd, zn, pg); + case 0x6: + 
return decodeSveUnaryPredF<SveFrintx>( + size, machInst, zd, zn, pg); + case 0x7: + return decodeSveUnaryPredF<SveFrinti>( + size, machInst, zd, zn, pg); + } + } + break; + case 0x1: + { + // SVE floating-point unary operations (predicated) + uint8_t b18_16 = bits(machInst, 18, 16); + switch (b18_16) { + case 0x0: + if (size == 0x2) { + return new SveFcvtNarrow<uint32_t, uint16_t>( + machInst, zd, zn, pg); + } else if (size == 0x3) { + return new SveFcvtNarrow<uint64_t, uint16_t>( + machInst, zd, zn, pg); + } + break; + case 0x1: + if (size == 0x2) { + return new SveFcvtWiden<uint16_t, uint32_t>( + machInst, zd, zn, pg); + } else if (size == 0x3) { + return new SveFcvtWiden<uint16_t, uint64_t>( + machInst, zd, zn, pg); + } + break; + case 0x2: + if (size == 0x3) { + return new SveFcvtNarrow<uint64_t, uint32_t>( + machInst, zd, zn, pg); + } + break; + case 0x3: + if (size == 0x3) { + return new SveFcvtWiden<uint32_t, uint64_t>( + machInst, zd, zn, pg); + } + break; + case 0x4: + if (size != 0x0) { + return decodeSveUnaryPredF<SveFrecpx>( + size, machInst, zd, zn, pg); + } + break; + case 0x5: + if (size != 0x0) { + return decodeSveUnaryPredF<SveFsqrt>( + size, machInst, zd, zn, pg); + } + break; + } + } + break; + case 0x2: + { + // SVE integer convert to floating-point + uint8_t opc = (size << 3) | bits(machInst, 18, 16); + switch (opc) { + case 0xa: + return new SveScvtfNarrow<uint16_t, uint16_t>( + machInst, zd, zn, pg); + case 0xb: + return new SveUcvtfNarrow<uint16_t, uint16_t>( + machInst, zd, zn, pg); + case 0xc: + return new SveScvtfNarrow<uint32_t, uint16_t>( + machInst, zd, zn, pg); + case 0xd: + return new SveUcvtfNarrow<uint32_t, uint16_t>( + machInst, zd, zn, pg); + case 0xe: + return new SveScvtfNarrow<uint64_t, uint16_t>( + machInst, zd, zn, pg); + case 0xf: + return new SveUcvtfNarrow<uint64_t, uint16_t>( + machInst, zd, zn, pg); + case 0x14: + return new SveScvtfNarrow<uint32_t, uint32_t>( + machInst, zd, zn, pg); + case 0x15: + return new SveUcvtfNarrow<uint32_t, uint32_t>( + machInst, zd, zn, pg); + case 0x18: + return new SveScvtfWiden<uint32_t, uint64_t>( + machInst, zd, zn, pg); + case 0x19: + return new SveUcvtfWiden<uint32_t, uint64_t>( + machInst, zd, zn, pg); + case 0x1c: + return new SveScvtfNarrow<uint64_t, uint32_t>( + machInst, zd, zn, pg); + case 0x1d: + return new SveUcvtfNarrow<uint64_t, uint32_t>( + machInst, zd, zn, pg); + case 0x1e: + return new SveScvtfNarrow<uint64_t, uint64_t>( + machInst, zd, zn, pg); + case 0x1f: + return new SveUcvtfNarrow<uint64_t, uint64_t>( + machInst, zd, zn, pg); + } + } + break; + case 0x3: + { + // SVE floating-point convert to integer + uint8_t opc = (size << 3) | bits(machInst, 18, 16); + switch (opc) { + case 0xa: + return new SveFcvtzsNarrow<uint16_t, uint16_t>( + machInst, zd, zn, pg); + case 0xb: + return new SveFcvtzuNarrow<uint16_t, uint16_t>( + machInst, zd, zn, pg); + case 0xc: + return new SveFcvtzsWiden<uint16_t, uint32_t>( + machInst, zd, zn, pg); + case 0xd: + return new SveFcvtzuWiden<uint16_t, uint32_t>( + machInst, zd, zn, pg); + case 0xe: + return new SveFcvtzsWiden<uint16_t, uint64_t>( + machInst, zd, zn, pg); + case 0xf: + return new SveFcvtzuWiden<uint16_t, uint64_t>( + machInst, zd, zn, pg); + case 0x14: + return new SveFcvtzsNarrow<uint32_t, uint32_t>( + machInst, zd, zn, pg); + case 0x15: + return new SveFcvtzuNarrow<uint32_t, uint32_t>( + machInst, zd, zn, pg); + case 0x18: + return new SveFcvtzsNarrow<uint64_t, uint32_t>( + machInst, zd, zn, pg); + case 0x19: + return new SveFcvtzuNarrow<uint64_t, 
uint32_t>( + machInst, zd, zn, pg); + case 0x1c: + return new SveFcvtzsWiden<uint32_t, uint64_t>( + machInst, zd, zn, pg); + case 0x1d: + return new SveFcvtzuWiden<uint32_t, uint64_t>( + machInst, zd, zn, pg); + case 0x1e: + return new SveFcvtzsNarrow<uint64_t, uint64_t>( + machInst, zd, zn, pg); + case 0x1f: + return new SveFcvtzuNarrow<uint64_t, uint64_t>( + machInst, zd, zn, pg); + } + } + break; + } + return new Unknown64(machInst); + } // decodeSveFpUnaryPred + + StaticInstPtr + decodeSveFpCmpVec(ExtMachInst machInst) + { + IntRegIndex pd = (IntRegIndex) (uint8_t) bits(machInst, 3, 0); + IntRegIndex zn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex zm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 12, 10); + + uint8_t size = bits(machInst, 23, 22); + if (size == 0) { + return new Unknown64(machInst); + } + uint8_t opc = (bits(machInst, 15) << 2) | + (bits(machInst, 13) << 1) | + bits(machInst, 4); + + switch (opc) { + case 0x0: + return decodeSveCmpF<SveFcmge>(size, machInst, pd, zn, zm, pg); + case 0x1: + return decodeSveCmpF<SveFcmgt>(size, machInst, pd, zn, zm, pg); + case 0x2: + return decodeSveCmpF<SveFcmeq>(size, machInst, pd, zn, zm, pg); + case 0x3: + return decodeSveCmpF<SveFcmne>(size, machInst, pd, zn, zm, pg); + case 0x4: + return decodeSveCmpF<SveFcmuo>(size, machInst, pd, zn, zm, pg); + case 0x5: + return decodeSveCmpF<SveFacge>(size, machInst, pd, zn, zm, pg); + case 0x7: + return decodeSveCmpF<SveFacgt>(size, machInst, pd, zn, zm, pg); + } + return new Unknown64(machInst); + } // decodeSveFpCmpVec + + StaticInstPtr + decodeSveFpFusedMulAdd(ExtMachInst machInst) + { + IntRegIndex zda = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex zn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex zm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 12, 10); + + uint8_t size = bits(machInst, 23, 22); + if (size == 0) { + return new Unknown64(machInst); + } + uint8_t opc = bits(machInst, 15, 13); + + switch (opc) { + case 0x0: + return decodeSveTerPredF<SveFmla>( + size, machInst, zda, zn, zm, pg); + case 0x1: + return decodeSveTerPredF<SveFmls>( + size, machInst, zda, zn, zm, pg); + case 0x2: + return decodeSveTerPredF<SveFnmla>( + size, machInst, zda, zn, zm, pg); + case 0x3: + return decodeSveTerPredF<SveFnmls>( + size, machInst, zda, zn, zm, pg); + case 0x4: + return decodeSveTerPredF<SveFmad>( + size, machInst, zda /* zdn */, zm /* za */, zn, pg); + case 0x5: + return decodeSveTerPredF<SveFmsb>( + size, machInst, zda /* zdn */, zm /* za */, zn, pg); + case 0x6: + return decodeSveTerPredF<SveFnmad>( + size, machInst, zda /* zdn */, zm /* za */, zn, pg); + case 0x7: + return decodeSveTerPredF<SveFnmsb>( + size, machInst, zda /* zdn */, zm /* za */, zn, pg); + } + return new Unknown64(machInst); + } // decodeSveFpFusedMulAdd + + StaticInstPtr + decodeSveFpCplxAdd(ExtMachInst machInst) + { + uint8_t size = bits(machInst, 23, 22); + uint8_t rot = bits(machInst, 16) << 1 | 0x01; + IntRegIndex zdn = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex zm = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 12, 10); + switch (size) { + case 1: + return new SveFcadd<uint16_t>(machInst, + zdn, zdn, zm, pg, rot); + case 2: + return new SveFcadd<uint32_t>(machInst, + zdn, zdn, zm, pg, rot); + case 3: + return new SveFcadd<uint64_t>(machInst, + zdn, zdn, zm, pg, rot); + } + 
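// size == 0 (byte elements) is not a valid FP element size: +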
return new Unknown64(machInst); + } + + StaticInstPtr + decodeSveFpCplxMulAddVec(ExtMachInst machInst) + { + uint8_t size = bits(machInst, 23, 22); + if (size == 0) { + return new Unknown64(machInst); + } + + IntRegIndex zda = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex zn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 12, 10); + IntRegIndex zm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + uint8_t rot = bits(machInst, 14, 13); + switch (size) { + case 1: + return new SveFcmlav<uint16_t>(machInst, + zda, zn, zm, pg, rot); + case 2: + return new SveFcmlav<uint32_t>(machInst, + zda, zn, zm, pg, rot); + case 3: + return new SveFcmlav<uint64_t>(machInst, + zda, zn, zm, pg, rot); + } + + return new Unknown64(machInst); + } // decodeSveFpCplxMulAddVec + + StaticInstPtr + decodeSveFpCplxMulAddIndexed(ExtMachInst machInst) + { + uint8_t size = bits(machInst, 23, 22); + if (size < 2) { + return new Unknown64(machInst); + } + + IntRegIndex zda = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex zn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex zm; + uint8_t rot = bits(machInst, 11, 10); + uint8_t imm; + + switch (size) { + case 2: + zm = (IntRegIndex) (uint8_t) bits(machInst, 18, 16); + imm = bits(machInst, 20, 19); + return new SveFcmlai<uint32_t>(machInst, + zda, zn, zm, rot, imm); + case 3: + zm = (IntRegIndex) (uint8_t) bits(machInst, 19, 16); + imm = bits(machInst, 20); + return new SveFcmlai<uint64_t>(machInst, + zda, zn, zm, rot, imm); + } + return new Unknown64(machInst); + } // decodeSveFpCplxMulAddIndexed + + StaticInstPtr + decodeSveFpMulIndexed(ExtMachInst machInst) + { + IntRegIndex zd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex zn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + + uint8_t size = bits(machInst, 23, 22); + switch (size) { + case 0x0: + case 0x1: + return new SveFmulIdx<uint16_t>( + machInst, zd, zn, + (IntRegIndex) (uint8_t) bits(machInst, 18, 16), + bits(machInst, 20, 19) | (bits(machInst, 22) << 2)); + case 0x2: + return new SveFmulIdx<uint32_t>( + machInst, zd, zn, + (IntRegIndex) (uint8_t) bits(machInst, 18, 16), + bits(machInst, 20, 19)); + case 0x3: + return new SveFmulIdx<uint64_t>( + machInst, zd, zn, + (IntRegIndex) (uint8_t) bits(machInst, 19, 16), + bits(machInst, 20)); + default: + return new Unknown64(machInst); + } + + } // decodeSveFpMulIndexed + + StaticInstPtr + decodeSveFpMulAddIndexed(ExtMachInst machInst) + { + IntRegIndex zd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex zn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + const uint8_t op = bits(machInst, 10); + + uint8_t size = bits(machInst, 23, 22); + switch (size) { + case 0x0: + case 0x1: + if (op) { + return new SveFmlsIdx<uint16_t>( + machInst, zd, zn, + (IntRegIndex) (uint8_t) bits(machInst, 18, 16), + bits(machInst, 20, 19) | (bits(machInst, 22) << 2)); + } else { + return new SveFmlaIdx<uint16_t>( + machInst, zd, zn, + (IntRegIndex) (uint8_t) bits(machInst, 18, 16), + bits(machInst, 20, 19) | (bits(machInst, 22) << 2)); + } + case 0x2: + if (op) { + return new SveFmlsIdx<uint32_t>( + machInst, zd, zn, + (IntRegIndex) (uint8_t) bits(machInst, 18, 16), + bits(machInst, 20, 19)); + } else { + return new SveFmlaIdx<uint32_t>( + machInst, zd, zn, + (IntRegIndex) (uint8_t) bits(machInst, 18, 16), + bits(machInst, 20, 19)); + } + case 0x3: + if (op) { + return new SveFmlsIdx<uint64_t>( + machInst, zd, zn, + (IntRegIndex) (uint8_t) bits(machInst, 19, 16), + 
bits(machInst, 20)); + } else { + return new SveFmlaIdx<uint64_t>( + machInst, zd, zn, + (IntRegIndex) (uint8_t) bits(machInst, 19, 16), + bits(machInst, 20)); + } + default: + return new Unknown64(machInst); + } + } // decodeSveFpMulAddIndexed + + StaticInstPtr + decodeSveMemGather32(ExtMachInst machInst) + { + return new Unknown64(machInst); + } // decodeSveMemGather32 + + StaticInstPtr + decodeSveMemContigLoad(ExtMachInst machInst) + { + return new Unknown64(machInst); + } // decodeSveMemContigLoad + + StaticInstPtr + decodeSveMemGather64(ExtMachInst machInst) + { + return new Unknown64(machInst); + } // decodeSveMemGather64 + + StaticInstPtr + decodeSveMemStore(ExtMachInst machInst) + { + return new Unknown64(machInst); + } // decodeSveMemStore + +} // namespace Aarch64 +}}; diff --git a/src/arch/arm/isa/formats/sve_top_level.isa b/src/arch/arm/isa/formats/sve_top_level.isa new file mode 100644 index 000000000..f4f1ab531 --- /dev/null +++ b/src/arch/arm/isa/formats/sve_top_level.isa @@ -0,0 +1,314 @@ +// Copyright (c) 2017-2019 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Giacomo Gabrielli + +/// @file +/// SVE top-level decoder. 
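All of these decoders slice fields out of the 32-bit instruction word with the bits() helper from gem5's base/bitfield.hh, where the first index names the high bit and the second the low bit of an inclusive range. Its behaviour is essentially the following (a paraphrase, not the exact library code):

template <class T>
inline T
bits(T val, int first, int last)
{
    // Extract the inclusive bit range [first:last] of val, e.g.
    // bits(machInst, 23, 22) yields the 2-bit SVE element-size field.
    int nbits = first - last + 1;
    return (val >> last) & mask(nbits);  // mask(n): n low-order one bits
}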
+ +output header {{ +namespace Aarch64 +{ + StaticInstPtr decodeSveIntArithBinPred(ExtMachInst machInst); + StaticInstPtr decodeSveIntReduc(ExtMachInst machInst); + StaticInstPtr decodeSveShiftByImmPred(ExtMachInst machInst); + StaticInstPtr decodeSveIntArithUnaryPred(ExtMachInst machInst); + StaticInstPtr decodeSveIntMulAdd(ExtMachInst machInst); + StaticInstPtr decodeSveIntArithUnpred(ExtMachInst machInst); + StaticInstPtr decodeSveIntLogUnpred(ExtMachInst machInst); + StaticInstPtr decodeSveIndexGen(ExtMachInst machInst); + StaticInstPtr decodeSveStackAlloc(ExtMachInst machInst); + StaticInstPtr decodeSveShiftByImmUnpred(ExtMachInst machInst); + StaticInstPtr decodeSveCompVecAddr(ExtMachInst machInst); + StaticInstPtr decodeSveIntMiscUnpred(ExtMachInst machInst); + StaticInstPtr decodeSveElemCount(ExtMachInst machInst); + StaticInstPtr decodeSveLogMaskImm(ExtMachInst machInst); + StaticInstPtr decodeSveIntWideImmPred(ExtMachInst machInst); + StaticInstPtr decodeSvePermExtract(ExtMachInst machInst); + StaticInstPtr decodeSvePermUnpred(ExtMachInst machInst); + StaticInstPtr decodeSvePermPredicates(ExtMachInst machInst); + StaticInstPtr decodeSvePermIntlv(ExtMachInst machInst); + StaticInstPtr decodeSvePermPred(ExtMachInst machInst); + StaticInstPtr decodeSveSelVec(ExtMachInst machInst); + StaticInstPtr decodeSveIntCmpVec(ExtMachInst machInst); + StaticInstPtr decodeSveIntCmpUImm(ExtMachInst machInst); + StaticInstPtr decodeSveIntCmpSImm(ExtMachInst machInst); + StaticInstPtr decodeSvePredGen(ExtMachInst machInst); + StaticInstPtr decodeSvePredCount(ExtMachInst machInst); + StaticInstPtr decodeSveIntCmpSca(ExtMachInst machInst); + StaticInstPtr decodeSveIntWideImmUnpred(ExtMachInst machInst); + + StaticInstPtr decodeSveMultiplyAddUnpred(ExtMachInst machInst); + StaticInstPtr decodeSveMultiplyIndexed(ExtMachInst machInst); + + StaticInstPtr decodeSveFpFastReduc(ExtMachInst machInst); + StaticInstPtr decodeSveFpUnaryUnpred(ExtMachInst machInst); + StaticInstPtr decodeSveFpCmpZero(ExtMachInst machInst); + StaticInstPtr decodeSveFpAccumReduc(ExtMachInst machInst); + StaticInstPtr decodeSveFpArithUnpred(ExtMachInst machInst); + StaticInstPtr decodeSveFpArithPred(ExtMachInst machInst); + StaticInstPtr decodeSveFpUnaryPred(ExtMachInst machInst); + StaticInstPtr decodeSveFpCmpVec(ExtMachInst machInst); + StaticInstPtr decodeSveFpFusedMulAdd(ExtMachInst machInst); + StaticInstPtr decodeSveFpCplxAdd(ExtMachInst machInst); + StaticInstPtr decodeSveFpCplxMulAddVec(ExtMachInst machInst); + StaticInstPtr decodeSveFpMulAddIndexed(ExtMachInst machInst); + StaticInstPtr decodeSveFpCplxMulAddIndexed(ExtMachInst machInst); + StaticInstPtr decodeSveFpMulIndexed(ExtMachInst machInst); + + StaticInstPtr decodeSveMemGather32(ExtMachInst machInst); + StaticInstPtr decodeSveMemContigLoad(ExtMachInst machInst); + StaticInstPtr decodeSveMemGather64(ExtMachInst machInst); + StaticInstPtr decodeSveMemStore(ExtMachInst machInst); +} +}}; + +output decoder {{ +namespace Aarch64 +{ + + StaticInstPtr + decodeSveInt(ExtMachInst machInst) + { + uint8_t b_29_24_21 = (bits(machInst, 29) << 2) | + (bits(machInst, 24) << 1) | + bits(machInst, 21); + switch (b_29_24_21) { + case 0x0: + { + if (bits(machInst, 14)) { + return decodeSveIntMulAdd(machInst); + } else { + uint8_t b_15_13 = (bits(machInst, 15) << 1) | + bits(machInst, 13); + switch (b_15_13) { + case 0x0: + if (bits(machInst, 30)) { + return decodeSveMultiplyAddUnpred(machInst); + } else { + return decodeSveIntArithBinPred(machInst); + } + case 0x1: + return 
decodeSveIntReduc(machInst); + case 0x2: + return decodeSveShiftByImmPred(machInst); + case 0x3: + return decodeSveIntArithUnaryPred(machInst); + } + } + } + case 0x1: + { + uint8_t b_15_14 = bits(machInst, 15, 14); + uint8_t b_13 = bits(machInst, 13); + uint8_t b_12 = bits(machInst, 12); + switch (b_15_14) { + case 0x0: + if (b_13) { + return decodeSveIntLogUnpred(machInst); + } else { + if (bits(machInst, 30)) { + return decodeSveMultiplyIndexed(machInst); + } else { + return decodeSveIntArithUnpred(machInst); + } + } + case 0x1: + if (b_13) { + return new Unknown64(machInst); + } else if (b_12) { + return decodeSveStackAlloc(machInst); + } else { + return decodeSveIndexGen(machInst); + } + case 0x2: + if (b_13) { + if (b_12) { + return decodeSveIntMiscUnpred(machInst); + } else { + return decodeSveCompVecAddr(machInst); + } + } else { + return decodeSveShiftByImmUnpred(machInst); + } + case 0x3: + return decodeSveElemCount(machInst); + } + } + case 0x2: + if (bits(machInst, 20)) { + return decodeSveIntWideImmPred(machInst); + } else { + return decodeSveLogMaskImm(machInst); + } + case 0x3: + { + uint8_t b_15_14 = bits(machInst, 15, 14); + uint8_t b_13 = bits(machInst, 13); + switch (b_15_14) { + case 0x0: + if (b_13) { + return decodeSvePermUnpred(machInst); + } else { + return decodeSvePermExtract(machInst); + } + case 0x1: + if (b_13) { + return decodeSvePermIntlv(machInst); + } else { + return decodeSvePermPredicates(machInst); + } + case 0x2: + return decodeSvePermPred(machInst); + case 0x3: + return decodeSveSelVec(machInst); + } + } + case 0x4: + return decodeSveIntCmpVec(machInst); + case 0x5: + return decodeSveIntCmpUImm(machInst); + case 0x6: + if (bits(machInst, 14)) { + return decodeSvePredGen(machInst); + } else { + return decodeSveIntCmpSImm(machInst); + } + case 0x7: + { + uint8_t b_15_14 = bits(machInst, 15, 14); + switch (b_15_14) { + case 0x0: + return decodeSveIntCmpSca(machInst); + case 0x1: + return new Unknown64(machInst); + case 0x2: + return decodeSvePredCount(machInst); + case 0x3: + return decodeSveIntWideImmUnpred(machInst); + } + } + } + return new Unknown64(machInst); + } + + StaticInstPtr + decodeSveFp(ExtMachInst machInst) + { + uint8_t b_24_21 = (bits(machInst, 24) << 1) | + bits(machInst, 21); + switch (b_24_21) { + case 0x0: + if (!bits(machInst, 15)) { + return decodeSveFpCplxMulAddVec(machInst); + } else if((bits(machInst, 20, 17) | bits(machInst, 14, 13)) == 0) { + return decodeSveFpCplxAdd(machInst); + } + return new Unknown64(machInst); + case 0x1: + if (bits(machInst, 15, 12) == 1) { + return decodeSveFpCplxMulAddIndexed(machInst); + } + switch (bits(machInst, 13, 11)) { + case 0: + return decodeSveFpMulAddIndexed(machInst); + case 4: + if (!bits(machInst, 10)) + return decodeSveFpMulIndexed(machInst); + M5_FALLTHROUGH; + default: + return new Unknown64(machInst); + } + case 0x2: + { + if (bits(machInst, 14)) { + return decodeSveFpCmpVec(machInst); + } else { + uint8_t b_15_13 = (bits(machInst, 15) << 1) | + bits(machInst, 13); + switch (b_15_13) { + case 0x0: + return decodeSveFpArithUnpred(machInst); + case 0x1: + { + uint8_t b_20_19 = (bits(machInst, 20) << 1) | + bits(machInst, 19); + switch (b_20_19) { + case 0x0: + return decodeSveFpFastReduc(machInst); + case 0x1: + if (bits(machInst, 12)) { + return decodeSveFpUnaryUnpred(machInst); + } else { + return new Unknown64(machInst); + } + case 0x2: + return decodeSveFpCmpZero(machInst); + case 0x3: + return decodeSveFpAccumReduc(machInst); + } + } + case 0x2: + return 
decodeSveFpArithPred(machInst); + case 0x3: + return decodeSveFpUnaryPred(machInst); + } + } + } + case 0x3: + return decodeSveFpFusedMulAdd(machInst); + } + return new Unknown64(machInst); + } + + StaticInstPtr + decodeSveMem(ExtMachInst machInst) + { + uint8_t b_30_29 = bits(machInst, 30, 29); + switch (b_30_29) { + case 0x0: + return decodeSveMemGather32(machInst); + case 0x1: + return decodeSveMemContigLoad(machInst); + case 0x2: + return decodeSveMemGather64(machInst); + case 0x3: + return decodeSveMemStore(machInst); + } + return new Unknown64(machInst); + } + +} // namespace Aarch64 +}}; diff --git a/src/arch/arm/isa/includes.isa b/src/arch/arm/isa/includes.isa index 37578f6ce..b89a67432 100644 --- a/src/arch/arm/isa/includes.isa +++ b/src/arch/arm/isa/includes.isa @@ -64,6 +64,7 @@ output header {{ #include "arch/arm/insts/pred_inst.hh" #include "arch/arm/insts/pseudo.hh" #include "arch/arm/insts/static_inst.hh" +#include "arch/arm/insts/sve.hh" #include "arch/arm/insts/vfp.hh" #include "arch/arm/isa_traits.hh" #include "mem/packet.hh" @@ -92,6 +93,7 @@ output exec {{ #include <cmath> #include "arch/arm/faults.hh" +#include "arch/arm/isa.hh" #include "arch/arm/isa_traits.hh" #include "arch/arm/utility.hh" #include "arch/generic/memhelpers.hh" diff --git a/src/arch/arm/isa/insts/fp64.isa b/src/arch/arm/isa/insts/fp64.isa index 6c0c6b808..26803e7e5 100644 --- a/src/arch/arm/isa/insts/fp64.isa +++ b/src/arch/arm/isa/insts/fp64.isa @@ -44,6 +44,11 @@ let {{ decoder_output = "" exec_output = "" + zeroSveVecRegUpperPartCode = ''' + TheISA::ISA::zeroSveVecRegUpperPart(%s, + ArmStaticInst::getCurSveVecLen<uint64_t>(xc->tcBase())); + ''' + fmovImmSCode = vfp64EnabledCheckCode + ''' AA64FpDestP0_uw = bits(imm, 31, 0); AA64FpDestP1_uw = 0; @@ -53,6 +58,7 @@ let {{ fmovImmSIop = InstObjParams("fmov", "FmovImmS", "FpRegImmOp", { "code": fmovImmSCode, "op_class": "FloatMiscOp" }, []) + fmovImmSIop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" header_output += FpRegImmOpDeclare.subst(fmovImmSIop); decoder_output += FpRegImmOpConstructor.subst(fmovImmSIop); exec_output += BasicExecute.subst(fmovImmSIop); @@ -66,6 +72,7 @@ let {{ fmovImmDIop = InstObjParams("fmov", "FmovImmD", "FpRegImmOp", { "code": fmovImmDCode, "op_class": "FloatMiscOp" }, []) + fmovImmDIop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" header_output += FpRegImmOpDeclare.subst(fmovImmDIop); decoder_output += AA64FpRegImmOpConstructor.subst(fmovImmDIop); exec_output += BasicExecute.subst(fmovImmDIop); @@ -79,6 +86,7 @@ let {{ fmovRegSIop = InstObjParams("fmov", "FmovRegS", "FpRegRegOp", { "code": fmovRegSCode, "op_class": "FloatMiscOp" }, []) + fmovRegSIop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" header_output += FpRegRegOpDeclare.subst(fmovRegSIop); decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegSIop); exec_output += BasicExecute.subst(fmovRegSIop); @@ -92,6 +100,7 @@ let {{ fmovRegDIop = InstObjParams("fmov", "FmovRegD", "FpRegRegOp", { "code": fmovRegDCode, "op_class": "FloatMiscOp" }, []) + fmovRegDIop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" header_output += FpRegRegOpDeclare.subst(fmovRegDIop); decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegDIop); exec_output += BasicExecute.subst(fmovRegDIop); @@ -105,6 +114,8 @@ let {{ fmovCoreRegWIop = InstObjParams("fmov", "FmovCoreRegW", "FpRegRegOp", { "code": fmovCoreRegWCode, "op_class": "FloatMiscOp" }, []) + fmovCoreRegWIop.snippets["code"] += zeroSveVecRegUpperPartCode % \ + "AA64FpDest" 
header_output += FpRegRegOpDeclare.subst(fmovCoreRegWIop); decoder_output += AA64FpRegRegOpConstructor.subst(fmovCoreRegWIop); exec_output += BasicExecute.subst(fmovCoreRegWIop); @@ -118,6 +129,8 @@ let {{ fmovCoreRegXIop = InstObjParams("fmov", "FmovCoreRegX", "FpRegRegOp", { "code": fmovCoreRegXCode, "op_class": "FloatMiscOp" }, []) + fmovCoreRegXIop.snippets["code"] += zeroSveVecRegUpperPartCode % \ + "AA64FpDest" header_output += FpRegRegOpDeclare.subst(fmovCoreRegXIop); decoder_output += AA64FpRegRegOpConstructor.subst(fmovCoreRegXIop); exec_output += BasicExecute.subst(fmovCoreRegXIop); @@ -131,6 +144,8 @@ let {{ fmovUCoreRegXIop = InstObjParams("fmov", "FmovUCoreRegX", "FpRegRegOp", { "code": fmovUCoreRegXCode, "op_class": "FloatMiscOp" }, []) + fmovUCoreRegXIop.snippets["code"] += zeroSveVecRegUpperPartCode % \ + "AA64FpDest" header_output += FpRegRegOpDeclare.subst(fmovUCoreRegXIop); decoder_output += AA64FpRegRegOpConstructor.subst(fmovUCoreRegXIop); exec_output += BasicExecute.subst(fmovUCoreRegXIop); @@ -305,6 +320,7 @@ let {{ iop = InstObjParams(name.lower(), name + suffix, "FpRegRegRegRegOp", { "code": code, "op_class": opClass }, []) + iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" header_output += AA64FpRegRegRegRegOpDeclare.subst(iop) decoder_output += AA64FpRegRegRegRegOpConstructor.subst(iop) @@ -337,16 +353,19 @@ let {{ hIop = InstObjParams(name, Name + "H", base, { "code": code, "op_class": opClass }, []) + hIop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" code = singleIntConvCode2 % { "op": singleOp } sIop = InstObjParams(name, Name + "S", base, { "code": code, "op_class": opClass }, []) + sIop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" code = doubleIntConvCode2 % { "op": doubleOp } dIop = InstObjParams(name, Name + "D", base, { "code": code, "op_class": opClass }, []) + dIop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" declareTempl = eval( base + "Declare"); constructorTempl = eval("AA64" + base + "Constructor"); @@ -403,14 +422,17 @@ let {{ hIop = InstObjParams(name, Name + "H", base, { "code": code, "op_class": opClass }, []) + hIop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" code = singleIntConvCode % { "op": singleOp } sIop = InstObjParams(name, Name + "S", base, { "code": code, "op_class": opClass }, []) + sIop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" code = doubleIntConvCode % { "op": doubleOp } dIop = InstObjParams(name, Name + "D", base, { "code": code, "op_class": opClass }, []) + dIop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" declareTempl = eval( base + "Declare"); constructorTempl = eval("AA64" + base + "Constructor"); @@ -446,6 +468,7 @@ let {{ iop = InstObjParams(name, Name + suffix, base, { "code": code % { "op": op }, "op_class": opClass }, []) + iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" declareTempl = eval( base + "Declare"); constructorTempl = eval("AA64" + base + "Constructor"); @@ -535,6 +558,9 @@ let {{ fcvtIntFpDIop = InstObjParams(mnem, instName, "FpRegRegOp", { "code": fcvtIntFpDCode, "op_class": "FloatCvtOp" }, []) + fcvtIntFpDIop.snippets["code"] += \ + zeroSveVecRegUpperPartCode % "AA64FpDest" + header_output += FpRegRegOpDeclare.subst(fcvtIntFpDIop); decoder_output += AA64FpRegRegOpConstructor.subst(fcvtIntFpDIop); exec_output += BasicExecute.subst(fcvtIntFpDIop); @@ -597,6 +623,7 @@ let {{ fcvtFpSFpDIop = InstObjParams("fcvt", "FCvtFpSFpD", "FpRegRegOp", { "code": fcvtFpSFpDCode, 
"op_class": "FloatCvtOp" }, []) + fcvtFpSFpDIop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" header_output += FpRegRegOpDeclare.subst(fcvtFpSFpDIop); decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpSFpDIop); exec_output += BasicExecute.subst(fcvtFpSFpDIop); @@ -614,6 +641,7 @@ let {{ fcvtFpDFpSIop = InstObjParams("fcvt", "FcvtFpDFpS", "FpRegRegOp", {"code": fcvtFpDFpSCode, "op_class": "FloatCvtOp" }, []) + fcvtFpDFpSIop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" header_output += FpRegRegOpDeclare.subst(fcvtFpDFpSIop); decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpDFpSIop); exec_output += BasicExecute.subst(fcvtFpDFpSIop); @@ -646,6 +674,8 @@ let {{ fcvtFpHFpIop = InstObjParams("fcvt", instName, "FpRegRegOp", { "code": code, "op_class": "FloatCvtOp" }, []) + fcvtFpHFpIop.snippets["code"] += zeroSveVecRegUpperPartCode % \ + "AA64FpDest" header_output += FpRegRegOpDeclare.subst(fcvtFpHFpIop); decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpHFpIop); exec_output += BasicExecute.subst(fcvtFpHFpIop); @@ -669,6 +699,8 @@ let {{ fcvtFpFpHIop = InstObjParams("fcvt", instName, "FpRegRegOp", { "code": code, "op_class": "FloatCvtOp" }, []) + fcvtFpFpHIop.snippets["code"] += zeroSveVecRegUpperPartCode % \ + "AA64FpDest" header_output += FpRegRegOpDeclare.subst(fcvtFpFpHIop); decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpFpHIop); exec_output += BasicExecute.subst(fcvtFpFpHIop); @@ -842,6 +874,8 @@ let {{ fcvtFixedFpIop = InstObjParams(mnem, instName, "FpRegRegImmOp", { "code": fcvtFixedFpCode, "op_class": "FloatCvtOp" }, []) + fcvtFixedFpIop.snippets["code"] += zeroSveVecRegUpperPartCode % \ + "AA64FpDest" header_output += FpRegRegImmOpDeclare.subst(fcvtFixedFpIop); decoder_output += FpRegRegImmOpConstructor.subst(fcvtFixedFpIop); exec_output += BasicExecute.subst(fcvtFixedFpIop); @@ -888,6 +922,7 @@ let {{ iop = InstObjParams("fcsel", "FCSel%s" %("D" if isDouble else "S"), "FpCondSelOp", { "code": code, "op_class": "FloatCvtOp" }) + iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" header_output += DataXCondSelDeclare.subst(iop) decoder_output += DataXCondSelConstructor.subst(iop) exec_output += BasicExecute.subst(iop) diff --git a/src/arch/arm/isa/insts/insts.isa b/src/arch/arm/isa/insts/insts.isa index 007d99ff2..623657efc 100644 --- a/src/arch/arm/isa/insts/insts.isa +++ b/src/arch/arm/isa/insts/insts.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010-2014 ARM Limited +// Copyright (c) 2010-2014, 2017 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -97,6 +97,9 @@ split exec; split decoder; ##include "neon64_mem.isa" +//SVE +##include "sve.isa" + //m5 Pseudo-ops ##include "m5ops.isa" diff --git a/src/arch/arm/isa/insts/ldr64.isa b/src/arch/arm/isa/insts/ldr64.isa index 54e50d73e..fe7eaf0f8 100644 --- a/src/arch/arm/isa/insts/ldr64.isa +++ b/src/arch/arm/isa/insts/ldr64.isa @@ -179,10 +179,16 @@ let {{ def emit(self): self.buildEACode() + accEpilogCode = None # Code that actually handles the access if self.flavor in ("dprefetch", "iprefetch", "mprefetch"): accCode = 'uint64_t temp M5_VAR_USED = Mem%s;' elif self.flavor == "fp": + accEpilogCode = ''' + TheISA::ISA::zeroSveVecRegUpperPart(AA64FpDest, + ArmStaticInst::getCurSveVecLen<uint64_t>( + xc->tcBase())); + ''' if self.size in (1, 2, 4): accCode = ''' AA64FpDestP0_uw = cSwap(Mem%s, @@ -216,6 +222,8 @@ let {{ accCode = accCode % buildMemSuffix(self.sign, self.size) 
self.codeBlobs["memacc_code"] = accCode + if accEpilogCode: + self.codeBlobs["memacc_epilog_code"] = accEpilogCode # Push it out to the output files wbDecl = None @@ -227,8 +235,17 @@ let {{ def emit(self): self.buildEACode() + accEpilogCode = None # Code that actually handles the access if self.flavor == "fp": + accEpilogCode = ''' + TheISA::ISA::zeroSveVecRegUpperPart(AA64FpDest, + ArmStaticInst::getCurSveVecLen<uint64_t>( + xc->tcBase())); + TheISA::ISA::zeroSveVecRegUpperPart(AA64FpDest2, + ArmStaticInst::getCurSveVecLen<uint64_t>( + xc->tcBase())); + ''' if self.size == 4: accCode = ''' uint64_t data = cSwap(Mem_ud, isBigEndian64(xc->tcBase())); @@ -300,6 +317,8 @@ let {{ isBigEndian64(xc->tcBase())); ''' self.codeBlobs["memacc_code"] = accCode + if accEpilogCode: + self.codeBlobs["memacc_epilog_code"] = accEpilogCode # Push it out to the output files wbDecl = None diff --git a/src/arch/arm/isa/insts/mem.isa b/src/arch/arm/isa/insts/mem.isa index 7323b02c9..5510c6c72 100644 --- a/src/arch/arm/isa/insts/mem.isa +++ b/src/arch/arm/isa/insts/mem.isa @@ -106,10 +106,13 @@ let {{ if rasPop: is_ras_pop = "1" codeBlobsCopy['is_ras_pop'] = is_ras_pop + if 'memacc_epilog_code' in codeBlobsCopy: + del codeBlobsCopy['memacc_epilog_code'] iop = InstObjParams(name, Name, base, codeBlobsCopy, instFlagsCopy) - + if 'memacc_epilog_code' in codeBlobs: + iop.snippets['memacc_code'] += codeBlobs['memacc_epilog_code'] header_output = self.declareTemplate.subst(iop) decoder_output = self.constructTemplate.subst(iop) exec_output = self.fullExecTemplate.subst(iop) + \ diff --git a/src/arch/arm/isa/insts/neon64.isa b/src/arch/arm/isa/insts/neon64.isa index 855952d9e..2e0ec4495 100644 --- a/src/arch/arm/isa/insts/neon64.isa +++ b/src/arch/arm/isa/insts/neon64.isa @@ -48,6 +48,11 @@ let {{ floatTypes = ("uint16_t", "uint32_t", "uint64_t") smallFloatTypes = ("uint32_t",) + zeroSveVecRegUpperPartCode = ''' + TheISA::ISA::zeroSveVecRegUpperPart(%s, + ArmStaticInst::getCurSveVecLen<uint64_t>(xc->tcBase())); + ''' + def threeEqualRegInstX(name, Name, opClass, types, rCount, op, readDest=False, pairwise=False, scalar=False, byElem=False, decoder='Generic'): @@ -132,6 +137,7 @@ let {{ { "code": eWalkCode, "r_count": rCount, "op_class": opClass }, []) + iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" if byElem: header_output += NeonX2RegImmOpDeclare.subst(iop) else: @@ -240,6 +246,7 @@ let {{ { "code": eWalkCode, "r_count": 2, "op_class": opClass }, []) + iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" if byElem: header_output += NeonX2RegImmOpDeclare.subst(iop) else: @@ -327,6 +334,7 @@ let {{ { "code": eWalkCode, "r_count": rCount, "op_class": opClass }, []) + iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" if hasImm: header_output += NeonX1RegImmOpDeclare.subst(iop) else: @@ -378,6 +386,7 @@ let {{ { "code": eWalkCode, "r_count": 2, "op_class": opClass }, []) + iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" if hasImm: header_output += NeonX1RegImmOpDeclare.subst(iop) else: @@ -449,6 +458,7 @@ let {{ { "code": eWalkCode, "r_count": 2, "op_class": opClass }, []) + iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" if hasImm: header_output += NeonX1RegImmOpDeclare.subst(iop) else: @@ -484,6 +494,7 @@ let {{ { "code": eWalkCode, "r_count": rCount, "op_class": opClass }, []) + iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" header_output += NeonX2RegOpDeclare.subst(iop) exec_output += 
NeonXEqualRegOpExecute.subst(iop) for type in types: @@ -519,6 +530,7 @@ let {{ { "code": eWalkCode, "r_count": rCount, "op_class": opClass }, []) + iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" header_output += NeonX1Reg2ImmOpDeclare.subst(iop) exec_output += NeonXEqualRegOpExecute.subst(iop) for type in types: @@ -556,6 +568,7 @@ let {{ { "code": eWalkCode, "r_count": rCount, "op_class": opClass }, []) + iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" header_output += NeonX1RegOpDeclare.subst(iop) exec_output += NeonXEqualRegOpExecute.subst(iop) for type in types: @@ -602,6 +615,7 @@ let {{ { "code": eWalkCode, "r_count": rCount, "op_class": opClass }, []) + iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" header_output += NeonX1RegOpDeclare.subst(iop) if long: exec_output += NeonXUnequalRegOpExecute.subst(iop) @@ -654,6 +668,7 @@ let {{ { "code": eWalkCode, "r_count": rCount, "op_class": opClass }, []) + iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" header_output += NeonX1RegOpDeclare.subst(iop) exec_output += NeonXUnequalRegOpExecute.subst(iop) for type in types: @@ -696,6 +711,7 @@ let {{ { "code": eWalkCode, "r_count": rCount, "op_class": opClass }, []) + iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" header_output += NeonX1RegImmOnlyOpDeclare.subst(iop) exec_output += NeonXEqualRegOpExecute.subst(iop) for type in types: @@ -725,6 +741,7 @@ let {{ { "code": eWalkCode, "r_count": rCount, "op_class": opClass }, []) + iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" header_output += NeonX1RegOpDeclare.subst(iop) exec_output += NeonXEqualRegOpExecute.subst(iop) for type in types: @@ -757,6 +774,7 @@ let {{ { "code": eWalkCode, "r_count": rCount, "op_class": opClass }, []) + iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" header_output += NeonX2RegImmOpDeclare.subst(iop) exec_output += NeonXEqualRegOpExecute.subst(iop) for type in types: @@ -785,6 +803,7 @@ let {{ { "code": eWalkCode, "r_count": rCount, "op_class": opClass }, []) + iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" header_output += NeonX1RegImmOpDeclare.subst(iop) exec_output += NeonXEqualRegOpExecute.subst(iop) for type in types: @@ -880,6 +899,7 @@ let {{ { "code": code, "r_count": rCount, "op_class": opClass }, []) + iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" header_output += NeonX2RegOpDeclare.subst(iop) exec_output += NeonXEqualRegOpExecute.subst(iop) for type in types: diff --git a/src/arch/arm/isa/insts/neon64_mem.isa b/src/arch/arm/isa/insts/neon64_mem.isa index 4511ad105..8f53369e9 100644 --- a/src/arch/arm/isa/insts/neon64_mem.isa +++ b/src/arch/arm/isa/insts/neon64_mem.isa @@ -44,6 +44,11 @@ let {{ decoder_output = '' exec_output = '' + zeroSveVecRegUpperPartCode = ''' + TheISA::ISA::zeroSveVecRegUpperPart(%s, + ArmStaticInst::getCurSveVecLen<uint64_t>(xc->tcBase())); + ''' + def mkMemAccMicroOp(name): global header_output, decoder_output, exec_output SPAlignmentCheckCodeNeon = ''' @@ -145,6 +150,8 @@ let {{ 'ea_code' : simd64EnabledCheckCode + eaCode, }, [ 'IsMicroop', 'IsMemRef', 'IsLoad' ]) + loadIop.snippets["memacc_code"] += zeroSveVecRegUpperPartCode % \ + "AA64FpDest" storeIop = InstObjParams(name + 'st', 'MicroNeonStore64', 'MicroNeonMemOp', diff --git a/src/arch/arm/isa/insts/sve.isa b/src/arch/arm/isa/insts/sve.isa new file mode 100644 index 000000000..b1b946f63 --- /dev/null +++ b/src/arch/arm/isa/insts/sve.isa @@ -0,0 +1,4798 @@ +// 
Copyright (c) 2017-2019 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Giacomo Gabrielli + +// @file Definition of SVE instructions. + +output header {{ + + // Decodes unary, constructive, predicated (merging) SVE instructions, + // handling signed and unsigned variants. + template <template <typename T> class BaseS, + template <typename T> class BaseU> + StaticInstPtr + decodeSveUnaryPred(unsigned size, unsigned u, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, IntRegIndex gp) + { + switch (size) { + case 0: + if (u) { + return new BaseU<uint8_t>(machInst, dest, op1, gp); + } else { + return new BaseS<int8_t>(machInst, dest, op1, gp); + } + case 1: + if (u) { + return new BaseU<uint16_t>(machInst, dest, op1, gp); + } else { + return new BaseS<int16_t>(machInst, dest, op1, gp); + } + case 2: + if (u) { + return new BaseU<uint32_t>(machInst, dest, op1, gp); + } else { + return new BaseS<int32_t>(machInst, dest, op1, gp); + } + case 3: + if (u) { + return new BaseU<uint64_t>(machInst, dest, op1, gp); + } else { + return new BaseS<int64_t>(machInst, dest, op1, gp); + } + default: + return new Unknown64(machInst); + } + } + + // Decodes SVE widening reductions. + // handling signed and unsigned variants. 
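Every decode helper in this header, including the widening-reduction one that follows, repeats the idiom of decodeSveUnaryPred above: a two-bit size field picks the element type and, where present, a u bit picks the unsigned template. A compilable miniature of that dispatch, with illustrative handler names:

    #include <cstdint>
    #include <cstdio>

    template <typename T>
    void handle() { std::printf("%zu-bit lanes\n", sizeof(T) * 8); }

    // size: 0 = byte, 1 = halfword, 2 = word, 3 = doubleword; u selects
    // the unsigned instantiation, mirroring the BaseS/BaseU split above.
    void dispatch(unsigned size, bool u)
    {
        switch (size) {
          case 0: u ? handle<uint8_t>()  : handle<int8_t>();  break;
          case 1: u ? handle<uint16_t>() : handle<int16_t>(); break;
          case 2: u ? handle<uint32_t>() : handle<int32_t>(); break;
          case 3: u ? handle<uint64_t>() : handle<int64_t>(); break;
        }
    }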
+ template <template <typename T1, typename T2> class BaseS, + template <typename T1, typename T2> class BaseU> + StaticInstPtr + decodeSveWideningReduc(unsigned size, unsigned u, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, IntRegIndex gp) + { + switch (size) { + case 0: + if (u) { + return new BaseU<uint8_t, uint64_t>(machInst, dest, op1, gp); + } else { + return new BaseS<int8_t, int64_t>(machInst, dest, op1, gp); + } + case 1: + if (u) { + return new BaseU<uint16_t, uint64_t>(machInst, dest, op1, gp); + } else { + return new BaseS<int16_t, int64_t>(machInst, dest, op1, gp); + } + case 2: + if (u) { + return new BaseU<uint32_t, uint64_t>(machInst, dest, op1, gp); + } else { + return new BaseS<int32_t, int64_t>(machInst, dest, op1, gp); + } + case 3: + assert(u); + return new BaseU<uint64_t, uint64_t>(machInst, dest, op1, gp); + default: + return new Unknown64(machInst); + } + } + + // Decodes unary, constructive, predicated (merging) SVE instructions, + // handling signed variants only. + template <template <typename T> class Base> + StaticInstPtr + decodeSveUnaryPredS(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, IntRegIndex gp) + { + switch (size) { + case 0: + return new Base<int8_t>(machInst, dest, op1, gp); + case 1: + return new Base<int16_t>(machInst, dest, op1, gp); + case 2: + return new Base<int32_t>(machInst, dest, op1, gp); + case 3: + return new Base<int64_t>(machInst, dest, op1, gp); + default: + return new Unknown64(machInst); + } + } + + // Decodes unary, constructive, predicated (merging) SVE instructions, + // handling unsigned variants only. + template <template <typename T> class Base> + StaticInstPtr + decodeSveUnaryPredU(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, IntRegIndex gp) + { + switch (size) { + case 0: + return new Base<uint8_t>(machInst, dest, op1, gp); + case 1: + return new Base<uint16_t>(machInst, dest, op1, gp); + case 2: + return new Base<uint32_t>(machInst, dest, op1, gp); + case 3: + return new Base<uint64_t>(machInst, dest, op1, gp); + default: + return new Unknown64(machInst); + } + } + + // Decodes unary, constructive, predicated (merging) SVE instructions, + // handling signed and unsigned variants, for small element sizes (8- to + // 32-bit). + template <template <typename T> class BaseS, + template <typename T> class BaseU> + StaticInstPtr + decodeSveUnaryPredSmall(unsigned size, unsigned u, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, IntRegIndex gp) + { + switch (size) { + case 0: + if (u) { + return new BaseU<uint8_t>(machInst, dest, op1, gp); + } else { + return new BaseS<int8_t>(machInst, dest, op1, gp); + } + case 1: + if (u) { + return new BaseU<uint16_t>(machInst, dest, op1, gp); + } else { + return new BaseS<int16_t>(machInst, dest, op1, gp); + } + case 2: + if (u) { + return new BaseU<uint32_t>(machInst, dest, op1, gp); + } else { + return new BaseS<int32_t>(machInst, dest, op1, gp); + } + default: + return new Unknown64(machInst); + } + } + + // Decodes unary, constructive, predicated (merging) SVE instructions, + // handling floating point variants only. 
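decodeSveWideningReduc above pairs each narrow source type with a 64-bit accumulator, because widening reductions such as UADDV fold all active lanes into a single 64-bit result. A rough stand-alone model of that computation (uaddvSketch is an illustrative name; predication is omitted):

    #include <cstddef>
    #include <cstdint>

    // Sum narrow lanes into a wide accumulator, as in UADDV: this is why
    // the decoder instantiates Base<uint8_t, uint64_t> and so on.
    uint64_t uaddvSketch(const uint8_t *lanes, std::size_t eCount)
    {
        uint64_t acc = 0;
        for (std::size_t i = 0; i < eCount; ++i)
            acc += lanes[i];
        return acc;
    }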
+ template <template <typename T> class Base> + StaticInstPtr + decodeSveUnaryPredF(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, IntRegIndex gp) + { + switch (size) { + case 1: + return new Base<uint16_t>(machInst, dest, op1, gp); + case 2: + return new Base<uint32_t>(machInst, dest, op1, gp); + case 3: + return new Base<uint64_t>(machInst, dest, op1, gp); + default: + return new Unknown64(machInst); + } + } + + // Decodes unary, constructive, unpredicated SVE instructions, handling + // unsigned variants only. + template <template <typename T> class Base> + StaticInstPtr + decodeSveUnaryUnpredU(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1) + { + switch (size) { + case 0: + return new Base<uint8_t>(machInst, dest, op1); + case 1: + return new Base<uint16_t>(machInst, dest, op1); + case 2: + return new Base<uint32_t>(machInst, dest, op1); + case 3: + return new Base<uint64_t>(machInst, dest, op1); + default: + return new Unknown64(machInst); + } + } + + // Decodes unary, constructive, unpredicated SVE instructions, handling + // floating-point variants only. + template <template <typename T> class Base> + StaticInstPtr + decodeSveUnaryUnpredF(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1) + { + switch (size) { + case 1: + return new Base<uint16_t>(machInst, dest, op1); + case 2: + return new Base<uint32_t>(machInst, dest, op1); + case 3: + return new Base<uint64_t>(machInst, dest, op1); + default: + return new Unknown64(machInst); + } + } + + // Decodes binary, destructive, predicated (merging) SVE instructions, + // handling signed and unsigned variants. + template <template <typename T> class BaseS, + template <typename T> class BaseU> + StaticInstPtr + decodeSveBinDestrPred(unsigned size, unsigned u, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op2, IntRegIndex gp) + { + switch (size) { + case 0: + if (u) { + return new BaseU<uint8_t>(machInst, dest, op2, gp); + } else { + return new BaseS<int8_t>(machInst, dest, op2, gp); + } + case 1: + if (u) { + return new BaseU<uint16_t>(machInst, dest, op2, gp); + } else { + return new BaseS<int16_t>(machInst, dest, op2, gp); + } + case 2: + if (u) { + return new BaseU<uint32_t>(machInst, dest, op2, gp); + } else { + return new BaseS<int32_t>(machInst, dest, op2, gp); + } + case 3: + if (u) { + return new BaseU<uint64_t>(machInst, dest, op2, gp); + } else { + return new BaseS<int64_t>(machInst, dest, op2, gp); + } + default: + return new Unknown64(machInst); + } + } + + // Decodes binary with immediate operand, constructive, unpredicated + // SVE instructions, handling signed variants only. + template <template <typename T> class Base> + StaticInstPtr + decodeSveBinImmUnpredS(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, unsigned immediate) + { + switch (size) { + case 0: + return new Base<int8_t>(machInst, dest, op1, immediate); + case 1: + return new Base<int16_t>(machInst, dest, op1, immediate); + case 2: + return new Base<int32_t>(machInst, dest, op1, immediate); + case 3: + return new Base<int64_t>(machInst, dest, op1, immediate); + default: + return new Unknown64(machInst); + } + } + + + // Decodes binary with immediate operand, constructive, unpredicated + // SVE instructions, handling unsigned variants only. 
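Many helpers here are labelled predicated (merging): inactive lanes keep the previous destination value, which the generated execute code later in this file expresses through the AA64FpDestMerge operand. A stand-alone sketch under that reading, using an add as a placeholder op:

    #include <cstddef>
    #include <cstdint>

    // Merging predication: lanes with a false governing-predicate bit
    // retain the old destination value instead of being zeroed.
    void mergingAddSketch(int32_t *dest, const int32_t *src,
                          const bool *gp, std::size_t eCount)
    {
        for (std::size_t i = 0; i < eCount; ++i)
            if (gp[i])
                dest[i] += src[i];
    }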
+ template <template <typename T> class Base> + StaticInstPtr + decodeSveBinImmUnpredU(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, unsigned immediate) + { + switch (size) { + case 0: + return new Base<uint8_t>(machInst, dest, op1, immediate); + case 1: + return new Base<uint16_t>(machInst, dest, op1, immediate); + case 2: + return new Base<uint32_t>(machInst, dest, op1, immediate); + case 3: + return new Base<uint64_t>(machInst, dest, op1, immediate); + default: + return new Unknown64(machInst); + } + } + + // Decodes binary with immediate operand, destructive, predicated (merging) + // SVE instructions, handling unsigned variants only. + template <template <typename T> class Base> + StaticInstPtr + decodeSveBinImmPredU(unsigned size, ExtMachInst machInst, IntRegIndex dest, + unsigned immediate, IntRegIndex gp) + { + switch (size) { + case 0: + return new Base<uint8_t>(machInst, dest, immediate, gp); + case 1: + return new Base<uint16_t>(machInst, dest, immediate, gp); + case 2: + return new Base<uint32_t>(machInst, dest, immediate, gp); + case 3: + return new Base<uint64_t>(machInst, dest, immediate, gp); + default: + return new Unknown64(machInst); + } + } + + // Decodes binary with immediate operand, destructive, predicated (merging) + // SVE instructions, handling signed variants only. + template <template <typename T> class Base> + StaticInstPtr + decodeSveBinImmPredS(unsigned size, ExtMachInst machInst, IntRegIndex dest, + unsigned immediate, IntRegIndex gp) + { + switch (size) { + case 0: + return new Base<int8_t>(machInst, dest, immediate, gp); + case 1: + return new Base<int16_t>(machInst, dest, immediate, gp); + case 2: + return new Base<int32_t>(machInst, dest, immediate, gp); + case 3: + return new Base<int64_t>(machInst, dest, immediate, gp); + default: + return new Unknown64(machInst); + } + } + + // Decodes binary with immediate operand, destructive, predicated (merging) + // SVE instructions, handling floating-point variants only. + template <template <typename T> class Base> + StaticInstPtr + decodeSveBinImmPredF(unsigned size, ExtMachInst machInst, IntRegIndex dest, + uint64_t immediate, IntRegIndex gp) + { + switch (size) { + case 1: + return new Base<uint16_t>(machInst, dest, immediate, gp); + case 2: + return new Base<uint32_t>(machInst, dest, immediate, gp); + case 3: + return new Base<uint64_t>(machInst, dest, immediate, gp); + default: + return new Unknown64(machInst); + } + } + + // Decodes unary/binary with wide immediate operand, destructive, + // unpredicated SVE instructions, handling unsigned variants only. + template <template <typename T> class Base> + StaticInstPtr + decodeSveWideImmUnpredU(unsigned size, ExtMachInst machInst, + IntRegIndex dest, uint64_t immediate) + { + switch (size) { + case 0: + return new Base<uint8_t>(machInst, dest, immediate); + case 1: + return new Base<uint16_t>(machInst, dest, immediate); + case 2: + return new Base<uint32_t>(machInst, dest, immediate); + case 3: + return new Base<uint64_t>(machInst, dest, immediate); + default: + return new Unknown64(machInst); + } + } + + // Decodes unary/binary with wide immediate operand, destructive, + // unpredicated SVE instructions, handling signed variants only. 
+ template <template <typename T> class Base> + StaticInstPtr + decodeSveWideImmUnpredS(unsigned size, ExtMachInst machInst, + IntRegIndex dest, uint64_t immediate) + { + switch (size) { + case 0: + return new Base<int8_t>(machInst, dest, immediate); + case 1: + return new Base<int16_t>(machInst, dest, immediate); + case 2: + return new Base<int32_t>(machInst, dest, immediate); + case 3: + return new Base<int64_t>(machInst, dest, immediate); + default: + return new Unknown64(machInst); + } + } + + // Decodes unary/binary with wide immediate operand, destructive, + // unpredicated SVE instructions, handling floating-point variants only. + template <template <typename T> class Base> + StaticInstPtr + decodeSveWideImmUnpredF(unsigned size, ExtMachInst machInst, + IntRegIndex dest, uint64_t immediate) + { + switch (size) { + case 1: + return new Base<uint16_t>(machInst, dest, immediate); + case 2: + return new Base<uint32_t>(machInst, dest, immediate); + case 3: + return new Base<uint64_t>(machInst, dest, immediate); + default: + return new Unknown64(machInst); + } + } + + // Decodes unary/binary with wide immediate operand, destructive, + // predicated SVE instructions, handling unsigned variants only. + template <template <typename T> class Base> + StaticInstPtr + decodeSveWideImmPredU(unsigned size, ExtMachInst machInst, + IntRegIndex dest, uint64_t immediate, IntRegIndex gp, + bool isMerging = true) + { + switch (size) { + case 0: + return new Base<uint8_t>(machInst, dest, immediate, gp, + isMerging); + case 1: + return new Base<uint16_t>(machInst, dest, immediate, gp, + isMerging); + case 2: + return new Base<uint32_t>(machInst, dest, immediate, gp, + isMerging); + case 3: + return new Base<uint64_t>(machInst, dest, immediate, gp, + isMerging); + default: + return new Unknown64(machInst); + } + } + + // Decodes unary/binary with wide immediate operand, destructive, + // predicated SVE instructions, handling floating-point variants only. + template <template <typename T> class Base> + StaticInstPtr + decodeSveWideImmPredF(unsigned size, ExtMachInst machInst, + IntRegIndex dest, uint64_t immediate, IntRegIndex gp) + { + switch (size) { + case 1: + return new Base<uint16_t>(machInst, dest, immediate, gp); + case 2: + return new Base<uint32_t>(machInst, dest, immediate, gp); + case 3: + return new Base<uint64_t>(machInst, dest, immediate, gp); + default: + return new Unknown64(machInst); + } + } + + // Decodes binary, destructive, predicated (merging) SVE instructions, + // handling unsigned variants only. + template <template <typename T> class Base> + StaticInstPtr + decodeSveBinDestrPredU(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op2, IntRegIndex gp) + { + switch (size) { + case 0: + return new Base<uint8_t>(machInst, dest, op2, gp); + case 1: + return new Base<uint16_t>(machInst, dest, op2, gp); + case 2: + return new Base<uint32_t>(machInst, dest, op2, gp); + case 3: + return new Base<uint64_t>(machInst, dest, op2, gp); + default: + return new Unknown64(machInst); + } + } + + // Decodes binary, destructive, predicated (merging) SVE instructions, + // handling signed variants only. 
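Note the signature of the destructive helpers such as decodeSveBinDestrPredU above: there is no separate op1, because the architectural pattern is Zdn = Zdn op Zm and the destination index doubles as the first source. An illustrative sketch of a destructive, merging multiply:

    #include <cstddef>
    #include <cstdint>

    // Destructive form: zdn is both first source and destination; with
    // merging predication, inactive lanes are simply left untouched.
    void destructiveMulSketch(int64_t *zdn, const int64_t *zm,
                              const bool *gp, std::size_t eCount)
    {
        for (std::size_t i = 0; i < eCount; ++i)
            if (gp[i])
                zdn[i] *= zm[i];
    }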
+ template <template <typename T> class Base> + StaticInstPtr + decodeSveBinDestrPredS(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op2, IntRegIndex gp) + { + switch (size) { + case 0: + return new Base<int8_t>(machInst, dest, op2, gp); + case 1: + return new Base<int16_t>(machInst, dest, op2, gp); + case 2: + return new Base<int32_t>(machInst, dest, op2, gp); + case 3: + return new Base<int64_t>(machInst, dest, op2, gp); + default: + return new Unknown64(machInst); + } + } + + // Decodes binary, destructive, predicated (merging) SVE instructions, + // handling floating-point variants only. + template <template <typename T> class Base> + StaticInstPtr + decodeSveBinDestrPredF(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op2, IntRegIndex gp) + { + switch (size) { + case 1: + return new Base<uint16_t>(machInst, dest, op2, gp); + case 2: + return new Base<uint32_t>(machInst, dest, op2, gp); + case 3: + return new Base<uint64_t>(machInst, dest, op2, gp); + default: + return new Unknown64(machInst); + } + } + + // Decodes binary, constructive, predicated SVE instructions, handling + // unsigned variants only. + template <template <typename T> class Base> + StaticInstPtr + decodeSveBinConstrPredU(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, IntRegIndex op2, + IntRegIndex gp, SvePredType predType) + { + switch (size) { + case 0: + return new Base<uint8_t>(machInst, dest, op1, op2, gp, predType); + case 1: + return new Base<uint16_t>(machInst, dest, op1, op2, gp, predType); + case 2: + return new Base<uint32_t>(machInst, dest, op1, op2, gp, predType); + case 3: + return new Base<uint64_t>(machInst, dest, op1, op2, gp, predType); + default: + return new Unknown64(machInst); + } + } + + // Decodes binary, constructive, unpredicated SVE instructions. + template <template <typename T> class Base> + StaticInstPtr + decodeSveBinUnpred(unsigned size, unsigned u, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, IntRegIndex op2) + { + switch (size) { + case 0: + if (u) { + return new Base<uint8_t>(machInst, dest, op1, op2); + } else { + return new Base<int8_t>(machInst, dest, op1, op2); + } + case 1: + if (u) { + return new Base<uint16_t>(machInst, dest, op1, op2); + } else { + return new Base<int16_t>(machInst, dest, op1, op2); + } + case 2: + if (u) { + return new Base<uint32_t>(machInst, dest, op1, op2); + } else { + return new Base<int32_t>(machInst, dest, op1, op2); + } + case 3: + if (u) { + return new Base<uint64_t>(machInst, dest, op1, op2); + } else { + return new Base<int64_t>(machInst, dest, op1, op2); + } + default: + return new Unknown64(machInst); + } + } + + // Decodes binary, constructive, unpredicated SVE instructions. + // Unsigned instructions only. + template <template <typename T> class Base> + StaticInstPtr + decodeSveBinUnpredU(unsigned size, ExtMachInst machInst, IntRegIndex dest, + IntRegIndex op1, IntRegIndex op2) + { + switch (size) { + case 0: + return new Base<uint8_t>(machInst, dest, op1, op2); + case 1: + return new Base<uint16_t>(machInst, dest, op1, op2); + case 2: + return new Base<uint32_t>(machInst, dest, op1, op2); + case 3: + return new Base<uint64_t>(machInst, dest, op1, op2); + default: + return new Unknown64(machInst); + } + } + + // Decodes binary, constructive, unpredicated SVE instructions. + // Signed instructions only. 
+    template <template <typename T> class Base>
+    StaticInstPtr
+    decodeSveBinUnpredS(unsigned size, ExtMachInst machInst, IntRegIndex dest,
+                        IntRegIndex op1, IntRegIndex op2)
+    {
+        switch (size) {
+          case 0:
+            return new Base<int8_t>(machInst, dest, op1, op2);
+          case 1:
+            return new Base<int16_t>(machInst, dest, op1, op2);
+          case 2:
+            return new Base<int32_t>(machInst, dest, op1, op2);
+          case 3:
+            return new Base<int64_t>(machInst, dest, op1, op2);
+          default:
+            return new Unknown64(machInst);
+        }
+    }
+
+    // Decodes binary, constructive, unpredicated SVE instructions, handling
+    // floating-point variants only.
+    template <template <typename T> class Base>
+    StaticInstPtr
+    decodeSveBinUnpredF(unsigned size, ExtMachInst machInst, IntRegIndex dest,
+                        IntRegIndex op1, IntRegIndex op2)
+    {
+        switch (size) {
+          case 1:
+            return new Base<uint16_t>(machInst, dest, op1, op2);
+          case 2:
+            return new Base<uint32_t>(machInst, dest, op1, op2);
+          case 3:
+            return new Base<uint64_t>(machInst, dest, op1, op2);
+          default:
+            return new Unknown64(machInst);
+        }
+    }
+
+    // Decodes SVE compare instructions - binary, predicated (zeroing),
+    // generating a predicate - handling floating-point variants only.
+    template <template <typename T> class Base>
+    StaticInstPtr
+    decodeSveCmpF(unsigned size, ExtMachInst machInst,
+                  IntRegIndex dest, IntRegIndex op1, IntRegIndex op2,
+                  IntRegIndex gp)
+    {
+        switch (size) {
+          case 1:
+            return new Base<uint16_t>(machInst, dest, op1, op2, gp);
+          case 2:
+            return new Base<uint32_t>(machInst, dest, op1, op2, gp);
+          case 3:
+            return new Base<uint64_t>(machInst, dest, op1, op2, gp);
+          default:
+            return new Unknown64(machInst);
+        }
+    }
+
+    // Decodes SVE compare-with-immediate instructions - binary, predicated
+    // (zeroing), generating a predicate - handling floating-point variants
+    // only.
+    template <template <typename T> class Base>
+    StaticInstPtr
+    decodeSveCmpImmF(unsigned size, ExtMachInst machInst,
+                     IntRegIndex dest, IntRegIndex op1, uint64_t imm,
+                     IntRegIndex gp)
+    {
+        switch (size) {
+          case 1:
+            return new Base<uint16_t>(machInst, dest, op1, imm, gp);
+          case 2:
+            return new Base<uint32_t>(machInst, dest, op1, imm, gp);
+          case 3:
+            return new Base<uint64_t>(machInst, dest, op1, imm, gp);
+          default:
+            return new Unknown64(machInst);
+        }
+    }
+
+    // Decodes ternary, destructive, predicated (merging) SVE instructions.
+    template <template <typename T> class Base>
+    StaticInstPtr
+    decodeSveTerPred(unsigned size, unsigned u, ExtMachInst machInst,
+                     IntRegIndex dest, IntRegIndex op1, IntRegIndex op2,
+                     IntRegIndex gp)
+    {
+        switch (size) {
+          case 0:
+            if (u) {
+                return new Base<uint8_t>(machInst, dest, op1, op2, gp);
+            } else {
+                return new Base<int8_t>(machInst, dest, op1, op2, gp);
+            }
+          case 1:
+            if (u) {
+                return new Base<uint16_t>(machInst, dest, op1, op2, gp);
+            } else {
+                return new Base<int16_t>(machInst, dest, op1, op2, gp);
+            }
+          case 2:
+            if (u) {
+                return new Base<uint32_t>(machInst, dest, op1, op2, gp);
+            } else {
+                return new Base<int32_t>(machInst, dest, op1, op2, gp);
+            }
+          case 3:
+            if (u) {
+                return new Base<uint64_t>(machInst, dest, op1, op2, gp);
+            } else {
+                return new Base<int64_t>(machInst, dest, op1, op2, gp);
+            }
+          default:
+            return new Unknown64(machInst);
+        }
+    }
+
+    // Decodes ternary, destructive, predicated (merging) SVE instructions,
+    // handling wide signed variants only. XXX: zeroing for CMP instructions.
+ template <template <typename T> class Base> + StaticInstPtr + decodeSveTerPredWS(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, IntRegIndex op2, + IntRegIndex gp) + { + switch (size) { + case 0: + return new Base<int8_t>(machInst, dest, op1, op2, gp); + case 1: + return new Base<int16_t>(machInst, dest, op1, op2, gp); + case 2: + return new Base<int32_t>(machInst, dest, op1, op2, gp); + default: + return new Unknown64(machInst); + } + } + + // Decodes ternary, destructive, predicated (merging) SVE instructions, + // handling wide unsigned variants only. XXX: zeroing for CMP instructions. + template <template <typename T> class Base> + StaticInstPtr + decodeSveTerPredWU(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, IntRegIndex op2, + IntRegIndex gp) + { + switch (size) { + case 0: + return new Base<uint8_t>(machInst, dest, op1, op2, gp); + case 1: + return new Base<uint16_t>(machInst, dest, op1, op2, gp); + case 2: + return new Base<uint32_t>(machInst, dest, op1, op2, gp); + default: + return new Unknown64(machInst); + } + } + + // Decodes ternary, destructive, predicated (merging) SVE instructions, + // handling signed variants only. XXX: zeroing for CMP instructions. + template <template <typename T> class Base> + StaticInstPtr + decodeSveTerPredS(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, IntRegIndex op2, + IntRegIndex gp) + { + switch (size) { + case 0: + return new Base<int8_t>(machInst, dest, op1, op2, gp); + case 1: + return new Base<int16_t>(machInst, dest, op1, op2, gp); + case 2: + return new Base<int32_t>(machInst, dest, op1, op2, gp); + case 3: + return new Base<int64_t>(machInst, dest, op1, op2, gp); + default: + return new Unknown64(machInst); + } + } + + // Decodes ternary, destructive, predicated (merging) SVE instructions, + // handling unsigned variants only. XXX: zeroing for CMP instructions. 
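The XXX notes above record that CMP-style instructions use zeroing rather than merging predication: inactive lanes of the destination predicate are written false, exactly as the sveCmpInst generator later in this file does. A sketch of a greater-than compare under that rule:

    #include <cstddef>
    #include <cstdint>

    // Zeroing predication for compares: inactive lanes produce false
    // rather than keeping their old predicate value.
    void cmpGtSketch(bool *pdest, const int32_t *op1, const int32_t *op2,
                     const bool *gp, std::size_t eCount)
    {
        for (std::size_t i = 0; i < eCount; ++i)
            pdest[i] = gp[i] && (op1[i] > op2[i]);
    }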
+ template <template <typename T> class Base> + StaticInstPtr + decodeSveTerPredU(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, IntRegIndex op2, + IntRegIndex gp) + { + switch (size) { + case 0: + return new Base<uint8_t>(machInst, dest, op1, op2, gp); + case 1: + return new Base<uint16_t>(machInst, dest, op1, op2, gp); + case 2: + return new Base<uint32_t>(machInst, dest, op1, op2, gp); + case 3: + return new Base<uint64_t>(machInst, dest, op1, op2, gp); + default: + return new Unknown64(machInst); + } + } + + // Decodes SVE signed unary extension instructions (8-bit source element + // size) + template <template <typename TS, typename TD> class Base> + StaticInstPtr + decodeSveUnaryExtendFromBPredS(unsigned dsize, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, + IntRegIndex gp) + { + switch (dsize) { + case 1: + return new Base<int8_t, int16_t>(machInst, dest, op1, gp); + case 2: + return new Base<int8_t, int32_t>(machInst, dest, op1, gp); + case 3: + return new Base<int8_t, int64_t>(machInst, dest, op1, gp); + } + return new Unknown64(machInst); + } + + // Decodes SVE unsigned unary extension instructions (8-bit source element + // size) + template <template <typename TS, typename TD> class Base> + StaticInstPtr + decodeSveUnaryExtendFromBPredU(unsigned dsize, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, + IntRegIndex gp) + { + switch (dsize) { + case 1: + return new Base<uint8_t, uint16_t>(machInst, dest, op1, gp); + case 2: + return new Base<uint8_t, uint32_t>(machInst, dest, op1, gp); + case 3: + return new Base<uint8_t, uint64_t>(machInst, dest, op1, gp); + } + return new Unknown64(machInst); + } + + // Decodes SVE signed unary extension instructions (16-bit source element + // size) + template <template <typename TS, typename TD> class Base> + StaticInstPtr + decodeSveUnaryExtendFromHPredS(unsigned dsize, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, + IntRegIndex gp) + { + switch (dsize) { + case 2: + return new Base<int16_t, int32_t>(machInst, dest, op1, gp); + case 3: + return new Base<int16_t, int64_t>(machInst, dest, op1, gp); + } + return new Unknown64(machInst); + } + + // Decodes SVE unsigned unary extension instructions (16-bit source element + // size) + template <template <typename TS, typename TD> class Base> + StaticInstPtr + decodeSveUnaryExtendFromHPredU(unsigned dsize, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, + IntRegIndex gp) + { + switch (dsize) { + case 2: + return new Base<uint16_t, uint32_t>(machInst, dest, op1, gp); + case 3: + return new Base<uint16_t, uint64_t>(machInst, dest, op1, gp); + } + return new Unknown64(machInst); + } + + // Decodes ternary, destructive, predicated (merging) SVE instructions, + // handling floating-point variants only. + template <template <typename T> class Base> + StaticInstPtr + decodeSveTerPredF(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, IntRegIndex op2, + IntRegIndex gp) + { + switch (size) { + case 1: + return new Base<uint16_t>(machInst, dest, op1, op2, gp); + case 2: + return new Base<uint32_t>(machInst, dest, op1, op2, gp); + case 3: + return new Base<uint64_t>(machInst, dest, op1, op2, gp); + default: + return new Unknown64(machInst); + } + } + + // Decodes ternary with immediate operand, destructive, unpredicated SVE + // instructions handling floating-point variants only. 
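The unary extension decoders above pair a narrow source type with a wider destination type because instructions like SXTB on .d lanes sign-extend the low byte of every 64-bit element in place, hence the <int8_t, int64_t> instantiations. A sketch of that lane transform, with predication omitted for brevity:

    #include <cstddef>
    #include <cstdint>

    // In-lane sign extension (SXTB, .d form): keep only the low byte of
    // each 64-bit lane, sign-extended back to 64 bits.
    void sxtbSketch(int64_t *lanes, std::size_t eCount)
    {
        for (std::size_t i = 0; i < eCount; ++i)
            lanes[i] = static_cast<int8_t>(lanes[i]);
    }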
+ template <template <typename T> class Base> + StaticInstPtr + decodeSveTerImmUnpredF(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op2, uint8_t imm) + { + switch (size) { + case 1: + return new Base<uint16_t>(machInst, dest, op2, imm); + case 2: + return new Base<uint32_t>(machInst, dest, op2, imm); + case 3: + return new Base<uint64_t>(machInst, dest, op2, imm); + default: + return new Unknown64(machInst); + } + } + + // Decodes SVE PTRUE(S) instructions. + template <template <typename T> class Base> + StaticInstPtr + decodeSvePtrue(unsigned size, ExtMachInst machInst, + IntRegIndex dest, uint8_t imm) + { + switch (size) { + case 0: + return new Base<uint8_t>(machInst, dest, imm); + case 1: + return new Base<uint16_t>(machInst, dest, imm); + case 2: + return new Base<uint32_t>(machInst, dest, imm); + case 3: + return new Base<uint64_t>(machInst, dest, imm); + default: + return new Unknown64(machInst); + } + } + + // Decodes SVE predicate count instructions, scalar signed variant only + template <template <typename T> class Base> + StaticInstPtr + decodeSvePredCountS(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1) + { + switch (size) { + case 0: + return new Base<int8_t>(machInst, dest, op1); + case 1: + return new Base<int16_t>(machInst, dest, op1); + case 2: + return new Base<int32_t>(machInst, dest, op1); + case 3: + return new Base<int64_t>(machInst, dest, op1); + default: + return new Unknown64(machInst); + } + } + + // Decodes SVE predicate count instructions, scalar unsigned variant only + template <template <typename T> class Base> + StaticInstPtr + decodeSvePredCountU(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1) + { + switch (size) { + case 0: + return new Base<uint8_t>(machInst, dest, op1); + case 1: + return new Base<uint16_t>(machInst, dest, op1); + case 2: + return new Base<uint32_t>(machInst, dest, op1); + case 3: + return new Base<uint64_t>(machInst, dest, op1); + default: + return new Unknown64(machInst); + } + } + + // Decodes SVE predicate count instructions, vector signed variant only + template <template <typename T> class Base> + StaticInstPtr + decodeSvePredCountVS(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1) + { + switch (size) { + case 1: + return new Base<int16_t>(machInst, dest, op1); + case 2: + return new Base<int32_t>(machInst, dest, op1); + case 3: + return new Base<int64_t>(machInst, dest, op1); + default: + return new Unknown64(machInst); + } + } + + // Decodes SVE predicate count instructions, vector unsigned variant only + template <template <typename T> class Base> + StaticInstPtr + decodeSvePredCountVU(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1) + { + switch (size) { + case 1: + return new Base<uint16_t>(machInst, dest, op1); + case 2: + return new Base<uint32_t>(machInst, dest, op1); + case 3: + return new Base<uint64_t>(machInst, dest, op1); + default: + return new Unknown64(machInst); + } + } + + // Decodes ternary with immediate operand, predicated SVE + // instructions handling unsigned variants only. 
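decodeSvePtrue above forwards the pattern immediate untouched. As architecture background rather than patch code: the pattern caps how many leading lanes PTRUE activates. A partial sketch covering only the common encodings (POW2, VL1-VL8, ALL); the remaining patterns are omitted:

    #include <cstdint>

    // Lanes requested by a PTRUE pattern, given the available element
    // count; unlisted patterns fall through to zero active lanes here.
    unsigned ptrueCountSketch(uint8_t pattern, unsigned eCount)
    {
        if (pattern == 31)                        // ALL: every lane
            return eCount;
        if (pattern >= 1 && pattern <= 8)         // VL1..VL8: fixed count,
            return pattern <= eCount ? pattern : 0;  // or none if too big
        if (pattern == 0) {                       // POW2: largest power of 2
            unsigned n = 1;
            while (n * 2 <= eCount)
                n *= 2;
            return n;
        }
        return 0;
    }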
+ template <template <typename T> class Base> + StaticInstPtr + decodeSveTerImmPredU(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, int64_t imm, IntRegIndex gp) + { + switch (size) { + case 0: + return new Base<uint8_t>(machInst, dest, op1, imm, gp); + case 1: + return new Base<uint16_t>(machInst, dest, op1, imm, gp); + case 2: + return new Base<uint32_t>(machInst, dest, op1, imm, gp); + case 3: + return new Base<uint64_t>(machInst, dest, op1, imm, gp); + default: + return new Unknown64(machInst); + } + } + + // Decodes ternary with immediate operand, predicated SVE + // instructions handling signed variants only. + template <template <typename T> class Base> + StaticInstPtr + decodeSveTerImmPredS(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, int64_t imm, IntRegIndex gp) + { + switch (size) { + case 0: + return new Base<int8_t>(machInst, dest, op1, imm, gp); + case 1: + return new Base<int16_t>(machInst, dest, op1, imm, gp); + case 2: + return new Base<int32_t>(machInst, dest, op1, imm, gp); + case 3: + return new Base<int64_t>(machInst, dest, op1, imm, gp); + default: + return new Unknown64(machInst); + } + } + + // Decodes integer element count SVE instructions, handling + // signed variants only. + template <template <typename T> class Base> + StaticInstPtr + decodeSveElemIntCountS(unsigned size, ExtMachInst machInst, + IntRegIndex dest, uint8_t pattern, uint8_t imm4) + { + switch (size) { + case 0: + return new Base<int8_t>(machInst, dest, pattern, imm4); + case 1: + return new Base<int16_t>(machInst, dest, pattern, imm4); + case 2: + return new Base<int32_t>(machInst, dest, pattern, imm4); + case 3: + return new Base<int64_t>(machInst, dest, pattern, imm4); + default: + return new Unknown64(machInst); + } + } + + // Decodes integer element count SVE instructions, handling + // unsigned variants only. + template <template <typename T> class Base> + StaticInstPtr + decodeSveElemIntCountU(unsigned size, ExtMachInst machInst, + IntRegIndex dest, uint8_t pattern, uint8_t imm4) + { + switch (size) { + case 0: + return new Base<uint8_t>(machInst, dest, pattern, imm4); + case 1: + return new Base<uint16_t>(machInst, dest, pattern, imm4); + case 2: + return new Base<uint32_t>(machInst, dest, pattern, imm4); + case 3: + return new Base<uint64_t>(machInst, dest, pattern, imm4); + default: + return new Unknown64(machInst); + } + } + + // Decodes integer element count SVE instructions, handling + // signed variants from 16 to 64 bits only. + template <template <typename T> class Base> + StaticInstPtr + decodeSveElemIntCountLS(unsigned size, ExtMachInst machInst, + IntRegIndex dest, uint8_t pattern, uint8_t imm4) + { + switch (size) { + case 1: + return new Base<int16_t>(machInst, dest, pattern, imm4); + case 2: + return new Base<int32_t>(machInst, dest, pattern, imm4); + case 3: + return new Base<int64_t>(machInst, dest, pattern, imm4); + default: + return new Unknown64(machInst); + } + } + + // Decodes integer element count SVE instructions, handling + // unsigned variants from 16 to 64 bits only. 
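These element-count decoders serve the CNT/INC/DEC family, e.g. "incw x0, all, mul #2": the scalar moves by imm * count, where count comes from the pattern and the current vector length, and the 4-bit immediate field encodes multipliers 1 to 16. A sketch under that reading (elemCount stands in for the pattern computation sketched earlier):

    #include <cstdint>

    // INC-style update: xd += imm * elemCount; DEC would subtract.
    int64_t incSketch(int64_t xd, unsigned elemCount, unsigned imm)
    {
        return xd + static_cast<int64_t>(elemCount) * imm;
    }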
+ template <template <typename T> class Base> + StaticInstPtr + decodeSveElemIntCountLU(unsigned size, ExtMachInst machInst, + IntRegIndex dest, uint8_t pattern, uint8_t imm4) + { + switch (size) { + case 1: + return new Base<uint16_t>(machInst, dest, pattern, imm4); + case 2: + return new Base<uint32_t>(machInst, dest, pattern, imm4); + case 3: + return new Base<uint64_t>(machInst, dest, pattern, imm4); + default: + return new Unknown64(machInst); + } + } + + // Decodes SVE unpack instructions. Handling signed variants. + template <template <typename T1, typename T2> class Base> + StaticInstPtr + decodeSveUnpackS(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1) + { + switch (size) { + case 1: + return new Base<int8_t, int16_t>(machInst, dest, op1); + case 2: + return new Base<int16_t, int32_t>(machInst, dest, op1); + case 3: + return new Base<int32_t, int64_t>(machInst, dest, op1); + default: + return new Unknown64(machInst); + } + } + + // Decodes SVE unpack instructions. Handling unsigned variants. + template <template <typename T1, typename T2> class Base> + StaticInstPtr + decodeSveUnpackU(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1) + { + switch (size) { + case 1: + return new Base<uint8_t, uint16_t>(machInst, dest, op1); + case 2: + return new Base<uint16_t, uint32_t>(machInst, dest, op1); + case 3: + return new Base<uint32_t, uint64_t>(machInst, dest, op1); + default: + return new Unknown64(machInst); + } + } +}}; + +let {{ + + header_output = '' + exec_output = '' + decoders = { 'Generic': {} } + + class PredType: + NONE = 0 + MERGE = 1 + ZERO = 2 + SELECT = 3 + + class CvtDir: + Narrow = 0 + Widen = 1 + + class IndexFormat(object): + ImmImm = 'II' + ImmReg = 'IR' + RegImm = 'RI' + RegReg = 'RR' + + class SrcRegType(object): + Vector = 0 + Scalar = 1 + SimdFpScalar = 2 + Predicate = 3 + + class DstRegType(object): + Vector = 0 + Scalar = 1 + SimdFpScalar = 2 + Predicate = 3 + + class DestType(object): + Scalar = 'false' + Vector = 'true' + + class SrcSize(object): + Src32bit = 'true' + Src64bit = 'false' + + class Break(object): + Before = 0 + After = 1 + + class Unpack(object): + High = 0 + Low = 1 + + # Generates definitions for SVE ADR instructions + def sveAdrInst(name, Name, opClass, types, op): + global header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( + xc->tcBase()); + for (unsigned i = 0; i < eCount; i++) { + const Element& srcElem1 = AA64FpOp1_x[i]; + Element srcElem2 = AA64FpOp2_x[i]; + Element destElem = 0; + %(op)s + AA64FpDest_x[i] = destElem; + }''' % {'op': op} + iop = InstObjParams(name, 'Sve' + Name, 'SveAdrOp', + {'code': code, 'op_class': opClass}, []) + header_output += SveAdrOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in types: + substDict = {'targs' : type, + 'class_name' : 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict) + + # Generates definition for SVE while predicate generation instructions + def sveWhileInst(name, Name, opClass, types, op, + srcSize = SrcSize.Src64bit): + global header_output, exec_output, decoders + extraPrologCode = ''' + auto& destPred = PDest;''' + if 'int32_t' in types: + srcType = 'int64_t' if srcSize == SrcSize.Src64bit else 'int32_t' + else: + srcType = 'uint64_t' if srcSize == SrcSize.Src64bit else 'uint32_t' + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( + xc->tcBase()); + %(stype)s srcElem1 = 
static_cast<%(stype)s>(XOp1); + %(stype)s srcElem2 = static_cast<%(stype)s>(XOp2); + bool cond, first = false, none = true, last = true; + destPred.reset(); + for (unsigned i = 0; i < eCount; i++) { + %(op)s; + last = last && cond; + none = none && !cond; + first = first || (i == 0 && cond); + PDest_x[i] = last; + srcElem1++; + } + CondCodesNZ = (first << 1) | none; + CondCodesC = !last; + CondCodesV = false; + '''%{'op': op, 'stype': srcType} + iop = InstObjParams(name, 'Sve' + Name, 'SveWhileOp', + {'code': code, 'op_class': opClass, 'srcIs32b': srcSize}, []) + iop.snippets['code'] = extraPrologCode + iop.snippets['code'] + header_output += SveWhileOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in types: + substDict = {'targs' : type, 'class_name' : 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict); + + # Generate definition for SVE compare & terminate instructions + def sveCompTermInst(name, Name, opClass, types, op): + global header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + bool destElem; + Element srcElem1 = static_cast<Element>(XOp1); + Element srcElem2 = static_cast<Element>(XOp2); + %(op)s; + if (destElem) { + CondCodesNZ = CondCodesNZ | 0x2; + CondCodesV = 0; + } else { + CondCodesNZ = CondCodesNZ & ~0x2; + CondCodesV = !CondCodesC; + } + ''' % {'op': op} + iop = InstObjParams(name, 'Sve' + Name, 'SveCompTermOp', + {'code': code, 'op_class': opClass}, []) + header_output += SveCompTermOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in types: + substDict = {'targs' : type, 'class_name': 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict); + + # Generates definition for SVE predicate count instructions + def svePredCountInst(name, Name, opClass, types, op, + destType=DestType.Vector, + srcSize=SrcSize.Src64bit): + global header_output, exec_output, decoders + assert not (destType == DestType.Vector and + srcSize != SrcSize.Src64bit) + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( + xc->tcBase()); + int count = 0; + for (unsigned i = 0; i < eCount; i++) { + if (GpOp_x[i]) { + count++; + } + }''' + if destType == DestType.Vector: + code += ''' + for (unsigned i = 0; i < eCount; i++) { + Element destElem = 0; + const Element& srcElem = AA64FpDestMerge_x[i]; + %(op)s + AA64FpDest_x[i] = destElem; + }''' % {'op': op} + else: + code += ''' + %(op)s''' % {'op': op} + iop = InstObjParams(name, 'Sve' + Name, 'SvePredCountOp', + {'code': code, 'op_class': opClass, 'srcIs32b': srcSize, + 'destIsVec': destType}, []) + header_output += SvePredCountOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in types: + substDict = {'targs' : type, 'class_name' : 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict); + + # Generates definition for SVE predicate count instructions (predicated) + def svePredCountPredInst(name, Name, opClass, types): + global header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( + xc->tcBase()); + int count = 0; + for (unsigned i = 0; i < eCount; i++) { + if (POp1_x[i] && GpOp_x[i]) { + count++; + } + } + XDest = count; + ''' + iop = InstObjParams(name, 'Sve' + Name, 'SvePredCountPredOp', + {'code': code, 'op_class': opClass}, []) + header_output += SvePredCountPredOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in types: + substDict = {'targs' : type, 'class_name' : 'Sve' + Name} + 
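A condensed model of the loop sveWhileInst above generates, in its WHILELT flavor: lanes stay true while the incrementing first operand remains below the second, and the flags pack first-active into N, none-active into Z, and inverted last-active into C, matching the CondCodesNZ/CondCodesC writes in the generated code. A sketch only; the packed-nibble return value is this example's convention:

    #include <cstddef>
    #include <cstdint>

    // Returns NZCV as a nibble (N=bit3, Z=bit2, C=bit1, V=bit0).
    unsigned whileltSketch(bool *pdest, int64_t op1, int64_t op2,
                           std::size_t eCount)
    {
        bool first = false, none = true, last = true;
        for (std::size_t i = 0; i < eCount; ++i) {
            const bool cond = op1 < op2;
            last = last && cond;        // sticky: false after first miss
            none = none && !cond;
            first = first || (i == 0 && cond);
            pdest[i] = last;
            ++op1;
        }
        return (first << 3) | (none << 2) | (!last << 1);
    }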
exec_output += SveOpExecDeclare.subst(substDict) + + # Generates definition for SVE Index generation instructions + def sveIndex(fmt): + global header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( + xc->tcBase());''' + if fmt == IndexFormat.ImmReg or fmt == IndexFormat.ImmImm: + code += ''' + const Element& srcElem1 = imm1;''' + if fmt == IndexFormat.RegImm or fmt == IndexFormat.RegReg: + code += ''' + const Element& srcElem1 = XOp1;''' + if fmt == IndexFormat.RegImm or fmt == IndexFormat.ImmImm: + code += ''' + const Element& srcElem2 = imm2;''' + if fmt == IndexFormat.ImmReg or fmt == IndexFormat.RegReg: + code += ''' + const Element& srcElem2 = XOp2;''' + code +=''' + for (unsigned i = 0; i < eCount; i++) { + AA64FpDest_x[i] = srcElem1 + i * srcElem2; + }''' + iop = InstObjParams('index', 'SveIndex'+fmt, 'SveIndex'+fmt+'Op', + {'code': code, 'op_class': 'SimdAluOp'}) + if fmt == IndexFormat.ImmImm: + header_output += SveIndexIIOpDeclare.subst(iop) + elif fmt == IndexFormat.ImmReg: + header_output += SveIndexIROpDeclare.subst(iop) + elif fmt == IndexFormat.RegImm: + header_output += SveIndexRIOpDeclare.subst(iop) + elif fmt == IndexFormat.RegReg: + header_output += SveIndexRROpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in ['int8_t', 'int16_t', 'int32_t', 'int64_t']: + substDict = {'targs': type, 'class_name': 'SveIndex'+fmt} + exec_output += SveOpExecDeclare.subst(substDict) + + # Generates definitions for widening unary SVE instructions + # (always constructive) + def sveWidenUnaryInst(name, Name, opClass, types, op, + predType=PredType.NONE, decoder='Generic'): + global header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<DElement>( + xc->tcBase()); + for (unsigned i = 0; i < eCount; i++) { + SElement srcElem1 = AA64FpOp1_xd[i]; + DElement destElem = 0;''' + if predType != PredType.NONE: + code += ''' + if (GpOp_xd[i]) { + %(op)s + } else { + destElem = %(dest_elem)s; + }''' % {'op': op, + 'dest_elem': 'AA64FpDestMerge_xd[i]' + if predType == PredType.MERGE + else '0'} + else: + code += ''' + %(op)s''' % {'op': op} + code += ''' + AA64FpDest_xd[i] = destElem; + }''' + iop = InstObjParams(name, 'Sve' + Name, + 'SveUnaryPredOp' if predType != PredType.NONE + else 'SveUnaryUnpredOp', + {'code': code, 'op_class': opClass}, []) + if predType != PredType.NONE: + header_output += SveWideningUnaryPredOpDeclare.subst(iop) + else: + header_output += SveWideningUnaryUnpredOpDeclare.subst(iop) + exec_output += SveWideningOpExecute.subst(iop) + for type in types: + substDict = {'targs' : type, + 'class_name' : 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict) + + # Generates definitions for unary SVE instructions (always constructive) + def sveUnaryInst(name, Name, opClass, types, op, predType=PredType.NONE, + srcRegType=SrcRegType.Vector, decoder='Generic'): + global header_output, exec_output, decoders + op1 = ('AA64FpOp1_x[i]' if srcRegType == SrcRegType.Vector + else 'XOp1' if srcRegType == SrcRegType.Scalar + else 'AA64FpOp1_x[0]') + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( + xc->tcBase()); + for (unsigned i = 0; i < eCount; i++) { + Element srcElem1 = %s; + Element destElem = 0;''' % op1 + if predType != PredType.NONE: + code += ''' + if (GpOp_x[i]) { + %(op)s + } else { + destElem = %(dest_elem)s; + }''' % {'op': op, + 'dest_elem': 
                    'AA64FpDestMerge_x[i]'
+                    if predType == PredType.MERGE
+                    else '0'}
+        else:
+            code += '''
+            %(op)s''' % {'op': op}
+        code += '''
+        AA64FpDest_x[i] = destElem;
+    }'''
+        iop = InstObjParams(name, 'Sve' + Name,
+                'SveUnaryPredOp' if predType != PredType.NONE
+                else 'SveUnaryUnpredOp',
+                {'code': code, 'op_class': opClass}, [])
+        if predType != PredType.NONE:
+            header_output += SveUnaryPredOpDeclare.subst(iop)
+        else:
+            header_output += SveUnaryUnpredOpDeclare.subst(iop)
+        exec_output += SveOpExecute.subst(iop)
+        for type in types:
+            substDict = {'targs' : type,
+                         'class_name' : 'Sve' + Name}
+            exec_output += SveOpExecDeclare.subst(substDict)
+
+    # Generates definitions for SVE floating-point conversions (always
+    # unary, constructive, merging)
+    def sveCvtInst(name, Name, opClass, types, op, direction=CvtDir.Narrow,
+                   decoder='Generic'):
+        global header_output, exec_output, decoders
+        code = sveEnabledCheckCode + '''
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<%(bigElemType)s>(
+                xc->tcBase());
+        for (unsigned i = 0; i < eCount; i++) {
+            SElement srcElem1 = AA64FpOp1_x%(bigElemSuffix)s[i] &
+                    mask(sizeof(SElement) * 8);
+            DElement destElem = 0;
+            if (GpOp_x%(bigElemSuffix)s[i]) {
+                %(op)s
+                AA64FpDest_x%(bigElemSuffix)s[i] = destElem;
+            } else {
+                AA64FpDest_x%(bigElemSuffix)s[i] =
+                        AA64FpDestMerge_x%(bigElemSuffix)s[i];
+            }
+        }
+        ''' % {'op': op,
+               'bigElemType': 'SElement' if direction == CvtDir.Narrow
+                              else 'DElement',
+               'bigElemSuffix': 's' if direction == CvtDir.Narrow else 'd'}
+        iop = InstObjParams(name, 'Sve' + Name, 'SveUnaryPredOp',
+                {'code': code, 'op_class': opClass}, [])
+        header_output += SveWideningUnaryPredOpDeclare.subst(iop)
+        exec_output += SveWideningOpExecute.subst(iop)
+        for type in types:
+            substDict = {'targs' : type,
+                         'class_name' : 'Sve' + Name}
+            exec_output += SveOpExecDeclare.subst(substDict)
+
+    # Generates definitions for associative SVE reductions
+    def sveAssocReducInst(name, Name, opClass, types, op, identity,
+                          decoder='Generic'):
+        global header_output, exec_output, decoders
+        code = sveEnabledCheckCode + '''
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+                xc->tcBase());
+        TheISA::VecRegContainer tmpVecC;
+        auto auxOp1 = tmpVecC.as<Element>();
+        for (unsigned i = 0; i < eCount; ++i) {
+            auxOp1[i] = AA64FpOp1_x[i];
+        }
+        Element destElem = %(identity)s;
+        for (unsigned i = 0; i < eCount; i++) {
+            AA64FpDest_x[i] = 0; // zero upper part
+            if (GpOp_x[i]) {
+                const Element& srcElem1 = auxOp1[i];
+                %(op)s
+            }
+        }
+        AA64FpDest_x[0] = destElem;
+        ''' % {'op': op, 'identity': identity}
+        iop = InstObjParams(name, 'Sve' + Name, 'SveReducOp',
+                {'code': code, 'op_class': opClass}, [])
+        header_output += SveReducOpDeclare.subst(iop)
+        exec_output += SveOpExecute.subst(iop)
+        for type in types:
+            substDict = {'targs' : type,
+                         'class_name' : 'Sve' + Name}
+            exec_output += SveOpExecDeclare.subst(substDict)
+
+    # Generates definitions for widening associative SVE reductions
+    def sveWideningAssocReducInst(name, Name, opClass, types, op, identity,
+                                  decoder='Generic'):
+        global header_output, exec_output, decoders
+        code = sveEnabledCheckCode + '''
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<SElement>(
+                xc->tcBase());
+        unsigned eWideCount = ArmStaticInst::getCurSveVecLen<DElement>(
+                xc->tcBase());
+        DElement destElem = %(identity)s;
+        for (unsigned i = 0; i < eCount; i++) {
+            if (GpOp_xs[i]) {
+                DElement srcElem1 = AA64FpOp1_xs[i];
+                %(op)s
+            }
+        }
+        AA64FpDest_xd[0] = destElem;
+        for (int i = 1; i < eWideCount; i++) {
+            AA64FpDest_xd[i]
= 0; + } + ''' % {'op': op, 'identity': identity} + iop = InstObjParams(name, 'Sve' + Name, 'SveReducOp', + {'code': code, 'op_class': opClass}, []) + header_output += SveWideningReducOpDeclare.subst(iop) + exec_output += SveWideningOpExecute.subst(iop) + for type in types: + substDict = {'targs' : type, + 'class_name' : 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict) + + # Generates definitions for non-associative SVE reductions + def sveNonAssocReducInst(name, Name, opClass, types, op, identity, + decoder='Generic'): + global header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( + xc->tcBase()); + TheISA::VecRegContainer tmpVecC; + auto tmpVec = tmpVecC.as<Element>(); + int ePow2Count = 1; + while (ePow2Count < eCount) { + ePow2Count *= 2; + } + + for (unsigned i = 0; i < ePow2Count; i++) { + if (i < eCount && GpOp_x[i]) { + tmpVec[i] = AA64FpOp1_x[i]; + } else { + tmpVec[i] = %(identity)s; + } + } + + unsigned n = ePow2Count; + while (n > 1) { + unsigned max = n; + n = 0; + for (unsigned i = 0; i < max; i += 2) { + Element srcElem1 = tmpVec[i]; + Element srcElem2 = tmpVec[i + 1]; + Element destElem = 0; + %(op)s + tmpVec[n] = destElem; + n++; + } + } + AA64FpDest_x[0] = tmpVec[0]; + for (unsigned i = 1; i < eCount; i++) { + AA64FpDest_x[i] = 0; // zero upper part + } + ''' % {'op': op, 'identity': identity} + iop = InstObjParams(name, 'Sve' + Name, 'SveReducOp', + {'code': code, 'op_class': opClass}, []) + header_output += SveReducOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in types: + substDict = {'targs' : type, + 'class_name' : 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict) + + # Generates definitions for binary SVE instructions with immediate operand + def sveBinImmInst(name, Name, opClass, types, op, predType=PredType.NONE, + decoder='Generic'): + global header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( + xc->tcBase()); + for (unsigned i = 0; i < eCount; i++) {''' + if predType != PredType.NONE: + code += ''' + const Element& srcElem1 = %s;''' % ( + 'AA64FpDestMerge_x[i]' if predType == PredType.MERGE else '0') + else: + code += ''' + const Element& srcElem1 = AA64FpOp1_x[i];''' + code += ''' + Element srcElem2 = imm; + Element destElem = 0;''' + if predType != PredType.NONE: + code += ''' + if (GpOp_x[i]) { + %(op)s + } else { + destElem = %(dest_elem)s; + }''' % {'op': op, + 'dest_elem': 'AA64FpDestMerge_x[i]' + if predType == PredType.MERGE else '0'} + else: + code += ''' + %(op)s''' % {'op': op} + code += ''' + AA64FpDest_x[i] = destElem; + }''' + iop = InstObjParams(name, 'Sve' + Name, + 'SveBinImmPredOp' if predType != PredType.NONE + else 'SveBinImmUnpredConstrOp', + {'code': code, 'op_class': opClass}, []) + if predType != PredType.NONE: + header_output += SveBinImmPredOpDeclare.subst(iop) + else: + header_output += SveBinImmUnpredOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in types: + substDict = {'targs' : type, + 'class_name' : 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict) + + # Generates definitions for unary and binary SVE instructions with wide + # immediate operand + def sveWideImmInst(name, Name, opClass, types, op, predType=PredType.NONE, + isUnary=False, decoder='Generic'): + global header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + unsigned eCount = 
ArmStaticInst::getCurSveVecLen<Element>( + xc->tcBase()); + for (unsigned i = 0; i < eCount; i++) {''' + # TODO: handle unsigned-to-signed conversion properly... + if isUnary: + code += ''' + Element srcElem1 = imm;''' + else: + code += ''' + const Element& srcElem1 = AA64FpDestMerge_x[i]; + Element srcElem2 = imm;''' + code += ''' + Element destElem = 0;''' + if predType != PredType.NONE: + code += ''' + if (GpOp_x[i]) { + %(op)s + } else { + destElem = %(dest_elem)s; + }''' % {'op': op, + 'dest_elem': 'AA64FpDestMerge_x[i]' + if predType == PredType.MERGE else '0'} + else: + code += ''' + %(op)s''' % {'op': op} + code += ''' + AA64FpDest_x[i] = destElem; + }''' + iop = InstObjParams(name, 'Sve' + Name, + 'Sve%sWideImm%sOp' % ( + 'Unary' if isUnary else 'Bin', + 'Unpred' if predType == PredType.NONE else 'Pred'), + {'code': code, 'op_class': opClass}, []) + if predType == PredType.NONE: + header_output += SveWideImmUnpredOpDeclare.subst(iop) + else: + header_output += SveWideImmPredOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in types: + substDict = {'targs' : type, + 'class_name' : 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict) + + # Generates definitions for shift SVE instructions with wide elements + def sveShiftByWideElemsInst(name, Name, opClass, types, op, + predType=PredType.NONE, decoder='Generic'): + global header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( + xc->tcBase()); + TheISA::VecRegContainer tmpVecC; + auto auxOp2 = tmpVecC.as<Element>(); + for (unsigned i = 0; i < eCount; i++) { + auxOp2[i] = AA64FpOp2_ud[i]; + } + for (unsigned i = 0; i < eCount; i++) {''' + if predType != PredType.NONE: + code += ''' + const Element& srcElem1 = AA64FpDestMerge_x[i];''' + else: + code += ''' + const Element& srcElem1 = AA64FpOp1_x[i];''' + code += ''' + const auto& srcElem2 = auxOp2[ + (i * sizeof(Element) * 8) / 64]; + Element destElem = 0;''' + if predType != PredType.NONE: + code += ''' + if (GpOp_x[i]) { + %(op)s + } else { + destElem = %(dest_elem)s; + }''' % {'op': op, + 'dest_elem': 'AA64FpDestMerge_x[i]' + if predType == PredType.MERGE else '0'} + else: + code += ''' + %(op)s''' % {'op': op} + code += ''' + AA64FpDest_x[i] = destElem; + }''' + iop = InstObjParams(name, 'Sve' + Name, + 'SveBinDestrPredOp' if predType != PredType.NONE + else 'SveBinUnpredOp', + {'code': code, 'op_class': opClass}, []) + if predType != PredType.NONE: + header_output += SveBinDestrPredOpDeclare.subst(iop) + else: + header_output += SveBinUnpredOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in types: + substDict = {'targs' : type, + 'class_name' : 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict) + + # Generates definitions for binary indexed SVE instructions + # (always unpredicated) + def sveBinIdxInst(name, Name, opClass, types, op, decoder='Generic'): + global header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( + xc->tcBase()); + + // Number of elements in a 128 bit segment + constexpr unsigned ePerSegment = 128 / sizeof(Element); + + ''' + + code += ''' + for (unsigned i = 0; i < eCount; i++) { + const auto segmentBase = i - i % ePerSegment; + const auto segmentIdx = segmentBase + index; + + const Element& srcElem1 = AA64FpOp1_x[i]; + const Element& srcElem2 = AA64FpOp2_x[segmentIdx]; + Element destElem = 0; + + ''' + + code += ''' + %(op)s + 
AA64FpDest_x[i] = destElem; + } + ''' % {'op': op} + + baseClass = 'SveBinIdxUnpredOp' + + iop = InstObjParams(name, 'Sve' + Name, baseClass, + {'code': code, 'op_class': opClass}, []) + header_output += SveBinIdxUnpredOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in types: + substDict = {'targs' : type, + 'class_name' : 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict) + + # Generates definitions for binary SVE instructions + def sveBinInst(name, Name, opClass, types, op, predType=PredType.NONE, + isDestructive=False, customIterCode=None, + decoder='Generic'): + assert not (predType in (PredType.NONE, PredType.SELECT) and + isDestructive) + global header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( + xc->tcBase());''' + if customIterCode is None: + code += ''' + for (unsigned i = 0; i < eCount; i++) {''' + if predType == PredType.MERGE: + code += ''' + const Element& srcElem1 = AA64FpDestMerge_x[i];''' + else: + code += ''' + const Element& srcElem1 = AA64FpOp1_x[i];''' + code += ''' + const Element& srcElem2 = AA64FpOp2_x[i]; + Element destElem = 0;''' + if predType != PredType.NONE: + code += ''' + if (GpOp_x[i]) { + %(op)s + } else { + destElem = %(dest_elem)s; + }''' % {'op': op, + 'dest_elem': + 'AA64FpDestMerge_x[i]' if predType == PredType.MERGE + else '0' if predType == PredType.ZERO + else 'srcElem2'} + else: + code += ''' + %(op)s''' % {'op': op} + code += ''' + AA64FpDest_x[i] = destElem; + }''' + else: + code += customIterCode + if predType == PredType.NONE: + baseClass = 'SveBinUnpredOp' + elif isDestructive: + baseClass = 'SveBinDestrPredOp' + else: + baseClass = 'SveBinConstrPredOp' + iop = InstObjParams(name, 'Sve' + Name, baseClass, + {'code': code, 'op_class': opClass}, []) + if predType == PredType.NONE: + header_output += SveBinUnpredOpDeclare.subst(iop) + elif isDestructive: + header_output += SveBinDestrPredOpDeclare.subst(iop) + else: + header_output += SveBinConstrPredOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in types: + substDict = {'targs' : type, + 'class_name' : 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict) + + # Generates definitions for predicate logical instructions + def svePredLogicalInst(name, Name, opClass, types, op, + predType=PredType.ZERO, isFlagSetting=False, + decoder='Generic'): + global header_output, exec_output, decoders + assert predType in (PredType.ZERO, PredType.SELECT) + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( + xc->tcBase()); + TheISA::VecPredRegContainer tmpPredC; + auto auxGpOp = tmpPredC.as<Element>(); + for (unsigned i = 0; i < eCount; i++) { + auxGpOp[i] = GpOp_x[i]; + } + for (unsigned i = 0; i < eCount; i++) { + bool srcElem1 = POp1_x[i]; + bool srcElem2 = POp2_x[i]; + bool destElem = false; + if (auxGpOp[i]) { + %(op)s + } else { + destElem = %(dest_elem)s; + } + PDest_x[i] = destElem; + }''' % {'op': op, + 'dest_elem': 'false' if predType == PredType.ZERO + else 'srcElem2'} + extraPrologCode = '' + if isFlagSetting: + code += ''' + CondCodesNZ = (destPred.firstActive(auxGpOp, eCount) << 1) | + destPred.noneActive(auxGpOp, eCount); + CondCodesC = !destPred.lastActive(auxGpOp, eCount); + CondCodesV = 0;''' + extraPrologCode += ''' + auto& destPred = PDest;''' + iop = InstObjParams(name, 'Sve' + Name, 'SvePredLogicalOp', + {'code': code, 'op_class': opClass}, []) + iop.snippets['code'] = extraPrologCode + 
iop.snippets['code'] + header_output += SvePredLogicalOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in types: + substDict = {'targs' : type, + 'class_name' : 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict) + + # Generates definitions for predicate permute instructions + def svePredBinPermInst(name, Name, opClass, types, iterCode, + decoder='Generic'): + global header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( + xc->tcBase());''' + code += iterCode + iop = InstObjParams(name, 'Sve' + Name, 'SvePredBinPermOp', + {'code': code, 'op_class': opClass}, []) + header_output += SveBinUnpredOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in types: + substDict = {'targs' : type, + 'class_name' : 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict) + + # Generates definitions for SVE compare instructions + # NOTE: compares are all predicated zeroing + def sveCmpInst(name, Name, opClass, types, op, isImm=False, + decoder='Generic'): + global header_output, exec_output, decoders + extraPrologCode = ''' + auto& destPred = PDest;''' + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( + xc->tcBase()); + TheISA::VecPredRegContainer tmpPredC; + auto tmpPred = tmpPredC.as<Element>(); + for (unsigned i = 0; i < eCount; ++i) + tmpPred[i] = GpOp_x[i]; + destPred.reset(); + for (unsigned i = 0; i < eCount; i++) { + const Element& srcElem1 = AA64FpOp1_x[i]; + %(src_elem_2_ty)s srcElem2 __attribute__((unused)) = + %(src_elem_2)s; + bool destElem = false; + if (tmpPred[i]) { + %(op)s + } else { + destElem = false; + } + PDest_x[i] = destElem; + }''' % {'op': op, + 'src_elem_2_ty': 'Element' if isImm else 'const Element&', + 'src_elem_2': 'imm' if isImm else 'AA64FpOp2_x[i]'} + iop = InstObjParams(name, 'Sve' + Name, + 'SveCmpImmOp' if isImm else 'SveCmpOp', + {'code': code, 'op_class': opClass}, []) + iop.snippets['code'] = extraPrologCode + iop.snippets['code'] + if isImm: + header_output += SveCmpImmOpDeclare.subst(iop) + else: + header_output += SveCmpOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in types: + substDict = {'targs' : type, + 'class_name' : 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict) + + # Generates definitions for ternary SVE instructions (always predicated - + # merging) + def sveTerInst(name, Name, opClass, types, op, decoder='Generic'): + global header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( + xc->tcBase()); + for (unsigned i = 0; i < eCount; i++) { + const Element& srcElem1 = AA64FpOp1_x[i]; + const Element& srcElem2 = AA64FpOp2_x[i]; + Element destElem = AA64FpDestMerge_x[i]; + if (GpOp_x[i]) { + %(op)s + } + AA64FpDest_x[i] = destElem; + }''' % {'op': op} + iop = InstObjParams(name, 'Sve' + Name, 'SveTerPredOp', + {'code': code, 'op_class': opClass}, []) + header_output += SveTerPredOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in types: + substDict = {'targs' : type, + 'class_name' : 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict) + + # Generates definitions for ternary SVE instructions with indexed operand + def sveTerIdxInst(name, Name, opClass, types, op, decoder='Generic'): + global header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + unsigned eCount =
ArmStaticInst::getCurSveVecLen<Element>( + xc->tcBase()); + + // Number of elements in a 128 bit segment + constexpr unsigned ePerSegment = 128 / sizeof(Element); + + for (unsigned i = 0; i < eCount; i++) { + const auto segmentBase = i - i % ePerSegment; + const auto segmentIdx = segmentBase + index; + + const Element& srcElem1 = AA64FpOp1_x[i]; + const Element& srcElem2 = AA64FpOp2_x[segmentIdx]; + Element destElem = AA64FpDestMerge_x[i]; + ''' + + code += ''' + %(op)s + AA64FpDest_x[i] = destElem; + }''' % {'op': op} + + iop = InstObjParams(name, 'Sve' + Name, 'SveBinIdxUnpredOp', + {'code': code, 'op_class': opClass}, []) + header_output += SveBinIdxUnpredOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in types: + substDict = {'targs' : type, + 'class_name' : 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict) + + # Generates definitions for ternary SVE instructions with immediate operand + # (always unpredicated) + def sveTerImmInst(name, Name, opClass, types, op, decoder='Generic'): + global header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( + xc->tcBase()); + for (unsigned i = 0; i < eCount; i++) { + const Element& srcElem2 = AA64FpOp2_x[i]; + Element srcElem3 = imm; + Element destElem = AA64FpDestMerge_x[i]; + %(op)s + AA64FpDest_x[i] = destElem; + }''' % {'op': op} + iop = InstObjParams(name, 'Sve' + Name, 'SveTerImmUnpredOp', + {'code': code, 'op_class': opClass}, []) + header_output += SveTerImmUnpredOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in types: + substDict = {'targs' : type, + 'class_name' : 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict) + + # Generates definitions for PTRUE and PTRUES instructions.
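# Reviewer note (not part of the patch): sveDecodePredCount maps the 5-bit
# pattern immediate to an element count, following the SVE pattern encoding:
# POW2 gives the largest power of two <= eCount, VL1-VL256 give that fixed
# count (or 0 when it exceeds eCount), MUL4/MUL3 give the largest multiple
# of 4/3 <= eCount, and ALL gives eCount itself. A worked example of the
# PTRUE body generated below, assuming 512-bit vectors and .d elements
# (eCount = 8) with pattern VL3:
#     predCount = sveDecodePredCount(imm, 8);    // VL3 -> 3
#     PDest     = {1, 1, 1, 0, 0, 0, 0, 0}       // first predCount lanes set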
+ def svePtrueInst(name, Name, opClass, types, isFlagSetting=False, + decoder='Generic'): + global header_output, exec_output, decoders + extraPrologCode = ''' + auto& destPred = PDest;''' + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( + xc->tcBase()); + unsigned predCount = sveDecodePredCount(imm, eCount); + destPred.reset(); + for (unsigned i = 0; i < eCount; i++) { + PDest_x[i] = (i < predCount); + }''' + if isFlagSetting: + code += ''' + CondCodesNZ = (destPred.firstActive(destPred, eCount) << 1) | + destPred.noneActive(destPred, eCount); + CondCodesC = !destPred.lastActive(destPred, eCount); + CondCodesV = 0;''' + iop = InstObjParams(name, 'Sve' + Name, 'SvePtrueOp', + {'code': code, 'op_class': opClass}, []) + iop.snippets['code'] = extraPrologCode + iop.snippets['code'] + header_output += SvePtrueOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in types: + substDict = {'targs' : type, + 'class_name' : 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict) + + # Generate definitions for integer CMP<cc> instructions + def sveIntCmpInst(name, Name, opClass, types, op, wideop = False, + decoder = 'Generic'): + global header_output, exec_output, decoders + signed = 'int8_t' in types + srcType = 'Element' + op2Suffix = 'x' + if wideop: + srcType = 'int64_t' if signed else 'uint64_t' + op2Suffix = 'sd' if signed else 'ud' + extraPrologCode = ''' + auto& destPred = PDest;''' + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( + xc->tcBase()); + TheISA::VecPredRegContainer tmpPredC; + auto tmpPred = tmpPredC.as<Element>(); + for (unsigned i = 0; i < eCount; ++i) + tmpPred[i] = GpOp_x[i]; + destPred.reset(); + for (unsigned i = 0; i < eCount; ++i) { + %(srcType)s srcElem1 = (%(srcType)s) AA64FpOp1_x[i]; + %(srcType)s srcElem2 = AA64FpOp2_%(op2Suffix)s[%(op2Index)s]; + bool destElem = false; + if (tmpPred[i]) { + %(op)s + } + PDest_x[i] = destElem; + } + CondCodesNZ = (destPred.firstActive(tmpPred, eCount) << 1) | + destPred.noneActive(tmpPred, eCount); + CondCodesC = !destPred.lastActive(tmpPred, eCount); + CondCodesV = 0;''' % { + 'op': op, + 'srcType': srcType, + 'op2Suffix': op2Suffix, + 'op2Index': '(i * sizeof(Element)) / 8' if wideop else 'i' + } + iop = InstObjParams(name, 'Sve' + Name, 'SveIntCmpOp', + { + 'code': code, + 'op_class': opClass, + 'op2IsWide': 'true' if wideop else 'false', + }, []) + iop.snippets['code'] = extraPrologCode + iop.snippets['code'] + header_output += SveIntCmpOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in types: + substDict = {'targs' : type, + 'class_name' : 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict) + + # Generate definitions for integer CMP<cc> instructions (with immediate) + def sveIntCmpImmInst(name, Name, opClass, types, op, decoder = 'Generic'): + global header_output, exec_output, decoders + extraPrologCode = ''' + auto& destPred = PDest;''' + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( + xc->tcBase()); + TheISA::VecPredRegContainer tmpPredC; + auto tmpPred = tmpPredC.as<Element>(); + for (unsigned i = 0; i < eCount; ++i) + tmpPred[i] = GpOp_x[i]; + destPred.reset(); + for (unsigned i = 0; i < eCount; ++i) { + Element srcElem1 = AA64FpOp1_x[i]; + Element srcElem2 = static_cast<Element>(imm); + bool destElem = false; + if (tmpPred[i]) { + %(op)s + } + PDest_x[i] = destElem; + } + CondCodesNZ = (destPred.firstActive(tmpPred, 
eCount) << 1) | + destPred.noneActive(tmpPred, eCount); + CondCodesC = !destPred.lastActive(tmpPred, eCount); + CondCodesV = 0;'''%{'op': op} + iop = InstObjParams(name, 'Sve' + Name, 'SveIntCmpImmOp', + {'code': code, 'op_class': opClass,}, []) + iop.snippets['code'] = extraPrologCode + iop.snippets['code'] + header_output += SveIntCmpImmOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in types: + substDict = {'targs' : type, + 'class_name' : 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict) + + # Generate definitions for SVE element count instructions + def sveElemCountInst(name, Name, opClass, types, op, + destType = DestType.Scalar, dstIs32b = False, + dstAcc = True, decoder = 'Generic'): + global header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( + xc->tcBase()); + unsigned count = sveDecodePredCount(pattern, eCount); + ''' + if destType == DestType.Vector: + code += ''' + for (unsigned i = 0; i < eCount; ++i) { + Element srcElem1 = AA64FpDestMerge_x[i]; + Element destElem = 0; + %(op)s + AA64FpDest_x[i] = destElem; + }'''%{'op': op} + else: + if 'uint16_t' in types: + if dstIs32b: + dstType = 'uint32_t' + else: + dstType = 'uint64_t' + else: + if dstIs32b: + dstType = 'int32_t' + else: + dstType = 'int64_t' + if dstAcc: + code += ''' + %(dstType)s srcElem1 = XDest; + '''%{'dstType': dstType} + code += ''' + %(dstType)s destElem = 0; + %(op)s; + XDest = destElem; + '''%{'op': op, 'dstType': dstType} + iop = InstObjParams(name, 'Sve' + Name, 'SveElemCountOp', + {'code': code, 'op_class': opClass, 'dstIsVec': destType, + 'dstIs32b': 'true' if dstIs32b else 'false'}, []) + header_output += SveElemCountOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in types: + substDict = {'targs' : type, + 'class_name' : 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict); + + def svePartBrkInst(name, Name, opClass, isFlagSetting, predType, whenBrk, + decoder = 'Generic'): + global header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<uint8_t>( + xc->tcBase()); + bool dobreak = false; + TheISA::VecPredRegContainer tmpPredC; + auto auxGpOp = tmpPredC.as<uint8_t>(); + for (unsigned i = 0; i < eCount; ++i) { + auxGpOp[i] = GpOp_ub[i]; + } + for (unsigned i = 0; i < eCount; ++i) { + bool element = POp1_ub[i] == 1; + if (auxGpOp[i]) {''' + breakCode = ''' + dobreak = dobreak || element;''' + if whenBrk == Break.Before: + code += breakCode + code += ''' + PDest_ub[i] = !dobreak;''' + if whenBrk == Break.After: + code += breakCode + code += ''' + }''' + if predType == PredType.ZERO: + code += ''' else { + PDest_ub[i] = 0; + }''' + elif predType == PredType.MERGE: + code += ''' else { + PDest_ub[i] = PDestMerge_ub[i]; + }''' + code += ''' + }''' + extraPrologCode = '' + if isFlagSetting: + code += ''' + CondCodesNZ = (destPred.firstActive(auxGpOp, eCount) << 1) | + destPred.noneActive(auxGpOp, eCount); + CondCodesC = !destPred.lastActive(auxGpOp, eCount); + CondCodesV = 0;''' + extraPrologCode += ''' + auto& destPred = PDest;''' + iop = InstObjParams(name, 'Sve' + Name, 'SvePartBrkOp', + {'code': code, 'op_class': opClass, + 'isMerging': 'true' if predType == PredType.MERGE + else 'false'}, []) + iop.snippets['code'] = extraPrologCode + iop.snippets['code'] + header_output += SvePartBrkOpDeclare.subst(iop) + exec_output += SveNonTemplatedOpExecute.subst(iop) + + def 
svePartBrkPropPrevInst(name, Name, opClass, isFlagSetting, whenBrk, + decoder = 'Generic'): + global header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<uint8_t>( + xc->tcBase()); + bool last = POp1_ub.lastActive(GpOp_ub, eCount); + TheISA::VecPredRegContainer tmpPredC; + auto auxGpOp = tmpPredC.as<uint8_t>(); + for (unsigned i = 0; i < eCount; ++i) { + auxGpOp[i] = GpOp_ub[i]; + } + for (unsigned i = 0; i < eCount; ++i) { + if (auxGpOp[i]) {''' + breakCode = ''' + last = last && (POp2_ub[i] == 0);''' + if whenBrk == Break.Before: + code += breakCode + code += ''' + PDest_ub[i] = last;''' + if whenBrk == Break.After: + code += breakCode + code += ''' + } else { + PDest_ub[i] = 0; + } + }''' + extraPrologCode = '' + if isFlagSetting: + code += ''' + CondCodesNZ = (destPred.firstActive(auxGpOp, eCount) << 1) | + destPred.noneActive(auxGpOp, eCount); + CondCodesC = !destPred.lastActive(auxGpOp, eCount); + CondCodesV = 0;''' + extraPrologCode += ''' + auto& destPred = PDest;''' + iop = InstObjParams(name, 'Sve' + Name, 'SvePartBrkPropOp', + {'code': code, 'op_class': opClass}, []) + iop.snippets['code'] = extraPrologCode + iop.snippets['code'] + header_output += SvePartBrkPropOpDeclare.subst(iop) + exec_output += SveNonTemplatedOpExecute.subst(iop) + + def svePartBrkPropNextInst(name, Name, opClass, isFlagSetting, + decoder = 'Generic'): + global header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<uint8_t>( + xc->tcBase()); + bool last = POp1_ub.lastActive(GpOp_ub, eCount); + for (unsigned i = 0; i < eCount; i++) { + if (!last) { + PDest_ub[i] = 0; + } else { + PDest_ub[i] = PDestMerge_ub[i]; + } + }''' + extraPrologCode = '' + if isFlagSetting: + code += ''' + VecPredRegT<uint8_t, MaxSveVecLenInBytes, false, false>::Container c; + VecPredRegT<uint8_t, MaxSveVecLenInBytes, false, false> predOnes(c); + for (unsigned i = 0; i < eCount; i++) { + predOnes[i] = 1; + } + CondCodesNZ = (destPred.firstActive(predOnes, eCount) << 1) | + destPred.noneActive(predOnes, eCount); + CondCodesC = !destPred.lastActive(predOnes, eCount); + CondCodesV = 0;''' + extraPrologCode += ''' + auto& destPred = PDest;''' + iop = InstObjParams(name, 'Sve' + Name, 'SvePartBrkPropOp', + {'code': code, 'op_class': opClass}, []) + iop.snippets['code'] = extraPrologCode + iop.snippets['code'] + header_output += SvePartBrkPropOpDeclare.subst(iop) + exec_output += SveNonTemplatedOpExecute.subst(iop) + + # Generate definitions for scalar select instructions + def sveSelectInst(name, Name, opClass, types, op, isCond, + destType = DstRegType.Scalar, decoder = 'Generic'): + global header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( + xc->tcBase()); + int last; + for (last = eCount - 1; last >= 0; --last) { + if (GpOp_x[last]) { + break; + } + } + ''' + if isCond: + code += ''' + if (last >= 0) {''' + code += ''' + Element destElem; + %(op)s'''%{'op': op} + if destType == DstRegType.Vector: + code += ''' + for (unsigned i = 0; i < eCount; ++i) + AA64FpDest_x[i] = destElem;''' + elif destType == DstRegType.Scalar: + code += ''' + XDest = destElem;''' + elif destType == DstRegType.SimdFpScalar: + code += ''' + AA64FpDest_x[0] = destElem;''' + if isCond: + code += ''' + }''' + if destType == DstRegType.Scalar: + code += ''' else { + XDest = (Element) XDest; + }''' + elif destType == DstRegType.Vector: + code += ''' 
else { + for (unsigned i = 0; i < eCount; ++i) + AA64FpDest_x[i] = AA64FpDestMerge_x[i]; + }''' + elif destType == DstRegType.SimdFpScalar: + code += ''' else { + AA64FpDest_x[0] = AA64FpDestMerge_x[0]; + }''' + iop = InstObjParams(name, 'Sve' + Name, 'SveSelectOp', + {'code': code, 'op_class': opClass, + 'isCond': 'true' if isCond else 'false', + 'isScalar': 'true' + if destType == DstRegType.Scalar else 'false', + 'isSimdFp': 'true' + if destType == DstRegType.SimdFpScalar + else 'false'}, + []) + header_output += SveSelectOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in types: + substDict = {'targs' : type, + 'class_name' : 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict) + + # Generate definitions for PNEXT (find next active predicate) + # instructions + def svePNextInst(name, Name, opClass, types, decoder = 'Generic'): + global header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( + xc->tcBase()); + TheISA::VecPredRegContainer tmpPredC; + auto auxGpOp = tmpPredC.as<Element>(); + for (unsigned i = 0; i < eCount; ++i) { + auxGpOp[i] = GpOp_x[i]; + } + int last; + for (last = eCount - 1; last >= 0; --last) { + if (POp1_x[last]) { + break; + } + } + int next = last + 1; + while (next < eCount && GpOp_x[next] == 0) { + next++; + } + destPred.reset(); + if (next < eCount) { + PDest_x[next] = 1; + } + CondCodesNZ = (destPred.firstActive(auxGpOp, eCount) << 1) | + destPred.noneActive(auxGpOp, eCount); + CondCodesC = !destPred.lastActive(auxGpOp, eCount); + CondCodesV = 0;''' + extraPrologCode = ''' + auto& destPred = PDest;''' + iop = InstObjParams(name, 'Sve' + Name, 'SveUnaryPredPredOp', + {'code': code, 'op_class': opClass}, []) + iop.snippets['code'] = extraPrologCode + iop.snippets['code'] + header_output += SveUnaryPredOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in types: + substDict = {'targs' : type, + 'class_name' : 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict) + + # Generate definitions for PFIRST (set first active predicate) + # instructions + def svePFirstInst(name, Name, opClass, decoder = 'Generic'): + global header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( + xc->tcBase()); + TheISA::VecPredRegContainer tmpPredC; + auto auxGpOp = tmpPredC.as<Element>(); + for (unsigned i = 0; i < eCount; ++i) + auxGpOp[i] = GpOp_x[i]; + int first = -1; + for (int i = 0; i < eCount; ++i) { + if (auxGpOp[i] && first == -1) { + first = i; + } + } + for (int i = 0; i < eCount; ++i) { + PDest_x[i] = PDestMerge_x[i]; + } + if (first >= 0) { + PDest_x[first] = 1; + } + CondCodesNZ = (destPred.firstActive(auxGpOp, eCount) << 1) | + destPred.noneActive(auxGpOp, eCount); + CondCodesC = !destPred.lastActive(auxGpOp, eCount); + CondCodesV = 0;''' + extraPrologCode = ''' + auto& destPred = PDest;''' + iop = InstObjParams(name, 'Sve' + Name, 'SveUnaryPredPredOp', + {'code': code, 'op_class': opClass}, []) + iop.snippets['code'] = extraPrologCode + iop.snippets['code'] + header_output += SveUnaryPredOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + substDict = {'targs' : 'uint8_t', + 'class_name' : 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict) + + # Generate definitions for SVE TBL instructions + def sveTblInst(name, Name, opClass, decoder = 'Generic'): + global header_output, exec_output, decoders + code = sveEnabledCheckCode + 
''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( + xc->tcBase()); + for (int i = 0; i < eCount; ++i) { + Element idx = AA64FpOp2_x[i]; + Element val; + if (idx < eCount) { + val = AA64FpOp1_x[idx]; + } else { + val = 0; + } + AA64FpDest_x[i] = val; + }''' + iop = InstObjParams(name, 'Sve' + Name, 'SveTblOp', + {'code': code, 'op_class': opClass}, []) + header_output += SveBinUnpredOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in unsignedTypes: + substDict = {'targs' : type, + 'class_name' : 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict) + + # Generate definitions for SVE Unpack instructions + def sveUnpackInst(name, Name, opClass, sdtypes, unpackHalf, + regType, decoder = 'Generic'): + global header_output, exec_output, decoders + extraPrologCode = ''' + auto& destPred = PDest;''' + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<DElement>( + xc->tcBase());''' + if unpackHalf == Unpack.Low: + if regType == SrcRegType.Predicate: + code += ''' + TheISA::VecPredRegContainer tmpPredC; + auto auxPOp1 = tmpPredC.as<SElement>(); + for (int i = 0; i < eCount; ++i) { + auxPOp1[i] = POp1_xs[i]; + }''' + else: + code += ''' + TheISA::VecRegContainer tmpVecC; + auto auxOp1 = tmpVecC.as<SElement>(); + for (int i = 0; i < eCount; ++i) { + auxOp1[i] = AA64FpOp1_xs[i]; + }''' + code += ''' + for (int i = 0; i < eCount; ++i) {''' + if regType == SrcRegType.Predicate: + if unpackHalf == Unpack.High: + code +=''' + const SElement& srcElem1 = POp1_xs[i + eCount];''' + else: + code +=''' + const SElement& srcElem1 = auxPOp1[i];''' + code += ''' + destPred.set_raw(i, 0); + PDest_xd[i] = srcElem1;''' + else: + if unpackHalf == Unpack.High: + code +=''' + const SElement& srcElem1 = AA64FpOp1_xs[i + eCount];''' + else: + code +=''' + const SElement& srcElem1 = auxOp1[i];''' + code += ''' + AA64FpDest_xd[i] = static_cast<DElement>(srcElem1);''' + code += ''' + } + ''' + iop = InstObjParams(name, 'Sve' + Name, 'SveUnpackOp', + {'code': code, 'op_class': opClass}, []) + if regType == SrcRegType.Predicate: + iop.snippets['code'] = extraPrologCode + iop.snippets['code'] + header_output += SveUnpackOpDeclare.subst(iop) + exec_output += SveWideningOpExecute.subst(iop) + for srcType, dstType in sdtypes: + substDict = {'targs': srcType + ', ' + dstType, + 'class_name': 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict) + + # Generate definition for SVE predicate test instructions + def svePredTestInst(name, Name, opClass, decoder = 'Generic'): + global header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<uint8_t>( + xc->tcBase()); + CondCodesNZ = (POp1_ub.firstActive(GpOp_ub, eCount) << 1) | + POp1_ub.noneActive(GpOp_ub, eCount); + CondCodesC = !POp1_ub.lastActive(GpOp_ub, eCount); + CondCodesV = 0;''' + iop = InstObjParams(name, 'Sve' + Name, 'SvePredTestOp', + {'code': code, 'op_class': opClass}, []) + header_output += SvePredicateTestOpDeclare.subst(iop) + exec_output += SveNonTemplatedOpExecute.subst(iop) + + # Generate definition for SVE predicate compact operations + def sveCompactInst(name, Name, opClass, types, decoder = 'Generic'): + global header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( + xc->tcBase()); + TheISA::VecRegContainer tmpVecC; + auto auxOp1 = tmpVecC.as<Element>(); + for (unsigned i = 0; i < eCount; ++i) { + auxOp1[i] = AA64FpOp1_x[i]; + } + 
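// Reviewer note (not part of the patch): the packing loop below completes
// COMPACT over the snapshot taken above. Each destination lane is zeroed
// first; active source lanes (GpOp true) are then copied, in order, to the
// lowest destination indices, so inactive tail lanes remain zero. E.g.
// src = {10, 20, 30, 40} with predicate {1, 0, 1, 0} yields {10, 30, 0, 0}.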
unsigned x = 0; + for (unsigned i = 0; i < eCount; ++i) { + AA64FpDest_x[i] = 0; + if (GpOp_x[i]) { + AA64FpDest_x[x] = auxOp1[i]; + x++; + } + }''' + iop = InstObjParams(name, 'Sve' + Name, 'SveUnaryPredOp', + {'code': code, 'op_class': opClass}, []) + header_output += SveUnaryPredOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in types: + substDict = {'targs': type, 'class_name': 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict) + + # Generate definition for unary SVE predicate instructions with implicit + # source operand (PFALSE, RDFFR(S)) + def svePredUnaryWImplicitSrcInst(name, Name, opClass, op, + predType=PredType.NONE, isFlagSetting=False, decoder='Generic'): + global header_output, exec_output, decoders + code = sveEnabledCheckCode + op + if isFlagSetting: + code += ''' + CondCodesNZ = (destPred.firstActive(GpOp, eCount) << 1) | + destPred.noneActive(GpOp, eCount); + CondCodesC = !destPred.lastActive(GpOp, eCount); + CondCodesV = 0;''' + extraPrologCode = ''' + auto& destPred M5_VAR_USED = PDest;''' + baseClass = ('SvePredUnaryWImplicitSrcOp' if predType == PredType.NONE + else 'SvePredUnaryWImplicitSrcPredOp') + iop = InstObjParams(name, 'Sve' + Name, baseClass, + {'code': code, 'op_class': opClass}, []) + iop.snippets['code'] = extraPrologCode + iop.snippets['code'] + if predType == PredType.NONE: + header_output += SvePredUnaryOpWImplicitSrcDeclare.subst(iop) + else: + header_output += SvePredUnaryPredOpWImplicitSrcDeclare.subst(iop) + exec_output += SveNonTemplatedOpExecute.subst(iop) + + # Generate definition for SVE instructions writing to the FFR (SETFFR, + # WRFFR) + def svePredWriteFfrInst(name, Name, opClass, op, isSetFfr, + decoder='Generic'): + global header_output, exec_output, decoders + code = sveEnabledCheckCode + op + extraPrologCode = ''' + auto& destPred M5_VAR_USED = Ffr;''' + baseClass = ('SveWImplicitSrcDstOp' if isSetFfr + else 'SvePredUnaryWImplicitDstOp') + iop = InstObjParams(name, 'Sve' + Name, baseClass, + {'code': code, 'op_class': opClass}, []) + iop.snippets['code'] = extraPrologCode + iop.snippets['code'] + if isSetFfr: + header_output += SveOpWImplicitSrcDstDeclare.subst(iop) + else: + header_output += SvePredUnaryOpWImplicitDstDeclare.subst(iop) + exec_output += SveNonTemplatedOpExecute.subst(iop) + + # Generate definition for SVE Ext instruction + def sveExtInst(name, Name, opClass, decoder = 'Generic'): + global header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( + xc->tcBase()); + TheISA::VecRegContainer tmpVecC; + auto auxOp1 = tmpVecC.as<Element>(); + for (unsigned i = 0; i < eCount; ++i) { + auxOp1[i] = AA64FpOp1_x[i]; + } + uint64_t pos = imm; + if (pos >= eCount) + pos = 0; + for (int i = 0; i < eCount; ++i, ++pos) + { + if (pos < eCount) + AA64FpDest_x[i] = AA64FpDestMerge_x[pos]; + else + AA64FpDest_x[i] = auxOp1[pos-eCount]; + } + ''' + iop = InstObjParams(name, 'Sve' + Name, 'SveBinImmUnpredDestrOp', + {'code': code, 'op_class': opClass}, []) + header_output += SveBinImmUnpredOpDeclare.subst(iop); + exec_output += SveOpExecute.subst(iop) + substDict = {'targs': 'uint8_t', 'class_name': 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict) + + # Generate definition for SVE Splice instruction + def sveSpliceInst(name, Name, opClass, types, decoder = 'Generic'): + global header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( +
xc->tcBase()); + TheISA::VecRegContainer tmpVecC; + auto auxDest = tmpVecC.as<Element>(); + int firstelem = -1, lastelem = -2; + for (int i = 0; i < eCount; ++i) { + if (GpOp_x[i]) { + lastelem = i; + if (firstelem < 0) + firstelem = i; + } + } + int x = 0; + for (int i = firstelem; i <= lastelem; ++i, ++x) { + auxDest[x] = AA64FpDestMerge_x[i]; + } + int remaining = eCount - x; + for (int i = 0; i < remaining; ++i, ++x) { + auxDest[x] = AA64FpOp2_x[i]; + } + for (int i = 0; i < eCount; ++i) { + AA64FpDest_x[i] = auxDest[i]; + } + ''' + iop = InstObjParams(name, 'Sve' + Name, 'SveBinDestrPredOp', + {'code': code, 'op_class': opClass}, []) + header_output += SveBinDestrPredOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in types: + substDict = {'targs': type, 'class_name': 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict) + + # Generate definition for SVE DUP (index) instruction + def sveDupIndexInst(name, Name, opClass, types, decoder = 'Generic'): + global header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( + xc->tcBase()); + Element srcElem1 = 0; + if (imm < eCount) { + srcElem1 = AA64FpOp1_x[imm]; + } + for (int i = 0; i < eCount; ++i) { + AA64FpDest_x[i] = srcElem1; + }''' + iop = InstObjParams(name, 'Sve' + Name, 'SveBinImmIdxUnpredOp', + {'code': code, 'op_class': opClass}, []) + header_output += SveBinImmUnpredOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in types: + substDict = {'targs': type, 'class_name': 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict) + + # Generate definition for SVE reverse elements instructions + def sveReverseElementsInst(name, Name, opClass, types, + srcType = SrcRegType.Vector, decoder = 'Generic'): + assert srcType in (SrcRegType.Vector, SrcRegType.Predicate) + global header_output, exec_output, decoders + extraPrologCode = ''' + auto& destPred = PDest;''' + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( + xc->tcBase());''' + if srcType == SrcRegType.Predicate: + code += ''' + TheISA::VecPredRegContainer tmpPredC; + auto auxPOp1 = tmpPredC.as<Element>(); + for (unsigned i = 0; i < eCount; ++i) { + uint8_t v = POp1_x.get_raw(i); + auxPOp1.set_raw(i, v); + } + PDest_x[0] = 0;''' + else: + code += ''' + TheISA::VecRegContainer tmpRegC; + auto auxOp1 = tmpRegC.as<Element>(); + for (unsigned i = 0; i < eCount; ++i) { + auxOp1[i] = AA64FpOp1_x[i]; + }''' + code += ''' + for (int i = 0; i < eCount; ++i) {''' + if srcType == SrcRegType.Vector: + code += ''' + AA64FpDest_x[i] = auxOp1[eCount - i - 1];''' + else: + code += ''' + destPred.set_raw(i, auxPOp1.get_raw(eCount - i - 1));''' + code += ''' + }''' + iop = InstObjParams(name, 'Sve' + Name, 'SveUnaryUnpredOp', + {'code': code, 'op_class': opClass}, []) + if srcType == SrcRegType.Predicate: + iop.snippets['code'] = extraPrologCode + iop.snippets['code'] + header_output += SveUnaryUnpredOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in types: + substDict = {'targs': type, 'class_name': 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict) + + # Generate definition for shift & insert instructions + def sveShiftAndInsertInst(name, Name, opClass, types, + srcType = SrcRegType.Scalar, decoder = 'Generic'): + assert srcType in (SrcRegType.SimdFpScalar, SrcRegType.Scalar) + global header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + unsigned eCount = 
ArmStaticInst::getCurSveVecLen<Element>( + xc->tcBase());''' + if srcType == SrcRegType.Scalar: + code += ''' + auto& srcElem1 = XOp1;''' + elif srcType == SrcRegType.SimdFpScalar: + code += ''' + auto& srcElem1 = AA64FpOp1_x[0];''' + code += ''' + for (int i = eCount - 1; i > 0; --i) { + AA64FpDest_x[i] = AA64FpDestMerge_x[i-1]; + } + AA64FpDest_x[0] = srcElem1;''' + iop = InstObjParams(name, 'Sve' + Name, 'SveUnarySca2VecUnpredOp', + {'code': code, 'op_class': opClass, + 'isSimdFp': 'true' if srcType == SrcRegType.SimdFpScalar + else 'false'}, []) + header_output += SveShiftAndInsertOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in types: + substDict = {'targs': type, 'class_name': 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict) + + # Generate definition for DOT instructions + def sveDotInst(name, Name, opClass, types, isIndexed = True): + global header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( + xc->tcBase()); + for (int i = 0; i < eCount; ++i) {''' + if isIndexed: + code += ''' + int segbase = i - i % (16 / sizeof(Element)); + int s = segbase + imm;''' + code += ''' + DElement res = AA64FpDestMerge_xd[i]; + DElement srcElem1, srcElem2; + for (int j = 0; j <= 3; ++j) { + srcElem1 = static_cast<DElement>(AA64FpOp1_xs[4 * i + j]);''' + if isIndexed: + code += ''' + srcElem2 = static_cast<DElement>(AA64FpOp2_xs[4 * s + j]);''' + else: + code += ''' + srcElem2 = static_cast<DElement>(AA64FpOp2_xs[4 * i + j]);''' + code += ''' + res += srcElem1 * srcElem2; + } + AA64FpDest_xd[i] = res; + }''' + iop = InstObjParams(name, 'Sve' + Name, + 'SveDotProdIdxOp' if isIndexed else + 'SveDotProdOp', + {'code': code, 'op_class': opClass}, []) + if isIndexed: + header_output += SveWideningTerImmOpDeclare.subst(iop) + else: + header_output += SveWideningTerOpDeclare.subst(iop) + exec_output += SveWideningOpExecute.subst(iop) + for type in types: + substDict = {'targs': type, 'class_name': 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict) + + # Generate definition for ordered reduction + def sveOrderedReduction(name, Name, opClass, types, op, + decoder = 'Generic'): + global header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( + xc->tcBase());''' + code += ''' + Element destElem = AA64FpDestMerge_x[0]; + for (int i = 0; i < eCount; ++i) { + if (GpOp_x[i]) { + Element srcElem1 = AA64FpOp1_x[i]; + %(op)s + } + } + for (int i = 1; i < eCount; ++i) { + AA64FpDest_x[i] = 0; + } + AA64FpDest_x[0] = destElem;'''%{'op': op} + iop = InstObjParams(name, 'Sve' + Name, 'SveOrdReducOp', + {'code': code, 'op_class': opClass}, []) + header_output += SveReducOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in types: + substDict = {'targs' : type, + 'class_name' : 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict) + + # Generate definitions for complex addition instructions + def sveComplexAddInst(name, Name, opClass, types, + decoder = 'Generic'): + global header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( + xc->tcBase());''' + code += ''' + bool sub_i = (rot == 1); + bool sub_r = (rot == 3); + for (int i = 0; i < eCount / 2; ++i) { + Element acc_r = AA64FpOp1_x[2 * i]; + Element acc_i = AA64FpOp1_x[2 * i + 1]; + Element elt2_r = AA64FpOp2_x[2 * i]; + Element elt2_i = AA64FpOp2_x[2 * i
+ 1]; + + FPSCR fpscr; + if (GpOp_x[2 * i]) { + if (sub_i) { + elt2_i = fplibNeg<Element>(elt2_i); + } + fpscr = (FPSCR) FpscrExc; + acc_r = fplibAdd<Element>(acc_r, elt2_i, fpscr); + FpscrExc = fpscr; + } + if (GpOp_x[2 * i + 1]) { + if (sub_r) { + elt2_r = fplibNeg<Element>(elt2_r); + } + fpscr = (FPSCR) FpscrExc; + acc_i = fplibAdd<Element>(acc_i, elt2_r, fpscr); + FpscrExc = fpscr; + } + + AA64FpDest_x[2 * i] = acc_r; + AA64FpDest_x[2 * i + 1] = acc_i; + } + ''' + iop = InstObjParams(name, 'Sve' + Name, 'SveComplexOp', + {'code': code, 'op_class': opClass}, []) + header_output += SveComplexOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in types: + substDict = {'targs' : type, + 'class_name' : 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict) + + # Generate definitions for complex multiply and accumulate instructions + def sveComplexMulAddInst(name, Name, opClass, types, + predType=PredType.NONE, decoder='Generic'): + assert predType in (PredType.NONE, PredType.MERGE) + global header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( + xc->tcBase());''' + code += ''' + // rot encodes the rotation in 90-degree steps: bit 0 selects which + // element of each source pair feeds the products, bit 1 negates the + // imaginary product, and the real product is negated when the two + // bits differ. + uint32_t sel_a = rot & 0x1; + uint32_t sel_b = sel_a ? 0 : 1; + bool neg_i = (rot & 0x2) == 2; + bool neg_r = (rot & 0x1) != ((rot & 0x2) >> 1);''' + if predType == PredType.NONE: + code += ''' + uint32_t eltspersegment = 16 / (2 * sizeof(Element));''' + code += ''' + for (int i = 0; i < eCount / 2; ++i) {''' + if predType == PredType.NONE: + code += ''' + uint32_t segmentbase = i - (i % eltspersegment); + uint32_t s = segmentbase + imm;''' + else: + code += ''' + uint32_t s = i;''' + code += ''' + Element addend_r = AA64FpDestMerge_x[2 * i]; + Element addend_i = AA64FpDestMerge_x[2 * i + 1]; + Element elt1_a = AA64FpOp1_x[2 * i + sel_a]; + Element elt2_a = AA64FpOp2_x[2 * s + sel_a]; + Element elt2_b = AA64FpOp2_x[2 * s + sel_b]; + FPSCR fpscr; + ''' + if predType != PredType.NONE: + code += ''' + if (GpOp_x[2 * i]) {''' + code += ''' + if (neg_r) { + elt2_a = fplibNeg<Element>(elt2_a); + } + fpscr = (FPSCR) FpscrExc; + addend_r = fplibMulAdd<Element>(addend_r, elt1_a, elt2_a, fpscr); + FpscrExc = fpscr;''' + if predType != PredType.NONE: + code += ''' + }''' + if predType != PredType.NONE: + code += ''' + if (GpOp_x[2 * i + 1]) {''' + code += ''' + if (neg_i) { + elt2_b = fplibNeg<Element>(elt2_b); + } + fpscr = (FPSCR) FpscrExc; + addend_i = fplibMulAdd<Element>(addend_i, elt1_a, elt2_b, fpscr); + FpscrExc = fpscr;''' + if predType != PredType.NONE: + code += ''' + }''' + code += ''' + AA64FpDest_x[2 * i] = addend_r; + AA64FpDest_x[2 * i + 1] = addend_i; + }''' + iop = InstObjParams(name, 'Sve' + Name, + 'SveComplexIdxOp' if predType == PredType.NONE + else 'SveComplexOp', + {'code': code, 'op_class': opClass}, []) + if predType == PredType.NONE: + header_output += SveComplexIndexOpDeclare.subst(iop) + else: + header_output += SveComplexOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in types: + substDict = {'targs' : type, + 'class_name' : 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict) + + fpTypes = ('uint16_t', 'uint32_t', 'uint64_t') + signedTypes = ('int8_t', 'int16_t', 'int32_t', 'int64_t') + unsignedTypes = ('uint8_t', 'uint16_t', 'uint32_t', 'uint64_t') + + smallSignedTypes = ('int8_t', 'int16_t', 'int32_t') + bigSignedTypes = ('int16_t', 'int32_t', 'int64_t') + smallUnsignedTypes = ('uint8_t', 'uint16_t', 'uint32_t') + bigUnsignedTypes = ('uint16_t', 'uint32_t',
'uint64_t') + + unsignedWideSDTypes = (('uint8_t', 'uint16_t'), + ('uint16_t', 'uint32_t'), ('uint32_t', 'uint64_t')) + signedWideSDTypes = (('int8_t', 'int16_t'), + ('int16_t', 'int32_t'), ('int32_t', 'int64_t')) + + # ABS + absCode = 'destElem = (Element) std::abs(srcElem1);' + sveUnaryInst('abs', 'Abs', 'SimdAluOp', signedTypes, absCode, + PredType.MERGE) + # ADD (immediate) + addCode = 'destElem = srcElem1 + srcElem2;' + sveWideImmInst('add', 'AddImm', 'SimdAddOp', unsignedTypes, addCode) + # ADD (vectors, predicated) + sveBinInst('add', 'AddPred', 'SimdAddOp', unsignedTypes, addCode, + PredType.MERGE, True) + # ADD (vectors, unpredicated) + sveBinInst('add', 'AddUnpred', 'SimdAddOp', unsignedTypes, addCode) + # ADDPL + addvlCode = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<uint%d_t>( + xc->tcBase()); + XDest = XOp1 + eCount * (int64_t) imm; + ''' + buildXImmDataInst('addpl', addvlCode % 64, buildCc=False) + # ADDVL + buildXImmDataInst('addvl', addvlCode % 8, buildCc=False) + # ADR + adrCode = ''' + if (offsetFormat == SveAdrOffsetUnpackedSigned) { + srcElem2 = sext<32>(srcElem2 & mask(32)); + } else if (offsetFormat == SveAdrOffsetUnpackedUnsigned) { + srcElem2 = srcElem2 & mask(32); + } + destElem = srcElem1 + srcElem2 * mult; + ''' + sveAdrInst('adr', 'Adr', 'SimdAddOp', ('uint32_t', 'uint64_t'), adrCode) + # AND (immediate) + andCode = 'destElem = srcElem1 & srcElem2;' + sveWideImmInst('and', 'AndImm', 'SimdAluOp', ('uint64_t',), andCode) + # AND (vectors, predicated) + sveBinInst('and', 'AndPred', 'SimdAluOp', unsignedTypes, andCode, + PredType.MERGE, True) + # AND (vectors, unpredicated) + andCode = 'destElem = srcElem1 & srcElem2;' + sveBinInst('and', 'AndUnpred', 'SimdAluOp', ('uint64_t',), andCode) + # AND, ANDS (predicates) + svePredLogicalInst('and', 'PredAnd', 'SimdPredAluOp', ('uint8_t',), + andCode) + svePredLogicalInst('ands', 'PredAnds', 'SimdPredAluOp', ('uint8_t',), + andCode, isFlagSetting=True) + # ANDV + andvCode = 'destElem &= srcElem1;' + sveAssocReducInst('andv', 'Andv', 'SimdReduceAluOp', unsignedTypes, + andvCode, 'std::numeric_limits<Element>::max()') + # ASR (immediate, predicated) + asrCode = ''' + int sign_bit = bits(srcElem1, sizeof(Element) * 8 - 1); + if (srcElem2 == 0) { + destElem = srcElem1; + } else if (srcElem2 >= sizeof(Element) * 8) { + destElem = sign_bit ?
std::numeric_limits<Element>::max() : 0; + } else { + destElem = srcElem1 >> srcElem2; + if (sign_bit) { + destElem |= ~mask(sizeof(Element) * 8 - srcElem2); + } + } + ''' + sveBinImmInst('asr', 'AsrImmPred', 'SimdAluOp', unsignedTypes, asrCode, + PredType.MERGE) + # ASR (immediate, unpredicated) + sveBinImmInst('asr', 'AsrImmUnpred', 'SimdAluOp', unsignedTypes, asrCode) + # ASR (vectors) + sveBinInst('asr', 'AsrPred', 'SimdAluOp', unsignedTypes, asrCode, + PredType.MERGE, True) + # ASR (wide elements, predicated) + sveShiftByWideElemsInst('asr', 'AsrWidePred', 'SimdAluOp', unsignedTypes, + asrCode, PredType.MERGE) + # ASR (wide elements, unpredicated) + sveShiftByWideElemsInst('asr', 'AsrWideUnpred', 'SimdAluOp', unsignedTypes, + asrCode) + # ASRD + asrdCode = ''' + Element element1 = srcElem1; + Element shift = srcElem2; + if (srcElem1 < 0) { + Element tmp = ((1L << shift) - 1L); + if (tmp == -1L) { + element1 = 0; + } else { + element1 = element1 + tmp; + } + } + destElem = (element1 >> shift); + ''' + sveBinImmInst('asrd', 'Asrd', 'SimdAluOp', signedTypes, asrdCode, + PredType.MERGE) + # ASRR + asrrCode = ''' + int sign_bit = bits(srcElem2, sizeof(Element) * 8 - 1); + if (srcElem1 == 0) { + destElem = srcElem2; + } else if (srcElem1 >= sizeof(Element) * 8) { + destElem = sign_bit ? std::numeric_limits<Element>::max() : 0; + } else { + destElem = srcElem2 >> srcElem1; + if (sign_bit) { + destElem |= ~mask(sizeof(Element) * 8 - srcElem1); + } + } + ''' + sveBinInst('asrr', 'Asrr', 'SimdAluOp', unsignedTypes, asrrCode, + PredType.MERGE, True) + # BIC (vectors, predicated) + bicCode = 'destElem = srcElem1 & ~srcElem2;' + sveBinInst('bic', 'BicPred', 'SimdAluOp', unsignedTypes, bicCode, + PredType.MERGE, True) + # BIC (vectors, unpredicated) + bicCode = 'destElem = srcElem1 & ~srcElem2;' + sveBinInst('bic', 'BicUnpred', 'SimdAluOp', unsignedTypes, bicCode) + # BIC, BICS (predicates) + svePredLogicalInst('bic', 'PredBic', 'SimdPredAluOp', ('uint8_t',), + bicCode) + svePredLogicalInst('bics', 'PredBics', 'SimdPredAluOp', ('uint8_t',), + bicCode, isFlagSetting=True) + # BRKA (merging) + svePartBrkInst('brka', 'Brkam', 'SimdPredAluOp', isFlagSetting = False, + predType = PredType.MERGE, whenBrk = Break.After) + # BRKA (zeroing) + svePartBrkInst('brka', 'Brkaz', 'SimdPredAluOp', isFlagSetting = False, + predType = PredType.ZERO, whenBrk = Break.After) + # BRKAS + svePartBrkInst('brkas', 'Brkas', 'SimdPredAluOp', isFlagSetting = True, + predType = PredType.ZERO, whenBrk = Break.After) + # BRKB (merging) + svePartBrkInst('brkb', 'Brkbm', 'SimdPredAluOp', isFlagSetting = False, + predType = PredType.MERGE, whenBrk = Break.Before) + # BRKB (zeroing) + svePartBrkInst('brkb', 'Brkbz', 'SimdPredAluOp', isFlagSetting = False, + predType = PredType.ZERO, whenBrk = Break.Before) + # BRKBS + svePartBrkInst('brkbs', 'Brkbs', 'SimdPredAluOp', isFlagSetting = True, + predType = PredType.ZERO, whenBrk = Break.Before) + # BRKN + svePartBrkPropNextInst('brkn', 'Brkn', 'SimdPredAluOp', + isFlagSetting = False) + # BRKNS + svePartBrkPropNextInst('brkns', 'Brkns', 'SimdPredAluOp', + isFlagSetting = True) + # BRKPA + svePartBrkPropPrevInst('brkpa', 'Brkpa', 'SimdPredAluOp', + isFlagSetting = False, whenBrk = Break.After) + # BRKPAS + svePartBrkPropPrevInst('brkpas', 'Brkpas', 'SimdPredAluOp', + isFlagSetting = True, whenBrk = Break.After) + # BRKPB + svePartBrkPropPrevInst('brkpb', 'Brkpb', 'SimdPredAluOp', + isFlagSetting = False, whenBrk = Break.Before) + # BRKPBS + svePartBrkPropPrevInst('brkpbs', 'Brkpbs',
'SimdPredAluOp', + isFlagSetting = True, whenBrk = Break.Before) + # CLASTA (scalar) + clastaCode = ''' + last++; + if (last >= eCount) + last = 0; + destElem = AA64FpOp1_x[last];''' + sveSelectInst('clasta', 'Clasta', 'SimdAluOp', unsignedTypes, clastaCode, + isCond = True, destType = DstRegType.Scalar) + # CLASTA (SIMD&FP scalar) + sveSelectInst('clasta', 'Clastaf', 'SimdAluOp', unsignedTypes, clastaCode, + isCond = True, destType = DstRegType.SimdFpScalar) + # CLASTA (vector) + sveSelectInst('clasta', 'Clastav', 'SimdAluOp', unsignedTypes, clastaCode, + isCond = True, destType = DstRegType.Vector) + # CLASTB (scalar) + clastbCode = ''' + destElem = AA64FpOp1_x[last];''' + sveSelectInst('clastb', 'Clastb', 'SimdAluOp', unsignedTypes, clastbCode, + isCond = True, destType = DstRegType.Scalar) + # CLASTB (SIMD&FP scalar) + sveSelectInst('clastb', 'Clastbf', 'SimdAluOp', unsignedTypes, clastbCode, + isCond = True, destType = DstRegType.SimdFpScalar) + # CLASTB (vectors) + sveSelectInst('clastb', 'Clastbv', 'SimdAluOp', unsignedTypes, clastbCode, + isCond = True, destType = DstRegType.Vector) + # CLS + clsCode = ''' + destElem = 0; + Element val = srcElem1; + if (val < 0) { + val <<= 1; + while (val < 0) { + destElem++; + val <<= 1; + } + } else { + val <<= 1; + while (val >= 0 && destElem < sizeof(Element) * 8 - 1) { + destElem++; + val <<= 1; + } + } + ''' + sveUnaryInst('cls', 'Cls', 'SimdAluOp', signedTypes, clsCode, + PredType.MERGE) + # CLZ + clzCode = ''' + destElem = 0; + Element val = srcElem1; + while (val >= 0 && destElem < sizeof(Element) * 8) { + destElem++; + val <<= 1; + } + ''' + sveUnaryInst('clz', 'Clz', 'SimdAluOp', signedTypes, clzCode, + PredType.MERGE) + # CMPEQ (immediate) + cmpeqCode = ''' + destElem = (srcElem1 == srcElem2); + ''' + sveIntCmpImmInst('cmpeq', 'Cmpeqi', 'SimdCmpOp', unsignedTypes, cmpeqCode) + # CMPEQ (vectors) + sveIntCmpInst('cmpeq', 'Cmpeq', 'SimdCmpOp', unsignedTypes, cmpeqCode) + # CMPEQ (wide elements) + sveIntCmpInst('cmpeq', 'Cmpeqw', 'SimdCmpOp', smallUnsignedTypes, + cmpeqCode, True) + # CMPGE (immediate) + cmpgeCode = ''' + destElem = (srcElem1 >= srcElem2); + ''' + sveIntCmpImmInst('cmpge', 'Cmpgei', 'SimdCmpOp', signedTypes, cmpgeCode) + # CMPGE (vectors) + sveIntCmpInst('cmpge', 'Cmpge', 'SimdCmpOp', signedTypes, cmpgeCode) + # CMPGE (wide elements) + sveIntCmpInst('cmpge', 'Cmpgew', 'SimdCmpOp', smallSignedTypes, + cmpgeCode, True) + # CMPGT (immediate) + cmpgtCode = ''' + destElem = (srcElem1 > srcElem2); + ''' + sveIntCmpImmInst('cmpgt', 'Cmpgti', 'SimdCmpOp', signedTypes, cmpgtCode) + # CMPGT (vectors) + sveIntCmpInst('cmpgt', 'Cmpgt', 'SimdCmpOp', signedTypes, cmpgtCode) + # CMPGT (wide elements) + sveIntCmpInst('cmpgt', 'Cmpgtw', 'SimdCmpOp', smallSignedTypes, + cmpgtCode, True) + # CMPHI (immediate) + sveIntCmpImmInst('cmphi', 'Cmphii', 'SimdCmpOp', unsignedTypes, cmpgtCode) + # CMPHI (vectors) + sveIntCmpInst('cmphi', 'Cmphi', 'SimdCmpOp', unsignedTypes, cmpgtCode) + # CMPHI (wide elements) + sveIntCmpInst('cmphi', 'Cmphiw', 'SimdCmpOp', smallUnsignedTypes, + cmpgtCode, True) + # CMPHS (immediate) + sveIntCmpImmInst('cmphs', 'Cmphsi', 'SimdCmpOp', unsignedTypes, cmpgeCode) + # CMPHS (vectors) + sveIntCmpInst('cmphs', 'Cmphs', 'SimdCmpOp', unsignedTypes, cmpgeCode) + # CMPHS (wide elements) + sveIntCmpInst('cmphs', 'Cmphsw', 'SimdCmpOp', smallUnsignedTypes, + cmpgeCode, True) + # CMPLE (immediate) + cmpleCode = ''' + destElem = (srcElem1 <= srcElem2); + ''' + sveIntCmpImmInst('cmple', 'Cmplei', 'SimdCmpOp', signedTypes, cmpleCode)
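# Reviewer note (not part of the patch): in the wide-element forms generated
# with wideop=True, the second operand is read as 64-bit lanes and each
# Element lane i of the first operand is compared against the doubleword
# that contains it, via op2Index = (i * sizeof(Element)) / 8. For .b
# elements, lanes 0-7 all compare against AA64FpOp2_ud[0], lanes 8-15
# against AA64FpOp2_ud[1], and so on.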
+ # CMPLE (wide elements) + sveIntCmpInst('cmple', 'Cmplew', 'SimdCmpOp', smallSignedTypes, + cmpleCode, True) + # CMPLO (immediate) + cmpltCode = ''' + destElem = (srcElem1 < srcElem2); + ''' + sveIntCmpImmInst('cmplo', 'Cmploi', 'SimdCmpOp', unsignedTypes, cmpltCode) + # CMPLO (wide elements) + sveIntCmpInst('cmplo', 'Cmplow', 'SimdCmpOp', smallUnsignedTypes, + cmpltCode, True) + # CMPLS (immediate) + sveIntCmpImmInst('cmpls', 'Cmplsi', 'SimdCmpOp', unsignedTypes, cmpleCode) + # CMPLS (wide elements) + sveIntCmpInst('cmpls', 'Cmplsw', 'SimdCmpOp', smallUnsignedTypes, + cmpleCode, True) + # CMPLT (immediate) + sveIntCmpImmInst('cmplt', 'Cmplti', 'SimdCmpOp', signedTypes, cmpltCode) + # CMPLT (wide elements) + sveIntCmpInst('cmplt', 'Cmpltw', 'SimdCmpOp', smallSignedTypes, + cmpltCode, True) + # CMPNE (immediate) + cmpneCode = ''' + destElem = (srcElem1 != srcElem2); + ''' + sveIntCmpImmInst('cmpne', 'Cmpnei', 'SimdCmpOp', unsignedTypes, cmpneCode) + # CMPNE (vectors) + sveIntCmpInst('cmpne', 'Cmpne', 'SimdCmpOp', unsignedTypes, cmpneCode) + # CMPNE (wide elements) + sveIntCmpInst('cmpne', 'Cmpnew', 'SimdCmpOp', smallUnsignedTypes, + cmpneCode, True) + # CNOT + cnotCode = ''' + destElem = srcElem1 ? 0 : 1; + ''' + sveUnaryInst('cnot', 'Cnot', 'SimdAluOp', unsignedTypes, cnotCode, + PredType.MERGE) + # CNT + cntCode = ''' + destElem = 0; + Element val = srcElem1; + while (val) { + destElem += val & 0x1; + val >>= 1; + } + ''' + sveUnaryInst('cnt', 'Cnt', 'SimdAluOp', unsignedTypes, cntCode, + PredType.MERGE) + # CNTB, CNTD, CNTH, CNTW + cntxCode = ''' + destElem = (count * imm); + ''' + sveElemCountInst('cnt', 'Cntx', 'SimdAluOp', unsignedTypes, cntxCode, + destType = DestType.Scalar, dstIs32b = False, dstAcc = False) + # COMPACT + sveCompactInst('compact', 'Compact', 'SimdPredAluOp', + ('uint32_t', 'uint64_t')) + # CPY (immediate) + dupCode = 'destElem = srcElem1;' + sveWideImmInst('cpy', 'CpyImmMerge', 'SimdAluOp', unsignedTypes, dupCode, + predType=PredType.MERGE, isUnary=True) + sveWideImmInst('cpy', 'CpyImmZero', 'SimdAluOp', unsignedTypes, dupCode, + predType=PredType.ZERO, isUnary=True) + # CPY (scalar) + sveUnaryInst('cpy', 'CpyScalar', 'SimdAluOp', unsignedTypes, dupCode, + PredType.MERGE, srcRegType=SrcRegType.Scalar) + # CPY (SIMD&FP scalar) + sveUnaryInst('cpy', 'CpySimdFpScalar', 'SimdAluOp', unsignedTypes, dupCode, + PredType.MERGE, srcRegType=SrcRegType.SimdFpScalar) + # CNTP + svePredCountPredInst('cntp', 'Cntp', 'SimdAluOp', unsignedTypes) + # CTERMEQ + cteqCode = ''' + destElem = srcElem1 == srcElem2; + ''' + sveCompTermInst('ctermeq', 'Ctermeq', 'IntAluOp', + ['uint32_t', 'uint64_t'], cteqCode) + # CTERMNE + ctneCode = ''' + destElem = srcElem1 != srcElem2; + ''' + sveCompTermInst('ctermne', 'Ctermne', 'IntAluOp', + ['uint32_t', 'uint64_t'], ctneCode) + # DECB, DECH, DECW, DECD (scalar) + decxCode = ''' + destElem = srcElem1 - (count * imm); + ''' + sveElemCountInst('dec', 'Dec', 'SimdAluOp', unsignedTypes, decxCode, + destType = DestType.Scalar, dstIs32b = False) + # DECH, DECW, DECD (vector) + sveElemCountInst('dec', 'Decv', 'SimdAluOp', bigUnsignedTypes, decxCode, + destType = DestType.Vector, dstIs32b = False) + # DECP (scalar) + decpCode = ''' + XDest = XDest - count; + ''' + svePredCountInst('decp', 'Decp', 'SimdAluOp', unsignedTypes, decpCode, + DestType.Scalar, SrcSize.Src64bit) + # DECP (vector) + decpvCode = ''' + destElem = srcElem - count; + ''' + svePredCountInst('decp', 'Decpv', 'SimdAluOp', unsignedTypes, decpvCode, + DestType.Vector) + # DUP (immediate) +
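# Reviewer note (not part of the patch): DUP (immediate) below reuses
# dupCode through sveWideImmInst with isUnary=True, so the generated
# per-lane body reduces to destElem = imm for every lane, with no governing
# predicate. E.g. dup z0.s, #42 writes 42 to all 32-bit lanes of z0.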
sveWideImmInst('dup', 'DupImm', 'SimdAluOp', unsignedTypes, dupCode, + isUnary=True) + # DUP (indexed) + sveDupIndexInst('mov', 'DupIdx', 'SimdAluOp', + list(unsignedTypes) + ['__uint128_t']) + # DUP (scalar) + sveUnaryInst('dup', 'DupScalar', 'SimdAluOp', unsignedTypes, dupCode, + PredType.NONE, srcRegType=SrcRegType.Scalar) + # DUPM + sveWideImmInst('dupm', 'Dupm', 'SimdAluOp', unsignedTypes, dupCode, + isUnary=True) + # EOR (immediate) + eorCode = 'destElem = srcElem1 ^ srcElem2;' + sveWideImmInst('eor', 'EorImm', 'SimdAluOp', ('uint64_t',), eorCode) + # EOR (vectors, predicated) + sveBinInst('eor', 'EorPred', 'SimdAluOp', unsignedTypes, eorCode, + PredType.MERGE, True) + # EOR (vectors, unpredicated) + eorCode = 'destElem = srcElem1 ^ srcElem2;' + sveBinInst('eor', 'EorUnpred', 'SimdAluOp', ('uint64_t',), eorCode) + # EOR, EORS (predicates) + svePredLogicalInst('eor', 'PredEor', 'SimdPredAluOp', ('uint8_t',), + eorCode) + svePredLogicalInst('eors', 'PredEors', 'SimdPredAluOp', ('uint8_t',), + eorCode, isFlagSetting=True) + # EORV + eorvCode = 'destElem ^= srcElem1;' + sveAssocReducInst('eorv', 'Eorv', 'SimdReduceAluOp', unsignedTypes, + eorvCode, '0') + # EXT + sveExtInst('ext', 'Ext', 'SimdAluOp') + # FABD + fpOp = ''' + FPSCR fpscr = (FPSCR) FpscrExc; + destElem = %s; + FpscrExc = fpscr; + ''' + fabdCode = fpOp % 'fplibAbs<Element>(fplibSub(srcElem1, srcElem2, fpscr))' + sveBinInst('fabd', 'Fabd', 'SimdFloatAddOp', floatTypes, fabdCode, + PredType.MERGE, True) + # FABS + fabsCode = 'destElem = fplibAbs<Element>(srcElem1);' + sveUnaryInst('fabs', 'Fabs', 'SimdFloatAluOp', fpTypes, fabsCode, + PredType.MERGE) + # FACGE + fpCmpAbsOp = fpOp % ('fplibCompare%s<Element>(fplibAbs<Element>(srcElem1),' + ' fplibAbs<Element>(srcElem2), fpscr)') + facgeCode = fpCmpAbsOp % 'GE' + sveCmpInst('facge', 'Facge', 'SimdFloatCmpOp', fpTypes, facgeCode) + # FACGT + facgtCode = fpCmpAbsOp % 'GT' + sveCmpInst('facgt', 'Facgt', 'SimdFloatCmpOp', fpTypes, facgtCode) + # FADD (immediate) + fpBinOp = fpOp % 'fplib%s<Element>(srcElem1, srcElem2, fpscr)' + faddCode = fpBinOp % 'Add' + sveBinImmInst('fadd', 'FaddImm', 'SimdFloatAddOp', floatTypes, faddCode, + PredType.MERGE) + # FADD (vectors, predicated) + sveBinInst('fadd', 'FaddPred', 'SimdFloatAddOp', floatTypes, faddCode, + PredType.MERGE, True) + # FADD (vectors, unpredicated) + sveBinInst('fadd', 'FaddUnpred', 'SimdFloatAddOp', floatTypes, faddCode) + # FADDA + fpAddaOp = ''' + FPSCR fpscr = (FPSCR) FpscrExc; + destElem = fplibAdd<Element>(destElem, srcElem1, fpscr); + FpscrExc = FpscrExc | fpscr; + ''' + sveOrderedReduction('fadda', 'Fadda', 'SimdFloatReduceAddOp', floatTypes, + fpAddaOp) + # FADDV + fpReduceOp = ''' + FPSCR fpscr = (FPSCR) FpscrExc; + destElem = fplib%s<Element>(srcElem1, srcElem2, fpscr); + FpscrExc = FpscrExc | fpscr; + ''' + faddvCode = fpReduceOp % 'Add' + sveNonAssocReducInst('faddv', 'Faddv', 'SimdFloatReduceAddOp', floatTypes, + faddvCode, '0') + # FCADD + sveComplexAddInst('fcadd','Fcadd', 'SimdFloatAddOp', fpTypes) + # FCMEQ (vectors) + fpCmpOp = fpOp % ('fplibCompare%s<Element>(srcElem1, srcElem2, fpscr)') + fcmeqCode = fpCmpOp % 'EQ' + sveCmpInst('fcmeq', 'Fcmeq', 'SimdFloatCmpOp', fpTypes, fcmeqCode) + # FCMEQ (zero) + fpCmpZeroOp = fpOp % 'fplibCompare%s<Element>(srcElem1, 0, fpscr)' + fcmeqZeroCode = fpCmpZeroOp % 'EQ' + sveCmpInst('fcmeq', 'FcmeqZero', 'SimdFloatCmpOp', fpTypes, fcmeqZeroCode, + True) + # FCMGE (vectors) + fcmgeCode = fpCmpOp % 'GE' + sveCmpInst('fcmge', 'Fcmge', 'SimdFloatCmpOp', fpTypes, fcmgeCode) + # 
FCMGE (zero) + fcmgeZeroCode = fpCmpZeroOp % 'GE' + sveCmpInst('fcmge', 'FcmgeZero', 'SimdFloatCmpOp', fpTypes, fcmgeZeroCode, + True) + # FCMGT (vectors) + fcmgtCode = fpCmpOp % 'GT' + sveCmpInst('fcmgt', 'Fcmgt', 'SimdFloatCmpOp', fpTypes, fcmgtCode) + # FCMGT (zero) + fcmgtZeroCode = fpCmpZeroOp % 'GT' + sveCmpInst('fcmgt', 'FcmgtZero', 'SimdFloatCmpOp', fpTypes, fcmgtZeroCode, + True) + # FCMLE (zero) + fpCmpRevZeroOp = fpOp % ('fplibCompare%s<Element>(0, srcElem1, fpscr)') + fcmleZeroCode = fpCmpRevZeroOp % 'GE' + sveCmpInst('fcmle', 'FcmleZero', 'SimdFloatCmpOp', fpTypes, fcmleZeroCode, + True) + # FCMLT (zero) + fcmltZeroCode = fpCmpRevZeroOp % 'GT' + sveCmpInst('fcmlt', 'FcmltZero', 'SimdFloatCmpOp', fpTypes, fcmltZeroCode, + True) + # FCMNE (vectors) + fcmneCode = fpOp % ('!fplibCompareEQ<Element>(srcElem1, srcElem2, fpscr)') + sveCmpInst('fcmne', 'Fcmne', 'SimdFloatCmpOp', fpTypes, fcmneCode) + # FCMNE (zero) + fcmneZeroCode = fpOp % ('!fplibCompareEQ<Element>(srcElem1, 0, fpscr)') + sveCmpInst('fcmne', 'FcmneZero', 'SimdFloatCmpOp', fpTypes, fcmneZeroCode, + True) + # FCMUO (vectors) + fcmuoCode = fpCmpOp % 'UN' + sveCmpInst('fcmuo', 'Fcmuo', 'SimdFloatCmpOp', fpTypes, fcmuoCode) + # FCMLA (indexed) + sveComplexMulAddInst('fcmla', 'Fcmlai', 'SimdFloatMultAccOp', + fpTypes[1:], predType = PredType.NONE) + # FCMLA (vectors) + sveComplexMulAddInst('fcmla', 'Fcmlav', 'SimdFloatMultAccOp', + fpTypes, predType = PredType.MERGE) + # FCPY + sveWideImmInst('fcpy', 'Fcpy', 'SimdAluOp', unsignedTypes, dupCode, + predType=PredType.MERGE, isUnary=True) + # FCVT + fcvtCode = fpOp % ('fplibConvert<SElement, DElement>(' + 'srcElem1, FPCRRounding(fpscr), fpscr)') + sveCvtInst('fcvt', 'FcvtNarrow', 'SimdCvtOp', + ('uint32_t, uint16_t', + 'uint64_t, uint16_t', + 'uint64_t, uint32_t'), + fcvtCode, CvtDir.Narrow) + sveCvtInst('fcvt', 'FcvtWiden', 'SimdCvtOp', + ('uint16_t, uint32_t', + 'uint16_t, uint64_t', + 'uint32_t, uint64_t'), + fcvtCode, CvtDir.Widen) + # FCVTZS + fcvtIntCode = fpOp % ('fplibFPToFixed<SElement, DElement>(' + 'srcElem1, %s, %s, %s, fpscr)') + fcvtzsCode = fcvtIntCode % ('0', 'false', 'FPRounding_ZERO') + sveCvtInst('fcvtzs', 'FcvtzsNarrow', 'SimdCvtOp', + ('uint16_t, uint16_t', + 'uint32_t, uint32_t', + 'uint64_t, uint32_t', + 'uint64_t, uint64_t'), + fcvtzsCode, CvtDir.Narrow) + sveCvtInst('fcvtzs', 'FcvtzsWiden', 'SimdCvtOp', + ('uint16_t, uint32_t', + 'uint16_t, uint64_t', + 'uint32_t, uint64_t'), + fcvtzsCode, CvtDir.Widen) + # FCVTZU + fcvtzuCode = fcvtIntCode % ('0', 'true', 'FPRounding_ZERO') + sveCvtInst('fcvtzu', 'FcvtzuNarrow', 'SimdCvtOp', + ('uint16_t, uint16_t', + 'uint32_t, uint32_t', + 'uint64_t, uint32_t', + 'uint64_t, uint64_t'), + fcvtzuCode, CvtDir.Narrow) + sveCvtInst('fcvtzu', 'FcvtzuWiden', 'SimdCvtOp', + ('uint16_t, uint32_t', + 'uint16_t, uint64_t', + 'uint32_t, uint64_t'), + fcvtzuCode, CvtDir.Widen) + # FDIV + fdivCode = fpBinOp % 'Div' + sveBinInst('fdiv', 'Fdiv', 'SimdFloatDivOp', floatTypes, fdivCode, + PredType.MERGE, True) + # FDIVR + fpBinRevOp = fpOp % 'fplib%s<Element>(srcElem2, srcElem1, fpscr)' + fdivrCode = fpBinRevOp % 'Div' + sveBinInst('fdivr', 'Fdivr', 'SimdFloatDivOp', floatTypes, fdivrCode, + PredType.MERGE, True) + # FDUP + sveWideImmInst('fdup', 'Fdup', 'SimdFloatAluOp', floatTypes, dupCode, + isUnary=True) + # FEXPA + fexpaCode = 'destElem = fplibExpA<Element>(srcElem1);' + sveUnaryInst('fexpa', 'Fexpa', 'SimdFloatAluOp', fpTypes, fexpaCode) + # FMAD + fmadCode = fpOp % ('fplibMulAdd<Element>(' + 'srcElem1, destElem, srcElem2, 
fpscr)') + sveTerInst('fmad', 'Fmad', 'SimdFloatMultAccOp', floatTypes, fmadCode, + PredType.MERGE) + # FMAX (immediate) + fmaxCode = fpBinOp % 'Max' + sveBinImmInst('fmax', 'FmaxImm', 'SimdFloatCmpOp', floatTypes, fmaxCode, + PredType.MERGE) + # FMAX (vectors) + sveBinInst('fmax', 'Fmax', 'SimdFloatCmpOp', floatTypes, fmaxCode, + PredType.MERGE, True) + # FMAXNM (immediate) + fmaxnmCode = fpBinOp % 'MaxNum' + sveBinImmInst('fmaxnm', 'FmaxnmImm', 'SimdFloatCmpOp', floatTypes, + fmaxnmCode, PredType.MERGE) + # FMAXNM (vectors) + sveBinInst('fmaxnm', 'Fmaxnm', 'SimdFloatCmpOp', floatTypes, fmaxnmCode, + PredType.MERGE, True) + # FMAXNMV + fmaxnmvCode = fpReduceOp % 'MaxNum' + sveNonAssocReducInst('fmaxnmv', 'Fmaxnmv', 'SimdFloatReduceCmpOp', + floatTypes, fmaxnmvCode, 'fplibDefaultNaN<Element>()') + # FMAXV + fmaxvCode = fpReduceOp % 'Max' + sveNonAssocReducInst('fmaxv', 'Fmaxv', 'SimdFloatReduceCmpOp', floatTypes, + fmaxvCode, 'fplibInfinity<Element>(1)') + # FMIN (immediate) + fminCode = fpBinOp % 'Min' + sveBinImmInst('fmin', 'FminImm', 'SimdFloatCmpOp', floatTypes, fminCode, + PredType.MERGE) + # FMIN (vectors) + sveBinInst('fmin', 'Fmin', 'SimdFloatCmpOp', floatTypes, fminCode, + PredType.MERGE, True) + # FMINNM (immediate) + fminnmCode = fpBinOp % 'MinNum' + sveBinImmInst('fminnm', 'FminnmImm', 'SimdFloatCmpOp', floatTypes, + fminnmCode, PredType.MERGE) + # FMINNM (vectors) + sveBinInst('fminnm', 'Fminnm', 'SimdFloatCmpOp', floatTypes, fminnmCode, + PredType.MERGE, True) + # FMINNMV + fminnmvCode = fpReduceOp % 'MinNum' + sveNonAssocReducInst('fminnmv', 'Fminnmv', 'SimdFloatReduceCmpOp', + floatTypes, fminnmvCode, 'fplibDefaultNaN<Element>()') + # FMINV + fminvCode = fpReduceOp % 'Min' + sveNonAssocReducInst('fminv', 'Fminv', 'SimdFloatReduceCmpOp', floatTypes, + fminvCode, 'fplibInfinity<Element>(0)') + fmlaCode = fpOp % ('fplibMulAdd<Element>(' + 'destElem, srcElem1, srcElem2, fpscr)') + # FMLA (indexed) + sveTerIdxInst('fmla', 'FmlaIdx', 'SimdFloatMultAccOp', floatTypes, + fmlaCode, PredType.MERGE) + # FMLA (vectors) + sveTerInst('fmla', 'Fmla', 'SimdFloatMultAccOp', floatTypes, fmlaCode, + PredType.MERGE) + fmlsCode = fpOp % ('fplibMulAdd<Element>(destElem, ' + 'fplibNeg<Element>(srcElem1), srcElem2, fpscr)') + # FMLS (indexed) + sveTerIdxInst('fmls', 'FmlsIdx', 'SimdFloatMultAccOp', floatTypes, + fmlsCode, PredType.MERGE) + # FMLS (vectors) + sveTerInst('fmls', 'Fmls', 'SimdFloatMultAccOp', floatTypes, fmlsCode, + PredType.MERGE) + # FMSB + fmsbCode = fpOp % ('fplibMulAdd<Element>(srcElem1, ' + 'fplibNeg<Element>(destElem), srcElem2, fpscr)') + sveTerInst('fmsb', 'Fmsb', 'SimdFloatMultAccOp', floatTypes, fmsbCode, + PredType.MERGE) + # FMUL (immediate) + fpBinOp = fpOp % 'fplib%s<Element>(srcElem1, srcElem2, fpscr)' + fmulCode = fpBinOp % 'Mul' + sveBinImmInst('fmul', 'FmulImm', 'SimdFloatMultOp', floatTypes, fmulCode, + PredType.MERGE) + # FMUL (vectors, predicated) + fmulCode = fpBinOp % 'Mul' + sveBinInst('fmul', 'FmulPred', 'SimdFloatMultOp', floatTypes, fmulCode, + PredType.MERGE, True) + # FMUL (vectors, unpredicated) + sveBinInst('fmul', 'FmulUnpred', 'SimdFloatMultOp', floatTypes, fmulCode) + # FMUL (indexed) + sveBinIdxInst('fmul', 'FmulIdx', 'SimdFloatMultOp', floatTypes, fmulCode) + + # FMULX + fmulxCode = fpBinOp % 'MulX' + sveBinInst('fmulx', 'Fmulx', 'SimdFloatMultOp', floatTypes, fmulxCode, + PredType.MERGE, True) + # FNEG + fnegCode = 'destElem = fplibNeg<Element>(srcElem1);' + sveUnaryInst('fneg', 'Fneg', 'SimdFloatAluOp', fpTypes,
fnegCode, + PredType.MERGE) + # FNMAD + fnmadCode = fpOp % ('fplibMulAdd<Element>(' + 'fplibNeg<Element>(srcElem1), ' + 'fplibNeg<Element>(destElem), srcElem2, fpscr)') + sveTerInst('fnmad', 'Fnmad', 'SimdFloatMultAccOp', floatTypes, fnmadCode, + PredType.MERGE) + # FNMLA + fnmlaCode = fpOp % ('fplibMulAdd<Element>(' + 'fplibNeg<Element>(destElem), ' + 'fplibNeg<Element>(srcElem1), srcElem2, fpscr)') + sveTerInst('fnmla', 'Fnmla', 'SimdFloatMultAccOp', floatTypes, fnmlaCode, + PredType.MERGE) + # FNMLS + fnmlsCode = fpOp % ('fplibMulAdd<Element>(' + 'fplibNeg<Element>(destElem), srcElem1, srcElem2, ' + 'fpscr)') + sveTerInst('fnmls', 'Fnmls', 'SimdFloatMultAccOp', floatTypes, fnmlsCode, + PredType.MERGE) + # FNMSB + fnmsbCode = fpOp % ('fplibMulAdd<Element>(' + 'fplibNeg<Element>(srcElem1), destElem, srcElem2, ' + 'fpscr)') + sveTerInst('fnmsb', 'Fnmsb', 'SimdFloatMultAccOp', floatTypes, fnmsbCode, + PredType.MERGE) + # FRECPE + frecpeCode = fpOp % 'fplibRecipEstimate<Element>(srcElem1, fpscr)' + sveUnaryInst('frecpe', 'Frecpe', 'SimdFloatMultAccOp', floatTypes, + frecpeCode) + # FRECPS + frecpsCode = fpBinOp % 'RecipStepFused' + sveBinInst('frecps', 'Frecps', 'SimdFloatMultAccOp', floatTypes, + frecpsCode) + # FRECPX + frecpxCode = fpOp % "fplibRecpX<Element>(srcElem1, fpscr)" + sveUnaryInst('frecpx', 'Frecpx', 'SimdFloatMultAccOp', floatTypes, + frecpxCode, PredType.MERGE) + # FRINTA + frintCode = fpOp % 'fplibRoundInt<Element>(srcElem1, %s, %s, fpscr)' + frintaCode = frintCode % ('FPRounding_TIEAWAY', 'false') + sveUnaryInst('frinta', 'Frinta', 'SimdCvtOp', floatTypes, frintaCode, + PredType.MERGE) + # FRINTI + frintiCode = frintCode % ('FPCRRounding(fpscr)', 'false') + sveUnaryInst('frinti', 'Frinti', 'SimdCvtOp', floatTypes, frintiCode, + PredType.MERGE) + # FRINTM + frintmCode = frintCode % ('FPRounding_NEGINF', 'false') + sveUnaryInst('frintm', 'Frintm', 'SimdCvtOp', floatTypes, frintmCode, + PredType.MERGE) + # FRINTN + frintnCode = frintCode % ('FPRounding_TIEEVEN', 'false') + sveUnaryInst('frintn', 'Frintn', 'SimdCvtOp', floatTypes, frintnCode, + PredType.MERGE) + # FRINTP + frintpCode = frintCode % ('FPRounding_POSINF', 'false') + sveUnaryInst('frintp', 'Frintp', 'SimdCvtOp', floatTypes, frintpCode, + PredType.MERGE) + # FRINTX + frintxCode = frintCode % ('FPCRRounding(fpscr)', 'true') + sveUnaryInst('frintx', 'Frintx', 'SimdCvtOp', floatTypes, frintxCode, + PredType.MERGE) + # FRINTZ + frintzCode = frintCode % ('FPRounding_ZERO', 'false') + sveUnaryInst('frintz', 'Frintz', 'SimdCvtOp', floatTypes, frintzCode, + PredType.MERGE) + # FRSQRTE + frsqrteCode = fpOp % 'fplibRSqrtEstimate<Element>(srcElem1, fpscr)' + sveUnaryInst('frsqrte', 'Frsqrte', 'SimdFloatSqrtOp', floatTypes, + frsqrteCode) + # FRSQRTS + frsqrtsCode = fpBinOp % 'RSqrtStepFused' + sveBinInst('frsqrts', 'Frsqrts', 'SimdFloatMiscOp', floatTypes, + frsqrtsCode) + # FSCALE + fscaleCode = fpBinOp % 'Scale' + sveBinInst('fscale', 'Fscale', 'SimdFloatMiscOp', floatTypes, fscaleCode, + PredType.MERGE, True) + # FSQRT + fsqrtCode = fpOp % "fplibSqrt<Element>(srcElem1, fpscr)" + sveUnaryInst('fsqrt', 'Fsqrt', 'SimdFloatSqrtOp', floatTypes, fsqrtCode, + PredType.MERGE) + # FSUB (immediate) + fsubCode = fpBinOp % 'Sub' + sveBinImmInst('fsub', 'FsubImm', 'SimdFloatAddOp', floatTypes, fsubCode, + PredType.MERGE) + # FSUB (vectors, predicated) + sveBinInst('fsub', 'FsubPred', 'SimdFloatAddOp', floatTypes, fsubCode, + PredType.MERGE, True) + # FSUB (vectors, unpredicated) + sveBinInst('fsub', 'FsubUnpred', 'SimdFloatAddOp', 
floatTypes, fsubCode) + # FSUBR (immediate) + fsubrCode = fpBinRevOp % 'Sub' + sveBinImmInst('fsubr', 'FsubrImm', 'SimdFloatAddOp', floatTypes, fsubrCode, + PredType.MERGE) + # FSUBR (vectors) + sveBinInst('fsubr', 'Fsubr', 'SimdFloatAddOp', floatTypes, fsubrCode, + PredType.MERGE, True) + # FTMAD + ftmadCode = fpOp % ('fplibTrigMulAdd<Element>(' + 'srcElem3, destElem, srcElem2, fpscr)') + sveTerImmInst('ftmad', 'Ftmad', 'SimdFloatMultAccOp', floatTypes, + ftmadCode) + # FTSMUL + ftsmulCode = fpBinOp % 'TrigSMul' + sveBinInst('ftsmul', 'Ftsmul', 'SimdFloatMiscOp', floatTypes, ftsmulCode) + # FTSSEL + ftsselCode = fpBinOp % 'TrigSSel' + sveBinInst('ftssel', 'Ftssel', 'SimdFloatMultOp', floatTypes, ftsselCode) + # INCB, INCH, INCW, INCD (scalar) + incxCode = ''' + destElem = srcElem1 + (count * imm); + ''' + sveElemCountInst('inc', 'Inc', 'SimdAluOp', unsignedTypes, incxCode, + destType = DestType.Scalar, dstIs32b = False) + # INCH, INCW, INCD (vector) + sveElemCountInst('inc', 'Incv', 'SimdAluOp', bigUnsignedTypes, incxCode, + destType = DestType.Vector, dstIs32b = False) + # INCP (scalar) + incpCode = ''' + XDest = XDest + count; + ''' + svePredCountInst('incp', 'Incp', 'SimdAluOp', unsignedTypes, incpCode, + DestType.Scalar, SrcSize.Src64bit) + # INCP (vector) + incpvCode = ''' + destElem = srcElem + count; + ''' + svePredCountInst('incp', 'Incpv', 'SimdAluOp', unsignedTypes, incpvCode, + DestType.Vector) + # INDEX (immediate, scalar) + sveIndex(IndexFormat.ImmReg) + # INDEX (immediates) + sveIndex(IndexFormat.ImmImm) + # INDEX (scalar, immediate) + sveIndex(IndexFormat.RegImm) + # INDEX (scalars) + sveIndex(IndexFormat.RegReg) + # INSR (scalar) + sveShiftAndInsertInst('insr', 'Insr', 'SimdAluOp', unsignedTypes, + srcType = SrcRegType.Scalar) + # INSR (SIMD&FP scalar) + sveShiftAndInsertInst('insr', 'Insrf', 'SimdAluOp', unsignedTypes, + srcType = SrcRegType.SimdFpScalar) + # LASTA (scalar) + lastaCode = ''' + last++; + if (last >= eCount) { + last = 0; + } + destElem = AA64FpOp1_x[last];''' + sveSelectInst('lasta', 'Lasta', 'SimdAluOp', unsignedTypes, lastaCode, + isCond = False) + # LASTA (SIMD&FP scalar) + sveSelectInst('lasta', 'Lastaf', 'SimdAluOp', unsignedTypes, lastaCode, + isCond = False, destType = DstRegType.SimdFpScalar) + # LASTB (scalar) + lastbCode = ''' + if (last < 0) { + last = eCount - 1; + } + destElem = AA64FpOp1_x[last];''' + sveSelectInst('lastb', 'Lastb', 'SimdAluOp', unsignedTypes, lastbCode, + isCond = False) + # LASTB (SIMD&FP scalar) + sveSelectInst('lastb', 'Lastbf', 'SimdAluOp', unsignedTypes, lastbCode, + isCond = False, destType = DstRegType.SimdFpScalar) + # LSL (immediate, predicated) + lslCode = ''' + if (srcElem2 == 0) { + destElem = srcElem1; + } else if (srcElem2 >= sizeof(Element) * 8) { + destElem = 0; + } else { + destElem = srcElem1 << srcElem2; + } + ''' + sveBinImmInst('lsl', 'LslImmPred', 'SimdAluOp', unsignedTypes, lslCode, + PredType.MERGE) + # LSL (immediate, unpredicated) + sveBinImmInst('lsl', 'LslImmUnpred', 'SimdAluOp', unsignedTypes, lslCode) + # LSL (vectors) + sveBinInst('lsl', 'LslPred', 'SimdAluOp', unsignedTypes, lslCode, + PredType.MERGE, True) + # LSL (wide elements, predicated) + sveShiftByWideElemsInst('lsl', 'LslWidePred', 'SimdAluOp', unsignedTypes, + lslCode, PredType.MERGE) + # LSL (wide elements, unpredicated) + sveShiftByWideElemsInst('lsl', 'LslWideUnpred', 'SimdAluOp', unsignedTypes, + lslCode) + # LSLR + lslrCode = ''' + if (srcElem1 == 0) { + destElem = srcElem2; + } else if (srcElem1 >= sizeof(Element) * 8) { + 
destElem = 0; + } else { + destElem = srcElem2 << srcElem1; + } + ''' + sveBinInst('lslr', 'Lslr', 'SimdAluOp', unsignedTypes, lslrCode, + PredType.MERGE, True) + # LSR (immediate, predicated) + lsrCode = ''' + if (srcElem2 >= sizeof(Element) * 8) { + destElem = 0; + } else { + destElem = srcElem1 >> srcElem2; + } + ''' + sveBinImmInst('lsr', 'LsrImmPred', 'SimdAluOp', unsignedTypes, lsrCode, + PredType.MERGE) + # LSR (immediate, unpredicated) + sveBinImmInst('lsr', 'LsrImmUnpred', 'SimdAluOp', unsignedTypes, lsrCode) + # LSR (vectors) + sveBinInst('lsr', 'LsrPred', 'SimdAluOp', unsignedTypes, lsrCode, + PredType.MERGE, True) + # LSR (wide elements, predicated) + sveShiftByWideElemsInst('lsr', 'LsrWidePred', 'SimdAluOp', unsignedTypes, + lsrCode, PredType.MERGE) + # LSR (wide elements, unpredicated) + sveShiftByWideElemsInst('lsr', 'LsrWideUnpred', 'SimdAluOp', unsignedTypes, + lsrCode) + # LSRR + lsrrCode = ''' + if (srcElem1 >= sizeof(Element) * 8) { + destElem = 0; + } else { + destElem = srcElem2 >> srcElem1; + } + ''' + sveBinInst('lsrr', 'Lsrr', 'SimdAluOp', unsignedTypes, lsrrCode, + PredType.MERGE, True) + # MAD + madCode = 'destElem = srcElem1 + destElem * srcElem2;' + sveTerInst('mad', 'Mad', 'SimdMultAccOp', signedTypes, madCode) + # MLA + mlaCode = 'destElem += srcElem1 * srcElem2;' + sveTerInst('mla', 'Mla', 'SimdMultAccOp', signedTypes, mlaCode) + # MLS + mlsCode = 'destElem -= srcElem1 * srcElem2;' + sveTerInst('mls', 'Mls', 'SimdMultAccOp', signedTypes, mlsCode) + # MOVPRFX (predicated) + movCode = 'destElem = srcElem1;' + sveUnaryInst('movprfx', 'MovprfxPredM', 'SimdMiscOp', unsignedTypes, + movCode, PredType.MERGE) + sveUnaryInst('movprfx', 'MovprfxPredZ', 'SimdMiscOp', unsignedTypes, + movCode, PredType.ZERO) + # MOVPRFX (unpredicated) + sveUnaryInst('movprfx', 'MovprfxUnpred', 'SimdMiscOp', ('uint64_t',), + movCode) + # MSB + msbCode = 'destElem = srcElem1 - destElem * srcElem2;' + sveTerInst('msb', 'Msb', 'SimdMultAccOp', signedTypes, msbCode) + # MUL (immediate) + mulCode = 'destElem = srcElem1 * srcElem2;' + sveWideImmInst('mul', 'MulImm', 'SimdMultOp', unsignedTypes, mulCode) + # MUL (vectors) + sveBinInst('mul', 'Mul', 'SimdMultOp', unsignedTypes, mulCode, + PredType.MERGE, True) + # NAND, NANDS + nandCode = 'destElem = !(srcElem1 & srcElem2);'; + svePredLogicalInst('nand', 'PredNand', 'SimdPredAluOp', ('uint8_t',), + nandCode) + svePredLogicalInst('nands', 'PredNands', 'SimdPredAluOp', ('uint8_t',), + nandCode, isFlagSetting=True) + # NEG + negCode = 'destElem = -srcElem1;' + sveUnaryInst('neg', 'Neg', 'SimdAluOp', signedTypes, negCode, + PredType.MERGE) + # NOR, NORS + norCode = 'destElem = !(srcElem1 | srcElem2);'; + svePredLogicalInst('nor', 'PredNor', 'SimdPredAluOp', ('uint8_t',), + norCode) + svePredLogicalInst('nors', 'PredNors', 'SimdPredAluOp', ('uint8_t',), + norCode, isFlagSetting=True) + # NOT (vector) + notCode = 'destElem = ~srcElem1;' + sveUnaryInst('not', 'Not', 'SimdAluOp', unsignedTypes, notCode, + PredType.MERGE) + # ORN, ORNS (predicates) + ornCode = 'destElem = srcElem1 | !srcElem2;'; + svePredLogicalInst('orn', 'PredOrn', 'SimdPredAluOp', ('uint8_t',), + ornCode) + svePredLogicalInst('orns', 'PredOrns', 'SimdPredAluOp', ('uint8_t',), + ornCode, isFlagSetting=True) + # ORR (immediate) + orCode = 'destElem = srcElem1 | srcElem2;' + sveWideImmInst('orr', 'OrrImm', 'SimdAluOp', ('uint64_t',), orCode) + # ORR (vectors, predicated) + sveBinInst('orr', 'OrrPred', 'SimdAluOp', unsignedTypes, orCode, + PredType.MERGE, True) + # ORR (vectors, 
unpredicated) + orCode = 'destElem = srcElem1 | srcElem2;' + sveBinInst('orr', 'OrrUnpred', 'SimdAluOp', ('uint64_t',), orCode) + # ORR, ORRS (predicates) + svePredLogicalInst('orr', 'PredOrr', 'SimdPredAluOp', ('uint8_t',), orCode) + svePredLogicalInst('orrs', 'PredOrrs', 'SimdPredAluOp', ('uint8_t',), + orCode, isFlagSetting=True) + # ORV + orvCode = 'destElem |= srcElem1;' + sveAssocReducInst('orv', 'Orv', 'SimdReduceAluOp', unsignedTypes, + orvCode, '0') + # PFALSE + pfalseCode = ''' + PDest_ub[0] = 0; + destPred.reset(); + ''' + svePredUnaryWImplicitSrcInst('pfalse', 'Pfalse', 'SimdPredAluOp', + pfalseCode) + # PFIRST + svePFirstInst('pfirst', 'Pfirst', 'SimdPredAluOp') + # PNEXT + svePNextInst('pnext', 'Pnext', 'SimdPredAluOp', unsignedTypes) + # PTEST + svePredTestInst('ptest', 'Ptest', 'SimdPredAluOp') + # PTRUE + svePtrueInst('ptrue', 'Ptrue', 'SimdPredAluOp', unsignedTypes, False) + # PTRUES + svePtrueInst('ptrues', 'Ptrues', 'SimdPredAluOp', unsignedTypes, True) + # PUNPKHI + sveUnpackInst('punpkhi', 'Punpkhi', 'SimdPredAluOp', unsignedWideSDTypes, + unpackHalf = Unpack.High, regType = SrcRegType.Predicate) + # PUNPKLO + sveUnpackInst('punpklo', 'Punpklo', 'SimdPredAluOp', unsignedWideSDTypes, + unpackHalf = Unpack.Low, regType = SrcRegType.Predicate) + # RBIT + rbitCode = ''' + destElem = reverseBits(srcElem1);''' + sveUnaryInst('rbit', 'Rbit', 'SimdAluOp', unsignedTypes, rbitCode, + predType=PredType.MERGE, srcRegType=SrcRegType.Vector) + # RDFFR (unpredicated) + rdffrUnpredCode = ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<uint8_t>( + xc->tcBase()); + for (unsigned i = 0; i < eCount; i++) { + PDest_ub[i] = Ffr_ub[i]; + }''' + svePredUnaryWImplicitSrcInst('rdffr', 'RdffrUnpred', 'SimdPredAluOp', + rdffrUnpredCode) + # RDFFR, RDFFRS (predicated) + rdffrPredCode = ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<uint8_t>( + xc->tcBase()); + for (unsigned i = 0; i < eCount; i++) { + if (GpOp_ub[i]) { + PDest_ub[i] = Ffr_ub[i]; + } else { + PDest_ub[i] = false; + } + }''' + svePredUnaryWImplicitSrcInst('rdffr', 'RdffrPred', 'SimdPredAluOp', + rdffrPredCode, PredType.ZERO, False) + svePredUnaryWImplicitSrcInst('rdffrs', 'RdffrsPred', 'SimdPredAluOp', + rdffrPredCode, PredType.ZERO, True) + # RDVL + rdvlCode = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<uint8_t>( + xc->tcBase()); + XDest = eCount * (int64_t) imm; + ''' + rdvlIop = InstObjParams('rdvl', 'SveRdvl', 'RegImmOp', rdvlCode, []) + header_output += RegImmOpDeclare.subst(rdvlIop) + decoder_output += RegImmOpConstructor.subst(rdvlIop) + exec_output += BasicExecute.subst(rdvlIop) + # REV (predicate) + sveReverseElementsInst('rev', 'Revp', 'SimdPredAluOp', unsignedTypes, + srcType = SrcRegType.Predicate) + # REV (vector) + sveReverseElementsInst('rev', 'Revv', 'SimdAluOp', unsignedTypes, + srcType = SrcRegType.Vector) + # REVB + revCode = ''' + %(revtype)s* srcPtr = reinterpret_cast<%(revtype)s*>(&srcElem1); + %(revtype)s* dstPtr = reinterpret_cast<%(revtype)s*>(&destElem); + uint8_t subelements = sizeof(Element) / sizeof(%(revtype)s); + for(int i = 0; i < subelements; ++i) { + dstPtr[subelements - i - 1] = srcPtr[i]; + }''' + sveUnaryInst('revb', 'Revb', 'SimdAluOp', + ['uint16_t', 'uint32_t', 'uint64_t'], + revCode % {'revtype' : 'uint8_t'}, predType=PredType.MERGE, + srcRegType=SrcRegType.Vector, decoder='Generic') + # REVH + sveUnaryInst('revh', 'Revh', 'SimdAluOp', ['uint32_t', 'uint64_t'], + revCode % {'revtype' : 'uint16_t'}, predType=PredType.MERGE, + 
srcRegType=SrcRegType.Vector, decoder='Generic') + # REVW + sveUnaryInst('revw', 'Revw', 'SimdAluOp', ['uint64_t'], + revCode % {'revtype' : 'uint32_t'}, predType=PredType.MERGE, + srcRegType=SrcRegType.Vector, decoder='Generic') + # SABD + abdCode = ''' + destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) : + (srcElem2 - srcElem1); + ''' + sveBinInst('sabd', 'Sabd', 'SimdAddOp', signedTypes, abdCode, + PredType.MERGE, True) + # SADDV + addvCode = 'destElem += srcElem1;' + sveWideningAssocReducInst('saddv', 'Saddv', 'SimdReduceAddOp', + ['int8_t, int64_t', 'int16_t, int64_t', 'int32_t, int64_t'], + addvCode, '0') + # SCVTF + scvtfCode = fpOp % ('fplibFixedToFP<DElement>(' + 'sext<sizeof(SElement) * 8>(srcElem1), 0,' + ' false, FPCRRounding(fpscr), fpscr)') + sveCvtInst('scvtf', 'ScvtfNarrow', 'SimdCvtOp', + ('uint16_t, uint16_t', + 'uint32_t, uint16_t', + 'uint64_t, uint16_t', + 'uint32_t, uint32_t', + 'uint64_t, uint32_t', + 'uint64_t, uint64_t'), + scvtfCode, CvtDir.Narrow) + sveCvtInst('scvtf', 'ScvtfWiden', 'SimdCvtOp', ('uint32_t, uint64_t',), + scvtfCode, CvtDir.Widen) + # SDIV + sdivCode = ''' + constexpr Element ELEM_MIN = std::numeric_limits<Element>::min(); + destElem = (srcElem2 == 0) ? 0 : + (srcElem2 == -1 && srcElem1 == ELEM_MIN) ? ELEM_MIN : + (srcElem1 / srcElem2); + ''' + sveBinInst('sdiv', 'Sdiv', 'SimdDivOp', signedTypes, sdivCode, + PredType.MERGE, True) + # SDIVR + sdivrCode = ''' + constexpr Element ELEM_MIN = std::numeric_limits<Element>::min(); + destElem = (srcElem1 == 0) ? 0 : + (srcElem1 == -1 && srcElem2 == ELEM_MIN) ? ELEM_MIN : + (srcElem2 / srcElem1); + ''' + sveBinInst('sdivr', 'Sdivr', 'SimdDivOp', signedTypes, sdivrCode, + PredType.MERGE, True) + # SDOT (indexed) + sveDotInst('sdot', 'Sdoti', 'SimdAluOp', ['int8_t, int32_t', + 'int16_t, int64_t'], isIndexed = True) + # SDOT (vectors) + sveDotInst('sdot', 'Sdotv', 'SimdAluOp', ['int8_t, int32_t', + 'int16_t, int64_t'], isIndexed = False) + # SEL (predicates) + selCode = 'destElem = srcElem1;' + svePredLogicalInst('sel', 'PredSel', 'SimdPredAluOp', ('uint8_t',), + selCode, PredType.SELECT) + # SEL (vectors) + sveBinInst('sel', 'Sel', 'SimdAluOp', unsignedTypes, selCode, + PredType.SELECT, False) + # SETFFR + setffrCode = ''' + Ffr_ub[0] = true; + destPred.set();''' + svePredWriteFfrInst('setffr', 'Setffr', 'SimdPredAluOp', setffrCode, True) + # SMAX (immediate) + maxCode = 'destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;' + sveWideImmInst('smax', 'SmaxImm', 'SimdCmpOp', signedTypes, maxCode) + # SMAX (vectors) + sveBinInst('smax', 'Smax', 'SimdCmpOp', signedTypes, maxCode, + PredType.MERGE, True) + # SMAXV + maxvCode = ''' + if (srcElem1 > destElem) + destElem = srcElem1; + ''' + sveAssocReducInst('smaxv', 'Smaxv', 'SimdReduceCmpOp', signedTypes, + maxvCode, 'std::numeric_limits<Element>::min()') + # SMIN (immediate) + minCode = 'destElem = (srcElem1 < srcElem2) ? 
srcElem1 : srcElem2;' + sveWideImmInst('smin', 'SminImm', 'SimdCmpOp', signedTypes, minCode) + # SMIN (vectors) + sveBinInst('smin', 'Smin', 'SimdCmpOp', signedTypes, minCode, + PredType.MERGE, True) + # SMINV + minvCode = ''' + if (srcElem1 < destElem) + destElem = srcElem1; + ''' + sveAssocReducInst('sminv', 'Sminv', 'SimdReduceCmpOp', signedTypes, + minvCode, 'std::numeric_limits<Element>::max()') + # SMULH + exec_output += ''' + template <class T> + T do_mulh(T srcElem1, T srcElem2) + { + return ((int64_t)srcElem1 * (int64_t)srcElem2) >> sizeof(T) * 8; + } + + int64_t do_mulh(int64_t srcElem1, int64_t srcElem2) + { + uint64_t x = (uint64_t) llabs(srcElem1); + uint64_t y = (uint64_t) llabs(srcElem2); + + uint64_t a = x >> 32; + uint64_t b = x & 0xFFFFFFFF; + uint64_t c = y >> 32; + uint64_t d = y & 0xFFFFFFFF; + + uint64_t hi = a * c; + uint64_t lo = b * d; + + hi += (a * d) >> 32; + uint64_t tmp = lo; + lo += ((a * d) & 0xFFFFFFFF) << 32; + if (lo < tmp) + hi++; + + hi += (b * c) >> 32; + tmp = lo; + lo += ((b * c) & 0xFFFFFFFF) << 32; + if (lo < tmp) + hi++; + + uint64_t destElem = hi; + if ((srcElem1 < 0) ^ (srcElem2 < 0)) { + uint64_t tmp = lo = ~lo; + destElem = ~hi; + if (++lo < tmp) + destElem++; + } + + return destElem; + } + + uint64_t do_mulh(uint64_t srcElem1, uint64_t srcElem2) + { + uint64_t x = srcElem1; + uint64_t y = srcElem2; + + uint64_t a = x >> 32; + uint64_t b = x & 0xFFFFFFFF; + uint64_t c = y >> 32; + uint64_t d = y & 0xFFFFFFFF; + + uint64_t hi = a * c; + uint64_t lo = b * d; + + hi += (a * d) >> 32; + uint64_t tmp = lo; + lo += ((a * d) & 0xFFFFFFFF) << 32; + if (lo < tmp) + hi++; + + hi += (b * c) >> 32; + tmp = lo; + lo += ((b * c) & 0xFFFFFFFF) << 32; + if (lo < tmp) + hi++; + + return hi; + }''' + mulhCode = ''' + destElem = do_mulh(srcElem1, srcElem2);''' + sveBinInst('smulh', 'Smulh', 'SimdMultOp', signedTypes, mulhCode, + PredType.MERGE, True) + # SPLICE + sveSpliceInst('splice', 'Splice', 'SimdAluOp', unsignedTypes) + # SQADD (immediate) + sqaddCode = ''' + destElem = srcElem1 + srcElem2; + bool negDest = (destElem < 0); + bool negSrc1 = (srcElem1 < 0); + bool negSrc2 = (srcElem2 < 0); + if ((negDest != negSrc1) && (negSrc1 == negSrc2)) { + destElem = (Element)1 << (sizeof(Element) * 8 - 1); + if (negDest) + destElem -= 1; + } + ''' + sveWideImmInst('sqadd', 'SqaddImm', 'SimdAddOp', signedTypes, sqaddCode) + # SQADD (vectors) + sveBinInst('sqadd', 'Sqadd', 'SimdAddOp', signedTypes, sqaddCode) + # SQDECB, SQDECH, SQDECW, SQDECD (scalar, 32-bit) + sqdecCode = ''' + destElem = srcElem1 - (count * imm); + bool negDest = (destElem < 0); + bool negSrc = (srcElem1 < 0); + bool posCount = ((count * imm) >= 0); + if ((negDest != negSrc) && (negSrc == posCount)) { + destElem = (%(dstType)s)1 << (sizeof(%(dstType)s) * 8 - 1); + if (negDest) + destElem -= 1; + } + ''' + sveElemCountInst('sqdec', 'Sqdec32', 'SimdAluOp', signedTypes, + sqdecCode%{'dstType':'int32_t'}, destType = DestType.Scalar, + dstIs32b = True) + # SQDECB, SQDECH, SQDECW, SQDECD (scalar, 64-bit) + sveElemCountInst('sqdec', 'Sqdec', 'SimdAluOp', signedTypes, + sqdecCode%{'dstType':'int64_t'}, destType = DestType.Scalar, + dstIs32b = False) + # SQDECH, SQDECW, SQDECD (vector) + sveElemCountInst('sqdec', 'Sqdecv', 'SimdAluOp', bigSignedTypes, + sqdecCode%{'dstType':'Element'}, destType = DestType.Vector, + dstIs32b = False) + # SQDECP (scalar, 32-bit) + sqdecpCode = ''' + destElem = srcElem - count; + bool negDest = (destElem < 0); + bool negSrc = (srcElem < 0); + bool posCount = (count >= 0); + if 
((negDest != negSrc) && (negSrc == posCount)) { + destElem = std::numeric_limits<%s>::min(); + if (negDest) + destElem -= 1; + } + ''' + sqdecp32Code = ''' + int32_t srcElem = WDest; + int32_t destElem;''' + (sqdecpCode % 'int32_t') + ''' + if (destElem < 0) { + XDest = static_cast<uint32_t>(destElem) | ~mask(32); + } else { + XDest = destElem; + } + ''' + svePredCountInst('sqdecp', 'Sqdecp32', 'SimdAluOp', signedTypes, + sqdecp32Code, DestType.Scalar, SrcSize.Src32bit) + # SQDECP (scalar, 64-bit) + sqdecp64Code = ''' + int64_t srcElem = XDest; + int64_t destElem;''' + (sqdecpCode % 'int64_t') + ''' + XDest = destElem; + ''' + svePredCountInst('sqdecp', 'Sqdecp64', 'SimdAluOp', signedTypes, + sqdecp64Code, DestType.Scalar, SrcSize.Src64bit) + # SQDECP (vector) + svePredCountInst('sqdecp', 'Sqdecpv', 'SimdAluOp', signedTypes, + sqdecpCode % 'Element', DestType.Vector) + # SQINCB, SQINCH, SQINCW, SQINCD (scalar, 32-bit) + sqincCode = ''' + destElem = srcElem1 + (count * imm); + bool negDest = (destElem < 0); + bool negSrc = (srcElem1 < 0); + bool negCount = ((count * imm) < 0); + if ((negDest != negSrc) && (negSrc == negCount)) { + destElem = (%(dstType)s)1 << (sizeof(%(dstType)s) * 8 - 1); + if (negDest) + destElem -= 1; + } + ''' + sveElemCountInst('sqinc', 'Sqinc32', 'SimdAluOp', signedTypes, + sqincCode%{'dstType':'int32_t'}, destType = DestType.Scalar, + dstIs32b = True) + # SQINCB, SQINCH, SQINCW, SQINCD (scalar, 64-bit) + sveElemCountInst('sqinc', 'Sqinc', 'SimdAluOp', signedTypes, + sqincCode%{'dstType':'int64_t'}, destType = DestType.Scalar, + dstIs32b = False) + # SQINCH, SQINCW, SQINCD (vector) + sveElemCountInst('sqinc', 'Sqincv', 'SimdAluOp', bigSignedTypes, + sqincCode%{'dstType':'Element'}, destType = DestType.Vector, + dstIs32b = False) + # SQINCP (scalar, 32-bit) + sqincpCode = ''' + destElem = srcElem + count; + bool negDest = (destElem < 0); + bool negSrc = (srcElem < 0); + bool negCount = (count < 0); + if ((negDest != negSrc) && (negSrc == negCount)) { + destElem = std::numeric_limits<%s>::min(); + if (negDest) + destElem -= 1; + } + ''' + sqincp32Code = ''' + int32_t srcElem = WDest; + int32_t destElem;''' + (sqincpCode % 'int32_t') + ''' + if (destElem < 0) { + XDest = static_cast<uint32_t>(destElem) | ~mask(32); + } else { + XDest = destElem; + } + ''' + svePredCountInst('sqincp', 'Sqincp32', 'SimdAluOp', signedTypes, + sqincp32Code, DestType.Scalar, SrcSize.Src32bit) + # SQINCP (scalar, 64-bit) + sqincp64Code = ''' + int64_t srcElem = XDest; + int64_t destElem;''' + (sqincpCode % 'int64_t') + ''' + XDest = destElem; + ''' + svePredCountInst('sqincp', 'Sqincp64', 'SimdAluOp', signedTypes, + sqincp64Code, DestType.Scalar, SrcSize.Src64bit) + # SQINCP (vector) + svePredCountInst('sqincp', 'Sqincpv', 'SimdAluOp', signedTypes, + sqincpCode % 'Element', DestType.Vector) + # SQSUB (immediate) + sqsubCode = ''' + destElem = srcElem1 - srcElem2; + bool negDest = (destElem < 0); + bool negSrc1 = (srcElem1 < 0); + bool posSrc2 = (srcElem2 >= 0); + if ((negDest != negSrc1) && (negSrc1 == posSrc2)) { + destElem = (Element)1 << (sizeof(Element) * 8 - 1); + if (negDest) + destElem -= 1; + } + ''' + sveWideImmInst('sqsub', 'SqsubImm', 'SimdAddOp', signedTypes, sqsubCode) + # SQSUB (vectors) + sveBinInst('sqsub', 'Sqsub', 'SimdAddOp', signedTypes, sqsubCode) + # SUB (immediate) + subCode = 'destElem = srcElem1 - srcElem2;' + sveWideImmInst('sub', 'SubImm', 'SimdAddOp', unsignedTypes, subCode) + # SUB (vectors, predicated) + sveBinInst('sub', 'SubPred', 'SimdAddOp', unsignedTypes, 
subCode, + PredType.MERGE, True) + # SUB (vectors, unpredicated) + subCode = 'destElem = srcElem1 - srcElem2;' + sveBinInst('sub', 'SubUnpred', 'SimdAddOp', unsignedTypes, subCode) + # SUBR (immediate) + subrCode = 'destElem = srcElem2 - srcElem1;' + sveWideImmInst('subr', 'SubrImm', 'SimdAddOp', unsignedTypes, subrCode) + # SUBR (vectors) + sveBinInst('subr', 'Subr', 'SimdAddOp', unsignedTypes, subrCode, + PredType.MERGE, True) + # SUNPKHI + sveUnpackInst('sunpkhi', 'Sunpkhi', 'SimdAluOp', signedWideSDTypes, + unpackHalf = Unpack.High, regType = SrcRegType.Vector) + # SUNPKLO + sveUnpackInst('sunpklo', 'Sunpklo', 'SimdAluOp', signedWideSDTypes, + unpackHalf = Unpack.Low, regType = SrcRegType.Vector) + # SXTB + sxtCode = 'destElem = sext<8 * sizeof(SElement)>(srcElem1);' + sveWidenUnaryInst('sxtb', 'Sxtb', 'SimdAluOp', + ['uint8_t, uint16_t', 'uint8_t, uint32_t', 'uint8_t, uint64_t'], + sxtCode, PredType.MERGE) + # SXTH + sveWidenUnaryInst('sxth', 'Sxth', 'SimdAluOp', + ['uint16_t, uint32_t', 'uint16_t, uint64_t'], + sxtCode, PredType.MERGE) + # SXTW + sveWidenUnaryInst('sxtw', 'Sxtw', 'SimdAluOp', + ['uint32_t, uint64_t'], + sxtCode, PredType.MERGE) + # TBL + sveTblInst('tbl', 'Tbl', 'SimdAluOp') + # TRN1, TRN2 (predicates) + trnPredIterCode = ''' + constexpr unsigned sz = sizeof(Element); + int s; + int part = %d; + TheISA::VecPredRegContainer tmpPredC; + auto auxPDest = tmpPredC.as<uint8_t>(); + for (unsigned i = 0; i < eCount / 2; i++) { + s = 2 * i + part; + for (unsigned j = 0; j < sz; j++) { + auxPDest[(2 * i) * sz + j] = POp1_pb[s * sz + j]; + auxPDest[(2 * i + 1) * sz + j] = POp2_pb[s * sz + j]; + } + } + for (unsigned i = 0; i < eCount * sz; i++) { + PDest_pb[i] = auxPDest[i]; + } + ''' + svePredBinPermInst('trn1', 'Trn1Pred', 'SimdPredAluOp', unsignedTypes, + trnPredIterCode % 0) + svePredBinPermInst('trn2', 'Trn2Pred', 'SimdPredAluOp', unsignedTypes, + trnPredIterCode % 1) + # TRN1, TRN2 (vectors) + trnIterCode = ''' + int s; + int part = %d; + TheISA::VecRegContainer tmpVecC; + auto auxDest = tmpVecC.as<Element>(); + for (unsigned i = 0; i < eCount / 2; i++) { + s = 2 * i + part; + auxDest[2 * i] = AA64FpOp1_x[s]; + auxDest[2 * i + 1] = AA64FpOp2_x[s]; + } + for (unsigned i = 0; i < eCount; i++) { + AA64FpDest_x[i] = auxDest[i]; + } + ''' + sveBinInst('trn1', 'Trn1', 'SimdAluOp', unsignedTypes, '', + customIterCode=trnIterCode % 0) + sveBinInst('trn2', 'Trn2', 'SimdAluOp', unsignedTypes, '', + customIterCode=trnIterCode % 1) + # UABD + sveBinInst('uabd', 'Uabd', 'SimdAddOp', unsignedTypes, abdCode, + PredType.MERGE, True) + # UADDV + sveWideningAssocReducInst('uaddv', 'Uaddv', 'SimdReduceAddOp', + ['uint8_t, uint64_t', 'uint16_t, uint64_t', 'uint32_t, uint64_t', + 'uint64_t, uint64_t'], + addvCode, '0') + # UCVTF + ucvtfCode = fpOp % ('fplibFixedToFP<DElement>(srcElem1, 0, true,' + ' FPCRRounding(fpscr), fpscr)') + sveCvtInst('ucvtf', 'UcvtfNarrow', 'SimdCvtOp', + ('uint16_t, uint16_t', + 'uint32_t, uint16_t', + 'uint64_t, uint16_t', + 'uint32_t, uint32_t', + 'uint64_t, uint32_t', + 'uint64_t, uint64_t'), + ucvtfCode, CvtDir.Narrow) + sveCvtInst('ucvtf', 'UcvtfWiden', 'SimdCvtOp', ('uint32_t, uint64_t',), + ucvtfCode, CvtDir.Widen) + # UDIV + udivCode = 'destElem = (srcElem2 == 0) ? 0 : (srcElem1 / srcElem2);' + sveBinInst('udiv', 'Udiv', 'SimdDivOp', unsignedTypes, udivCode, + PredType.MERGE, True) + # UDIVR + udivrCode = 'destElem = (srcElem1 == 0) ? 
0 : (srcElem2 / srcElem1);' + sveBinInst('udivr', 'Udivr', 'SimdDivOp', unsignedTypes, udivrCode, + PredType.MERGE, True) + # UDOT (indexed) + sveDotInst('udot', 'Udoti', 'SimdAluOp', ['uint8_t, uint32_t', + 'uint16_t, uint64_t'], isIndexed = True) + # UDOT (vectors) + sveDotInst('udot', 'Udotv', 'SimdAluOp', ['uint8_t, uint32_t', + 'uint16_t, uint64_t'], isIndexed = False) + # UMAX (immediate) + sveWideImmInst('umax', 'UmaxImm', 'SimdCmpOp', unsignedTypes, maxCode) + # UMAX (vectors) + sveBinInst('umax', 'Umax', 'SimdCmpOp', unsignedTypes, maxCode, + PredType.MERGE, True) + # UMAXV + sveAssocReducInst('umaxv', 'Umaxv', 'SimdReduceCmpOp', unsignedTypes, + maxvCode, 'std::numeric_limits<Element>::min()') + # UMIN (immediate) + sveWideImmInst('umin', 'UminImm', 'SimdCmpOp', unsignedTypes, minCode) + # UMIN (vectors) + sveBinInst('umin', 'Umin', 'SimdCmpOp', unsignedTypes, minCode, + PredType.MERGE, True) + # UMINV + sveAssocReducInst('uminv', 'Uminv', 'SimdReduceCmpOp', unsignedTypes, + minvCode, 'std::numeric_limits<Element>::max()') + # UMULH + sveBinInst('umulh', 'Umulh', 'SimdMultOp', unsignedTypes, mulhCode, + PredType.MERGE, True) + # UQADD (immediate) + uqaddCode = ''' + destElem = srcElem1 + srcElem2; + if (destElem < srcElem1 || destElem < srcElem2) { + destElem = (Element)(-1); + } + ''' + sveWideImmInst('uqadd', 'UqaddImm', 'SimdAddOp', unsignedTypes, uqaddCode) + # UQADD (vectors) + sveBinInst('uqadd', 'Uqadd', 'SimdAddOp', unsignedTypes, uqaddCode) + # UQDECB, UQDECH, UQDECW, UQDECD (scalar, 32-bit) + uqdecCode = ''' + destElem = srcElem1 - (imm * count); + if (destElem > srcElem1) { + destElem = 0; + } + ''' + sveElemCountInst('uqdec', 'Uqdec32', 'SimdAluOp', unsignedTypes, + uqdecCode, destType = DestType.Scalar, dstIs32b = True) + # UQDECB, UQDECH, UQDECW, UQDECD (scalar, 64-bit) + sveElemCountInst('uqdec', 'Uqdec', 'SimdAluOp', unsignedTypes, + uqdecCode, destType = DestType.Scalar, dstIs32b = False) + # UQDECH, UQDECW, UQDECD (vector) + sveElemCountInst('uqdec', 'Uqdecv', 'SimdAluOp', bigUnsignedTypes, + uqdecCode, destType = DestType.Vector, dstIs32b = False) + # UQDECP (scalar, 32-bit) + uqdecpCode = ''' + destElem = srcElem - count; + if (destElem > srcElem) { + destElem = 0; + } + ''' + uqdecp32Code = ''' + uint32_t srcElem = WDest; + uint32_t destElem;''' + uqdecpCode + ''' + WDest = destElem; + ''' + svePredCountInst('uqdecp', 'Uqdecp32', 'SimdAluOp', unsignedTypes, + uqdecp32Code, DestType.Scalar, SrcSize.Src32bit) + # UQDECP (scalar, 64-bit) + uqdecp64Code = ''' + uint64_t srcElem = XDest; + uint64_t destElem;''' + uqdecpCode + ''' + XDest = destElem; + ''' + svePredCountInst('uqdecp', 'Uqdecp64', 'SimdAluOp', unsignedTypes, + uqdecp64Code, DestType.Scalar, SrcSize.Src64bit) + # UQDECP (vector) + svePredCountInst('uqdecp', 'Uqdecpv', 'SimdAluOp', unsignedTypes, + uqdecpCode, DestType.Vector) + # UQINCB, UQINCH, UQINCW, UQINCD (scalar, 32-bit) + uqincCode = ''' + destElem = srcElem1 + (imm * count); + if (destElem < srcElem1 || destElem < (imm * count)) { + destElem = static_cast<%(destType)s>(-1); + } + ''' + sveElemCountInst('uqinc', 'Uqinc32', 'SimdAluOp', unsignedTypes, + uqincCode%{'destType': 'uint32_t'}, destType = DestType.Scalar, + dstIs32b = True) + # UQINCB, UQINCH, UQINCW, UQINCD (scalar, 64-bit) + sveElemCountInst('uqinc', 'Uqinc', 'SimdAluOp', unsignedTypes, + uqincCode%{'destType': 'uint64_t'}, destType = DestType.Scalar, + dstIs32b = False) + # UQINCH, UQINCW, UQINCD (vector) + sveElemCountInst('uqinc', 'Uqincv', 'SimdAluOp', bigUnsignedTypes, +
uqincCode%{'destType': 'Element'}, destType = DestType.Vector, + dstIs32b = False) + # UQINCP (scalar, 32-bit) + uqincpCode = ''' + destElem = srcElem + count; + if (destElem < srcElem || destElem < count) { + destElem = std::numeric_limits<%s>::max(); + } + ''' + uqincp32Code = ''' + uint32_t srcElem = WDest; + uint32_t destElem;''' + (uqincpCode % 'uint32_t') + ''' + XDest = destElem; + ''' + svePredCountInst('uqincp', 'Uqincp32', 'SimdAluOp', unsignedTypes, + uqincp32Code, DestType.Scalar, SrcSize.Src32bit) + # UQINCP (scalar, 64-bit) + uqincp64Code = ''' + uint64_t srcElem = XDest; + uint64_t destElem;''' + (uqincpCode % 'uint64_t') + ''' + XDest = destElem; + ''' + svePredCountInst('uqincp', 'Uqincp64', 'SimdAluOp', unsignedTypes, + uqincp64Code, DestType.Scalar, SrcSize.Src64bit) + # UQINCP (vector) + svePredCountInst('uqincp', 'Uqincpv', 'SimdAluOp', unsignedTypes, + uqincpCode % 'Element', DestType.Vector) + # UQSUB (immediate) + uqsubCode = ''' + destElem = srcElem1 - srcElem2; + if (destElem > srcElem1) { + destElem = 0; + } + ''' + sveWideImmInst('uqsub', 'UqsubImm', 'SimdAddOp', unsignedTypes, uqsubCode) + # UQSUB (vectors) + sveBinInst('uqsub', 'Uqsub', 'SimdAddOp', unsignedTypes, uqsubCode) + # UUNPKHI + sveUnpackInst('uunpkhi', 'Uunpkhi', 'SimdAluOp', unsignedWideSDTypes, + unpackHalf = Unpack.High, regType = SrcRegType.Vector) + # UUNPKLO + sveUnpackInst('uunpklo', 'Uunpklo', 'SimdAluOp', unsignedWideSDTypes, + unpackHalf = Unpack.Low, regType = SrcRegType.Vector) + # UXTB + uxtCode = 'destElem = srcElem1;' + sveWidenUnaryInst('uxtb', 'Uxtb', 'SimdAluOp', + ['uint8_t, uint16_t', 'uint8_t, uint32_t', 'uint8_t, uint64_t'], + uxtCode, PredType.MERGE) + # UXTH + sveWidenUnaryInst('uxth', 'Uxth', 'SimdAluOp', + ['uint16_t, uint32_t', 'uint16_t, uint64_t'], + uxtCode, PredType.MERGE) + # UXTW + sveWidenUnaryInst('uxtw', 'Uxtw', 'SimdAluOp', + ['uint32_t, uint64_t'], + uxtCode, PredType.MERGE) + # UZP1, UZP2 (predicates) + uzpPredIterCode = ''' + constexpr unsigned sz = sizeof(Element); + int s; + int part = %d; + TheISA::VecPredRegContainer tmpPredC; + auto auxPDest = tmpPredC.as<uint8_t>(); + for (unsigned i = 0; i < eCount; i++) { + s = 2 * i + part; + for (unsigned j = 0; j < sz; j++) { + if (s < eCount) { + auxPDest[i * sz + j] = POp1_pb[s * sz + j]; + } else { + auxPDest[i * sz + j] = POp2_pb[(s - eCount) * sz + j]; + } + } + } + for (unsigned i = 0; i < eCount * sz; i++) { + PDest_pb[i] = auxPDest[i]; + } + ''' + svePredBinPermInst('uzp1', 'Uzp1Pred', 'SimdPredAluOp', unsignedTypes, + uzpPredIterCode % 0) + svePredBinPermInst('uzp2', 'Uzp2Pred', 'SimdPredAluOp', unsignedTypes, + uzpPredIterCode % 1) + # UZP1, UZP2 (vectors) + uzpIterCode = ''' + int s; + int part = %d; + TheISA::VecRegContainer tmpVecC; + auto auxDest = tmpVecC.as<Element>(); + for (unsigned i = 0; i < eCount; i++) { + s = 2 * i + part; + if (s < eCount) { + auxDest[i] = AA64FpOp1_x[s]; + } else { + auxDest[i] = AA64FpOp2_x[s - eCount]; + } + } + for (unsigned i = 0; i < eCount; i++) { + AA64FpDest_x[i] = auxDest[i]; + } + ''' + sveBinInst('uzp1', 'Uzp1', 'SimdAluOp', unsignedTypes, '', + customIterCode=uzpIterCode % 0) + sveBinInst('uzp2', 'Uzp2', 'SimdAluOp', unsignedTypes, '', + customIterCode=uzpIterCode % 1) + # WHILELE (32-bit) + whileLECode = ''' + cond = srcElem1 <= srcElem2; + ''' + sveWhileInst('whilele', 'Whilele32', 'SimdCmpOp', signedTypes, whileLECode, + SrcSize.Src32bit) + # WHILELE (64-bit) + sveWhileInst('whilele', 'Whilele64', 'SimdCmpOp', signedTypes, whileLECode, + SrcSize.Src64bit) + # 
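
The WHILE* definitions around this point all follow one scheme: whileLECode (and whileLTCode below) give the per-element scalar comparison, and the generated loop evaluates it with the first operand incremented once per element, deactivating the predicate from the first failing element onwards. A standalone sketch of the WHILELT case, with invented names (the NZCV flag setting handled by the sveWhileInst machinery is omitted):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Sketch of the WHILELT predicate-generation semantics: element i is
    // active iff (op1 + i) < op2, and the first failing comparison
    // deactivates every later element. Invented names, not gem5 code.
    std::vector<bool> whilelt(int64_t op1, int64_t op2, size_t eCount)
    {
        std::vector<bool> pred(eCount, false);
        bool cond = true;
        for (size_t i = 0; i < eCount; ++i) {
            cond = cond && (op1 + static_cast<int64_t>(i) < op2);
            pred[i] = cond;  // sticky: stays false once cond fails
        }
        return pred;
    }

    int main()
    {
        // Loop tail: induction variable at 5, bound 8, 4 elements/vector.
        auto p = whilelt(5, 8, 4);
        assert(p[0] && p[1] && p[2] && !p[3]);
        return 0;
    }
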
WHILELO (32-bit) + whileLTCode = ''' + cond = srcElem1 < srcElem2; + ''' + sveWhileInst('whilelo', 'Whilelo32', 'SimdCmpOp', unsignedTypes, + whileLTCode, SrcSize.Src32bit) + # WHILELO (64-bit) + sveWhileInst('whilelo', 'Whilelo64', 'SimdCmpOp', unsignedTypes, + whileLTCode, SrcSize.Src64bit) + # WHILELS (32-bit) + sveWhileInst('whilels', 'Whilels32', 'SimdCmpOp', unsignedTypes, + whileLECode, SrcSize.Src32bit) + # WHILELS (64-bit) + sveWhileInst('whilels', 'Whilels64', 'SimdCmpOp', unsignedTypes, + whileLECode, SrcSize.Src64bit) + # WHILELT (32-bit) + sveWhileInst('whilelt', 'Whilelt32', 'SimdCmpOp', signedTypes, + whileLTCode, SrcSize.Src32bit) + # WHILELT (64-bit) + sveWhileInst('whilelt', 'Whilelt64', 'SimdCmpOp', signedTypes, + whileLTCode, SrcSize.Src64bit) + # WRFFR + wrffrCode = ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<uint8_t>( + xc->tcBase()); + for (unsigned i = 0; i < eCount; i++) { + Ffr_ub[i] = POp1_ub[i]; + }''' + svePredWriteFfrInst('wrffr', 'Wrffr', 'SimdPredAluOp', wrffrCode, False) + # ZIP1, ZIP2 (predicates) + zipPredIterCode = ''' + constexpr unsigned sz = sizeof(Element); + int s; + int part = %d; + TheISA::VecPredRegContainer tmpPredC; + auto auxPDest = tmpPredC.as<uint8_t>(); + for (unsigned i = 0; i < eCount / 2; i++) { + s = i + (part * (eCount / 2)); + for (unsigned j = 0; j < sz; j++) { + auxPDest[(2 * i) * sz + j] = POp1_pb[s * sz + j]; + auxPDest[(2 * i + 1) * sz + j] = POp2_pb[s * sz + j]; + } + } + for (unsigned i = 0; i < eCount * sz; i++) { + PDest_pb[i] = auxPDest[i]; + } + ''' + svePredBinPermInst('zip1', 'Zip1Pred', 'SimdPredAluOp', unsignedTypes, + zipPredIterCode % 0) + svePredBinPermInst('zip2', 'Zip2Pred', 'SimdPredAluOp', unsignedTypes, + zipPredIterCode % 1) + # ZIP1, ZIP2 (vectors) + zipIterCode = ''' + int s; + int part = %d; + TheISA::VecRegContainer tmpVecC; + auto auxDest = tmpVecC.as<Element>(); + for (unsigned i = 0; i < eCount / 2; i++) { + s = i + (part * (eCount / 2)); + auxDest[2 * i] = AA64FpOp1_x[s]; + auxDest[2 * i + 1] = AA64FpOp2_x[s]; + } + for (unsigned i = 0; i < eCount; i++) { + AA64FpDest_x[i] = auxDest[i]; + } + ''' + sveBinInst('zip1', 'Zip1', 'SimdAluOp', unsignedTypes, '', + customIterCode=zipIterCode % 0) + sveBinInst('zip2', 'Zip2', 'SimdAluOp', unsignedTypes, '', + customIterCode=zipIterCode % 1) + +}}; diff --git a/src/arch/arm/isa/operands.isa b/src/arch/arm/isa/operands.isa index 025f75755..fb3e4de35 100644 --- a/src/arch/arm/isa/operands.isa +++ b/src/arch/arm/isa/operands.isa @@ -46,6 +46,7 @@ def operand_types {{ 'uh' : 'uint16_t', 'sw' : 'int32_t', 'uw' : 'uint32_t', + 'sd' : 'int64_t', 'ud' : 'uint64_t', 'tud' : 'std::array<uint64_t, 2>', 'sf' : 'float', @@ -53,6 +54,10 @@ def operand_types {{ 'vc' : 'TheISA::VecRegContainer', # For operations that are implemented as a template 'x' : 'TPElem', + 'xs' : 'TPSElem', + 'xd' : 'TPDElem', + 'pc' : 'TheISA::VecPredRegContainer', + 'pb' : 'uint8_t' }}; let {{ @@ -129,6 +134,9 @@ let {{ def vectorRegElem(elem, ext = 'sf', zeroing = False): return (elem, ext, zeroing) + def vecPredReg(idx): + return ('VecPredReg', 'pc', idx, None, srtNormal) + def intReg(idx): return ('IntReg', 'uw', idx, 'IsInteger', srtNormal, maybePCRead, maybePCWrite) @@ -522,6 +530,25 @@ def operands {{ 'AA64FpDestQV1L': vectorRegElem('0', 'tud', zeroing = True) }), + 'AA64FpDestMerge': vectorReg('dest', + { + 'AA64FpDestMergeP0': vectorRegElem('0'), + 'AA64FpDestMergeP1': vectorRegElem('1'), + 'AA64FpDestMergeP2': vectorRegElem('2'), + 'AA64FpDestMergeP3': vectorRegElem('3'), + 
'AA64FpDestMergeS': vectorRegElem('0', 'sf', zeroing = True), + 'AA64FpDestMergeD': vectorRegElem('0', 'df', zeroing = True), + 'AA64FpDestMergeQ': vectorRegElem('0', 'tud', zeroing = True) + }), + + # Predicate register operands + 'GpOp': vecPredReg('gp'), + 'POp1': vecPredReg('op1'), + 'POp2': vecPredReg('op2'), + 'PDest': vecPredReg('dest'), + 'PDestMerge': vecPredReg('dest'), + 'Ffr': vecPredReg('PREDREG_FFR'), + #Abstracted control reg operands 'MiscDest': cntrlReg('dest'), 'MiscOp1': cntrlReg('op1'), diff --git a/src/arch/arm/isa/templates/sve.isa b/src/arch/arm/isa/templates/sve.isa new file mode 100644 index 000000000..d7682c447 --- /dev/null +++ b/src/arch/arm/isa/templates/sve.isa @@ -0,0 +1,1034 @@ +// Copyright (c) 2018-2019 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
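
Each def template block in this new file is a C++ skeleton with %(...)s placeholders that the gem5 ISA parser fills in per instruction definition; the surrounding template <class _Element> then lets a single skeleton serve every SVE element size. A standalone sketch of that pattern, with an invented class in place of the generated ones (real ones derive from gem5 base classes and execute over whole vector registers):

    #include <cassert>
    #include <cstdint>

    // Sketch only: the parser substitutes %(class_name)s, %(mnemonic)s,
    // etc. into a class template like this, and the ISA description
    // instantiates it once per element size. SveNegSketch is invented.
    template <class _Element>
    class SveNegSketch
    {
      protected:
        typedef _Element Element;
      public:
        Element executeElem(Element srcElem1) const
        {
            return -srcElem1;  // stands in for a per-element code string
        }
    };

    int main()
    {
        SveNegSketch<int8_t>  negB;  // "B" form, 8-bit elements
        SveNegSketch<int64_t> negD;  // "D" form, 64-bit elements
        assert(negB.executeElem(1) == -1 && negD.executeElem(5) == -5);
        return 0;
    }
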
+// +// Authors: Giacomo Gabrielli + +let {{ + sveEnabledCheckCode = ''' + if (FullSystem) { + fault = checkSveEnabled(xc->tcBase(), Cpsr, Cpacr64); + if (fault != NoFault) { + return fault; + } + } + ''' +}}; + +def template SveWideningUnaryPredOpDeclare {{ +template <class _SElement, class _DElement> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _SElement Element; + typedef _SElement SElement; + typedef _DElement DElement; + typedef _SElement TPSElem; + typedef _DElement TPDElem; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _gp) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _gp) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SveUnaryPredOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + typedef _Element TPElem; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _gp) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _gp) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SveUnaryUnpredOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + typedef _Element TPElem; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SveShiftAndInsertOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + typedef _Element TPElem; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, %(isSimdFp)s) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SveWideImmUnpredOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + typedef _Element TPElem; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, uint64_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _imm) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SveWideImmPredOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + typedef _Element TPElem; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, uint64_t _imm, IntRegIndex _gp, + bool _isMerging = true) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _imm, _gp, _isMerging) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SveBinImmUnpredOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + typedef _Element TPElem; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, uint64_t _imm) + : 
%(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _imm) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SveBinImmPredOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + typedef _Element TPElem; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, uint64_t _imm, IntRegIndex _gp) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _imm, _gp) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SveBinDestrPredOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + typedef _Element TPElem; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op2, IntRegIndex _gp) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op2, _gp) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SveBinConstrPredOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + typedef _Element TPElem; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + IntRegIndex _gp, SvePredType _predType) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2, _gp, _predType) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SveBinUnpredOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + typedef _Element TPElem; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SveBinIdxUnpredOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + typedef _Element TPElem; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + uint8_t _index) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2, _index) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SvePredLogicalOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + typedef _Element TPElem; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + IntRegIndex _gp, bool _isSel = false) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2, _gp, _isSel) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SveCmpOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + typedef _Element TPElem; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + IntRegIndex _gp) + : 
%(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2, _gp) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SveIntCmpOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + typedef _Element TPElem; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + IntRegIndex _gp) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2, _gp, %(op2IsWide)s) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SveCmpImmOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + typedef _Element TPElem; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, uint64_t _imm, + IntRegIndex _gp) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _imm, _gp) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SveTerPredOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + typedef _Element TPElem; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + IntRegIndex _gp) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2, _gp) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SveTerImmUnpredOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + typedef _Element TPElem; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, uint64_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _imm) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SveReducOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + typedef _Element TPElem; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _gp) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _gp) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SveWideningReducOpDeclare {{ +template <class _SElement, class _DElement> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _SElement Element; + typedef _SElement SElement; + typedef _DElement DElement; + typedef _SElement TPSElem; + typedef _DElement TPDElem; + + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _gp) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _gp) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SveIndexIIOpDeclare {{ +template <class _Element> +class SveIndexII : public SveIndexIIOp +{ + protected: + typedef _Element Element; + typedef _Element TPElem; + public: + // Constructor + SveIndexII(ExtMachInst machInst, + IntRegIndex _dest, int8_t _imm1, int8_t 
_imm2) + : SveIndexIIOp("%(mnemonic)s", machInst, %(op_class)s, + _dest, _imm1, _imm2) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SveIndexIROpDeclare {{ +template <class _Element> +class SveIndexIR : public SveIndexIROp +{ + protected: + typedef _Element Element; + typedef _Element TPElem; + public: + // Constructor + SveIndexIR(ExtMachInst machInst, + IntRegIndex _dest, int8_t _imm, IntRegIndex _op) + : SveIndexIROp("%(mnemonic)s", machInst, %(op_class)s, + _dest, _imm, _op) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SveIndexRIOpDeclare {{ +template <class _Element> +class SveIndexRI : public SveIndexRIOp +{ + protected: + typedef _Element Element; + typedef _Element TPElem; + public: + // Constructor + SveIndexRI(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op, int8_t _imm) + : SveIndexRIOp("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op, _imm) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SveIndexRROpDeclare {{ +template <class _Element> +class SveIndexRR : public SveIndexRROp +{ + protected: + typedef _Element Element; + typedef _Element TPElem; + public: + // Constructor + SveIndexRR(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2) + : SveIndexRROp("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SvePredCountOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + typedef _Element TPElem; + public: + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + IntRegIndex _op1) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, %(srcIs32b)s, %(destIsVec)s) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SvePredCountPredOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + typedef _Element TPElem; + public: + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + IntRegIndex _op1, IntRegIndex _gp) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _gp) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; + +}}; + +def template SvePtrueOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + typedef _Element TPElem; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, uint8_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _imm) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SveAdrOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + typedef _Element TPElem; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _base, IntRegIndex _offset, + uint8_t _mult, SveAdrOffsetFormat _offsetFormat) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _base, _offset, _mult, _offsetFormat) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def 
template SveWhileOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + typedef _Element TPElem; + public: + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + IntRegIndex _op1, IntRegIndex _op2) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2, %(srcIs32b)s) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SveCompTermOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + typedef _Element TPElem; + public: + %(class_name)s(ExtMachInst machInst, IntRegIndex _op1, IntRegIndex _op2) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _op1, _op2) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SveIntCmpImmOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + typedef _Element TPElem; + public: + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, IntRegIndex _op1, + int64_t _op2, IntRegIndex _gp) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _dest, _op1, + _op2, _gp) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SveElemCountOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + typedef _Element TPElem; + public: + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + uint8_t _pattern, uint8_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _dest, + _pattern, _imm, %(dstIsVec)s, %(dstIs32b)s) + { + %(constructor)s; + esize = sizeof(Element); + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SvePartBrkOpDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, IntRegIndex _gp, + IntRegIndex _op1) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _dest, + _gp, _op1, %(isMerging)s) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SvePartBrkPropOpDeclare {{ +// XXX: Might be done with SveTerPredOpDeclare and +// instantiating with uint8_t +class %(class_name)s : public %(base_class)s +{ + public: + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + IntRegIndex _op1, IntRegIndex _op2, IntRegIndex _gp) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _dest, + _op1, _op2, _gp) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SveSelectOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + typedef _Element TPElem; + public: + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + IntRegIndex _op1, IntRegIndex _gp) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _dest, + _op1, _gp, %(isCond)s, %(isScalar)s, %(isSimdFp)s) + { + %(constructor)s; + scalar_width = (sizeof(Element) == 8) ? 
64 : 32; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SveUnpackOpDeclare {{ +template <class _SElement, class _DElement> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _SElement Element; + typedef _SElement SElement; + typedef _DElement DElement; + typedef _SElement TPSElem; + typedef _DElement TPDElem; + public: + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SvePredicateTestOpDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + %(class_name)s(ExtMachInst machInst, IntRegIndex _op1, IntRegIndex _gp) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _op1, _gp) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SvePredUnaryOpWImplicitSrcDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _dest) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SvePredUnaryPredOpWImplicitSrcDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, IntRegIndex _gp) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _dest, _gp) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SvePredUnaryOpWImplicitDstDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + %(class_name)s(ExtMachInst machInst, IntRegIndex _op1) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _op1) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SveOpWImplicitSrcDstDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + %(class_name)s(ExtMachInst machInst) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SveWideningTerImmOpDeclare {{ +template <class _SElement, class _DElement> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _DElement Element; + typedef _SElement SElement; + typedef _DElement DElement; + typedef _SElement TPSElem; + typedef _DElement TPDElem; + + public: + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + uint64_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2, _imm) + { + %(constructor)s; + esize = sizeof(Element); + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SveWideningTerOpDeclare {{ +template <class _SElement, class _DElement> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _DElement Element; + typedef _SElement SElement; + typedef _DElement DElement; + typedef _SElement TPSElem; + typedef _DElement TPDElem; + + public: + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2) + { + %(constructor)s; + esize = sizeof(Element); + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def 
template SveComplexOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + typedef _Element TPElem; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + IntRegIndex _gp, uint8_t _rot) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2, _gp, _rot) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SveComplexIndexOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + typedef _Element TPElem; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + uint8_t _rot, uint8_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2, _rot, _imm) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; +}; +}}; + +def template SveWideningOpExecute {{ + template <class SElement, class DElement> + Fault %(class_name)s<SElement, DElement>::execute(ExecContext *xc, + Trace::InstRecord *traceData) const + { + Fault fault = NoFault; + %(op_decl)s; + %(op_rd)s; + + %(code)s; + if (fault == NoFault) + { + %(op_wb)s; + } + + return fault; + } +}}; + +def template SveNonTemplatedOpExecute {{ + Fault %(class_name)s::execute(ExecContext *xc, + Trace::InstRecord *traceData) const + { + Fault fault = NoFault; + %(op_decl)s; + %(op_rd)s; + + %(code)s; + if (fault == NoFault) + { + %(op_wb)s; + } + + return fault; + } +}}; + +def template SveOpExecute {{ + template <class Element> + Fault %(class_name)s<Element>::execute(ExecContext *xc, + Trace::InstRecord *traceData) const + { + Fault fault = NoFault; + %(op_decl)s; + %(op_rd)s; + + %(code)s; + if (fault == NoFault) + { + %(op_wb)s; + } + + return fault; + } +}}; + +def template SveOpExecDeclare {{ + template + Fault %(class_name)s<%(targs)s>::execute( + ExecContext *, Trace::InstRecord *) const; +}}; diff --git a/src/arch/arm/isa/templates/templates.isa b/src/arch/arm/isa/templates/templates.isa index 14913b358..c0647b81f 100644 --- a/src/arch/arm/isa/templates/templates.isa +++ b/src/arch/arm/isa/templates/templates.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010-2011,2018 ARM Limited +// Copyright (c) 2010-2011, 2017-2018 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -80,3 +80,6 @@ ##include "crypto.isa" ##include "neon64.isa" + +//Templates for SVE instructions +##include "sve.isa" diff --git a/src/arch/arm/miscregs.cc b/src/arch/arm/miscregs.cc index 1d4002a03..5fd7d2c53 100644 --- a/src/arch/arm/miscregs.cc +++ b/src/arch/arm/miscregs.cc @@ -1695,7 +1695,11 @@ decodeAArch64SysReg(unsigned op0, unsigned op1, return MISCREG_ID_AA64PFR0_EL1; case 1: return MISCREG_ID_AA64PFR1_EL1; - case 2 ... 7: + case 2 ... 3: + return MISCREG_RAZ; + case 4: + return MISCREG_ID_AA64ZFR0_EL1; + case 5 ... 
7: return MISCREG_RAZ; } break; @@ -1804,6 +1808,12 @@ decodeAArch64SysReg(unsigned op0, unsigned op1, return MISCREG_CPACR_EL1; } break; + case 2: + switch (op2) { + case 0: + return MISCREG_ZCR_EL1; + } + break; } break; case 4: @@ -1830,6 +1840,22 @@ decodeAArch64SysReg(unsigned op0, unsigned op1, return MISCREG_HACR_EL2; } break; + case 2: + switch (op2) { + case 0: + return MISCREG_ZCR_EL2; + } + break; + } + break; + case 5: + switch (crm) { + case 2: + switch (op2) { + case 0: + return MISCREG_ZCR_EL12; + } + break; } break; case 6: @@ -1852,6 +1878,12 @@ decodeAArch64SysReg(unsigned op0, unsigned op1, return MISCREG_CPTR_EL3; } break; + case 2: + switch (op2) { + case 0: + return MISCREG_ZCR_EL3; + } + break; case 3: switch (op2) { case 1: @@ -4923,6 +4955,18 @@ ISA::initializeMiscRegMetadata() InitReg(MISCREG_CNTHV_TVAL_EL2) .mon().hyp(); + // SVE + InitReg(MISCREG_ID_AA64ZFR0_EL1) + .allPrivileges().exceptUserMode().writes(0); + InitReg(MISCREG_ZCR_EL3) + .mon(); + InitReg(MISCREG_ZCR_EL2) + .hyp().mon(); + InitReg(MISCREG_ZCR_EL12) + .unimplemented().warnNotFail(); + InitReg(MISCREG_ZCR_EL1) + .allPrivileges().exceptUserMode(); + // Dummy registers InitReg(MISCREG_NOP) .allPrivileges(); diff --git a/src/arch/arm/miscregs.hh b/src/arch/arm/miscregs.hh index 0d40b27a4..feef79e73 100644 --- a/src/arch/arm/miscregs.hh +++ b/src/arch/arm/miscregs.hh @@ -891,9 +891,12 @@ namespace ArmISA MISCREG_ICH_LRC14, MISCREG_ICH_LRC15, - // These MISCREG_FREESLOT are available Misc Register - // slots for future registers to be implemented. - MISCREG_FREESLOT_1, + // SVE + MISCREG_ID_AA64ZFR0_EL1, + MISCREG_ZCR_EL3, + MISCREG_ZCR_EL2, + MISCREG_ZCR_EL12, + MISCREG_ZCR_EL1, // NUM_PHYS_MISCREGS specifies the number of actual physical // registers, not considering the following pseudo-registers @@ -1825,7 +1828,11 @@ namespace ArmISA "ich_lrc14", "ich_lrc15", - "freeslot2", + "id_aa64zfr0_el1", + "zcr_el3", + "zcr_el2", + "zcr_el12", + "zcr_el1", "num_phys_regs", diff --git a/src/arch/arm/miscregs_types.hh b/src/arch/arm/miscregs_types.hh index f668d9bc8..0a9621634 100644 --- a/src/arch/arm/miscregs_types.hh +++ b/src/arch/arm/miscregs_types.hh @@ -201,6 +201,7 @@ namespace ArmISA Bitfield<10> tfp; // AArch64 Bitfield<9> tcp9; Bitfield<8> tcp8; + Bitfield<8> tz; // SVE Bitfield<7> tcp7; Bitfield<6> tcp6; Bitfield<5> tcp5; @@ -375,6 +376,7 @@ namespace ArmISA Bitfield<13, 12> cp6; Bitfield<15, 14> cp7; Bitfield<17, 16> cp8; + Bitfield<17, 16> zen; // SVE Bitfield<19, 18> cp9; Bitfield<21, 20> cp10; Bitfield<21, 20> fpen; // AArch64 @@ -636,9 +638,17 @@ namespace ArmISA Bitfield<20> tta; Bitfield<13, 12> res1_13_12_el2; Bitfield<10> tfp; - Bitfield<9, 0> res1_9_0_el2; + Bitfield<9> res1_9_el2; + Bitfield<8> res1_8_el2; + Bitfield<8> ez; // SVE (CPTR_EL3) + Bitfield<8> tz; // SVE (CPTR_EL2) + Bitfield<7, 0> res1_7_0_el2; EndBitUnion(CPTR) + BitUnion64(ZCR) + Bitfield<3, 0> len; + EndBitUnion(ZCR) + } #endif // __ARCH_ARM_MISCREGS_TYPES_HH__ diff --git a/src/arch/arm/nativetrace.cc b/src/arch/arm/nativetrace.cc index 395232e00..6d588bab8 100644 --- a/src/arch/arm/nativetrace.cc +++ b/src/arch/arm/nativetrace.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2011, 2014, 2016 ARM Limited + * Copyright (c) 2010-2011, 2014, 2016-2017 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -126,7 +126,8 @@ Trace::ArmNativeTrace::ThreadState::update(ThreadContext *tc) changed[STATE_CPSR] = (newState[STATE_CPSR] != oldState[STATE_CPSR]); for (int i = 0; i < 
NumVecV7ArchRegs; i++) { - auto vec(tc->readVecReg(RegId(VecRegClass,i)).as<uint64_t, 2>()); + auto vec(tc->readVecReg(RegId(VecRegClass,i)) + .as<uint64_t, MaxSveVecLenInDWords>()); newState[STATE_F0 + 2*i] = vec[0]; newState[STATE_F0 + 2*i + 1] = vec[1]; } diff --git a/src/arch/arm/process.cc b/src/arch/arm/process.cc index 553bac521..bc4495941 100644 --- a/src/arch/arm/process.cc +++ b/src/arch/arm/process.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010, 2012, 2018 ARM Limited + * Copyright (c) 2010, 2012, 2017-2018 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -134,6 +134,8 @@ ArmProcess64::initState() // Enable the floating point coprocessors. cpacr.cp10 = 0x3; cpacr.cp11 = 0x3; + // Enable SVE. + cpacr.zen = 0x3; tc->setMiscReg(MISCREG_CPACR_EL1, cpacr); // Generically enable floating point support. FPEXC fpexc = tc->readMiscReg(MISCREG_FPEXC); diff --git a/src/arch/arm/registers.hh b/src/arch/arm/registers.hh index fd59f3ed8..8ee48edc0 100644 --- a/src/arch/arm/registers.hh +++ b/src/arch/arm/registers.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2011, 2014, 2016 ARM Limited + * Copyright (c) 2010-2011, 2014, 2016-2017 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -62,20 +62,18 @@ using ArmISAInst::MaxInstDestRegs; using ArmISAInst::MaxMiscDestRegs; // Number of VecElem per Vector Register, computed based on the vector length -constexpr unsigned NumVecElemPerVecReg = 4; +constexpr unsigned NumVecElemPerVecReg = MaxSveVecLenInWords; + using VecElem = uint32_t; using VecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, false>; using ConstVecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, true>; using VecRegContainer = VecReg::Container; -constexpr size_t VecRegSizeBytes = NumVecElemPerVecReg * sizeof(VecElem); - -// Dummy typedefs -using VecPredReg = ::DummyVecPredReg; -using ConstVecPredReg = ::DummyConstVecPredReg; -using VecPredRegContainer = ::DummyVecPredRegContainer; -constexpr size_t VecPredRegSizeBits = ::DummyVecPredRegSizeBits; -constexpr bool VecPredRegHasPackedRepr = ::DummyVecPredRegHasPackedRepr; +using VecPredReg = ::VecPredRegT<VecElem, NumVecElemPerVecReg, + VecPredRegHasPackedRepr, false>; +using ConstVecPredReg = ::VecPredRegT<VecElem, NumVecElemPerVecReg, + VecPredRegHasPackedRepr, true>; +using VecPredRegContainer = VecPredReg::Container; // Constants Related to the number of registers const int NumIntArchRegs = NUM_ARCH_INTREGS; @@ -90,7 +88,8 @@ const int NumVecSpecialRegs = 8; const int NumIntRegs = NUM_INTREGS; const int NumFloatRegs = NumFloatV8ArchRegs + NumFloatSpecialRegs; const int NumVecRegs = NumVecV8ArchRegs + NumVecSpecialRegs; -const int NumVecPredRegs = 1; +const int NumVecPredRegs = 17; // P0-P15, FFR +const int PREDREG_FFR = 16; const int NumCCRegs = NUM_CCREGS; const int NumMiscRegs = NUM_MISCREGS; diff --git a/src/arch/arm/system.cc b/src/arch/arm/system.cc index 70622c328..aa487767f 100644 --- a/src/arch/arm/system.cc +++ b/src/arch/arm/system.cc @@ -71,6 +71,8 @@ ArmSystem::ArmSystem(Params *p) _highestELIs64(p->highest_el_is_64), _physAddrRange64(p->phys_addr_range_64), _haveLargeAsid64(p->have_large_asid_64), + _haveSVE(p->have_sve), + _sveVL(p->sve_vl), _m5opRange(p->m5ops_base ? 
RangeSize(p->m5ops_base, 0x10000) : AddrRange(1, 0)), // Create an empty range if disabled diff --git a/src/arch/arm/system.hh b/src/arch/arm/system.hh index 1a6a64fe7..263dd289e 100644 --- a/src/arch/arm/system.hh +++ b/src/arch/arm/system.hh @@ -123,6 +123,14 @@ class ArmSystem : public System const bool _haveLargeAsid64; /** + * True if SVE is implemented (ARMv8) + */ + const bool _haveSVE; + + /** SVE vector length at reset, in quadwords */ + const unsigned _sveVL; + + /** * Range for memory-mapped m5 pseudo ops. The range will be * invalid/empty if disabled. */ @@ -227,6 +235,12 @@ class ArmSystem : public System /** Returns true if ASID is 16 bits in AArch64 (ARMv8) */ bool haveLargeAsid64() const { return _haveLargeAsid64; } + /** Returns true if SVE is implemented (ARMv8) */ + bool haveSVE() const { return _haveSVE; } + + /** Returns the SVE vector length at reset, in quadwords */ + unsigned sveVL() const { return _sveVL; } + /** Returns the supported physical address range in bits if the highest * implemented exception level is 64 bits (ARMv8) */ uint8_t physAddrRange64() const { return _physAddrRange64; } diff --git a/src/arch/arm/types.hh b/src/arch/arm/types.hh index 9ce02524e..a608a2046 100644 --- a/src/arch/arm/types.hh +++ b/src/arch/arm/types.hh @@ -72,6 +72,10 @@ namespace ArmISA Bitfield<63, 62> decoderFault; // See DecoderFault Bitfield<61> illegalExecution; + // SVE vector length, encoded in the same format as the ZCR_EL<x>.LEN + // bitfields + Bitfield<59, 56> sveLen; + // ITSTATE bits Bitfield<55, 48> itstate; Bitfield<55, 52> itstateCond; @@ -628,6 +632,7 @@ namespace ArmISA EC_HVC_64 = 0x16, EC_SMC_64 = 0x17, EC_TRAPPED_MSR_MRS_64 = 0x18, + EC_TRAPPED_SVE = 0x19, EC_PREFETCH_ABORT_TO_HYP = 0x20, EC_PREFETCH_ABORT_LOWER_EL = 0x20, // AArch64 alias EC_PREFETCH_ABORT_FROM_HYP = 0x21, @@ -754,6 +759,18 @@ namespace ArmISA } } + constexpr unsigned MaxSveVecLenInBits = 2048; + static_assert(MaxSveVecLenInBits >= 128 && + MaxSveVecLenInBits <= 2048 && + MaxSveVecLenInBits % 128 == 0, + "Unsupported max. SVE vector length"); + constexpr unsigned MaxSveVecLenInBytes = MaxSveVecLenInBits >> 3; + constexpr unsigned MaxSveVecLenInWords = MaxSveVecLenInBits >> 5; + constexpr unsigned MaxSveVecLenInDWords = MaxSveVecLenInBits >> 6; + + constexpr unsigned VecRegSizeBytes = MaxSveVecLenInBytes; + constexpr unsigned VecPredRegSizeBits = MaxSveVecLenInBytes; + constexpr unsigned VecPredRegHasPackedRepr = false; } // namespace ArmISA #endif diff --git a/src/arch/arm/utility.cc b/src/arch/arm/utility.cc index bee801358..29b39b89e 100644 --- a/src/arch/arm/utility.cc +++ b/src/arch/arm/utility.cc @@ -297,6 +297,17 @@ ELIs32(ThreadContext *tc, ExceptionLevel el) return aarch32; } +bool +ELIsInHost(ThreadContext *tc, ExceptionLevel el) +{ + if (!ArmSystem::haveVirtualization(tc)) { + return false; + } + HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); + return (!isSecureBelowEL3(tc) && !ELIs32(tc, EL2) && hcr.e2h == 1 && + (el == EL2 || (el == EL0 && hcr.tge == 1))); +} + std::pair<bool, bool> ELUsingAArch32K(ThreadContext *tc, ExceptionLevel el) { diff --git a/src/arch/arm/utility.hh b/src/arch/arm/utility.hh index d802b944a..c6ff9469d 100644 --- a/src/arch/arm/utility.hh +++ b/src/arch/arm/utility.hh @@ -179,6 +179,12 @@ bool ELIs32(ThreadContext *tc, ExceptionLevel el); bool ELIs64(ThreadContext *tc, ExceptionLevel el); +/** + * Returns true if the current exception level `el` is executing a Host OS or + * an application of a Host OS (Armv8.1 Virtualization Host Extensions). 
+ */ +bool ELIsInHost(ThreadContext *tc, ExceptionLevel el); + bool isBigEndian64(ThreadContext *tc); /** diff --git a/src/arch/generic/vec_reg.hh b/src/arch/generic/vec_reg.hh index f26a8c8ad..ed2545c64 100644 --- a/src/arch/generic/vec_reg.hh +++ b/src/arch/generic/vec_reg.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, 2018 ARM Limited + * Copyright (c) 2015-2018 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -154,6 +154,8 @@ #include "base/cprintf.hh" #include "base/logging.hh" +constexpr unsigned MaxVecRegLenInBytes = 256; + template <size_t Sz> class VecRegContainer; @@ -271,6 +273,8 @@ class VecRegContainer { static_assert(Sz > 0, "Cannot create Vector Register Container of zero size"); + static_assert(Sz <= MaxVecRegLenInBytes, + "Vector Register size limit exceeded"); public: static constexpr size_t SIZE = Sz; using Container = std::array<uint8_t,Sz>; @@ -519,6 +523,7 @@ class VecLaneT friend class VecRegContainer<32>; friend class VecRegContainer<64>; friend class VecRegContainer<128>; + friend class VecRegContainer<MaxVecRegLenInBytes>; /** My type alias. */ using MyClass = VecLaneT<VecElem, Const>; diff --git a/src/cpu/FuncUnit.py b/src/cpu/FuncUnit.py index a408de3ab..21e37be87 100644 --- a/src/cpu/FuncUnit.py +++ b/src/cpu/FuncUnit.py @@ -1,4 +1,4 @@ -# Copyright (c) 2010,2018 ARM Limited +# Copyright (c) 2010, 2017-2018 ARM Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -47,13 +47,16 @@ class OpClass(Enum): 'FloatMisc', 'FloatSqrt', 'SimdAdd', 'SimdAddAcc', 'SimdAlu', 'SimdCmp', 'SimdCvt', 'SimdMisc', 'SimdMult', 'SimdMultAcc', 'SimdShift', 'SimdShiftAcc', - 'SimdSqrt', 'SimdFloatAdd', 'SimdFloatAlu', 'SimdFloatCmp', - 'SimdFloatCvt', 'SimdFloatDiv', 'SimdFloatMisc', 'SimdFloatMult', - 'SimdFloatMultAcc', 'SimdFloatSqrt', + 'SimdDiv', 'SimdSqrt', 'SimdFloatAdd', 'SimdFloatAlu', + 'SimdFloatCmp', 'SimdFloatCvt', 'SimdFloatDiv', 'SimdFloatMisc', + 'SimdFloatMult', 'SimdFloatMultAcc', 'SimdFloatSqrt', + 'SimdReduceAdd', 'SimdReduceAlu', 'SimdReduceCmp', + 'SimdFloatReduceAdd', 'SimdFloatReduceCmp', 'SimdAes', 'SimdAesMix', 'SimdSha1Hash', 'SimdSha1Hash2', 'SimdSha256Hash', 'SimdSha256Hash2', 'SimdShaSigma2', - 'SimdShaSigma3', 'MemRead', 'MemWrite', - 'FloatMemRead', 'FloatMemWrite', + 'SimdShaSigma3', + 'SimdPredAlu', + 'MemRead', 'MemWrite', 'FloatMemRead', 'FloatMemWrite', 'IprAccess', 'InstPrefetch'] class OpDesc(SimObject): diff --git a/src/cpu/exetrace.cc b/src/cpu/exetrace.cc index bbde89c00..a228893f2 100644 --- a/src/cpu/exetrace.cc +++ b/src/cpu/exetrace.cc @@ -1,4 +1,16 @@ /* + * Copyright (c) 2017 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2001-2005 The Regents of The University of Michigan * All rights reserved. 
* @@ -118,7 +130,38 @@ Trace::ExeTracerRecord::traceInst(const StaticInstPtr &inst, bool ran) } if (Debug::ExecResult && data_status != DataInvalid) { - ccprintf(outs, " D=%#018x", data.as_int); + switch (data_status) { + case DataVec: + { + ccprintf(outs, " D=0x["); + auto dv = data.as_vec->as<uint32_t>(); + for (int i = TheISA::VecRegSizeBytes / 4 - 1; i >= 0; + i--) { + ccprintf(outs, "%08x", dv[i]); + if (i != 0) { + ccprintf(outs, "_"); + } + } + ccprintf(outs, "]"); + } + break; + case DataVecPred: + { + ccprintf(outs, " D=0b["); + auto pv = data.as_pred->as<uint8_t>(); + for (int i = TheISA::VecPredRegSizeBits - 1; i >= 0; i--) { + ccprintf(outs, pv[i] ? "1" : "0"); + if (i != 0 && i % 4 == 0) { + ccprintf(outs, "_"); + } + } + ccprintf(outs, "]"); + } + break; + default: + ccprintf(outs, " D=%#018x", data.as_int); + break; + } } if (Debug::ExecEffAddr && getMemValid()) diff --git a/src/cpu/minor/MinorCPU.py b/src/cpu/minor/MinorCPU.py index ae97f6c4f..5aebbf805 100644 --- a/src/cpu/minor/MinorCPU.py +++ b/src/cpu/minor/MinorCPU.py @@ -1,4 +1,4 @@ -# Copyright (c) 2012-2014,2018 ARM Limited +# Copyright (c) 2012-2014, 2017-2018 ARM Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -148,15 +148,24 @@ class MinorDefaultFloatSimdFU(MinorFU): 'FloatMultAcc', 'FloatDiv', 'FloatSqrt', 'SimdAdd', 'SimdAddAcc', 'SimdAlu', 'SimdCmp', 'SimdCvt', 'SimdMisc', 'SimdMult', 'SimdMultAcc', 'SimdShift', 'SimdShiftAcc', - 'SimdSqrt', 'SimdFloatAdd', 'SimdFloatAlu', 'SimdFloatCmp', + 'SimdDiv', 'SimdSqrt', 'SimdFloatAdd', 'SimdFloatAlu', 'SimdFloatCmp', 'SimdFloatCvt', 'SimdFloatDiv', 'SimdFloatMisc', 'SimdFloatMult', - 'SimdFloatMultAcc', 'SimdFloatSqrt', 'SimdAes', 'SimdAesMix', + 'SimdFloatMultAcc', 'SimdFloatSqrt', 'SimdReduceAdd', 'SimdReduceAlu', + 'SimdReduceCmp', 'SimdFloatReduceAdd', 'SimdFloatReduceCmp', + 'SimdAes', 'SimdAesMix', 'SimdSha1Hash', 'SimdSha1Hash2', 'SimdSha256Hash', 'SimdSha256Hash2', 'SimdShaSigma2', 'SimdShaSigma3']) + timings = [MinorFUTiming(description='FloatSimd', srcRegsRelativeLats=[2])] opLat = 6 +class MinorDefaultPredFU(MinorFU): + opClasses = minorMakeOpClassSet(['SimdPredAlu']) + timings = [MinorFUTiming(description="Pred", + srcRegsRelativeLats=[2])] + opLat = 3 + class MinorDefaultMemFU(MinorFU): opClasses = minorMakeOpClassSet(['MemRead', 'MemWrite', 'FloatMemRead', 'FloatMemWrite']) @@ -171,8 +180,8 @@ class MinorDefaultMiscFU(MinorFU): class MinorDefaultFUPool(MinorFUPool): funcUnits = [MinorDefaultIntFU(), MinorDefaultIntFU(), MinorDefaultIntMulFU(), MinorDefaultIntDivFU(), - MinorDefaultFloatSimdFU(), MinorDefaultMemFU(), - MinorDefaultMiscFU()] + MinorDefaultFloatSimdFU(), MinorDefaultPredFU(), + MinorDefaultMemFU(), MinorDefaultMiscFU()] class ThreadPolicy(Enum): vals = ['SingleThreaded', 'RoundRobin', 'Random'] diff --git a/src/cpu/o3/FUPool.py b/src/cpu/o3/FUPool.py index 1461b405c..55fb82f84 100644 --- a/src/cpu/o3/FUPool.py +++ b/src/cpu/o3/FUPool.py @@ -1,3 +1,15 @@ +# Copyright (c) 2017 ARM Limited +# All rights reserved +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. 
You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# # Copyright (c) 2006-2007 The Regents of The University of Michigan # All rights reserved. # @@ -38,4 +50,4 @@ class FUPool(SimObject): class DefaultFUPool(FUPool): FUList = [ IntALU(), IntMultDiv(), FP_ALU(), FP_MultDiv(), ReadPort(), - SIMD_Unit(), WritePort(), RdWrPort(), IprPort() ] + SIMD_Unit(), PredALU(), WritePort(), RdWrPort(), IprPort() ] diff --git a/src/cpu/o3/FuncUnitConfig.py b/src/cpu/o3/FuncUnitConfig.py index ef114df09..3b02aab79 100644 --- a/src/cpu/o3/FuncUnitConfig.py +++ b/src/cpu/o3/FuncUnitConfig.py @@ -1,4 +1,4 @@ -# Copyright (c) 2010 ARM Limited +# Copyright (c) 2010, 2017 ARM Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -86,6 +86,7 @@ class SIMD_Unit(FUDesc): OpDesc(opClass='SimdMultAcc'), OpDesc(opClass='SimdShift'), OpDesc(opClass='SimdShiftAcc'), + OpDesc(opClass='SimdDiv'), OpDesc(opClass='SimdSqrt'), OpDesc(opClass='SimdFloatAdd'), OpDesc(opClass='SimdFloatAlu'), @@ -95,9 +96,18 @@ class SIMD_Unit(FUDesc): OpDesc(opClass='SimdFloatMisc'), OpDesc(opClass='SimdFloatMult'), OpDesc(opClass='SimdFloatMultAcc'), - OpDesc(opClass='SimdFloatSqrt') ] + OpDesc(opClass='SimdFloatSqrt'), + OpDesc(opClass='SimdReduceAdd'), + OpDesc(opClass='SimdReduceAlu'), + OpDesc(opClass='SimdReduceCmp'), + OpDesc(opClass='SimdFloatReduceAdd'), + OpDesc(opClass='SimdFloatReduceCmp') ] count = 4 +class PredALU(FUDesc): + opList = [ OpDesc(opClass='SimdPredAlu') ] + count = 1 + class ReadPort(FUDesc): opList = [ OpDesc(opClass='MemRead'), OpDesc(opClass='FloatMemRead') ] diff --git a/src/cpu/op_class.hh b/src/cpu/op_class.hh index 1bb88e1cd..ab5bdf247 100644 --- a/src/cpu/op_class.hh +++ b/src/cpu/op_class.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010,2018 ARM Limited + * Copyright (c) 2010, 2017-2018 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -73,7 +73,11 @@ static const OpClass SimdMultOp = Enums::SimdMult; static const OpClass SimdMultAccOp = Enums::SimdMultAcc; static const OpClass SimdShiftOp = Enums::SimdShift; static const OpClass SimdShiftAccOp = Enums::SimdShiftAcc; +static const OpClass SimdDivOp = Enums::SimdDiv; static const OpClass SimdSqrtOp = Enums::SimdSqrt; +static const OpClass SimdReduceAddOp = Enums::SimdReduceAdd; +static const OpClass SimdReduceAluOp = Enums::SimdReduceAlu; +static const OpClass SimdReduceCmpOp = Enums::SimdReduceCmp; static const OpClass SimdFloatAddOp = Enums::SimdFloatAdd; static const OpClass SimdFloatAluOp = Enums::SimdFloatAlu; static const OpClass SimdFloatCmpOp = Enums::SimdFloatCmp; @@ -83,6 +87,8 @@ static const OpClass SimdFloatMiscOp = Enums::SimdFloatMisc; static const OpClass SimdFloatMultOp = Enums::SimdFloatMult; static const OpClass SimdFloatMultAccOp = Enums::SimdFloatMultAcc; static const OpClass SimdFloatSqrtOp = Enums::SimdFloatSqrt; +static const OpClass SimdFloatReduceCmpOp = Enums::SimdFloatReduceCmp; +static const OpClass SimdFloatReduceAddOp = Enums::SimdFloatReduceAdd; static const OpClass SimdAesOp = Enums::SimdAes; static const OpClass SimdAesMixOp = Enums::SimdAesMix; static const OpClass SimdSha1HashOp = Enums::SimdSha1Hash; @@ -91,6 +97,7 @@ static const OpClass SimdSha256HashOp = Enums::SimdSha256Hash; static const OpClass 
SimdSha256Hash2Op = Enums::SimdSha256Hash2; static const OpClass SimdShaSigma2Op = Enums::SimdShaSigma2; static const OpClass SimdShaSigma3Op = Enums::SimdShaSigma3; +static const OpClass SimdPredAluOp = Enums::SimdPredAlu; static const OpClass MemReadOp = Enums::MemRead; static const OpClass MemWriteOp = Enums::MemWrite; static const OpClass FloatMemReadOp = Enums::FloatMemRead; diff --git a/src/cpu/simple_thread.cc b/src/cpu/simple_thread.cc index f86acedd6..c18bac2ef 100644 --- a/src/cpu/simple_thread.cc +++ b/src/cpu/simple_thread.cc @@ -1,4 +1,16 @@ /* + * Copyright (c) 2018 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2001-2006 The Regents of The University of Michigan * All rights reserved. * @@ -66,7 +78,7 @@ SimpleThread::SimpleThread(BaseCPU *_cpu, int _thread_num, System *_sys, BaseTLB *_dtb, TheISA::ISA *_isa) : ThreadState(_cpu, _thread_num, _process), isa(_isa), predicate(false), system(_sys), - itb(_itb), dtb(_dtb) + itb(_itb), dtb(_dtb), decoder(TheISA::Decoder(_isa)) { clearArchRegs(); tc = new ProxyThreadContext<SimpleThread>(this); @@ -77,7 +89,7 @@ SimpleThread::SimpleThread(BaseCPU *_cpu, int _thread_num, System *_sys, BaseTLB *_itb, BaseTLB *_dtb, TheISA::ISA *_isa, bool use_kernel_stats) : ThreadState(_cpu, _thread_num, NULL), isa(_isa), system(_sys), itb(_itb), - dtb(_dtb) + dtb(_dtb), decoder(TheISA::Decoder(_isa)) { tc = new ProxyThreadContext<SimpleThread>(this); diff --git a/util/cpt_upgraders/arm-sve.py b/util/cpt_upgraders/arm-sve.py new file mode 100644 index 000000000..53fab7fd9 --- /dev/null +++ b/util/cpt_upgraders/arm-sve.py @@ -0,0 +1,37 @@ +def upgrader(cpt): + """ + Update the checkpoint to support the initial SVE implementation. + The updater takes the following steps: + + 1) Set isa.haveSVE to false + 2) Set isa.sveVL to 1 + 3) Add the SVE misc registers to the checkpoint + """ + if cpt.get('root','isa') == 'arm': + for sec in cpt.sections(): + import re + # Search for all ISA sections + if re.search(r'.*sys.*\.cpu.*\.isa$', sec): + + # haveSVE = false + cpt.set(sec, 'haveSVE', 'false') + + # sveVL (SVE vector length in quadwords) = 1 + # (This is a dummy value since haveSVE is set to false) + cpt.set(sec, 'sveVL', '1') + + # Update the SVE misc registers (dummy values) + mr = cpt.get(sec, 'miscRegs').split() + if len(mr) == 820: + print("MISCREG_SVE registers already seem to be inserted.") + else: + # Replace MISCREG_FREESLOT_1 with MISCREG_ID_AA64ZFR0_EL1 + mr[-1] = 0 + + mr.append(0) # Add dummy value for MISCREG_ZCR_EL3 + mr.append(0) # Add dummy value for MISCREG_ZCR_EL2 + mr.append(0) # Add dummy value for MISCREG_ZCR_EL12 + mr.append(0) # Add dummy value for MISCREG_ZCR_EL1 + cpt.set(sec, 'miscRegs', ' '.join(str(x) for x in mr)) + +legacy_version = 15
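
The vector-length plumbing in this changeset follows a single convention: the per-system length (the sve_vl value captured by ArmSystem in system.cc/system.hh above) counts 128-bit quadwords, ZCR_EL<x>.LEN and the sveLen ExtMachInst bitfield hold that count minus one, and each predicate register carries one bit per vector byte, which is why VecPredRegSizeBits equals MaxSveVecLenInBytes in types.hh. A minimal sketch, not part of the patch and using an illustrative helper name, makes the arithmetic explicit:

    # Illustrative only; mirrors the constants in src/arch/arm/types.hh
    # and the ZCR_EL<x>.LEN encoding used throughout the patch.
    def sve_lengths(vl_quadwords):
        # 128- to 2048-bit in 128-bit steps, per the static_assert above
        assert 1 <= vl_quadwords <= 16
        bits = vl_quadwords * 128
        return {
            'zcr_len':   vl_quadwords - 1,  # ZCR_EL<x>.LEN / ExtMachInst.sveLen
            'bits':      bits,
            'bytes':     bits >> 3,         # cf. MaxSveVecLenInBytes
            'words':     bits >> 5,         # cf. MaxSveVecLenInWords (32-bit VecElem)
            'dwords':    bits >> 6,         # cf. MaxSveVecLenInDWords
            'pred_bits': bits >> 3,         # cf. VecPredRegSizeBits: 1 bit per byte
        }

    # At the architectural maximum of 16 quadwords:
    # sve_lengths(16) == {'zcr_len': 15, 'bits': 2048, 'bytes': 256,
    #                     'words': 64, 'dwords': 32, 'pred_bits': 256}

Because the register containers are statically sized for the 2048-bit maximum (NumVecElemPerVecReg = MaxSveVecLenInWords in registers.hh), a smaller configured vector length only changes the encoded ZCR/sveLen value and the prefix of each container that instructions operate on, not the container size itself.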