summaryrefslogtreecommitdiff
path: root/src/arch/arm/isa/insts
diff options
context:
space:
mode:
authorARM gem5 Developers <none@none>2014-01-24 15:29:34 -0600
committerARM gem5 Developers <none@none>2014-01-24 15:29:34 -0600
commit612f8f074fa1099cf70faf495d46cc647762a031 (patch)
treebd1e99c43bf15292395eadd4b7ae3f5c823545c3 /src/arch/arm/isa/insts
parentf3585c841e964c98911784a187fc4f081a02a0a6 (diff)
downloadgem5-612f8f074fa1099cf70faf495d46cc647762a031.tar.xz
arm: Add support for ARMv8 (AArch64 & AArch32)
Note: AArch64 and AArch32 interworking is not supported. If you use an AArch64 kernel you are restricted to AArch64 user-mode binaries. This will be addressed in a later patch. Note: Virtualization is only supported in AArch32 mode. This will also be fixed in a later patch. Contributors: Giacomo Gabrielli (TrustZone, LPAE, system-level AArch64, AArch64 NEON, validation) Thomas Grocutt (AArch32 Virtualization, AArch64 FP, validation) Mbou Eyole (AArch64 NEON, validation) Ali Saidi (AArch64 Linux support, code integration, validation) Edmund Grimley-Evans (AArch64 FP) William Wang (AArch64 Linux support) Rene De Jong (AArch64 Linux support, performance opt.) Matt Horsnell (AArch64 MP, validation) Matt Evans (device models, code integration, validation) Chris Adeniyi-Jones (AArch64 syscall-emulation) Prakash Ramrakhyani (validation) Dam Sunwoo (validation) Chander Sudanthi (validation) Stephan Diestelhorst (validation) Andreas Hansson (code integration, performance opt.) Eric Van Hensbergen (performance opt.) Gabe Black
Diffstat (limited to 'src/arch/arm/isa/insts')
-rw-r--r--src/arch/arm/isa/insts/aarch64.isa58
-rw-r--r--src/arch/arm/isa/insts/branch.isa29
-rw-r--r--src/arch/arm/isa/insts/branch64.isa248
-rw-r--r--src/arch/arm/isa/insts/data.isa5
-rw-r--r--src/arch/arm/isa/insts/data64.isa465
-rw-r--r--src/arch/arm/isa/insts/div.isa12
-rw-r--r--src/arch/arm/isa/insts/fp.isa154
-rw-r--r--src/arch/arm/isa/insts/fp64.isa811
-rw-r--r--src/arch/arm/isa/insts/insts.isa21
-rw-r--r--src/arch/arm/isa/insts/ldr.isa8
-rw-r--r--src/arch/arm/isa/insts/ldr64.isa446
-rw-r--r--src/arch/arm/isa/insts/m5ops.isa212
-rw-r--r--src/arch/arm/isa/insts/macromem.isa71
-rw-r--r--src/arch/arm/isa/insts/mem.isa32
-rw-r--r--src/arch/arm/isa/insts/misc.isa446
-rw-r--r--src/arch/arm/isa/insts/misc64.isa147
-rw-r--r--src/arch/arm/isa/insts/neon.isa569
-rw-r--r--src/arch/arm/isa/insts/neon64.isa3355
-rw-r--r--src/arch/arm/isa/insts/neon64_mem.isa471
-rw-r--r--src/arch/arm/isa/insts/str.isa9
-rw-r--r--src/arch/arm/isa/insts/str64.isa372
-rw-r--r--src/arch/arm/isa/insts/swap.isa7
22 files changed, 7730 insertions, 218 deletions
diff --git a/src/arch/arm/isa/insts/aarch64.isa b/src/arch/arm/isa/insts/aarch64.isa
new file mode 100644
index 000000000..6fcf9b5d2
--- /dev/null
+++ b/src/arch/arm/isa/insts/aarch64.isa
@@ -0,0 +1,58 @@
+// -*- mode:c++ -*-
+
+// Copyright (c) 2011 ARM Limited
+// All rights reserved
+//
+// The license below extends only to copyright in the software and shall
+// not be construed as granting a license to any other intellectual
+// property including but not limited to intellectual property relating
+// to a hardware implementation of the functionality of the software
+// licensed hereunder. You may use the software subject to the license
+// terms below provided that you ensure that this notice is replicated
+// unmodified and in its entirety in all distributions of the software,
+// modified or unmodified, in source code or in binary form.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Gabe Black
+
+let {{
+ movzCode = 'Dest64 = ((uint64_t)imm1) << imm2;'
+ movzIop = InstObjParams("movz", "Movz", "RegImmImmOp", movzCode, [])
+ header_output += RegImmImmOpDeclare.subst(movzIop)
+ decoder_output += RegImmImmOpConstructor.subst(movzIop)
+ exec_output += BasicExecute.subst(movzIop)
+
+ movkCode = 'Dest64 = insertBits(Dest64, imm2 + 15, imm2, imm1);'
+ movkIop = InstObjParams("movk", "Movk", "RegImmImmOp", movkCode, [])
+ header_output += RegImmImmOpDeclare.subst(movkIop)
+ decoder_output += RegImmImmOpConstructor.subst(movkIop)
+ exec_output += BasicExecute.subst(movkIop)
+
+ movnCode = 'Dest64 = ~(((uint64_t)imm1) << imm2);'
+ movnIop = InstObjParams("movn", "Movn", "RegImmImmOp", movnCode, [])
+ header_output += RegImmImmOpDeclare.subst(movnIop)
+ decoder_output += RegImmImmOpConstructor.subst(movnIop)
+ exec_output += BasicExecute.subst(movnIop)
+}};
diff --git a/src/arch/arm/isa/insts/branch.isa b/src/arch/arm/isa/insts/branch.isa
index e360f4581..3ee9d88e4 100644
--- a/src/arch/arm/isa/insts/branch.isa
+++ b/src/arch/arm/isa/insts/branch.isa
@@ -1,6 +1,6 @@
// -*- mode:c++ -*-
-// Copyright (c) 2010 ARM Limited
+// Copyright (c) 2010-2012 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -48,7 +48,7 @@ let {{
bCode = '''
NPC = (uint32_t)(PC + imm);
'''
- br_tgt_code = '''pcs.instNPC(branchPC.instPC() + imm);'''
+ br_tgt_code = '''pcs.instNPC((uint32_t)(branchPC.instPC() + imm));'''
instFlags = ["IsDirectControl"]
if (link):
bCode += '''
@@ -86,9 +86,9 @@ let {{
Name += "Imm"
# Since we're switching ISAs, the target ISA will be the opposite
# of the current ISA. Thumb is whether the target is ARM.
- newPC = '(Thumb ? (roundDown(PC, 4) + imm) : (PC + imm))'
+ newPC = '(uint32_t)(Thumb ? (roundDown(PC, 4) + imm) : (PC + imm))'
br_tgt_code = '''
- pcs.instNPC((branchPC.thumb() ? (roundDown(branchPC.instPC(),4) + imm) :
+ pcs.instNPC((uint32_t)(branchPC.thumb() ? (roundDown(branchPC.instPC(),4) + imm) :
(branchPC.instPC() + imm)));
'''
base = "BranchImmCond"
@@ -150,7 +150,26 @@ let {{
if imm:
decoder_output += BranchTarget.subst(blxIop)
- #Ignore BXJ for now
+ bxjcode = '''
+ HSTR hstr = Hstr;
+ CPSR cpsr = Cpsr;
+ SCR scr = Scr;
+
+ if (ArmSystem::haveVirtualization(xc->tcBase()) && hstr.tjdbx &&
+ !inSecureState(scr, cpsr) && (cpsr.mode != MODE_HYP)) {
+ fault = new HypervisorTrap(machInst, op1, EC_TRAPPED_BXJ);
+ }
+ IWNPC = Op1;
+ '''
+
+ bxjIop = InstObjParams("bxj", "BxjReg", "BranchRegCond",
+ {"code": bxjcode,
+ "predicate_test": predicateTest,
+ "is_ras_pop": "op1 == INTREG_LR" },
+ ["IsIndirectControl"])
+ header_output += BranchRegCondDeclare.subst(bxjIop)
+ decoder_output += BranchRegCondConstructor.subst(bxjIop)
+ exec_output += PredOpExecute.subst(bxjIop)
#CBNZ, CBZ. These are always unconditional as far as predicates
for (mnem, test) in (("cbz", "=="), ("cbnz", "!=")):
diff --git a/src/arch/arm/isa/insts/branch64.isa b/src/arch/arm/isa/insts/branch64.isa
new file mode 100644
index 000000000..89cee6c22
--- /dev/null
+++ b/src/arch/arm/isa/insts/branch64.isa
@@ -0,0 +1,248 @@
+// -*- mode:c++ -*-
+
+// Copyright (c) 2011-2013 ARM Limited
+// All rights reserved
+//
+// The license below extends only to copyright in the software and shall
+// not be construed as granting a license to any other intellectual
+// property including but not limited to intellectual property relating
+// to a hardware implementation of the functionality of the software
+// licensed hereunder. You may use the software subject to the license
+// terms below provided that you ensure that this notice is replicated
+// unmodified and in its entirety in all distributions of the software,
+// modified or unmodified, in source code or in binary form.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Gabe Black
+// Giacomo Gabrielli
+
+let {{
+
+ header_output = ""
+ decoder_output = ""
+ exec_output = ""
+
+ # B, BL
+ for (mnem, link) in (("b", False), ("bl", True)):
+ bCode = ('NPC = purifyTaggedAddr(RawPC + imm, xc->tcBase(), '
+ 'currEL(xc->tcBase()));\n')
+ instFlags = ['IsDirectControl', 'IsUncondControl']
+ if (link):
+ bCode += 'XLR = RawPC + 4;\n'
+ instFlags += ['IsCall']
+
+ bIop = InstObjParams(mnem, mnem.capitalize() + "64",
+ "BranchImm64", bCode, instFlags)
+ header_output += BranchImm64Declare.subst(bIop)
+ decoder_output += BranchImm64Constructor.subst(bIop)
+ exec_output += BasicExecute.subst(bIop)
+
+ # BR, BLR
+ for (mnem, link) in (("br", False), ("blr", True)):
+ bCode = ('NPC = purifyTaggedAddr(XOp1, xc->tcBase(), '
+ 'currEL(xc->tcBase()));\n')
+ instFlags = ['IsIndirectControl', 'IsUncondControl']
+ if (link):
+ bCode += 'XLR = RawPC + 4;\n'
+ instFlags += ['IsCall']
+
+ bIop = InstObjParams(mnem, mnem.capitalize() + "64",
+ "BranchReg64", bCode, instFlags)
+ header_output += BranchReg64Declare.subst(bIop)
+ decoder_output += BranchReg64Constructor.subst(bIop)
+ exec_output += BasicExecute.subst(bIop)
+
+ # B conditional
+ bCode = '''
+ if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode))
+ NPC = purifyTaggedAddr(RawPC + imm, xc->tcBase(),
+ currEL(xc->tcBase()));
+ else
+ NPC = NPC;
+ '''
+ bIop = InstObjParams("b", "BCond64", "BranchImmCond64", bCode,
+ ['IsCondControl', 'IsDirectControl'])
+ header_output += BranchImmCond64Declare.subst(bIop)
+ decoder_output += BranchImmCond64Constructor.subst(bIop)
+ exec_output += BasicExecute.subst(bIop)
+
+ # RET
+ bCode = ('NPC = purifyTaggedAddr(XOp1, xc->tcBase(), '
+ 'currEL(xc->tcBase()));\n')
+ instFlags = ['IsIndirectControl', 'IsUncondControl', 'IsReturn']
+
+ bIop = InstObjParams('ret', 'Ret64', "BranchRet64", bCode, instFlags)
+ header_output += BranchReg64Declare.subst(bIop)
+ decoder_output += BranchReg64Constructor.subst(bIop)
+ exec_output += BasicExecute.subst(bIop)
+
+ # ERET
+ bCode = '''Addr newPc;
+ CPSR cpsr = Cpsr;
+ CPSR spsr = Spsr;
+
+ ExceptionLevel curr_el = opModeToEL((OperatingMode) (uint8_t) cpsr.mode);
+ switch (curr_el) {
+ case EL3:
+ newPc = xc->tcBase()->readMiscReg(MISCREG_ELR_EL3);
+ break;
+ case EL2:
+ newPc = xc->tcBase()->readMiscReg(MISCREG_ELR_EL2);
+ break;
+ case EL1:
+ newPc = xc->tcBase()->readMiscReg(MISCREG_ELR_EL1);
+ break;
+ default:
+ return new UndefinedInstruction(machInst, false, mnemonic);
+ break;
+ }
+ if (spsr.width && (newPc & mask(2))) {
+ // To avoid PC Alignment fault when returning to AArch32
+ if (spsr.t)
+ newPc = newPc & ~mask(1);
+ else
+ newPc = newPc & ~mask(2);
+ }
+ spsr.q = 0;
+ spsr.it1 = 0;
+ spsr.j = 0;
+ spsr.res0_23_22 = 0;
+ spsr.ge = 0;
+ spsr.it2 = 0;
+ spsr.t = 0;
+
+ OperatingMode mode = (OperatingMode) (uint8_t) spsr.mode;
+ bool illegal = false;
+ ExceptionLevel target_el;
+ if (badMode(mode)) {
+ illegal = true;
+ } else {
+ target_el = opModeToEL(mode);
+ if (((target_el == EL2) &&
+ !ArmSystem::haveVirtualization(xc->tcBase())) ||
+ (target_el > curr_el) ||
+ (spsr.width == 1)) {
+ illegal = true;
+ } else {
+ bool known = true;
+ bool from32 = (spsr.width == 1);
+ bool to32 = false;
+ if (false) { // TODO: !haveAArch32EL
+ to32 = false;
+ } else if (!ArmSystem::highestELIs64(xc->tcBase())) {
+ to32 = true;
+ } else {
+ bool scr_rw, hcr_rw;
+ if (ArmSystem::haveSecurity(xc->tcBase())) {
+ SCR scr = xc->tcBase()->readMiscReg(MISCREG_SCR_EL3);
+ scr_rw = scr.rw;
+ } else {
+ scr_rw = true;
+ }
+
+ if (ArmSystem::haveVirtualization(xc->tcBase())) {
+ HCR hcr = xc->tcBase()->readMiscReg(MISCREG_HCR_EL2);
+ hcr_rw = hcr.rw;
+ } else {
+ hcr_rw = scr_rw;
+ }
+
+ switch (target_el) {
+ case EL3:
+ to32 = false;
+ break;
+ case EL2:
+ to32 = !scr_rw;
+ break;
+ case EL1:
+ to32 = !scr_rw || !hcr_rw;
+ break;
+ case EL0:
+ if (curr_el == EL0) {
+ to32 = cpsr.width;
+ } else if (!scr_rw || !hcr_rw) {
+ // EL0 using AArch32 if EL1 using AArch32
+ to32 = true;
+ } else {
+ known = false;
+ to32 = false;
+ }
+ }
+ }
+ if (known)
+ illegal = (from32 != to32);
+ }
+ }
+
+ if (illegal) {
+ uint8_t old_mode = cpsr.mode;
+ spsr.mode = old_mode; // Preserve old mode when invalid
+ spsr.il = 1;
+ } else {
+ if (cpsr.width != spsr.width)
+ panic("AArch32/AArch64 interprocessing not supported yet");
+ }
+ Cpsr = spsr;
+
+ CondCodesNZ = spsr.nz;
+ CondCodesC = spsr.c;
+ CondCodesV = spsr.v;
+ NPC = purifyTaggedAddr(newPc, xc->tcBase(),
+ opModeToEL((OperatingMode) (uint8_t) spsr.mode));
+ LLSCLock = 0; // Clear exclusive monitor
+ SevMailbox = 1; //Set Event Register
+ '''
+ instFlags = ['IsSerializeAfter', 'IsNonSpeculative', 'IsSquashAfter']
+ bIop = InstObjParams('eret', 'Eret64', "BranchEret64", bCode, instFlags)
+ header_output += BasicDeclare.subst(bIop)
+ decoder_output += BasicConstructor64.subst(bIop)
+ exec_output += BasicExecute.subst(bIop)
+
+ # CBNZ, CBZ
+ for (mnem, test) in (("cbz", "=="), ("cbnz", "!=")):
+ code = ('NPC = (Op164 %(test)s 0) ? '
+ 'purifyTaggedAddr(RawPC + imm, xc->tcBase(), '
+ 'currEL(xc->tcBase())) : NPC;\n')
+ code = code % {"test": test}
+ iop = InstObjParams(mnem, mnem.capitalize() + "64",
+ "BranchImmReg64", code,
+ ['IsCondControl', 'IsDirectControl'])
+ header_output += BranchImmReg64Declare.subst(iop)
+ decoder_output += BranchImmReg64Constructor.subst(iop)
+ exec_output += BasicExecute.subst(iop)
+
+ # TBNZ, TBZ
+ for (mnem, test) in (("tbz", "=="), ("tbnz", "!=")):
+ code = ('NPC = ((Op164 & imm1) %(test)s 0) ? '
+ 'purifyTaggedAddr(RawPC + imm2, xc->tcBase(), '
+ 'currEL(xc->tcBase())) : NPC;\n')
+ code = code % {"test": test}
+ iop = InstObjParams(mnem, mnem.capitalize() + "64",
+ "BranchImmImmReg64", code,
+ ['IsCondControl', 'IsDirectControl'])
+ header_output += BranchImmImmReg64Declare.subst(iop)
+ decoder_output += BranchImmImmReg64Constructor.subst(iop)
+ exec_output += BasicExecute.subst(iop)
+}};
diff --git a/src/arch/arm/isa/insts/data.isa b/src/arch/arm/isa/insts/data.isa
index be56554b0..881676496 100644
--- a/src/arch/arm/isa/insts/data.isa
+++ b/src/arch/arm/isa/insts/data.isa
@@ -1,6 +1,6 @@
// -*- mode:c++ -*-
-// Copyright (c) 2010 ARM Limited
+// Copyright (c) 2010, 2013 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -257,7 +257,8 @@ let {{
CPSR old_cpsr = Cpsr;
CPSR new_cpsr =
- cpsrWriteByInstr(old_cpsr, Spsr, 0xF, true, sctlr.nmfi);
+ cpsrWriteByInstr(old_cpsr, Spsr, Scr, Nsacr, 0xF, true,
+ sctlr.nmfi, xc->tcBase());
Cpsr = ~CondCodesMask & new_cpsr;
CondCodesNZ = new_cpsr.nz;
CondCodesC = new_cpsr.c;
diff --git a/src/arch/arm/isa/insts/data64.isa b/src/arch/arm/isa/insts/data64.isa
new file mode 100644
index 000000000..77d7541ca
--- /dev/null
+++ b/src/arch/arm/isa/insts/data64.isa
@@ -0,0 +1,465 @@
+// -*- mode:c++ -*-
+
+// Copyright (c) 2011-2013 ARM Limited
+// All rights reserved
+//
+// The license below extends only to copyright in the software and shall
+// not be construed as granting a license to any other intellectual
+// property including but not limited to intellectual property relating
+// to a hardware implementation of the functionality of the software
+// licensed hereunder. You may use the software subject to the license
+// terms below provided that you ensure that this notice is replicated
+// unmodified and in its entirety in all distributions of the software,
+// modified or unmodified, in source code or in binary form.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Gabe Black
+
+let {{
+
+ header_output = ""
+ decoder_output = ""
+ exec_output = ""
+
+ def createCcCode64(carry, overflow):
+ code = ""
+ code += '''
+ uint16_t _iz, _in;
+ _in = bits(resTemp, intWidth - 1);
+ _iz = ((resTemp & mask(intWidth)) == 0);
+ CondCodesNZ = (_in << 1) | _iz;
+ DPRINTF(Arm, "(in, iz) = (%%d, %%d)\\n", _in, _iz);
+ '''
+ if overflow and overflow != "none":
+ code += '''
+ uint16_t _iv;
+ _iv = %s & 1;
+ CondCodesV = _iv;
+ DPRINTF(Arm, "(iv) = (%%d)\\n", _iv);
+ ''' % overflow
+ if carry and carry != "none":
+ code += '''
+ uint16_t _ic;
+ _ic = %s & 1;
+ CondCodesC = _ic;
+ DPRINTF(Arm, "(ic) = (%%d)\\n", _ic);
+ ''' % carry
+ return code
+
+ oldC = 'CondCodesC'
+ oldV = 'CondCodesV'
+ # Dicts of ways to set the carry flag.
+ carryCode64 = {
+ "none": "none",
+ "add": 'findCarry(intWidth, resTemp, Op164, secOp)',
+ "sub": 'findCarry(intWidth, resTemp, Op164, ~secOp)',
+ "logic": '0'
+ }
+ # Dict of ways to set the overflow flag.
+ overflowCode64 = {
+ "none": "none",
+ "add": 'findOverflow(intWidth, resTemp, Op164, secOp)',
+ "sub": 'findOverflow(intWidth, resTemp, Op164, ~secOp)',
+ "logic": '0'
+ }
+
+ immOp2 = "uint64_t secOp M5_VAR_USED = imm;"
+ sRegOp2 = "uint64_t secOp M5_VAR_USED = " + \
+ "shiftReg64(Op264, shiftAmt, shiftType, intWidth);"
+ eRegOp2 = "uint64_t secOp M5_VAR_USED = " + \
+ "extendReg64(Op264, extendType, shiftAmt, intWidth);"
+
+ def buildDataWork(mnem, code, flagType, suffix, buildCc, buildNonCc,
+ base, templateBase):
+ code = '''
+ uint64_t resTemp M5_VAR_USED = 0;
+ ''' + code
+ ccCode = createCcCode64(carryCode64[flagType], overflowCode64[flagType])
+ Name = mnem.capitalize() + suffix
+ iop = InstObjParams(mnem, Name, base, code)
+ iopCc = InstObjParams(mnem + "s", Name + "Cc", base, code + ccCode)
+
+ def subst(iop):
+ global header_output, decoder_output, exec_output
+ header_output += eval(templateBase + "Declare").subst(iop)
+ decoder_output += eval(templateBase + "Constructor").subst(iop)
+ exec_output += BasicExecute.subst(iop)
+
+ if buildNonCc:
+ subst(iop)
+ if buildCc:
+ subst(iopCc)
+
+ def buildXImmDataInst(mnem, code, flagType = "logic", \
+ buildCc = True, buildNonCc = True, \
+ suffix = "XImm"):
+ buildDataWork(mnem, immOp2 + code, flagType, suffix,
+ buildCc, buildNonCc, "DataXImmOp", "DataXImm")
+
+ def buildXSRegDataInst(mnem, code, flagType = "logic", \
+ buildCc = True, buildNonCc = True, \
+ suffix = "XSReg"):
+ buildDataWork(mnem, sRegOp2 + code, flagType, suffix,
+ buildCc, buildNonCc, "DataXSRegOp", "DataXSReg")
+
+ def buildXERegDataInst(mnem, code, flagType = "logic", \
+ buildCc = True, buildNonCc = True, \
+ suffix = "XEReg"):
+ buildDataWork(mnem, eRegOp2 + code, flagType, suffix,
+ buildCc, buildNonCc, "DataXERegOp", "DataXEReg")
+
+ def buildDataInst(mnem, code, flagType = "logic",
+ buildCc = True, buildNonCc = True):
+ buildXImmDataInst(mnem, code, flagType, buildCc, buildNonCc)
+ buildXSRegDataInst(mnem, code, flagType, buildCc, buildNonCc)
+ buildXERegDataInst(mnem, code, flagType, buildCc, buildNonCc)
+
+ buildXImmDataInst("adr", "Dest64 = RawPC + imm", buildCc = False);
+ buildXImmDataInst("adrp", "Dest64 = (RawPC & ~mask(12)) + imm",
+ buildCc = False);
+ buildDataInst("and", "Dest64 = resTemp = Op164 & secOp;")
+ buildDataInst("eor", "Dest64 = Op164 ^ secOp;", buildCc = False)
+ buildXSRegDataInst("eon", "Dest64 = Op164 ^ ~secOp;", buildCc = False)
+ buildDataInst("sub", "Dest64 = resTemp = Op164 - secOp;", "sub")
+ buildDataInst("add", "Dest64 = resTemp = Op164 + secOp;", "add")
+ buildXSRegDataInst("adc",
+ "Dest64 = resTemp = Op164 + secOp + %s;" % oldC, "add")
+ buildXSRegDataInst("sbc",
+ "Dest64 = resTemp = Op164 - secOp - !%s;" % oldC, "sub")
+ buildDataInst("orr", "Dest64 = Op164 | secOp;", buildCc = False)
+ buildXSRegDataInst("orn", "Dest64 = Op164 | ~secOp;", buildCc = False)
+ buildXSRegDataInst("bic", "Dest64 = resTemp = Op164 & ~secOp;")
+
+ def buildDataXImmInst(mnem, code, optArgs = []):
+ global header_output, decoder_output, exec_output
+ classNamePrefix = mnem[0].upper() + mnem[1:]
+ templateBase = "DataXImm"
+ iop = InstObjParams(mnem, classNamePrefix + "64",
+ templateBase + "Op", code, optArgs)
+ header_output += eval(templateBase + "Declare").subst(iop)
+ decoder_output += eval(templateBase + "Constructor").subst(iop)
+ exec_output += BasicExecute.subst(iop)
+
+ def buildDataXRegInst(mnem, regOps, code, optArgs = [],
+ overrideOpClass=None):
+ global header_output, decoder_output, exec_output
+ templateBase = "DataX%dReg" % regOps
+ classNamePrefix = mnem[0].upper() + mnem[1:]
+ if overrideOpClass:
+ iop = InstObjParams(mnem, classNamePrefix + "64",
+ templateBase + "Op",
+ { 'code': code, 'op_class': overrideOpClass},
+ optArgs)
+ else:
+ iop = InstObjParams(mnem, classNamePrefix + "64",
+ templateBase + "Op", code, optArgs)
+ header_output += eval(templateBase + "Declare").subst(iop)
+ decoder_output += eval(templateBase + "Constructor").subst(iop)
+ exec_output += BasicExecute.subst(iop)
+
+ buildDataXRegInst("madd", 3, "Dest64 = Op164 + Op264 * Op364",
+ overrideOpClass="IntMultOp")
+ buildDataXRegInst("msub", 3, "Dest64 = Op164 - Op264 * Op364",
+ overrideOpClass="IntMultOp")
+ buildDataXRegInst("smaddl", 3,
+ "XDest = XOp1 + sext<32>(WOp2) * sext<32>(WOp3)",
+ overrideOpClass="IntMultOp")
+ buildDataXRegInst("smsubl", 3,
+ "XDest = XOp1 - sext<32>(WOp2) * sext<32>(WOp3)",
+ overrideOpClass="IntMultOp")
+ buildDataXRegInst("smulh", 2, '''
+ uint64_t op1H = (int32_t)(XOp1 >> 32);
+ uint64_t op1L = (uint32_t)XOp1;
+ uint64_t op2H = (int32_t)(XOp2 >> 32);
+ uint64_t op2L = (uint32_t)XOp2;
+ uint64_t mid1 = ((op1L * op2L) >> 32) + op1H * op2L;
+ uint64_t mid2 = op1L * op2H;
+ uint64_t result = ((uint64_t)(uint32_t)mid1 + (uint32_t)mid2) >> 32;
+ result += shiftReg64(mid1, 32, ASR, intWidth);
+ result += shiftReg64(mid2, 32, ASR, intWidth);
+ XDest = result + op1H * op2H;
+ ''', overrideOpClass="IntMultOp")
+ buildDataXRegInst("umaddl", 3, "XDest = XOp1 + WOp2 * WOp3",
+ overrideOpClass="IntMultOp")
+ buildDataXRegInst("umsubl", 3, "XDest = XOp1 - WOp2 * WOp3",
+ overrideOpClass="IntMultOp")
+ buildDataXRegInst("umulh", 2, '''
+ uint64_t op1H = (uint32_t)(XOp1 >> 32);
+ uint64_t op1L = (uint32_t)XOp1;
+ uint64_t op2H = (uint32_t)(XOp2 >> 32);
+ uint64_t op2L = (uint32_t)XOp2;
+ uint64_t mid1 = ((op1L * op2L) >> 32) + op1H * op2L;
+ uint64_t mid2 = op1L * op2H;
+ uint64_t result = ((uint64_t)(uint32_t)mid1 + (uint32_t)mid2) >> 32;
+ result += mid1 >> 32;
+ result += mid2 >> 32;
+ XDest = result + op1H * op2H;
+ ''', overrideOpClass="IntMultOp")
+
+ buildDataXRegInst("asrv", 2,
+ "Dest64 = shiftReg64(Op164, Op264, ASR, intWidth)")
+ buildDataXRegInst("lslv", 2,
+ "Dest64 = shiftReg64(Op164, Op264, LSL, intWidth)")
+ buildDataXRegInst("lsrv", 2,
+ "Dest64 = shiftReg64(Op164, Op264, LSR, intWidth)")
+ buildDataXRegInst("rorv", 2,
+ "Dest64 = shiftReg64(Op164, Op264, ROR, intWidth)")
+ buildDataXRegInst("sdiv", 2, '''
+ int64_t op1 = Op164;
+ int64_t op2 = Op264;
+ if (intWidth == 32) {
+ op1 = sext<32>(op1);
+ op2 = sext<32>(op2);
+ }
+ Dest64 = op2 == -1 ? -op1 : op2 ? op1 / op2 : 0;
+ ''', overrideOpClass="IntDivOp")
+ buildDataXRegInst("udiv", 2, "Dest64 = Op264 ? Op164 / Op264 : 0",
+ overrideOpClass="IntDivOp")
+
+ buildDataXRegInst("cls", 1, '''
+ uint64_t op1 = Op164;
+ if (bits(op1, intWidth - 1))
+ op1 ^= mask(intWidth);
+ Dest64 = (op1 == 0) ? intWidth - 1 : (intWidth - 2 - findMsbSet(op1));
+ ''')
+ buildDataXRegInst("clz", 1, '''
+ Dest64 = (Op164 == 0) ? intWidth : (intWidth - 1 - findMsbSet(Op164));
+ ''')
+ buildDataXRegInst("rbit", 1, '''
+ uint64_t result = Op164;
+ uint64_t lBit = 1ULL << (intWidth - 1);
+ uint64_t rBit = 1ULL;
+ while (lBit > rBit) {
+ uint64_t maskBits = lBit | rBit;
+ uint64_t testBits = result & maskBits;
+ // If these bits are different, swap them by toggling them.
+ if (testBits && testBits != maskBits)
+ result ^= maskBits;
+ lBit >>= 1; rBit <<= 1;
+ }
+ Dest64 = result;
+ ''')
+ buildDataXRegInst("rev", 1, '''
+ if (intWidth == 32)
+ Dest64 = betole<uint32_t>(Op164);
+ else
+ Dest64 = betole<uint64_t>(Op164);
+ ''')
+ buildDataXRegInst("rev16", 1, '''
+ int count = intWidth / 16;
+ uint64_t result = 0;
+ for (unsigned i = 0; i < count; i++) {
+ uint16_t hw = Op164 >> (i * 16);
+ result |= (uint64_t)betole<uint16_t>(hw) << (i * 16);
+ }
+ Dest64 = result;
+ ''')
+ buildDataXRegInst("rev32", 1, '''
+ int count = intWidth / 32;
+ uint64_t result = 0;
+ for (unsigned i = 0; i < count; i++) {
+ uint32_t hw = Op164 >> (i * 32);
+ result |= (uint64_t)betole<uint32_t>(hw) << (i * 32);
+ }
+ Dest64 = result;
+ ''')
+
+ msrMrs64EnabledCheckCode = '''
+ // Check for read/write access right
+ if (!can%sAArch64SysReg(flat_idx, Scr64, cpsr, xc->tcBase())) {
+ if (flat_idx == MISCREG_DAIF ||
+ flat_idx == MISCREG_DC_ZVA_Xt ||
+ flat_idx == MISCREG_DC_CVAC_Xt ||
+ flat_idx == MISCREG_DC_CIVAC_Xt
+ )
+ return new UndefinedInstruction(machInst, 0, EC_TRAPPED_MSR_MRS_64);
+ return new UndefinedInstruction(machInst, false, mnemonic);
+ }
+
+ // Check for traps to supervisor (FP/SIMD regs)
+ if (el <= EL1 && msrMrs64TrapToSup(flat_idx, el, Cpacr64))
+ return new SupervisorTrap(machInst, 0x1E00000, EC_TRAPPED_SIMD_FP);
+
+ bool is_vfp_neon = false;
+
+ // Check for traps to hypervisor
+ if ((ArmSystem::haveVirtualization(xc->tcBase()) && el <= EL2) &&
+ msrMrs64TrapToHyp(flat_idx, %s, CptrEl264, Hcr64, &is_vfp_neon)) {
+ return new HypervisorTrap(machInst, is_vfp_neon ? 0x1E00000 : imm,
+ is_vfp_neon ? EC_TRAPPED_SIMD_FP : EC_TRAPPED_MSR_MRS_64);
+ }
+
+ // Check for traps to secure monitor
+ if ((ArmSystem::haveSecurity(xc->tcBase()) && el <= EL3) &&
+ msrMrs64TrapToMon(flat_idx, CptrEl364, el, &is_vfp_neon)) {
+ return new SecureMonitorTrap(machInst,
+ is_vfp_neon ? 0x1E00000 : imm,
+ is_vfp_neon ? EC_TRAPPED_SIMD_FP : EC_TRAPPED_MSR_MRS_64);
+ }
+ '''
+
+ buildDataXImmInst("mrs", '''
+ MiscRegIndex flat_idx = (MiscRegIndex) xc->tcBase()->
+ flattenMiscIndex(op1);
+ CPSR cpsr = Cpsr;
+ ExceptionLevel el = (ExceptionLevel) (uint8_t) cpsr.el;
+ %s
+ XDest = MiscOp1_ud;
+ ''' % (msrMrs64EnabledCheckCode % ('Read', 'true'),),
+ ["IsSerializeBefore"])
+
+ buildDataXRegInst("mrsNZCV", 1, '''
+ CPSR cpsr = 0;
+ cpsr.nz = CondCodesNZ;
+ cpsr.c = CondCodesC;
+ cpsr.v = CondCodesV;
+ XDest = cpsr;
+ ''')
+
+ buildDataXImmInst("msr", '''
+ MiscRegIndex flat_idx = (MiscRegIndex) xc->tcBase()->
+ flattenMiscIndex(dest);
+ CPSR cpsr = Cpsr;
+ ExceptionLevel el = (ExceptionLevel) (uint8_t) cpsr.el;
+ %s
+ MiscDest_ud = XOp1;
+ ''' % (msrMrs64EnabledCheckCode % ('Write', 'false'),),
+ ["IsSerializeAfter", "IsNonSpeculative"])
+
+ buildDataXRegInst("msrNZCV", 1, '''
+ CPSR cpsr = XOp1;
+ CondCodesNZ = cpsr.nz;
+ CondCodesC = cpsr.c;
+ CondCodesV = cpsr.v;
+ ''')
+
+ msrdczva_ea_code = '''
+ MiscRegIndex flat_idx = (MiscRegIndex) xc->tcBase()->flattenMiscIndex(dest);
+ CPSR cpsr = Cpsr;
+ ExceptionLevel el = (ExceptionLevel) (uint8_t) cpsr.el;
+ '''
+
+ msrdczva_ea_code += msrMrs64EnabledCheckCode % ('Write', 'false')
+ msrdczva_ea_code += '''
+ Request::Flags memAccessFlags = Request::CACHE_BLOCK_ZERO|ArmISA::TLB::MustBeOne;
+ EA = XBase;
+ assert(!(Dczid & 0x10));
+ uint64_t op_size = power(2, Dczid + 2);
+ EA &= ~(op_size - 1);
+
+ '''
+
+ msrDCZVAIop = InstObjParams("dczva", "Dczva", "SysDC64",
+ { "ea_code" : msrdczva_ea_code,
+ "memacc_code" : ";", "use_uops" : 0,
+ "op_wb" : ";", "fa_code" : ";"}, ['IsStore', 'IsMemRef']);
+ header_output += DCStore64Declare.subst(msrDCZVAIop);
+ decoder_output += DCStore64Constructor.subst(msrDCZVAIop);
+ exec_output += DCStore64Execute.subst(msrDCZVAIop);
+ exec_output += DCStore64InitiateAcc.subst(msrDCZVAIop);
+ exec_output += Store64CompleteAcc.subst(msrDCZVAIop);
+
+
+
+ buildDataXImmInst("msrSP", '''
+ if (!canWriteAArch64SysReg(
+ (MiscRegIndex) xc->tcBase()->flattenMiscIndex(dest),
+ Scr64, Cpsr, xc->tcBase())) {
+ return new UndefinedInstruction(machInst, false, mnemonic);
+ }
+ MiscDest_ud = imm;
+ ''', optArgs = ["IsSerializeAfter", "IsNonSpeculative"])
+
+ buildDataXImmInst("msrDAIFSet", '''
+ if (!canWriteAArch64SysReg(
+ (MiscRegIndex) xc->tcBase()->flattenMiscIndex(dest),
+ Scr64, Cpsr, xc->tcBase())) {
+ return new UndefinedInstruction(machInst, 0, EC_TRAPPED_MSR_MRS_64);
+ }
+ CPSR cpsr = Cpsr;
+ cpsr.daif = cpsr.daif | imm;
+ Cpsr = cpsr;
+ ''', optArgs = ["IsSerializeAfter", "IsNonSpeculative"])
+
+ buildDataXImmInst("msrDAIFClr", '''
+ if (!canWriteAArch64SysReg(
+ (MiscRegIndex) xc->tcBase()->flattenMiscIndex(dest),
+ Scr64, Cpsr, xc->tcBase())) {
+ return new UndefinedInstruction(machInst, 0, EC_TRAPPED_MSR_MRS_64);
+ }
+ CPSR cpsr = Cpsr;
+ cpsr.daif = cpsr.daif & ~imm;
+ Cpsr = cpsr;
+ ''', optArgs = ["IsSerializeAfter", "IsNonSpeculative"])
+
+ def buildDataXCompInst(mnem, instType, suffix, code):
+ global header_output, decoder_output, exec_output
+ templateBase = "DataXCond%s" % instType
+ iop = InstObjParams(mnem, mnem.capitalize() + suffix + "64",
+ templateBase + "Op", code)
+ header_output += eval(templateBase + "Declare").subst(iop)
+ decoder_output += eval(templateBase + "Constructor").subst(iop)
+ exec_output += BasicExecute.subst(iop)
+
+ def buildDataXCondImmInst(mnem, code):
+ buildDataXCompInst(mnem, "CompImm", "Imm", code)
+ def buildDataXCondRegInst(mnem, code):
+ buildDataXCompInst(mnem, "CompReg", "Reg", code)
+ def buildDataXCondSelInst(mnem, code):
+ buildDataXCompInst(mnem, "Sel", "", code)
+
+ def condCompCode(flagType, op, imm):
+ ccCode = createCcCode64(carryCode64[flagType], overflowCode64[flagType])
+ opDecl = "uint64_t secOp M5_VAR_USED = imm;"
+ if not imm:
+ opDecl = "uint64_t secOp M5_VAR_USED = Op264;"
+ return opDecl + '''
+ if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) {
+ uint64_t resTemp = Op164 ''' + op + ''' secOp;
+ ''' + ccCode + '''
+ } else {
+ CondCodesNZ = (defCc >> 2) & 0x3;
+ CondCodesC = (defCc >> 1) & 0x1;
+ CondCodesV = defCc & 0x1;
+ }
+ '''
+
+ buildDataXCondImmInst("ccmn", condCompCode("add", "+", True))
+ buildDataXCondImmInst("ccmp", condCompCode("sub", "-", True))
+ buildDataXCondRegInst("ccmn", condCompCode("add", "+", False))
+ buildDataXCondRegInst("ccmp", condCompCode("sub", "-", False))
+
+ condSelCode = '''
+ if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) {
+ Dest64 = Op164;
+ } else {
+ Dest64 = %(altVal)s;
+ }
+ '''
+ buildDataXCondSelInst("csel", condSelCode % {"altVal" : "Op264"})
+ buildDataXCondSelInst("csinc", condSelCode % {"altVal" : "Op264 + 1"})
+ buildDataXCondSelInst("csinv", condSelCode % {"altVal" : "~Op264"})
+ buildDataXCondSelInst("csneg", condSelCode % {"altVal" : "-Op264"})
+}};
diff --git a/src/arch/arm/isa/insts/div.isa b/src/arch/arm/isa/insts/div.isa
index 1ff6ef9e4..0896ea94f 100644
--- a/src/arch/arm/isa/insts/div.isa
+++ b/src/arch/arm/isa/insts/div.isa
@@ -40,12 +40,6 @@
let {{
sdivCode = '''
if (Op2_sw == 0) {
- if (((SCTLR)Sctlr).dz) {
- if (FullSystem)
- return new UndefinedInstruction;
- else
- return new UndefinedInstruction(false, mnemonic);
- }
Dest_sw = 0;
} else if (Op1_sw == INT_MIN && Op2_sw == -1) {
Dest_sw = INT_MIN;
@@ -63,12 +57,6 @@ let {{
udivCode = '''
if (Op2_uw == 0) {
- if (((SCTLR)Sctlr).dz) {
- if (FullSystem)
- return new UndefinedInstruction;
- else
- return new UndefinedInstruction(false, mnemonic);
- }
Dest_uw = 0;
} else {
Dest_uw = Op1_uw / Op2_uw;
diff --git a/src/arch/arm/isa/insts/fp.isa b/src/arch/arm/isa/insts/fp.isa
index b701995f4..60f030c3d 100644
--- a/src/arch/arm/isa/insts/fp.isa
+++ b/src/arch/arm/isa/insts/fp.isa
@@ -1,6 +1,6 @@
// -*- mode:c++ -*-
-// Copyright (c) 2010 ARM Limited
+// Copyright (c) 2010-2013 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -191,14 +191,17 @@ let {{
decoder_output = ""
exec_output = ""
- vmsrIop = InstObjParams("vmsr", "Vmsr", "FpRegRegOp",
- { "code": vmsrEnabledCheckCode + \
- "MiscDest = Op1;",
+ vmsrCode = vmsrEnabledCheckCode + '''
+ MiscDest = Op1;
+ '''
+
+ vmsrIop = InstObjParams("vmsr", "Vmsr", "FpRegRegImmOp",
+ { "code": vmsrCode,
"predicate_test": predicateTest,
"op_class": "SimdFloatMiscOp" },
["IsSerializeAfter","IsNonSpeculative"])
- header_output += FpRegRegOpDeclare.subst(vmsrIop);
- decoder_output += FpRegRegOpConstructor.subst(vmsrIop);
+ header_output += FpRegRegImmOpDeclare.subst(vmsrIop);
+ decoder_output += FpRegRegImmOpConstructor.subst(vmsrIop);
exec_output += PredOpExecute.subst(vmsrIop);
vmsrFpscrCode = vmsrEnabledCheckCode + '''
@@ -215,14 +218,36 @@ let {{
decoder_output += FpRegRegOpConstructor.subst(vmsrFpscrIop);
exec_output += PredOpExecute.subst(vmsrFpscrIop);
- vmrsIop = InstObjParams("vmrs", "Vmrs", "FpRegRegOp",
- { "code": vmrsEnabledCheckCode + \
- "Dest = MiscOp1;",
+ vmrsCode = vmrsEnabledCheckCode + '''
+ CPSR cpsr = Cpsr;
+ SCR scr = Scr;
+ if (!inSecureState(scr, cpsr) && (cpsr.mode != MODE_HYP)) {
+ HCR hcr = Hcr;
+ bool hypTrap = false;
+ switch(xc->tcBase()->flattenMiscIndex(op1)) {
+ case MISCREG_FPSID:
+ hypTrap = hcr.tid0;
+ break;
+ case MISCREG_MVFR0:
+ case MISCREG_MVFR1:
+ hypTrap = hcr.tid3;
+ break;
+ }
+ if (hypTrap) {
+ return new HypervisorTrap(machInst, imm,
+ EC_TRAPPED_CP10_MRC_VMRS);
+ }
+ }
+ Dest = MiscOp1;
+ '''
+
+ vmrsIop = InstObjParams("vmrs", "Vmrs", "FpRegRegImmOp",
+ { "code": vmrsCode,
"predicate_test": predicateTest,
"op_class": "SimdFloatMiscOp" },
["IsSerializeBefore"])
- header_output += FpRegRegOpDeclare.subst(vmrsIop);
- decoder_output += FpRegRegOpConstructor.subst(vmrsIop);
+ header_output += FpRegRegImmOpDeclare.subst(vmrsIop);
+ decoder_output += FpRegRegImmOpConstructor.subst(vmrsIop);
exec_output += PredOpExecute.subst(vmrsIop);
vmrsFpscrIop = InstObjParams("vmrs", "VmrsFpscr", "FpRegRegOp",
@@ -323,7 +348,7 @@ let {{
decoder_output += FpRegRegOpConstructor.subst(vmovRegQIop);
exec_output += PredOpExecute.subst(vmovRegQIop);
- vmovCoreRegBCode = vfpEnabledCheckCode + '''
+ vmovCoreRegBCode = simdEnabledCheckCode + '''
FpDest_uw = insertBits(FpDest_uw, imm * 8 + 7, imm * 8, Op1_ub);
'''
vmovCoreRegBIop = InstObjParams("vmov", "VmovCoreRegB", "FpRegRegImmOp",
@@ -334,7 +359,7 @@ let {{
decoder_output += FpRegRegImmOpConstructor.subst(vmovCoreRegBIop);
exec_output += PredOpExecute.subst(vmovCoreRegBIop);
- vmovCoreRegHCode = vfpEnabledCheckCode + '''
+ vmovCoreRegHCode = simdEnabledCheckCode + '''
FpDest_uw = insertBits(FpDest_uw, imm * 16 + 15, imm * 16, Op1_uh);
'''
vmovCoreRegHIop = InstObjParams("vmov", "VmovCoreRegH", "FpRegRegImmOp",
@@ -453,6 +478,17 @@ let {{
singleCode = singleSimpleCode + '''
FpscrExc = fpscr;
'''
+ singleTernOp = vfpEnabledCheckCode + '''
+ FPSCR fpscr = (FPSCR) FpscrExc;
+ VfpSavedState state = prepFpState(fpscr.rMode);
+ float cOp1 = FpOp1;
+ float cOp2 = FpOp2;
+ float cOp3 = FpDestP0;
+ FpDestP0 = ternaryOp(fpscr, %(palam)s, %(op)s,
+ fpscr.fz, fpscr.dn, fpscr.rMode);
+ finishVfp(fpscr, state, fpscr.fz);
+ FpscrExc = fpscr;
+ '''
singleBinOp = "binaryOp(fpscr, FpOp1, FpOp2," + \
"%(func)s, fpscr.fz, fpscr.dn, fpscr.rMode)"
singleUnaryOp = "unaryOp(fpscr, FpOp1, %(func)s, fpscr.fz, fpscr.rMode)"
@@ -463,6 +499,19 @@ let {{
FpDestP1_uw = dblHi(dest);
FpscrExc = fpscr;
'''
+ doubleTernOp = vfpEnabledCheckCode + '''
+ FPSCR fpscr = (FPSCR) FpscrExc;
+ VfpSavedState state = prepFpState(fpscr.rMode);
+ double cOp1 = dbl(FpOp1P0_uw, FpOp1P1_uw);
+ double cOp2 = dbl(FpOp2P0_uw, FpOp2P1_uw);
+ double cOp3 = dbl(FpDestP0_uw, FpDestP1_uw);
+ double cDest = ternaryOp(fpscr, %(palam)s, %(op)s,
+ fpscr.fz, fpscr.dn, fpscr.rMode);
+ FpDestP0_uw = dblLow(cDest);
+ FpDestP1_uw = dblHi(cDest);
+ finishVfp(fpscr, state, fpscr.fz);
+ FpscrExc = fpscr;
+ '''
doubleBinOp = '''
binaryOp(fpscr, dbl(FpOp1P0_uw, FpOp1P1_uw),
dbl(FpOp2P0_uw, FpOp2P1_uw),
@@ -473,6 +522,37 @@ let {{
fpscr.fz, fpscr.rMode)
'''
+ def buildTernaryFpOp(Name, base, opClass, singleOp, doubleOp, paramStr):
+ global header_output, decoder_output, exec_output
+
+ code = singleTernOp % { "op": singleOp, "palam": paramStr }
+ sIop = InstObjParams(Name.lower() + "s", Name + "S", base,
+ { "code": code,
+ "predicate_test": predicateTest,
+ "op_class": opClass }, [])
+ code = doubleTernOp % { "op": doubleOp, "palam": paramStr }
+ dIop = InstObjParams(Name.lower() + "d", Name + "D", base,
+ { "code": code,
+ "predicate_test": predicateTest,
+ "op_class": opClass }, [])
+
+ declareTempl = eval(base + "Declare");
+ constructorTempl = eval(base + "Constructor");
+
+ for iop in sIop, dIop:
+ header_output += declareTempl.subst(iop)
+ decoder_output += constructorTempl.subst(iop)
+ exec_output += PredOpExecute.subst(iop)
+
+ buildTernaryFpOp("Vfma", "FpRegRegRegOp", "SimdFloatMultAccOp",
+ "fpMulAdd<float>", "fpMulAdd<double>", " cOp1, cOp2, cOp3" )
+ buildTernaryFpOp("Vfms", "FpRegRegRegOp", "SimdFloatMultAccOp",
+ "fpMulAdd<float>", "fpMulAdd<double>", "-cOp1, cOp2, cOp3" )
+ buildTernaryFpOp("Vfnma", "FpRegRegRegOp", "SimdFloatMultAccOp",
+ "fpMulAdd<float>", "fpMulAdd<double>", "-cOp1, cOp2, -cOp3" )
+ buildTernaryFpOp("Vfnms", "FpRegRegRegOp", "SimdFloatMultAccOp",
+ "fpMulAdd<float>", "fpMulAdd<double>", " cOp1, cOp2, -cOp3" )
+
def buildBinFpOp(name, Name, base, opClass, singleOp, doubleOp):
global header_output, decoder_output, exec_output
@@ -830,7 +910,7 @@ let {{
VfpSavedState state = prepFpState(fpscr.rMode);
vfpFlushToZero(fpscr, FpOp1);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
- FpDest_uw = vfpFpSToFixed(FpOp1, false, false, 0, false);
+ FpDest_uw = vfpFpToFixed<float>(FpOp1, false, 32, 0, false);
__asm__ __volatile__("" :: "m" (FpDest_uw));
finishVfp(fpscr, state, fpscr.fz);
FpscrExc = fpscr;
@@ -849,7 +929,7 @@ let {{
vfpFlushToZero(fpscr, cOp1);
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
- uint64_t result = vfpFpDToFixed(cOp1, false, false, 0, false);
+ uint64_t result = vfpFpToFixed<double>(cOp1, false, 32, 0, false);
__asm__ __volatile__("" :: "m" (result));
finishVfp(fpscr, state, fpscr.fz);
FpDestP0_uw = result;
@@ -868,7 +948,7 @@ let {{
VfpSavedState state = prepFpState(fpscr.rMode);
vfpFlushToZero(fpscr, FpOp1);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
- FpDest_sw = vfpFpSToFixed(FpOp1, true, false, 0, false);
+ FpDest_sw = vfpFpToFixed<float>(FpOp1, true, 32, 0, false);
__asm__ __volatile__("" :: "m" (FpDest_sw));
finishVfp(fpscr, state, fpscr.fz);
FpscrExc = fpscr;
@@ -887,7 +967,7 @@ let {{
vfpFlushToZero(fpscr, cOp1);
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
- int64_t result = vfpFpDToFixed(cOp1, true, false, 0, false);
+ int64_t result = vfpFpToFixed<double>(cOp1, true, 32, 0, false);
__asm__ __volatile__("" :: "m" (result));
finishVfp(fpscr, state, fpscr.fz);
FpDestP0_uw = result;
@@ -907,7 +987,7 @@ let {{
VfpSavedState state = prepFpState(fpscr.rMode);
fesetround(FeRoundZero);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
- FpDest_uw = vfpFpSToFixed(FpOp1, false, false, 0);
+ FpDest_uw = vfpFpToFixed<float>(FpOp1, false, 32, 0);
__asm__ __volatile__("" :: "m" (FpDest_uw));
finishVfp(fpscr, state, fpscr.fz);
FpscrExc = fpscr;
@@ -927,7 +1007,7 @@ let {{
VfpSavedState state = prepFpState(fpscr.rMode);
fesetround(FeRoundZero);
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
- uint64_t result = vfpFpDToFixed(cOp1, false, false, 0);
+ uint64_t result = vfpFpToFixed<double>(cOp1, false, 32, 0);
__asm__ __volatile__("" :: "m" (result));
finishVfp(fpscr, state, fpscr.fz);
FpDestP0_uw = result;
@@ -947,7 +1027,7 @@ let {{
VfpSavedState state = prepFpState(fpscr.rMode);
fesetround(FeRoundZero);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
- FpDest_sw = vfpFpSToFixed(FpOp1, true, false, 0);
+ FpDest_sw = vfpFpToFixed<float>(FpOp1, true, 32, 0);
__asm__ __volatile__("" :: "m" (FpDest_sw));
finishVfp(fpscr, state, fpscr.fz);
FpscrExc = fpscr;
@@ -967,7 +1047,7 @@ let {{
VfpSavedState state = prepFpState(fpscr.rMode);
fesetround(FeRoundZero);
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
- int64_t result = vfpFpDToFixed(cOp1, true, false, 0);
+ int64_t result = vfpFpToFixed<double>(cOp1, true, 32, 0);
__asm__ __volatile__("" :: "m" (result));
finishVfp(fpscr, state, fpscr.fz);
FpDestP0_uw = result;
@@ -1333,7 +1413,7 @@ let {{
vfpFlushToZero(fpscr, FpOp1);
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
- FpDest_sw = vfpFpSToFixed(FpOp1, true, false, imm);
+ FpDest_sw = vfpFpToFixed<float>(FpOp1, true, 32, imm);
__asm__ __volatile__("" :: "m" (FpDest_sw));
finishVfp(fpscr, state, fpscr.fz);
FpscrExc = fpscr;
@@ -1352,7 +1432,7 @@ let {{
vfpFlushToZero(fpscr, cOp1);
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
- uint64_t mid = vfpFpDToFixed(cOp1, true, false, imm);
+ uint64_t mid = vfpFpToFixed<double>(cOp1, true, 32, imm);
__asm__ __volatile__("" :: "m" (mid));
finishVfp(fpscr, state, fpscr.fz);
FpDestP0_uw = mid;
@@ -1372,7 +1452,7 @@ let {{
vfpFlushToZero(fpscr, FpOp1);
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
- FpDest_uw = vfpFpSToFixed(FpOp1, false, false, imm);
+ FpDest_uw = vfpFpToFixed<float>(FpOp1, false, 32, imm);
__asm__ __volatile__("" :: "m" (FpDest_uw));
finishVfp(fpscr, state, fpscr.fz);
FpscrExc = fpscr;
@@ -1391,7 +1471,7 @@ let {{
vfpFlushToZero(fpscr, cOp1);
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
- uint64_t mid = vfpFpDToFixed(cOp1, false, false, imm);
+ uint64_t mid = vfpFpToFixed<double>(cOp1, false, 32, imm);
__asm__ __volatile__("" :: "m" (mid));
finishVfp(fpscr, state, fpscr.fz);
FpDestP0_uw = mid;
@@ -1410,7 +1490,7 @@ let {{
FPSCR fpscr = (FPSCR) FpscrExc;
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (FpOp1_sw) : "m" (FpOp1_sw));
- FpDest = vfpSFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_sw, false, imm);
+ FpDest = vfpSFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_sw, 32, imm);
__asm__ __volatile__("" :: "m" (FpDest));
finishVfp(fpscr, state, fpscr.fz);
FpscrExc = fpscr;
@@ -1428,7 +1508,7 @@ let {{
uint64_t mid = ((uint64_t)FpOp1P0_uw | ((uint64_t)FpOp1P1_uw << 32));
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (mid) : "m" (mid));
- double cDest = vfpSFixedToFpD(fpscr.fz, fpscr.dn, mid, false, imm);
+ double cDest = vfpSFixedToFpD(fpscr.fz, fpscr.dn, mid, 32, imm);
__asm__ __volatile__("" :: "m" (cDest));
finishVfp(fpscr, state, fpscr.fz);
FpDestP0_uw = dblLow(cDest);
@@ -1447,7 +1527,7 @@ let {{
FPSCR fpscr = (FPSCR) FpscrExc;
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (FpOp1_uw) : "m" (FpOp1_uw));
- FpDest = vfpUFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_uw, false, imm);
+ FpDest = vfpUFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_uw, 32, imm);
__asm__ __volatile__("" :: "m" (FpDest));
finishVfp(fpscr, state, fpscr.fz);
FpscrExc = fpscr;
@@ -1465,7 +1545,7 @@ let {{
uint64_t mid = ((uint64_t)FpOp1P0_uw | ((uint64_t)FpOp1P1_uw << 32));
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (mid) : "m" (mid));
- double cDest = vfpUFixedToFpD(fpscr.fz, fpscr.dn, mid, false, imm);
+ double cDest = vfpUFixedToFpD(fpscr.fz, fpscr.dn, mid, 32, imm);
__asm__ __volatile__("" :: "m" (cDest));
finishVfp(fpscr, state, fpscr.fz);
FpDestP0_uw = dblLow(cDest);
@@ -1485,7 +1565,7 @@ let {{
vfpFlushToZero(fpscr, FpOp1);
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
- FpDest_sh = vfpFpSToFixed(FpOp1, true, true, imm);
+ FpDest_sh = vfpFpToFixed<float>(FpOp1, true, 16, imm);
__asm__ __volatile__("" :: "m" (FpDest_sh));
finishVfp(fpscr, state, fpscr.fz);
FpscrExc = fpscr;
@@ -1505,7 +1585,7 @@ let {{
vfpFlushToZero(fpscr, cOp1);
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
- uint64_t result = vfpFpDToFixed(cOp1, true, true, imm);
+ uint64_t result = vfpFpToFixed<double>(cOp1, true, 16, imm);
__asm__ __volatile__("" :: "m" (result));
finishVfp(fpscr, state, fpscr.fz);
FpDestP0_uw = result;
@@ -1526,7 +1606,7 @@ let {{
vfpFlushToZero(fpscr, FpOp1);
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
- FpDest_uh = vfpFpSToFixed(FpOp1, false, true, imm);
+ FpDest_uh = vfpFpToFixed<float>(FpOp1, false, 16, imm);
__asm__ __volatile__("" :: "m" (FpDest_uh));
finishVfp(fpscr, state, fpscr.fz);
FpscrExc = fpscr;
@@ -1546,7 +1626,7 @@ let {{
vfpFlushToZero(fpscr, cOp1);
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
- uint64_t mid = vfpFpDToFixed(cOp1, false, true, imm);
+ uint64_t mid = vfpFpToFixed<double>(cOp1, false, 16, imm);
__asm__ __volatile__("" :: "m" (mid));
finishVfp(fpscr, state, fpscr.fz);
FpDestP0_uw = mid;
@@ -1566,7 +1646,7 @@ let {{
FPSCR fpscr = (FPSCR) FpscrExc;
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (FpOp1_sh) : "m" (FpOp1_sh));
- FpDest = vfpSFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_sh, true, imm);
+ FpDest = vfpSFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_sh, 16, imm);
__asm__ __volatile__("" :: "m" (FpDest));
finishVfp(fpscr, state, fpscr.fz);
FpscrExc = fpscr;
@@ -1585,7 +1665,7 @@ let {{
uint64_t mid = ((uint64_t)FpOp1P0_uw | ((uint64_t)FpOp1P1_uw << 32));
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (mid) : "m" (mid));
- double cDest = vfpSFixedToFpD(fpscr.fz, fpscr.dn, mid, true, imm);
+ double cDest = vfpSFixedToFpD(fpscr.fz, fpscr.dn, mid, 16, imm);
__asm__ __volatile__("" :: "m" (cDest));
finishVfp(fpscr, state, fpscr.fz);
FpDestP0_uw = dblLow(cDest);
@@ -1605,7 +1685,7 @@ let {{
FPSCR fpscr = (FPSCR) FpscrExc;
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (FpOp1_uh) : "m" (FpOp1_uh));
- FpDest = vfpUFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_uh, true, imm);
+ FpDest = vfpUFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_uh, 16, imm);
__asm__ __volatile__("" :: "m" (FpDest));
finishVfp(fpscr, state, fpscr.fz);
FpscrExc = fpscr;
@@ -1624,7 +1704,7 @@ let {{
uint64_t mid = ((uint64_t)FpOp1P0_uw | ((uint64_t)FpOp1P1_uw << 32));
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (mid) : "m" (mid));
- double cDest = vfpUFixedToFpD(fpscr.fz, fpscr.dn, mid, true, imm);
+ double cDest = vfpUFixedToFpD(fpscr.fz, fpscr.dn, mid, 16, imm);
__asm__ __volatile__("" :: "m" (cDest));
finishVfp(fpscr, state, fpscr.fz);
FpDestP0_uw = dblLow(cDest);
diff --git a/src/arch/arm/isa/insts/fp64.isa b/src/arch/arm/isa/insts/fp64.isa
new file mode 100644
index 000000000..95dec5062
--- /dev/null
+++ b/src/arch/arm/isa/insts/fp64.isa
@@ -0,0 +1,811 @@
+// -*- mode:c++ -*-
+
+// Copyright (c) 2012-2013 ARM Limited
+// All rights reserved
+//
+// The license below extends only to copyright in the software and shall
+// not be construed as granting a license to any other intellectual
+// property including but not limited to intellectual property relating
+// to a hardware implementation of the functionality of the software
+// licensed hereunder. You may use the software subject to the license
+// terms below provided that you ensure that this notice is replicated
+// unmodified and in its entirety in all distributions of the software,
+// modified or unmodified, in source code or in binary form.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Thomas Grocutt
+// Edmund Grimley Evans
+
+let {{
+
+ header_output = ""
+ decoder_output = ""
+ exec_output = ""
+
+ fmovImmSCode = vfp64EnabledCheckCode + '''
+ AA64FpDestP0_uw = bits(imm, 31, 0);
+ AA64FpDestP1_uw = 0;
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ '''
+ fmovImmSIop = InstObjParams("fmov", "FmovImmS", "FpRegImmOp",
+ { "code": fmovImmSCode,
+ "op_class": "SimdFloatMiscOp" }, [])
+ header_output += FpRegImmOpDeclare.subst(fmovImmSIop);
+ decoder_output += FpRegImmOpConstructor.subst(fmovImmSIop);
+ exec_output += BasicExecute.subst(fmovImmSIop);
+
+ fmovImmDCode = vfp64EnabledCheckCode + '''
+ AA64FpDestP0_uw = bits(imm, 31, 0);
+ AA64FpDestP1_uw = bits(imm, 63, 32);
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ '''
+ fmovImmDIop = InstObjParams("fmov", "FmovImmD", "FpRegImmOp",
+ { "code": fmovImmDCode,
+ "op_class": "SimdFloatMiscOp" }, [])
+ header_output += FpRegImmOpDeclare.subst(fmovImmDIop);
+ decoder_output += AA64FpRegImmOpConstructor.subst(fmovImmDIop);
+ exec_output += BasicExecute.subst(fmovImmDIop);
+
+ fmovRegSCode = vfp64EnabledCheckCode + '''
+ AA64FpDestP0_uw = AA64FpOp1P0_uw;
+ AA64FpDestP1_uw = 0;
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ '''
+ fmovRegSIop = InstObjParams("fmov", "FmovRegS", "FpRegRegOp",
+ { "code": fmovRegSCode,
+ "op_class": "SimdFloatMiscOp" }, [])
+ header_output += FpRegRegOpDeclare.subst(fmovRegSIop);
+ decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegSIop);
+ exec_output += BasicExecute.subst(fmovRegSIop);
+
+ fmovRegDCode = vfp64EnabledCheckCode + '''
+ AA64FpDestP0_uw = AA64FpOp1P0_uw;
+ AA64FpDestP1_uw = AA64FpOp1P1_uw;
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ '''
+ fmovRegDIop = InstObjParams("fmov", "FmovRegD", "FpRegRegOp",
+ { "code": fmovRegDCode,
+ "op_class": "SimdFloatMiscOp" }, [])
+ header_output += FpRegRegOpDeclare.subst(fmovRegDIop);
+ decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegDIop);
+ exec_output += BasicExecute.subst(fmovRegDIop);
+
+ fmovCoreRegWCode = vfp64EnabledCheckCode + '''
+ AA64FpDestP0_uw = WOp1_uw;
+ AA64FpDestP1_uw = 0;
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ '''
+ fmovCoreRegWIop = InstObjParams("fmov", "FmovCoreRegW", "FpRegRegOp",
+ { "code": fmovCoreRegWCode,
+ "op_class": "SimdFloatMiscOp" }, [])
+ header_output += FpRegRegOpDeclare.subst(fmovCoreRegWIop);
+ decoder_output += AA64FpRegRegOpConstructor.subst(fmovCoreRegWIop);
+ exec_output += BasicExecute.subst(fmovCoreRegWIop);
+
+ fmovCoreRegXCode = vfp64EnabledCheckCode + '''
+ AA64FpDestP0_uw = XOp1_ud;
+ AA64FpDestP1_uw = XOp1_ud >> 32;
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ '''
+ fmovCoreRegXIop = InstObjParams("fmov", "FmovCoreRegX", "FpRegRegOp",
+ { "code": fmovCoreRegXCode,
+ "op_class": "SimdFloatMiscOp" }, [])
+ header_output += FpRegRegOpDeclare.subst(fmovCoreRegXIop);
+ decoder_output += AA64FpRegRegOpConstructor.subst(fmovCoreRegXIop);
+ exec_output += BasicExecute.subst(fmovCoreRegXIop);
+
+ fmovUCoreRegXCode = vfp64EnabledCheckCode + '''
+ AA64FpDestP2_uw = XOp1_ud;
+ AA64FpDestP3_uw = XOp1_ud >> 32;
+ '''
+ fmovUCoreRegXIop = InstObjParams("fmov", "FmovUCoreRegX", "FpRegRegOp",
+ { "code": fmovUCoreRegXCode,
+ "op_class": "SimdFloatMiscOp" }, [])
+ header_output += FpRegRegOpDeclare.subst(fmovUCoreRegXIop);
+ decoder_output += AA64FpRegRegOpConstructor.subst(fmovUCoreRegXIop);
+ exec_output += BasicExecute.subst(fmovUCoreRegXIop);
+
+ fmovRegCoreWCode = vfp64EnabledCheckCode + '''
+ WDest = AA64FpOp1P0_uw;
+ '''
+ fmovRegCoreWIop = InstObjParams("fmov", "FmovRegCoreW", "FpRegRegOp",
+ { "code": fmovRegCoreWCode,
+ "op_class": "SimdFloatMiscOp" }, [])
+ header_output += FpRegRegOpDeclare.subst(fmovRegCoreWIop);
+ decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegCoreWIop);
+ exec_output += BasicExecute.subst(fmovRegCoreWIop);
+
+ fmovRegCoreXCode = vfp64EnabledCheckCode + '''
+ XDest = ( ((uint64_t) AA64FpOp1P1_uw) << 32) | AA64FpOp1P0_uw;
+ '''
+ fmovRegCoreXIop = InstObjParams("fmov", "FmovRegCoreX", "FpRegRegOp",
+ { "code": fmovRegCoreXCode,
+ "op_class": "SimdFloatMiscOp" }, [])
+ header_output += FpRegRegOpDeclare.subst(fmovRegCoreXIop);
+ decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegCoreXIop);
+ exec_output += BasicExecute.subst(fmovRegCoreXIop);
+
+ fmovURegCoreXCode = vfp64EnabledCheckCode + '''
+ XDest = ( ((uint64_t) AA64FpOp1P3_uw) << 32) | AA64FpOp1P2_uw;
+ '''
+ fmovURegCoreXIop = InstObjParams("fmov", "FmovURegCoreX", "FpRegRegOp",
+ { "code": fmovURegCoreXCode,
+ "op_class": "SimdFloatMiscOp" }, [])
+ header_output += FpRegRegOpDeclare.subst(fmovURegCoreXIop);
+ decoder_output += AA64FpRegRegOpConstructor.subst(fmovURegCoreXIop);
+ exec_output += BasicExecute.subst(fmovURegCoreXIop);
+}};
+
+let {{
+
+ header_output = ""
+ decoder_output = ""
+ exec_output = ""
+
+ singleIntConvCode = vfp64EnabledCheckCode + '''
+ FPSCR fpscr = (FPSCR) FpscrExc;
+ uint32_t cOp1 = AA64FpOp1P0_uw;
+ uint32_t cDest = %(op)s;
+ AA64FpDestP0_uw = cDest;
+ AA64FpDestP1_uw = 0;
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ FpscrExc = fpscr;
+ '''
+
+ singleIntConvCode2 = vfp64EnabledCheckCode + '''
+ FPSCR fpscr = (FPSCR) FpscrExc;
+ uint32_t cOp1 = AA64FpOp1P0_uw;
+ uint32_t cOp2 = AA64FpOp2P0_uw;
+ uint32_t cDest = %(op)s;
+ AA64FpDestP0_uw = cDest;
+ AA64FpDestP1_uw = 0;
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ FpscrExc = fpscr;
+ '''
+
+ singleBinOp = "binaryOp(fpscr, AA64FpOp1P0, AA64FpOp2P0," + \
+ "%(func)s, fpscr.fz, fpscr.dn, fpscr.rMode)"
+ singleUnaryOp = "unaryOp(fpscr, AA64FpOp1P0, %(func)s, fpscr.fz, fpscr.rMode)"
+
+ doubleIntConvCode = vfp64EnabledCheckCode + '''
+ FPSCR fpscr = (FPSCR) FpscrExc;
+ uint64_t cOp1 = ((uint64_t) AA64FpOp1P1_uw) << 32 | AA64FpOp1P0_uw;
+ uint64_t cDest = %(op)s;
+ AA64FpDestP0_uw = cDest & 0xFFFFFFFF;
+ AA64FpDestP1_uw = cDest >> 32;
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ FpscrExc = fpscr;
+ '''
+
+ doubleIntConvCode2 = vfp64EnabledCheckCode + '''
+ FPSCR fpscr = (FPSCR) FpscrExc;
+ uint64_t cOp1 = ((uint64_t) AA64FpOp1P1_uw) << 32 | AA64FpOp1P0_uw;
+ uint64_t cOp2 = ((uint64_t) AA64FpOp2P1_uw) << 32 | AA64FpOp2P0_uw;
+ uint64_t cDest = %(op)s;
+ AA64FpDestP0_uw = cDest & 0xFFFFFFFF;
+ AA64FpDestP1_uw = cDest >> 32;
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ FpscrExc = fpscr;
+ '''
+
+ doubleBinOp = '''
+ binaryOp(fpscr, dbl(AA64FpOp1P0_uw, AA64FpOp1P1_uw),
+ dbl(AA64FpOp2P0_uw, AA64FpOp2P1_uw),
+ %(func)s, fpscr.fz, fpscr.dn, fpscr.rMode);
+ '''
+ doubleUnaryOp = '''
+ unaryOp(fpscr, dbl(AA64FpOp1P0_uw, AA64FpOp1P1_uw), %(func)s,
+ fpscr.fz, fpscr.rMode)
+ '''
+
+ def buildTernaryFpOp(name, opClass, sOp, dOp):
+ global header_output, decoder_output, exec_output
+ for isDouble in True, False:
+ code = vfp64EnabledCheckCode + '''
+ FPSCR fpscr = (FPSCR) FpscrExc;
+ '''
+ if isDouble:
+ code += '''
+ uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32;
+ uint64_t cOp2 = AA64FpOp2P0_uw | (uint64_t)AA64FpOp2P1_uw << 32;
+ uint64_t cOp3 = AA64FpOp3P0_uw | (uint64_t)AA64FpOp3P1_uw << 32;
+ uint64_t cDest;
+ ''' "cDest = " + dOp + ";" + '''
+ AA64FpDestP0_uw = cDest;
+ AA64FpDestP1_uw = cDest >> 32;
+ '''
+ else:
+ code += '''
+ uint32_t cOp1 = AA64FpOp1P0_uw;
+ uint32_t cOp2 = AA64FpOp2P0_uw;
+ uint32_t cOp3 = AA64FpOp3P0_uw;
+ uint32_t cDest;
+ ''' "cDest = " + sOp + ";" + '''
+ AA64FpDestP0_uw = cDest;
+ AA64FpDestP1_uw = 0;
+ '''
+ code += '''
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ FpscrExc = fpscr;
+ '''
+
+ iop = InstObjParams(name.lower(), name + ("D" if isDouble else "S"),
+ "FpRegRegRegRegOp",
+ { "code": code, "op_class": opClass }, [])
+
+ header_output += AA64FpRegRegRegRegOpDeclare.subst(iop)
+ decoder_output += AA64FpRegRegRegRegOpConstructor.subst(iop)
+ exec_output += BasicExecute.subst(iop)
+
+ buildTernaryFpOp("FMAdd", "SimdFloatMultAccOp",
+ "fplibMulAdd<uint32_t>(cOp3, cOp1, cOp2, fpscr)",
+ "fplibMulAdd<uint64_t>(cOp3, cOp1, cOp2, fpscr)" )
+ buildTernaryFpOp("FMSub", "SimdFloatMultAccOp",
+ "fplibMulAdd<uint32_t>(cOp3, fplibNeg<uint32_t>(cOp1), cOp2, fpscr)",
+ "fplibMulAdd<uint64_t>(cOp3, fplibNeg<uint64_t>(cOp1), cOp2, fpscr)" )
+ buildTernaryFpOp("FNMAdd", "SimdFloatMultAccOp",
+ "fplibMulAdd<uint32_t>(fplibNeg<uint32_t>(cOp3), fplibNeg<uint32_t>(cOp1), cOp2, fpscr)",
+ "fplibMulAdd<uint64_t>(fplibNeg<uint64_t>(cOp3), fplibNeg<uint64_t>(cOp1), cOp2, fpscr)" )
+ buildTernaryFpOp("FNMSub", "SimdFloatMultAccOp",
+ "fplibMulAdd<uint32_t>(fplibNeg<uint32_t>(cOp3), cOp1, cOp2, fpscr)",
+ "fplibMulAdd<uint64_t>(fplibNeg<uint64_t>(cOp3), cOp1, cOp2, fpscr)" )
+
+ def buildBinFpOp(name, Name, base, opClass, singleOp, doubleOp):
+ global header_output, decoder_output, exec_output
+
+ code = singleIntConvCode2 % { "op": singleOp }
+ sIop = InstObjParams(name, Name + "S", base,
+ { "code": code,
+ "op_class": opClass }, [])
+
+ code = doubleIntConvCode2 % { "op": doubleOp }
+ dIop = InstObjParams(name, Name + "D", base,
+ { "code": code,
+ "op_class": opClass }, [])
+
+ declareTempl = eval( base + "Declare");
+ constructorTempl = eval("AA64" + base + "Constructor");
+
+ for iop in sIop, dIop:
+ header_output += declareTempl.subst(iop)
+ decoder_output += constructorTempl.subst(iop)
+ exec_output += BasicExecute.subst(iop)
+
+ buildBinFpOp("fadd", "FAdd", "FpRegRegRegOp", "SimdFloatAddOp",
+ "fplibAdd<uint32_t>(cOp1, cOp2, fpscr)",
+ "fplibAdd<uint64_t>(cOp1, cOp2, fpscr)")
+ buildBinFpOp("fsub", "FSub", "FpRegRegRegOp", "SimdFloatAddOp",
+ "fplibSub<uint32_t>(cOp1, cOp2, fpscr)",
+ "fplibSub<uint64_t>(cOp1, cOp2, fpscr)")
+ buildBinFpOp("fdiv", "FDiv", "FpRegRegRegOp", "SimdFloatDivOp",
+ "fplibDiv<uint32_t>(cOp1, cOp2, fpscr)",
+ "fplibDiv<uint64_t>(cOp1, cOp2, fpscr)")
+ buildBinFpOp("fmul", "FMul", "FpRegRegRegOp", "SimdFloatMultOp",
+ "fplibMul<uint32_t>(cOp1, cOp2, fpscr)",
+ "fplibMul<uint64_t>(cOp1, cOp2, fpscr)")
+ buildBinFpOp("fnmul", "FNMul", "FpRegRegRegOp", "SimdFloatMultOp",
+ "fplibNeg<uint32_t>(fplibMul<uint32_t>(cOp1, cOp2, fpscr))",
+ "fplibNeg<uint64_t>(fplibMul<uint64_t>(cOp1, cOp2, fpscr))")
+ buildBinFpOp("fmin", "FMin", "FpRegRegRegOp", "SimdFloatCmpOp",
+ "fplibMin<uint32_t>(cOp1, cOp2, fpscr)",
+ "fplibMin<uint64_t>(cOp1, cOp2, fpscr)")
+ buildBinFpOp("fmax", "FMax", "FpRegRegRegOp", "SimdFloatCmpOp",
+ "fplibMax<uint32_t>(cOp1, cOp2, fpscr)",
+ "fplibMax<uint64_t>(cOp1, cOp2, fpscr)")
+ buildBinFpOp("fminnm", "FMinNM", "FpRegRegRegOp", "SimdFloatCmpOp",
+ "fplibMinNum<uint32_t>(cOp1, cOp2, fpscr)",
+ "fplibMinNum<uint64_t>(cOp1, cOp2, fpscr)")
+ buildBinFpOp("fmaxnm", "FMaxNM", "FpRegRegRegOp", "SimdFloatCmpOp",
+ "fplibMaxNum<uint32_t>(cOp1, cOp2, fpscr)",
+ "fplibMaxNum<uint64_t>(cOp1, cOp2, fpscr)")
+
+ def buildUnaryFpOp(name, Name, base, opClass, singleOp, doubleOp = None):
+ if doubleOp is None:
+ doubleOp = singleOp
+ global header_output, decoder_output, exec_output
+
+ code = singleIntConvCode % { "op": singleOp }
+ sIop = InstObjParams(name, Name + "S", base,
+ { "code": code,
+ "op_class": opClass }, [])
+ code = doubleIntConvCode % { "op": doubleOp }
+ dIop = InstObjParams(name, Name + "D", base,
+ { "code": code,
+ "op_class": opClass }, [])
+
+ declareTempl = eval( base + "Declare");
+ constructorTempl = eval("AA64" + base + "Constructor");
+
+ for iop in sIop, dIop:
+ header_output += declareTempl.subst(iop)
+ decoder_output += constructorTempl.subst(iop)
+ exec_output += BasicExecute.subst(iop)
+
+ buildUnaryFpOp("fsqrt", "FSqrt", "FpRegRegOp", "SimdFloatSqrtOp",
+ "fplibSqrt<uint32_t>(cOp1, fpscr)", "fplibSqrt<uint64_t>(cOp1, fpscr)")
+
+ def buildSimpleUnaryFpOp(name, Name, base, opClass, singleOp,
+ doubleOp = None, isIntConv = True):
+ if doubleOp is None:
+ doubleOp = singleOp
+ global header_output, decoder_output, exec_output
+
+ if isIntConv:
+ sCode = singleIntConvCode
+ dCode = doubleIntConvCode
+ else:
+ sCode = singleCode
+ dCode = doubleCode
+
+ for code, op, suffix in [[sCode, singleOp, "S"],
+ [dCode, doubleOp, "D"]]:
+ iop = InstObjParams(name, Name + suffix, base,
+ { "code": code % { "op": op },
+ "op_class": opClass }, [])
+
+ declareTempl = eval( base + "Declare");
+ constructorTempl = eval("AA64" + base + "Constructor");
+
+ header_output += declareTempl.subst(iop)
+ decoder_output += constructorTempl.subst(iop)
+ exec_output += BasicExecute.subst(iop)
+
+ buildSimpleUnaryFpOp("fneg", "FNeg", "FpRegRegOp", "SimdFloatMiscOp",
+ "fplibNeg<uint32_t>(cOp1)", "fplibNeg<uint64_t>(cOp1)")
+ buildSimpleUnaryFpOp("fabs", "FAbs", "FpRegRegOp", "SimdFloatMiscOp",
+ "fplibAbs<uint32_t>(cOp1)", "fplibAbs<uint64_t>(cOp1)")
+ buildSimpleUnaryFpOp("frintn", "FRIntN", "FpRegRegOp", "SimdFloatMiscOp",
+ "fplibRoundInt<uint32_t>(cOp1, FPRounding_TIEEVEN, false, fpscr)",
+ "fplibRoundInt<uint64_t>(cOp1, FPRounding_TIEEVEN, false, fpscr)")
+ buildSimpleUnaryFpOp("frintp", "FRIntP", "FpRegRegOp", "SimdFloatMiscOp",
+ "fplibRoundInt<uint32_t>(cOp1, FPRounding_POSINF, false, fpscr)",
+ "fplibRoundInt<uint64_t>(cOp1, FPRounding_POSINF, false, fpscr)")
+ buildSimpleUnaryFpOp("frintm", "FRIntM", "FpRegRegOp", "SimdFloatMiscOp",
+ "fplibRoundInt<uint32_t>(cOp1, FPRounding_NEGINF, false, fpscr)",
+ "fplibRoundInt<uint64_t>(cOp1, FPRounding_NEGINF, false, fpscr)")
+ buildSimpleUnaryFpOp("frintz", "FRIntZ", "FpRegRegOp", "SimdFloatMiscOp",
+ "fplibRoundInt<uint32_t>(cOp1, FPRounding_ZERO, false, fpscr)",
+ "fplibRoundInt<uint64_t>(cOp1, FPRounding_ZERO, false, fpscr)")
+ buildSimpleUnaryFpOp("frinta", "FRIntA", "FpRegRegOp", "SimdFloatMiscOp",
+ "fplibRoundInt<uint32_t>(cOp1, FPRounding_TIEAWAY, false, fpscr)",
+ "fplibRoundInt<uint64_t>(cOp1, FPRounding_TIEAWAY, false, fpscr)")
+ buildSimpleUnaryFpOp("frinti", "FRIntI", "FpRegRegOp", "SimdFloatMiscOp",
+ "fplibRoundInt<uint32_t>(cOp1, FPCRRounding(fpscr), false, fpscr)",
+ "fplibRoundInt<uint64_t>(cOp1, FPCRRounding(fpscr), false, fpscr)")
+ buildSimpleUnaryFpOp("frintx", "FRIntX", "FpRegRegOp", "SimdFloatMiscOp",
+ "fplibRoundInt<uint32_t>(cOp1, FPCRRounding(fpscr), true, fpscr)",
+ "fplibRoundInt<uint64_t>(cOp1, FPCRRounding(fpscr), true, fpscr)")
+}};
+
+let {{
+
+ header_output = ""
+ decoder_output = ""
+ exec_output = ""
+
+ # Creates the integer to floating point instructions, including variants for
+ # signed/unsigned, float/double, etc
+ for regL, regOpL, width in [["W", "w", 32],
+ ["X", "d", 64]]:
+ for isDouble in True, False:
+ for us, usCode in [["U", "uint%d_t cSrc = %sOp1_u%s;" %(width, regL, regOpL)],
+ ["S", "int%d_t cSrc = %sOp1_u%s;" %(width, regL, regOpL)]]:
+ fcvtIntFpDCode = vfp64EnabledCheckCode + '''
+ FPSCR fpscr = (FPSCR) FpscrExc;
+ %s
+ ''' %(usCode)
+
+ if isDouble:
+ fcvtIntFpDCode += '''
+ uint64_t cDest = fplibFixedToFP<uint64_t>(cSrc, 0,
+ %s, FPCRRounding(fpscr), fpscr);
+ AA64FpDestP0_uw = cDest;
+ AA64FpDestP1_uw = cDest >> 32;
+ ''' % ("true" if us == "U" else "false")
+ else:
+ fcvtIntFpDCode += '''
+ uint32_t cDest = fplibFixedToFP<uint32_t>(cSrc, 0,
+ %s, FPCRRounding(fpscr), fpscr);
+ AA64FpDestP0_uw = cDest;
+ AA64FpDestP1_uw = 0;
+ ''' % ("true" if us == "U" else "false")
+ fcvtIntFpDCode += '''
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ FpscrExc = fpscr;
+ '''
+
+ instName = "Fcvt%s%sIntFp%s" %(regL, us, "D" if isDouble else "S")
+ mnem = "%scvtf" %(us.lower())
+ fcvtIntFpDIop = InstObjParams(mnem, instName, "FpRegRegOp",
+ { "code": fcvtIntFpDCode,
+ "op_class": "SimdFloatCvtOp" }, [])
+ header_output += FpRegRegOpDeclare.subst(fcvtIntFpDIop);
+ decoder_output += AA64FpRegRegOpConstructor.subst(fcvtIntFpDIop);
+ exec_output += BasicExecute.subst(fcvtIntFpDIop);
+
+ # Generates the floating point to integer conversion instructions in various
+ # variants, eg signed/unsigned
+ def buildFpCvtIntOp(isDouble, isSigned, isXReg):
+ global header_output, decoder_output, exec_output
+
+ for rmode, roundingMode in [["N", "FPRounding_TIEEVEN"],
+ ["P", "FPRounding_POSINF"],
+ ["M", "FPRounding_NEGINF"],
+ ["Z", "FPRounding_ZERO"],
+ ["A", "FPRounding_TIEAWAY"]]:
+ fcvtFpIntCode = vfp64EnabledCheckCode + '''
+ FPSCR fpscr = (FPSCR) FpscrExc;'''
+ if isDouble:
+ fcvtFpIntCode += '''
+ uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32;
+ '''
+ else:
+ fcvtFpIntCode += "uint32_t cOp1 = AA64FpOp1P0_uw;"
+
+ fcvtFpIntCode += '''
+ %sDest = fplibFPToFixed<uint%s_t, uint%s_t>(cOp1, 0, %s, %s, fpscr);
+ FpscrExc = fpscr;
+ ''' %("X" if isXReg else "W",
+ "64" if isDouble else "32",
+ "64" if isXReg else "32",
+ "false" if isSigned else "true",
+ roundingMode)
+
+ instName = "FcvtFp%sInt%s%s%s" %("S" if isSigned else "U",
+ "X" if isXReg else "W",
+ "D" if isDouble else "S", rmode)
+ mnem = "fcvt%s%s" %(rmode, "s" if isSigned else "u")
+ fcvtFpIntIop = InstObjParams(mnem, instName, "FpRegRegOp",
+ { "code": fcvtFpIntCode,
+ "op_class": "SimdFloatCvtOp" }, [])
+ header_output += FpRegRegOpDeclare.subst(fcvtFpIntIop);
+ decoder_output += FpRegRegOpConstructor.subst(fcvtFpIntIop);
+ exec_output += BasicExecute.subst(fcvtFpIntIop);
+
+ # Now actually do the building with the different variants
+ for isDouble in True, False:
+ for isSigned in True, False:
+ for isXReg in True, False:
+ buildFpCvtIntOp(isDouble, isSigned, isXReg)
+
+ fcvtFpSFpDCode = vfp64EnabledCheckCode + '''
+ FPSCR fpscr = (FPSCR) FpscrExc;
+ uint64_t cDest = fplibConvert<uint32_t, uint64_t>(AA64FpOp1P0_uw,
+ FPCRRounding(fpscr), fpscr);
+ AA64FpDestP0_uw = cDest;
+ AA64FpDestP1_uw = cDest >> 32;
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ FpscrExc = fpscr;
+ '''
+ fcvtFpSFpDIop = InstObjParams("fcvt", "FCvtFpSFpD", "FpRegRegOp",
+ { "code": fcvtFpSFpDCode,
+ "op_class": "SimdFloatCvtOp" }, [])
+ header_output += FpRegRegOpDeclare.subst(fcvtFpSFpDIop);
+ decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpSFpDIop);
+ exec_output += BasicExecute.subst(fcvtFpSFpDIop);
+
+ fcvtFpDFpSCode = vfp64EnabledCheckCode + '''
+ FPSCR fpscr = (FPSCR) FpscrExc;
+ uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32;
+ AA64FpDestP0_uw = fplibConvert<uint64_t, uint32_t>(cOp1,
+ FPCRRounding(fpscr), fpscr);
+ AA64FpDestP1_uw = 0;
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ FpscrExc = fpscr;
+ '''
+ fcvtFpDFpSIop = InstObjParams("fcvt", "FcvtFpDFpS", "FpRegRegOp",
+ {"code": fcvtFpDFpSCode,
+ "op_class": "SimdFloatCvtOp" }, [])
+ header_output += FpRegRegOpDeclare.subst(fcvtFpDFpSIop);
+ decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpDFpSIop);
+ exec_output += BasicExecute.subst(fcvtFpDFpSIop);
+
+ # Half precision to single or double precision conversion
+ for isDouble in True, False:
+ code = vfp64EnabledCheckCode + '''
+ FPSCR fpscr = (FPSCR) FpscrExc;
+ %s cDest = fplibConvert<uint16_t, uint%s_t>(AA64FpOp1P0_uw,
+ FPCRRounding(fpscr), fpscr);
+ ''' % ("uint64_t" if isDouble else "uint32_t",
+ "64" if isDouble else "32")
+ if isDouble:
+ code += '''
+ AA64FpDestP0_uw = cDest;
+ AA64FpDestP1_uw = cDest >> 32;
+ '''
+ else:
+ code += '''
+ AA64FpDestP0_uw = cDest;
+ AA64FpDestP1_uw = 0;
+ '''
+ code += '''
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ FpscrExc = fpscr;
+ '''
+
+ instName = "FcvtFpHFp%s" %("D" if isDouble else "S")
+ fcvtFpHFpIop = InstObjParams("fcvt", instName, "FpRegRegOp",
+ { "code": code,
+ "op_class": "SimdFloatCvtOp" }, [])
+ header_output += FpRegRegOpDeclare.subst(fcvtFpHFpIop);
+ decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpHFpIop);
+ exec_output += BasicExecute.subst(fcvtFpHFpIop);
+
+ # single or double precision to Half precision conversion
+ for isDouble in True, False:
+ code = vfp64EnabledCheckCode + '''
+ FPSCR fpscr = (FPSCR) FpscrExc;
+ %s;
+ AA64FpDestP0_uw = fplibConvert<uint%s_t, uint16_t>(cOp1,
+ FPCRRounding(fpscr), fpscr);
+ AA64FpDestP1_uw = 0;
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ FpscrExc = fpscr;
+ ''' % ("uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32"
+ if isDouble else "uint32_t cOp1 = AA64FpOp1P0_uw",
+ "64" if isDouble else "32")
+
+ instName = "FcvtFp%sFpH" %("D" if isDouble else "S")
+ fcvtFpFpHIop = InstObjParams("fcvt", instName, "FpRegRegOp",
+ { "code": code,
+ "op_class": "SimdFloatCvtOp" }, [])
+ header_output += FpRegRegOpDeclare.subst(fcvtFpFpHIop);
+ decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpFpHIop);
+ exec_output += BasicExecute.subst(fcvtFpFpHIop);
+
+ # Build the various versions of the floating point compare instructions
+ def buildFCmpOp(isQuiet, isDouble, isImm):
+ global header_output, decoder_output, exec_output
+
+ fcmpCode = vfp64EnabledCheckCode + '''
+ FPSCR fpscr = (FPSCR) FpscrExc;
+ %s cOp1 = %s;
+ ''' % ("uint64_t" if isDouble else "uint32_t",
+ "AA64FpDestP0_uw | (uint64_t)AA64FpDestP1_uw << 32"
+ if isDouble else "AA64FpDestP0_uw")
+ if isImm:
+ fcmpCode += '''
+ %s cOp2 = imm;
+ ''' % ("uint64_t" if isDouble else "uint32_t")
+ else:
+ fcmpCode += '''
+ %s cOp2 = %s;
+ ''' % ("uint64_t" if isDouble else "uint32_t",
+ "AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32"
+ if isDouble else "AA64FpOp1P0_uw")
+ fcmpCode += '''
+ int cc = fplibCompare<uint%s_t>(cOp1, cOp2, %s, fpscr);
+ CondCodesNZ = cc >> 2 & 3;
+ CondCodesC = cc >> 1 & 1;
+ CondCodesV = cc & 1;
+ FpCondCodes = fpscr & FpCondCodesMask;
+ FpscrExc = fpscr;
+ ''' % ("64" if isDouble else "32", "false" if isQuiet else "true")
+
+ typeName = "Imm" if isImm else "Reg"
+ instName = "FCmp%s%s%s" %("" if isQuiet else "E", typeName,
+ "D" if isDouble else "S")
+ fcmpIop = InstObjParams("fcmp%s" %("" if isQuiet else "e"), instName,
+ "FpReg%sOp" %(typeName),
+ {"code": fcmpCode,
+ "op_class": "SimdFloatCmpOp"}, [])
+
+ declareTemp = eval("FpReg%sOpDeclare" %(typeName));
+ constructorTemp = eval("AA64FpReg%sOpConstructor" %(typeName));
+ header_output += declareTemp.subst(fcmpIop);
+ decoder_output += constructorTemp.subst(fcmpIop);
+ exec_output += BasicExecute.subst(fcmpIop);
+
+ for isQuiet in True, False:
+ for isDouble in True, False:
+ for isImm in True, False:
+ buildFCmpOp(isQuiet, isDouble, isImm)
+
+ # Build the various versions of the conditional floating point compare
+ # instructions
+ def buildFCCmpOp(isQuiet, isDouble):
+ global header_output, decoder_output, exec_output
+
+ fccmpCode = vfp64EnabledCheckCode + '''
+ FPSCR fpscr = (FPSCR) FpscrExc;
+ if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) {
+ %s cOp1 = %s;
+ %s cOp2 = %s;
+ int cc = fplibCompare<uint%s_t>(cOp1, cOp2, %s, fpscr);
+ CondCodesNZ = cc >> 2 & 3;
+ CondCodesC = cc >> 1 & 1;
+ CondCodesV = cc & 1;
+ } else {
+ CondCodesNZ = (defCc >> 2) & 0x3;
+ CondCodesC = (defCc >> 1) & 0x1;
+ CondCodesV = defCc & 0x1;
+ }
+ FpCondCodes = fpscr & FpCondCodesMask;
+ FpscrExc = fpscr;
+ ''' % ("uint64_t" if isDouble else "uint32_t",
+ "AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32"
+ if isDouble else "AA64FpOp1P0_uw",
+ "uint64_t" if isDouble else "uint32_t",
+ "AA64FpOp2P0_uw | (uint64_t)AA64FpOp2P1_uw << 32"
+ if isDouble else "AA64FpOp2P0_uw",
+ "64" if isDouble else "32", "false" if isQuiet else "true")
+
+ instName = "FCCmp%sReg%s" %("" if isQuiet else "E",
+ "D" if isDouble else "S")
+ fccmpIop = InstObjParams("fccmp%s" %("" if isQuiet else "e"),
+ instName, "FpCondCompRegOp",
+ {"code": fccmpCode,
+ "op_class": "SimdFloatCmpOp"}, [])
+ header_output += DataXCondCompRegDeclare.subst(fccmpIop);
+ decoder_output += DataXCondCompRegConstructor.subst(fccmpIop);
+ exec_output += BasicExecute.subst(fccmpIop);
+
+ for isQuiet in True, False:
+ for isDouble in True, False:
+ buildFCCmpOp(isQuiet, isDouble)
+
+}};
+
+let {{
+
+ header_output = ""
+ decoder_output = ""
+ exec_output = ""
+
+ # Generates the variants of the floating to fixed point instructions
+ def buildFpCvtFixedOp(isSigned, isDouble, isXReg):
+ global header_output, decoder_output, exec_output
+
+ fcvtFpFixedCode = vfp64EnabledCheckCode + '''
+ FPSCR fpscr = (FPSCR) FpscrExc;
+ '''
+ if isDouble:
+ fcvtFpFixedCode += '''
+ uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32;
+ '''
+ else:
+ fcvtFpFixedCode += "uint32_t cOp1 = AA64FpOp1P0_uw;"
+ fcvtFpFixedCode += '''
+ %sDest = fplibFPToFixed<uint%s_t, uint%s_t>(cOp1, 64 - imm, %s,
+ FPRounding_ZERO, fpscr);
+ FpscrExc = fpscr;
+ ''' %("X" if isXReg else "W",
+ "64" if isDouble else "32",
+ "64" if isXReg else "32",
+ "false" if isSigned else "true")
+
+ instName = "FcvtFp%sFixed%s%s" %("S" if isSigned else "U",
+ "D" if isDouble else "S",
+ "X" if isXReg else "W")
+ mnem = "fcvtz%s" %("s" if isSigned else "u")
+ fcvtFpFixedIop = InstObjParams(mnem, instName, "FpRegRegImmOp",
+ { "code": fcvtFpFixedCode,
+ "op_class": "SimdFloatCvtOp" }, [])
+ header_output += FpRegRegImmOpDeclare.subst(fcvtFpFixedIop);
+ decoder_output += AA64FpRegRegImmOpConstructor.subst(fcvtFpFixedIop);
+ exec_output += BasicExecute.subst(fcvtFpFixedIop);
+
+ # Generates the variants of the fixed to floating point instructions
+ def buildFixedCvtFpOp(isSigned, isDouble, isXReg):
+ global header_output, decoder_output, exec_output
+
+ srcRegType = "X" if isXReg else "W"
+ fcvtFixedFpCode = vfp64EnabledCheckCode + '''
+ FPSCR fpscr = (FPSCR) FpscrExc;
+ %s result = fplibFixedToFP<uint%s_t>((%s%s_t)%sOp1, 64 - imm,
+ %s, FPCRRounding(fpscr), fpscr);
+ ''' %("uint64_t" if isDouble else "uint32_t",
+ "64" if isDouble else "32",
+ "int" if isSigned else "uint", "64" if isXReg else "32",
+ srcRegType,
+ "false" if isSigned else "true")
+ if isDouble:
+ fcvtFixedFpCode += '''
+ AA64FpDestP0_uw = result;
+ AA64FpDestP1_uw = result >> 32;
+ '''
+ else:
+ fcvtFixedFpCode += '''
+ AA64FpDestP0_uw = result;
+ AA64FpDestP1_uw = 0;
+ '''
+ fcvtFixedFpCode += '''
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ FpscrExc = fpscr;
+ '''
+
+ instName = "Fcvt%sFixedFp%s%s" %("S" if isSigned else "U",
+ "D" if isDouble else "S",
+ srcRegType)
+ mnem = "%scvtf" %("s" if isSigned else "u")
+ fcvtFixedFpIop = InstObjParams(mnem, instName, "FpRegRegImmOp",
+ { "code": fcvtFixedFpCode,
+ "op_class": "SimdFloatCvtOp" }, [])
+ header_output += FpRegRegImmOpDeclare.subst(fcvtFixedFpIop);
+ decoder_output += FpRegRegImmOpConstructor.subst(fcvtFixedFpIop);
+ exec_output += BasicExecute.subst(fcvtFixedFpIop);
+
+ # loop over the variants building the instructions for each
+ for isXReg in True, False:
+ for isDouble in True, False:
+ for isSigned in True, False:
+ buildFpCvtFixedOp(isSigned, isDouble, isXReg)
+ buildFixedCvtFpOp(isSigned, isDouble, isXReg)
+}};
+
+let {{
+
+ header_output = ""
+ decoder_output = ""
+ exec_output = ""
+
+ for isDouble in True, False:
+ code = '''
+ if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) {
+ AA64FpDestP0_uw = AA64FpOp1P0_uw;
+ '''
+ if isDouble:
+ code += '''
+ AA64FpDestP1_uw = AA64FpOp1P1_uw;
+ } else {
+ AA64FpDestP0_uw = AA64FpOp2P0_uw;
+ AA64FpDestP1_uw = AA64FpOp2P1_uw;
+ }
+ '''
+ else:
+ code += '''
+ } else {
+ AA64FpDestP0_uw = AA64FpOp2P0_uw;
+ }
+ AA64FpDestP1_uw = 0;
+ '''
+ code += '''
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ '''
+
+ iop = InstObjParams("fcsel", "FCSel%s" %("D" if isDouble else "S"),
+ "FpCondSelOp", code)
+ header_output += DataXCondSelDeclare.subst(iop)
+ decoder_output += DataXCondSelConstructor.subst(iop)
+ exec_output += BasicExecute.subst(iop)
+}};
diff --git a/src/arch/arm/isa/insts/insts.isa b/src/arch/arm/isa/insts/insts.isa
index c01e87df8..9d90f7779 100644
--- a/src/arch/arm/isa/insts/insts.isa
+++ b/src/arch/arm/isa/insts/insts.isa
@@ -1,6 +1,6 @@
// -*- mode:c++ -*-
-// Copyright (c) 2010 ARM Limited
+// Copyright (c) 2010-2012 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -37,6 +37,9 @@
//
// Authors: Gabe Black
+//AArch64 instructions
+##include "aarch64.isa"
+
//Basic forms of various templates
##include "basic.isa"
@@ -46,8 +49,15 @@
//Loads of a single item
##include "ldr.isa"
+//Loads of a single item, AArch64
+##include "ldr64.isa"
+
//Miscellaneous instructions that don't fit elsewhere
##include "misc.isa"
+##include "misc64.isa"
+
+//Stores of a single item, AArch64
+##include "str64.isa"
//Stores of a single item
##include "str.isa"
@@ -61,8 +71,12 @@
//Data processing instructions
##include "data.isa"
+//AArch64 data processing instructions
+##include "data64.isa"
+
//Branches
##include "branch.isa"
+##include "branch64.isa"
//Multiply
##include "mult.isa"
@@ -72,9 +86,14 @@
//VFP
##include "fp.isa"
+##include "fp64.isa"
//Neon
##include "neon.isa"
+//AArch64 Neon
+##include "neon64.isa"
+##include "neon64_mem.isa"
+
//m5 Psuedo-ops
##include "m5ops.isa"
diff --git a/src/arch/arm/isa/insts/ldr.isa b/src/arch/arm/isa/insts/ldr.isa
index f599fa4b9..6bfe40118 100644
--- a/src/arch/arm/isa/insts/ldr.isa
+++ b/src/arch/arm/isa/insts/ldr.isa
@@ -1,6 +1,6 @@
// -*- mode:c++ -*-
-// Copyright (c) 2010 ARM Limited
+// Copyright (c) 2010-2011 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -38,6 +38,7 @@
// Authors: Gabe Black
let {{
+ import math
header_output = ""
decoder_output = ""
@@ -78,7 +79,8 @@ let {{
newDecoder,
newExec) = self.fillTemplates(self.name, self.Name, codeBlobs,
self.memFlags, instFlags, base,
- wbDecl, pcDecl, self.rasPop)
+ wbDecl, pcDecl, self.rasPop,
+ self.size, self.sign)
header_output += newHeader
decoder_output += newDecoder
@@ -160,7 +162,7 @@ let {{
self.size, self.sign, self.user)
# Add memory request flags where necessary
- self.memFlags.append("%d" % (self.size - 1))
+ self.memFlags.append("%d" % int(math.log(self.size, 2)))
if self.user:
self.memFlags.append("ArmISA::TLB::UserMode")
diff --git a/src/arch/arm/isa/insts/ldr64.isa b/src/arch/arm/isa/insts/ldr64.isa
new file mode 100644
index 000000000..78460f661
--- /dev/null
+++ b/src/arch/arm/isa/insts/ldr64.isa
@@ -0,0 +1,446 @@
+// -*- mode:c++ -*-
+
+// Copyright (c) 2011-2013 ARM Limited
+// All rights reserved
+//
+// The license below extends only to copyright in the software and shall
+// not be construed as granting a license to any other intellectual
+// property including but not limited to intellectual property relating
+// to a hardware implementation of the functionality of the software
+// licensed hereunder. You may use the software subject to the license
+// terms below provided that you ensure that this notice is replicated
+// unmodified and in its entirety in all distributions of the software,
+// modified or unmodified, in source code or in binary form.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Gabe Black
+
+let {{
+
+ header_output = ""
+ decoder_output = ""
+ exec_output = ""
+
+ class LoadInst64(LoadStoreInst):
+ execBase = 'Load64'
+ micro = False
+
+ def __init__(self, mnem, Name, size=4, sign=False, user=False,
+ literal=False, flavor="normal", top=False):
+ super(LoadInst64, self).__init__()
+
+ self.name = mnem
+ self.Name = Name
+ self.size = size
+ self.sign = sign
+ self.user = user
+ self.literal = literal
+ self.flavor = flavor
+ self.top = top
+
+ self.memFlags = ["ArmISA::TLB::MustBeOne"]
+ self.instFlags = []
+ self.codeBlobs = {"postacc_code" : ""}
+
+ # Add memory request flags where necessary
+ if self.user:
+ self.memFlags.append("ArmISA::TLB::UserMode")
+
+ if self.flavor == "dprefetch":
+ self.memFlags.append("Request::PREFETCH")
+ self.instFlags = ['IsDataPrefetch']
+ elif self.flavor == "iprefetch":
+ self.memFlags.append("Request::PREFETCH")
+ self.instFlags = ['IsInstPrefetch']
+ if self.micro:
+ self.instFlags.append("IsMicroop")
+
+ if self.flavor in ("acexp", "exp"):
+ # For exclusive pair ops alignment check is based on total size
+ self.memFlags.append("%d" % int(math.log(self.size, 2) + 1))
+ elif not (self.size == 16 and self.top):
+ # Only the first microop should perform alignment checking.
+ self.memFlags.append("%d" % int(math.log(self.size, 2)))
+
+ if self.flavor not in ("acquire", "acex", "exclusive",
+ "acexp", "exp"):
+ self.memFlags.append("ArmISA::TLB::AllowUnaligned")
+
+ if self.flavor in ("acquire", "acex", "acexp"):
+ self.instFlags.extend(["IsMemBarrier",
+ "IsWriteBarrier",
+ "IsReadBarrier"])
+ if self.flavor in ("acex", "exclusive", "exp", "acexp"):
+ self.memFlags.append("Request::LLSC")
+
+ def buildEACode(self):
+ # Address computation code
+ eaCode = ""
+ if self.flavor == "fp":
+ eaCode += vfp64EnabledCheckCode
+
+ if self.literal:
+ eaCode += "EA = RawPC"
+ else:
+ eaCode += SPAlignmentCheckCode + "EA = XBase"
+
+ if self.size == 16:
+ if self.top:
+ eaCode += " + (isBigEndian64(xc->tcBase()) ? 0 : 8)"
+ else:
+ eaCode += " + (isBigEndian64(xc->tcBase()) ? 8 : 0)"
+ if not self.post:
+ eaCode += self.offset
+ eaCode += ";"
+
+ self.codeBlobs["ea_code"] = eaCode
+
+ def emitHelper(self, base='Memory64', wbDecl=None):
+ global header_output, decoder_output, exec_output
+
+ # If this is a microop itself, don't allow anything that would
+ # require further microcoding.
+ if self.micro:
+ assert not wbDecl
+
+ fa_code = None
+ if not self.micro and self.flavor in ("normal", "widen", "acquire"):
+ fa_code = '''
+ fault->annotate(ArmFault::SAS, %s);
+ fault->annotate(ArmFault::SSE, %s);
+ fault->annotate(ArmFault::SRT, dest);
+ fault->annotate(ArmFault::SF, %s);
+ fault->annotate(ArmFault::AR, %s);
+ ''' % ("0" if self.size == 1 else
+ "1" if self.size == 2 else
+ "2" if self.size == 4 else "3",
+ "true" if self.sign else "false",
+ "true" if (self.size == 8 or
+ self.flavor == "widen") else "false",
+ "true" if self.flavor == "acquire" else "false")
+
+ (newHeader, newDecoder, newExec) = \
+ self.fillTemplates(self.name, self.Name, self.codeBlobs,
+ self.memFlags, self.instFlags,
+ base, wbDecl, faCode=fa_code)
+
+ header_output += newHeader
+ decoder_output += newDecoder
+ exec_output += newExec
+
+ class LoadImmInst64(LoadInst64):
+ def __init__(self, *args, **kargs):
+ super(LoadImmInst64, self).__init__(*args, **kargs)
+ self.offset = " + imm"
+
+ self.wbDecl = "MicroAddXiUop(machInst, base, base, imm);"
+
+ class LoadRegInst64(LoadInst64):
+ def __init__(self, *args, **kargs):
+ super(LoadRegInst64, self).__init__(*args, **kargs)
+ self.offset = " + extendReg64(XOffset, type, shiftAmt, 64)"
+
+ self.wbDecl = \
+ "MicroAddXERegUop(machInst, base, base, " + \
+ " offset, type, shiftAmt);"
+
+ class LoadRawRegInst64(LoadInst64):
+ def __init__(self, *args, **kargs):
+ super(LoadRawRegInst64, self).__init__(*args, **kargs)
+ self.offset = ""
+
+ class LoadSingle64(LoadInst64):
+ def emit(self):
+ self.buildEACode()
+
+ # Code that actually handles the access
+ if self.flavor in ("dprefetch", "iprefetch"):
+ accCode = 'uint64_t temp M5_VAR_USED = Mem%s;'
+ elif self.flavor == "fp":
+ if self.size in (1, 2, 4):
+ accCode = '''
+ AA64FpDestP0_uw = cSwap(Mem%s,
+ isBigEndian64(xc->tcBase()));
+ AA64FpDestP1_uw = 0;
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ '''
+ elif self.size == 8 or (self.size == 16 and not self.top):
+ accCode = '''
+ uint64_t data = cSwap(Mem%s,
+ isBigEndian64(xc->tcBase()));
+ AA64FpDestP0_uw = (uint32_t)data;
+ AA64FpDestP1_uw = (data >> 32);
+ '''
+ # Only zero out the other half if this isn't part of a
+ # pair of 8 byte loads implementing a 16 byte load.
+ if self.size == 8:
+ accCode += '''
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ '''
+ elif self.size == 16 and self.top:
+ accCode = '''
+ uint64_t data = cSwap(Mem%s,
+ isBigEndian64(xc->tcBase()));
+ AA64FpDestP2_uw = (uint32_t)data;
+ AA64FpDestP3_uw = (data >> 32);
+ '''
+ elif self.flavor == "widen" or self.size == 8:
+ accCode = "XDest = cSwap(Mem%s, isBigEndian64(xc->tcBase()));"
+ else:
+ accCode = "WDest = cSwap(Mem%s, isBigEndian64(xc->tcBase()));"
+ if self.size == 16:
+ accCode = accCode % buildMemSuffix(self.sign, 8)
+ else:
+ accCode = accCode % buildMemSuffix(self.sign, self.size)
+
+ self.codeBlobs["memacc_code"] = accCode
+
+ # Push it out to the output files
+ wbDecl = None
+ if self.writeback and not self.micro:
+ wbDecl = self.wbDecl
+ self.emitHelper(self.base, wbDecl)
+
+ class LoadDouble64(LoadInst64):
+ def emit(self):
+ self.buildEACode()
+
+ # Code that actually handles the access
+ if self.flavor == "fp":
+ accCode = '''
+ uint64_t data = cSwap(Mem_ud, isBigEndian64(xc->tcBase()));
+ AA64FpDestP0_uw = (uint32_t)data;
+ AA64FpDestP1_uw = 0;
+ AA64FpDestP2_uw = 0;
+ AA64FpDestP3_uw = 0;
+ AA64FpDest2P0_uw = (data >> 32);
+ AA64FpDest2P1_uw = 0;
+ AA64FpDest2P2_uw = 0;
+ AA64FpDest2P3_uw = 0;
+ '''
+ else:
+ if self.sign:
+ if self.size == 4:
+ accCode = '''
+ uint64_t data = cSwap(Mem_ud,
+ isBigEndian64(xc->tcBase()));
+ XDest = sext<32>((uint32_t)data);
+ XDest2 = sext<32>(data >> 32);
+ '''
+ elif self.size == 8:
+ accCode = '''
+ XDest = sext<64>(Mem_tud.a);
+ XDest2 = sext<64>(Mem_tud.b);
+ '''
+ else:
+ if self.size == 4:
+ accCode = '''
+ uint64_t data = cSwap(Mem_ud,
+ isBigEndian64(xc->tcBase()));
+ XDest = (uint32_t)data;
+ XDest2 = data >> 32;
+ '''
+ elif self.size == 8:
+ accCode = '''
+ XDest = Mem_tud.a;
+ XDest2 = Mem_tud.b;
+ '''
+ self.codeBlobs["memacc_code"] = accCode
+
+ # Push it out to the output files
+ wbDecl = None
+ if self.writeback and not self.micro:
+ wbDecl = self.wbDecl
+ self.emitHelper(self.base, wbDecl)
+
+ class LoadImm64(LoadImmInst64, LoadSingle64):
+ decConstBase = 'LoadStoreImm64'
+ base = 'ArmISA::MemoryImm64'
+ writeback = False
+ post = False
+
+ class LoadPre64(LoadImmInst64, LoadSingle64):
+ decConstBase = 'LoadStoreImm64'
+ base = 'ArmISA::MemoryPreIndex64'
+ writeback = True
+ post = False
+
+ class LoadPost64(LoadImmInst64, LoadSingle64):
+ decConstBase = 'LoadStoreImm64'
+ base = 'ArmISA::MemoryPostIndex64'
+ writeback = True
+ post = True
+
+ class LoadReg64(LoadRegInst64, LoadSingle64):
+ decConstBase = 'LoadStoreReg64'
+ base = 'ArmISA::MemoryReg64'
+ writeback = False
+ post = False
+
+ class LoadRaw64(LoadRawRegInst64, LoadSingle64):
+ decConstBase = 'LoadStoreRaw64'
+ base = 'ArmISA::MemoryRaw64'
+ writeback = False
+ post = False
+
+ class LoadEx64(LoadRawRegInst64, LoadSingle64):
+ decConstBase = 'LoadStoreEx64'
+ base = 'ArmISA::MemoryEx64'
+ writeback = False
+ post = False
+
+ class LoadLit64(LoadImmInst64, LoadSingle64):
+ decConstBase = 'LoadStoreLit64'
+ base = 'ArmISA::MemoryLiteral64'
+ writeback = False
+ post = False
+
+ def buildLoads64(mnem, NameBase, size, sign, flavor="normal"):
+ LoadImm64(mnem, NameBase + "_IMM", size, sign, flavor=flavor).emit()
+ LoadPre64(mnem, NameBase + "_PRE", size, sign, flavor=flavor).emit()
+ LoadPost64(mnem, NameBase + "_POST", size, sign, flavor=flavor).emit()
+ LoadReg64(mnem, NameBase + "_REG", size, sign, flavor=flavor).emit()
+
+ buildLoads64("ldrb", "LDRB64", 1, False)
+ buildLoads64("ldrsb", "LDRSBW64", 1, True)
+ buildLoads64("ldrsb", "LDRSBX64", 1, True, flavor="widen")
+ buildLoads64("ldrh", "LDRH64", 2, False)
+ buildLoads64("ldrsh", "LDRSHW64", 2, True)
+ buildLoads64("ldrsh", "LDRSHX64", 2, True, flavor="widen")
+ buildLoads64("ldrsw", "LDRSW64", 4, True, flavor="widen")
+ buildLoads64("ldr", "LDRW64", 4, False)
+ buildLoads64("ldr", "LDRX64", 8, False)
+ buildLoads64("ldr", "LDRBFP64", 1, False, flavor="fp")
+ buildLoads64("ldr", "LDRHFP64", 2, False, flavor="fp")
+ buildLoads64("ldr", "LDRSFP64", 4, False, flavor="fp")
+ buildLoads64("ldr", "LDRDFP64", 8, False, flavor="fp")
+
+ LoadImm64("prfm", "PRFM64_IMM", 8, flavor="dprefetch").emit()
+ LoadReg64("prfm", "PRFM64_REG", 8, flavor="dprefetch").emit()
+ LoadLit64("prfm", "PRFM64_LIT", 8, literal=True, flavor="dprefetch").emit()
+ LoadImm64("prfum", "PRFUM64_IMM", 8, flavor="dprefetch").emit()
+
+ LoadImm64("ldurb", "LDURB64_IMM", 1, False).emit()
+ LoadImm64("ldursb", "LDURSBW64_IMM", 1, True).emit()
+ LoadImm64("ldursb", "LDURSBX64_IMM", 1, True, flavor="widen").emit()
+ LoadImm64("ldurh", "LDURH64_IMM", 2, False).emit()
+ LoadImm64("ldursh", "LDURSHW64_IMM", 2, True).emit()
+ LoadImm64("ldursh", "LDURSHX64_IMM", 2, True, flavor="widen").emit()
+ LoadImm64("ldursw", "LDURSW64_IMM", 4, True, flavor="widen").emit()
+ LoadImm64("ldur", "LDURW64_IMM", 4, False).emit()
+ LoadImm64("ldur", "LDURX64_IMM", 8, False).emit()
+ LoadImm64("ldur", "LDURBFP64_IMM", 1, flavor="fp").emit()
+ LoadImm64("ldur", "LDURHFP64_IMM", 2, flavor="fp").emit()
+ LoadImm64("ldur", "LDURSFP64_IMM", 4, flavor="fp").emit()
+ LoadImm64("ldur", "LDURDFP64_IMM", 8, flavor="fp").emit()
+
+ LoadImm64("ldtrb", "LDTRB64_IMM", 1, False, True).emit()
+ LoadImm64("ldtrsb", "LDTRSBW64_IMM", 1, True, True).emit()
+ LoadImm64("ldtrsb", "LDTRSBX64_IMM", 1, True, True, flavor="widen").emit()
+ LoadImm64("ldtrh", "LDTRH64_IMM", 2, False, True).emit()
+ LoadImm64("ldtrsh", "LDTRSHW64_IMM", 2, True, True).emit()
+ LoadImm64("ldtrsh", "LDTRSHX64_IMM", 2, True, True, flavor="widen").emit()
+ LoadImm64("ldtrsw", "LDTRSW64_IMM", 4, True, flavor="widen").emit()
+ LoadImm64("ldtr", "LDTRW64_IMM", 4, False, True).emit()
+ LoadImm64("ldtr", "LDTRX64_IMM", 8, False, True).emit()
+
+ LoadLit64("ldrsw", "LDRSWL64_LIT", 4, True, \
+ literal=True, flavor="widen").emit()
+ LoadLit64("ldr", "LDRWL64_LIT", 4, False, literal=True).emit()
+ LoadLit64("ldr", "LDRXL64_LIT", 8, False, literal=True).emit()
+ LoadLit64("ldr", "LDRSFP64_LIT", 4, literal=True, flavor="fp").emit()
+ LoadLit64("ldr", "LDRDFP64_LIT", 8, literal=True, flavor="fp").emit()
+
+ LoadRaw64("ldar", "LDARX64", 8, flavor="acquire").emit()
+ LoadRaw64("ldar", "LDARW64", 4, flavor="acquire").emit()
+ LoadRaw64("ldarh", "LDARH64", 2, flavor="acquire").emit()
+ LoadRaw64("ldarb", "LDARB64", 1, flavor="acquire").emit()
+
+ LoadEx64("ldaxr", "LDAXRX64", 8, flavor="acex").emit()
+ LoadEx64("ldaxr", "LDAXRW64", 4, flavor="acex").emit()
+ LoadEx64("ldaxrh", "LDAXRH64", 2, flavor="acex").emit()
+ LoadEx64("ldaxrb", "LDAXRB64", 1, flavor="acex").emit()
+
+ LoadEx64("ldxr", "LDXRX64", 8, flavor="exclusive").emit()
+ LoadEx64("ldxr", "LDXRW64", 4, flavor="exclusive").emit()
+ LoadEx64("ldxrh", "LDXRH64", 2, flavor="exclusive").emit()
+ LoadEx64("ldxrb", "LDXRB64", 1, flavor="exclusive").emit()
+
+ class LoadImmU64(LoadImm64):
+ decConstBase = 'LoadStoreImmU64'
+ micro = True
+
+ class LoadImmDU64(LoadImmInst64, LoadDouble64):
+ decConstBase = 'LoadStoreImmDU64'
+ base = 'ArmISA::MemoryDImm64'
+ micro = True
+ post = False
+ writeback = False
+
+ class LoadImmDouble64(LoadImmInst64, LoadDouble64):
+ decConstBase = 'LoadStoreImmDU64'
+ base = 'ArmISA::MemoryDImm64'
+ micro = False
+ post = False
+ writeback = False
+
+ class LoadRegU64(LoadReg64):
+ decConstBase = 'LoadStoreRegU64'
+ micro = True
+
+ class LoadLitU64(LoadLit64):
+ decConstBase = 'LoadStoreLitU64'
+ micro = True
+
+ LoadImmDouble64("ldaxp", "LDAXPW64", 4, flavor="acexp").emit()
+ LoadImmDouble64("ldaxp", "LDAXPX64", 8, flavor="acexp").emit()
+ LoadImmDouble64("ldxp", "LDXPW64", 4, flavor="exp").emit()
+ LoadImmDouble64("ldxp", "LDXPX64", 8, flavor="exp").emit()
+
+ LoadImmU64("ldrxi_uop", "MicroLdrXImmUop", 8).emit()
+ LoadRegU64("ldrxr_uop", "MicroLdrXRegUop", 8).emit()
+ LoadLitU64("ldrxl_uop", "MicroLdrXLitUop", 8, literal=True).emit()
+ LoadImmU64("ldrfpxi_uop", "MicroLdrFpXImmUop", 8, flavor="fp").emit()
+ LoadRegU64("ldrfpxr_uop", "MicroLdrFpXRegUop", 8, flavor="fp").emit()
+ LoadLitU64("ldrfpxl_uop", "MicroLdrFpXLitUop", 8, literal=True,
+ flavor="fp").emit()
+ LoadImmU64("ldrqbfpxi_uop", "MicroLdrQBFpXImmUop",
+ 16, flavor="fp", top = False).emit()
+ LoadRegU64("ldrqbfpxr_uop", "MicroLdrQBFpXRegUop",
+ 16, flavor="fp", top = False).emit()
+ LoadLitU64("ldrqbfpxl_uop", "MicroLdrQBFpXLitUop",
+ 16, literal=True, flavor="fp", top = False).emit()
+ LoadImmU64("ldrqtfpxi_uop", "MicroLdrQTFpXImmUop",
+ 16, flavor="fp", top = True).emit()
+ LoadRegU64("ldrqtfpxr_uop", "MicroLdrQTFpXRegUop",
+ 16, flavor="fp", top = True).emit()
+ LoadLitU64("ldrqtfpxl_uop", "MicroLdrQTFpXLitUop",
+ 16, literal=True, flavor="fp", top = True).emit()
+ LoadImmDU64("ldrduxi_uop", "MicroLdrDUXImmUop", 4, sign=False).emit()
+ LoadImmDU64("ldrdsxi_uop", "MicroLdrDSXImmUop", 4, sign=True).emit()
+ LoadImmDU64("ldrdfpxi_uop", "MicroLdrDFpXImmUop", 4, flavor="fp").emit()
+}};
diff --git a/src/arch/arm/isa/insts/m5ops.isa b/src/arch/arm/isa/insts/m5ops.isa
index 06ed34af8..928d1be0d 100644
--- a/src/arch/arm/isa/insts/m5ops.isa
+++ b/src/arch/arm/isa/insts/m5ops.isa
@@ -1,5 +1,5 @@
//
-// Copyright (c) 2010 ARM Limited
+// Copyright (c) 2010, 2012-2013 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -58,6 +58,7 @@ let {{
armCode = '''
PseudoInst::arm(xc->tcBase());
'''
+
armIop = InstObjParams("arm", "Arm", "PredOp",
{ "code": armCode,
"predicate_test": predicateTest },
@@ -69,6 +70,7 @@ let {{
quiesceCode = '''
PseudoInst::quiesce(xc->tcBase());
'''
+
quiesceIop = InstObjParams("quiesce", "Quiesce", "PredOp",
{ "code": quiesceCode,
"predicate_test": predicateTest },
@@ -81,6 +83,10 @@ let {{
PseudoInst::quiesceNs(xc->tcBase(), join32to64(R1, R0));
'''
+ quiesceNsCode64 = '''
+ PseudoInst::quiesceNs(xc->tcBase(), X0);
+ '''
+
quiesceNsIop = InstObjParams("quiesceNs", "QuiesceNs", "PredOp",
{ "code": quiesceNsCode,
"predicate_test": predicateTest },
@@ -89,10 +95,22 @@ let {{
decoder_output += BasicConstructor.subst(quiesceNsIop)
exec_output += QuiescePredOpExecute.subst(quiesceNsIop)
+ quiesceNsIop = InstObjParams("quiesceNs", "QuiesceNs64", "PredOp",
+ { "code": quiesceNsCode64,
+ "predicate_test": predicateTest },
+ ["IsNonSpeculative", "IsQuiesce"])
+ header_output += BasicDeclare.subst(quiesceNsIop)
+ decoder_output += BasicConstructor.subst(quiesceNsIop)
+ exec_output += QuiescePredOpExecute.subst(quiesceNsIop)
+
quiesceCyclesCode = '''
PseudoInst::quiesceCycles(xc->tcBase(), join32to64(R1, R0));
'''
+ quiesceCyclesCode64 = '''
+ PseudoInst::quiesceCycles(xc->tcBase(), X0);
+ '''
+
quiesceCyclesIop = InstObjParams("quiesceCycles", "QuiesceCycles", "PredOp",
{ "code": quiesceCyclesCode,
"predicate_test": predicateTest },
@@ -101,12 +119,23 @@ let {{
decoder_output += BasicConstructor.subst(quiesceCyclesIop)
exec_output += QuiescePredOpExecute.subst(quiesceCyclesIop)
+ quiesceCyclesIop = InstObjParams("quiesceCycles", "QuiesceCycles64", "PredOp",
+ { "code": quiesceCyclesCode64,
+ "predicate_test": predicateTest },
+ ["IsNonSpeculative", "IsQuiesce", "IsUnverifiable"])
+ header_output += BasicDeclare.subst(quiesceCyclesIop)
+ decoder_output += BasicConstructor.subst(quiesceCyclesIop)
+ exec_output += QuiescePredOpExecute.subst(quiesceCyclesIop)
+
quiesceTimeCode = '''
uint64_t qt_val = PseudoInst::quiesceTime(xc->tcBase());
R0 = bits(qt_val, 31, 0);
R1 = bits(qt_val, 63, 32);
'''
+ quiesceTimeCode64 = '''
+ X0 = PseudoInst::quiesceTime(xc->tcBase());
+ '''
quiesceTimeIop = InstObjParams("quiesceTime", "QuiesceTime", "PredOp",
{ "code": quiesceTimeCode,
"predicate_test": predicateTest },
@@ -115,12 +144,23 @@ let {{
decoder_output += BasicConstructor.subst(quiesceTimeIop)
exec_output += PredOpExecute.subst(quiesceTimeIop)
+ quiesceTimeIop = InstObjParams("quiesceTime", "QuiesceTime64", "PredOp",
+ { "code": quiesceTimeCode64,
+ "predicate_test": predicateTest },
+ ["IsNonSpeculative", "IsUnverifiable"])
+ header_output += BasicDeclare.subst(quiesceTimeIop)
+ decoder_output += BasicConstructor.subst(quiesceTimeIop)
+ exec_output += PredOpExecute.subst(quiesceTimeIop)
+
rpnsCode = '''
uint64_t rpns_val = PseudoInst::rpns(xc->tcBase());
R0 = bits(rpns_val, 31, 0);
R1 = bits(rpns_val, 63, 32);
'''
+ rpnsCode64 = '''
+ X0 = PseudoInst::rpns(xc->tcBase());
+ '''
rpnsIop = InstObjParams("rpns", "Rpns", "PredOp",
{ "code": rpnsCode,
"predicate_test": predicateTest },
@@ -129,10 +169,22 @@ let {{
decoder_output += BasicConstructor.subst(rpnsIop)
exec_output += PredOpExecute.subst(rpnsIop)
+ rpnsIop = InstObjParams("rpns", "Rpns64", "PredOp",
+ { "code": rpnsCode64,
+ "predicate_test": predicateTest },
+ ["IsNonSpeculative", "IsUnverifiable"])
+ header_output += BasicDeclare.subst(rpnsIop)
+ decoder_output += BasicConstructor.subst(rpnsIop)
+ exec_output += PredOpExecute.subst(rpnsIop)
+
wakeCpuCode = '''
PseudoInst::wakeCPU(xc->tcBase(), join32to64(R1,R0));
'''
+ wakeCpuCode64 = '''
+ PseudoInst::wakeCPU(xc->tcBase(), X0);
+ '''
+
wakeCPUIop = InstObjParams("wakeCPU", "WakeCPU", "PredOp",
{ "code": wakeCpuCode,
"predicate_test": predicateTest },
@@ -141,6 +193,14 @@ let {{
decoder_output += BasicConstructor.subst(wakeCPUIop)
exec_output += PredOpExecute.subst(wakeCPUIop)
+ wakeCPUIop = InstObjParams("wakeCPU", "WakeCPU64", "PredOp",
+ { "code": wakeCpuCode64,
+ "predicate_test": predicateTest },
+ ["IsNonSpeculative", "IsUnverifiable"])
+ header_output += BasicDeclare.subst(wakeCPUIop)
+ decoder_output += BasicConstructor.subst(wakeCPUIop)
+ exec_output += PredOpExecute.subst(wakeCPUIop)
+
deprecated_ivlbIop = InstObjParams("deprecated_ivlb", "Deprecated_ivlb", "PredOp",
{ "code": '''warn_once("Obsolete M5 ivlb instruction encountered.\\n");''',
"predicate_test": predicateTest })
@@ -171,6 +231,11 @@ let {{
m5exit_code = '''
PseudoInst::m5exit(xc->tcBase(), join32to64(R1, R0));
'''
+
+ m5exit_code64 = '''
+ PseudoInst::m5exit(xc->tcBase(), X0);
+ '''
+
m5exitIop = InstObjParams("m5exit", "M5exit", "PredOp",
{ "code": m5exit_code,
"predicate_test": predicateTest },
@@ -190,6 +255,14 @@ let {{
decoder_output += BasicConstructor.subst(m5failIop)
exec_output += PredOpExecute.subst(m5failIop)
+ m5exitIop = InstObjParams("m5exit", "M5exit64", "PredOp",
+ { "code": m5exit_code64,
+ "predicate_test": predicateTest },
+ ["No_OpClass", "IsNonSpeculative"])
+ header_output += BasicDeclare.subst(m5exitIop)
+ decoder_output += BasicConstructor.subst(m5exitIop)
+ exec_output += PredOpExecute.subst(m5exitIop)
+
loadsymbolCode = '''
PseudoInst::loadsymbol(xc->tcBase());
'''
@@ -208,6 +281,10 @@ let {{
R1 = bits(ip_val, 63, 32);
'''
+ initparamCode64 = '''
+ X0 = PseudoInst::initParam(xc->tcBase());
+ '''
+
initparamIop = InstObjParams("initparam", "Initparam", "PredOp",
{ "code": initparamCode,
"predicate_test": predicateTest },
@@ -216,10 +293,21 @@ let {{
decoder_output += BasicConstructor.subst(initparamIop)
exec_output += PredOpExecute.subst(initparamIop)
+ initparamIop = InstObjParams("initparam", "Initparam64", "PredOp",
+ { "code": initparamCode64,
+ "predicate_test": predicateTest },
+ ["IsNonSpeculative"])
+ header_output += BasicDeclare.subst(initparamIop)
+ decoder_output += BasicConstructor.subst(initparamIop)
+ exec_output += PredOpExecute.subst(initparamIop)
+
resetstats_code = '''
PseudoInst::resetstats(xc->tcBase(), join32to64(R1, R0), join32to64(R3, R2));
'''
+ resetstats_code64 = '''
+ PseudoInst::resetstats(xc->tcBase(), X0, X1);
+ '''
resetstatsIop = InstObjParams("resetstats", "Resetstats", "PredOp",
{ "code": resetstats_code,
"predicate_test": predicateTest },
@@ -228,9 +316,22 @@ let {{
decoder_output += BasicConstructor.subst(resetstatsIop)
exec_output += PredOpExecute.subst(resetstatsIop)
+ resetstatsIop = InstObjParams("resetstats", "Resetstats64", "PredOp",
+ { "code": resetstats_code64,
+ "predicate_test": predicateTest },
+ ["IsNonSpeculative"])
+ header_output += BasicDeclare.subst(resetstatsIop)
+ decoder_output += BasicConstructor.subst(resetstatsIop)
+ exec_output += PredOpExecute.subst(resetstatsIop)
+
dumpstats_code = '''
PseudoInst::dumpstats(xc->tcBase(), join32to64(R1, R0), join32to64(R3, R2));
'''
+
+ dumpstats_code64 = '''
+ PseudoInst::dumpstats(xc->tcBase(), X0, X1);
+ '''
+
dumpstatsIop = InstObjParams("dumpstats", "Dumpstats", "PredOp",
{ "code": dumpstats_code,
"predicate_test": predicateTest },
@@ -239,9 +340,22 @@ let {{
decoder_output += BasicConstructor.subst(dumpstatsIop)
exec_output += PredOpExecute.subst(dumpstatsIop)
+ dumpstatsIop = InstObjParams("dumpstats", "Dumpstats64", "PredOp",
+ { "code": dumpstats_code64,
+ "predicate_test": predicateTest },
+ ["IsNonSpeculative"])
+ header_output += BasicDeclare.subst(dumpstatsIop)
+ decoder_output += BasicConstructor.subst(dumpstatsIop)
+ exec_output += PredOpExecute.subst(dumpstatsIop)
+
dumpresetstats_code = '''
PseudoInst::dumpresetstats(xc->tcBase(), join32to64(R1, R0), join32to64(R3, R2));
'''
+
+ dumpresetstats_code64 = '''
+ PseudoInst::dumpresetstats(xc->tcBase(), X0, X1);
+ '''
+
dumpresetstatsIop = InstObjParams("dumpresetstats", "Dumpresetstats", "PredOp",
{ "code": dumpresetstats_code,
"predicate_test": predicateTest },
@@ -250,9 +364,22 @@ let {{
decoder_output += BasicConstructor.subst(dumpresetstatsIop)
exec_output += PredOpExecute.subst(dumpresetstatsIop)
+ dumpresetstatsIop = InstObjParams("dumpresetstats", "Dumpresetstats64", "PredOp",
+ { "code": dumpresetstats_code64,
+ "predicate_test": predicateTest },
+ ["IsNonSpeculative"])
+ header_output += BasicDeclare.subst(dumpresetstatsIop)
+ decoder_output += BasicConstructor.subst(dumpresetstatsIop)
+ exec_output += PredOpExecute.subst(dumpresetstatsIop)
+
m5checkpoint_code = '''
PseudoInst::m5checkpoint(xc->tcBase(), join32to64(R1, R0), join32to64(R3, R2));
'''
+
+ m5checkpoint_code64 = '''
+ PseudoInst::m5checkpoint(xc->tcBase(), X0, X1);
+ '''
+
m5checkpointIop = InstObjParams("m5checkpoint", "M5checkpoint", "PredOp",
{ "code": m5checkpoint_code,
"predicate_test": predicateTest },
@@ -261,11 +388,27 @@ let {{
decoder_output += BasicConstructor.subst(m5checkpointIop)
exec_output += PredOpExecute.subst(m5checkpointIop)
+ m5checkpointIop = InstObjParams("m5checkpoint", "M5checkpoint64", "PredOp",
+ { "code": m5checkpoint_code64,
+ "predicate_test": predicateTest },
+ ["IsNonSpeculative", "IsUnverifiable"])
+ header_output += BasicDeclare.subst(m5checkpointIop)
+ decoder_output += BasicConstructor.subst(m5checkpointIop)
+ exec_output += PredOpExecute.subst(m5checkpointIop)
+
m5readfileCode = '''
int n = 4;
uint64_t offset = getArgument(xc->tcBase(), n, sizeof(uint64_t), false);
R0 = PseudoInst::readfile(xc->tcBase(), R0, join32to64(R3,R2), offset);
'''
+
+ m5readfileCode64 = '''
+ int n = 4;
+ uint64_t offset = getArgument(xc->tcBase(), n, sizeof(uint64_t), false);
+ n = 6;
+ X0 = PseudoInst::readfile(xc->tcBase(), (uint32_t)X0, X1, offset);
+ '''
+
m5readfileIop = InstObjParams("m5readfile", "M5readfile", "PredOp",
{ "code": m5readfileCode,
"predicate_test": predicateTest },
@@ -274,6 +417,14 @@ let {{
decoder_output += BasicConstructor.subst(m5readfileIop)
exec_output += PredOpExecute.subst(m5readfileIop)
+ m5readfileIop = InstObjParams("m5readfile", "M5readfile64", "PredOp",
+ { "code": m5readfileCode64,
+ "predicate_test": predicateTest },
+ ["IsNonSpeculative", "IsUnverifiable"])
+ header_output += BasicDeclare.subst(m5readfileIop)
+ decoder_output += BasicConstructor.subst(m5readfileIop)
+ exec_output += PredOpExecute.subst(m5readfileIop)
+
m5writefileCode = '''
int n = 4;
uint64_t offset = getArgument(xc->tcBase(), n, sizeof(uint64_t), false);
@@ -282,6 +433,16 @@ let {{
R0 = PseudoInst::writefile(xc->tcBase(), R0, join32to64(R3,R2), offset,
filenameAddr);
'''
+
+ m5writefileCode64 = '''
+ int n = 4;
+ uint64_t offset = getArgument(xc->tcBase(), n, sizeof(uint64_t), false);
+ n = 6;
+ Addr filenameAddr = getArgument(xc->tcBase(), n, sizeof(Addr), false);
+ X0 = PseudoInst::writefile(xc->tcBase(), (uint32_t)X0, X1, offset,
+ filenameAddr);
+ '''
+
m5writefileIop = InstObjParams("m5writefile", "M5writefile", "PredOp",
{ "code": m5writefileCode,
"predicate_test": predicateTest },
@@ -290,6 +451,14 @@ let {{
decoder_output += BasicConstructor.subst(m5writefileIop)
exec_output += PredOpExecute.subst(m5writefileIop)
+ m5writefileIop = InstObjParams("m5writefile", "M5writefile64", "PredOp",
+ { "code": m5writefileCode64,
+ "predicate_test": predicateTest },
+ ["IsNonSpeculative"])
+ header_output += BasicDeclare.subst(m5writefileIop)
+ decoder_output += BasicConstructor.subst(m5writefileIop)
+ exec_output += PredOpExecute.subst(m5writefileIop)
+
m5breakIop = InstObjParams("m5break", "M5break", "PredOp",
{ "code": "PseudoInst::debugbreak(xc->tcBase());",
"predicate_test": predicateTest },
@@ -309,6 +478,9 @@ let {{
m5addsymbolCode = '''
PseudoInst::addsymbol(xc->tcBase(), join32to64(R1, R0), R2);
'''
+ m5addsymbolCode64 = '''
+ PseudoInst::addsymbol(xc->tcBase(), X0, (uint32_t)X1);
+ '''
m5addsymbolIop = InstObjParams("m5addsymbol", "M5addsymbol", "PredOp",
{ "code": m5addsymbolCode,
"predicate_test": predicateTest },
@@ -317,8 +489,17 @@ let {{
decoder_output += BasicConstructor.subst(m5addsymbolIop)
exec_output += PredOpExecute.subst(m5addsymbolIop)
+ m5addsymbolIop = InstObjParams("m5addsymbol", "M5addsymbol64", "PredOp",
+ { "code": m5addsymbolCode64,
+ "predicate_test": predicateTest },
+ ["IsNonSpeculative"])
+ header_output += BasicDeclare.subst(m5addsymbolIop)
+ decoder_output += BasicConstructor.subst(m5addsymbolIop)
+ exec_output += PredOpExecute.subst(m5addsymbolIop)
+
m5panicCode = '''panic("M5 panic instruction called at pc=%#x.",
xc->pcState().pc());'''
+
m5panicIop = InstObjParams("m5panic", "M5panic", "PredOp",
{ "code": m5panicCode,
"predicate_test": predicateTest },
@@ -332,6 +513,13 @@ let {{
join32to64(R1, R0),
join32to64(R3, R2)
);'''
+
+ m5workbeginCode64 = '''PseudoInst::workbegin(
+ xc->tcBase(),
+ X0,
+ X1
+ );'''
+
m5workbeginIop = InstObjParams("m5workbegin", "M5workbegin", "PredOp",
{ "code": m5workbeginCode,
"predicate_test": predicateTest },
@@ -340,11 +528,26 @@ let {{
decoder_output += BasicConstructor.subst(m5workbeginIop)
exec_output += PredOpExecute.subst(m5workbeginIop)
+ m5workbeginIop = InstObjParams("m5workbegin", "M5workbegin64", "PredOp",
+ { "code": m5workbeginCode64,
+ "predicate_test": predicateTest },
+ ["IsNonSpeculative"])
+ header_output += BasicDeclare.subst(m5workbeginIop)
+ decoder_output += BasicConstructor.subst(m5workbeginIop)
+ exec_output += PredOpExecute.subst(m5workbeginIop)
+
m5workendCode = '''PseudoInst::workend(
xc->tcBase(),
join32to64(R1, R0),
join32to64(R3, R2)
);'''
+
+ m5workendCode64 = '''PseudoInst::workend(
+ xc->tcBase(),
+ X0,
+ X1
+ );'''
+
m5workendIop = InstObjParams("m5workend", "M5workend", "PredOp",
{ "code": m5workendCode,
"predicate_test": predicateTest },
@@ -353,4 +556,11 @@ let {{
decoder_output += BasicConstructor.subst(m5workendIop)
exec_output += PredOpExecute.subst(m5workendIop)
+ m5workendIop = InstObjParams("m5workend", "M5workend64", "PredOp",
+ { "code": m5workendCode64,
+ "predicate_test": predicateTest },
+ ["IsNonSpeculative"])
+ header_output += BasicDeclare.subst(m5workendIop)
+ decoder_output += BasicConstructor.subst(m5workendIop)
+ exec_output += PredOpExecute.subst(m5workendIop)
}};
diff --git a/src/arch/arm/isa/insts/macromem.isa b/src/arch/arm/isa/insts/macromem.isa
index db36a3fff..f164595dd 100644
--- a/src/arch/arm/isa/insts/macromem.isa
+++ b/src/arch/arm/isa/insts/macromem.isa
@@ -1,6 +1,6 @@
// -*- mode:c++ -*-
-// Copyright (c) 2010 ARM Limited
+// Copyright (c) 2010-2013 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -91,7 +91,8 @@ let {{
SCTLR sctlr = Sctlr;
CPSR new_cpsr =
- cpsrWriteByInstr(old_cpsr, Spsr, 0xF, true, sctlr.nmfi);
+ cpsrWriteByInstr(old_cpsr, Spsr, Scr, Nsacr, 0xF, true,
+ sctlr.nmfi, xc->tcBase());
Cpsr = ~CondCodesMask & new_cpsr;
CondCodesNZ = new_cpsr.nz;
CondCodesC = new_cpsr.c;
@@ -158,8 +159,8 @@ let {{
header_output = decoder_output = exec_output = ''
- loadIops = (microLdrUopIop, microLdrRetUopIop,
- microLdrFpUopIop, microLdrDBFpUopIop, microLdrDTFpUopIop)
+ loadIops = (microLdrUopIop, microLdrRetUopIop, microLdrFpUopIop,
+ microLdrDBFpUopIop, microLdrDTFpUopIop)
storeIops = (microStrUopIop, microStrFpUopIop,
microStrDBFpUopIop, microStrDTFpUopIop)
for iop in loadIops + storeIops:
@@ -178,7 +179,7 @@ let {{
let {{
exec_output = header_output = ''
- eaCode = 'EA = URa + imm;'
+ eaCode = 'EA = XURa + imm;'
for size in (1, 2, 3, 4, 6, 8, 12, 16):
# Set up the memory access.
@@ -592,6 +593,26 @@ let {{
URa = URb + shift_rm_imm(URc, shiftAmt, shiftType, OptShiftRmCondCodesC);
'''
+ microAddXiUopIop = InstObjParams('addxi_uop', 'MicroAddXiUop',
+ 'MicroIntImmXOp',
+ 'XURa = XURb + imm;',
+ ['IsMicroop'])
+
+ microAddXiSpAlignUopIop = InstObjParams('addxi_uop', 'MicroAddXiSpAlignUop',
+ 'MicroIntImmXOp', '''
+ if (isSP((IntRegIndex) urb) && bits(XURb, 3, 0) &&
+ SPAlignmentCheckEnabled(xc->tcBase())) {
+ return new SPAlignmentFault();
+ }
+ XURa = XURb + imm;
+ ''', ['IsMicroop'])
+
+ microAddXERegUopIop = InstObjParams('addxr_uop', 'MicroAddXERegUop',
+ 'MicroIntRegXOp',
+ 'XURa = XURb + ' + \
+ 'extendReg64(XURc, type, shiftAmt, 64);',
+ ['IsMicroop'])
+
microAddUopIop = InstObjParams('add_uop', 'MicroAddUop',
'MicroIntRegOp',
{'code': microAddUopCode,
@@ -604,6 +625,11 @@ let {{
'predicate_test': predicateTest},
['IsMicroop'])
+ microSubXiUopIop = InstObjParams('subxi_uop', 'MicroSubXiUop',
+ 'MicroIntImmXOp',
+ 'XURa = XURb - imm;',
+ ['IsMicroop'])
+
microSubUopCode = '''
URa = URb - shift_rm_imm(URc, shiftAmt, shiftType, OptShiftRmCondCodesC);
'''
@@ -631,8 +657,8 @@ let {{
SCTLR sctlr = Sctlr;
pNPC = URa;
CPSR new_cpsr =
- cpsrWriteByInstr(cpsrOrCondCodes, URb,
- 0xF, true, sctlr.nmfi);
+ cpsrWriteByInstr(cpsrOrCondCodes, URb, Scr, Nsacr,
+ 0xF, true, sctlr.nmfi, xc->tcBase());
Cpsr = ~CondCodesMask & new_cpsr;
NextThumb = new_cpsr.t;
NextJazelle = new_cpsr.j;
@@ -651,25 +677,37 @@ let {{
['IsMicroop'])
header_output = MicroIntImmDeclare.subst(microAddiUopIop) + \
+ MicroIntImmDeclare.subst(microAddXiUopIop) + \
+ MicroIntImmDeclare.subst(microAddXiSpAlignUopIop) + \
MicroIntImmDeclare.subst(microSubiUopIop) + \
+ MicroIntImmDeclare.subst(microSubXiUopIop) + \
MicroIntRegDeclare.subst(microAddUopIop) + \
MicroIntRegDeclare.subst(microSubUopIop) + \
+ MicroIntXERegDeclare.subst(microAddXERegUopIop) + \
MicroIntMovDeclare.subst(microUopRegMovIop) + \
MicroIntMovDeclare.subst(microUopRegMovRetIop) + \
MicroSetPCCPSRDeclare.subst(microUopSetPCCPSRIop)
decoder_output = MicroIntImmConstructor.subst(microAddiUopIop) + \
+ MicroIntImmXConstructor.subst(microAddXiUopIop) + \
+ MicroIntImmXConstructor.subst(microAddXiSpAlignUopIop) + \
MicroIntImmConstructor.subst(microSubiUopIop) + \
+ MicroIntImmXConstructor.subst(microSubXiUopIop) + \
MicroIntRegConstructor.subst(microAddUopIop) + \
MicroIntRegConstructor.subst(microSubUopIop) + \
+ MicroIntXERegConstructor.subst(microAddXERegUopIop) + \
MicroIntMovConstructor.subst(microUopRegMovIop) + \
MicroIntMovConstructor.subst(microUopRegMovRetIop) + \
MicroSetPCCPSRConstructor.subst(microUopSetPCCPSRIop)
exec_output = PredOpExecute.subst(microAddiUopIop) + \
+ BasicExecute.subst(microAddXiUopIop) + \
+ BasicExecute.subst(microAddXiSpAlignUopIop) + \
PredOpExecute.subst(microSubiUopIop) + \
+ BasicExecute.subst(microSubXiUopIop) + \
PredOpExecute.subst(microAddUopIop) + \
PredOpExecute.subst(microSubUopIop) + \
+ BasicExecute.subst(microAddXERegUopIop) + \
PredOpExecute.subst(microUopRegMovIop) + \
PredOpExecute.subst(microUopRegMovRetIop) + \
PredOpExecute.subst(microUopSetPCCPSRIop)
@@ -681,6 +719,25 @@ let {{
header_output = MacroMemDeclare.subst(iop)
decoder_output = MacroMemConstructor.subst(iop)
+ iop = InstObjParams("ldpstp", "LdpStp", 'PairMemOp', "", [])
+ header_output += PairMemDeclare.subst(iop)
+ decoder_output += PairMemConstructor.subst(iop)
+
+ iopImm = InstObjParams("bigfpmemimm", "BigFpMemImm", "BigFpMemImmOp", "")
+ iopPre = InstObjParams("bigfpmempre", "BigFpMemPre", "BigFpMemPreOp", "")
+ iopPost = InstObjParams("bigfpmempost", "BigFpMemPost", "BigFpMemPostOp", "")
+ for iop in (iopImm, iopPre, iopPost):
+ header_output += BigFpMemImmDeclare.subst(iop)
+ decoder_output += BigFpMemImmConstructor.subst(iop)
+
+ iop = InstObjParams("bigfpmemreg", "BigFpMemReg", "BigFpMemRegOp", "")
+ header_output += BigFpMemRegDeclare.subst(iop)
+ decoder_output += BigFpMemRegConstructor.subst(iop)
+
+ iop = InstObjParams("bigfpmemlit", "BigFpMemLit", "BigFpMemLitOp", "")
+ header_output += BigFpMemLitDeclare.subst(iop)
+ decoder_output += BigFpMemLitConstructor.subst(iop)
+
iop = InstObjParams("vldmult", "VldMult", 'VldMultOp', "", [])
header_output += VMemMultDeclare.subst(iop)
decoder_output += VMemMultConstructor.subst(iop)
diff --git a/src/arch/arm/isa/insts/mem.isa b/src/arch/arm/isa/insts/mem.isa
index c39f1b14f..aed6bab0d 100644
--- a/src/arch/arm/isa/insts/mem.isa
+++ b/src/arch/arm/isa/insts/mem.isa
@@ -1,6 +1,6 @@
// -*- mode:c++ -*-
-// Copyright (c) 2010 ARM Limited
+// Copyright (c) 2010-2012 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -48,8 +48,8 @@ let {{
self.constructTemplate = eval(self.decConstBase + 'Constructor')
def fillTemplates(self, name, Name, codeBlobs, memFlags, instFlags,
- base = 'Memory', wbDecl = None, pcDecl = None,
- rasPop = False):
+ base='Memory', wbDecl=None, pcDecl=None,
+ rasPop=False, size=4, sign=False, faCode=None):
# Make sure flags are in lists (convert to lists if not).
memFlags = makeList(memFlags)
instFlags = makeList(instFlags)
@@ -63,6 +63,22 @@ let {{
codeBlobs["ea_code"] = eaCode
+ if faCode:
+ # For AArch64 the fa_code snippet comes already assembled here
+ codeBlobs["fa_code"] = faCode
+ elif wbDecl == None:
+ codeBlobs["fa_code"] = '''
+ if (dest != INTREG_PC) {
+ fault->annotate(ArmFault::SAS, %s);
+ fault->annotate(ArmFault::SSE, %s);
+ fault->annotate(ArmFault::SRT, dest);
+ }
+ ''' %("0" if size == 1 else
+ "1" if size == 2 else "2",
+ "true" if sign else "false")
+ else:
+ codeBlobs["fa_code"] = ''
+
macroName = Name
instFlagsCopy = list(instFlags)
codeBlobsCopy = dict(codeBlobs)
@@ -108,6 +124,7 @@ let {{
"use_uops" : use_uops,
"use_pc" : use_pc,
"use_wb" : use_wb,
+ "fa_code" : '',
"is_ras_pop" : is_ras_pop },
['IsMacroop'])
header_output += self.declareTemplate.subst(iop)
@@ -176,8 +193,13 @@ let {{
return Name
def buildMemSuffix(sign, size):
- if size == 4:
- memSuffix = ''
+ if size == 8:
+ memSuffix = '_ud'
+ elif size == 4:
+ if sign:
+ memSuffix = '_sw'
+ else:
+ memSuffix = '_uw'
elif size == 2:
if sign:
memSuffix = '_sh'
diff --git a/src/arch/arm/isa/insts/misc.isa b/src/arch/arm/isa/insts/misc.isa
index b8425a240..678a125fb 100644
--- a/src/arch/arm/isa/insts/misc.isa
+++ b/src/arch/arm/isa/insts/misc.isa
@@ -1,6 +1,6 @@
// -*- mode:c++ -*-
-// Copyright (c) 2010-2012 ARM Limited
+// Copyright (c) 2010-2013 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -40,21 +40,102 @@
let {{
svcCode = '''
- if (FullSystem) {
- fault = new SupervisorCall;
- } else {
- fault = new SupervisorCall(machInst);
- }
+ fault = new SupervisorCall(machInst, imm);
'''
- svcIop = InstObjParams("svc", "Svc", "PredOp",
+ svcIop = InstObjParams("svc", "Svc", "ImmOp",
{ "code": svcCode,
"predicate_test": predicateTest },
["IsSyscall", "IsNonSpeculative", "IsSerializeAfter"])
- header_output = BasicDeclare.subst(svcIop)
- decoder_output = BasicConstructor.subst(svcIop)
+ header_output = ImmOpDeclare.subst(svcIop)
+ decoder_output = ImmOpConstructor.subst(svcIop)
exec_output = PredOpExecute.subst(svcIop)
+ smcCode = '''
+ HCR hcr = Hcr;
+ CPSR cpsr = Cpsr;
+ SCR scr = Scr;
+
+ if ((cpsr.mode != MODE_USER) && FullSystem) {
+ if (ArmSystem::haveVirtualization(xc->tcBase()) &&
+ !inSecureState(scr, cpsr) && (cpsr.mode != MODE_HYP) && hcr.tsc) {
+ fault = new HypervisorTrap(machInst, 0, EC_SMC_TO_HYP);
+ } else {
+ if (scr.scd) {
+ fault = disabledFault();
+ } else {
+ fault = new SecureMonitorCall(machInst);
+ }
+ }
+ } else {
+ fault = disabledFault();
+ }
+ '''
+
+ smcIop = InstObjParams("smc", "Smc", "PredOp",
+ { "code": smcCode,
+ "predicate_test": predicateTest },
+ ["IsNonSpeculative", "IsSerializeAfter"])
+ header_output += BasicDeclare.subst(smcIop)
+ decoder_output += BasicConstructor.subst(smcIop)
+ exec_output += PredOpExecute.subst(smcIop)
+
+ hvcCode = '''
+ CPSR cpsr = Cpsr;
+ SCR scr = Scr;
+
+ // Filter out the various cases where this instruction isn't defined
+ if (!FullSystem || !ArmSystem::haveVirtualization(xc->tcBase()) ||
+ (cpsr.mode == MODE_USER) ||
+ (ArmSystem::haveSecurity(xc->tcBase()) && (!scr.ns || !scr.hce))) {
+ fault = disabledFault();
+ } else {
+ fault = new HypervisorCall(machInst, imm);
+ }
+ '''
+
+ hvcIop = InstObjParams("hvc", "Hvc", "ImmOp",
+ { "code": hvcCode,
+ "predicate_test": predicateTest },
+ ["IsNonSpeculative", "IsSerializeAfter"])
+ header_output += ImmOpDeclare.subst(hvcIop)
+ decoder_output += ImmOpConstructor.subst(hvcIop)
+ exec_output += PredOpExecute.subst(hvcIop)
+
+ eretCode = '''
+ SCTLR sctlr = Sctlr;
+ CPSR old_cpsr = Cpsr;
+ old_cpsr.nz = CondCodesNZ;
+ old_cpsr.c = CondCodesC;
+ old_cpsr.v = CondCodesV;
+ old_cpsr.ge = CondCodesGE;
+
+ CPSR new_cpsr = cpsrWriteByInstr(old_cpsr, Spsr, Scr, Nsacr, 0xF,
+ true, sctlr.nmfi, xc->tcBase());
+ Cpsr = ~CondCodesMask & new_cpsr;
+ CondCodesNZ = new_cpsr.nz;
+ CondCodesC = new_cpsr.c;
+ CondCodesV = new_cpsr.v;
+ CondCodesGE = new_cpsr.ge;
+
+ NextThumb = (new_cpsr).t;
+ NextJazelle = (new_cpsr).j;
+ NextItState = (((new_cpsr).it2 << 2) & 0xFC)
+ | ((new_cpsr).it1 & 0x3);
+
+ NPC = (old_cpsr.mode == MODE_HYP) ? ElrHyp : LR;
+ '''
+
+ eretIop = InstObjParams("eret", "Eret", "PredOp",
+ { "code": eretCode,
+ "predicate_test": predicateTest },
+ ["IsNonSpeculative", "IsSerializeAfter"])
+ header_output += BasicDeclare.subst(eretIop)
+ decoder_output += BasicConstructor.subst(eretIop)
+ exec_output += PredOpExecute.subst(eretIop)
+
+
+
}};
let {{
@@ -87,6 +168,59 @@ let {{
decoder_output += MrsConstructor.subst(mrsSpsrIop)
exec_output += PredOpExecute.subst(mrsSpsrIop)
+ mrsBankedRegCode = '''
+ bool isIntReg;
+ int regIdx;
+
+ if (decodeMrsMsrBankedReg(byteMask, r, isIntReg, regIdx, Cpsr, Scr, Nsacr)) {
+ if (isIntReg) {
+ Dest = DecodedBankedIntReg;
+ } else {
+ Dest = xc->readMiscReg(regIdx);
+ }
+ } else {
+ return new UndefinedInstruction(machInst, false, mnemonic);
+ }
+ '''
+ mrsBankedRegIop = InstObjParams("mrs", "MrsBankedReg", "MrsOp",
+ { "code": mrsBankedRegCode,
+ "predicate_test": predicateTest },
+ ["IsSerializeBefore"])
+ header_output += MrsBankedRegDeclare.subst(mrsBankedRegIop)
+ decoder_output += MrsBankedRegConstructor.subst(mrsBankedRegIop)
+ exec_output += PredOpExecute.subst(mrsBankedRegIop)
+
+ msrBankedRegCode = '''
+ bool isIntReg;
+ int regIdx;
+
+ if (decodeMrsMsrBankedReg(byteMask, r, isIntReg, regIdx, Cpsr, Scr, Nsacr)) {
+ if (isIntReg) {
+ // This is a bit nasty, you would have thought that
+ // DecodedBankedIntReg wouldn't be written to unless the
+ // conditions on the IF statements above are met, however if
+ // you look at the generated C code you'll find that they are.
+ // However this is safe as DecodedBankedIntReg (which is used
+ // in operands.isa to get the index of DecodedBankedIntReg)
+ // will return INTREG_DUMMY if its not a valid integer
+ // register, so redirecting the write to somewhere we don't
+ // care about.
+ DecodedBankedIntReg = Op1;
+ } else {
+ xc->setMiscReg(regIdx, Op1);
+ }
+ } else {
+ return new UndefinedInstruction(machInst, false, mnemonic);
+ }
+ '''
+ msrBankedRegIop = InstObjParams("msr", "MsrBankedReg", "MsrRegOp",
+ { "code": msrBankedRegCode,
+ "predicate_test": predicateTest },
+ ["IsSerializeAfter"])
+ header_output += MsrBankedRegDeclare.subst(msrBankedRegIop)
+ decoder_output += MsrBankedRegConstructor.subst(msrBankedRegIop)
+ exec_output += PredOpExecute.subst(msrBankedRegIop)
+
msrCpsrRegCode = '''
SCTLR sctlr = Sctlr;
CPSR old_cpsr = Cpsr;
@@ -96,7 +230,8 @@ let {{
old_cpsr.ge = CondCodesGE;
CPSR new_cpsr =
- cpsrWriteByInstr(old_cpsr, Op1, byteMask, false, sctlr.nmfi);
+ cpsrWriteByInstr(old_cpsr, Op1, Scr, Nsacr, byteMask, false,
+ sctlr.nmfi, xc->tcBase());
Cpsr = ~CondCodesMask & new_cpsr;
CondCodesNZ = new_cpsr.nz;
CondCodesC = new_cpsr.c;
@@ -128,7 +263,8 @@ let {{
old_cpsr.v = CondCodesV;
old_cpsr.ge = CondCodesGE;
CPSR new_cpsr =
- cpsrWriteByInstr(old_cpsr, imm, byteMask, false, sctlr.nmfi);
+ cpsrWriteByInstr(old_cpsr, imm, Scr, Nsacr, byteMask, false,
+ sctlr.nmfi, xc->tcBase());
Cpsr = ~CondCodesMask & new_cpsr;
CondCodesNZ = new_cpsr.nz;
CondCodesC = new_cpsr.c;
@@ -488,12 +624,10 @@ let {{
decoder_output += BasicConstructor.subst(bkptIop)
exec_output += BasicExecute.subst(bkptIop)
- nopIop = InstObjParams("nop", "NopInst", "PredOp", \
- { "code" : "", "predicate_test" : predicateTest },
- ['IsNop'])
+ nopIop = InstObjParams("nop", "NopInst", "ArmStaticInst", "", ['IsNop'])
header_output += BasicDeclare.subst(nopIop)
- decoder_output += BasicConstructor.subst(nopIop)
- exec_output += PredOpExecute.subst(nopIop)
+ decoder_output += BasicConstructor64.subst(nopIop)
+ exec_output += BasicExecute.subst(nopIop)
yieldIop = InstObjParams("yield", "YieldInst", "PredOp", \
{ "code" : "", "predicate_test" : predicateTest })
@@ -502,14 +636,31 @@ let {{
exec_output += PredOpExecute.subst(yieldIop)
wfeCode = '''
- // WFE Sleeps if SevMailbox==0 and no unmasked interrupts are pending
+ HCR hcr = Hcr;
+ CPSR cpsr = Cpsr;
+ SCR scr = Scr64;
+ SCTLR sctlr = Sctlr;
+
+ // WFE Sleeps if SevMailbox==0 and no unmasked interrupts are pending,
+ ThreadContext *tc = xc->tcBase();
if (SevMailbox == 1) {
SevMailbox = 0;
- PseudoInst::quiesceSkip(xc->tcBase());
- } else if (xc->tcBase()->getCpuPtr()->getInterruptController()->checkInterrupts(xc->tcBase())) {
- PseudoInst::quiesceSkip(xc->tcBase());
+ PseudoInst::quiesceSkip(tc);
+ } else if (tc->getCpuPtr()->getInterruptController()->checkInterrupts(tc)) {
+ PseudoInst::quiesceSkip(tc);
+ } else if (cpsr.el == EL0 && !sctlr.ntwe) {
+ PseudoInst::quiesceSkip(tc);
+ fault = new SupervisorTrap(machInst, 0x1E00001, EC_TRAPPED_WFI_WFE);
+ } else if (ArmSystem::haveVirtualization(tc) &&
+ !inSecureState(scr, cpsr) && (cpsr.mode != MODE_HYP) &&
+ hcr.twe) {
+ PseudoInst::quiesceSkip(tc);
+ fault = new HypervisorTrap(machInst, 0x1E00001, EC_TRAPPED_WFI_WFE);
+ } else if (ArmSystem::haveSecurity(tc) && cpsr.el != EL3 && scr.twe) {
+ PseudoInst::quiesceSkip(tc);
+ fault = new SecureMonitorTrap(machInst, 0x1E00001, EC_TRAPPED_WFI_WFE);
} else {
- PseudoInst::quiesce(xc->tcBase());
+ PseudoInst::quiesce(tc);
}
'''
wfePredFixUpCode = '''
@@ -528,12 +679,30 @@ let {{
exec_output += QuiescePredOpExecuteWithFixup.subst(wfeIop)
wfiCode = '''
+ HCR hcr = Hcr;
+ CPSR cpsr = Cpsr;
+ SCR scr = Scr64;
+ SCTLR sctlr = Sctlr;
+
// WFI doesn't sleep if interrupts are pending (masked or not)
- if (xc->tcBase()->getCpuPtr()->getInterruptController()->checkRaw()) {
- PseudoInst::quiesceSkip(xc->tcBase());
+ ThreadContext *tc = xc->tcBase();
+ if (tc->getCpuPtr()->getInterruptController()->checkWfiWake(hcr, cpsr,
+ scr)) {
+ PseudoInst::quiesceSkip(tc);
+ } else if (cpsr.el == EL0 && !sctlr.ntwi) {
+ PseudoInst::quiesceSkip(tc);
+ fault = new SupervisorTrap(machInst, 0x1E00000, EC_TRAPPED_WFI_WFE);
+ } else if (ArmSystem::haveVirtualization(tc) && hcr.twi &&
+ (cpsr.mode != MODE_HYP) && !inSecureState(scr, cpsr)) {
+ PseudoInst::quiesceSkip(tc);
+ fault = new HypervisorTrap(machInst, 0x1E00000, EC_TRAPPED_WFI_WFE);
+ } else if (ArmSystem::haveSecurity(tc) && cpsr.el != EL3 && scr.twi) {
+ PseudoInst::quiesceSkip(tc);
+ fault = new SecureMonitorTrap(machInst, 0x1E00000, EC_TRAPPED_WFI_WFE);
} else {
- PseudoInst::quiesce(xc->tcBase());
+ PseudoInst::quiesce(tc);
}
+ tc->getCpuPtr()->clearInterrupt(INT_ABT, 0);
'''
wfiIop = InstObjParams("wfi", "WfiInst", "PredOp", \
{ "code" : wfiCode, "predicate_test" : predicateTest },
@@ -564,6 +733,16 @@ let {{
decoder_output += BasicConstructor.subst(sevIop)
exec_output += PredOpExecute.subst(sevIop)
+ sevlCode = '''
+ SevMailbox = 1;
+ '''
+ sevlIop = InstObjParams("sevl", "SevlInst", "PredOp", \
+ { "code" : sevlCode, "predicate_test" : predicateTest },
+ ["IsNonSpeculative", "IsSquashAfter", "IsUnverifiable"])
+ header_output += BasicDeclare.subst(sevlIop)
+ decoder_output += BasicConstructor.subst(sevlIop)
+ exec_output += BasicExecute.subst(sevlIop)
+
itIop = InstObjParams("it", "ItInst", "PredOp", \
{ "code" : ";",
"predicate_test" : predicateTest }, [])
@@ -571,10 +750,7 @@ let {{
decoder_output += BasicConstructor.subst(itIop)
exec_output += PredOpExecute.subst(itIop)
unknownCode = '''
- if (FullSystem)
- return new UndefinedInstruction;
- else
- return new UndefinedInstruction(machInst, true);
+ return new UndefinedInstruction(machInst, true);
'''
unknownIop = InstObjParams("unknown", "Unknown", "UnknownOp", \
{ "code": unknownCode,
@@ -626,108 +802,152 @@ let {{
exec_output += PredOpExecute.subst(bfiIop)
mrc14code = '''
- CPSR cpsr = Cpsr;
- if (cpsr.mode == MODE_USER) {
- if (FullSystem)
- return new UndefinedInstruction;
- else
- return new UndefinedInstruction(false, mnemonic);
+ MiscRegIndex miscReg = (MiscRegIndex) xc->tcBase()->flattenMiscIndex(op1);
+ if (!canReadCoprocReg(miscReg, Scr, Cpsr, xc->tcBase())) {
+ return new UndefinedInstruction(machInst, false, mnemonic);
+ }
+ if (mcrMrc14TrapToHyp((const MiscRegIndex) op1, Hcr, Cpsr, Scr, Hdcr,
+ Hstr, Hcptr, imm)) {
+ return new HypervisorTrap(machInst, imm, EC_TRAPPED_CP14_MCR_MRC);
}
Dest = MiscOp1;
'''
- mrc14Iop = InstObjParams("mrc", "Mrc14", "RegRegOp",
+ mrc14Iop = InstObjParams("mrc", "Mrc14", "RegRegImmOp",
{ "code": mrc14code,
"predicate_test": predicateTest }, [])
- header_output += RegRegOpDeclare.subst(mrc14Iop)
- decoder_output += RegRegOpConstructor.subst(mrc14Iop)
+ header_output += RegRegImmOpDeclare.subst(mrc14Iop)
+ decoder_output += RegRegImmOpConstructor.subst(mrc14Iop)
exec_output += PredOpExecute.subst(mrc14Iop)
mcr14code = '''
- CPSR cpsr = Cpsr;
- if (cpsr.mode == MODE_USER) {
- if (FullSystem)
- return new UndefinedInstruction;
- else
- return new UndefinedInstruction(false, mnemonic);
+ MiscRegIndex miscReg = (MiscRegIndex) xc->tcBase()->flattenMiscIndex(dest);
+ if (!canWriteCoprocReg(miscReg, Scr, Cpsr, xc->tcBase())) {
+ return new UndefinedInstruction(machInst, false, mnemonic);
+ }
+ if (mcrMrc14TrapToHyp(miscReg, Hcr, Cpsr, Scr, Hdcr,
+ Hstr, Hcptr, imm)) {
+ return new HypervisorTrap(machInst, imm, EC_TRAPPED_CP14_MCR_MRC);
}
MiscDest = Op1;
'''
- mcr14Iop = InstObjParams("mcr", "Mcr14", "RegRegOp",
+ mcr14Iop = InstObjParams("mcr", "Mcr14", "RegRegImmOp",
{ "code": mcr14code,
"predicate_test": predicateTest },
["IsSerializeAfter","IsNonSpeculative"])
- header_output += RegRegOpDeclare.subst(mcr14Iop)
- decoder_output += RegRegOpConstructor.subst(mcr14Iop)
+ header_output += RegRegImmOpDeclare.subst(mcr14Iop)
+ decoder_output += RegRegImmOpConstructor.subst(mcr14Iop)
exec_output += PredOpExecute.subst(mcr14Iop)
- mrc14UserIop = InstObjParams("mrc", "Mrc14User", "RegRegOp",
- { "code": "Dest = MiscOp1;",
- "predicate_test": predicateTest }, [])
- header_output += RegRegOpDeclare.subst(mrc14UserIop)
- decoder_output += RegRegOpConstructor.subst(mrc14UserIop)
- exec_output += PredOpExecute.subst(mrc14UserIop)
-
- mcr14UserIop = InstObjParams("mcr", "Mcr14User", "RegRegOp",
- { "code": "MiscDest = Op1",
- "predicate_test": predicateTest },
- ["IsSerializeAfter","IsNonSpeculative"])
- header_output += RegRegOpDeclare.subst(mcr14UserIop)
- decoder_output += RegRegOpConstructor.subst(mcr14UserIop)
- exec_output += PredOpExecute.subst(mcr14UserIop)
-
mrc15code = '''
- CPSR cpsr = Cpsr;
- if (cpsr.mode == MODE_USER) {
- if (FullSystem)
- return new UndefinedInstruction;
- else
- return new UndefinedInstruction(false, mnemonic);
+ int preFlatOp1 = flattenMiscRegNsBanked(op1, xc->tcBase());
+ MiscRegIndex miscReg = (MiscRegIndex)
+ xc->tcBase()->flattenMiscIndex(preFlatOp1);
+ bool hypTrap = mcrMrc15TrapToHyp(miscReg, Hcr, Cpsr, Scr, Hdcr, Hstr,
+ Hcptr, imm);
+ bool canRead = canReadCoprocReg(miscReg, Scr, Cpsr, xc->tcBase());
+
+ // if we're in non secure PL1 mode then we can trap regargless of whether
+ // the register is accessable, in other modes we trap if only if the register
+ // IS accessable.
+ if (!canRead & !(hypTrap & !inUserMode(Cpsr) & !inSecureState(Scr, Cpsr))) {
+ return new UndefinedInstruction(machInst, false, mnemonic);
}
- Dest = MiscOp1;
+ if (hypTrap) {
+ return new HypervisorTrap(machInst, imm, EC_TRAPPED_CP15_MCR_MRC);
+ }
+ Dest = MiscNsBankedOp1;
'''
- mrc15Iop = InstObjParams("mrc", "Mrc15", "RegRegOp",
+ mrc15Iop = InstObjParams("mrc", "Mrc15", "RegRegImmOp",
{ "code": mrc15code,
"predicate_test": predicateTest }, [])
- header_output += RegRegOpDeclare.subst(mrc15Iop)
- decoder_output += RegRegOpConstructor.subst(mrc15Iop)
+ header_output += RegRegImmOpDeclare.subst(mrc15Iop)
+ decoder_output += RegRegImmOpConstructor.subst(mrc15Iop)
exec_output += PredOpExecute.subst(mrc15Iop)
mcr15code = '''
- CPSR cpsr = Cpsr;
- if (cpsr.mode == MODE_USER) {
- if (FullSystem)
- return new UndefinedInstruction;
- else
- return new UndefinedInstruction(false, mnemonic);
+ int preFlatDest = flattenMiscRegNsBanked(dest, xc->tcBase());
+ MiscRegIndex miscReg = (MiscRegIndex)
+ xc->tcBase()->flattenMiscIndex(preFlatDest);
+ bool hypTrap = mcrMrc15TrapToHyp(miscReg, Hcr, Cpsr, Scr, Hdcr, Hstr,
+ Hcptr, imm);
+ bool canWrite = canWriteCoprocReg(miscReg, Scr, Cpsr, xc->tcBase());
+
+ // if we're in non secure PL1 mode then we can trap regargless of whether
+ // the register is accessable, in other modes we trap if only if the register
+ // IS accessable.
+ if (!canWrite & !(hypTrap & !inUserMode(Cpsr) & !inSecureState(Scr, Cpsr))) {
+ return new UndefinedInstruction(machInst, false, mnemonic);
}
- MiscDest = Op1;
+ if (hypTrap) {
+ return new HypervisorTrap(machInst, imm, EC_TRAPPED_CP15_MCR_MRC);
+ }
+ MiscNsBankedDest = Op1;
'''
- mcr15Iop = InstObjParams("mcr", "Mcr15", "RegRegOp",
+ mcr15Iop = InstObjParams("mcr", "Mcr15", "RegRegImmOp",
{ "code": mcr15code,
"predicate_test": predicateTest },
["IsSerializeAfter","IsNonSpeculative"])
- header_output += RegRegOpDeclare.subst(mcr15Iop)
- decoder_output += RegRegOpConstructor.subst(mcr15Iop)
+ header_output += RegRegImmOpDeclare.subst(mcr15Iop)
+ decoder_output += RegRegImmOpConstructor.subst(mcr15Iop)
exec_output += PredOpExecute.subst(mcr15Iop)
- mrc15UserIop = InstObjParams("mrc", "Mrc15User", "RegRegOp",
- { "code": "Dest = MiscOp1;",
- "predicate_test": predicateTest }, [])
- header_output += RegRegOpDeclare.subst(mrc15UserIop)
- decoder_output += RegRegOpConstructor.subst(mrc15UserIop)
- exec_output += PredOpExecute.subst(mrc15UserIop)
-
- mcr15UserIop = InstObjParams("mcr", "Mcr15User", "RegRegOp",
- { "code": "MiscDest = Op1",
- "predicate_test": predicateTest },
- ["IsSerializeAfter","IsNonSpeculative"])
- header_output += RegRegOpDeclare.subst(mcr15UserIop)
- decoder_output += RegRegOpConstructor.subst(mcr15UserIop)
- exec_output += PredOpExecute.subst(mcr15UserIop)
+
+ mrrc15code = '''
+ int preFlatOp1 = flattenMiscRegNsBanked(op1, xc->tcBase());
+ MiscRegIndex miscReg = (MiscRegIndex)
+ xc->tcBase()->flattenMiscIndex(preFlatOp1);
+ bool hypTrap = mcrrMrrc15TrapToHyp(miscReg, Cpsr, Scr, Hstr, Hcr, imm);
+ bool canRead = canReadCoprocReg(miscReg, Scr, Cpsr, xc->tcBase());
+
+ // if we're in non secure PL1 mode then we can trap regargless of whether
+ // the register is accessable, in other modes we trap if only if the register
+ // IS accessable.
+ if (!canRead & !(hypTrap & !inUserMode(Cpsr) & !inSecureState(Scr, Cpsr))) {
+ return new UndefinedInstruction(machInst, false, mnemonic);
+ }
+ if (hypTrap) {
+ return new HypervisorTrap(machInst, imm, EC_TRAPPED_CP15_MCRR_MRRC);
+ }
+ Dest = bits(MiscNsBankedOp164, 63, 32);
+ Dest2 = bits(MiscNsBankedOp164, 31, 0);
+ '''
+ mrrc15Iop = InstObjParams("mrrc", "Mrrc15", "MrrcOp",
+ { "code": mrrc15code,
+ "predicate_test": predicateTest }, [])
+ header_output += MrrcOpDeclare.subst(mrrc15Iop)
+ decoder_output += MrrcOpConstructor.subst(mrrc15Iop)
+ exec_output += PredOpExecute.subst(mrrc15Iop)
+
+
+ mcrr15code = '''
+ int preFlatDest = flattenMiscRegNsBanked(dest, xc->tcBase());
+ MiscRegIndex miscReg = (MiscRegIndex)
+ xc->tcBase()->flattenMiscIndex(preFlatDest);
+ bool hypTrap = mcrrMrrc15TrapToHyp(miscReg, Cpsr, Scr, Hstr, Hcr, imm);
+ bool canWrite = canWriteCoprocReg(miscReg, Scr, Cpsr, xc->tcBase());
+
+ // if we're in non secure PL1 mode then we can trap regargless of whether
+ // the register is accessable, in other modes we trap if only if the register
+ // IS accessable.
+ if (!canWrite & !(hypTrap & !inUserMode(Cpsr) & !inSecureState(Scr, Cpsr))) {
+ return new UndefinedInstruction(machInst, false, mnemonic);
+ }
+ if (hypTrap) {
+ return new HypervisorTrap(machInst, imm, EC_TRAPPED_CP15_MCRR_MRRC);
+ }
+ MiscNsBankedDest64 = ((uint64_t) Op1 << 32) | Op2;
+ '''
+ mcrr15Iop = InstObjParams("mcrr", "Mcrr15", "McrrOp",
+ { "code": mcrr15code,
+ "predicate_test": predicateTest }, [])
+ header_output += McrrOpDeclare.subst(mcrr15Iop)
+ decoder_output += McrrOpConstructor.subst(mcrr15Iop)
+ exec_output += PredOpExecute.subst(mcrr15Iop)
+
enterxCode = '''
NextThumb = true;
@@ -775,35 +995,53 @@ let {{
exec_output += PredOpExecute.subst(clrexIop)
isbCode = '''
+ // If the barrier is due to a CP15 access check for hyp traps
+ if ((imm != 0) && mcrMrc15TrapToHyp(MISCREG_CP15ISB, Hcr, Cpsr, Scr,
+ Hdcr, Hstr, Hcptr, imm)) {
+ return new HypervisorTrap(machInst, imm,
+ EC_TRAPPED_CP15_MCR_MRC);
+ }
fault = new FlushPipe;
'''
- isbIop = InstObjParams("isb", "Isb", "PredOp",
+ isbIop = InstObjParams("isb", "Isb", "ImmOp",
{"code": isbCode,
"predicate_test": predicateTest},
['IsSerializeAfter'])
- header_output += BasicDeclare.subst(isbIop)
- decoder_output += BasicConstructor.subst(isbIop)
+ header_output += ImmOpDeclare.subst(isbIop)
+ decoder_output += ImmOpConstructor.subst(isbIop)
exec_output += PredOpExecute.subst(isbIop)
dsbCode = '''
+ // If the barrier is due to a CP15 access check for hyp traps
+ if ((imm != 0) && mcrMrc15TrapToHyp(MISCREG_CP15DSB, Hcr, Cpsr, Scr,
+ Hdcr, Hstr, Hcptr, imm)) {
+ return new HypervisorTrap(machInst, imm,
+ EC_TRAPPED_CP15_MCR_MRC);
+ }
fault = new FlushPipe;
'''
- dsbIop = InstObjParams("dsb", "Dsb", "PredOp",
+ dsbIop = InstObjParams("dsb", "Dsb", "ImmOp",
{"code": dsbCode,
"predicate_test": predicateTest},
['IsMemBarrier', 'IsSerializeAfter'])
- header_output += BasicDeclare.subst(dsbIop)
- decoder_output += BasicConstructor.subst(dsbIop)
+ header_output += ImmOpDeclare.subst(dsbIop)
+ decoder_output += ImmOpConstructor.subst(dsbIop)
exec_output += PredOpExecute.subst(dsbIop)
dmbCode = '''
+ // If the barrier is due to a CP15 access check for hyp traps
+ if ((imm != 0) && mcrMrc15TrapToHyp(MISCREG_CP15DMB, Hcr, Cpsr, Scr,
+ Hdcr, Hstr, Hcptr, imm)) {
+ return new HypervisorTrap(machInst, imm,
+ EC_TRAPPED_CP15_MCR_MRC);
+ }
'''
- dmbIop = InstObjParams("dmb", "Dmb", "PredOp",
+ dmbIop = InstObjParams("dmb", "Dmb", "ImmOp",
{"code": dmbCode,
"predicate_test": predicateTest},
['IsMemBarrier'])
- header_output += BasicDeclare.subst(dmbIop)
- decoder_output += BasicConstructor.subst(dmbIop)
+ header_output += ImmOpDeclare.subst(dmbIop)
+ decoder_output += ImmOpConstructor.subst(dmbIop)
exec_output += PredOpExecute.subst(dmbIop)
dbgCode = '''
diff --git a/src/arch/arm/isa/insts/misc64.isa b/src/arch/arm/isa/insts/misc64.isa
new file mode 100644
index 000000000..6ebbcc2ba
--- /dev/null
+++ b/src/arch/arm/isa/insts/misc64.isa
@@ -0,0 +1,147 @@
+// -*- mode:c++ -*-
+
+// Copyright (c) 2011-2013 ARM Limited
+// All rights reserved
+//
+// The license below extends only to copyright in the software and shall
+// not be construed as granting a license to any other intellectual
+// property including but not limited to intellectual property relating
+// to a hardware implementation of the functionality of the software
+// licensed hereunder. You may use the software subject to the license
+// terms below provided that you ensure that this notice is replicated
+// unmodified and in its entirety in all distributions of the software,
+// modified or unmodified, in source code or in binary form.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Gabe Black
+
+let {{
+ svcCode = '''
+ fault = new SupervisorCall(machInst, bits(machInst, 20, 5));
+ '''
+
+ svcIop = InstObjParams("svc", "Svc64", "ArmStaticInst",
+ svcCode, ["IsSyscall", "IsNonSpeculative",
+ "IsSerializeAfter"])
+ header_output = BasicDeclare.subst(svcIop)
+ decoder_output = BasicConstructor64.subst(svcIop)
+ exec_output = BasicExecute.subst(svcIop)
+
+ # @todo: extend to take into account Virtualization.
+ smcCode = '''
+ SCR scr = Scr64;
+ CPSR cpsr = Cpsr;
+
+ if (!ArmSystem::haveSecurity(xc->tcBase()) || inUserMode(cpsr) || scr.smd) {
+ fault = disabledFault();
+ } else {
+ fault = new SecureMonitorCall(machInst);
+ }
+ '''
+
+ smcIop = InstObjParams("smc", "Smc64", "ArmStaticInst",
+ smcCode, ["IsNonSpeculative", "IsSerializeAfter"])
+ header_output += BasicDeclare.subst(smcIop)
+ decoder_output += BasicConstructor64.subst(smcIop)
+ exec_output += BasicExecute.subst(smcIop)
+
+ def subst(templateBase, iop):
+ global header_output, decoder_output, exec_output
+ header_output += eval(templateBase + "Declare").subst(iop)
+ decoder_output += eval(templateBase + "Constructor").subst(iop)
+ exec_output += BasicExecute.subst(iop)
+
+ bfmMaskCode = '''
+ uint64_t bitMask;
+ int diff = imm2 - imm1;
+ if (imm1 <= imm2) {
+ bitMask = mask(diff + 1);
+ } else {
+ bitMask = mask(imm2 + 1);
+ bitMask = (bitMask >> imm1) | (bitMask << (intWidth - imm1));
+ diff += intWidth;
+ }
+ uint64_t topBits M5_VAR_USED = ~mask(diff+1);
+ uint64_t result = (Op164 >> imm1) | (Op164 << (intWidth - imm1));
+ result &= bitMask;
+ '''
+
+ bfmCode = bfmMaskCode + 'Dest64 = result | (Dest64 & ~bitMask);'
+ bfmIop = InstObjParams("bfm", "Bfm64", "RegRegImmImmOp64", bfmCode);
+ subst("RegRegImmImmOp64", bfmIop)
+
+ ubfmCode = bfmMaskCode + 'Dest64 = result;'
+ ubfmIop = InstObjParams("ubfm", "Ubfm64", "RegRegImmImmOp64", ubfmCode);
+ subst("RegRegImmImmOp64", ubfmIop)
+
+ sbfmCode = bfmMaskCode + \
+ 'Dest64 = result | (bits(Op164, imm2) ? topBits : 0);'
+ sbfmIop = InstObjParams("sbfm", "Sbfm64", "RegRegImmImmOp64", sbfmCode);
+ subst("RegRegImmImmOp64", sbfmIop)
+
+ extrCode = '''
+ if (imm == 0) {
+ Dest64 = Op264;
+ } else {
+ Dest64 = (Op164 << (intWidth - imm)) | (Op264 >> imm);
+ }
+ '''
+ extrIop = InstObjParams("extr", "Extr64", "RegRegRegImmOp64", extrCode);
+ subst("RegRegRegImmOp64", extrIop);
+
+ unknownCode = '''
+ return new UndefinedInstruction(machInst, true);
+ '''
+ unknown64Iop = InstObjParams("unknown", "Unknown64", "UnknownOp64",
+ unknownCode)
+ header_output += BasicDeclare.subst(unknown64Iop)
+ decoder_output += BasicConstructor64.subst(unknown64Iop)
+ exec_output += BasicExecute.subst(unknown64Iop)
+
+ isbIop = InstObjParams("isb", "Isb64", "ArmStaticInst",
+ "fault = new FlushPipe;", ['IsSerializeAfter'])
+ header_output += BasicDeclare.subst(isbIop)
+ decoder_output += BasicConstructor64.subst(isbIop)
+ exec_output += BasicExecute.subst(isbIop)
+
+ dsbIop = InstObjParams("dsb", "Dsb64", "ArmStaticInst",
+ "fault = new FlushPipe;",
+ ['IsMemBarrier', 'IsSerializeAfter'])
+ header_output += BasicDeclare.subst(dsbIop)
+ decoder_output += BasicConstructor64.subst(dsbIop)
+ exec_output += BasicExecute.subst(dsbIop)
+
+ dmbIop = InstObjParams("dmb", "Dmb64", "ArmStaticInst", "",
+ ['IsMemBarrier'])
+ header_output += BasicDeclare.subst(dmbIop)
+ decoder_output += BasicConstructor64.subst(dmbIop)
+ exec_output += BasicExecute.subst(dmbIop)
+
+ clrexIop = InstObjParams("clrex", "Clrex64", "ArmStaticInst",
+ "LLSCLock = 0;")
+ header_output += BasicDeclare.subst(clrexIop)
+ decoder_output += BasicConstructor64.subst(clrexIop)
+ exec_output += BasicExecute.subst(clrexIop)
+}};
diff --git a/src/arch/arm/isa/insts/neon.isa b/src/arch/arm/isa/insts/neon.isa
index 876bb3bb7..ca5c3038c 100644
--- a/src/arch/arm/isa/insts/neon.isa
+++ b/src/arch/arm/isa/insts/neon.isa
@@ -1,6 +1,6 @@
// -*- mode:c++ -*-
-// Copyright (c) 2010 ARM Limited
+// Copyright (c) 2010-2011 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -94,8 +94,8 @@ output header {{
template <template <typename T> class Base>
StaticInstPtr
decodeNeonUThreeUSReg(unsigned size,
- ExtMachInst machInst, IntRegIndex dest,
- IntRegIndex op1, IntRegIndex op2)
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1, IntRegIndex op2)
{
switch (size) {
case 0:
@@ -112,8 +112,8 @@ output header {{
template <template <typename T> class Base>
StaticInstPtr
decodeNeonSThreeUSReg(unsigned size,
- ExtMachInst machInst, IntRegIndex dest,
- IntRegIndex op1, IntRegIndex op2)
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1, IntRegIndex op2)
{
switch (size) {
case 0:
@@ -129,6 +129,38 @@ output header {{
template <template <typename T> class Base>
StaticInstPtr
+ decodeNeonSThreeHAndWReg(unsigned size, ExtMachInst machInst,
+ IntRegIndex dest, IntRegIndex op1,
+ IntRegIndex op2)
+ {
+ switch (size) {
+ case 1:
+ return new Base<int16_t>(machInst, dest, op1, op2);
+ case 2:
+ return new Base<int32_t>(machInst, dest, op1, op2);
+ default:
+ return new Unknown(machInst);
+ }
+ }
+
+ template <template <typename T> class Base>
+ StaticInstPtr
+ decodeNeonSThreeImmHAndWReg(unsigned size, ExtMachInst machInst,
+ IntRegIndex dest, IntRegIndex op1,
+ IntRegIndex op2, uint64_t imm)
+ {
+ switch (size) {
+ case 1:
+ return new Base<int16_t>(machInst, dest, op1, op2, imm);
+ case 2:
+ return new Base<int32_t>(machInst, dest, op1, op2, imm);
+ default:
+ return new Unknown(machInst);
+ }
+ }
+
+ template <template <typename T> class Base>
+ StaticInstPtr
decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
ExtMachInst machInst, IntRegIndex dest,
IntRegIndex op1, IntRegIndex op2)
@@ -177,6 +209,38 @@ output header {{
template <template <typename T> class BaseD,
template <typename T> class BaseQ>
StaticInstPtr
+ decodeNeonSThreeXReg(bool q, unsigned size,
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1, IntRegIndex op2)
+ {
+ if (q) {
+ return decodeNeonSThreeUReg<BaseQ>(
+ size, machInst, dest, op1, op2);
+ } else {
+ return decodeNeonSThreeUSReg<BaseD>(
+ size, machInst, dest, op1, op2);
+ }
+ }
+
+ template <template <typename T> class BaseD,
+ template <typename T> class BaseQ>
+ StaticInstPtr
+ decodeNeonUThreeXReg(bool q, unsigned size,
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1, IntRegIndex op2)
+ {
+ if (q) {
+ return decodeNeonUThreeUReg<BaseQ>(
+ size, machInst, dest, op1, op2);
+ } else {
+ return decodeNeonUThreeUSReg<BaseD>(
+ size, machInst, dest, op1, op2);
+ }
+ }
+
+ template <template <typename T> class BaseD,
+ template <typename T> class BaseQ>
+ StaticInstPtr
decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
ExtMachInst machInst, IntRegIndex dest,
IntRegIndex op1, IntRegIndex op2)
@@ -241,6 +305,124 @@ output header {{
template <template <typename T> class BaseD,
template <typename T> class BaseQ>
StaticInstPtr
+ decodeNeonUThreeFpReg(bool q, unsigned size, ExtMachInst machInst,
+ IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
+ {
+ if (q) {
+ if (size)
+ return new BaseQ<uint64_t>(machInst, dest, op1, op2);
+ else
+ return new BaseQ<uint32_t>(machInst, dest, op1, op2);
+ } else {
+ if (size)
+ return new Unknown(machInst);
+ else
+ return new BaseD<uint32_t>(machInst, dest, op1, op2);
+ }
+ }
+
+ template <template <typename T> class Base>
+ StaticInstPtr
+ decodeNeonUThreeScFpReg(bool size, ExtMachInst machInst,
+ IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
+ {
+ if (size)
+ return new Base<uint64_t>(machInst, dest, op1, op2);
+ else
+ return new Base<uint32_t>(machInst, dest, op1, op2);
+ }
+
+ template <template <typename T> class Base>
+ StaticInstPtr
+ decodeNeonUThreeImmScFpReg(bool size, ExtMachInst machInst,
+ IntRegIndex dest, IntRegIndex op1,
+ IntRegIndex op2, uint64_t imm)
+ {
+ if (size)
+ return new Base<uint64_t>(machInst, dest, op1, op2, imm);
+ else
+ return new Base<uint32_t>(machInst, dest, op1, op2, imm);
+ }
+
+ template <template <typename T> class BaseD,
+ template <typename T> class BaseQ>
+ StaticInstPtr
+ decodeNeonUThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst,
+ IntRegIndex dest, IntRegIndex op1,
+ IntRegIndex op2, uint64_t imm)
+ {
+ if (q) {
+ switch (size) {
+ case 1:
+ return new BaseQ<uint16_t>(machInst, dest, op1, op2, imm);
+ case 2:
+ return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm);
+ default:
+ return new Unknown(machInst);
+ }
+ } else {
+ switch (size) {
+ case 1:
+ return new BaseD<uint16_t>(machInst, dest, op1, op2, imm);
+ case 2:
+ return new BaseD<uint32_t>(machInst, dest, op1, op2, imm);
+ default:
+ return new Unknown(machInst);
+ }
+ }
+ }
+
+ template <template <typename T> class BaseD,
+ template <typename T> class BaseQ>
+ StaticInstPtr
+ decodeNeonSThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst,
+ IntRegIndex dest, IntRegIndex op1,
+ IntRegIndex op2, uint64_t imm)
+ {
+ if (q) {
+ switch (size) {
+ case 1:
+ return new BaseQ<int16_t>(machInst, dest, op1, op2, imm);
+ case 2:
+ return new BaseQ<int32_t>(machInst, dest, op1, op2, imm);
+ default:
+ return new Unknown(machInst);
+ }
+ } else {
+ switch (size) {
+ case 1:
+ return new BaseD<int16_t>(machInst, dest, op1, op2, imm);
+ case 2:
+ return new BaseD<int32_t>(machInst, dest, op1, op2, imm);
+ default:
+ return new Unknown(machInst);
+ }
+ }
+ }
+
+ template <template <typename T> class BaseD,
+ template <typename T> class BaseQ>
+ StaticInstPtr
+ decodeNeonUThreeImmFpReg(bool q, unsigned size, ExtMachInst machInst,
+ IntRegIndex dest, IntRegIndex op1,
+ IntRegIndex op2, uint64_t imm)
+ {
+ if (q) {
+ if (size)
+ return new BaseQ<uint64_t>(machInst, dest, op1, op2, imm);
+ else
+ return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm);
+ } else {
+ if (size)
+ return new Unknown(machInst);
+ else
+ return new BaseD<uint32_t>(machInst, dest, op1, op2, imm);
+ }
+ }
+
+ template <template <typename T> class BaseD,
+ template <typename T> class BaseQ>
+ StaticInstPtr
decodeNeonUTwoShiftReg(bool q, unsigned size,
ExtMachInst machInst, IntRegIndex dest,
IntRegIndex op1, uint64_t imm)
@@ -345,6 +527,46 @@ output header {{
}
}
+ template <template <typename T> class Base>
+ StaticInstPtr
+ decodeNeonUTwoShiftUReg(unsigned size,
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1, uint64_t imm)
+ {
+ switch (size) {
+ case 0:
+ return new Base<uint8_t>(machInst, dest, op1, imm);
+ case 1:
+ return new Base<uint16_t>(machInst, dest, op1, imm);
+ case 2:
+ return new Base<uint32_t>(machInst, dest, op1, imm);
+ case 3:
+ return new Base<uint64_t>(machInst, dest, op1, imm);
+ default:
+ return new Unknown(machInst);
+ }
+ }
+
+ template <template <typename T> class Base>
+ StaticInstPtr
+ decodeNeonSTwoShiftUReg(unsigned size,
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1, uint64_t imm)
+ {
+ switch (size) {
+ case 0:
+ return new Base<int8_t>(machInst, dest, op1, imm);
+ case 1:
+ return new Base<int16_t>(machInst, dest, op1, imm);
+ case 2:
+ return new Base<int32_t>(machInst, dest, op1, imm);
+ case 3:
+ return new Base<int64_t>(machInst, dest, op1, imm);
+ default:
+ return new Unknown(machInst);
+ }
+ }
+
template <template <typename T> class BaseD,
template <typename T> class BaseQ>
StaticInstPtr
@@ -411,6 +633,66 @@ output header {{
}
}
+ template <template <typename T> class BaseD,
+ template <typename T> class BaseQ>
+ StaticInstPtr
+ decodeNeonUTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst,
+ IntRegIndex dest, IntRegIndex op1, uint64_t imm)
+ {
+ if (q) {
+ return decodeNeonUTwoShiftUReg<BaseQ>(
+ size, machInst, dest, op1, imm);
+ } else {
+ return decodeNeonUTwoShiftUSReg<BaseD>(
+ size, machInst, dest, op1, imm);
+ }
+ }
+
+ template <template <typename T> class BaseD,
+ template <typename T> class BaseQ>
+ StaticInstPtr
+ decodeNeonSTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst,
+ IntRegIndex dest, IntRegIndex op1, uint64_t imm)
+ {
+ if (q) {
+ return decodeNeonSTwoShiftUReg<BaseQ>(
+ size, machInst, dest, op1, imm);
+ } else {
+ return decodeNeonSTwoShiftUSReg<BaseD>(
+ size, machInst, dest, op1, imm);
+ }
+ }
+
+ template <template <typename T> class Base>
+ StaticInstPtr
+ decodeNeonUTwoShiftUFpReg(unsigned size, ExtMachInst machInst,
+ IntRegIndex dest, IntRegIndex op1, uint64_t imm)
+ {
+ if (size)
+ return new Base<uint64_t>(machInst, dest, op1, imm);
+ else
+ return new Base<uint32_t>(machInst, dest, op1, imm);
+ }
+
+ template <template <typename T> class BaseD,
+ template <typename T> class BaseQ>
+ StaticInstPtr
+ decodeNeonUTwoShiftFpReg(bool q, unsigned size, ExtMachInst machInst,
+ IntRegIndex dest, IntRegIndex op1, uint64_t imm)
+ {
+ if (q) {
+ if (size)
+ return new BaseQ<uint64_t>(machInst, dest, op1, imm);
+ else
+ return new BaseQ<uint32_t>(machInst, dest, op1, imm);
+ } else {
+ if (size)
+ return new Unknown(machInst);
+ else
+ return new BaseD<uint32_t>(machInst, dest, op1, imm);
+ }
+ }
+
template <template <typename T> class Base>
StaticInstPtr
decodeNeonUTwoMiscUSReg(unsigned size,
@@ -451,8 +733,8 @@ output header {{
template <typename T> class BaseQ>
StaticInstPtr
decodeNeonUTwoMiscSReg(bool q, unsigned size,
- ExtMachInst machInst, IntRegIndex dest,
- IntRegIndex op1)
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1)
{
if (q) {
return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
@@ -465,8 +747,8 @@ output header {{
template <typename T> class BaseQ>
StaticInstPtr
decodeNeonSTwoMiscSReg(bool q, unsigned size,
- ExtMachInst machInst, IntRegIndex dest,
- IntRegIndex op1)
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1)
{
if (q) {
return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
@@ -498,8 +780,8 @@ output header {{
template <template <typename T> class Base>
StaticInstPtr
decodeNeonSTwoMiscUReg(unsigned size,
- ExtMachInst machInst, IntRegIndex dest,
- IntRegIndex op1)
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1)
{
switch (size) {
case 0:
@@ -559,6 +841,221 @@ output header {{
}
}
+ template <template <typename T> class BaseD,
+ template <typename T> class BaseQ>
+ StaticInstPtr
+ decodeNeonUTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst,
+ IntRegIndex dest, IntRegIndex op1)
+ {
+ if (q) {
+ return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
+ } else {
+ return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
+ }
+ }
+
+ template <template <typename T> class BaseD,
+ template <typename T> class BaseQ>
+ StaticInstPtr
+ decodeNeonSTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst,
+ IntRegIndex dest, IntRegIndex op1)
+ {
+ if (q) {
+ return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
+ } else {
+ return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
+ }
+ }
+
+ template <template <typename T> class BaseD,
+ template <typename T> class BaseQ>
+ StaticInstPtr
+ decodeNeonUTwoMiscFpReg(bool q, unsigned size, ExtMachInst machInst,
+ IntRegIndex dest, IntRegIndex op1)
+ {
+ if (q) {
+ if (size)
+ return new BaseQ<uint64_t>(machInst, dest, op1);
+ else
+ return new BaseQ<uint32_t>(machInst, dest, op1);
+ } else {
+ if (size)
+ return new Unknown(machInst);
+ else
+ return new BaseD<uint32_t>(machInst, dest, op1);
+ }
+ }
+
+ template <template <typename T> class BaseD,
+ template <typename T> class BaseQ>
+ StaticInstPtr
+ decodeNeonUTwoMiscPwiseScFpReg(unsigned size, ExtMachInst machInst,
+ IntRegIndex dest, IntRegIndex op1)
+ {
+ if (size)
+ return new BaseQ<uint64_t>(machInst, dest, op1);
+ else
+ return new BaseD<uint32_t>(machInst, dest, op1);
+ }
+
+ template <template <typename T> class Base>
+ StaticInstPtr
+ decodeNeonUTwoMiscScFpReg(unsigned size, ExtMachInst machInst,
+ IntRegIndex dest, IntRegIndex op1)
+ {
+ if (size)
+ return new Base<uint64_t>(machInst, dest, op1);
+ else
+ return new Base<uint32_t>(machInst, dest, op1);
+ }
+
+ template <template <typename T> class BaseD,
+ template <typename T> class BaseQ>
+ StaticInstPtr
+ decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
+ IntRegIndex dest, IntRegIndex op1)
+ {
+ if (q) {
+ switch (size) {
+ case 0x0:
+ return new BaseQ<uint8_t>(machInst, dest, op1);
+ case 0x1:
+ return new BaseQ<uint16_t>(machInst, dest, op1);
+ case 0x2:
+ return new BaseQ<uint32_t>(machInst, dest, op1);
+ default:
+ return new Unknown(machInst);
+ }
+ } else {
+ switch (size) {
+ case 0x0:
+ return new BaseD<uint8_t>(machInst, dest, op1);
+ case 0x1:
+ return new BaseD<uint16_t>(machInst, dest, op1);
+ default:
+ return new Unknown(machInst);
+ }
+ }
+ }
+
+ template <template <typename T> class BaseD,
+ template <typename T> class BaseQ,
+ template <typename T> class BaseBQ>
+ StaticInstPtr
+ decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
+ IntRegIndex dest, IntRegIndex op1)
+ {
+ if (q) {
+ switch (size) {
+ case 0x0:
+ return new BaseQ<uint8_t>(machInst, dest, op1);
+ case 0x1:
+ return new BaseQ<uint16_t>(machInst, dest, op1);
+ case 0x2:
+ return new BaseBQ<uint32_t>(machInst, dest, op1);
+ default:
+ return new Unknown(machInst);
+ }
+ } else {
+ switch (size) {
+ case 0x0:
+ return new BaseD<uint8_t>(machInst, dest, op1);
+ case 0x1:
+ return new BaseD<uint16_t>(machInst, dest, op1);
+ default:
+ return new Unknown(machInst);
+ }
+ }
+ }
+
+ template <template <typename T> class BaseD,
+ template <typename T> class BaseQ>
+ StaticInstPtr
+ decodeNeonSAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
+ IntRegIndex dest, IntRegIndex op1)
+ {
+ if (q) {
+ switch (size) {
+ case 0x0:
+ return new BaseQ<int8_t>(machInst, dest, op1);
+ case 0x1:
+ return new BaseQ<int16_t>(machInst, dest, op1);
+ case 0x2:
+ return new BaseQ<int32_t>(machInst, dest, op1);
+ default:
+ return new Unknown(machInst);
+ }
+ } else {
+ switch (size) {
+ case 0x0:
+ return new BaseD<int8_t>(machInst, dest, op1);
+ case 0x1:
+ return new BaseD<int16_t>(machInst, dest, op1);
+ default:
+ return new Unknown(machInst);
+ }
+ }
+ }
+
+ template <template <typename T> class BaseD,
+ template <typename T> class BaseQ,
+ template <typename T> class BaseBQ>
+ StaticInstPtr
+ decodeNeonUAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst,
+ IntRegIndex dest, IntRegIndex op1)
+ {
+ if (q) {
+ switch (size) {
+ case 0x0:
+ return new BaseQ<uint8_t>(machInst, dest, op1);
+ case 0x1:
+ return new BaseQ<uint16_t>(machInst, dest, op1);
+ case 0x2:
+ return new BaseBQ<uint32_t>(machInst, dest, op1);
+ default:
+ return new Unknown(machInst);
+ }
+ } else {
+ switch (size) {
+ case 0x0:
+ return new BaseD<uint8_t>(machInst, dest, op1);
+ case 0x1:
+ return new BaseD<uint16_t>(machInst, dest, op1);
+ default:
+ return new Unknown(machInst);
+ }
+ }
+ }
+
+ template <template <typename T> class BaseD,
+ template <typename T> class BaseQ,
+ template <typename T> class BaseBQ>
+ StaticInstPtr
+ decodeNeonSAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst,
+ IntRegIndex dest, IntRegIndex op1)
+ {
+ if (q) {
+ switch (size) {
+ case 0x0:
+ return new BaseQ<int8_t>(machInst, dest, op1);
+ case 0x1:
+ return new BaseQ<int16_t>(machInst, dest, op1);
+ case 0x2:
+ return new BaseBQ<int32_t>(machInst, dest, op1);
+ default:
+ return new Unknown(machInst);
+ }
+ } else {
+ switch (size) {
+ case 0x0:
+ return new BaseD<int8_t>(machInst, dest, op1);
+ case 0x1:
+ return new BaseD<int16_t>(machInst, dest, op1);
+ default:
+ return new Unknown(machInst);
+ }
+ }
+ }
}};
output exec {{
@@ -872,10 +1369,7 @@ let {{
readDestCode = 'destElem = gtoh(destReg.elements[i]);'
eWalkCode += '''
if (imm < 0 && imm >= eCount) {
- if (FullSystem)
- fault = new UndefinedInstruction;
- else
- fault = new UndefinedInstruction(false, mnemonic);
+ fault = new UndefinedInstruction(machInst, false, mnemonic);
} else {
for (unsigned i = 0; i < eCount; i++) {
Element srcElem1 = gtoh(srcReg1.elements[i]);
@@ -926,10 +1420,7 @@ let {{
readDestCode = 'destElem = gtoh(destReg.elements[i]);'
eWalkCode += '''
if (imm < 0 && imm >= eCount) {
- if (FullSystem)
- fault = new UndefinedInstruction;
- else
- fault = new UndefinedInstruction(false, mnemonic);
+ fault = new UndefinedInstruction(machInst, false, mnemonic);
} else {
for (unsigned i = 0; i < eCount; i++) {
Element srcElem1 = gtoh(srcReg1.elements[i]);
@@ -978,10 +1469,7 @@ let {{
readDestCode = 'destReg = destRegs[i];'
eWalkCode += '''
if (imm < 0 && imm >= eCount) {
- if (FullSystem)
- fault = new UndefinedInstruction;
- else
- fault = new UndefinedInstruction(false, mnemonic);
+ fault = new UndefinedInstruction(machInst, false, mnemonic);
} else {
for (unsigned i = 0; i < rCount; i++) {
FloatReg srcReg1 = srcRegs1[i];
@@ -2156,7 +2644,7 @@ let {{
bool done;
destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
if (!done) {
- destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS,
+ destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMax<float>,
true, true, VfpRoundNearest);
} else if (flushToZero(srcReg1, srcReg2)) {
fpscr.idc = 1;
@@ -2171,7 +2659,7 @@ let {{
bool done;
destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
if (!done) {
- destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS,
+ destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMin<float>,
true, true, VfpRoundNearest);
} else if (flushToZero(srcReg1, srcReg2)) {
fpscr.idc = 1;
@@ -2234,6 +2722,24 @@ let {{
threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
+ vfmafpCode = '''
+ FPSCR fpscr = (FPSCR) FpscrExc;
+ destReg = ternaryOp(fpscr, srcReg1, srcReg2, destReg, fpMulAdd<float>,
+ true, true, VfpRoundNearest);
+ FpscrExc = fpscr;
+ '''
+ threeEqualRegInstFp("vfma", "NVfmaDFp", "SimdFloatMultAccOp", ("float",), 2, vfmafpCode, True)
+ threeEqualRegInstFp("vfma", "NVfmaQFp", "SimdFloatMultAccOp", ("float",), 4, vfmafpCode, True)
+
+ vfmsfpCode = '''
+ FPSCR fpscr = (FPSCR) FpscrExc;
+ destReg = ternaryOp(fpscr, -srcReg1, srcReg2, destReg, fpMulAdd<float>,
+ true, true, VfpRoundNearest);
+ FpscrExc = fpscr;
+ '''
+ threeEqualRegInstFp("vfms", "NVfmsDFp", "SimdFloatMultAccOp", ("float",), 2, vfmsfpCode, True)
+ threeEqualRegInstFp("vfms", "NVfmsQFp", "SimdFloatMultAccOp", ("float",), 4, vfmsfpCode, True)
+
vmlsfpCode = '''
FPSCR fpscr = (FPSCR) FpscrExc;
float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
@@ -2765,7 +3271,7 @@ let {{
fpscr.idc = 1;
VfpSavedState state = prepFpState(VfpRoundNearest);
__asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
- destReg = vfpFpSToFixed(srcElem1, false, false, imm);
+ destReg = vfpFpToFixed<float>(srcElem1, false, 32, imm);
__asm__ __volatile__("" :: "m" (destReg));
finishVfp(fpscr, state, true);
FpscrExc = fpscr;
@@ -2781,7 +3287,7 @@ let {{
fpscr.idc = 1;
VfpSavedState state = prepFpState(VfpRoundNearest);
__asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
- destReg = vfpFpSToFixed(srcElem1, true, false, imm);
+ destReg = vfpFpToFixed<float>(srcElem1, true, 32, imm);
__asm__ __volatile__("" :: "m" (destReg));
finishVfp(fpscr, state, true);
FpscrExc = fpscr;
@@ -2795,7 +3301,7 @@ let {{
FPSCR fpscr = (FPSCR) FpscrExc;
VfpSavedState state = prepFpState(VfpRoundNearest);
__asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
- destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm);
+ destElem = vfpUFixedToFpS(true, true, srcReg1, 32, imm);
__asm__ __volatile__("" :: "m" (destElem));
finishVfp(fpscr, state, true);
FpscrExc = fpscr;
@@ -2809,7 +3315,7 @@ let {{
FPSCR fpscr = (FPSCR) FpscrExc;
VfpSavedState state = prepFpState(VfpRoundNearest);
__asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
- destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm);
+ destElem = vfpSFixedToFpS(true, true, srcReg1, 32, imm);
__asm__ __volatile__("" :: "m" (destElem));
finishVfp(fpscr, state, true);
FpscrExc = fpscr;
@@ -3296,10 +3802,7 @@ let {{
} else {
index -= eCount;
if (index >= eCount) {
- if (FullSystem)
- fault = new UndefinedInstruction;
- else
- fault = new UndefinedInstruction(false, mnemonic);
+ fault = new UndefinedInstruction(machInst, false, mnemonic);
} else {
destReg.elements[i] = srcReg2.elements[index];
}
diff --git a/src/arch/arm/isa/insts/neon64.isa b/src/arch/arm/isa/insts/neon64.isa
new file mode 100644
index 000000000..e065761f4
--- /dev/null
+++ b/src/arch/arm/isa/insts/neon64.isa
@@ -0,0 +1,3355 @@
+// -*- mode: c++ -*-
+
+// Copyright (c) 2012-2013 ARM Limited
+// All rights reserved
+//
+// The license below extends only to copyright in the software and shall
+// not be construed as granting a license to any other intellectual
+// property including but not limited to intellectual property relating
+// to a hardware implementation of the functionality of the software
+// licensed hereunder. You may use the software subject to the license
+// terms below provided that you ensure that this notice is replicated
+// unmodified and in its entirety in all distributions of the software,
+// modified or unmodified, in source code or in binary form.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Giacomo Gabrielli
+// Mbou Eyole
+
+let {{
+
+ header_output = ""
+ exec_output = ""
+
+ # FP types (FP operations always work with unsigned representations)
+ floatTypes = ("uint32_t", "uint64_t")
+ smallFloatTypes = ("uint32_t",)
+
+ def threeEqualRegInstX(name, Name, opClass, types, rCount, op,
+ readDest=False, pairwise=False, scalar=False,
+ byElem=False):
+ assert (not pairwise) or ((not byElem) and (not scalar))
+ global header_output, exec_output
+ eWalkCode = simd64EnabledCheckCode + '''
+ RegVect srcReg1, destReg;
+ '''
+ if byElem:
+ # 2nd register operand has to be read fully
+ eWalkCode += '''
+ FullRegVect srcReg2;
+ '''
+ else:
+ eWalkCode += '''
+ RegVect srcReg2;
+ '''
+ for reg in range(rCount):
+ eWalkCode += '''
+ srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
+ srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw);
+ ''' % { "reg" : reg }
+ if readDest:
+ eWalkCode += '''
+ destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
+ ''' % { "reg" : reg }
+ if byElem:
+ # 2nd operand has to be read fully
+ for reg in range(rCount, 4):
+ eWalkCode += '''
+ srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw);
+ ''' % { "reg" : reg }
+ readDestCode = ''
+ if readDest:
+ readDestCode = 'destElem = gtoh(destReg.elements[i]);'
+ if pairwise:
+ eWalkCode += '''
+ for (unsigned i = 0; i < eCount; i++) {
+ Element srcElem1 = gtoh(2 * i < eCount ?
+ srcReg1.elements[2 * i] :
+ srcReg2.elements[2 * i - eCount]);
+ Element srcElem2 = gtoh(2 * i < eCount ?
+ srcReg1.elements[2 * i + 1] :
+ srcReg2.elements[2 * i + 1 - eCount]);
+ Element destElem;
+ %(readDest)s
+ %(op)s
+ destReg.elements[i] = htog(destElem);
+ }
+ ''' % { "op" : op, "readDest" : readDestCode }
+ else:
+ scalarCheck = '''
+ if (i != 0) {
+ destReg.elements[i] = 0;
+ continue;
+ }
+ '''
+ eWalkCode += '''
+ for (unsigned i = 0; i < eCount; i++) {
+ %(scalarCheck)s
+ Element srcElem1 = gtoh(srcReg1.elements[i]);
+ Element srcElem2 = gtoh(srcReg2.elements[%(src2Index)s]);
+ Element destElem;
+ %(readDest)s
+ %(op)s
+ destReg.elements[i] = htog(destElem);
+ }
+ ''' % { "op" : op, "readDest" : readDestCode,
+ "scalarCheck" : scalarCheck if scalar else "",
+ "src2Index" : "imm" if byElem else "i" }
+ for reg in range(rCount):
+ eWalkCode += '''
+ AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
+ ''' % { "reg" : reg }
+ if rCount < 4: # zero upper half
+ for reg in range(rCount, 4):
+ eWalkCode += '''
+ AA64FpDestP%(reg)d_uw = 0;
+ ''' % { "reg" : reg }
+ iop = InstObjParams(name, Name,
+ "DataX2RegImmOp" if byElem else "DataX2RegOp",
+ { "code": eWalkCode,
+ "r_count": rCount,
+ "op_class": opClass }, [])
+ if byElem:
+ header_output += NeonX2RegImmOpDeclare.subst(iop)
+ else:
+ header_output += NeonX2RegOpDeclare.subst(iop)
+ exec_output += NeonXEqualRegOpExecute.subst(iop)
+ for type in types:
+ substDict = { "targs" : type,
+ "class_name" : Name }
+ exec_output += NeonXExecDeclare.subst(substDict)
+
+ def threeUnequalRegInstX(name, Name, opClass, types, op,
+ bigSrc1, bigSrc2, bigDest, readDest, scalar=False,
+ byElem=False, hi=False):
+ assert not (scalar and hi)
+ global header_output, exec_output
+ src1Cnt = src2Cnt = destCnt = 2
+ src1Prefix = src2Prefix = destPrefix = ''
+ if bigSrc1:
+ src1Cnt = 4
+ src1Prefix = 'Big'
+ if bigSrc2:
+ src2Cnt = 4
+ src2Prefix = 'Big'
+ if bigDest:
+ destCnt = 4
+ destPrefix = 'Big'
+ if byElem:
+ src2Prefix = 'Full'
+ eWalkCode = simd64EnabledCheckCode + '''
+ %sRegVect srcReg1;
+ %sRegVect srcReg2;
+ %sRegVect destReg;
+ ''' % (src1Prefix, src2Prefix, destPrefix)
+ srcReg1 = 0
+ if hi and not bigSrc1: # long/widening operations
+ srcReg1 = 2
+ for reg in range(src1Cnt):
+ eWalkCode += '''
+ srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(srcReg1)d_uw);
+ ''' % { "reg" : reg, "srcReg1" : srcReg1 }
+ srcReg1 += 1
+ srcReg2 = 0
+ if (not byElem) and (hi and not bigSrc2): # long/widening operations
+ srcReg2 = 2
+ for reg in range(src2Cnt):
+ eWalkCode += '''
+ srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(srcReg2)d_uw);
+ ''' % { "reg" : reg, "srcReg2" : srcReg2 }
+ srcReg2 += 1
+ if byElem:
+ # 2nd operand has to be read fully
+ for reg in range(src2Cnt, 4):
+ eWalkCode += '''
+ srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw);
+ ''' % { "reg" : reg }
+ if readDest:
+ for reg in range(destCnt):
+ eWalkCode += '''
+ destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
+ ''' % { "reg" : reg }
+ readDestCode = ''
+ if readDest:
+ readDestCode = 'destElem = gtoh(destReg.elements[i]);'
+ scalarCheck = '''
+ if (i != 0) {
+ destReg.elements[i] = 0;
+ continue;
+ }
+ '''
+ eWalkCode += '''
+ for (unsigned i = 0; i < eCount; i++) {
+ %(scalarCheck)s
+ %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
+ %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[%(src2Index)s]);
+ %(destPrefix)sElement destElem;
+ %(readDest)s
+ %(op)s
+ destReg.elements[i] = htog(destElem);
+ }
+ ''' % { "op" : op, "readDest" : readDestCode,
+ "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
+ "destPrefix" : destPrefix,
+ "scalarCheck" : scalarCheck if scalar else "",
+ "src2Index" : "imm" if byElem else "i" }
+ destReg = 0
+ if hi and not bigDest:
+ # narrowing operations
+ destReg = 2
+ for reg in range(destCnt):
+ eWalkCode += '''
+ AA64FpDestP%(destReg)d_uw = gtoh(destReg.regs[%(reg)d]);
+ ''' % { "reg" : reg, "destReg": destReg }
+ destReg += 1
+ if destCnt < 4 and not hi: # zero upper half
+ for reg in range(destCnt, 4):
+ eWalkCode += '''
+ AA64FpDestP%(reg)d_uw = 0;
+ ''' % { "reg" : reg }
+ iop = InstObjParams(name, Name,
+ "DataX2RegImmOp" if byElem else "DataX2RegOp",
+ { "code": eWalkCode,
+ "r_count": 2,
+ "op_class": opClass }, [])
+ if byElem:
+ header_output += NeonX2RegImmOpDeclare.subst(iop)
+ else:
+ header_output += NeonX2RegOpDeclare.subst(iop)
+ exec_output += NeonXUnequalRegOpExecute.subst(iop)
+ for type in types:
+ substDict = { "targs" : type,
+ "class_name" : Name }
+ exec_output += NeonXExecDeclare.subst(substDict)
+
+ def threeRegNarrowInstX(name, Name, opClass, types, op, readDest=False,
+ scalar=False, byElem=False, hi=False):
+ assert not byElem
+ threeUnequalRegInstX(name, Name, opClass, types, op,
+ True, True, False, readDest, scalar, byElem, hi)
+
+ def threeRegLongInstX(name, Name, opClass, types, op, readDest=False,
+ scalar=False, byElem=False, hi=False):
+ threeUnequalRegInstX(name, Name, opClass, types, op,
+ False, False, True, readDest, scalar, byElem, hi)
+
+ def threeRegWideInstX(name, Name, opClass, types, op, readDest=False,
+ scalar=False, byElem=False, hi=False):
+ assert not byElem
+ threeUnequalRegInstX(name, Name, opClass, types, op,
+ True, False, True, readDest, scalar, byElem, hi)
+
+ def twoEqualRegInstX(name, Name, opClass, types, rCount, op,
+ readDest=False, scalar=False, byElem=False,
+ hasImm=False, isDup=False):
+ global header_output, exec_output
+ assert (not isDup) or byElem
+ if byElem:
+ hasImm = True
+ if isDup:
+ eWalkCode = simd64EnabledCheckCode + '''
+ FullRegVect srcReg1;
+ RegVect destReg;
+ '''
+ else:
+ eWalkCode = simd64EnabledCheckCode + '''
+ RegVect srcReg1, destReg;
+ '''
+ for reg in range(4 if isDup else rCount):
+ eWalkCode += '''
+ srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
+ ''' % { "reg" : reg }
+ if readDest:
+ eWalkCode += '''
+ destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
+ ''' % { "reg" : reg }
+ readDestCode = ''
+ if readDest:
+ readDestCode = 'destElem = gtoh(destReg.elements[i]);'
+ scalarCheck = '''
+ if (i != 0) {
+ destReg.elements[i] = 0;
+ continue;
+ }
+ '''
+ eWalkCode += '''
+ for (unsigned i = 0; i < eCount; i++) {
+ %(scalarCheck)s
+ unsigned j = i;
+ Element srcElem1 = gtoh(srcReg1.elements[%(src1Index)s]);
+ Element destElem;
+ %(readDest)s
+ %(op)s
+ destReg.elements[j] = htog(destElem);
+ }
+ ''' % { "op" : op, "readDest" : readDestCode,
+ "scalarCheck" : scalarCheck if scalar else "",
+ "src1Index" : "imm" if byElem else "i" }
+ for reg in range(rCount):
+ eWalkCode += '''
+ AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
+ ''' % { "reg" : reg }
+ if rCount < 4: # zero upper half
+ for reg in range(rCount, 4):
+ eWalkCode += '''
+ AA64FpDestP%(reg)d_uw = 0;
+ ''' % { "reg" : reg }
+ iop = InstObjParams(name, Name,
+ "DataX1RegImmOp" if hasImm else "DataX1RegOp",
+ { "code": eWalkCode,
+ "r_count": rCount,
+ "op_class": opClass }, [])
+ if hasImm:
+ header_output += NeonX1RegImmOpDeclare.subst(iop)
+ else:
+ header_output += NeonX1RegOpDeclare.subst(iop)
+ exec_output += NeonXEqualRegOpExecute.subst(iop)
+ for type in types:
+ substDict = { "targs" : type,
+ "class_name" : Name }
+ exec_output += NeonXExecDeclare.subst(substDict)
+
+ def twoRegLongInstX(name, Name, opClass, types, op, readDest=False,
+ hi=False, hasImm=False):
+ global header_output, exec_output
+ eWalkCode = simd64EnabledCheckCode + '''
+ RegVect srcReg1;
+ BigRegVect destReg;
+ '''
+ destReg = 0 if not hi else 2
+ for reg in range(2):
+ eWalkCode += '''
+ srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(destReg)d_uw);
+ ''' % { "reg" : reg, "destReg": destReg }
+ destReg += 1
+ destReg = 0 if not hi else 2
+ if readDest:
+ for reg in range(4):
+ eWalkCode += '''
+ destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
+ ''' % { "reg" : reg }
+ destReg += 1
+ readDestCode = ''
+ if readDest:
+ readDestCode = 'destReg = gtoh(destReg.elements[i]);'
+ eWalkCode += '''
+ for (unsigned i = 0; i < eCount; i++) {
+ Element srcElem1 = gtoh(srcReg1.elements[i]);
+ BigElement destElem;
+ %(readDest)s
+ %(op)s
+ destReg.elements[i] = htog(destElem);
+ }
+ ''' % { "op" : op, "readDest" : readDestCode }
+ for reg in range(4):
+ eWalkCode += '''
+ AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
+ ''' % { "reg" : reg }
+ iop = InstObjParams(name, Name,
+ "DataX1RegImmOp" if hasImm else "DataX1RegOp",
+ { "code": eWalkCode,
+ "r_count": 2,
+ "op_class": opClass }, [])
+ if hasImm:
+ header_output += NeonX1RegImmOpDeclare.subst(iop)
+ else:
+ header_output += NeonX1RegOpDeclare.subst(iop)
+ exec_output += NeonXUnequalRegOpExecute.subst(iop)
+ for type in types:
+ substDict = { "targs" : type,
+ "class_name" : Name }
+ exec_output += NeonXExecDeclare.subst(substDict)
+
+ def twoRegNarrowInstX(name, Name, opClass, types, op, readDest=False,
+ scalar=False, hi=False, hasImm=False):
+ global header_output, exec_output
+ eWalkCode = simd64EnabledCheckCode + '''
+ BigRegVect srcReg1;
+ RegVect destReg;
+ '''
+ for reg in range(4):
+ eWalkCode += '''
+ srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
+ ''' % { "reg" : reg }
+ if readDest:
+ for reg in range(2):
+ eWalkCode += '''
+ destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
+ ''' % { "reg" : reg }
+ else:
+ eWalkCode += '''
+ destReg.elements[0] = 0;
+ ''' % { "reg" : reg }
+ readDestCode = ''
+ if readDest:
+ readDestCode = 'destElem = gtoh(destReg.elements[i]);'
+ scalarCheck = '''
+ if (i != 0) {
+ destReg.elements[i] = 0;
+ continue;
+ }
+ '''
+ eWalkCode += '''
+ for (unsigned i = 0; i < eCount; i++) {
+ %(scalarCheck)s
+ BigElement srcElem1 = gtoh(srcReg1.elements[i]);
+ Element destElem;
+ %(readDest)s
+ %(op)s
+ destReg.elements[i] = htog(destElem);
+ }
+ ''' % { "op" : op, "readDest" : readDestCode,
+ "scalarCheck" : scalarCheck if scalar else "" }
+ destReg = 0 if not hi else 2
+ for reg in range(2):
+ eWalkCode += '''
+ AA64FpDestP%(destReg)d_uw = gtoh(destReg.regs[%(reg)d]);
+ ''' % { "reg" : reg, "destReg": destReg }
+ destReg += 1
+ if not hi:
+ for reg in range(2, 4): # zero upper half
+ eWalkCode += '''
+ AA64FpDestP%(reg)d_uw = 0;
+ ''' % { "reg" : reg }
+ iop = InstObjParams(name, Name,
+ "DataX1RegImmOp" if hasImm else "DataX1RegOp",
+ { "code": eWalkCode,
+ "r_count": 2,
+ "op_class": opClass }, [])
+ if hasImm:
+ header_output += NeonX1RegImmOpDeclare.subst(iop)
+ else:
+ header_output += NeonX1RegOpDeclare.subst(iop)
+ exec_output += NeonXUnequalRegOpExecute.subst(iop)
+ for type in types:
+ substDict = { "targs" : type,
+ "class_name" : Name }
+ exec_output += NeonXExecDeclare.subst(substDict)
+
+ def threeRegScrambleInstX(name, Name, opClass, types, rCount, op):
+ global header_output, exec_output
+ eWalkCode = simd64EnabledCheckCode + '''
+ RegVect srcReg1, srcReg2, destReg;
+ '''
+ for reg in range(rCount):
+ eWalkCode += '''
+ srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
+ srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw);
+ ''' % { "reg" : reg }
+ eWalkCode += op
+ for reg in range(rCount):
+ eWalkCode += '''
+ AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
+ ''' % { "reg" : reg }
+ if rCount < 4:
+ for reg in range(rCount, 4):
+ eWalkCode += '''
+ AA64FpDestP%(reg)d_uw = 0;
+ ''' % { "reg" : reg }
+ iop = InstObjParams(name, Name,
+ "DataX2RegOp",
+ { "code": eWalkCode,
+ "r_count": rCount,
+ "op_class": opClass }, [])
+ header_output += NeonX2RegOpDeclare.subst(iop)
+ exec_output += NeonXEqualRegOpExecute.subst(iop)
+ for type in types:
+ substDict = { "targs" : type,
+ "class_name" : Name }
+ exec_output += NeonXExecDeclare.subst(substDict)
+
+ def insFromVecElemInstX(name, Name, opClass, types, rCount):
+ global header_output, exec_output
+ eWalkCode = simd64EnabledCheckCode + '''
+ FullRegVect srcReg1;
+ RegVect destReg;
+ '''
+ for reg in range(4):
+ eWalkCode += '''
+ srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
+ ''' % { "reg" : reg }
+ for reg in range(rCount):
+ eWalkCode += '''
+ destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
+ ''' % { "reg" : reg }
+ eWalkCode += '''
+ Element srcElem1 = gtoh(srcReg1.elements[imm2]);
+ Element destElem = srcElem1;
+ destReg.elements[imm1] = htog(destElem);
+ '''
+ for reg in range(rCount):
+ eWalkCode += '''
+ AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
+ ''' % { "reg" : reg }
+ iop = InstObjParams(name, Name,
+ "DataX1Reg2ImmOp",
+ { "code": eWalkCode,
+ "r_count": rCount,
+ "op_class": opClass }, [])
+ header_output += NeonX1Reg2ImmOpDeclare.subst(iop)
+ exec_output += NeonXEqualRegOpExecute.subst(iop)
+ for type in types:
+ substDict = { "targs" : type,
+ "class_name" : Name }
+ exec_output += NeonXExecDeclare.subst(substDict)
+
+ def twoRegPairwiseScInstX(name, Name, opClass, types, rCount, op):
+ global header_output, exec_output
+ eWalkCode = simd64EnabledCheckCode + '''
+ RegVect srcReg1, destReg;
+ '''
+ for reg in range(rCount):
+ eWalkCode += '''
+ srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
+ ''' % { "reg" : reg }
+ eWalkCode += '''
+ Element srcElem1 = gtoh(srcReg1.elements[0]);
+ Element srcElem2 = gtoh(srcReg1.elements[1]);
+ Element destElem;
+ %(op)s
+ destReg.elements[0] = htog(destElem);
+ ''' % { "op" : op }
+ destCnt = rCount / 2
+ for reg in range(destCnt):
+ eWalkCode += '''
+ AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
+ ''' % { "reg" : reg }
+ for reg in range(destCnt, 4): # zero upper half
+ eWalkCode += '''
+ AA64FpDestP%(reg)d_uw = 0;
+ ''' % { "reg" : reg }
+ iop = InstObjParams(name, Name,
+ "DataX1RegOp",
+ { "code": eWalkCode,
+ "r_count": rCount,
+ "op_class": opClass }, [])
+ header_output += NeonX1RegOpDeclare.subst(iop)
+ exec_output += NeonXEqualRegOpExecute.subst(iop)
+ for type in types:
+ substDict = { "targs" : type,
+ "class_name" : Name }
+ exec_output += NeonXExecDeclare.subst(substDict)
+
+ def twoRegAcrossInstX(name, Name, opClass, types, rCount, op,
+ doubleDest=False, long=False):
+ global header_output, exec_output
+ destPrefix = "Big" if long else ""
+ eWalkCode = simd64EnabledCheckCode + '''
+ RegVect srcReg1;
+ %sRegVect destReg;
+ ''' % destPrefix
+ for reg in range(rCount):
+ eWalkCode += '''
+ srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
+ ''' % { "reg" : reg }
+ eWalkCode += '''
+ destReg.regs[0] = 0;
+ %(destPrefix)sElement destElem = 0;
+ for (unsigned i = 0; i < eCount; i++) {
+ Element srcElem1 = gtoh(srcReg1.elements[i]);
+ if (i == 0) {
+ destElem = srcElem1;
+ } else {
+ %(op)s
+ }
+ }
+ destReg.elements[0] = htog(destElem);
+ ''' % { "op" : op, "destPrefix" : destPrefix }
+ destCnt = 2 if doubleDest else 1
+ for reg in range(destCnt):
+ eWalkCode += '''
+ AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
+ ''' % { "reg" : reg }
+ for reg in range(destCnt, 4): # zero upper half
+ eWalkCode += '''
+ AA64FpDestP%(reg)d_uw = 0;
+ ''' % { "reg" : reg }
+ iop = InstObjParams(name, Name,
+ "DataX1RegOp",
+ { "code": eWalkCode,
+ "r_count": rCount,
+ "op_class": opClass }, [])
+ header_output += NeonX1RegOpDeclare.subst(iop)
+ if long:
+ exec_output += NeonXUnequalRegOpExecute.subst(iop)
+ else:
+ exec_output += NeonXEqualRegOpExecute.subst(iop)
+ for type in types:
+ substDict = { "targs" : type,
+ "class_name" : Name }
+ exec_output += NeonXExecDeclare.subst(substDict)
+
+ def twoRegCondenseInstX(name, Name, opClass, types, rCount, op,
+ readDest=False):
+ global header_output, exec_output
+ eWalkCode = simd64EnabledCheckCode + '''
+ RegVect srcRegs;
+ BigRegVect destReg;
+ '''
+ for reg in range(rCount):
+ eWalkCode += '''
+ srcRegs.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
+ ''' % { "reg" : reg }
+ if readDest:
+ eWalkCode += '''
+ destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
+ ''' % { "reg" : reg }
+ readDestCode = ''
+ if readDest:
+ readDestCode = 'destElem = gtoh(destReg.elements[i]);'
+ eWalkCode += '''
+ for (unsigned i = 0; i < eCount / 2; i++) {
+ Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
+ Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
+ BigElement destElem;
+ %(readDest)s
+ %(op)s
+ destReg.elements[i] = htog(destElem);
+ }
+ ''' % { "op" : op, "readDest" : readDestCode }
+ for reg in range(rCount):
+ eWalkCode += '''
+ AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
+ ''' % { "reg" : reg }
+ if rCount < 4: # zero upper half
+ for reg in range(rCount, 4):
+ eWalkCode += '''
+ AA64FpDestP%(reg)d_uw = 0;
+ ''' % { "reg" : reg }
+ iop = InstObjParams(name, Name,
+ "DataX1RegOp",
+ { "code": eWalkCode,
+ "r_count": rCount,
+ "op_class": opClass }, [])
+ header_output += NeonX1RegOpDeclare.subst(iop)
+ exec_output += NeonXUnequalRegOpExecute.subst(iop)
+ for type in types:
+ substDict = { "targs" : type,
+ "class_name" : Name }
+ exec_output += NeonXExecDeclare.subst(substDict)
+
+ def oneRegImmInstX(name, Name, opClass, types, rCount, op, readDest=False):
+ global header_output, exec_output
+ eWalkCode = simd64EnabledCheckCode + '''
+ RegVect destReg;
+ '''
+ if readDest:
+ for reg in range(rCount):
+ eWalkCode += '''
+ destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
+ ''' % { "reg" : reg }
+ readDestCode = ''
+ if readDest:
+ readDestCode = 'destElem = gtoh(destReg.elements[i]);'
+ eWalkCode += '''
+ for (unsigned i = 0; i < eCount; i++) {
+ Element destElem;
+ %(readDest)s
+ %(op)s
+ destReg.elements[i] = htog(destElem);
+ }
+ ''' % { "op" : op, "readDest" : readDestCode }
+ for reg in range(rCount):
+ eWalkCode += '''
+ AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
+ ''' % { "reg" : reg }
+ if rCount < 4: # zero upper half
+ for reg in range(rCount, 4):
+ eWalkCode += '''
+ AA64FpDestP%(reg)d_uw = 0;
+ ''' % { "reg" : reg }
+ iop = InstObjParams(name, Name,
+ "DataXImmOnlyOp",
+ { "code": eWalkCode,
+ "r_count": rCount,
+ "op_class": opClass }, [])
+ header_output += NeonX1RegImmOnlyOpDeclare.subst(iop)
+ exec_output += NeonXEqualRegOpExecute.subst(iop)
+ for type in types:
+ substDict = { "targs" : type,
+ "class_name" : Name }
+ exec_output += NeonXExecDeclare.subst(substDict)
+
+ def dupGprInstX(name, Name, opClass, types, rCount, gprSpec):
+ global header_output, exec_output
+ eWalkCode = simd64EnabledCheckCode + '''
+ RegVect destReg;
+ for (unsigned i = 0; i < eCount; i++) {
+ destReg.elements[i] = htog((Element) %sOp1);
+ }
+ ''' % gprSpec
+ for reg in range(rCount):
+ eWalkCode += '''
+ AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
+ ''' % { "reg" : reg }
+ if rCount < 4: # zero upper half
+ for reg in range(rCount, 4):
+ eWalkCode += '''
+ AA64FpDestP%(reg)d_uw = 0;
+ ''' % { "reg" : reg }
+ iop = InstObjParams(name, Name,
+ "DataX1RegOp",
+ { "code": eWalkCode,
+ "r_count": rCount,
+ "op_class": opClass }, [])
+ header_output += NeonX1RegOpDeclare.subst(iop)
+ exec_output += NeonXEqualRegOpExecute.subst(iop)
+ for type in types:
+ substDict = { "targs" : type,
+ "class_name" : Name }
+ exec_output += NeonXExecDeclare.subst(substDict)
+
+ def extInstX(name, Name, opClass, types, rCount, op):
+ global header_output, exec_output
+ eWalkCode = simd64EnabledCheckCode + '''
+ RegVect srcReg1, srcReg2, destReg;
+ '''
+ for reg in range(rCount):
+ eWalkCode += '''
+ srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
+ srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw);
+ ''' % { "reg" : reg }
+ eWalkCode += op
+ for reg in range(rCount):
+ eWalkCode += '''
+ AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
+ ''' % { "reg" : reg }
+ if rCount < 4: # zero upper half
+ for reg in range(rCount, 4):
+ eWalkCode += '''
+ AA64FpDestP%(reg)d_uw = 0;
+ ''' % { "reg" : reg }
+ iop = InstObjParams(name, Name,
+ "DataX2RegImmOp",
+ { "code": eWalkCode,
+ "r_count": rCount,
+ "op_class": opClass }, [])
+ header_output += NeonX2RegImmOpDeclare.subst(iop)
+ exec_output += NeonXEqualRegOpExecute.subst(iop)
+ for type in types:
+ substDict = { "targs" : type,
+ "class_name" : Name }
+ exec_output += NeonXExecDeclare.subst(substDict)
+
+ def insFromGprInstX(name, Name, opClass, types, rCount, gprSpec):
+ global header_output, exec_output
+ eWalkCode = simd64EnabledCheckCode + '''
+ RegVect destReg;
+ '''
+ for reg in range(rCount):
+ eWalkCode += '''
+ destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
+ ''' % { "reg" : reg }
+ eWalkCode += '''
+ destReg.elements[imm] = htog((Element) %sOp1);
+ ''' % gprSpec
+ for reg in range(rCount):
+ eWalkCode += '''
+ AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
+ ''' % { "reg" : reg }
+ iop = InstObjParams(name, Name,
+ "DataX1RegImmOp",
+ { "code": eWalkCode,
+ "r_count": rCount,
+ "op_class": opClass }, [])
+ header_output += NeonX1RegImmOpDeclare.subst(iop)
+ exec_output += NeonXEqualRegOpExecute.subst(iop)
+ for type in types:
+ substDict = { "targs" : type,
+ "class_name" : Name }
+ exec_output += NeonXExecDeclare.subst(substDict)
+
+ def insToGprInstX(name, Name, opClass, types, rCount, gprSpec,
+ signExt=False):
+ global header_output, exec_output
+ eWalkCode = simd64EnabledCheckCode + '''
+ FullRegVect srcReg;
+ '''
+ for reg in range(4):
+ eWalkCode += '''
+ srcReg.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
+ ''' % { "reg" : reg }
+ if signExt:
+ eWalkCode += '''
+ %sDest = sext<sizeof(Element) * 8>(srcReg.elements[imm]);
+ ''' % gprSpec
+ else:
+ eWalkCode += '''
+ %sDest = srcReg.elements[imm];
+ ''' % gprSpec
+ iop = InstObjParams(name, Name,
+ "DataX1RegImmOp",
+ { "code": eWalkCode,
+ "r_count": rCount,
+ "op_class": opClass }, [])
+ header_output += NeonX1RegImmOpDeclare.subst(iop)
+ exec_output += NeonXEqualRegOpExecute.subst(iop)
+ for type in types:
+ substDict = { "targs" : type,
+ "class_name" : Name }
+ exec_output += NeonXExecDeclare.subst(substDict)
+
+ def tbxTblInstX(name, Name, opClass, types, length, isTbl, rCount):
+ global header_output, decoder_output, exec_output
+ code = simd64EnabledCheckCode + '''
+ union
+ {
+ uint8_t bytes[64];
+ FloatRegBits regs[16];
+ } table;
+
+ union
+ {
+ uint8_t bytes[%(rCount)d * 4];
+ FloatRegBits regs[%(rCount)d];
+ } destReg, srcReg2;
+
+ const unsigned length = %(length)d;
+ const bool isTbl = %(isTbl)s;
+ ''' % { "rCount" : rCount, "length" : length, "isTbl" : isTbl }
+ for reg in range(rCount):
+ code += '''
+ srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw);
+ destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
+ ''' % { "reg" : reg }
+ for reg in range(16):
+ if reg < length * 4:
+ code += '''
+ table.regs[%(reg)d] = htog(AA64FpOp1P%(p)dV%(v)dS_uw);
+ ''' % { "reg" : reg, "p" : reg % 4, "v" : reg / 4 }
+ else:
+ code += '''
+ table.regs[%(reg)d] = 0;
+ ''' % { "reg" : reg }
+ code += '''
+ for (unsigned i = 0; i < sizeof(destReg); i++) {
+ uint8_t index = srcReg2.bytes[i];
+ if (index < 16 * length) {
+ destReg.bytes[i] = table.bytes[index];
+ } else {
+ if (isTbl)
+ destReg.bytes[i] = 0;
+ // else destReg.bytes[i] unchanged
+ }
+ }
+ '''
+ for reg in range(rCount):
+ code += '''
+ AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
+ ''' % { "reg" : reg }
+ if rCount < 4: # zero upper half
+ for reg in range(rCount, 4):
+ code += '''
+ AA64FpDestP%(reg)d_uw = 0;
+ ''' % { "reg" : reg }
+ iop = InstObjParams(name, Name,
+ "DataX2RegOp",
+ { "code": code,
+ "r_count": rCount,
+ "op_class": opClass }, [])
+ header_output += NeonX2RegOpDeclare.subst(iop)
+ exec_output += NeonXEqualRegOpExecute.subst(iop)
+ for type in types:
+ substDict = { "targs" : type,
+ "class_name" : Name }
+ exec_output += NeonXExecDeclare.subst(substDict)
+
+ # ABS
+ absCode = '''
+ if (srcElem1 < 0) {
+ destElem = -srcElem1;
+ } else {
+ destElem = srcElem1;
+ }
+ '''
+ twoEqualRegInstX("abs", "AbsDX", "SimdAluOp", signedTypes, 2, absCode)
+ twoEqualRegInstX("abs", "AbsQX", "SimdAluOp", signedTypes, 4, absCode)
+ # ADD
+ addCode = "destElem = srcElem1 + srcElem2;"
+ threeEqualRegInstX("add", "AddDX", "SimdAddOp", unsignedTypes, 2, addCode)
+ threeEqualRegInstX("add", "AddQX", "SimdAddOp", unsignedTypes, 4, addCode)
+ # ADDHN, ADDHN2
+ addhnCode = '''
+ destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
+ (sizeof(Element) * 8);
+ '''
+ threeRegNarrowInstX("addhn", "AddhnX", "SimdAddOp", smallUnsignedTypes,
+ addhnCode)
+ threeRegNarrowInstX("addhn2", "Addhn2X", "SimdAddOp", smallUnsignedTypes,
+ addhnCode, hi=True)
+ # ADDP (scalar)
+ twoRegPairwiseScInstX("addp", "AddpScQX", "SimdAddOp", ("uint64_t",), 4,
+ addCode)
+ # ADDP (vector)
+ threeEqualRegInstX("addp", "AddpDX", "SimdAddOp", smallUnsignedTypes, 2,
+ addCode, pairwise=True)
+ threeEqualRegInstX("addp", "AddpQX", "SimdAddOp", unsignedTypes, 4,
+ addCode, pairwise=True)
+ # ADDV
+ # Note: SimdAddOp can be a bit optimistic here
+ addAcrossCode = "destElem += srcElem1;"
+ twoRegAcrossInstX("addv", "AddvDX", "SimdAddOp", ("uint8_t", "uint16_t"),
+ 2, addAcrossCode)
+ twoRegAcrossInstX("addv", "AddvQX", "SimdAddOp", smallUnsignedTypes, 4,
+ addAcrossCode)
+ # AND
+ andCode = "destElem = srcElem1 & srcElem2;"
+ threeEqualRegInstX("and", "AndDX", "SimdAluOp", ("uint64_t",), 2, andCode)
+ threeEqualRegInstX("and", "AndQX", "SimdAluOp", ("uint64_t",), 4, andCode)
+ # BIC (immediate)
+ bicImmCode = "destElem &= ~imm;"
+ oneRegImmInstX("bic", "BicImmDX", "SimdAluOp", ("uint64_t",), 2,
+ bicImmCode, True)
+ oneRegImmInstX("bic", "BicImmQX", "SimdAluOp", ("uint64_t",), 4,
+ bicImmCode, True)
+ # BIC (register)
+ bicCode = "destElem = srcElem1 & ~srcElem2;"
+ threeEqualRegInstX("bic", "BicDX", "SimdAluOp", ("uint64_t",), 2, bicCode)
+ threeEqualRegInstX("bic", "BicQX", "SimdAluOp", ("uint64_t",), 4, bicCode)
+ # BIF
+ bifCode = "destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);"
+ threeEqualRegInstX("bif", "BifDX", "SimdAluOp", ("uint64_t",), 2, bifCode,
+ True)
+ threeEqualRegInstX("bif", "BifQX", "SimdAluOp", ("uint64_t",), 4, bifCode,
+ True)
+ # BIT
+ bitCode = "destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);"
+ threeEqualRegInstX("bit", "BitDX", "SimdAluOp", ("uint64_t",), 2, bitCode,
+ True)
+ threeEqualRegInstX("bit", "BitQX", "SimdAluOp", ("uint64_t",), 4, bitCode,
+ True)
+ # BSL
+ bslCode = "destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);"
+ threeEqualRegInstX("bsl", "BslDX", "SimdAluOp", ("uint64_t",), 2, bslCode,
+ True)
+ threeEqualRegInstX("bsl", "BslQX", "SimdAluOp", ("uint64_t",), 4, bslCode,
+ True)
+ # CLS
+ clsCode = '''
+ unsigned count = 0;
+ if (srcElem1 < 0) {
+ srcElem1 <<= 1;
+ while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
+ count++;
+ srcElem1 <<= 1;
+ }
+ } else {
+ srcElem1 <<= 1;
+ while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
+ count++;
+ srcElem1 <<= 1;
+ }
+ }
+ destElem = count;
+ '''
+ twoEqualRegInstX("cls", "ClsDX", "SimdAluOp", smallSignedTypes, 2, clsCode)
+ twoEqualRegInstX("cls", "ClsQX", "SimdAluOp", smallSignedTypes, 4, clsCode)
+ # CLZ
+ clzCode = '''
+ unsigned count = 0;
+ while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
+ count++;
+ srcElem1 <<= 1;
+ }
+ destElem = count;
+ '''
+ twoEqualRegInstX("clz", "ClzDX", "SimdAluOp", smallSignedTypes, 2, clzCode)
+ twoEqualRegInstX("clz", "ClzQX", "SimdAluOp", smallSignedTypes, 4, clzCode)
+ # CMEQ (register)
+ cmeqCode = "destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0;"
+ threeEqualRegInstX("cmeq", "CmeqDX", "SimdCmpOp", unsignedTypes, 2,
+ cmeqCode)
+ threeEqualRegInstX("cmeq", "CmeqQX", "SimdCmpOp", unsignedTypes, 4,
+ cmeqCode)
+ # CMEQ (zero)
+ cmeqZeroCode = "destElem = (srcElem1 == 0) ? (Element)(-1) : 0;"
+ twoEqualRegInstX("cmeq", "CmeqZeroDX", "SimdCmpOp", signedTypes, 2,
+ cmeqZeroCode)
+ twoEqualRegInstX("cmeq", "CmeqZeroQX", "SimdCmpOp", signedTypes, 4,
+ cmeqZeroCode)
+ # CMGE (register)
+ cmgeCode = "destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0;"
+ threeEqualRegInstX("cmge", "CmgeDX", "SimdCmpOp", signedTypes, 2, cmgeCode)
+ threeEqualRegInstX("cmge", "CmgeQX", "SimdCmpOp", signedTypes, 4, cmgeCode)
+ # CMGE (zero)
+ cmgeZeroCode = "destElem = (srcElem1 >= 0) ? (Element)(-1) : 0;"
+ twoEqualRegInstX("cmge", "CmgeZeroDX", "SimdCmpOp", signedTypes, 2,
+ cmgeZeroCode)
+ twoEqualRegInstX("cmge", "CmgeZeroQX", "SimdCmpOp", signedTypes, 4,
+ cmgeZeroCode)
+ # CMGT (register)
+ cmgtCode = "destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;"
+ threeEqualRegInstX("cmgt", "CmgtDX", "SimdCmpOp", signedTypes, 2, cmgtCode)
+ threeEqualRegInstX("cmgt", "CmgtQX", "SimdCmpOp", signedTypes, 4, cmgtCode)
+ # CMGT (zero)
+ cmgtZeroCode = "destElem = (srcElem1 > 0) ? (Element)(-1) : 0;"
+ twoEqualRegInstX("cmgt", "CmgtZeroDX", "SimdCmpOp", signedTypes, 2,
+ cmgtZeroCode)
+ twoEqualRegInstX("cmgt", "CmgtZeroQX", "SimdCmpOp", signedTypes, 4,
+ cmgtZeroCode)
+ # CMHI (register)
+ threeEqualRegInstX("cmhi", "CmhiDX", "SimdCmpOp", unsignedTypes, 2,
+ cmgtCode)
+ threeEqualRegInstX("cmhi", "CmhiQX", "SimdCmpOp", unsignedTypes, 4,
+ cmgtCode)
+ # CMHS (register)
+ threeEqualRegInstX("cmhs", "CmhsDX", "SimdCmpOp", unsignedTypes, 2,
+ cmgeCode)
+ threeEqualRegInstX("cmhs", "CmhsQX", "SimdCmpOp", unsignedTypes, 4,
+ cmgeCode)
+ # CMLE (zero)
+ cmleZeroCode = "destElem = (srcElem1 <= 0) ? (Element)(-1) : 0;"
+ twoEqualRegInstX("cmle", "CmleZeroDX", "SimdCmpOp", signedTypes, 2,
+ cmleZeroCode)
+ twoEqualRegInstX("cmle", "CmleZeroQX", "SimdCmpOp", signedTypes, 4,
+ cmleZeroCode)
+ # CMLT (zero)
+ cmltZeroCode = "destElem = (srcElem1 < 0) ? (Element)(-1) : 0;"
+ twoEqualRegInstX("cmlt", "CmltZeroDX", "SimdCmpOp", signedTypes, 2,
+ cmltZeroCode)
+ twoEqualRegInstX("cmlt", "CmltZeroQX", "SimdCmpOp", signedTypes, 4,
+ cmltZeroCode)
+ # CMTST (register)
+ tstCode = "destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;"
+ threeEqualRegInstX("cmtst", "CmtstDX", "SimdAluOp", unsignedTypes, 2,
+ tstCode)
+ threeEqualRegInstX("cmtst", "CmtstQX", "SimdAluOp", unsignedTypes, 4,
+ tstCode)
+ # CNT
+ cntCode = '''
+ unsigned count = 0;
+ while (srcElem1 && count < sizeof(Element) * 8) {
+ count += srcElem1 & 0x1;
+ srcElem1 >>= 1;
+ }
+ destElem = count;
+ '''
+ twoEqualRegInstX("cnt", "CntDX", "SimdAluOp", ("uint8_t",), 2, cntCode)
+ twoEqualRegInstX("cnt", "CntQX", "SimdAluOp", ("uint8_t",), 4, cntCode)
+ # DUP (element)
+ dupCode = "destElem = srcElem1;"
+ twoEqualRegInstX("dup", "DupElemDX", "SimdMiscOp", smallUnsignedTypes, 2,
+ dupCode, isDup=True, byElem=True)
+ twoEqualRegInstX("dup", "DupElemQX", "SimdMiscOp", unsignedTypes, 4,
+ dupCode, isDup=True, byElem=True)
+ twoEqualRegInstX("dup", "DupElemScX", "SimdMiscOp", unsignedTypes, 4,
+ dupCode, isDup=True, byElem=True, scalar=True)
+ # DUP (general register)
+ dupGprInstX("dup", "DupGprWDX", "SimdMiscOp", smallUnsignedTypes, 2, 'W')
+ dupGprInstX("dup", "DupGprWQX", "SimdMiscOp", smallUnsignedTypes, 4, 'W')
+ dupGprInstX("dup", "DupGprXQX", "SimdMiscOp", ("uint64_t",), 4, 'X')
+ # EOR
+ eorCode = "destElem = srcElem1 ^ srcElem2;"
+ threeEqualRegInstX("eor", "EorDX", "SimdAluOp", ("uint64_t",), 2, eorCode)
+ threeEqualRegInstX("eor", "EorQX", "SimdAluOp", ("uint64_t",), 4, eorCode)
+ # EXT
+ extCode = '''
+ for (unsigned i = 0; i < eCount; i++) {
+ unsigned index = i + imm;
+ if (index < eCount) {
+ destReg.elements[i] = srcReg1.elements[index];
+ } else {
+ index -= eCount;
+ if (index >= eCount) {
+ fault = new UndefinedInstruction(machInst, false, mnemonic);
+ } else {
+ destReg.elements[i] = srcReg2.elements[index];
+ }
+ }
+ }
+ '''
+ extInstX("Ext", "ExtDX", "SimdMiscOp", ("uint8_t",), 2, extCode)
+ extInstX("Ext", "ExtQX", "SimdMiscOp", ("uint8_t",), 4, extCode)
+ # FABD
+ fpOp = '''
+ FPSCR fpscr = (FPSCR) FpscrExc;
+ destElem = %s;
+ FpscrExc = fpscr;
+ '''
+ fabdCode = fpOp % "fplibAbs<Element>(fplibSub(srcElem1, srcElem2, fpscr))"
+ threeEqualRegInstX("fabd", "FabdDX", "SimdFloatAddOp", smallFloatTypes, 2,
+ fabdCode)
+ threeEqualRegInstX("fabd", "FabdQX", "SimdFloatAddOp", floatTypes, 4,
+ fabdCode)
+ threeEqualRegInstX("fabd", "FabdScX", "SimdFloatAddOp", floatTypes, 4,
+ fabdCode, scalar=True)
+ # FABS
+ fabsCode = fpOp % "fplibAbs<Element>(srcElem1)"
+ twoEqualRegInstX("Abs", "FabsDX", "SimdFloatAluOp", smallFloatTypes, 2,
+ fabsCode)
+ twoEqualRegInstX("Abs", "FabsQX", "SimdFloatAluOp", floatTypes, 4,
+ fabsCode)
+ # FACGE
+ fpCmpAbsOp = fpOp % ("fplibCompare%s<Element>(fplibAbs<Element>(srcElem1),"
+ " fplibAbs<Element>(srcElem2), fpscr) ? -1 : 0")
+ facgeCode = fpCmpAbsOp % "GE"
+ threeEqualRegInstX("facge", "FacgeDX", "SimdFloatCmpOp", smallFloatTypes,
+ 2, facgeCode)
+ threeEqualRegInstX("facge", "FacgeQX", "SimdFloatCmpOp", floatTypes, 4,
+ facgeCode)
+ threeEqualRegInstX("facge", "FacgeScX", "SimdFloatCmpOp", floatTypes, 4,
+ facgeCode, scalar=True)
+ # FACGT
+ facgtCode = fpCmpAbsOp % "GT"
+ threeEqualRegInstX("facgt", "FacgtDX", "SimdFloatCmpOp", smallFloatTypes,
+ 2, facgtCode)
+ threeEqualRegInstX("facgt", "FacgtQX", "SimdFloatCmpOp", floatTypes, 4,
+ facgtCode)
+ threeEqualRegInstX("facgt", "FacgtScX", "SimdFloatCmpOp", floatTypes, 4,
+ facgtCode, scalar=True)
+ # FADD
+ fpBinOp = fpOp % "fplib%s<Element>(srcElem1, srcElem2, fpscr)"
+ faddCode = fpBinOp % "Add"
+ threeEqualRegInstX("fadd", "FaddDX", "SimdFloatAddOp", smallFloatTypes, 2,
+ faddCode)
+ threeEqualRegInstX("fadd", "FaddQX", "SimdFloatAddOp", floatTypes, 4,
+ faddCode)
+ # FADDP (scalar)
+ twoRegPairwiseScInstX("faddp", "FaddpScDX", "SimdFloatAddOp",
+ ("uint32_t",), 2, faddCode)
+ twoRegPairwiseScInstX("faddp", "FaddpScQX", "SimdFloatAddOp",
+ ("uint64_t",), 4, faddCode)
+ # FADDP (vector)
+ threeEqualRegInstX("faddp", "FaddpDX", "SimdFloatAddOp", smallFloatTypes,
+ 2, faddCode, pairwise=True)
+ threeEqualRegInstX("faddp", "FaddpQX", "SimdFloatAddOp", floatTypes, 4,
+ faddCode, pairwise=True)
+ # FCMEQ (register)
+ fpCmpOp = fpOp % ("fplibCompare%s<Element>(srcElem1, srcElem2, fpscr) ?"
+ " -1 : 0")
+ fcmeqCode = fpCmpOp % "EQ"
+ threeEqualRegInstX("fcmeq", "FcmeqDX", "SimdFloatCmpOp", smallFloatTypes,
+ 2, fcmeqCode)
+ threeEqualRegInstX("fcmeq", "FcmeqQX", "SimdFloatCmpOp", floatTypes, 4,
+ fcmeqCode)
+ threeEqualRegInstX("fcmeq", "FcmeqScX", "SimdFloatCmpOp", floatTypes, 4,
+ fcmeqCode, scalar=True)
+ # FCMEQ (zero)
+ fpCmpZeroOp = fpOp % "fplibCompare%s<Element>(srcElem1, 0, fpscr) ? -1 : 0"
+ fcmeqZeroCode = fpCmpZeroOp % "EQ"
+ twoEqualRegInstX("fcmeq", "FcmeqZeroDX", "SimdFloatCmpOp", smallFloatTypes,
+ 2, fcmeqZeroCode)
+ twoEqualRegInstX("fcmeq", "FcmeqZeroQX", "SimdFloatCmpOp", floatTypes, 4,
+ fcmeqZeroCode)
+ twoEqualRegInstX("fcmeq", "FcmeqZeroScX", "SimdFloatCmpOp", floatTypes, 4,
+ fcmeqZeroCode, scalar=True)
+ # FCMGE (register)
+ fcmgeCode = fpCmpOp % "GE"
+ threeEqualRegInstX("fcmge", "FcmgeDX", "SimdFloatCmpOp", smallFloatTypes,
+ 2, fcmgeCode)
+ threeEqualRegInstX("fcmge", "FcmgeQX", "SimdFloatCmpOp", floatTypes, 4,
+ fcmgeCode)
+ threeEqualRegInstX("fcmge", "FcmgeScX", "SimdFloatCmpOp", floatTypes, 4,
+ fcmgeCode, scalar=True)
+ # FCMGE (zero)
+ fcmgeZeroCode = fpCmpZeroOp % "GE"
+ twoEqualRegInstX("fcmge", "FcmgeZeroDX", "SimdFloatCmpOp", smallFloatTypes,
+ 2, fcmgeZeroCode)
+ twoEqualRegInstX("fcmge", "FcmgeZeroQX", "SimdFloatCmpOp", floatTypes, 4,
+ fcmgeZeroCode)
+ twoEqualRegInstX("fcmge", "FcmgeZeroScX", "SimdFloatCmpOp", floatTypes, 4,
+ fcmgeZeroCode, scalar=True)
+ # FCMGT (register)
+ fcmgtCode = fpCmpOp % "GT"
+ threeEqualRegInstX("fcmgt", "FcmgtDX", "SimdFloatCmpOp", smallFloatTypes,
+ 2, fcmgtCode)
+ threeEqualRegInstX("fcmgt", "FcmgtQX", "SimdFloatCmpOp", floatTypes, 4,
+ fcmgtCode)
+ threeEqualRegInstX("fcmgt", "FcmgtScX", "SimdFloatCmpOp", floatTypes, 4,
+ fcmgtCode, scalar=True)
+ # FCMGT (zero)
+ fcmgtZeroCode = fpCmpZeroOp % "GT"
+ twoEqualRegInstX("fcmgt", "FcmgtZeroDX", "SimdFloatCmpOp", smallFloatTypes,
+ 2, fcmgtZeroCode)
+ twoEqualRegInstX("fcmgt", "FcmgtZeroQX", "SimdFloatCmpOp", floatTypes, 4,
+ fcmgtZeroCode)
+ twoEqualRegInstX("fcmgt", "FcmgtZeroScX", "SimdFloatCmpOp", floatTypes, 4,
+ fcmgtZeroCode, scalar=True)
+ # FCMLE (zero)
+ fpCmpRevZeroOp = fpOp % ("fplibCompare%s<Element>(0, srcElem1, fpscr) ?"
+ " -1 : 0")
+ fcmleZeroCode = fpCmpRevZeroOp % "GE"
+ twoEqualRegInstX("fcmle", "FcmleZeroDX", "SimdFloatCmpOp", smallFloatTypes,
+ 2, fcmleZeroCode)
+ twoEqualRegInstX("fcmle", "FcmleZeroQX", "SimdFloatCmpOp", floatTypes, 4,
+ fcmleZeroCode)
+ twoEqualRegInstX("fcmle", "FcmleZeroScX", "SimdFloatCmpOp", floatTypes, 4,
+ fcmleZeroCode, scalar=True)
+ # FCMLT (zero)
+ fcmltZeroCode = fpCmpRevZeroOp % "GT"
+ twoEqualRegInstX("fcmlt", "FcmltZeroDX", "SimdFloatCmpOp", smallFloatTypes,
+ 2, fcmltZeroCode)
+ twoEqualRegInstX("fcmlt", "FcmltZeroQX", "SimdFloatCmpOp", floatTypes, 4,
+ fcmltZeroCode)
+ twoEqualRegInstX("fcmlt", "FcmltZeroScX", "SimdFloatCmpOp", floatTypes, 4,
+ fcmltZeroCode, scalar=True)
+ # FCVTAS
+ fcvtCode = fpOp % ("fplibFPToFixed<Element, Element>("
+ "srcElem1, %s, %s, %s, fpscr)")
+ fcvtasCode = fcvtCode % ("0", "false", "FPRounding_TIEAWAY")
+ twoEqualRegInstX("fcvtas", "FcvtasDX", "SimdCvtOp", smallFloatTypes, 2,
+ fcvtasCode)
+ twoEqualRegInstX("fcvtas", "FcvtasQX", "SimdCvtOp", floatTypes, 4,
+ fcvtasCode)
+ twoEqualRegInstX("fcvtas", "FcvtasScX", "SimdCvtOp", floatTypes, 4,
+ fcvtasCode, scalar=True)
+ # FCVTAU
+ fcvtauCode = fcvtCode % ("0", "true", "FPRounding_TIEAWAY")
+ twoEqualRegInstX("fcvtau", "FcvtauDX", "SimdCvtOp", smallFloatTypes, 2,
+ fcvtauCode)
+ twoEqualRegInstX("fcvtau", "FcvtauQX", "SimdCvtOp", floatTypes, 4,
+ fcvtauCode)
+ twoEqualRegInstX("fcvtau", "FcvtauScX", "SimdCvtOp", floatTypes, 4,
+ fcvtauCode, scalar=True)
+ # FCVTL, FCVTL2
+ fcvtlCode = fpOp % ("fplibConvert<Element, BigElement>("
+ "srcElem1, FPCRRounding(fpscr), fpscr)")
+ twoRegLongInstX("fcvtl", "FcvtlX", "SimdCvtOp", ("uint16_t", "uint32_t"),
+ fcvtlCode)
+ twoRegLongInstX("fcvtl", "Fcvtl2X", "SimdCvtOp", ("uint16_t", "uint32_t"),
+ fcvtlCode, hi=True)
+ # FCVTMS
+ fcvtmsCode = fcvtCode % ("0", "false", "FPRounding_NEGINF")
+ twoEqualRegInstX("fcvtms", "FcvtmsDX", "SimdCvtOp", smallFloatTypes, 2,
+ fcvtmsCode)
+ twoEqualRegInstX("fcvtms", "FcvtmsQX", "SimdCvtOp", floatTypes, 4,
+ fcvtmsCode)
+ twoEqualRegInstX("fcvtms", "FcvtmsScX", "SimdCvtOp", floatTypes, 4,
+ fcvtmsCode, scalar=True)
+ # FCVTMU
+ fcvtmuCode = fcvtCode % ("0", "true", "FPRounding_NEGINF")
+ twoEqualRegInstX("fcvtmu", "FcvtmuDX", "SimdCvtOp", smallFloatTypes, 2,
+ fcvtmuCode)
+ twoEqualRegInstX("fcvtmu", "FcvtmuQX", "SimdCvtOp", floatTypes, 4,
+ fcvtmuCode)
+ twoEqualRegInstX("fcvtmu", "FcvtmuScX", "SimdCvtOp", floatTypes, 4,
+ fcvtmuCode, scalar=True)
+ # FCVTN, FCVTN2
+ fcvtnCode = fpOp % ("fplibConvert<BigElement, Element>("
+ "srcElem1, FPCRRounding(fpscr), fpscr)")
+ twoRegNarrowInstX("fcvtn", "FcvtnX", "SimdCvtOp",
+ ("uint16_t", "uint32_t"), fcvtnCode)
+ twoRegNarrowInstX("fcvtn", "Fcvtn2X", "SimdCvtOp",
+ ("uint16_t", "uint32_t"), fcvtnCode, hi=True)
+ # FCVTNS
+ fcvtnsCode = fcvtCode % ("0", "false", "FPRounding_TIEEVEN")
+ twoEqualRegInstX("fcvtns", "FcvtnsDX", "SimdCvtOp", smallFloatTypes, 2,
+ fcvtnsCode)
+ twoEqualRegInstX("fcvtns", "FcvtnsQX", "SimdCvtOp", floatTypes, 4,
+ fcvtnsCode)
+ twoEqualRegInstX("fcvtns", "FcvtnsScX", "SimdCvtOp", floatTypes, 4,
+ fcvtnsCode, scalar=True)
+ # FCVTNU
+ fcvtnuCode = fcvtCode % ("0", "true", "FPRounding_TIEEVEN")
+ twoEqualRegInstX("fcvtnu", "FcvtnuDX", "SimdCvtOp", smallFloatTypes, 2,
+ fcvtnuCode)
+ twoEqualRegInstX("fcvtnu", "FcvtnuQX", "SimdCvtOp", floatTypes, 4,
+ fcvtnuCode)
+ twoEqualRegInstX("fcvtnu", "FcvtnuScX", "SimdCvtOp", floatTypes, 4,
+ fcvtnuCode, scalar=True)
+ # FCVTPS
+ fcvtpsCode = fcvtCode % ("0", "false", "FPRounding_POSINF")
+ twoEqualRegInstX("fcvtps", "FcvtpsDX", "SimdCvtOp", smallFloatTypes, 2,
+ fcvtpsCode)
+ twoEqualRegInstX("fcvtps", "FcvtpsQX", "SimdCvtOp", floatTypes, 4,
+ fcvtpsCode)
+ twoEqualRegInstX("fcvtps", "FcvtpsScX", "SimdCvtOp", floatTypes, 4,
+ fcvtpsCode, scalar=True)
+ # FCVTPU
+ fcvtpuCode = fcvtCode % ("0", "true", "FPRounding_POSINF")
+ twoEqualRegInstX("fcvtpu", "FcvtpuDX", "SimdCvtOp", smallFloatTypes, 2,
+ fcvtpuCode)
+ twoEqualRegInstX("fcvtpu", "FcvtpuQX", "SimdCvtOp", floatTypes, 4,
+ fcvtpuCode)
+ twoEqualRegInstX("fcvtpu", "FcvtpuScX", "SimdCvtOp", floatTypes, 4,
+ fcvtpuCode, scalar=True)
+ # FCVTXN, FCVTXN2
+ fcvtxnCode = fpOp % ("fplibConvert<BigElement, Element>("
+ "srcElem1, FPRounding_ODD, fpscr)")
+ twoRegNarrowInstX("fcvtxn", "FcvtxnX", "SimdCvtOp", smallFloatTypes,
+ fcvtxnCode)
+ twoRegNarrowInstX("fcvtxn", "Fcvtxn2X", "SimdCvtOp", smallFloatTypes,
+ fcvtxnCode, hi=True)
+ twoRegNarrowInstX("fcvtxn", "FcvtxnScX", "SimdCvtOp", smallFloatTypes,
+ fcvtxnCode, scalar=True)
+ # FCVTZS (fixed-point)
+ fcvtzsCode = fcvtCode % ("imm", "false", "FPRounding_ZERO")
+ twoEqualRegInstX("fcvtzs", "FcvtzsFixedDX", "SimdCvtOp", smallFloatTypes,
+ 2, fcvtzsCode, hasImm=True)
+ twoEqualRegInstX("fcvtzs", "FcvtzsFixedQX", "SimdCvtOp", floatTypes, 4,
+ fcvtzsCode, hasImm=True)
+ twoEqualRegInstX("fcvtzs", "FcvtzsFixedScX", "SimdCvtOp", floatTypes, 4,
+ fcvtzsCode, hasImm=True, scalar=True)
+ # FCVTZS (integer)
+ fcvtzsIntCode = fcvtCode % ("0", "false", "FPRounding_ZERO")
+ twoEqualRegInstX("fcvtzs", "FcvtzsIntDX", "SimdCvtOp", smallFloatTypes,
+ 2, fcvtzsIntCode)
+ twoEqualRegInstX("fcvtzs", "FcvtzsIntQX", "SimdCvtOp", floatTypes, 4,
+ fcvtzsIntCode)
+ twoEqualRegInstX("fcvtzs", "FcvtzsIntScX", "SimdCvtOp", floatTypes, 4,
+ fcvtzsIntCode, scalar=True)
+ # FCVTZU (fixed-point)
+ fcvtzuCode = fcvtCode % ("imm", "true", "FPRounding_ZERO")
+ twoEqualRegInstX("fcvtzu", "FcvtzuFixedDX", "SimdCvtOp", smallFloatTypes,
+ 2, fcvtzuCode, hasImm=True)
+ twoEqualRegInstX("fcvtzu", "FcvtzuFixedQX", "SimdCvtOp", floatTypes, 4,
+ fcvtzuCode, hasImm=True)
+ twoEqualRegInstX("fcvtzu", "FcvtzuFixedScX", "SimdCvtOp", floatTypes, 4,
+ fcvtzuCode, hasImm=True, scalar=True)
+ # FCVTZU (integer)
+ fcvtzuIntCode = fcvtCode % ("0", "true", "FPRounding_ZERO")
+ twoEqualRegInstX("fcvtzu", "FcvtzuIntDX", "SimdCvtOp", smallFloatTypes, 2,
+ fcvtzuIntCode)
+ twoEqualRegInstX("fcvtzu", "FcvtzuIntQX", "SimdCvtOp", floatTypes, 4,
+ fcvtzuIntCode)
+ twoEqualRegInstX("fcvtzu", "FcvtzuIntScX", "SimdCvtOp", floatTypes, 4,
+ fcvtzuIntCode, scalar=True)
+ # FDIV
+ fdivCode = fpBinOp % "Div"
+ threeEqualRegInstX("fdiv", "FdivDX", "SimdFloatDivOp", smallFloatTypes, 2,
+ fdivCode)
+ threeEqualRegInstX("fdiv", "FdivQX", "SimdFloatDivOp", floatTypes, 4,
+ fdivCode)
+ # FMAX
+ fmaxCode = fpBinOp % "Max"
+ threeEqualRegInstX("fmax", "FmaxDX", "SimdFloatCmpOp", smallFloatTypes, 2,
+ fmaxCode)
+ threeEqualRegInstX("fmax", "FmaxQX", "SimdFloatCmpOp", floatTypes, 4,
+ fmaxCode)
+ # FMAXNM
+ fmaxnmCode = fpBinOp % "MaxNum"
+ threeEqualRegInstX("fmaxnm", "FmaxnmDX", "SimdFloatCmpOp", smallFloatTypes,
+ 2, fmaxnmCode)
+ threeEqualRegInstX("fmaxnm", "FmaxnmQX", "SimdFloatCmpOp", floatTypes, 4,
+ fmaxnmCode)
+ # FMAXNMP (scalar)
+ twoRegPairwiseScInstX("fmaxnmp", "FmaxnmpScDX", "SimdFloatCmpOp",
+ ("uint32_t",), 2, fmaxnmCode)
+ twoRegPairwiseScInstX("fmaxnmp", "FmaxnmpScQX", "SimdFloatCmpOp",
+ ("uint64_t",), 4, fmaxnmCode)
+ # FMAXNMP (vector)
+ threeEqualRegInstX("fmaxnmp", "FmaxnmpDX", "SimdFloatCmpOp",
+ smallFloatTypes, 2, fmaxnmCode, pairwise=True)
+ threeEqualRegInstX("fmaxnmp", "FmaxnmpQX", "SimdFloatCmpOp", floatTypes, 4,
+ fmaxnmCode, pairwise=True)
+ # FMAXNMV
+ # Note: SimdFloatCmpOp can be a bit optimistic here
+ fpAcrossOp = fpOp % "fplib%s<Element>(destElem, srcElem1, fpscr)"
+ fmaxnmAcrossCode = fpAcrossOp % "MaxNum"
+ twoRegAcrossInstX("fmaxnmv", "FmaxnmvQX", "SimdFloatCmpOp", ("uint32_t",),
+ 4, fmaxnmAcrossCode)
+ # FMAXP (scalar)
+ twoRegPairwiseScInstX("fmaxp", "FmaxpScDX", "SimdFloatCmpOp",
+ ("uint32_t",), 2, fmaxCode)
+ twoRegPairwiseScInstX("fmaxp", "FmaxpScQX", "SimdFloatCmpOp",
+ ("uint64_t",), 4, fmaxCode)
+ # FMAXP (vector)
+ threeEqualRegInstX("fmaxp", "FmaxpDX", "SimdFloatCmpOp", smallFloatTypes,
+ 2, fmaxCode, pairwise=True)
+ threeEqualRegInstX("fmaxp", "FmaxpQX", "SimdFloatCmpOp", floatTypes, 4,
+ fmaxCode, pairwise=True)
+ # FMAXV
+ # Note: SimdFloatCmpOp can be a bit optimistic here
+ fmaxAcrossCode = fpAcrossOp % "Max"
+ twoRegAcrossInstX("fmaxv", "FmaxvQX", "SimdFloatCmpOp", ("uint32_t",), 4,
+ fmaxAcrossCode)
+ # FMIN
+ fminCode = fpBinOp % "Min"
+ threeEqualRegInstX("fmin", "FminDX", "SimdFloatCmpOp", smallFloatTypes, 2,
+ fminCode)
+ threeEqualRegInstX("fmin", "FminQX", "SimdFloatCmpOp", floatTypes, 4,
+ fminCode)
+ # FMINNM
+ fminnmCode = fpBinOp % "MinNum"
+ threeEqualRegInstX("fminnm", "FminnmDX", "SimdFloatCmpOp", smallFloatTypes,
+ 2, fminnmCode)
+ threeEqualRegInstX("fminnm", "FminnmQX", "SimdFloatCmpOp", floatTypes, 4,
+ fminnmCode)
+ # FMINNMP (scalar)
+ twoRegPairwiseScInstX("fminnmp", "FminnmpScDX", "SimdFloatCmpOp",
+ ("uint32_t",), 2, fminnmCode)
+ twoRegPairwiseScInstX("fminnmp", "FminnmpScQX", "SimdFloatCmpOp",
+ ("uint64_t",), 4, fminnmCode)
+ # FMINNMP (vector)
+ threeEqualRegInstX("fminnmp", "FminnmpDX", "SimdFloatCmpOp",
+ smallFloatTypes, 2, fminnmCode, pairwise=True)
+ threeEqualRegInstX("fminnmp", "FminnmpQX", "SimdFloatCmpOp", floatTypes, 4,
+ fminnmCode, pairwise=True)
+ # FMINNMV
+ # Note: SimdFloatCmpOp can be a bit optimistic here
+ fminnmAcrossCode = fpAcrossOp % "MinNum"
+ twoRegAcrossInstX("fminnmv", "FminnmvQX", "SimdFloatCmpOp", ("uint32_t",),
+ 4, fminnmAcrossCode)
+ # FMINP (scalar)
+ twoRegPairwiseScInstX("fminp", "FminpScDX", "SimdFloatCmpOp",
+ ("uint32_t",), 2, fminCode)
+ twoRegPairwiseScInstX("fminp", "FminpScQX", "SimdFloatCmpOp",
+ ("uint64_t",), 4, fminCode)
+ # FMINP (vector)
+ threeEqualRegInstX("fminp", "FminpDX", "SimdFloatCmpOp", smallFloatTypes,
+ 2, fminCode, pairwise=True)
+ threeEqualRegInstX("fminp", "FminpQX", "SimdFloatCmpOp", floatTypes, 4,
+ fminCode, pairwise=True)
+ # FMINV
+ # Note: SimdFloatCmpOp can be a bit optimistic here
+ fminAcrossCode = fpAcrossOp % "Min"
+ twoRegAcrossInstX("fminv", "FminvQX", "SimdFloatCmpOp", ("uint32_t",), 4,
+ fminAcrossCode)
+ # FMLA (by element)
+ fmlaCode = fpOp % ("fplibMulAdd<Element>("
+ "destElem, srcElem1, srcElem2, fpscr)")
+ threeEqualRegInstX("fmla", "FmlaElemDX", "SimdFloatMultAccOp",
+ smallFloatTypes, 2, fmlaCode, True, byElem=True)
+ threeEqualRegInstX("fmla", "FmlaElemQX", "SimdFloatMultAccOp", floatTypes,
+ 4, fmlaCode, True, byElem=True)
+ threeEqualRegInstX("fmla", "FmlaElemScX", "SimdFloatMultAccOp", floatTypes,
+ 4, fmlaCode, True, byElem=True, scalar=True)
+ # FMLA (vector)
+ threeEqualRegInstX("fmla", "FmlaDX", "SimdFloatMultAccOp", smallFloatTypes,
+ 2, fmlaCode, True)
+ threeEqualRegInstX("fmla", "FmlaQX", "SimdFloatMultAccOp", floatTypes, 4,
+ fmlaCode, True)
+ # FMLS (by element)
+ fmlsCode = fpOp % ("fplibMulAdd<Element>(destElem,"
+ " fplibNeg<Element>(srcElem1), srcElem2, fpscr)")
+ threeEqualRegInstX("fmls", "FmlsElemDX", "SimdFloatMultAccOp",
+ smallFloatTypes, 2, fmlsCode, True, byElem=True)
+ threeEqualRegInstX("fmls", "FmlsElemQX", "SimdFloatMultAccOp", floatTypes,
+ 4, fmlsCode, True, byElem=True)
+ threeEqualRegInstX("fmls", "FmlsElemScX", "SimdFloatMultAccOp", floatTypes,
+ 4, fmlsCode, True, byElem=True, scalar=True)
+ # FMLS (vector)
+ threeEqualRegInstX("fmls", "FmlsDX", "SimdFloatMultAccOp", smallFloatTypes,
+ 2, fmlsCode, True)
+ threeEqualRegInstX("fmls", "FmlsQX", "SimdFloatMultAccOp", floatTypes, 4,
+ fmlsCode, True)
+ # FMOV
+ fmovCode = 'destElem = imm;'
+ oneRegImmInstX("fmov", "FmovDX", "SimdMiscOp", smallFloatTypes, 2,
+ fmovCode)
+ oneRegImmInstX("fmov", "FmovQX", "SimdMiscOp", floatTypes, 4, fmovCode)
+ # FMUL (by element)
+ fmulCode = fpBinOp % "Mul"
+ threeEqualRegInstX("fmul", "FmulElemDX", "SimdFloatMultOp",
+ smallFloatTypes, 2, fmulCode, byElem=True)
+ threeEqualRegInstX("fmul", "FmulElemQX", "SimdFloatMultOp", floatTypes, 4,
+ fmulCode, byElem=True)
+ threeEqualRegInstX("fmul", "FmulElemScX", "SimdFloatMultOp", floatTypes, 4,
+ fmulCode, byElem=True, scalar=True)
+ # FMUL (vector)
+ threeEqualRegInstX("fmul", "FmulDX", "SimdFloatMultOp", smallFloatTypes, 2,
+ fmulCode)
+ threeEqualRegInstX("fmul", "FmulQX", "SimdFloatMultOp", floatTypes, 4,
+ fmulCode)
+ # FMULX
+ fmulxCode = fpBinOp % "MulX"
+ threeEqualRegInstX("fmulx", "FmulxDX", "SimdFloatMultOp", smallFloatTypes,
+ 2, fmulxCode)
+ threeEqualRegInstX("fmulx", "FmulxQX", "SimdFloatMultOp", floatTypes, 4,
+ fmulxCode)
+ threeEqualRegInstX("fmulx", "FmulxScX", "SimdFloatMultOp", floatTypes, 4,
+ fmulxCode, scalar=True)
+ # FMULX (by element)
+ threeEqualRegInstX("fmulx", "FmulxElemDX", "SimdFloatMultOp",
+ smallFloatTypes, 2, fmulxCode, byElem=True)
+ threeEqualRegInstX("fmulx", "FmulxElemQX", "SimdFloatMultOp", floatTypes,
+ 4, fmulxCode, byElem=True)
+ threeEqualRegInstX("fmulx", "FmulxElemScX", "SimdFloatMultOp", floatTypes,
+ 4, fmulxCode, byElem=True, scalar=True)
+ # FNEG
+ fnegCode = fpOp % "fplibNeg<Element>(srcElem1)"
+ twoEqualRegInstX("Neg", "FnegDX", "SimdFloatAluOp", smallFloatTypes, 2,
+ fnegCode)
+ twoEqualRegInstX("Neg", "FnegQX", "SimdFloatAluOp", floatTypes, 4,
+ fnegCode)
+ # FRECPE
+ frecpeCode = fpOp % "fplibRecipEstimate<Element>(srcElem1, fpscr)"
+ twoEqualRegInstX("frecpe", "FrecpeDX", "SimdFloatMultAccOp",
+ smallFloatTypes, 2, frecpeCode)
+ twoEqualRegInstX("frecpe", "FrecpeQX", "SimdFloatMultAccOp", floatTypes, 4,
+ frecpeCode)
+ twoEqualRegInstX("frecpe", "FrecpeScX", "SimdFloatMultAccOp", floatTypes,
+ 4, frecpeCode, scalar=True)
+ # FRECPS
+ frecpsCode = fpBinOp % "RecipStepFused"
+ threeEqualRegInstX("frecps", "FrecpsDX", "SimdFloatMultAccOp",
+ smallFloatTypes, 2, frecpsCode)
+ threeEqualRegInstX("frecps", "FrecpsQX", "SimdFloatMultAccOp", floatTypes,
+ 4, frecpsCode)
+ threeEqualRegInstX("frecps", "FrecpsScX", "SimdFloatMultAccOp", floatTypes,
+ 4, frecpsCode, scalar=True)
+ # FRECPX
+ frecpxCode = fpOp % "fplibRecpX<Element>(srcElem1, fpscr)"
+ twoEqualRegInstX("frecpx", "FrecpxX", "SimdFloatMultAccOp", floatTypes, 4,
+ frecpxCode, scalar=True)
+ # FRINTA
+ frintCode = fpOp % "fplibRoundInt<Element>(srcElem1, %s, %s, fpscr)"
+ frintaCode = frintCode % ("FPRounding_TIEAWAY", "false")
+ twoEqualRegInstX("frinta", "FrintaDX", "SimdCvtOp", smallFloatTypes, 2,
+ frintaCode)
+ twoEqualRegInstX("frinta", "FrintaQX", "SimdCvtOp", floatTypes, 4,
+ frintaCode)
+ # FRINTI
+ frintiCode = frintCode % ("FPCRRounding(fpscr)", "false")
+ twoEqualRegInstX("frinti", "FrintiDX", "SimdCvtOp", smallFloatTypes, 2,
+ frintiCode)
+ twoEqualRegInstX("frinti", "FrintiQX", "SimdCvtOp", floatTypes, 4,
+ frintiCode)
+ # FRINTM
+ frintmCode = frintCode % ("FPRounding_NEGINF", "false")
+ twoEqualRegInstX("frintm", "FrintmDX", "SimdCvtOp", smallFloatTypes, 2,
+ frintmCode)
+ twoEqualRegInstX("frintm", "FrintmQX", "SimdCvtOp", floatTypes, 4,
+ frintmCode)
+ # FRINTN
+ frintnCode = frintCode % ("FPRounding_TIEEVEN", "false")
+ twoEqualRegInstX("frintn", "FrintnDX", "SimdCvtOp", smallFloatTypes, 2,
+ frintnCode)
+ twoEqualRegInstX("frintn", "FrintnQX", "SimdCvtOp", floatTypes, 4,
+ frintnCode)
+ # FRINTP
+ frintpCode = frintCode % ("FPRounding_POSINF", "false")
+ twoEqualRegInstX("frintp", "FrintpDX", "SimdCvtOp", smallFloatTypes, 2,
+ frintpCode)
+ twoEqualRegInstX("frintp", "FrintpQX", "SimdCvtOp", floatTypes, 4,
+ frintpCode)
+ # FRINTX
+ frintxCode = frintCode % ("FPCRRounding(fpscr)", "true")
+ twoEqualRegInstX("frintx", "FrintxDX", "SimdCvtOp", smallFloatTypes, 2,
+ frintxCode)
+ twoEqualRegInstX("frintx", "FrintxQX", "SimdCvtOp", floatTypes, 4,
+ frintxCode)
+ # FRINTZ
+ frintzCode = frintCode % ("FPRounding_ZERO", "false")
+ twoEqualRegInstX("frintz", "FrintzDX", "SimdCvtOp", smallFloatTypes, 2,
+ frintzCode)
+ twoEqualRegInstX("frintz", "FrintzQX", "SimdCvtOp", floatTypes, 4,
+ frintzCode)
+ # FRSQRTE
+ frsqrteCode = fpOp % "fplibRSqrtEstimate<Element>(srcElem1, fpscr)"
+ twoEqualRegInstX("frsqrte", "FrsqrteDX", "SimdFloatSqrtOp",
+ smallFloatTypes, 2, frsqrteCode)
+ twoEqualRegInstX("frsqrte", "FrsqrteQX", "SimdFloatSqrtOp", floatTypes, 4,
+ frsqrteCode)
+ twoEqualRegInstX("frsqrte", "FrsqrteScX", "SimdFloatSqrtOp", floatTypes, 4,
+ frsqrteCode, scalar=True)
+ # FRSQRTS
+ frsqrtsCode = fpBinOp % "RSqrtStepFused"
+ threeEqualRegInstX("frsqrts", "FrsqrtsDX", "SimdFloatMiscOp",
+ smallFloatTypes, 2, frsqrtsCode)
+ threeEqualRegInstX("frsqrts", "FrsqrtsQX", "SimdFloatMiscOp", floatTypes,
+ 4, frsqrtsCode)
+ threeEqualRegInstX("frsqrts", "FrsqrtsScX", "SimdFloatMiscOp", floatTypes,
+ 4, frsqrtsCode, scalar=True)
+ # FSQRT
+ fsqrtCode = fpOp % "fplibSqrt<Element>(srcElem1, fpscr)"
+ twoEqualRegInstX("fsqrt", "FsqrtDX", "SimdFloatSqrtOp", smallFloatTypes, 2,
+ fsqrtCode)
+ twoEqualRegInstX("fsqrt", "FsqrtQX", "SimdFloatSqrtOp", floatTypes, 4,
+ fsqrtCode)
+ # FSUB
+ fsubCode = fpBinOp % "Sub"
+ threeEqualRegInstX("fsub", "FsubDX", "SimdFloatAddOp", smallFloatTypes, 2,
+ fsubCode)
+ threeEqualRegInstX("fsub", "FsubQX", "SimdFloatAddOp", floatTypes, 4,
+ fsubCode)
+ # INS (element)
+ insFromVecElemInstX("ins", "InsElemX", "SimdMiscOp", unsignedTypes, 4)
+ # INS (general register)
+ insFromGprInstX("ins", "InsGprWX", "SimdMiscOp", smallUnsignedTypes, 4,
+ 'W')
+ insFromGprInstX("ins", "InsGprXX", "SimdMiscOp", unsignedTypes, 4, 'X')
+ # MLA (by element)
+ mlaCode = "destElem += srcElem1 * srcElem2;"
+ threeEqualRegInstX("mla", "MlaElemDX", "SimdMultAccOp",
+ ("uint16_t", "uint32_t"), 2, mlaCode, True, byElem=True)
+ threeEqualRegInstX("mla", "MlaElemQX", "SimdMultAccOp",
+ ("uint16_t", "uint32_t"), 4, mlaCode, True, byElem=True)
+ # MLA (vector)
+ threeEqualRegInstX("mla", "MlaDX", "SimdMultAccOp", smallUnsignedTypes, 2,
+ mlaCode, True)
+ threeEqualRegInstX("mla", "MlaQX", "SimdMultAccOp", smallUnsignedTypes, 4,
+ mlaCode, True)
+ # MLS (by element)
+ mlsCode = "destElem -= srcElem1 * srcElem2;"
+ threeEqualRegInstX("mls", "MlsElemDX", "SimdMultAccOp",
+ ("uint16_t", "uint32_t"), 2, mlsCode, True, byElem=True)
+ threeEqualRegInstX("mls", "MlsElemQX", "SimdMultAccOp",
+ ("uint16_t", "uint32_t"), 4, mlsCode, True, byElem=True)
+ # MLS (vector)
+ threeEqualRegInstX("mls", "MlsDX", "SimdMultAccOp", smallUnsignedTypes, 2,
+ mlsCode, True)
+ threeEqualRegInstX("mls", "MlsQX", "SimdMultAccOp", smallUnsignedTypes, 4,
+ mlsCode, True)
+ # MOV (element) -> alias to INS (element)
+ # MOV (from general) -> alias to INS (general register)
+ # MOV (scalar) -> alias to DUP (element)
+ # MOV (to general) -> alias to UMOV
+ # MOV (vector) -> alias to ORR (register)
+ # MOVI
+ movImmCode = "destElem = imm;"
+ oneRegImmInstX("movi", "MoviDX", "SimdMiscOp", ("uint64_t",), 2,
+ movImmCode)
+ oneRegImmInstX("movi", "MoviQX", "SimdMiscOp", ("uint64_t",), 4,
+ movImmCode)
+ # MUL (by element)
+ mulCode = "destElem = srcElem1 * srcElem2;"
+ threeEqualRegInstX("mul", "MulElemDX", "SimdMultOp",
+ ("uint16_t", "uint32_t"), 2, mulCode, byElem=True)
+ threeEqualRegInstX("mul", "MulElemQX", "SimdMultOp",
+ ("uint16_t", "uint32_t"), 4, mulCode, byElem=True)
+ # MUL (vector)
+ threeEqualRegInstX("mul", "MulDX", "SimdMultOp", smallUnsignedTypes, 2,
+ mulCode)
+ threeEqualRegInstX("mul", "MulQX", "SimdMultOp", smallUnsignedTypes, 4,
+ mulCode)
+ # MVN
+ mvnCode = "destElem = ~srcElem1;"
+ twoEqualRegInstX("mvn", "MvnDX", "SimdAluOp", ("uint64_t",), 2, mvnCode)
+ twoEqualRegInstX("mvn", "MvnQX", "SimdAluOp", ("uint64_t",), 4, mvnCode)
+ # MVNI
+ mvniCode = "destElem = ~imm;"
+ oneRegImmInstX("mvni", "MvniDX", "SimdAluOp", ("uint64_t",), 2, mvniCode)
+ oneRegImmInstX("mvni", "MvniQX", "SimdAluOp", ("uint64_t",), 4, mvniCode)
+ # NEG
+ negCode = "destElem = -srcElem1;"
+ twoEqualRegInstX("neg", "NegDX", "SimdAluOp", signedTypes, 2, negCode)
+ twoEqualRegInstX("neg", "NegQX", "SimdAluOp", signedTypes, 4, negCode)
+ # NOT -> alias to MVN
+ # ORN
+ ornCode = "destElem = srcElem1 | ~srcElem2;"
+ threeEqualRegInstX("orn", "OrnDX", "SimdAluOp", ("uint64_t",), 2, ornCode)
+ threeEqualRegInstX("orn", "OrnQX", "SimdAluOp", ("uint64_t",), 4, ornCode)
+ # ORR (immediate)
+ orrImmCode = "destElem |= imm;"
+ oneRegImmInstX("orr", "OrrImmDX", "SimdAluOp", ("uint64_t",), 2,
+ orrImmCode, True)
+ oneRegImmInstX("orr", "OrrImmQX", "SimdAluOp", ("uint64_t",), 4,
+ orrImmCode, True)
+ # ORR (register)
+ orrCode = "destElem = srcElem1 | srcElem2;"
+ threeEqualRegInstX("orr", "OrrDX", "SimdAluOp", ("uint64_t",), 2, orrCode)
+ threeEqualRegInstX("orr", "OrrQX", "SimdAluOp", ("uint64_t",), 4, orrCode)
+ # PMUL
+ pmulCode = '''
+ destElem = 0;
+ for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
+ if (bits(srcElem2, j))
+ destElem ^= srcElem1 << j;
+ }
+ '''
+ threeEqualRegInstX("pmul", "PmulDX", "SimdMultOp", ("uint8_t",), 2,
+ pmulCode)
+ threeEqualRegInstX("pmul", "PmulQX", "SimdMultOp", ("uint8_t",), 4,
+ pmulCode)
+ # PMULL, PMULL2
+ # Note: 64-bit PMULL is not available (Crypto. Extension)
+ pmullCode = '''
+ destElem = 0;
+ for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
+ if (bits(srcElem2, j))
+ destElem ^= (BigElement)srcElem1 << j;
+ }
+ '''
+ threeRegLongInstX("pmull", "PmullX", "SimdMultOp", ("uint8_t",), pmullCode)
+ threeRegLongInstX("pmull", "Pmull2X", "SimdMultOp", ("uint8_t",),
+ pmullCode, hi=True)
+ # RADDHN, RADDHN2
+ raddhnCode = '''
+ destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
+ ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
+ (sizeof(Element) * 8);
+ '''
+ threeRegNarrowInstX("raddhn", "RaddhnX", "SimdAddOp", smallUnsignedTypes,
+ raddhnCode)
+ threeRegNarrowInstX("raddhn2", "Raddhn2X", "SimdAddOp", smallUnsignedTypes,
+ raddhnCode, hi=True)
+ # RBIT
+ rbitCode = '''
+ destElem = 0;
+ Element temp = srcElem1;
+ for (int i = 0; i < 8 * sizeof(Element); i++) {
+ destElem = destElem | ((temp & 0x1) <<
+ (8 * sizeof(Element) - 1 - i));
+ temp >>= 1;
+ }
+ '''
+ twoEqualRegInstX("rbit", "RbitDX", "SimdAluOp", ("uint8_t",), 2, rbitCode)
+ twoEqualRegInstX("rbit", "RbitQX", "SimdAluOp", ("uint8_t",), 4, rbitCode)
+ # REV16
+ rev16Code = '''
+ destElem = srcElem1;
+ unsigned groupSize = ((1 << 1) / sizeof(Element));
+ unsigned reverseMask = (groupSize - 1);
+ j = i ^ reverseMask;
+ '''
+ twoEqualRegInstX("rev16", "Rev16DX", "SimdAluOp", ("uint8_t",), 2,
+ rev16Code)
+ twoEqualRegInstX("rev16", "Rev16QX", "SimdAluOp", ("uint8_t",), 4,
+ rev16Code)
+ # REV32
+ rev32Code = '''
+ destElem = srcElem1;
+ unsigned groupSize = ((1 << 2) / sizeof(Element));
+ unsigned reverseMask = (groupSize - 1);
+ j = i ^ reverseMask;
+ '''
+ twoEqualRegInstX("rev32", "Rev32DX", "SimdAluOp", ("uint8_t", "uint16_t"),
+ 2, rev32Code)
+ twoEqualRegInstX("rev32", "Rev32QX", "SimdAluOp", ("uint8_t", "uint16_t"),
+ 4, rev32Code)
+ # REV64
+ rev64Code = '''
+ destElem = srcElem1;
+ unsigned groupSize = ((1 << 3) / sizeof(Element));
+ unsigned reverseMask = (groupSize - 1);
+ j = i ^ reverseMask;
+ '''
+ twoEqualRegInstX("rev64", "Rev64DX", "SimdAluOp", smallUnsignedTypes, 2,
+ rev64Code)
+ twoEqualRegInstX("rev64", "Rev64QX", "SimdAluOp", smallUnsignedTypes, 4,
+ rev64Code)
+ # RSHRN, RSHRN2
+ rshrnCode = '''
+ if (imm > sizeof(srcElem1) * 8) {
+ destElem = 0;
+ } else if (imm) {
+ Element rBit = bits(srcElem1, imm - 1);
+ destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
+ } else {
+ destElem = srcElem1;
+ }
+ '''
+ twoRegNarrowInstX("rshrn", "RshrnX", "SimdShiftOp", smallUnsignedTypes,
+ rshrnCode, hasImm=True)
+ twoRegNarrowInstX("rshrn2", "Rshrn2X", "SimdShiftOp", smallUnsignedTypes,
+ rshrnCode, hasImm=True, hi=True)
+ # RSUBHN, RSUBHN2
+ rsubhnCode = '''
+ destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
+ ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
+ (sizeof(Element) * 8);
+ '''
+ threeRegNarrowInstX("rsubhn", "RsubhnX", "SimdAddOp", smallTypes,
+ rsubhnCode)
+ threeRegNarrowInstX("rsubhn2", "Rsubhn2X", "SimdAddOp", smallTypes,
+ rsubhnCode, hi=True)
+ # SABA
+ abaCode = '''
+ destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
+ (srcElem2 - srcElem1);
+ '''
+ threeEqualRegInstX("saba", "SabaDX", "SimdAddAccOp", smallSignedTypes, 2,
+ abaCode, True)
+ threeEqualRegInstX("saba", "SabaQX", "SimdAddAccOp", smallSignedTypes, 4,
+ abaCode, True)
+ # SABAL, SABAL2
+ abalCode = '''
+ destElem += (srcElem1 > srcElem2) ?
+ ((BigElement)srcElem1 - (BigElement)srcElem2) :
+ ((BigElement)srcElem2 - (BigElement)srcElem1);
+ '''
+ threeRegLongInstX("sabal", "SabalX", "SimdAddAccOp", smallSignedTypes,
+ abalCode, True)
+ threeRegLongInstX("sabal2", "Sabal2X", "SimdAddAccOp", smallSignedTypes,
+ abalCode, True, hi=True)
+ # SABD
+ abdCode = '''
+ destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
+ (srcElem2 - srcElem1);
+ '''
+ threeEqualRegInstX("sabd", "SabdDX", "SimdAddOp", smallSignedTypes, 2,
+ abdCode)
+ threeEqualRegInstX("sabd", "SabdQX", "SimdAddOp", smallSignedTypes, 4,
+ abdCode)
+ # SABDL, SABDL2
+ abdlCode = '''
+ destElem = (srcElem1 > srcElem2) ?
+ ((BigElement)srcElem1 - (BigElement)srcElem2) :
+ ((BigElement)srcElem2 - (BigElement)srcElem1);
+ '''
+ threeRegLongInstX("sabdl", "SabdlX", "SimdAddAccOp", smallSignedTypes,
+ abdlCode, True)
+ threeRegLongInstX("sabdl2", "Sabdl2X", "SimdAddAccOp", smallSignedTypes,
+ abdlCode, True, hi=True)
+ # SADALP
+ adalpCode = "destElem += (BigElement)srcElem1 + (BigElement)srcElem2;"
+ twoRegCondenseInstX("sadalp", "SadalpDX", "SimdAddOp", smallSignedTypes, 2,
+ adalpCode, True)
+ twoRegCondenseInstX("sadalp", "SadalpQX", "SimdAddOp", smallSignedTypes, 4,
+ adalpCode, True)
+ # SADDL, SADDL2
+ addlwCode = "destElem = (BigElement)srcElem1 + (BigElement)srcElem2;"
+ threeRegLongInstX("saddl", "SaddlX", "SimdAddAccOp", smallSignedTypes,
+ addlwCode)
+ threeRegLongInstX("saddl2", "Saddl2X", "SimdAddAccOp", smallSignedTypes,
+ addlwCode, hi=True)
+ # SADDLP
+ twoRegCondenseInstX("saddlp", "SaddlpDX", "SimdAddOp", smallSignedTypes, 2,
+ addlwCode)
+ twoRegCondenseInstX("saddlp", "SaddlpQX", "SimdAddOp", smallSignedTypes, 4,
+ addlwCode)
+ # SADDLV
+ # Note: SimdAddOp can be a bit optimistic here
+ addAcrossLongCode = "destElem += (BigElement)srcElem1;"
+ twoRegAcrossInstX("saddlv", "SaddlvDX", "SimdAddOp", ("int8_t", "int16_t"),
+ 2, addAcrossLongCode, long=True)
+ twoRegAcrossInstX("saddlv", "SaddlvQX", "SimdAddOp", ("int8_t", "int16_t"),
+ 4, addAcrossLongCode, long=True)
+ twoRegAcrossInstX("saddlv", "SaddlvBQX", "SimdAddOp", ("int32_t",), 4,
+ addAcrossLongCode, doubleDest=True, long=True)
+ # SADDW, SADDW2
+ threeRegWideInstX("saddw", "SaddwX", "SimdAddAccOp", smallSignedTypes,
+ addlwCode)
+ threeRegWideInstX("saddw2", "Saddw2X", "SimdAddAccOp", smallSignedTypes,
+ addlwCode, hi=True)
+ # SCVTF (fixed-point)
+ scvtfFixedCode = fpOp % ("fplibFixedToFP<Element>((int%d_t) srcElem1, imm,"
+ " false, FPCRRounding(fpscr), fpscr)")
+ twoEqualRegInstX("scvtf", "ScvtfFixedDX", "SimdCvtOp", smallFloatTypes, 2,
+ scvtfFixedCode % 32, hasImm=True)
+ twoEqualRegInstX("scvtf", "ScvtfFixedSQX", "SimdCvtOp", smallFloatTypes, 4,
+ scvtfFixedCode % 32, hasImm=True)
+ twoEqualRegInstX("scvtf", "ScvtfFixedDQX", "SimdCvtOp", ("uint64_t",), 4,
+ scvtfFixedCode % 64, hasImm=True)
+ twoEqualRegInstX("scvtf", "ScvtfFixedScSX", "SimdCvtOp", smallFloatTypes,
+ 4, scvtfFixedCode % 32, hasImm=True, scalar=True)
+ twoEqualRegInstX("scvtf", "ScvtfFixedScDX", "SimdCvtOp", ("uint64_t",), 4,
+ scvtfFixedCode % 64, hasImm=True, scalar=True)
+ # SCVTF (integer)
+ scvtfIntCode = fpOp % ("fplibFixedToFP<Element>((int%d_t) srcElem1, 0,"
+ " false, FPCRRounding(fpscr), fpscr)")
+ twoEqualRegInstX("scvtf", "ScvtfIntDX", "SimdCvtOp", smallFloatTypes, 2,
+ scvtfIntCode % 32)
+ twoEqualRegInstX("scvtf", "ScvtfIntSQX", "SimdCvtOp", smallFloatTypes, 4,
+ scvtfIntCode % 32)
+ twoEqualRegInstX("scvtf", "ScvtfIntDQX", "SimdCvtOp", ("uint64_t",), 4,
+ scvtfIntCode % 64)
+ twoEqualRegInstX("scvtf", "ScvtfIntScSX", "SimdCvtOp", smallFloatTypes, 4,
+ scvtfIntCode % 32, scalar=True)
+ twoEqualRegInstX("scvtf", "ScvtfIntScDX", "SimdCvtOp", ("uint64_t",), 4,
+ scvtfIntCode % 64, scalar=True)
+ # SHADD
+ haddCode = '''
+ Element carryBit =
+ (((unsigned)srcElem1 & 0x1) +
+ ((unsigned)srcElem2 & 0x1)) >> 1;
+ // Use division instead of a shift to ensure the sign extension works
+ // right. The compiler will figure out if it can be a shift. Mask the
+ // inputs so they get truncated correctly.
+ destElem = (((srcElem1 & ~(Element)1) / 2) +
+ ((srcElem2 & ~(Element)1) / 2)) + carryBit;
+ '''
+ threeEqualRegInstX("shadd", "ShaddDX", "SimdAddOp", smallSignedTypes, 2,
+ haddCode)
+ threeEqualRegInstX("shadd", "ShaddQX", "SimdAddOp", smallSignedTypes, 4,
+ haddCode)
+ # SHL
+ shlCode = '''
+ if (imm >= sizeof(Element) * 8)
+ destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
+ else
+ destElem = srcElem1 << imm;
+ '''
+ twoEqualRegInstX("shl", "ShlDX", "SimdShiftOp", unsignedTypes, 2, shlCode,
+ hasImm=True)
+ twoEqualRegInstX("shl", "ShlQX", "SimdShiftOp", unsignedTypes, 4, shlCode,
+ hasImm=True)
+ # SHLL, SHLL2
+ shllCode = "destElem = ((BigElement)srcElem1) << (sizeof(Element) * 8);"
+ twoRegLongInstX("shll", "ShllX", "SimdShiftOp", smallTypes, shllCode)
+ twoRegLongInstX("shll", "Shll2X", "SimdShiftOp", smallTypes, shllCode,
+ hi=True)
+ # SHRN, SHRN2
+ shrnCode = '''
+ if (imm >= sizeof(srcElem1) * 8) {
+ destElem = 0;
+ } else {
+ destElem = srcElem1 >> imm;
+ }
+ '''
+ twoRegNarrowInstX("shrn", "ShrnX", "SimdShiftOp", smallUnsignedTypes,
+ shrnCode, hasImm=True)
+ twoRegNarrowInstX("shrn2", "Shrn2X", "SimdShiftOp", smallUnsignedTypes,
+ shrnCode, hasImm=True, hi=True)
+ # SHSUB
+ hsubCode = '''
+ Element borrowBit =
+ (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
+ // Use division instead of a shift to ensure the sign extension works
+ // right. The compiler will figure out if it can be a shift. Mask the
+ // inputs so they get truncated correctly.
+ destElem = (((srcElem1 & ~(Element)1) / 2) -
+ ((srcElem2 & ~(Element)1) / 2)) - borrowBit;
+ '''
+ threeEqualRegInstX("shsub", "ShsubDX", "SimdAddOp", smallSignedTypes, 2,
+ hsubCode)
+ threeEqualRegInstX("shsub", "ShsubQX", "SimdAddOp", smallSignedTypes, 4,
+ hsubCode)
+ # SLI
+ sliCode = '''
+ if (imm >= sizeof(Element) * 8)
+ destElem = destElem;
+ else
+ destElem = (srcElem1 << imm) | (destElem & mask(imm));
+ '''
+ twoEqualRegInstX("sli", "SliDX", "SimdShiftOp", unsignedTypes, 2, sliCode,
+ True, hasImm=True)
+ twoEqualRegInstX("sli", "SliQX", "SimdShiftOp", unsignedTypes, 4, sliCode,
+ True, hasImm=True)
+ # SMAX
+ maxCode = "destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;"
+ threeEqualRegInstX("smax", "SmaxDX", "SimdCmpOp", smallSignedTypes, 2,
+ maxCode)
+ threeEqualRegInstX("smax", "SmaxQX", "SimdCmpOp", smallSignedTypes, 4,
+ maxCode)
+ # SMAXP
+ threeEqualRegInstX("smaxp", "SmaxpDX", "SimdCmpOp", smallSignedTypes, 2,
+ maxCode, pairwise=True)
+ threeEqualRegInstX("smaxp", "SmaxpQX", "SimdCmpOp", smallSignedTypes, 4,
+ maxCode, pairwise=True)
+ # SMAXV
+ maxAcrossCode = '''
+ if (i == 0 || srcElem1 > destElem)
+ destElem = srcElem1;
+ '''
+ twoRegAcrossInstX("smaxv", "SmaxvDX", "SimdCmpOp", ("int8_t", "int16_t"),
+ 2, maxAcrossCode)
+ twoRegAcrossInstX("smaxv", "SmaxvQX", "SimdCmpOp", smallSignedTypes, 4,
+ maxAcrossCode)
+ # SMIN
+ minCode = "destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;"
+ threeEqualRegInstX("smin", "SminDX", "SimdCmpOp", smallSignedTypes, 2,
+ minCode)
+ threeEqualRegInstX("smin", "SminQX", "SimdCmpOp", smallSignedTypes, 4,
+ minCode)
+ # SMINP
+ threeEqualRegInstX("sminp", "SminpDX", "SimdCmpOp", smallSignedTypes, 2,
+ minCode, pairwise=True)
+ threeEqualRegInstX("sminp", "SminpQX", "SimdCmpOp", smallSignedTypes, 4,
+ minCode, pairwise=True)
+ # SMINV
+ minAcrossCode = '''
+ if (i == 0 || srcElem1 < destElem)
+ destElem = srcElem1;
+ '''
+ twoRegAcrossInstX("sminv", "SminvDX", "SimdCmpOp", ("int8_t", "int16_t"),
+ 2, minAcrossCode)
+ twoRegAcrossInstX("sminv", "SminvQX", "SimdCmpOp", smallSignedTypes, 4,
+ minAcrossCode)
+ # SMLAL, SMLAL2 (by element)
+ mlalCode = "destElem += (BigElement)srcElem1 * (BigElement)srcElem2;"
+ threeRegLongInstX("smlal", "SmlalElemX", "SimdMultAccOp",
+ ("int16_t", "int32_t"), mlalCode, True, byElem=True)
+ threeRegLongInstX("smlal", "SmlalElem2X", "SimdMultAccOp",
+ ("int16_t", "int32_t"), mlalCode, True, byElem=True,
+ hi=True)
+ # SMLAL, SMLAL2 (vector)
+ threeRegLongInstX("smlal", "SmlalX", "SimdMultAccOp", smallSignedTypes,
+ mlalCode, True)
+ threeRegLongInstX("smlal", "Smlal2X", "SimdMultAccOp", smallSignedTypes,
+ mlalCode, True, hi=True)
+ # SMLSL, SMLSL2 (by element)
+ mlslCode = "destElem -= (BigElement)srcElem1 * (BigElement)srcElem2;"
+ threeRegLongInstX("smlsl", "SmlslElemX", "SimdMultAccOp", smallSignedTypes,
+ mlslCode, True, byElem=True)
+ threeRegLongInstX("smlsl", "SmlslElem2X", "SimdMultAccOp",
+ smallSignedTypes, mlslCode, True, byElem=True, hi=True)
+ # SMLSL, SMLSL2 (vector)
+ threeRegLongInstX("smlsl", "SmlslX", "SimdMultAccOp", smallSignedTypes,
+ mlslCode, True)
+ threeRegLongInstX("smlsl", "Smlsl2X", "SimdMultAccOp", smallSignedTypes,
+ mlslCode, True, hi=True)
+ # SMOV
+ insToGprInstX("smov", "SmovWX", "SimdMiscOp", ("int8_t", "int16_t"), 4,
+ 'W', True)
+ insToGprInstX("smov", "SmovXX", "SimdMiscOp", smallSignedTypes, 4, 'X',
+ True)
+ # SMULL, SMULL2 (by element)
+ mullCode = "destElem = (BigElement)srcElem1 * (BigElement)srcElem2;"
+ threeRegLongInstX("smull", "SmullElemX", "SimdMultOp", smallSignedTypes,
+ mullCode, byElem=True)
+ threeRegLongInstX("smull", "SmullElem2X", "SimdMultOp", smallSignedTypes,
+ mullCode, byElem=True, hi=True)
+ # SMULL, SMULL2 (vector)
+ threeRegLongInstX("smull", "SmullX", "SimdMultOp", smallSignedTypes,
+ mullCode)
+ threeRegLongInstX("smull", "Smull2X", "SimdMultOp", smallSignedTypes,
+ mullCode, hi=True)
+ # SQABS
+ sqabsCode = '''
+ FPSCR fpscr = (FPSCR) FpscrQc;
+ if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
+ fpscr.qc = 1;
+ destElem = ~srcElem1;
+ } else if (srcElem1 < 0) {
+ destElem = -srcElem1;
+ } else {
+ destElem = srcElem1;
+ }
+ FpscrQc = fpscr;
+ '''
+ twoEqualRegInstX("sqabs", "SqabsDX", "SimdAluOp", smallSignedTypes, 2,
+ sqabsCode)
+ twoEqualRegInstX("sqabs", "SqabsQX", "SimdAluOp", signedTypes, 4,
+ sqabsCode)
+ twoEqualRegInstX("sqabs", "SqabsScX", "SimdAluOp", signedTypes, 4,
+ sqabsCode, scalar=True)
+ # SQADD
+ sqaddCode = '''
+ destElem = srcElem1 + srcElem2;
+ FPSCR fpscr = (FPSCR) FpscrQc;
+ bool negDest = (destElem < 0);
+ bool negSrc1 = (srcElem1 < 0);
+ bool negSrc2 = (srcElem2 < 0);
+ if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
+ destElem = (Element)1 << (sizeof(Element) * 8 - 1);
+ if (negDest)
+ destElem -= 1;
+ fpscr.qc = 1;
+ }
+ FpscrQc = fpscr;
+ '''
+ threeEqualRegInstX("sqadd", "SqaddDX", "SimdAddOp", smallSignedTypes, 2,
+ sqaddCode)
+ threeEqualRegInstX("sqadd", "SqaddQX", "SimdAddOp", signedTypes, 4,
+ sqaddCode)
+ threeEqualRegInstX("sqadd", "SqaddScX", "SimdAddOp", signedTypes, 4,
+ sqaddCode, scalar=True)
+ # SQDMLAL, SQDMLAL2 (by element)
+ qdmlalCode = '''
+ FPSCR fpscr = (FPSCR) FpscrQc;
+ BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
+ Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
+ Element halfNeg = maxNeg / 2;
+ if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
+ (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
+ (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
+ midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
+ fpscr.qc = 1;
+ }
+ bool negPreDest = ltz(destElem);
+ destElem += midElem;
+ bool negDest = ltz(destElem);
+ bool negMid = ltz(midElem);
+ if (negPreDest == negMid && negMid != negDest) {
+ destElem = mask(sizeof(BigElement) * 8 - 1);
+ if (negPreDest)
+ destElem = ~destElem;
+ fpscr.qc = 1;
+ }
+ FpscrQc = fpscr;
+ '''
+ threeRegLongInstX("sqdmlal", "SqdmlalElemX", "SimdMultAccOp",
+ ("int16_t", "int32_t"), qdmlalCode, True, byElem=True)
+ threeRegLongInstX("sqdmlal", "SqdmlalElem2X", "SimdMultAccOp",
+ ("int16_t", "int32_t"), qdmlalCode, True, byElem=True,
+ hi=True)
+ threeRegLongInstX("sqdmlal", "SqdmlalElemScX", "SimdMultAccOp",
+ ("int16_t", "int32_t"), qdmlalCode, True, byElem=True,
+ scalar=True)
+ # SQDMLAL, SQDMLAL2 (vector)
+ threeRegLongInstX("sqdmlal", "SqdmlalX", "SimdMultAccOp",
+ ("int16_t", "int32_t"), qdmlalCode, True)
+ threeRegLongInstX("sqdmlal", "Sqdmlal2X", "SimdMultAccOp",
+ ("int16_t", "int32_t"), qdmlalCode, True, hi=True)
+ threeRegLongInstX("sqdmlal", "SqdmlalScX", "SimdMultAccOp",
+ ("int16_t", "int32_t"), qdmlalCode, True, scalar=True)
+ # SQDMLSL, SQDMLSL2 (by element)
+ qdmlslCode = '''
+ FPSCR fpscr = (FPSCR) FpscrQc;
+ BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
+ Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
+ Element halfNeg = maxNeg / 2;
+ if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
+ (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
+ (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
+ midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
+ fpscr.qc = 1;
+ }
+ bool negPreDest = ltz(destElem);
+ destElem -= midElem;
+ bool negDest = ltz(destElem);
+ bool posMid = ltz((BigElement)-midElem);
+ if (negPreDest == posMid && posMid != negDest) {
+ destElem = mask(sizeof(BigElement) * 8 - 1);
+ if (negPreDest)
+ destElem = ~destElem;
+ fpscr.qc = 1;
+ }
+ FpscrQc = fpscr;
+ '''
+ threeRegLongInstX("sqdmlsl", "SqdmlslElemX", "SimdMultAccOp",
+ ("int16_t", "int32_t"), qdmlslCode, True, byElem=True)
+ threeRegLongInstX("sqdmlsl", "SqdmlslElem2X", "SimdMultAccOp",
+ ("int16_t", "int32_t"), qdmlslCode, True, byElem=True,
+ hi=True)
+ threeRegLongInstX("sqdmlsl", "SqdmlslElemScX", "SimdMultAccOp",
+ ("int16_t", "int32_t"), qdmlslCode, True, byElem=True,
+ scalar=True)
+ # SQDMLSL, SQDMLSL2 (vector)
+ threeRegLongInstX("sqdmlsl", "SqdmlslX", "SimdMultAccOp",
+ ("int16_t", "int32_t"), qdmlslCode, True)
+ threeRegLongInstX("sqdmlsl", "Sqdmlsl2X", "SimdMultAccOp",
+ ("int16_t", "int32_t"), qdmlslCode, True, hi=True)
+ threeRegLongInstX("sqdmlsl", "SqdmlslScX", "SimdMultAccOp",
+ ("int16_t", "int32_t"), qdmlslCode, True, scalar=True)
+ # SQDMULH (by element)
+ sqdmulhCode = '''
+ FPSCR fpscr = (FPSCR) FpscrQc;
+ destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
+ (sizeof(Element) * 8);
+ if (srcElem1 == srcElem2 &&
+ srcElem1 == (Element)((Element)1 <<
+ (sizeof(Element) * 8 - 1))) {
+ destElem = ~srcElem1;
+ fpscr.qc = 1;
+ }
+ FpscrQc = fpscr;
+ '''
+ threeEqualRegInstX("sqdmulh", "SqdmulhElemDX", "SimdMultOp",
+ ("int16_t", "int32_t"), 2, sqdmulhCode, byElem=True)
+ threeEqualRegInstX("sqdmulh", "SqdmulhElemQX", "SimdMultOp",
+ ("int16_t", "int32_t"), 4, sqdmulhCode, byElem=True)
+ threeEqualRegInstX("sqdmulh", "SqdmulhElemScX", "SimdMultOp",
+ ("int16_t", "int32_t"), 4, sqdmulhCode, byElem=True,
+ scalar=True)
+ # SQDMULH (vector)
+ threeEqualRegInstX("sqdmulh", "SqdmulhDX", "SimdMultOp",
+ ("int16_t", "int32_t"), 2, sqdmulhCode)
+ threeEqualRegInstX("sqdmulh", "SqdmulhQX", "SimdMultOp",
+ ("int16_t", "int32_t"), 4, sqdmulhCode)
+ threeEqualRegInstX("sqdmulh", "SqdmulhScX", "SimdMultOp",
+ ("int16_t", "int32_t"), 4, sqdmulhCode, scalar=True)
+ # SQDMULL, SQDMULL2 (by element)
+ qdmullCode = '''
+ FPSCR fpscr = (FPSCR) FpscrQc;
+ destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
+ if (srcElem1 == srcElem2 &&
+ srcElem1 == (Element)((Element)1 <<
+ (Element)(sizeof(Element) * 8 - 1))) {
+ destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
+ fpscr.qc = 1;
+ }
+ FpscrQc = fpscr;
+ '''
+ threeRegLongInstX("sqdmull", "SqdmullElemX", "SimdMultOp",
+ ("int16_t", "int32_t"), qdmullCode, True, byElem=True)
+ threeRegLongInstX("sqdmull", "SqdmullElem2X", "SimdMultOp",
+ ("int16_t", "int32_t"), qdmullCode, True, byElem=True,
+ hi=True)
+ threeRegLongInstX("sqdmull", "SqdmullElemScX", "SimdMultOp",
+ ("int16_t", "int32_t"), qdmullCode, True, byElem=True,
+ scalar=True)
+ # SQDMULL, SQDMULL2 (vector)
+ threeRegLongInstX("sqdmull", "SqdmullX", "SimdMultOp",
+ ("int16_t", "int32_t"), qdmullCode, True)
+ threeRegLongInstX("sqdmull", "Sqdmull2X", "SimdMultOp",
+ ("int16_t", "int32_t"), qdmullCode, True, hi=True)
+ threeRegLongInstX("sqdmull", "SqdmullScX", "SimdMultOp",
+ ("int16_t", "int32_t"), qdmullCode, True, scalar=True)
+ # SQNEG
+ sqnegCode = '''
+ FPSCR fpscr = (FPSCR) FpscrQc;
+ if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
+ fpscr.qc = 1;
+ destElem = ~srcElem1;
+ } else {
+ destElem = -srcElem1;
+ }
+ FpscrQc = fpscr;
+ '''
+ twoEqualRegInstX("sqneg", "SqnegDX", "SimdAluOp", smallSignedTypes, 2,
+ sqnegCode)
+ twoEqualRegInstX("sqneg", "SqnegQX", "SimdAluOp", signedTypes, 4,
+ sqnegCode)
+ twoEqualRegInstX("sqneg", "SqnegScX", "SimdAluOp", signedTypes, 4,
+ sqnegCode, scalar=True)
+ # SQRDMULH (by element)
+ sqrdmulhCode = '''
+ FPSCR fpscr = (FPSCR) FpscrQc;
+ destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
+ ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
+ (sizeof(Element) * 8);
+ Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
+ Element halfNeg = maxNeg / 2;
+ if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
+ (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
+ (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
+ if (destElem < 0) {
+ destElem = mask(sizeof(Element) * 8 - 1);
+ } else {
+ destElem = (Element)1 << (sizeof(Element) * 8 - 1);
+ }
+ fpscr.qc = 1;
+ }
+ FpscrQc = fpscr;
+ '''
+ threeEqualRegInstX("sqrdmulh", "SqrdmulhElemDX", "SimdMultOp",
+ ("int16_t", "int32_t"), 2, sqrdmulhCode, byElem=True)
+ threeEqualRegInstX("sqrdmulh", "SqrdmulhElemQX", "SimdMultOp",
+ ("int16_t", "int32_t"), 4, sqrdmulhCode, byElem=True)
+ threeEqualRegInstX("sqrdmulh", "SqrdmulhElemScX", "SimdMultOp",
+ ("int16_t", "int32_t"), 4, sqrdmulhCode, byElem=True,
+ scalar=True)
+ # SQRDMULH (vector)
+ threeEqualRegInstX("sqrdmulh", "SqrdmulhDX", "SimdMultOp",
+ ("int16_t", "int32_t"), 2, sqrdmulhCode)
+ threeEqualRegInstX("sqrdmulh", "SqrdmulhQX", "SimdMultOp",
+ ("int16_t", "int32_t"), 4, sqrdmulhCode)
+ threeEqualRegInstX("sqrdmulh", "SqrdmulhScX", "SimdMultOp",
+ ("int16_t", "int32_t"), 4, sqrdmulhCode, scalar=True)
+ # SQRSHL
+ sqrshlCode = '''
+ int16_t shiftAmt = (int8_t)srcElem2;
+ FPSCR fpscr = (FPSCR) FpscrQc;
+ if (shiftAmt < 0) {
+ shiftAmt = -shiftAmt;
+ Element rBit = 0;
+ if (shiftAmt <= sizeof(Element) * 8)
+ rBit = bits(srcElem1, shiftAmt - 1);
+ if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
+ rBit = 1;
+ if (shiftAmt >= sizeof(Element) * 8) {
+ shiftAmt = sizeof(Element) * 8 - 1;
+ destElem = 0;
+ } else {
+ destElem = (srcElem1 >> shiftAmt);
+ }
+ // Make sure the right shift sign extended when it should.
+ if (srcElem1 < 0 && destElem >= 0) {
+ destElem |= -((Element)1 << (sizeof(Element) * 8 -
+ 1 - shiftAmt));
+ }
+ destElem += rBit;
+ } else if (shiftAmt > 0) {
+ bool sat = false;
+ if (shiftAmt >= sizeof(Element) * 8) {
+ if (srcElem1 != 0)
+ sat = true;
+ else
+ destElem = 0;
+ } else {
+ if (bits((uint64_t) srcElem1, sizeof(Element) * 8 - 1,
+ sizeof(Element) * 8 - 1 - shiftAmt) !=
+ ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
+ sat = true;
+ } else {
+ destElem = srcElem1 << shiftAmt;
+ }
+ }
+ if (sat) {
+ fpscr.qc = 1;
+ destElem = mask(sizeof(Element) * 8 - 1);
+ if (srcElem1 < 0)
+ destElem = ~destElem;
+ }
+ } else {
+ destElem = srcElem1;
+ }
+ FpscrQc = fpscr;
+ '''
+ threeEqualRegInstX("sqrshl", "SqrshlDX", "SimdCmpOp", smallSignedTypes, 2,
+ sqrshlCode)
+ threeEqualRegInstX("sqrshl", "SqrshlQX", "SimdCmpOp", signedTypes, 4,
+ sqrshlCode)
+ threeEqualRegInstX("sqrshl", "SqrshlScX", "SimdCmpOp", signedTypes, 4,
+ sqrshlCode, scalar=True)
+ # SQRSHRN, SQRSHRN2
+ sqrshrnCode = '''
+ FPSCR fpscr = (FPSCR) FpscrQc;
+ if (imm > sizeof(srcElem1) * 8) {
+ if (srcElem1 != 0 && srcElem1 != -1)
+ fpscr.qc = 1;
+ destElem = 0;
+ } else if (imm) {
+ BigElement mid = (srcElem1 >> (imm - 1));
+ uint64_t rBit = mid & 0x1;
+ mid >>= 1;
+ mid |= -(mid & ((BigElement)1 <<
+ (sizeof(BigElement) * 8 - 1 - imm)));
+ mid += rBit;
+ if (mid != (Element)mid) {
+ destElem = mask(sizeof(Element) * 8 - 1);
+ if (srcElem1 < 0)
+ destElem = ~destElem;
+ fpscr.qc = 1;
+ } else {
+ destElem = mid;
+ }
+ } else {
+ if (srcElem1 != (Element)srcElem1) {
+ destElem = mask(sizeof(Element) * 8 - 1);
+ if (srcElem1 < 0)
+ destElem = ~destElem;
+ fpscr.qc = 1;
+ } else {
+ destElem = srcElem1;
+ }
+ }
+ FpscrQc = fpscr;
+ '''
+ twoRegNarrowInstX("sqrshrn", "SqrshrnX", "SimdShiftOp", smallSignedTypes,
+ sqrshrnCode, hasImm=True)
+ twoRegNarrowInstX("sqrshrn2", "Sqrshrn2X", "SimdShiftOp", smallSignedTypes,
+ sqrshrnCode, hasImm=True, hi=True)
+ twoRegNarrowInstX("sqrshrn", "SqrshrnScX", "SimdShiftOp", smallSignedTypes,
+ sqrshrnCode, hasImm=True, scalar=True)
+ # SQRSHRUN, SQRSHRUN2
+ sqrshrunCode = '''
+ FPSCR fpscr = (FPSCR) FpscrQc;
+ if (imm > sizeof(srcElem1) * 8) {
+ if (srcElem1 != 0)
+ fpscr.qc = 1;
+ destElem = 0;
+ } else if (imm) {
+ BigElement mid = (srcElem1 >> (imm - 1));
+ uint64_t rBit = mid & 0x1;
+ mid >>= 1;
+ mid |= -(mid & ((BigElement)1 <<
+ (sizeof(BigElement) * 8 - 1 - imm)));
+ mid += rBit;
+ if (bits(mid, sizeof(BigElement) * 8 - 1,
+ sizeof(Element) * 8) != 0) {
+ if (srcElem1 < 0) {
+ destElem = 0;
+ } else {
+ destElem = mask(sizeof(Element) * 8);
+ }
+ fpscr.qc = 1;
+ } else {
+ destElem = mid;
+ }
+ } else {
+ if (srcElem1 < 0) {
+ fpscr.qc = 1;
+ destElem = 0;
+ } else {
+ destElem = srcElem1;
+ }
+ }
+ FpscrQc = fpscr;
+ '''
+ twoRegNarrowInstX("sqrshrun", "SqrshrunX", "SimdShiftOp", smallSignedTypes,
+ sqrshrunCode, hasImm=True)
+ twoRegNarrowInstX("sqrshrun", "Sqrshrun2X", "SimdShiftOp",
+ smallSignedTypes, sqrshrunCode, hasImm=True, hi=True)
+ twoRegNarrowInstX("sqrshrun", "SqrshrunScX", "SimdShiftOp",
+ smallSignedTypes, sqrshrunCode, hasImm=True, scalar=True)
+ # SQSHL (immediate)
+ sqshlImmCode = '''
+ FPSCR fpscr = (FPSCR) FpscrQc;
+ if (imm >= sizeof(Element) * 8) {
+ if (srcElem1 != 0) {
+ destElem = (Element)1 << (sizeof(Element) * 8 - 1);
+ if (srcElem1 > 0)
+ destElem = ~destElem;
+ fpscr.qc = 1;
+ } else {
+ destElem = 0;
+ }
+ } else if (imm) {
+ destElem = (srcElem1 << imm);
+ uint64_t topBits = bits((uint64_t)srcElem1,
+ sizeof(Element) * 8 - 1,
+ sizeof(Element) * 8 - 1 - imm);
+ if (topBits != 0 && topBits != mask(imm + 1)) {
+ destElem = (Element)1 << (sizeof(Element) * 8 - 1);
+ if (srcElem1 > 0)
+ destElem = ~destElem;
+ fpscr.qc = 1;
+ }
+ } else {
+ destElem = srcElem1;
+ }
+ FpscrQc = fpscr;
+ '''
+ twoEqualRegInstX("sqshl", "SqshlImmDX", "SimdAluOp", smallSignedTypes, 2,
+ sqshlImmCode, hasImm=True)
+ twoEqualRegInstX("sqshl", "SqshlImmQX", "SimdAluOp", signedTypes, 4,
+ sqshlImmCode, hasImm=True)
+ twoEqualRegInstX("sqshl", "SqshlImmScX", "SimdAluOp", signedTypes, 4,
+ sqshlImmCode, hasImm=True, scalar=True)
+ # SQSHL (register)
+ sqshlCode = '''
+ int16_t shiftAmt = (int8_t)srcElem2;
+ FPSCR fpscr = (FPSCR) FpscrQc;
+ if (shiftAmt < 0) {
+ shiftAmt = -shiftAmt;
+ if (shiftAmt >= sizeof(Element) * 8) {
+ shiftAmt = sizeof(Element) * 8 - 1;
+ destElem = 0;
+ } else {
+ destElem = (srcElem1 >> shiftAmt);
+ }
+ // Make sure the right shift sign extended when it should.
+ if (srcElem1 < 0 && destElem >= 0) {
+ destElem |= -((Element)1 << (sizeof(Element) * 8 -
+ 1 - shiftAmt));
+ }
+ } else if (shiftAmt > 0) {
+ bool sat = false;
+ if (shiftAmt >= sizeof(Element) * 8) {
+ if (srcElem1 != 0)
+ sat = true;
+ else
+ destElem = 0;
+ } else {
+ if (bits((uint64_t) srcElem1, sizeof(Element) * 8 - 1,
+ sizeof(Element) * 8 - 1 - shiftAmt) !=
+ ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
+ sat = true;
+ } else {
+ destElem = srcElem1 << shiftAmt;
+ }
+ }
+ if (sat) {
+ fpscr.qc = 1;
+ destElem = mask(sizeof(Element) * 8 - 1);
+ if (srcElem1 < 0)
+ destElem = ~destElem;
+ }
+ } else {
+ destElem = srcElem1;
+ }
+ FpscrQc = fpscr;
+ '''
+ threeEqualRegInstX("sqshl", "SqshlDX", "SimdAluOp", smallSignedTypes, 2,
+ sqshlCode)
+ threeEqualRegInstX("sqshl", "SqshlQX", "SimdAluOp", signedTypes, 4,
+ sqshlCode)
+ threeEqualRegInstX("sqshl", "SqshlScX", "SimdAluOp", signedTypes, 4,
+ sqshlCode, scalar=True)
+ # SQSHLU
+ sqshluCode = '''
+ FPSCR fpscr = (FPSCR) FpscrQc;
+ if (imm >= sizeof(Element) * 8) {
+ if (srcElem1 < 0) {
+ destElem = 0;
+ fpscr.qc = 1;
+ } else if (srcElem1 > 0) {
+ destElem = mask(sizeof(Element) * 8);
+ fpscr.qc = 1;
+ } else {
+ destElem = 0;
+ }
+ } else if (imm) {
+ destElem = (srcElem1 << imm);
+ uint64_t topBits = bits((uint64_t)srcElem1,
+ sizeof(Element) * 8 - 1,
+ sizeof(Element) * 8 - imm);
+ if (srcElem1 < 0) {
+ destElem = 0;
+ fpscr.qc = 1;
+ } else if (topBits != 0) {
+ destElem = mask(sizeof(Element) * 8);
+ fpscr.qc = 1;
+ }
+ } else {
+ if (srcElem1 < 0) {
+ fpscr.qc = 1;
+ destElem = 0;
+ } else {
+ destElem = srcElem1;
+ }
+ }
+ FpscrQc = fpscr;
+ '''
+ twoEqualRegInstX("sqshlu", "SqshluDX", "SimdAluOp", smallSignedTypes, 2,
+ sqshluCode, hasImm=True)
+ twoEqualRegInstX("sqshlu", "SqshluQX", "SimdAluOp", signedTypes, 4,
+ sqshluCode, hasImm=True)
+ twoEqualRegInstX("sqshlu", "SqshluScX", "SimdAluOp", signedTypes, 4,
+ sqshluCode, hasImm=True, scalar=True)
+ # SQSHRN, SQSHRN2
+ sqshrnCode = '''
+ FPSCR fpscr = (FPSCR) FpscrQc;
+ if (imm > sizeof(srcElem1) * 8) {
+ if (srcElem1 != 0 && srcElem1 != -1)
+ fpscr.qc = 1;
+ destElem = 0;
+ } else if (imm) {
+ BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
+ mid |= -(mid & ((BigElement)1 <<
+ (sizeof(BigElement) * 8 - 1 - imm)));
+ if (mid != (Element)mid) {
+ destElem = mask(sizeof(Element) * 8 - 1);
+ if (srcElem1 < 0)
+ destElem = ~destElem;
+ fpscr.qc = 1;
+ } else {
+ destElem = mid;
+ }
+ } else {
+ destElem = srcElem1;
+ }
+ FpscrQc = fpscr;
+ '''
+ twoRegNarrowInstX("sqshrn", "SqshrnX", "SimdShiftOp", smallSignedTypes,
+ sqshrnCode, hasImm=True)
+ twoRegNarrowInstX("sqshrn2", "Sqshrn2X", "SimdShiftOp", smallSignedTypes,
+ sqshrnCode, hasImm=True, hi=True)
+ twoRegNarrowInstX("sqshrn", "SqshrnScX", "SimdShiftOp", smallSignedTypes,
+ sqshrnCode, hasImm=True, scalar=True)
+ # SQSHRUN, SQSHRUN2
+ sqshrunCode = '''
+ FPSCR fpscr = (FPSCR) FpscrQc;
+ if (imm > sizeof(srcElem1) * 8) {
+ if (srcElem1 != 0)
+ fpscr.qc = 1;
+ destElem = 0;
+ } else if (imm) {
+ BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
+ if (bits(mid, sizeof(BigElement) * 8 - 1,
+ sizeof(Element) * 8) != 0) {
+ if (srcElem1 < 0) {
+ destElem = 0;
+ } else {
+ destElem = mask(sizeof(Element) * 8);
+ }
+ fpscr.qc = 1;
+ } else {
+ destElem = mid;
+ }
+ } else {
+ destElem = srcElem1;
+ }
+ FpscrQc = fpscr;
+ '''
+ twoRegNarrowInstX("sqshrun", "SqshrunX", "SimdShiftOp", smallSignedTypes,
+ sqshrunCode, hasImm=True)
+ twoRegNarrowInstX("sqshrun", "Sqshrun2X", "SimdShiftOp", smallSignedTypes,
+ sqshrunCode, hasImm=True, hi=True)
+ twoRegNarrowInstX("sqshrun", "SqshrunScX", "SimdShiftOp", smallSignedTypes,
+ sqshrunCode, hasImm=True, scalar=True)
+ # SQSUB
+ sqsubCode = '''
+ destElem = srcElem1 - srcElem2;
+ FPSCR fpscr = (FPSCR) FpscrQc;
+ bool negDest = (destElem < 0);
+ bool negSrc1 = (srcElem1 < 0);
+ bool posSrc2 = (srcElem2 >= 0);
+ if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
+ destElem = (Element)1 << (sizeof(Element) * 8 - 1);
+ if (negDest)
+ destElem -= 1;
+ fpscr.qc = 1;
+ }
+ FpscrQc = fpscr;
+ '''
+ threeEqualRegInstX("sqsub", "SqsubDX", "SimdAddOp", smallSignedTypes, 2,
+ sqsubCode)
+ threeEqualRegInstX("sqsub", "SqsubQX", "SimdAddOp", signedTypes, 4,
+ sqsubCode)
+ threeEqualRegInstX("sqsub", "SqsubScX", "SimdAddOp", signedTypes, 4,
+ sqsubCode, scalar=True)
+ # SQXTN, SQXTN2
+ sqxtnCode = '''
+ FPSCR fpscr = (FPSCR) FpscrQc;
+ destElem = srcElem1;
+ if ((BigElement)destElem != srcElem1) {
+ fpscr.qc = 1;
+ destElem = mask(sizeof(Element) * 8 - 1);
+ if (srcElem1 < 0)
+ destElem = ~destElem;
+ }
+ FpscrQc = fpscr;
+ '''
+ twoRegNarrowInstX("sqxtn", "SqxtnX", "SimdMiscOp", smallSignedTypes,
+ sqxtnCode)
+ twoRegNarrowInstX("sqxtn", "Sqxtn2X", "SimdMiscOp", smallSignedTypes,
+ sqxtnCode, hi=True)
+ twoRegNarrowInstX("sqxtn", "SqxtnScX", "SimdMiscOp", smallSignedTypes,
+ sqxtnCode, scalar=True)
+ # SQXTUN, SQXTUN2
+ sqxtunCode = '''
+ FPSCR fpscr = (FPSCR) FpscrQc;
+ destElem = srcElem1;
+ if (srcElem1 < 0 ||
+ ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
+ fpscr.qc = 1;
+ destElem = mask(sizeof(Element) * 8);
+ if (srcElem1 < 0)
+ destElem = ~destElem;
+ }
+ FpscrQc = fpscr;
+ '''
+ twoRegNarrowInstX("sqxtun", "SqxtunX", "SimdMiscOp", smallSignedTypes,
+ sqxtunCode)
+ twoRegNarrowInstX("sqxtun", "Sqxtun2X", "SimdMiscOp", smallSignedTypes,
+ sqxtunCode, hi=True)
+ twoRegNarrowInstX("sqxtun", "SqxtunScX", "SimdMiscOp", smallSignedTypes,
+ sqxtunCode, scalar=True)
+ # SRHADD
+ rhaddCode = '''
+ Element carryBit =
+ (((unsigned)srcElem1 & 0x1) +
+ ((unsigned)srcElem2 & 0x1) + 1) >> 1;
+ // Use division instead of a shift to ensure the sign extension works
+ // right. The compiler will figure out if it can be a shift. Mask the
+ // inputs so they get truncated correctly.
+ destElem = (((srcElem1 & ~(Element)1) / 2) +
+ ((srcElem2 & ~(Element)1) / 2)) + carryBit;
+ '''
+ threeEqualRegInstX("srhadd", "SrhaddDX", "SimdAddOp", smallSignedTypes, 2,
+ rhaddCode)
+ threeEqualRegInstX("srhadd", "SrhaddQX", "SimdAddOp", smallSignedTypes, 4,
+ rhaddCode)
+ # SRI
+ sriCode = '''
+ if (imm >= sizeof(Element) * 8)
+ destElem = destElem;
+ else
+ destElem = (srcElem1 >> imm) |
+ (destElem & ~mask(sizeof(Element) * 8 - imm));
+ '''
+ twoEqualRegInstX("sri", "SriDX", "SimdShiftOp", unsignedTypes, 2, sriCode,
+ True, hasImm=True)
+ twoEqualRegInstX("sri", "SriQX", "SimdShiftOp", unsignedTypes, 4, sriCode,
+ True, hasImm=True)
+ # SRSHL
+ rshlCode = '''
+ int16_t shiftAmt = (int8_t)srcElem2;
+ if (shiftAmt < 0) {
+ shiftAmt = -shiftAmt;
+ Element rBit = 0;
+ if (shiftAmt <= sizeof(Element) * 8)
+ rBit = bits(srcElem1, shiftAmt - 1);
+ if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
+ rBit = 1;
+ if (shiftAmt >= sizeof(Element) * 8) {
+ shiftAmt = sizeof(Element) * 8 - 1;
+ destElem = 0;
+ } else {
+ destElem = (srcElem1 >> shiftAmt);
+ }
+ // Make sure the right shift sign extended when it should.
+ if (ltz(srcElem1) && !ltz(destElem)) {
+ destElem |= -((Element)1 << (sizeof(Element) * 8 -
+ 1 - shiftAmt));
+ }
+ destElem += rBit;
+ } else if (shiftAmt > 0) {
+ if (shiftAmt >= sizeof(Element) * 8) {
+ destElem = 0;
+ } else {
+ destElem = srcElem1 << shiftAmt;
+ }
+ } else {
+ destElem = srcElem1;
+ }
+ '''
+ threeEqualRegInstX("srshl", "SrshlDX", "SimdShiftOp", signedTypes, 2,
+ rshlCode)
+ threeEqualRegInstX("srshl", "SrshlQX", "SimdShiftOp", signedTypes, 4,
+ rshlCode)
+ # SRSHR
+ rshrCode = '''
+ if (imm > sizeof(srcElem1) * 8) {
+ destElem = 0;
+ } else if (imm) {
+ Element rBit = bits(srcElem1, imm - 1);
+ destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
+ } else {
+ destElem = srcElem1;
+ }
+ '''
+ twoEqualRegInstX("srshr", "SrshrDX", "SimdShiftOp", signedTypes, 2,
+ rshrCode, hasImm=True)
+ twoEqualRegInstX("srshr", "SrshrQX", "SimdShiftOp", signedTypes, 4,
+ rshrCode, hasImm=True)
+ # SRSRA
+ rsraCode = '''
+ if (imm > sizeof(srcElem1) * 8) {
+ destElem += 0;
+ } else if (imm) {
+ Element rBit = bits(srcElem1, imm - 1);
+ destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
+ } else {
+ destElem += srcElem1;
+ }
+ '''
+ twoEqualRegInstX("srsra", "SrsraDX", "SimdShiftOp", signedTypes, 2,
+ rsraCode, True, hasImm=True)
+ twoEqualRegInstX("srsra", "SrsraQX", "SimdShiftOp", signedTypes, 4,
+ rsraCode, True, hasImm=True)
+ # SSHL
+ shlCode = '''
+ int16_t shiftAmt = (int8_t)srcElem2;
+ if (shiftAmt < 0) {
+ shiftAmt = -shiftAmt;
+ if (shiftAmt >= sizeof(Element) * 8) {
+ shiftAmt = sizeof(Element) * 8 - 1;
+ destElem = 0;
+ } else {
+ destElem = (srcElem1 >> shiftAmt);
+ }
+ // Make sure the right shift sign extended when it should.
+ if (ltz(srcElem1) && !ltz(destElem)) {
+ destElem |= -((Element)1 << (sizeof(Element) * 8 -
+ 1 - shiftAmt));
+ }
+ } else {
+ if (shiftAmt >= sizeof(Element) * 8) {
+ destElem = 0;
+ } else {
+ destElem = srcElem1 << shiftAmt;
+ }
+ }
+ '''
+ threeEqualRegInstX("sshl", "SshlDX", "SimdShiftOp", signedTypes, 2,
+ shlCode)
+ threeEqualRegInstX("sshl", "SshlQX", "SimdShiftOp", signedTypes, 4,
+ shlCode)
+ # SSHLL, SSHLL2
+ shllCode = '''
+ if (imm >= sizeof(destElem) * 8) {
+ destElem = 0;
+ } else {
+ destElem = (BigElement)srcElem1 << imm;
+ }
+ '''
+ twoRegLongInstX("sshll", "SshllX", "SimdShiftOp", smallSignedTypes,
+ shllCode, hasImm=True)
+ twoRegLongInstX("sshll", "Sshll2X", "SimdShiftOp", smallSignedTypes,
+ shllCode, hasImm=True, hi=True)
+ # SSHR
+ shrCode = '''
+ if (imm >= sizeof(srcElem1) * 8) {
+ if (ltz(srcElem1))
+ destElem = -1;
+ else
+ destElem = 0;
+ } else {
+ destElem = srcElem1 >> imm;
+ }
+ '''
+ twoEqualRegInstX("sshr", "SshrDX", "SimdShiftOp", signedTypes, 2, shrCode,
+ hasImm=True)
+ twoEqualRegInstX("sshr", "SshrQX", "SimdShiftOp", signedTypes, 4, shrCode,
+ hasImm=True)
+ # SSRA
+ sraCode = '''
+ Element mid;;
+ if (imm >= sizeof(srcElem1) * 8) {
+ mid = ltz(srcElem1) ? -1 : 0;
+ } else {
+ mid = srcElem1 >> imm;
+ if (ltz(srcElem1) && !ltz(mid)) {
+ mid |= -(mid & ((Element)1 <<
+ (sizeof(Element) * 8 - 1 - imm)));
+ }
+ }
+ destElem += mid;
+ '''
+ twoEqualRegInstX("ssra", "SsraDX", "SimdShiftOp", signedTypes, 2, sraCode,
+ True, hasImm=True)
+ twoEqualRegInstX("ssra", "SsraQX", "SimdShiftOp", signedTypes, 4, sraCode,
+ True, hasImm=True)
+ # SSUBL
+ sublwCode = "destElem = (BigElement)srcElem1 - (BigElement)srcElem2;"
+ threeRegLongInstX("ssubl", "SsublX", "SimdAddOp", smallSignedTypes,
+ sublwCode)
+ threeRegLongInstX("ssubl2", "Ssubl2X", "SimdAddOp", smallSignedTypes,
+ sublwCode, hi=True)
+ # SSUBW
+ threeRegWideInstX("ssubw", "SsubwX", "SimdAddOp", smallSignedTypes,
+ sublwCode)
+ threeRegWideInstX("ssubw2", "Ssubw2X", "SimdAddOp", smallSignedTypes,
+ sublwCode, hi=True)
+ # SUB
+ subCode = "destElem = srcElem1 - srcElem2;"
+ threeEqualRegInstX("sub", "SubDX", "SimdAddOp", unsignedTypes, 2, subCode)
+ threeEqualRegInstX("sub", "SubQX", "SimdAddOp", unsignedTypes, 4, subCode)
+ # SUBHN, SUBHN2
+ subhnCode = '''
+ destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
+ (sizeof(Element) * 8);
+ '''
+ threeRegNarrowInstX("subhn", "SubhnX", "SimdAddOp", smallUnsignedTypes,
+ subhnCode)
+ threeRegNarrowInstX("subhn2", "Subhn2X", "SimdAddOp", smallUnsignedTypes,
+ subhnCode, hi=True)
+ # SUQADD
+ suqaddCode = '''
+ FPSCR fpscr = (FPSCR) FpscrQc;
+ Element tmp = destElem + srcElem1;
+ if (bits(destElem, sizeof(Element) * 8 - 1) == 0) {
+ if (bits(tmp, sizeof(Element) * 8 - 1) == 1 ||
+ tmp < srcElem1 || tmp < destElem) {
+ destElem = (((Element) 1) << (sizeof(Element) * 8 - 1)) - 1;
+ fpscr.qc = 1;
+ } else {
+ destElem = tmp;
+ }
+ } else {
+ Element absDestElem = (~destElem) + 1;
+ if (absDestElem < srcElem1) {
+ // Still check for positive sat., no need to check for negative sat.
+ if (bits(tmp, sizeof(Element) * 8 - 1) == 1) {
+ destElem = (((Element) 1) << (sizeof(Element) * 8 - 1)) - 1;
+ fpscr.qc = 1;
+ } else {
+ destElem = tmp;
+ }
+ } else {
+ destElem = tmp;
+ }
+ }
+ FpscrQc = fpscr;
+ '''
+ twoEqualRegInstX("suqadd", "SuqaddDX", "SimdAddOp", smallUnsignedTypes, 2,
+ suqaddCode, True)
+ twoEqualRegInstX("suqadd", "SuqaddQX", "SimdAddOp", unsignedTypes, 4,
+ suqaddCode, True)
+ twoEqualRegInstX("suqadd", "SuqaddScX", "SimdAddOp", unsignedTypes, 4,
+ suqaddCode, True, scalar=True)
+ # SXTL -> alias to SSHLL
+ # TBL
+ tbxTblInstX("tbl", "Tbl1DX", "SimdMiscOp", ("uint8_t",), 1, "true", 2)
+ tbxTblInstX("tbl", "Tbl1QX", "SimdMiscOp", ("uint8_t",), 1, "true", 4)
+ tbxTblInstX("tbl", "Tbl2DX", "SimdMiscOp", ("uint8_t",), 2, "true", 2)
+ tbxTblInstX("tbl", "Tbl2QX", "SimdMiscOp", ("uint8_t",), 2, "true", 4)
+ tbxTblInstX("tbl", "Tbl3DX", "SimdMiscOp", ("uint8_t",), 3, "true", 2)
+ tbxTblInstX("tbl", "Tbl3QX", "SimdMiscOp", ("uint8_t",), 3, "true", 4)
+ tbxTblInstX("tbl", "Tbl4DX", "SimdMiscOp", ("uint8_t",), 4, "true", 2)
+ tbxTblInstX("tbl", "Tbl4QX", "SimdMiscOp", ("uint8_t",), 4, "true", 4)
+ # TBX
+ tbxTblInstX("tbx", "Tbx1DX", "SimdMiscOp", ("uint8_t",), 1, "false", 2)
+ tbxTblInstX("tbx", "Tbx1QX", "SimdMiscOp", ("uint8_t",), 1, "false", 4)
+ tbxTblInstX("tbx", "Tbx2DX", "SimdMiscOp", ("uint8_t",), 2, "false", 2)
+ tbxTblInstX("tbx", "Tbx2QX", "SimdMiscOp", ("uint8_t",), 2, "false", 4)
+ tbxTblInstX("tbx", "Tbx3DX", "SimdMiscOp", ("uint8_t",), 3, "false", 2)
+ tbxTblInstX("tbx", "Tbx3QX", "SimdMiscOp", ("uint8_t",), 3, "false", 4)
+ tbxTblInstX("tbx", "Tbx4DX", "SimdMiscOp", ("uint8_t",), 4, "false", 2)
+ tbxTblInstX("tbx", "Tbx4QX", "SimdMiscOp", ("uint8_t",), 4, "false", 4)
+ # TRN1
+ trnCode = '''
+ unsigned part = %s;
+ for (unsigned i = 0; i < eCount / 2; i++) {
+ destReg.elements[2 * i] = srcReg1.elements[2 * i + part];
+ destReg.elements[2 * i + 1] = srcReg2.elements[2 * i + part];
+ }
+ '''
+ threeRegScrambleInstX("trn1", "Trn1DX", "SimdAluOp", smallUnsignedTypes, 2,
+ trnCode % "0")
+ threeRegScrambleInstX("trn1", "Trn1QX", "SimdAluOp", unsignedTypes, 4,
+ trnCode % "0")
+ # TRN2
+ threeRegScrambleInstX("trn2", "Trn2DX", "SimdAluOp", smallUnsignedTypes, 2,
+ trnCode % "1")
+ threeRegScrambleInstX("trn2", "Trn2QX", "SimdAluOp", unsignedTypes, 4,
+ trnCode % "1")
+ # UABA
+ threeEqualRegInstX("uaba", "UabaDX", "SimdAddAccOp", smallUnsignedTypes, 2,
+ abaCode, True)
+ threeEqualRegInstX("uaba", "UabaQX", "SimdAddAccOp", smallUnsignedTypes, 4,
+ abaCode, True)
+ # UABAL, UABAL2
+ threeRegLongInstX("uabal", "UabalX", "SimdAddAccOp", smallUnsignedTypes,
+ abalCode, True)
+ threeRegLongInstX("uabal2", "Uabal2X", "SimdAddAccOp", smallUnsignedTypes,
+ abalCode, True, hi=True)
+ # UABD
+ threeEqualRegInstX("uabd", "UabdDX", "SimdAddOp", smallUnsignedTypes, 2,
+ abdCode)
+ threeEqualRegInstX("uabd", "UabdQX", "SimdAddOp", smallUnsignedTypes, 4,
+ abdCode)
+ # UABDL, UABDL2
+ threeRegLongInstX("uabdl", "UabdlX", "SimdAddAccOp", smallUnsignedTypes,
+ abdlCode, True)
+ threeRegLongInstX("uabdl2", "Uabdl2X", "SimdAddAccOp", smallUnsignedTypes,
+ abdlCode, True, hi=True)
+ # UADALP
+ twoRegCondenseInstX("uadalp", "UadalpDX", "SimdAddOp", smallUnsignedTypes,
+ 2, adalpCode, True)
+ twoRegCondenseInstX("uadalp", "UadalpQX", "SimdAddOp", smallUnsignedTypes,
+ 4, adalpCode, True)
+ # UADDL, UADDL2
+ threeRegLongInstX("uaddl", "UaddlX", "SimdAddAccOp", smallUnsignedTypes,
+ addlwCode)
+ threeRegLongInstX("uaddl2", "Uaddl2X", "SimdAddAccOp", smallUnsignedTypes,
+ addlwCode, hi=True)
+ # UADDLP
+ twoRegCondenseInstX("uaddlp", "UaddlpDX", "SimdAddOp", smallUnsignedTypes,
+ 2, addlwCode)
+ twoRegCondenseInstX("uaddlp", "UaddlpQX", "SimdAddOp", smallUnsignedTypes,
+ 4, addlwCode)
+ # UADDLV
+ twoRegAcrossInstX("uaddlv", "UaddlvDX", "SimdAddOp",
+ ("uint8_t", "uint16_t"), 2, addAcrossLongCode, long=True)
+ twoRegAcrossInstX("uaddlv", "UaddlvQX", "SimdAddOp",
+ ("uint8_t", "uint16_t"), 4, addAcrossLongCode, long=True)
+ twoRegAcrossInstX("uaddlv", "UaddlvBQX", "SimdAddOp", ("uint32_t",), 4,
+ addAcrossLongCode, doubleDest=True, long=True)
+ # UADDW
+ threeRegWideInstX("uaddw", "UaddwX", "SimdAddAccOp", smallUnsignedTypes,
+ addlwCode)
+ threeRegWideInstX("uaddw2", "Uaddw2X", "SimdAddAccOp", smallUnsignedTypes,
+ addlwCode, hi=True)
+ # UCVTF (fixed-point)
+ ucvtfFixedCode = fpOp % ("fplibFixedToFP<Element>(srcElem1, imm, true,"
+ " FPCRRounding(fpscr), fpscr)")
+ twoEqualRegInstX("ucvtf", "UcvtfFixedDX", "SimdCvtOp", smallFloatTypes, 2,
+ ucvtfFixedCode, hasImm=True)
+ twoEqualRegInstX("ucvtf", "UcvtfFixedQX", "SimdCvtOp", floatTypes, 4,
+ ucvtfFixedCode, hasImm=True)
+ twoEqualRegInstX("ucvtf", "UcvtfFixedScX", "SimdCvtOp", floatTypes, 4,
+ ucvtfFixedCode, hasImm=True, scalar=True)
+ # UCVTF (integer)
+ ucvtfIntCode = fpOp % ("fplibFixedToFP<Element>(srcElem1, 0, true,"
+ " FPCRRounding(fpscr), fpscr)")
+ twoEqualRegInstX("ucvtf", "UcvtfIntDX", "SimdCvtOp", smallFloatTypes, 2,
+ ucvtfIntCode)
+ twoEqualRegInstX("ucvtf", "UcvtfIntQX", "SimdCvtOp", floatTypes, 4,
+ ucvtfIntCode)
+ twoEqualRegInstX("ucvtf", "UcvtfIntScX", "SimdCvtOp", floatTypes, 4,
+ ucvtfIntCode, scalar=True)
+ # UHADD
+ threeEqualRegInstX("uhadd", "UhaddDX", "SimdAddOp", smallUnsignedTypes, 2,
+ haddCode)
+ threeEqualRegInstX("uhadd", "UhaddQX", "SimdAddOp", smallUnsignedTypes, 4,
+ haddCode)
+ # UHSUB
+ threeEqualRegInstX("uhsub", "UhsubDX", "SimdAddOp", smallUnsignedTypes, 2,
+ hsubCode)
+ threeEqualRegInstX("uhsub", "UhsubQX", "SimdAddOp", smallUnsignedTypes, 4,
+ hsubCode)
+ # UMAX
+ threeEqualRegInstX("umax", "UmaxDX", "SimdCmpOp", smallUnsignedTypes, 2,
+ maxCode)
+ threeEqualRegInstX("umax", "UmaxQX", "SimdCmpOp", smallUnsignedTypes, 4,
+ maxCode)
+ # UMAXP
+ threeEqualRegInstX("umaxp", "UmaxpDX", "SimdCmpOp", smallUnsignedTypes, 2,
+ maxCode, pairwise=True)
+ threeEqualRegInstX("umaxp", "UmaxpQX", "SimdCmpOp", smallUnsignedTypes, 4,
+ maxCode, pairwise=True)
+ # UMAXV
+ twoRegAcrossInstX("umaxv", "UmaxvDX", "SimdCmpOp", ("uint8_t", "uint16_t"),
+ 2, maxAcrossCode)
+ twoRegAcrossInstX("umaxv", "UmaxvQX", "SimdCmpOp", smallUnsignedTypes, 4,
+ maxAcrossCode)
+ # UMIN
+ threeEqualRegInstX("umin", "UminDX", "SimdCmpOp", smallUnsignedTypes, 2,
+ minCode)
+ threeEqualRegInstX("umin", "UminQX", "SimdCmpOp", smallUnsignedTypes, 4,
+ minCode)
+ # UMINP
+ threeEqualRegInstX("uminp", "UminpDX", "SimdCmpOp", smallUnsignedTypes, 2,
+ minCode, pairwise=True)
+ threeEqualRegInstX("uminp", "UminpQX", "SimdCmpOp", smallUnsignedTypes, 4,
+ minCode, pairwise=True)
+ # UMINV
+ twoRegAcrossInstX("uminv", "UminvDX", "SimdCmpOp", ("uint8_t", "uint16_t"),
+ 2, minAcrossCode)
+ twoRegAcrossInstX("uminv", "UminvQX", "SimdCmpOp", smallUnsignedTypes, 4,
+ minAcrossCode)
+ # UMLAL (by element)
+ threeRegLongInstX("umlal", "UmlalElemX", "SimdMultAccOp",
+ smallUnsignedTypes, mlalCode, True, byElem=True)
+ threeRegLongInstX("umlal", "UmlalElem2X", "SimdMultAccOp",
+ smallUnsignedTypes, mlalCode, True, byElem=True, hi=True)
+ # UMLAL (vector)
+ threeRegLongInstX("umlal", "UmlalX", "SimdMultAccOp", smallUnsignedTypes,
+ mlalCode, True)
+ threeRegLongInstX("umlal", "Umlal2X", "SimdMultAccOp", smallUnsignedTypes,
+ mlalCode, True, hi=True)
+ # UMLSL (by element)
+ threeRegLongInstX("umlsl", "UmlslElemX", "SimdMultAccOp",
+ smallUnsignedTypes, mlslCode, True, byElem=True)
+ threeRegLongInstX("umlsl", "UmlslElem2X", "SimdMultAccOp",
+ smallUnsignedTypes, mlslCode, True, byElem=True, hi=True)
+ # UMLSL (vector)
+ threeRegLongInstX("umlsl", "UmlslX", "SimdMultAccOp", smallUnsignedTypes,
+ mlslCode, True)
+ threeRegLongInstX("umlsl", "Umlsl2X", "SimdMultAccOp", smallUnsignedTypes,
+ mlslCode, True, hi=True)
+ # UMOV
+ insToGprInstX("umov", "UmovWX", "SimdMiscOp", smallUnsignedTypes, 4, 'W')
+ insToGprInstX("umov", "UmovXX", "SimdMiscOp", ("uint64_t",), 4, 'X')
+ # UMULL, UMULL2 (by element)
+ threeRegLongInstX("umull", "UmullElemX", "SimdMultOp", smallUnsignedTypes,
+ mullCode, byElem=True)
+ threeRegLongInstX("umull", "UmullElem2X", "SimdMultOp", smallUnsignedTypes,
+ mullCode, byElem=True, hi=True)
+ # UMULL, UMULL2 (vector)
+ threeRegLongInstX("umull", "UmullX", "SimdMultOp", smallUnsignedTypes,
+ mullCode)
+ threeRegLongInstX("umull", "Umull2X", "SimdMultOp", smallUnsignedTypes,
+ mullCode, hi=True)
+ # UQADD
+ uqaddCode = '''
+ destElem = srcElem1 + srcElem2;
+ FPSCR fpscr = (FPSCR) FpscrQc;
+ if (destElem < srcElem1 || destElem < srcElem2) {
+ destElem = (Element)(-1);
+ fpscr.qc = 1;
+ }
+ FpscrQc = fpscr;
+ '''
+ threeEqualRegInstX("uqadd", "UqaddDX", "SimdAddOp", smallUnsignedTypes, 2,
+ uqaddCode)
+ threeEqualRegInstX("uqadd", "UqaddQX", "SimdAddOp", unsignedTypes, 4,
+ uqaddCode)
+ threeEqualRegInstX("uqadd", "UqaddScX", "SimdAddOp", unsignedTypes, 4,
+ uqaddCode, scalar=True)
+ # UQRSHL
+ uqrshlCode = '''
+ int16_t shiftAmt = (int8_t)srcElem2;
+ FPSCR fpscr = (FPSCR) FpscrQc;
+ if (shiftAmt < 0) {
+ shiftAmt = -shiftAmt;
+ Element rBit = 0;
+ if (shiftAmt <= sizeof(Element) * 8)
+ rBit = bits(srcElem1, shiftAmt - 1);
+ if (shiftAmt >= sizeof(Element) * 8) {
+ shiftAmt = sizeof(Element) * 8 - 1;
+ destElem = 0;
+ } else {
+ destElem = (srcElem1 >> shiftAmt);
+ }
+ destElem += rBit;
+ } else {
+ if (shiftAmt >= sizeof(Element) * 8) {
+ if (srcElem1 != 0) {
+ destElem = mask(sizeof(Element) * 8);
+ fpscr.qc = 1;
+ } else {
+ destElem = 0;
+ }
+ } else {
+ if (bits(srcElem1, sizeof(Element) * 8 - 1,
+ sizeof(Element) * 8 - shiftAmt)) {
+ destElem = mask(sizeof(Element) * 8);
+ fpscr.qc = 1;
+ } else {
+ destElem = srcElem1 << shiftAmt;
+ }
+ }
+ }
+ FpscrQc = fpscr;
+ '''
+ threeEqualRegInstX("uqrshl", "UqrshlDX", "SimdCmpOp", smallUnsignedTypes,
+ 2, uqrshlCode)
+ threeEqualRegInstX("uqrshl", "UqrshlQX", "SimdCmpOp", unsignedTypes, 4,
+ uqrshlCode)
+ threeEqualRegInstX("uqrshl", "UqrshlScX", "SimdCmpOp", unsignedTypes, 4,
+ uqrshlCode, scalar=True)
+ # UQRSHRN
+ uqrshrnCode = '''
+ FPSCR fpscr = (FPSCR) FpscrQc;
+ if (imm > sizeof(srcElem1) * 8) {
+ if (srcElem1 != 0)
+ fpscr.qc = 1;
+ destElem = 0;
+ } else if (imm) {
+ BigElement mid = (srcElem1 >> (imm - 1));
+ uint64_t rBit = mid & 0x1;
+ mid >>= 1;
+ mid += rBit;
+ if (mid != (Element)mid) {
+ destElem = mask(sizeof(Element) * 8);
+ fpscr.qc = 1;
+ } else {
+ destElem = mid;
+ }
+ } else {
+ if (srcElem1 != (Element)srcElem1) {
+ destElem = mask(sizeof(Element) * 8 - 1);
+ fpscr.qc = 1;
+ } else {
+ destElem = srcElem1;
+ }
+ }
+ FpscrQc = fpscr;
+ '''
+ twoRegNarrowInstX("uqrshrn", "UqrshrnX", "SimdShiftOp", smallUnsignedTypes,
+ uqrshrnCode, hasImm=True)
+ twoRegNarrowInstX("uqrshrn2", "Uqrshrn2X", "SimdShiftOp",
+ smallUnsignedTypes, uqrshrnCode, hasImm=True, hi=True)
+ twoRegNarrowInstX("uqrshrn", "UqrshrnScX", "SimdShiftOp",
+ smallUnsignedTypes, uqrshrnCode, hasImm=True,
+ scalar=True)
+ # UQSHL (immediate)
+ uqshlImmCode = '''
+ FPSCR fpscr = (FPSCR) FpscrQc;
+ if (imm >= sizeof(Element) * 8) {
+ if (srcElem1 != 0) {
+ destElem = mask(sizeof(Element) * 8);
+ fpscr.qc = 1;
+ } else {
+ destElem = 0;
+ }
+ } else if (imm) {
+ destElem = (srcElem1 << imm);
+ uint64_t topBits = bits((uint64_t)srcElem1,
+ sizeof(Element) * 8 - 1,
+ sizeof(Element) * 8 - imm);
+ if (topBits != 0) {
+ destElem = mask(sizeof(Element) * 8);
+ fpscr.qc = 1;
+ }
+ } else {
+ destElem = srcElem1;
+ }
+ FpscrQc = fpscr;
+ '''
+ twoEqualRegInstX("uqshl", "UqshlImmDX", "SimdAluOp", smallUnsignedTypes, 2,
+ uqshlImmCode, hasImm=True)
+ twoEqualRegInstX("uqshl", "UqshlImmQX", "SimdAluOp", unsignedTypes, 4,
+ uqshlImmCode, hasImm=True)
+ twoEqualRegInstX("uqshl", "UqshlImmScX", "SimdAluOp", unsignedTypes, 4,
+ uqshlImmCode, hasImm=True, scalar=True)
+ # UQSHL (register)
+ uqshlCode = '''
+ int16_t shiftAmt = (int8_t)srcElem2;
+ FPSCR fpscr = (FPSCR) FpscrQc;
+ if (shiftAmt < 0) {
+ shiftAmt = -shiftAmt;
+ if (shiftAmt >= sizeof(Element) * 8) {
+ shiftAmt = sizeof(Element) * 8 - 1;
+ destElem = 0;
+ } else {
+ destElem = (srcElem1 >> shiftAmt);
+ }
+ } else if (shiftAmt > 0) {
+ if (shiftAmt >= sizeof(Element) * 8) {
+ if (srcElem1 != 0) {
+ destElem = mask(sizeof(Element) * 8);
+ fpscr.qc = 1;
+ } else {
+ destElem = 0;
+ }
+ } else {
+ if (bits(srcElem1, sizeof(Element) * 8 - 1,
+ sizeof(Element) * 8 - shiftAmt)) {
+ destElem = mask(sizeof(Element) * 8);
+ fpscr.qc = 1;
+ } else {
+ destElem = srcElem1 << shiftAmt;
+ }
+ }
+ } else {
+ destElem = srcElem1;
+ }
+ FpscrQc = fpscr;
+ '''
+ threeEqualRegInstX("uqshl", "UqshlDX", "SimdAluOp", smallUnsignedTypes, 2,
+ uqshlCode)
+ threeEqualRegInstX("uqshl", "UqshlQX", "SimdAluOp", unsignedTypes, 4,
+ uqshlCode)
+ threeEqualRegInstX("uqshl", "UqshlScX", "SimdAluOp", unsignedTypes, 4,
+ uqshlCode, scalar=True)
+ # UQSHRN, UQSHRN2
+ uqshrnCode = '''
+ FPSCR fpscr = (FPSCR) FpscrQc;
+ if (imm > sizeof(srcElem1) * 8) {
+ if (srcElem1 != 0)
+ fpscr.qc = 1;
+ destElem = 0;
+ } else if (imm) {
+ BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
+ if (mid != (Element)mid) {
+ destElem = mask(sizeof(Element) * 8);
+ fpscr.qc = 1;
+ } else {
+ destElem = mid;
+ }
+ } else {
+ destElem = srcElem1;
+ }
+ FpscrQc = fpscr;
+ '''
+ twoRegNarrowInstX("uqshrn", "UqshrnX", "SimdShiftOp", smallUnsignedTypes,
+ uqshrnCode, hasImm=True)
+ twoRegNarrowInstX("uqshrn2", "Uqshrn2X", "SimdShiftOp", smallUnsignedTypes,
+ uqshrnCode, hasImm=True, hi=True)
+ twoRegNarrowInstX("uqshrn", "UqshrnScX", "SimdShiftOp", smallUnsignedTypes,
+ uqshrnCode, hasImm=True, scalar=True)
+ # UQSUB
+ uqsubCode = '''
+ destElem = srcElem1 - srcElem2;
+ FPSCR fpscr = (FPSCR) FpscrQc;
+ if (destElem > srcElem1) {
+ destElem = 0;
+ fpscr.qc = 1;
+ }
+ FpscrQc = fpscr;
+ '''
+ threeEqualRegInstX("uqsub", "UqsubDX", "SimdAddOp", smallUnsignedTypes, 2,
+ uqsubCode)
+ threeEqualRegInstX("uqsub", "UqsubQX", "SimdAddOp", unsignedTypes, 4,
+ uqsubCode)
+ threeEqualRegInstX("uqsub", "UqsubScX", "SimdAddOp", unsignedTypes, 4,
+ uqsubCode, scalar=True)
+ # UQXTN
+ uqxtnCode = '''
+ FPSCR fpscr = (FPSCR) FpscrQc;
+ destElem = srcElem1;
+ if ((BigElement)destElem != srcElem1) {
+ fpscr.qc = 1;
+ destElem = mask(sizeof(Element) * 8);
+ }
+ FpscrQc = fpscr;
+ '''
+ twoRegNarrowInstX("uqxtn", "UqxtnX", "SimdMiscOp", smallUnsignedTypes,
+ uqxtnCode)
+ twoRegNarrowInstX("uqxtn", "Uqxtn2X", "SimdMiscOp", smallUnsignedTypes,
+ uqxtnCode, hi=True)
+ twoRegNarrowInstX("uqxtn", "UqxtnScX", "SimdMiscOp", smallUnsignedTypes,
+ uqxtnCode, scalar=True)
+ # URECPE
+ urecpeCode = "destElem = unsignedRecipEstimate(srcElem1);"
+ twoEqualRegInstX("urecpe", "UrecpeDX", "SimdMultAccOp", ("uint32_t",), 2,
+ urecpeCode)
+ twoEqualRegInstX("urecpe", "UrecpeQX", "SimdMultAccOp", ("uint32_t",), 4,
+ urecpeCode)
+ # URHADD
+ threeEqualRegInstX("urhadd", "UrhaddDX", "SimdAddOp", smallUnsignedTypes,
+ 2, rhaddCode)
+ threeEqualRegInstX("urhadd", "UrhaddQX", "SimdAddOp", smallUnsignedTypes,
+ 4, rhaddCode)
+ # URSHL
+ threeEqualRegInstX("urshl", "UrshlDX", "SimdShiftOp", unsignedTypes, 2,
+ rshlCode)
+ threeEqualRegInstX("urshl", "UrshlQX", "SimdShiftOp", unsignedTypes, 4,
+ rshlCode)
+ # URSHR
+ twoEqualRegInstX("urshr", "UrshrDX", "SimdShiftOp", unsignedTypes, 2,
+ rshrCode, hasImm=True)
+ twoEqualRegInstX("urshr", "UrshrQX", "SimdShiftOp", unsignedTypes, 4,
+ rshrCode, hasImm=True)
+ # URSQRTE
+ ursqrteCode = "destElem = unsignedRSqrtEstimate(srcElem1);"
+ twoEqualRegInstX("ursqrte", "UrsqrteDX", "SimdSqrtOp", ("uint32_t",), 2,
+ ursqrteCode)
+ twoEqualRegInstX("ursqrte", "UrsqrteQX", "SimdSqrtOp", ("uint32_t",), 4,
+ ursqrteCode)
+ # URSRA
+ twoEqualRegInstX("ursra", "UrsraDX", "SimdShiftOp", unsignedTypes, 2,
+ rsraCode, True, hasImm=True)
+ twoEqualRegInstX("ursra", "UrsraQX", "SimdShiftOp", unsignedTypes, 4,
+ rsraCode, True, hasImm=True)
+ # USHL
+ threeEqualRegInstX("ushl", "UshlDX", "SimdShiftOp", unsignedTypes, 2,
+ shlCode)
+ threeEqualRegInstX("ushl", "UshlQX", "SimdShiftOp", unsignedTypes, 4,
+ shlCode)
+ # USHLL, USHLL2
+ twoRegLongInstX("ushll", "UshllX", "SimdShiftOp", smallUnsignedTypes,
+ shllCode, hasImm=True)
+ twoRegLongInstX("ushll", "Ushll2X", "SimdShiftOp", smallUnsignedTypes,
+ shllCode, hi=True, hasImm=True)
+ # USHR
+ twoEqualRegInstX("ushr", "UshrDX", "SimdShiftOp", unsignedTypes, 2,
+ shrCode, hasImm=True)
+ twoEqualRegInstX("ushr", "UshrQX", "SimdShiftOp", unsignedTypes, 4,
+ shrCode, hasImm=True)
+ # USQADD
+ usqaddCode = '''
+ FPSCR fpscr = (FPSCR) FpscrQc;
+ Element tmp = destElem + srcElem1;
+ if (bits(srcElem1, sizeof(Element) * 8 - 1) == 0) {
+ if (tmp < srcElem1 || tmp < destElem) {
+ destElem = (Element)(-1);
+ fpscr.qc = 1;
+ } else {
+ destElem = tmp;
+ }
+ } else {
+ Element absSrcElem1 = (~srcElem1) + 1;
+ if (absSrcElem1 > destElem) {
+ destElem = 0;
+ fpscr.qc = 1;
+ } else {
+ destElem = tmp;
+ }
+ }
+ FpscrQc = fpscr;
+ '''
+ twoEqualRegInstX("usqadd", "UsqaddDX", "SimdAddOp", smallUnsignedTypes, 2,
+ usqaddCode, True)
+ twoEqualRegInstX("usqadd", "UsqaddQX", "SimdAddOp", unsignedTypes, 4,
+ usqaddCode, True)
+ twoEqualRegInstX("usqadd", "UsqaddScX", "SimdAddOp", unsignedTypes, 4,
+ usqaddCode, True, scalar=True)
+ # USRA
+ twoEqualRegInstX("usra", "UsraDX", "SimdShiftOp", unsignedTypes, 2,
+ sraCode, True, hasImm=True)
+ twoEqualRegInstX("usra", "UsraQX", "SimdShiftOp", unsignedTypes, 4,
+ sraCode, True, hasImm=True)
+ # USUBL
+ threeRegLongInstX("usubl", "UsublX", "SimdAddOp", smallUnsignedTypes,
+ sublwCode)
+ threeRegLongInstX("usubl2", "Usubl2X", "SimdAddOp", smallUnsignedTypes,
+ sublwCode, hi=True)
+ # USUBW
+ threeRegWideInstX("usubw", "UsubwX", "SimdAddOp", smallUnsignedTypes,
+ sublwCode)
+ threeRegWideInstX("usubw2", "Usubw2X", "SimdAddOp", smallUnsignedTypes,
+ sublwCode, hi=True)
+ # UXTL -> alias to USHLL
+ # UZP1
+ uzpCode = '''
+ unsigned part = %s;
+ for (unsigned i = 0; i < eCount / 2; i++) {
+ destReg.elements[i] = srcReg1.elements[2 * i + part];
+ destReg.elements[eCount / 2 + i] = srcReg2.elements[2 * i + part];
+ }
+ '''
+ threeRegScrambleInstX("Uzp1", "Uzp1DX", "SimdAluOp", smallUnsignedTypes, 2,
+ uzpCode % "0")
+ threeRegScrambleInstX("Uzp1", "Uzp1QX", "SimdAluOp", unsignedTypes, 4,
+ uzpCode % "0")
+ # UZP2
+ threeRegScrambleInstX("Uzp2", "Uzp2DX", "SimdAluOp", smallUnsignedTypes, 2,
+ uzpCode % "1")
+ threeRegScrambleInstX("Uzp2", "Uzp2QX", "SimdAluOp", unsignedTypes, 4,
+ uzpCode % "1")
+ # XTN, XTN2
+ xtnCode = "destElem = srcElem1;"
+ twoRegNarrowInstX("Xtn", "XtnX", "SimdMiscOp", smallUnsignedTypes, xtnCode)
+ twoRegNarrowInstX("Xtn", "Xtn2X", "SimdMiscOp", smallUnsignedTypes,
+ xtnCode, hi=True)
+ # ZIP1
+ zipCode = '''
+ unsigned base = %s;
+ for (unsigned i = 0; i < eCount / 2; i++) {
+ destReg.elements[2 * i] = srcReg1.elements[base + i];
+ destReg.elements[2 * i + 1] = srcReg2.elements[base + i];
+ }
+ '''
+ threeRegScrambleInstX("zip1", "Zip1DX", "SimdAluOp", smallUnsignedTypes, 2,
+ zipCode % "0")
+ threeRegScrambleInstX("zip1", "Zip1QX", "SimdAluOp", unsignedTypes, 4,
+ zipCode % "0")
+ # ZIP2
+ threeRegScrambleInstX("zip2", "Zip2DX", "SimdAluOp", smallUnsignedTypes, 2,
+ zipCode % "eCount / 2")
+ threeRegScrambleInstX("zip2", "Zip2QX", "SimdAluOp", unsignedTypes, 4,
+ zipCode % "eCount / 2")
+
+}};
diff --git a/src/arch/arm/isa/insts/neon64_mem.isa b/src/arch/arm/isa/insts/neon64_mem.isa
new file mode 100644
index 000000000..32a37f87e
--- /dev/null
+++ b/src/arch/arm/isa/insts/neon64_mem.isa
@@ -0,0 +1,471 @@
+// -*- mode: c++ -*-
+
+// Copyright (c) 2012-2013 ARM Limited
+// All rights reserved
+//
+// The license below extends only to copyright in the software and shall
+// not be construed as granting a license to any other intellectual
+// property including but not limited to intellectual property relating
+// to a hardware implementation of the functionality of the software
+// licensed hereunder. You may use the software subject to the license
+// terms below provided that you ensure that this notice is replicated
+// unmodified and in its entirety in all distributions of the software,
+// modified or unmodified, in source code or in binary form.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Mbou Eyole
+// Giacomo Gabrielli
+
+let {{
+
+ header_output = ''
+ decoder_output = ''
+ exec_output = ''
+
+ def mkMemAccMicroOp(name):
+ global header_output, decoder_output, exec_output
+ SPAlignmentCheckCodeNeon = '''
+ if (baseIsSP && bits(XURa, 3, 0) &&
+ SPAlignmentCheckEnabled(xc->tcBase())) {
+ return new SPAlignmentFault();
+ }
+ '''
+ eaCode = SPAlignmentCheckCodeNeon + '''
+ EA = XURa + imm;
+ '''
+ memDecl = '''
+ const int MaxNumBytes = 16;
+ union MemUnion {
+ uint8_t bytes[MaxNumBytes];
+ uint32_t floatRegBits[MaxNumBytes / 4];
+ };
+ '''
+
+ # Do endian conversion for all the elements
+ convCode = '''
+ VReg x = {0, 0};
+
+ x.lo = (((XReg) memUnion.floatRegBits[1]) << 32) |
+ (XReg) memUnion.floatRegBits[0];
+ x.hi = (((XReg) memUnion.floatRegBits[3]) << 32) |
+ (XReg) memUnion.floatRegBits[2];
+
+ const unsigned eCount = 16 / (1 << eSize);
+
+ if (isBigEndian64(xc->tcBase())) {
+ for (unsigned i = 0; i < eCount; i++) {
+ switch (eSize) {
+ case 0x3: // 64-bit
+ writeVecElem(&x, (XReg) gtobe(
+ (uint64_t) readVecElem(x, i, eSize)), i, eSize);
+ break;
+ case 0x2: // 32-bit
+ writeVecElem(&x, (XReg) gtobe(
+ (uint32_t) readVecElem(x, i, eSize)), i, eSize);
+ break;
+ case 0x1: // 16-bit
+ writeVecElem(&x, (XReg) gtobe(
+ (uint16_t) readVecElem(x, i, eSize)), i, eSize);
+ break;
+ default: // 8-bit
+ break; // Nothing to do here
+ }
+ }
+ } else {
+ for (unsigned i = 0; i < eCount; i++) {
+ switch (eSize) {
+ case 0x3: // 64-bit
+ writeVecElem(&x, (XReg) gtole(
+ (uint64_t) readVecElem(x, i, eSize)), i, eSize);
+ break;
+ case 0x2: // 32-bit
+ writeVecElem(&x, (XReg) gtole(
+ (uint32_t) readVecElem(x, i, eSize)), i, eSize);
+ break;
+ case 0x1: // 16-bit
+ writeVecElem(&x, (XReg) gtole(
+ (uint16_t) readVecElem(x, i, eSize)), i, eSize);
+ break;
+ default: // 8-bit
+ break; // Nothing to do here
+ }
+ }
+ }
+
+ memUnion.floatRegBits[0] = (uint32_t) x.lo;
+ memUnion.floatRegBits[1] = (uint32_t) (x.lo >> 32);
+ memUnion.floatRegBits[2] = (uint32_t) x.hi;
+ memUnion.floatRegBits[3] = (uint32_t) (x.hi >> 32);
+ '''
+
+ # Offload everything into registers
+ regSetCode = ''
+ for reg in range(4):
+ regSetCode += '''
+ AA64FpDestP%(reg)d_uw = gtoh(memUnion.floatRegBits[%(reg)d]);
+ ''' % { 'reg' : reg }
+
+ # Pull everything in from registers
+ regGetCode = ''
+ for reg in range(4):
+ regGetCode += '''
+ memUnion.floatRegBits[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
+ ''' % { 'reg' : reg }
+
+ loadMemAccCode = convCode + regSetCode
+ storeMemAccCode = regGetCode + convCode
+
+ loadIop = InstObjParams(name + 'ld',
+ 'MicroNeonLoad64',
+ 'MicroNeonMemOp',
+ { 'mem_decl' : memDecl,
+ 'memacc_code' : loadMemAccCode,
+ 'ea_code' : simd64EnabledCheckCode + eaCode,
+ },
+ [ 'IsMicroop', 'IsMemRef', 'IsLoad' ])
+ storeIop = InstObjParams(name + 'st',
+ 'MicroNeonStore64',
+ 'MicroNeonMemOp',
+ { 'mem_decl' : memDecl,
+ 'memacc_code' : storeMemAccCode,
+ 'ea_code' : simd64EnabledCheckCode + eaCode,
+ },
+ [ 'IsMicroop', 'IsMemRef', 'IsStore' ])
+
+ exec_output += NeonLoadExecute64.subst(loadIop) + \
+ NeonLoadInitiateAcc64.subst(loadIop) + \
+ NeonLoadCompleteAcc64.subst(loadIop) + \
+ NeonStoreExecute64.subst(storeIop) + \
+ NeonStoreInitiateAcc64.subst(storeIop) + \
+ NeonStoreCompleteAcc64.subst(storeIop)
+ header_output += MicroNeonMemDeclare64.subst(loadIop) + \
+ MicroNeonMemDeclare64.subst(storeIop)
+
+ def mkMarshalMicroOp(name, Name):
+ global header_output, decoder_output, exec_output
+
+ getInputCodeOp1L = ''
+ for v in range(4):
+ for p in range(4):
+ getInputCodeOp1L += '''
+ writeVecElem(&input[%(v)d], (XReg) AA64FpOp1P%(p)dV%(v)d_uw,
+ %(p)d, 0x2);
+ ''' % { 'v' : v, 'p' : p }
+
+ getInputCodeOp1S = ''
+ for v in range(4):
+ for p in range(4):
+ getInputCodeOp1S += '''
+ writeVecElem(&input[%(v)d], (XReg) AA64FpOp1P%(p)dV%(v)dS_uw,
+ %(p)d, 0x2);
+ ''' % { 'v' : v, 'p' : p }
+
+ if name == 'deint_neon_uop':
+
+ eCode = '''
+ VReg input[4]; // input data from scratch area
+ VReg output[2]; // output data to arch. SIMD regs
+ VReg temp;
+ temp.lo = 0;
+ temp.hi = 0;
+ '''
+ for p in range(4):
+ eCode += '''
+ writeVecElem(&temp, (XReg) AA64FpDestP%(p)dV1L_uw, %(p)d, 0x2);
+ ''' % { 'p' : p }
+ eCode += getInputCodeOp1L
+
+ # Note that numRegs is not always the same as numStructElems; in
+ # particular, for LD1/ST1, numStructElems is 1 but numRegs can be
+ # 1, 2, 3 or 4
+
+ eCode += '''
+ output[0].lo = 0;
+ output[0].hi = 0;
+ output[1].lo = 0;
+ output[1].hi = 0;
+
+ int eCount = dataSize / (8 << eSize);
+ int eSizeBytes = 1 << eSize; // element size in bytes
+ int numBytes = step * dataSize / 4;
+ int totNumBytes = numRegs * dataSize / 8;
+
+ int structElemNo, pos, a, b;
+ XReg data;
+
+ for (int r = 0; r < 2; ++r) {
+ for (int i = 0; i < eCount; ++i) {
+ if (numBytes < totNumBytes) {
+ structElemNo = r + (step * 2);
+ if (numStructElems == 1) {
+ pos = (eSizeBytes * i) +
+ (eCount * structElemNo * eSizeBytes);
+ } else {
+ pos = (numStructElems * eSizeBytes * i) +
+ (structElemNo * eSizeBytes);
+ }
+ a = pos / 16;
+ b = (pos % 16) / eSizeBytes;
+ data = (XReg) readVecElem(input[a], (XReg) b,
+ eSize);
+ writeVecElem(&output[r], data, i, eSize);
+ numBytes += eSizeBytes;
+ }
+ }
+ }
+ '''
+ for p in range(4):
+ eCode += '''
+ AA64FpDestP%(p)dV0L_uw = (uint32_t) readVecElem(output[0],
+ %(p)d, 0x2);
+ ''' % { 'p' : p }
+ eCode += '''
+ if ((numRegs % 2 == 0) || (numRegs == 3 && step == 0)) {
+ '''
+ for p in range(4):
+ eCode += '''
+ AA64FpDestP%(p)dV1L_uw = (uint32_t) readVecElem(
+ output[1], %(p)d, 0x2);
+ ''' % { 'p' : p }
+ eCode += '''
+ } else {
+ '''
+ for p in range(4):
+ eCode += '''
+ AA64FpDestP%(p)dV1L_uw = (uint32_t) readVecElem(temp,
+ %(p)d, 0x2);
+ ''' % { 'p' : p }
+ eCode += '''
+ }
+ '''
+
+ iop = InstObjParams(name, Name, 'MicroNeonMixOp64',
+ { 'code' : eCode }, ['IsMicroop'])
+ header_output += MicroNeonMixDeclare64.subst(iop)
+ exec_output += MicroNeonMixExecute64.subst(iop)
+
+ elif name == 'int_neon_uop':
+
+ eCode = '''
+ VReg input[4]; // input data from arch. SIMD regs
+ VReg output[2]; // output data to scratch area
+ '''
+
+ eCode += getInputCodeOp1S
+
+ # Note that numRegs is not always the same as numStructElems; in
+ # particular, for LD1/ST1, numStructElems is 1 but numRegs can be
+ # 1, 2, 3 or 4
+
+ eCode += '''
+ int eCount = dataSize / (8 << eSize);
+ int eSizeBytes = 1 << eSize;
+ int totNumBytes = numRegs * dataSize / 8;
+ int numOutputElems = 128 / (8 << eSize);
+ int stepOffset = step * 32;
+
+ for (int i = 0; i < 2; ++i) {
+ output[i].lo = 0;
+ output[i].hi = 0;
+ }
+
+ int r = 0, k = 0, i, j;
+ XReg data;
+
+ for (int pos = stepOffset; pos < 32 + stepOffset;
+ pos += eSizeBytes) {
+ if (pos < totNumBytes) {
+ if (numStructElems == 1) {
+ i = (pos / eSizeBytes) % eCount;
+ j = pos / (eCount * eSizeBytes);
+ } else {
+ i = pos / (numStructElems * eSizeBytes);
+ j = (pos % (numStructElems * eSizeBytes)) /
+ eSizeBytes;
+ }
+ data = (XReg) readVecElem(input[j], (XReg) i, eSize);
+ writeVecElem(&output[r], data, k, eSize);
+ k++;
+ if (k == numOutputElems){
+ k = 0;
+ ++r;
+ }
+ }
+ }
+ '''
+ for v in range(2):
+ for p in range(4):
+ eCode += '''
+ AA64FpDestP%(p)dV%(v)d_uw = (uint32_t) readVecElem(
+ output[%(v)d], %(p)d, 0x2);
+ ''' % { 'v': v, 'p': p}
+
+ iop = InstObjParams(name, Name, 'MicroNeonMixOp64',
+ { 'code' : eCode }, ['IsMicroop'])
+ header_output += MicroNeonMixDeclare64.subst(iop)
+ exec_output += MicroNeonMixExecute64.subst(iop)
+
+ elif name == 'unpack_neon_uop':
+
+ eCode = '''
+ VReg input[4]; //input data from scratch area
+ VReg output[2]; //output data to arch. SIMD regs
+ '''
+
+ eCode += getInputCodeOp1L
+
+ # Fill output regs with register data initially. Note that
+ # elements in output register outside indexed lanes are left
+ # untouched
+ for v in range(2):
+ for p in range(4):
+ eCode += '''
+ writeVecElem(&output[%(v)d], (XReg) AA64FpDestP%(p)dV%(v)dL_uw,
+ %(p)d, 0x2);
+ ''' % { 'v': v, 'p': p}
+ eCode += '''
+ int eCount = dataSize / (8 << eSize);
+ int eCount128 = 128 / (8 << eSize);
+ int eSizeBytes = 1 << eSize;
+ int totNumBytes = numStructElems * eSizeBytes;
+ int numInputElems = eCount128;
+ int stepOffset = step * 2 * eSizeBytes;
+ int stepLimit = 2 * eSizeBytes;
+
+ int r = 0, i, j;
+ XReg data;
+
+ for (int pos = stepOffset; pos < stepLimit + stepOffset;
+ pos += eSizeBytes) {
+ if (pos < totNumBytes) {
+ r = pos / eSizeBytes;
+ j = r / numInputElems;
+ i = r % numInputElems;
+ data = (XReg) readVecElem(input[j], (XReg) i, eSize);
+
+ if (replicate) {
+ for (int i = 0; i < eCount128; ++i) {
+ if (i < eCount) {
+ writeVecElem(&output[r % 2], data, i,
+ eSize);
+ } else { // zero extend if necessary
+ writeVecElem(&output[r % 2], (XReg) 0, i,
+ eSize);
+ }
+ }
+ } else {
+ writeVecElem(&output[r % 2], data, lane, eSize);
+ }
+ }
+ }
+ '''
+ for v in range(2):
+ for p in range(4):
+ eCode += '''
+ AA64FpDestP%(p)dV%(v)dL_uw = (uint32_t) readVecElem(
+ output[%(v)d], %(p)d, 0x2);
+ ''' % { 'v' : v, 'p' : p }
+
+ iop = InstObjParams(name, Name, 'MicroNeonMixLaneOp64',
+ { 'code' : eCode }, ['IsMicroop'])
+ header_output += MicroNeonMixLaneDeclare64.subst(iop)
+ exec_output += MicroNeonMixExecute64.subst(iop)
+
+ elif name == 'pack_neon_uop':
+
+ eCode = '''
+ VReg input[4]; // input data from arch. SIMD regs
+ VReg output[2]; // output data to scratch area
+ '''
+
+ eCode += getInputCodeOp1S
+
+ eCode += '''
+ int eSizeBytes = 1 << eSize;
+ int numOutputElems = 128 / (8 << eSize);
+ int totNumBytes = numStructElems * eSizeBytes;
+ int stepOffset = step * 32;
+ int stepLimit = 32;
+
+ int r = 0, i, j;
+ XReg data;
+
+ for (int i = 0; i < 2; ++i) {
+ output[i].lo = 0;
+ output[i].hi = 0;
+ }
+
+ for (int pos = stepOffset; pos < stepLimit + stepOffset;
+ pos += eSizeBytes) {
+ if (pos < totNumBytes) {
+ r = pos / 16;
+ j = pos / eSizeBytes;
+ i = (pos / eSizeBytes) % numOutputElems;
+ data = (XReg) readVecElem(input[j], lane, eSize);
+ writeVecElem(&output[r % 2], data, i, eSize);
+ }
+ }
+ '''
+
+ for v in range(2):
+ for p in range(4):
+ eCode += '''
+ AA64FpDestP%(p)dV%(v)d_uw = (uint32_t) readVecElem(
+ output[%(v)d], %(p)d, 0x2);
+ ''' % { 'v' : v, 'p' : p }
+
+ iop = InstObjParams(name, Name, 'MicroNeonMixLaneOp64',
+ { 'code' : eCode }, ['IsMicroop'])
+ header_output += MicroNeonMixLaneDeclare64.subst(iop)
+ exec_output += MicroNeonMixExecute64.subst(iop)
+
+ # Generate instructions
+ mkMemAccMicroOp('mem_neon_uop')
+ mkMarshalMicroOp('deint_neon_uop', 'MicroDeintNeon64')
+ mkMarshalMicroOp('int_neon_uop', 'MicroIntNeon64')
+ mkMarshalMicroOp('unpack_neon_uop', 'MicroUnpackNeon64')
+ mkMarshalMicroOp('pack_neon_uop', 'MicroPackNeon64')
+
+}};
+
+let {{
+
+ iop = InstObjParams('vldmult64', 'VldMult64', 'VldMultOp64', '', [])
+ header_output += VMemMultDeclare64.subst(iop)
+ decoder_output += VMemMultConstructor64.subst(iop)
+
+ iop = InstObjParams('vstmult64', 'VstMult64', 'VstMultOp64', '', [])
+ header_output += VMemMultDeclare64.subst(iop)
+ decoder_output += VMemMultConstructor64.subst(iop)
+
+ iop = InstObjParams('vldsingle64', 'VldSingle64', 'VldSingleOp64', '', [])
+ header_output += VMemSingleDeclare64.subst(iop)
+ decoder_output += VMemSingleConstructor64.subst(iop)
+
+ iop = InstObjParams('vstsingle64', 'VstSingle64', 'VstSingleOp64', '', [])
+ header_output += VMemSingleDeclare64.subst(iop)
+ decoder_output += VMemSingleConstructor64.subst(iop)
+
+}};
diff --git a/src/arch/arm/isa/insts/str.isa b/src/arch/arm/isa/insts/str.isa
index 80846053b..3f595692a 100644
--- a/src/arch/arm/isa/insts/str.isa
+++ b/src/arch/arm/isa/insts/str.isa
@@ -1,6 +1,6 @@
// -*- mode:c++ -*-
-// Copyright (c) 2010 ARM Limited
+// Copyright (c) 2010-2011 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -38,6 +38,7 @@
// Authors: Gabe Black
let {{
+ import math
header_output = ""
decoder_output = ""
@@ -77,7 +78,9 @@ let {{
(newHeader,
newDecoder,
newExec) = self.fillTemplates(self.name, self.Name, codeBlobs,
- self.memFlags, self.instFlags, base, wbDecl)
+ self.memFlags, self.instFlags,
+ base, wbDecl, None, False,
+ self.size, self.sign)
header_output += newHeader
decoder_output += newDecoder
@@ -171,7 +174,7 @@ let {{
self.size, self.sign, self.user)
# Add memory request flags where necessary
- self.memFlags.append("%d" % (self.size - 1))
+ self.memFlags.append("%d" % int(math.log(self.size, 2)))
if self.user:
self.memFlags.append("ArmISA::TLB::UserMode")
diff --git a/src/arch/arm/isa/insts/str64.isa b/src/arch/arm/isa/insts/str64.isa
new file mode 100644
index 000000000..c15dca16e
--- /dev/null
+++ b/src/arch/arm/isa/insts/str64.isa
@@ -0,0 +1,372 @@
+// -*- mode:c++ -*-
+
+// Copyright (c) 2011-2013 ARM Limited
+// All rights reserved
+//
+// The license below extends only to copyright in the software and shall
+// not be construed as granting a license to any other intellectual
+// property including but not limited to intellectual property relating
+// to a hardware implementation of the functionality of the software
+// licensed hereunder. You may use the software subject to the license
+// terms below provided that you ensure that this notice is replicated
+// unmodified and in its entirety in all distributions of the software,
+// modified or unmodified, in source code or in binary form.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Gabe Black
+
+let {{
+
+ header_output = ""
+ decoder_output = ""
+ exec_output = ""
+
+ class StoreInst64(LoadStoreInst):
+ execBase = 'Store64'
+ micro = False
+
+ def __init__(self, mnem, Name, size=4, user=False, flavor="normal",
+ top = False):
+ super(StoreInst64, self).__init__()
+
+ self.name = mnem
+ self.Name = Name
+ self.size = size
+ self.user = user
+ self.flavor = flavor
+ self.top = top
+
+ self.memFlags = ["ArmISA::TLB::MustBeOne"]
+ self.instFlags = []
+ self.codeBlobs = { "postacc_code" : "" }
+
+ # Add memory request flags where necessary
+ if self.user:
+ self.memFlags.append("ArmISA::TLB::UserMode")
+
+ if self.flavor in ("relexp", "exp"):
+ # For exclusive pair ops alignment check is based on total size
+ self.memFlags.append("%d" % int(math.log(self.size, 2) + 1))
+ elif not (self.size == 16 and self.top):
+ # Only the first microop should perform alignment checking.
+ self.memFlags.append("%d" % int(math.log(self.size, 2)))
+
+ if self.flavor not in ("release", "relex", "exclusive",
+ "relexp", "exp"):
+ self.memFlags.append("ArmISA::TLB::AllowUnaligned")
+
+ if self.micro:
+ self.instFlags.append("IsMicroop")
+
+ if self.flavor in ("release", "relex", "relexp"):
+ self.instFlags.extend(["IsMemBarrier",
+ "IsWriteBarrier",
+ "IsReadBarrier"])
+ if self.flavor in ("relex", "exclusive", "exp", "relexp"):
+ self.instFlags.append("IsStoreConditional")
+ self.memFlags.append("Request::LLSC")
+
+ def emitHelper(self, base = 'Memory64', wbDecl = None):
+ global header_output, decoder_output, exec_output
+
+ # If this is a microop itself, don't allow anything that would
+ # require further microcoding.
+ if self.micro:
+ assert not wbDecl
+
+ fa_code = None
+ if not self.micro and self.flavor in ("normal", "release"):
+ fa_code = '''
+ fault->annotate(ArmFault::SAS, %s);
+ fault->annotate(ArmFault::SSE, false);
+ fault->annotate(ArmFault::SRT, dest);
+ fault->annotate(ArmFault::SF, %s);
+ fault->annotate(ArmFault::AR, %s);
+ ''' % ("0" if self.size == 1 else
+ "1" if self.size == 2 else
+ "2" if self.size == 4 else "3",
+ "true" if self.size == 8 else "false",
+ "true" if self.flavor == "release" else "false")
+
+ (newHeader, newDecoder, newExec) = \
+ self.fillTemplates(self.name, self.Name, self.codeBlobs,
+ self.memFlags, self.instFlags,
+ base, wbDecl, faCode=fa_code)
+
+ header_output += newHeader
+ decoder_output += newDecoder
+ exec_output += newExec
+
+ def buildEACode(self):
+ # Address computation
+ eaCode = ""
+ if self.flavor == "fp":
+ eaCode += vfp64EnabledCheckCode
+
+ eaCode += SPAlignmentCheckCode + "EA = XBase"
+ if self.size == 16:
+ if self.top:
+ eaCode += " + (isBigEndian64(xc->tcBase()) ? 0 : 8)"
+ else:
+ eaCode += " + (isBigEndian64(xc->tcBase()) ? 8 : 0)"
+ if not self.post:
+ eaCode += self.offset
+ eaCode += ";"
+
+ self.codeBlobs["ea_code"] = eaCode
+
+
+ class StoreImmInst64(StoreInst64):
+ def __init__(self, *args, **kargs):
+ super(StoreImmInst64, self).__init__(*args, **kargs)
+ self.offset = "+ imm"
+
+ self.wbDecl = "MicroAddXiUop(machInst, base, base, imm);"
+
+ class StoreRegInst64(StoreInst64):
+ def __init__(self, *args, **kargs):
+ super(StoreRegInst64, self).__init__(*args, **kargs)
+ self.offset = "+ extendReg64(XOffset, type, shiftAmt, 64)"
+
+ self.wbDecl = \
+ "MicroAddXERegUop(machInst, base, base, " + \
+ " offset, type, shiftAmt);"
+
+ class StoreRawRegInst64(StoreInst64):
+ def __init__(self, *args, **kargs):
+ super(StoreRawRegInst64, self).__init__(*args, **kargs)
+ self.offset = ""
+
+ class StoreSingle64(StoreInst64):
+ def emit(self):
+ self.buildEACode()
+
+ # Code that actually handles the access
+ if self.flavor == "fp":
+ if self.size in (1, 2, 4):
+ accCode = '''
+ Mem%(suffix)s =
+ cSwap(AA64FpDestP0%(suffix)s, isBigEndian64(xc->tcBase()));
+ '''
+ elif self.size == 8 or (self.size == 16 and not self.top):
+ accCode = '''
+ uint64_t data = AA64FpDestP1_uw;
+ data = (data << 32) | AA64FpDestP0_uw;
+ Mem%(suffix)s = cSwap(data, isBigEndian64(xc->tcBase()));
+ '''
+ elif self.size == 16 and self.top:
+ accCode = '''
+ uint64_t data = AA64FpDestP3_uw;
+ data = (data << 32) | AA64FpDestP2_uw;
+ Mem%(suffix)s = cSwap(data, isBigEndian64(xc->tcBase()));
+ '''
+ else:
+ accCode = \
+ 'Mem%(suffix)s = cSwap(XDest%(suffix)s, isBigEndian64(xc->tcBase()));'
+ if self.size == 16:
+ accCode = accCode % \
+ { "suffix" : buildMemSuffix(False, 8) }
+ else:
+ accCode = accCode % \
+ { "suffix" : buildMemSuffix(False, self.size) }
+
+ self.codeBlobs["memacc_code"] = accCode
+
+ if self.flavor in ("relex", "exclusive"):
+ self.instFlags.append("IsStoreConditional")
+ self.memFlags.append("Request::LLSC")
+
+ # Push it out to the output files
+ wbDecl = None
+ if self.writeback and not self.micro:
+ wbDecl = self.wbDecl
+ self.emitHelper(self.base, wbDecl)
+
+ class StoreDouble64(StoreInst64):
+ def emit(self):
+ self.buildEACode()
+
+ # Code that actually handles the access
+ if self.flavor == "fp":
+ accCode = '''
+ uint64_t data = AA64FpDest2P0_uw;
+ data = (data << 32) | AA64FpDestP0_uw;
+ Mem_ud = cSwap(data, isBigEndian64(xc->tcBase()));
+ '''
+ else:
+ if self.size == 4:
+ accCode = '''
+ uint64_t data = XDest2_uw;
+ data = (data << 32) | XDest_uw;
+ Mem_ud = cSwap(data, isBigEndian64(xc->tcBase()));
+ '''
+ elif self.size == 8:
+ accCode = '''
+ // This temporary needs to be here so that the parser
+ // will correctly identify this instruction as a store.
+ Twin64_t temp;
+ temp.a = XDest_ud;
+ temp.b = XDest2_ud;
+ Mem_tud = temp;
+ '''
+ self.codeBlobs["memacc_code"] = accCode
+
+ # Push it out to the output files
+ wbDecl = None
+ if self.writeback and not self.micro:
+ wbDecl = self.wbDecl
+ self.emitHelper(self.base, wbDecl)
+
+ class StoreImm64(StoreImmInst64, StoreSingle64):
+ decConstBase = 'LoadStoreImm64'
+ base = 'ArmISA::MemoryImm64'
+ writeback = False
+ post = False
+
+ class StorePre64(StoreImmInst64, StoreSingle64):
+ decConstBase = 'LoadStoreImm64'
+ base = 'ArmISA::MemoryPreIndex64'
+ writeback = True
+ post = False
+
+ class StorePost64(StoreImmInst64, StoreSingle64):
+ decConstBase = 'LoadStoreImm64'
+ base = 'ArmISA::MemoryPostIndex64'
+ writeback = True
+ post = True
+
+ class StoreReg64(StoreRegInst64, StoreSingle64):
+ decConstBase = 'LoadStoreReg64'
+ base = 'ArmISA::MemoryReg64'
+ writeback = False
+ post = False
+
+ class StoreRaw64(StoreRawRegInst64, StoreSingle64):
+ decConstBase = 'LoadStoreRaw64'
+ base = 'ArmISA::MemoryRaw64'
+ writeback = False
+ post = False
+
+ class StoreEx64(StoreRawRegInst64, StoreSingle64):
+ decConstBase = 'LoadStoreEx64'
+ base = 'ArmISA::MemoryEx64'
+ writeback = False
+ post = False
+ execBase = 'StoreEx64'
+ def __init__(self, *args, **kargs):
+ super(StoreEx64, self).__init__(*args, **kargs)
+ self.codeBlobs["postacc_code"] = "XResult = !writeResult;"
+
+ def buildStores64(mnem, NameBase, size, flavor="normal"):
+ StoreImm64(mnem, NameBase + "_IMM", size, flavor=flavor).emit()
+ StorePre64(mnem, NameBase + "_PRE", size, flavor=flavor).emit()
+ StorePost64(mnem, NameBase + "_POST", size, flavor=flavor).emit()
+ StoreReg64(mnem, NameBase + "_REG", size, flavor=flavor).emit()
+
+ buildStores64("strb", "STRB64", 1)
+ buildStores64("strh", "STRH64", 2)
+ buildStores64("str", "STRW64", 4)
+ buildStores64("str", "STRX64", 8)
+ buildStores64("str", "STRBFP64", 1, flavor="fp")
+ buildStores64("str", "STRHFP64", 2, flavor="fp")
+ buildStores64("str", "STRSFP64", 4, flavor="fp")
+ buildStores64("str", "STRDFP64", 8, flavor="fp")
+
+ StoreImm64("sturb", "STURB64_IMM", 1).emit()
+ StoreImm64("sturh", "STURH64_IMM", 2).emit()
+ StoreImm64("stur", "STURW64_IMM", 4).emit()
+ StoreImm64("stur", "STURX64_IMM", 8).emit()
+ StoreImm64("stur", "STURBFP64_IMM", 1, flavor="fp").emit()
+ StoreImm64("stur", "STURHFP64_IMM", 2, flavor="fp").emit()
+ StoreImm64("stur", "STURSFP64_IMM", 4, flavor="fp").emit()
+ StoreImm64("stur", "STURDFP64_IMM", 8, flavor="fp").emit()
+
+ StoreImm64("sttrb", "STTRB64_IMM", 1, user=True).emit()
+ StoreImm64("sttrh", "STTRH64_IMM", 2, user=True).emit()
+ StoreImm64("sttr", "STTRW64_IMM", 4, user=True).emit()
+ StoreImm64("sttr", "STTRX64_IMM", 8, user=True).emit()
+
+ StoreRaw64("stlr", "STLRX64", 8, flavor="release").emit()
+ StoreRaw64("stlr", "STLRW64", 4, flavor="release").emit()
+ StoreRaw64("stlrh", "STLRH64", 2, flavor="release").emit()
+ StoreRaw64("stlrb", "STLRB64", 1, flavor="release").emit()
+
+ StoreEx64("stlxr", "STLXRX64", 8, flavor="relex").emit()
+ StoreEx64("stlxr", "STLXRW64", 4, flavor="relex").emit()
+ StoreEx64("stlxrh", "STLXRH64", 2, flavor="relex").emit()
+ StoreEx64("stlxrb", "STLXRB64", 1, flavor="relex").emit()
+
+ StoreEx64("stxr", "STXRX64", 8, flavor="exclusive").emit()
+ StoreEx64("stxr", "STXRW64", 4, flavor="exclusive").emit()
+ StoreEx64("stxrh", "STXRH64", 2, flavor="exclusive").emit()
+ StoreEx64("stxrb", "STXRB64", 1, flavor="exclusive").emit()
+
+ class StoreImmU64(StoreImm64):
+ decConstBase = 'LoadStoreImmU64'
+ micro = True
+
+ class StoreImmDU64(StoreImmInst64, StoreDouble64):
+ decConstBase = 'LoadStoreImmDU64'
+ base = 'ArmISA::MemoryDImm64'
+ micro = True
+ post = False
+ writeback = False
+
+ class StoreImmDEx64(StoreImmInst64, StoreDouble64):
+ execBase = 'StoreEx64'
+ decConstBase = 'StoreImmDEx64'
+ base = 'ArmISA::MemoryDImmEx64'
+ micro = False
+ post = False
+ writeback = False
+ def __init__(self, *args, **kargs):
+ super(StoreImmDEx64, self).__init__(*args, **kargs)
+ self.codeBlobs["postacc_code"] = "XResult = !writeResult;"
+
+ class StoreRegU64(StoreReg64):
+ decConstBase = 'LoadStoreRegU64'
+ micro = True
+
+ StoreImmDEx64("stlxp", "STLXPW64", 4, flavor="relexp").emit()
+ StoreImmDEx64("stlxp", "STLXPX64", 8, flavor="relexp").emit()
+ StoreImmDEx64("stxp", "STXPW64", 4, flavor="exp").emit()
+ StoreImmDEx64("stxp", "STXPX64", 8, flavor="exp").emit()
+
+ StoreImmU64("strxi_uop", "MicroStrXImmUop", 8).emit()
+ StoreRegU64("strxr_uop", "MicroStrXRegUop", 8).emit()
+ StoreImmU64("strfpxi_uop", "MicroStrFpXImmUop", 8, flavor="fp").emit()
+ StoreRegU64("strfpxr_uop", "MicroStrFpXRegUop", 8, flavor="fp").emit()
+ StoreImmU64("strqbfpxi_uop", "MicroStrQBFpXImmUop",
+ 16, flavor="fp", top=False).emit()
+ StoreRegU64("strqbfpxr_uop", "MicroStrQBFpXRegUop",
+ 16, flavor="fp", top=False).emit()
+ StoreImmU64("strqtfpxi_uop", "MicroStrQTFpXImmUop",
+ 16, flavor="fp", top=True).emit()
+ StoreRegU64("strqtfpxr_uop", "MicroStrQTFpXRegUop",
+ 16, flavor="fp", top=True).emit()
+ StoreImmDU64("strdxi_uop", "MicroStrDXImmUop", 4).emit()
+ StoreImmDU64("strdfpxi_uop", "MicroStrDFpXImmUop", 4, flavor="fp").emit()
+
+}};
diff --git a/src/arch/arm/isa/insts/swap.isa b/src/arch/arm/isa/insts/swap.isa
index b42a1c4b2..f2ceed28e 100644
--- a/src/arch/arm/isa/insts/swap.isa
+++ b/src/arch/arm/isa/insts/swap.isa
@@ -1,6 +1,6 @@
// -*- mode:c++ -*-
-// Copyright (c) 2010 ARM Limited
+// Copyright (c) 2010-2011 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -73,10 +73,7 @@ let {{
swpPreAccCode = '''
if (!((SCTLR)Sctlr).sw) {
- if (FullSystem)
- return new UndefinedInstruction;
- else
- return new UndefinedInstruction(false, mnemonic);
+ return new UndefinedInstruction(machInst, false, mnemonic);
}
'''