| author | ARM gem5 Developers <none@none> | 2014-01-24 15:29:34 -0600 |
|---|---|---|
| committer | ARM gem5 Developers <none@none> | 2014-01-24 15:29:34 -0600 |
| commit | 612f8f074fa1099cf70faf495d46cc647762a031 (patch) | |
| tree | bd1e99c43bf15292395eadd4b7ae3f5c823545c3 /src/arch/arm/isa | |
| parent | f3585c841e964c98911784a187fc4f081a02a0a6 (diff) | |
| download | gem5-612f8f074fa1099cf70faf495d46cc647762a031.tar.xz | |
arm: Add support for ARMv8 (AArch64 & AArch32)
Note: AArch64 and AArch32 interworking is not supported. If you use an AArch64
kernel, you are restricted to AArch64 user-mode binaries. This will be addressed
in a later patch.
Note: Virtualization is only supported in AArch32 mode. This will also be fixed
in a later patch.
Contributors:
Giacomo Gabrielli (TrustZone, LPAE, system-level AArch64, AArch64 NEON, validation)
Thomas Grocutt (AArch32 Virtualization, AArch64 FP, validation)
Mbou Eyole (AArch64 NEON, validation)
Ali Saidi (AArch64 Linux support, code integration, validation)
Edmund Grimley-Evans (AArch64 FP)
William Wang (AArch64 Linux support)
Rene De Jong (AArch64 Linux support, performance opt.)
Matt Horsnell (AArch64 MP, validation)
Matt Evans (device models, code integration, validation)
Chris Adeniyi-Jones (AArch64 syscall-emulation)
Prakash Ramrakhyani (validation)
Dam Sunwoo (validation)
Chander Sudanthi (validation)
Stephan Diestelhorst (validation)
Andreas Hansson (code integration, performance opt.)
Eric Van Hensbergen (performance opt.)
Gabe Black
Diffstat (limited to 'src/arch/arm/isa')
51 files changed, 15251 insertions, 399 deletions
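The bulk of the patch is the new AArch64 decoder in formats/aarch64.isa. One of the denser pieces is the expansion of logical (bitmask) immediates in `Aarch64::decodeDataProcImm()` (the `case 0x4` path visible in the diff below). The following standalone C++ sketch mirrors that logic with plain operators in place of gem5's `bits()`, `mask()` and `findMsbSet()` helpers; the function name, the `main()` driver, and the use of `__builtin_clz` are illustrative assumptions, not part of the patch.

```cpp
// Standalone sketch (not gem5 code) of the AArch64 logical-immediate
// ("bitmask") expansion performed in Aarch64::decodeDataProcImm(),
// case 0x4 of the new formats/aarch64.isa hunk below.
#include <cstdint>
#include <cstdio>

// Expand <sf, N, immr, imms> into a 32- or 64-bit immediate.
// Returns false for reserved encodings (where the decoder returns Unknown64).
static bool
expandLogicalImm(bool sf, bool n, uint8_t immr, uint8_t imms, uint64_t &imm)
{
    if (!sf && n)
        return false;                          // N must be 0 for 32-bit forms
    // len = index of the MSB of N:NOT(imms); len < 1 is reserved.
    int len;
    if (n)
        len = 6;
    else if (imms == 0x3f || imms == 0x3e)
        return false;
    else
        len = 31 - __builtin_clz(uint32_t(imms ^ 0x3f)); // stand-in for findMsbSet()
    uint64_t r = immr & ((1u << len) - 1);     // rotate amount
    uint64_t s = imms & ((1u << len) - 1);     // run length - 1
    unsigned size = 1u << len;                 // element size: 2..64 bits
    if (s == size - 1)
        return false;                          // all-ones element is reserved
    // Build one element: s+1 low bits set, rotated right by r.
    uint64_t elemMask = size == 64 ? ~0ULL : (1ULL << size) - 1;
    uint64_t pattern = (1ULL << (s + 1)) - 1;  // s + 1 <= 63 here
    if (r)
        pattern = ((pattern >> r) | (pattern << (size - r))) & elemMask;
    // Replicate the element across the destination width.
    unsigned width = sf ? 64 : 32;
    for (unsigned i = size; i < width; i *= 2)
        pattern |= pattern << i;
    imm = sf ? pattern : (pattern & 0xffffffffULL);
    return true;
}

int main()
{
    uint64_t imm;
    // sf=1, N=0, immr=0, imms=0b111100 selects a 2-bit element 0b01,
    // replicated across 64 bits: 0x5555555555555555.
    if (expandLogicalImm(true, false, 0, 0x3c, imm))
        std::printf("imm = %#llx\n", (unsigned long long)imm);
    return 0;
}
```

In the decoder itself, the expanded value is what gets passed as the immediate operand to `AndXImm`, `OrrXImm`, `EorXImm`, or `AndXImmCc`, depending on the two-bit opc field.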
diff --git a/src/arch/arm/isa/bitfields.isa b/src/arch/arm/isa/bitfields.isa index 5a8b5db6d..6006cfb2d 100644 --- a/src/arch/arm/isa/bitfields.isa +++ b/src/arch/arm/isa/bitfields.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010, 2011 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -73,6 +73,7 @@ def bitfield SEVEN_AND_FOUR sevenAndFour; def bitfield THUMB thumb; def bitfield BIGTHUMB bigThumb; +def bitfield AARCH64 aarch64; // Other def bitfield COND_CODE condCode; diff --git a/src/arch/arm/isa/decoder/aarch64.isa b/src/arch/arm/isa/decoder/aarch64.isa new file mode 100644 index 000000000..a6c0fa2df --- /dev/null +++ b/src/arch/arm/isa/decoder/aarch64.isa @@ -0,0 +1,48 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Authors: Gabe Black + +//////////////////////////////////////////////////////////////////// +// +// The 64 bit ARM decoder +// -------------------------- +// + + +Aarch64::aarch64(); + diff --git a/src/arch/arm/isa/decoder/arm.isa b/src/arch/arm/isa/decoder/arm.isa index 4bd9d5cf4..f0c0dec18 100644 --- a/src/arch/arm/isa/decoder/arm.isa +++ b/src/arch/arm/isa/decoder/arm.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010-2012 ARM Limited +// Copyright (c) 2010-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -73,7 +73,11 @@ format DataOp { 0x9: ArmBlxReg::armBlxReg(); } 0x5: ArmSatAddSub::armSatAddSub(); - 0x7: Breakpoint::bkpt(); + 0x6: ArmERet::armERet(); + 0x7: decode OPCODE_22 { + 0: Breakpoint::bkpt(); + 1: ArmSmcHyp::armSmcHyp(); + } } 0x1: ArmHalfWordMultAndMultAcc::armHalfWordMultAndMultAcc(); } @@ -105,6 +109,10 @@ format DataOp { } 0x6: decode CPNUM { 0xa, 0xb: ExtensionRegLoadStore::extensionRegLoadStore(); + 0xf: decode OPCODE_20 { + 0: Mcrr15::Mcrr15(); + 1: Mrrc15::Mrrc15(); + } } 0x7: decode OPCODE_24 { 0: decode OPCODE_4 { diff --git a/src/arch/arm/isa/decoder/decoder.isa b/src/arch/arm/isa/decoder/decoder.isa index cf7d17871..94685b943 100644 --- a/src/arch/arm/isa/decoder/decoder.isa +++ b/src/arch/arm/isa/decoder/decoder.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2011 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -41,8 +41,12 @@ // Authors: Gabe Black decode THUMB default Unknown::unknown() { -0: -##include "arm.isa" +0: decode AARCH64 { + 0: + ##include "arm.isa" + 1: + ##include "aarch64.isa" +} 1: ##include "thumb.isa" } diff --git a/src/arch/arm/isa/decoder/thumb.isa b/src/arch/arm/isa/decoder/thumb.isa index f54cc728d..31495793e 100644 --- a/src/arch/arm/isa/decoder/thumb.isa +++ b/src/arch/arm/isa/decoder/thumb.isa @@ -95,8 +95,14 @@ decode BIGTHUMB { 0xa, 0xb: ExtensionRegLoadStore::extensionRegLoadStre(); 0xf: decode HTOPCODE_9_4 { 0x00: Unknown::undefined(); - 0x04: WarnUnimpl::mcrr(); // mcrr2 - 0x05: WarnUnimpl::mrrc(); // mrrc2 + 0x04: decode LTCOPROC { + 0xf: Mcrr15::Mcrr15(); + default: WarnUnimpl::mcrr(); // mcrr2 + } + 0x05: decode LTCOPROC { + 0xf: Mrrc15::Mrrc15(); + default: WarnUnimpl::mrrc(); // mrrc2 + } 0x02, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e: WarnUnimpl::stc(); // stc2 diff --git a/src/arch/arm/isa/formats/aarch64.isa b/src/arch/arm/isa/formats/aarch64.isa new file mode 100644 index 000000000..3ed70ce81 --- /dev/null +++ b/src/arch/arm/isa/formats/aarch64.isa @@ -0,0 +1,2035 @@ +// Copyright (c) 2011-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black +// Thomas Grocutt +// Mbou Eyole +// Giacomo Gabrielli + +output header {{ +namespace Aarch64 +{ + StaticInstPtr decodeDataProcImm(ExtMachInst machInst); + StaticInstPtr decodeBranchExcSys(ExtMachInst machInst); + StaticInstPtr decodeLoadsStores(ExtMachInst machInst); + StaticInstPtr decodeDataProcReg(ExtMachInst machInst); + + StaticInstPtr decodeFpAdvSIMD(ExtMachInst machInst); + StaticInstPtr decodeFp(ExtMachInst machInst); + StaticInstPtr decodeAdvSIMD(ExtMachInst machInst); + StaticInstPtr decodeAdvSIMDScalar(ExtMachInst machInst); + + StaticInstPtr decodeGem5Ops(ExtMachInst machInst); +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeDataProcImm(ExtMachInst machInst) + { + IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rdsp = makeSP(rd); + IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + + uint8_t opc = bits(machInst, 30, 29); + bool sf = bits(machInst, 31); + bool n = bits(machInst, 22); + uint8_t immr = bits(machInst, 21, 16); + uint8_t imms = bits(machInst, 15, 10); + switch (bits(machInst, 25, 23)) { + case 0x0: + case 0x1: + { + uint64_t immlo = bits(machInst, 30, 29); + uint64_t immhi = bits(machInst, 23, 5); + uint64_t imm = (immlo << 0) | (immhi << 2); + if (bits(machInst, 31) == 0) + return new AdrXImm(machInst, rd, INTREG_ZERO, sext<21>(imm)); + else + return new AdrpXImm(machInst, rd, INTREG_ZERO, + sext<33>(imm << 12)); + } + case 0x2: + case 0x3: + { + uint32_t imm12 = bits(machInst, 21, 10); + uint8_t shift = bits(machInst, 23, 22); + uint32_t imm; + if (shift == 0x0) + imm = imm12 << 0; + else if (shift == 0x1) + imm = imm12 << 12; + else + return new Unknown64(machInst); + switch (opc) { + case 0x0: + return new AddXImm(machInst, rdsp, rnsp, imm); + case 0x1: + return new AddXImmCc(machInst, rd, rnsp, imm); + case 0x2: + return new SubXImm(machInst, rdsp, rnsp, imm); + case 0x3: + return new SubXImmCc(machInst, rd, rnsp, imm); + } + } + case 0x4: + { + if (!sf 
&& n) + return new Unknown64(machInst); + // len = MSB(n:NOT(imms)), len < 1 is undefined. + uint8_t len = 0; + if (n) { + len = 6; + } else if (imms == 0x3f || imms == 0x3e) { + return new Unknown64(machInst); + } else { + len = findMsbSet(imms ^ 0x3f); + } + // Generate r, s, and size. + uint64_t r = bits(immr, len - 1, 0); + uint64_t s = bits(imms, len - 1, 0); + uint8_t size = 1 << len; + if (s == size - 1) + return new Unknown64(machInst); + // Generate the pattern with s 1s, rotated by r, with size bits. + uint64_t pattern = mask(s + 1); + if (r) { + pattern = (pattern >> r) | (pattern << (size - r)); + pattern &= mask(size); + } + uint8_t width = sf ? 64 : 32; + // Replicate that to fill up the immediate. + for (unsigned i = 1; i < (width / size); i *= 2) + pattern |= (pattern << (i * size)); + uint64_t imm = pattern; + + switch (opc) { + case 0x0: + return new AndXImm(machInst, rdsp, rn, imm); + case 0x1: + return new OrrXImm(machInst, rdsp, rn, imm); + case 0x2: + return new EorXImm(machInst, rdsp, rn, imm); + case 0x3: + return new AndXImmCc(machInst, rd, rn, imm); + } + } + case 0x5: + { + IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + uint32_t imm16 = bits(machInst, 20, 5); + uint32_t hw = bits(machInst, 22, 21); + switch (opc) { + case 0x0: + return new Movn(machInst, rd, imm16, hw * 16); + case 0x1: + return new Unknown64(machInst); + case 0x2: + return new Movz(machInst, rd, imm16, hw * 16); + case 0x3: + return new Movk(machInst, rd, imm16, hw * 16); + } + } + case 0x6: + if ((sf != n) || (!sf && (bits(immr, 5) || bits(imms, 5)))) + return new Unknown64(machInst); + switch (opc) { + case 0x0: + return new Sbfm64(machInst, rd, rn, immr, imms); + case 0x1: + return new Bfm64(machInst, rd, rn, immr, imms); + case 0x2: + return new Ubfm64(machInst, rd, rn, immr, imms); + case 0x3: + return new Unknown64(machInst); + } + case 0x7: + { + IntRegIndex rm = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + if (opc || bits(machInst, 21)) + return new Unknown64(machInst); + else + return new Extr64(machInst, rd, rn, rm, imms); + } + } + return new FailUnimplemented("Unhandled Case8", machInst); + } +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeBranchExcSys(ExtMachInst machInst) + { + switch (bits(machInst, 30, 29)) { + case 0x0: + { + int64_t imm = sext<26>(bits(machInst, 25, 0)) << 2; + if (bits(machInst, 31) == 0) + return new B64(machInst, imm); + else + return new Bl64(machInst, imm); + } + case 0x1: + { + IntRegIndex rt = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + if (bits(machInst, 25) == 0) { + int64_t imm = sext<19>(bits(machInst, 23, 5)) << 2; + if (bits(machInst, 24) == 0) + return new Cbz64(machInst, imm, rt); + else + return new Cbnz64(machInst, imm, rt); + } else { + uint64_t bitmask = 0x1; + bitmask <<= bits(machInst, 23, 19); + int64_t imm = sext<14>(bits(machInst, 18, 5)) << 2; + if (bits(machInst, 31)) + bitmask <<= 32; + if (bits(machInst, 24) == 0) + return new Tbz64(machInst, bitmask, imm, rt); + else + return new Tbnz64(machInst, bitmask, imm, rt); + } + } + case 0x2: + // bit 30:26=10101 + if (bits(machInst, 31) == 0) { + if (bits(machInst, 25, 24) || bits(machInst, 4)) + return new Unknown64(machInst); + int64_t imm = sext<19>(bits(machInst, 23, 5)) << 2; + ConditionCode condCode = + (ConditionCode)(uint8_t)(bits(machInst, 3, 0)); + return new BCond64(machInst, imm, condCode); + } else if (bits(machInst, 25, 24) == 0x0) { + if (bits(machInst, 4, 2)) + return new Unknown64(machInst); + uint8_t decVal = 
(bits(machInst, 1, 0) << 0) | + (bits(machInst, 23, 21) << 2); + switch (decVal) { + case 0x01: + return new Svc64(machInst); + case 0x02: + return new FailUnimplemented("hvc", machInst); + case 0x03: + return new Smc64(machInst); + case 0x04: + return new FailUnimplemented("brk", machInst); + case 0x08: + return new FailUnimplemented("hlt", machInst); + case 0x15: + return new FailUnimplemented("dcps1", machInst); + case 0x16: + return new FailUnimplemented("dcps2", machInst); + case 0x17: + return new FailUnimplemented("dcps3", machInst); + default: + return new Unknown64(machInst); + } + } else if (bits(machInst, 25, 22) == 0x4) { + // bit 31:22=1101010100 + bool l = bits(machInst, 21); + uint8_t op0 = bits(machInst, 20, 19); + uint8_t op1 = bits(machInst, 18, 16); + uint8_t crn = bits(machInst, 15, 12); + uint8_t crm = bits(machInst, 11, 8); + uint8_t op2 = bits(machInst, 7, 5); + IntRegIndex rt = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + switch (op0) { + case 0x0: + if (rt != 0x1f || l) + return new Unknown64(machInst); + if (crn == 0x2 && op1 == 0x3) { + switch (op2) { + case 0x0: + return new NopInst(machInst); + case 0x1: + return new YieldInst(machInst); + case 0x2: + return new WfeInst(machInst); + case 0x3: + return new WfiInst(machInst); + case 0x4: + return new SevInst(machInst); + case 0x5: + return new SevlInst(machInst); + default: + return new Unknown64(machInst); + } + } else if (crn == 0x3 && op1 == 0x3) { + switch (op2) { + case 0x2: + return new Clrex64(machInst); + case 0x4: + return new Dsb64(machInst); + case 0x5: + return new Dmb64(machInst); + case 0x6: + return new Isb64(machInst); + default: + return new Unknown64(machInst); + } + } else if (crn == 0x4) { + // MSR immediate + switch (op1 << 3 | op2) { + case 0x5: + // SP + return new MsrSP64(machInst, + (IntRegIndex) MISCREG_SPSEL, + INTREG_ZERO, + crm & 0x1); + case 0x1e: + // DAIFSet + return new MsrDAIFSet64( + machInst, + (IntRegIndex) MISCREG_DAIF, + INTREG_ZERO, + crm); + case 0x1f: + // DAIFClr + return new MsrDAIFClr64( + machInst, + (IntRegIndex) MISCREG_DAIF, + INTREG_ZERO, + crm); + default: + return new Unknown64(machInst); + } + } else { + return new Unknown64(machInst); + } + break; + case 0x1: + case 0x2: + case 0x3: + { + // bit 31:22=1101010100, 20:19=11 + bool read = l; + MiscRegIndex miscReg = + decodeAArch64SysReg(op0, op1, crn, crm, op2); + if (read) { + if ((miscReg == MISCREG_DC_CIVAC_Xt) || + (miscReg == MISCREG_DC_CVAC_Xt) || + (miscReg == MISCREG_DC_ZVA_Xt)) { + return new Unknown64(machInst); + } + } + // Check for invalid registers + if (miscReg == MISCREG_UNKNOWN) { + return new Unknown64(machInst); + } else if (miscRegInfo[miscReg][MISCREG_IMPLEMENTED]) { + if (miscReg == MISCREG_NZCV) { + if (read) + return new MrsNZCV64(machInst, rt, (IntRegIndex) miscReg); + else + return new MsrNZCV64(machInst, (IntRegIndex) miscReg, rt); + } + uint32_t iss = msrMrs64IssBuild(read, op0, op1, crn, crm, op2, rt); + if (miscReg == MISCREG_DC_ZVA_Xt && !read) + return new Dczva(machInst, rt, (IntRegIndex) miscReg, iss); + + if (read) + return new Mrs64(machInst, rt, (IntRegIndex) miscReg, iss); + else + return new Msr64(machInst, (IntRegIndex) miscReg, rt, iss); + } else if (miscRegInfo[miscReg][MISCREG_WARN_NOT_FAIL]) { + std::string full_mnem = csprintf("%s %s", + read ? "mrs" : "msr", miscRegName[miscReg]); + return new WarnUnimplemented(read ? "mrs" : "msr", + machInst, full_mnem); + } else { + return new FailUnimplemented(csprintf("%s %s", + read ? 
"mrs" : "msr", miscRegName[miscReg]).c_str(), + machInst); + } + } + break; + } + } else if (bits(machInst, 25) == 0x1) { + uint8_t opc = bits(machInst, 24, 21); + uint8_t op2 = bits(machInst, 20, 16); + uint8_t op3 = bits(machInst, 15, 10); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + uint8_t op4 = bits(machInst, 4, 0); + if (op2 != 0x1f || op3 != 0x0 || op4 != 0x0) + return new Unknown64(machInst); + switch (opc) { + case 0x0: + return new Br64(machInst, rn); + case 0x1: + return new Blr64(machInst, rn); + case 0x2: + return new Ret64(machInst, rn); + case 0x4: + if (rn != 0x1f) + return new Unknown64(machInst); + return new Eret64(machInst); + case 0x5: + if (rn != 0x1f) + return new Unknown64(machInst); + return new FailUnimplemented("dret", machInst); + } + } + default: + return new Unknown64(machInst); + } + return new FailUnimplemented("Unhandled Case7", machInst); + } +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeLoadsStores(ExtMachInst machInst) + { + // bit 27,25=10 + switch (bits(machInst, 29, 28)) { + case 0x0: + if (bits(machInst, 26) == 0) { + if (bits(machInst, 24) != 0) + return new Unknown64(machInst); + IntRegIndex rt = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + IntRegIndex rt2 = (IntRegIndex)(uint8_t)bits(machInst, 14, 10); + IntRegIndex rs = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + uint8_t opc = (bits(machInst, 15) << 0) | + (bits(machInst, 23, 21) << 1); + uint8_t size = bits(machInst, 31, 30); + switch (opc) { + case 0x0: + switch (size) { + case 0x0: + return new STXRB64(machInst, rt, rnsp, rs); + case 0x1: + return new STXRH64(machInst, rt, rnsp, rs); + case 0x2: + return new STXRW64(machInst, rt, rnsp, rs); + case 0x3: + return new STXRX64(machInst, rt, rnsp, rs); + } + case 0x1: + switch (size) { + case 0x0: + return new STLXRB64(machInst, rt, rnsp, rs); + case 0x1: + return new STLXRH64(machInst, rt, rnsp, rs); + case 0x2: + return new STLXRW64(machInst, rt, rnsp, rs); + case 0x3: + return new STLXRX64(machInst, rt, rnsp, rs); + } + case 0x2: + switch (size) { + case 0x0: + case 0x1: + return new Unknown64(machInst); + case 0x2: + return new STXPW64(machInst, rs, rt, rt2, rnsp); + case 0x3: + return new STXPX64(machInst, rs, rt, rt2, rnsp); + } + + case 0x3: + switch (size) { + case 0x0: + case 0x1: + return new Unknown64(machInst); + case 0x2: + return new STLXPW64(machInst, rs, rt, rt2, rnsp); + case 0x3: + return new STLXPX64(machInst, rs, rt, rt2, rnsp); + } + + case 0x4: + switch (size) { + case 0x0: + return new LDXRB64(machInst, rt, rnsp, rs); + case 0x1: + return new LDXRH64(machInst, rt, rnsp, rs); + case 0x2: + return new LDXRW64(machInst, rt, rnsp, rs); + case 0x3: + return new LDXRX64(machInst, rt, rnsp, rs); + } + case 0x5: + switch (size) { + case 0x0: + return new LDAXRB64(machInst, rt, rnsp, rs); + case 0x1: + return new LDAXRH64(machInst, rt, rnsp, rs); + case 0x2: + return new LDAXRW64(machInst, rt, rnsp, rs); + case 0x3: + return new LDAXRX64(machInst, rt, rnsp, rs); + } + case 0x6: + switch (size) { + case 0x0: + case 0x1: + return new Unknown64(machInst); + case 0x2: + return new LDXPW64(machInst, rt, rt2, rnsp); + case 0x3: + return new LDXPX64(machInst, rt, rt2, rnsp); + } + + case 0x7: + switch (size) { + case 0x0: + case 0x1: + return new Unknown64(machInst); + case 0x2: + return new LDAXPW64(machInst, rt, rt2, rnsp); + case 0x3: + return new LDAXPX64(machInst, rt, rt2, rnsp); + } + + 
case 0x9: + switch (size) { + case 0x0: + return new STLRB64(machInst, rt, rnsp); + case 0x1: + return new STLRH64(machInst, rt, rnsp); + case 0x2: + return new STLRW64(machInst, rt, rnsp); + case 0x3: + return new STLRX64(machInst, rt, rnsp); + } + case 0xd: + switch (size) { + case 0x0: + return new LDARB64(machInst, rt, rnsp); + case 0x1: + return new LDARH64(machInst, rt, rnsp); + case 0x2: + return new LDARW64(machInst, rt, rnsp); + case 0x3: + return new LDARX64(machInst, rt, rnsp); + } + default: + return new Unknown64(machInst); + } + } else if (bits(machInst, 31)) { + return new Unknown64(machInst); + } else { + return decodeNeonMem(machInst); + } + case 0x1: + { + if (bits(machInst, 24) != 0) + return new Unknown64(machInst); + uint8_t switchVal = (bits(machInst, 26) << 0) | + (bits(machInst, 31, 30) << 1); + int64_t imm = sext<19>(bits(machInst, 23, 5)) << 2; + IntRegIndex rt = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + switch (switchVal) { + case 0x0: + return new LDRWL64_LIT(machInst, rt, imm); + case 0x1: + return new LDRSFP64_LIT(machInst, rt, imm); + case 0x2: + return new LDRXL64_LIT(machInst, rt, imm); + case 0x3: + return new LDRDFP64_LIT(machInst, rt, imm); + case 0x4: + return new LDRSWL64_LIT(machInst, rt, imm); + case 0x5: + return new BigFpMemLit("ldr", machInst, rt, imm); + case 0x6: + return new PRFM64_LIT(machInst, rt, imm); + default: + return new Unknown64(machInst); + } + } + case 0x2: + { + uint8_t opc = bits(machInst, 31, 30); + if (opc >= 3) + return new Unknown64(machInst); + uint32_t size = 0; + bool fp = bits(machInst, 26); + bool load = bits(machInst, 22); + if (fp) { + size = 4 << opc; + } else { + if ((opc == 1) && !load) + return new Unknown64(machInst); + size = (opc == 0 || opc == 1) ? 4 : 8; + } + uint8_t type = bits(machInst, 24, 23); + int64_t imm = sext<7>(bits(machInst, 21, 15)) * size; + + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex rt = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rt2 = (IntRegIndex)(uint8_t)bits(machInst, 14, 10); + + bool noAlloc = (type == 0); + bool signExt = !noAlloc && !fp && opc == 1; + PairMemOp::AddrMode mode; + const char *mnemonic = NULL; + switch (type) { + case 0x0: + case 0x2: + mode = PairMemOp::AddrMd_Offset; + break; + case 0x1: + mode = PairMemOp::AddrMd_PostIndex; + break; + case 0x3: + mode = PairMemOp::AddrMd_PreIndex; + break; + default: + return new Unknown64(machInst); + } + if (load) { + if (noAlloc) + mnemonic = "ldnp"; + else if (signExt) + mnemonic = "ldpsw"; + else + mnemonic = "ldp"; + } else { + if (noAlloc) + mnemonic = "stnp"; + else + mnemonic = "stp"; + } + + return new LdpStp(mnemonic, machInst, size, fp, load, noAlloc, + signExt, false, false, imm, mode, rn, rt, rt2); + } + // bit 29:27=111, 25=0 + case 0x3: + { + uint8_t switchVal = (bits(machInst, 23, 22) << 0) | + (bits(machInst, 26) << 2) | + (bits(machInst, 31, 30) << 3); + if (bits(machInst, 24) == 1) { + uint64_t imm12 = bits(machInst, 21, 10); + IntRegIndex rt = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + switch (switchVal) { + case 0x00: + return new STRB64_IMM(machInst, rt, rnsp, imm12); + case 0x01: + return new LDRB64_IMM(machInst, rt, rnsp, imm12); + case 0x02: + return new LDRSBX64_IMM(machInst, rt, rnsp, imm12); + case 0x03: + return new LDRSBW64_IMM(machInst, rt, rnsp, imm12); + case 0x04: + return new STRBFP64_IMM(machInst, rt, rnsp, imm12); + case 0x05: + return new 
LDRBFP64_IMM(machInst, rt, rnsp, imm12); + case 0x06: + return new BigFpMemImm("str", machInst, false, + rt, rnsp, imm12 << 4); + case 0x07: + return new BigFpMemImm("ldr", machInst, true, + rt, rnsp, imm12 << 4); + case 0x08: + return new STRH64_IMM(machInst, rt, rnsp, imm12 << 1); + case 0x09: + return new LDRH64_IMM(machInst, rt, rnsp, imm12 << 1); + case 0x0a: + return new LDRSHX64_IMM(machInst, rt, rnsp, imm12 << 1); + case 0x0b: + return new LDRSHW64_IMM(machInst, rt, rnsp, imm12 << 1); + case 0x0c: + return new STRHFP64_IMM(machInst, rt, rnsp, imm12 << 1); + case 0x0d: + return new LDRHFP64_IMM(machInst, rt, rnsp, imm12 << 1); + case 0x10: + return new STRW64_IMM(machInst, rt, rnsp, imm12 << 2); + case 0x11: + return new LDRW64_IMM(machInst, rt, rnsp, imm12 << 2); + case 0x12: + return new LDRSW64_IMM(machInst, rt, rnsp, imm12 << 2); + case 0x14: + return new STRSFP64_IMM(machInst, rt, rnsp, imm12 << 2); + case 0x15: + return new LDRSFP64_IMM(machInst, rt, rnsp, imm12 << 2); + case 0x18: + return new STRX64_IMM(machInst, rt, rnsp, imm12 << 3); + case 0x19: + return new LDRX64_IMM(machInst, rt, rnsp, imm12 << 3); + case 0x1a: + return new PRFM64_IMM(machInst, rt, rnsp, imm12 << 3); + case 0x1c: + return new STRDFP64_IMM(machInst, rt, rnsp, imm12 << 3); + case 0x1d: + return new LDRDFP64_IMM(machInst, rt, rnsp, imm12 << 3); + default: + return new Unknown64(machInst); + } + } else if (bits(machInst, 21) == 1) { + if (bits(machInst, 11, 10) != 0x2) + return new Unknown64(machInst); + if (!bits(machInst, 14)) + return new Unknown64(machInst); + IntRegIndex rt = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + IntRegIndex rm = (IntRegIndex)(uint32_t)bits(machInst, 20, 16); + ArmExtendType type = + (ArmExtendType)(uint32_t)bits(machInst, 15, 13); + uint8_t s = bits(machInst, 12); + switch (switchVal) { + case 0x00: + return new STRB64_REG(machInst, rt, rnsp, rm, type, 0); + case 0x01: + return new LDRB64_REG(machInst, rt, rnsp, rm, type, 0); + case 0x02: + return new LDRSBX64_REG(machInst, rt, rnsp, rm, type, 0); + case 0x03: + return new LDRSBW64_REG(machInst, rt, rnsp, rm, type, 0); + case 0x04: + return new STRBFP64_REG(machInst, rt, rnsp, rm, type, 0); + case 0x05: + return new LDRBFP64_REG(machInst, rt, rnsp, rm, type, 0); + case 0x6: + return new BigFpMemReg("str", machInst, false, + rt, rnsp, rm, type, s * 4); + case 0x7: + return new BigFpMemReg("ldr", machInst, true, + rt, rnsp, rm, type, s * 4); + case 0x08: + return new STRH64_REG(machInst, rt, rnsp, rm, type, s); + case 0x09: + return new LDRH64_REG(machInst, rt, rnsp, rm, type, s); + case 0x0a: + return new LDRSHX64_REG(machInst, rt, rnsp, rm, type, s); + case 0x0b: + return new LDRSHW64_REG(machInst, rt, rnsp, rm, type, s); + case 0x0c: + return new STRHFP64_REG(machInst, rt, rnsp, rm, type, s); + case 0x0d: + return new LDRHFP64_REG(machInst, rt, rnsp, rm, type, s); + case 0x10: + return new STRW64_REG(machInst, rt, rnsp, rm, type, s * 2); + case 0x11: + return new LDRW64_REG(machInst, rt, rnsp, rm, type, s * 2); + case 0x12: + return new LDRSW64_REG(machInst, rt, rnsp, rm, type, s * 2); + case 0x14: + return new STRSFP64_REG(machInst, rt, rnsp, rm, type, s * 2); + case 0x15: + return new LDRSFP64_REG(machInst, rt, rnsp, rm, type, s * 2); + case 0x18: + return new STRX64_REG(machInst, rt, rnsp, rm, type, s * 3); + case 0x19: + return new LDRX64_REG(machInst, rt, rnsp, rm, type, s * 3); + case 0x1a: + return new 
PRFM64_REG(machInst, rt, rnsp, rm, type, s * 3); + case 0x1c: + return new STRDFP64_REG(machInst, rt, rnsp, rm, type, s * 3); + case 0x1d: + return new LDRDFP64_REG(machInst, rt, rnsp, rm, type, s * 3); + default: + return new Unknown64(machInst); + } + } else { + // bit 29:27=111, 25:24=00, 21=0 + switch (bits(machInst, 11, 10)) { + case 0x0: + { + IntRegIndex rt = + (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = + (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + uint64_t imm = sext<9>(bits(machInst, 20, 12)); + switch (switchVal) { + case 0x00: + return new STURB64_IMM(machInst, rt, rnsp, imm); + case 0x01: + return new LDURB64_IMM(machInst, rt, rnsp, imm); + case 0x02: + return new LDURSBX64_IMM(machInst, rt, rnsp, imm); + case 0x03: + return new LDURSBW64_IMM(machInst, rt, rnsp, imm); + case 0x04: + return new STURBFP64_IMM(machInst, rt, rnsp, imm); + case 0x05: + return new LDURBFP64_IMM(machInst, rt, rnsp, imm); + case 0x06: + return new BigFpMemImm("stur", machInst, false, + rt, rnsp, imm); + case 0x07: + return new BigFpMemImm("ldur", machInst, true, + rt, rnsp, imm); + case 0x08: + return new STURH64_IMM(machInst, rt, rnsp, imm); + case 0x09: + return new LDURH64_IMM(machInst, rt, rnsp, imm); + case 0x0a: + return new LDURSHX64_IMM(machInst, rt, rnsp, imm); + case 0x0b: + return new LDURSHW64_IMM(machInst, rt, rnsp, imm); + case 0x0c: + return new STURHFP64_IMM(machInst, rt, rnsp, imm); + case 0x0d: + return new LDURHFP64_IMM(machInst, rt, rnsp, imm); + case 0x10: + return new STURW64_IMM(machInst, rt, rnsp, imm); + case 0x11: + return new LDURW64_IMM(machInst, rt, rnsp, imm); + case 0x12: + return new LDURSW64_IMM(machInst, rt, rnsp, imm); + case 0x14: + return new STURSFP64_IMM(machInst, rt, rnsp, imm); + case 0x15: + return new LDURSFP64_IMM(machInst, rt, rnsp, imm); + case 0x18: + return new STURX64_IMM(machInst, rt, rnsp, imm); + case 0x19: + return new LDURX64_IMM(machInst, rt, rnsp, imm); + case 0x1a: + return new PRFUM64_IMM(machInst, rt, rnsp, imm); + case 0x1c: + return new STURDFP64_IMM(machInst, rt, rnsp, imm); + case 0x1d: + return new LDURDFP64_IMM(machInst, rt, rnsp, imm); + default: + return new Unknown64(machInst); + } + } + // bit 29:27=111, 25:24=00, 21=0, 11:10=01 + case 0x1: + { + IntRegIndex rt = + (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = + (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + uint64_t imm = sext<9>(bits(machInst, 20, 12)); + switch (switchVal) { + case 0x00: + return new STRB64_POST(machInst, rt, rnsp, imm); + case 0x01: + return new LDRB64_POST(machInst, rt, rnsp, imm); + case 0x02: + return new LDRSBX64_POST(machInst, rt, rnsp, imm); + case 0x03: + return new LDRSBW64_POST(machInst, rt, rnsp, imm); + case 0x04: + return new STRBFP64_POST(machInst, rt, rnsp, imm); + case 0x05: + return new LDRBFP64_POST(machInst, rt, rnsp, imm); + case 0x06: + return new BigFpMemPost("str", machInst, false, + rt, rnsp, imm); + case 0x07: + return new BigFpMemPost("ldr", machInst, true, + rt, rnsp, imm); + case 0x08: + return new STRH64_POST(machInst, rt, rnsp, imm); + case 0x09: + return new LDRH64_POST(machInst, rt, rnsp, imm); + case 0x0a: + return new LDRSHX64_POST(machInst, rt, rnsp, imm); + case 0x0b: + return new LDRSHW64_POST(machInst, rt, rnsp, imm); + case 0x0c: + return new STRHFP64_POST(machInst, rt, rnsp, imm); + case 0x0d: + return new LDRHFP64_POST(machInst, rt, rnsp, imm); + case 0x10: + return new STRW64_POST(machInst, rt, rnsp, imm); + case 
0x11: + return new LDRW64_POST(machInst, rt, rnsp, imm); + case 0x12: + return new LDRSW64_POST(machInst, rt, rnsp, imm); + case 0x14: + return new STRSFP64_POST(machInst, rt, rnsp, imm); + case 0x15: + return new LDRSFP64_POST(machInst, rt, rnsp, imm); + case 0x18: + return new STRX64_POST(machInst, rt, rnsp, imm); + case 0x19: + return new LDRX64_POST(machInst, rt, rnsp, imm); + case 0x1c: + return new STRDFP64_POST(machInst, rt, rnsp, imm); + case 0x1d: + return new LDRDFP64_POST(machInst, rt, rnsp, imm); + default: + return new Unknown64(machInst); + } + } + case 0x2: + { + IntRegIndex rt = + (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = + (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + uint64_t imm = sext<9>(bits(machInst, 20, 12)); + switch (switchVal) { + case 0x00: + return new STTRB64_IMM(machInst, rt, rnsp, imm); + case 0x01: + return new LDTRB64_IMM(machInst, rt, rnsp, imm); + case 0x02: + return new LDTRSBX64_IMM(machInst, rt, rnsp, imm); + case 0x03: + return new LDTRSBW64_IMM(machInst, rt, rnsp, imm); + case 0x08: + return new STTRH64_IMM(machInst, rt, rnsp, imm); + case 0x09: + return new LDTRH64_IMM(machInst, rt, rnsp, imm); + case 0x0a: + return new LDTRSHX64_IMM(machInst, rt, rnsp, imm); + case 0x0b: + return new LDTRSHW64_IMM(machInst, rt, rnsp, imm); + case 0x10: + return new STTRW64_IMM(machInst, rt, rnsp, imm); + case 0x11: + return new LDTRW64_IMM(machInst, rt, rnsp, imm); + case 0x12: + return new LDTRSW64_IMM(machInst, rt, rnsp, imm); + case 0x18: + return new STTRX64_IMM(machInst, rt, rnsp, imm); + case 0x19: + return new LDTRX64_IMM(machInst, rt, rnsp, imm); + default: + return new Unknown64(machInst); + } + } + case 0x3: + { + IntRegIndex rt = + (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = + (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + uint64_t imm = sext<9>(bits(machInst, 20, 12)); + switch (switchVal) { + case 0x00: + return new STRB64_PRE(machInst, rt, rnsp, imm); + case 0x01: + return new LDRB64_PRE(machInst, rt, rnsp, imm); + case 0x02: + return new LDRSBX64_PRE(machInst, rt, rnsp, imm); + case 0x03: + return new LDRSBW64_PRE(machInst, rt, rnsp, imm); + case 0x04: + return new STRBFP64_PRE(machInst, rt, rnsp, imm); + case 0x05: + return new LDRBFP64_PRE(machInst, rt, rnsp, imm); + case 0x06: + return new BigFpMemPre("str", machInst, false, + rt, rnsp, imm); + case 0x07: + return new BigFpMemPre("ldr", machInst, true, + rt, rnsp, imm); + case 0x08: + return new STRH64_PRE(machInst, rt, rnsp, imm); + case 0x09: + return new LDRH64_PRE(machInst, rt, rnsp, imm); + case 0x0a: + return new LDRSHX64_PRE(machInst, rt, rnsp, imm); + case 0x0b: + return new LDRSHW64_PRE(machInst, rt, rnsp, imm); + case 0x0c: + return new STRHFP64_PRE(machInst, rt, rnsp, imm); + case 0x0d: + return new LDRHFP64_PRE(machInst, rt, rnsp, imm); + case 0x10: + return new STRW64_PRE(machInst, rt, rnsp, imm); + case 0x11: + return new LDRW64_PRE(machInst, rt, rnsp, imm); + case 0x12: + return new LDRSW64_PRE(machInst, rt, rnsp, imm); + case 0x14: + return new STRSFP64_PRE(machInst, rt, rnsp, imm); + case 0x15: + return new LDRSFP64_PRE(machInst, rt, rnsp, imm); + case 0x18: + return new STRX64_PRE(machInst, rt, rnsp, imm); + case 0x19: + return new LDRX64_PRE(machInst, rt, rnsp, imm); + case 0x1c: + return new STRDFP64_PRE(machInst, rt, rnsp, imm); + case 0x1d: + return new LDRDFP64_PRE(machInst, rt, rnsp, imm); + default: + return new Unknown64(machInst); + } + } + } + } + } + } + return 
new FailUnimplemented("Unhandled Case1", machInst); + } +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeDataProcReg(ExtMachInst machInst) + { + uint8_t switchVal = (bits(machInst, 28) << 1) | + (bits(machInst, 24) << 0); + switch (switchVal) { + case 0x0: + { + uint8_t switchVal = (bits(machInst, 21) << 0) | + (bits(machInst, 30, 29) << 1); + ArmShiftType type = (ArmShiftType)(uint8_t)bits(machInst, 23, 22); + uint8_t imm6 = bits(machInst, 15, 10); + bool sf = bits(machInst, 31); + if (!sf && (imm6 & 0x20)) + return new Unknown64(machInst); + IntRegIndex rd = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + + switch (switchVal) { + case 0x0: + return new AndXSReg(machInst, rd, rn, rm, imm6, type); + case 0x1: + return new BicXSReg(machInst, rd, rn, rm, imm6, type); + case 0x2: + return new OrrXSReg(machInst, rd, rn, rm, imm6, type); + case 0x3: + return new OrnXSReg(machInst, rd, rn, rm, imm6, type); + case 0x4: + return new EorXSReg(machInst, rd, rn, rm, imm6, type); + case 0x5: + return new EonXSReg(machInst, rd, rn, rm, imm6, type); + case 0x6: + return new AndXSRegCc(machInst, rd, rn, rm, imm6, type); + case 0x7: + return new BicXSRegCc(machInst, rd, rn, rm, imm6, type); + } + } + case 0x1: + { + uint8_t switchVal = bits(machInst, 30, 29); + if (bits(machInst, 21) == 0) { + ArmShiftType type = + (ArmShiftType)(uint8_t)bits(machInst, 23, 22); + if (type == ROR) + return new Unknown64(machInst); + uint8_t imm6 = bits(machInst, 15, 10); + if (!bits(machInst, 31) && bits(imm6, 5)) + return new Unknown64(machInst); + IntRegIndex rd = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + switch (switchVal) { + case 0x0: + return new AddXSReg(machInst, rd, rn, rm, imm6, type); + case 0x1: + return new AddXSRegCc(machInst, rd, rn, rm, imm6, type); + case 0x2: + return new SubXSReg(machInst, rd, rn, rm, imm6, type); + case 0x3: + return new SubXSRegCc(machInst, rd, rn, rm, imm6, type); + } + } else { + if (bits(machInst, 23, 22) != 0 || bits(machInst, 12, 10) > 0x4) + return new Unknown64(machInst); + ArmExtendType type = + (ArmExtendType)(uint8_t)bits(machInst, 15, 13); + uint8_t imm3 = bits(machInst, 12, 10); + IntRegIndex rd = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rdsp = makeSP(rd); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + IntRegIndex rm = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + + switch (switchVal) { + case 0x0: + return new AddXEReg(machInst, rdsp, rnsp, rm, type, imm3); + case 0x1: + return new AddXERegCc(machInst, rd, rnsp, rm, type, imm3); + case 0x2: + return new SubXEReg(machInst, rdsp, rnsp, rm, type, imm3); + case 0x3: + return new SubXERegCc(machInst, rd, rnsp, rm, type, imm3); + } + } + } + case 0x2: + { + if (bits(machInst, 21) == 1) + return new Unknown64(machInst); + IntRegIndex rd = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + switch (bits(machInst, 23, 22)) { + case 0x0: + { + if (bits(machInst, 15, 10)) + return new Unknown64(machInst); + uint8_t switchVal = bits(machInst, 30, 29); + switch (switchVal) { + case 0x0: + return new AdcXSReg(machInst, rd, rn, rm, 0, LSL); + case 0x1: + return new 
AdcXSRegCc(machInst, rd, rn, rm, 0, LSL); + case 0x2: + return new SbcXSReg(machInst, rd, rn, rm, 0, LSL); + case 0x3: + return new SbcXSRegCc(machInst, rd, rn, rm, 0, LSL); + } + } + case 0x1: + { + if ((bits(machInst, 4) == 1) || + (bits(machInst, 10) == 1) || + (bits(machInst, 29) == 0)) { + return new Unknown64(machInst); + } + ConditionCode cond = + (ConditionCode)(uint8_t)bits(machInst, 15, 12); + uint8_t flags = bits(machInst, 3, 0); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + if (bits(machInst, 11) == 0) { + IntRegIndex rm = + (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + if (bits(machInst, 30) == 0) { + return new CcmnReg64(machInst, rn, rm, cond, flags); + } else { + return new CcmpReg64(machInst, rn, rm, cond, flags); + } + } else { + uint8_t imm5 = bits(machInst, 20, 16); + if (bits(machInst, 30) == 0) { + return new CcmnImm64(machInst, rn, imm5, cond, flags); + } else { + return new CcmpImm64(machInst, rn, imm5, cond, flags); + } + } + } + case 0x2: + { + if (bits(machInst, 29) == 1 || + bits(machInst, 11) == 1) { + return new Unknown64(machInst); + } + uint8_t switchVal = (bits(machInst, 10) << 0) | + (bits(machInst, 30) << 1); + IntRegIndex rd = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + ConditionCode cond = + (ConditionCode)(uint8_t)bits(machInst, 15, 12); + switch (switchVal) { + case 0x0: + return new Csel64(machInst, rd, rn, rm, cond); + case 0x1: + return new Csinc64(machInst, rd, rn, rm, cond); + case 0x2: + return new Csinv64(machInst, rd, rn, rm, cond); + case 0x3: + return new Csneg64(machInst, rd, rn, rm, cond); + } + } + case 0x3: + if (bits(machInst, 30) == 0) { + if (bits(machInst, 29) != 0) + return new Unknown64(machInst); + uint8_t switchVal = bits(machInst, 15, 10); + switch (switchVal) { + case 0x2: + return new Udiv64(machInst, rd, rn, rm); + case 0x3: + return new Sdiv64(machInst, rd, rn, rm); + case 0x8: + return new Lslv64(machInst, rd, rn, rm); + case 0x9: + return new Lsrv64(machInst, rd, rn, rm); + case 0xa: + return new Asrv64(machInst, rd, rn, rm); + case 0xb: + return new Rorv64(machInst, rd, rn, rm); + default: + return new Unknown64(machInst); + } + } else { + if (bits(machInst, 20, 16) != 0 || + bits(machInst, 29) != 0) { + return new Unknown64(machInst); + } + uint8_t switchVal = bits(machInst, 15, 10); + switch (switchVal) { + case 0x0: + return new Rbit64(machInst, rd, rn); + case 0x1: + return new Rev1664(machInst, rd, rn); + case 0x2: + if (bits(machInst, 31) == 0) + return new Rev64(machInst, rd, rn); + else + return new Rev3264(machInst, rd, rn); + case 0x3: + if (bits(machInst, 31) != 1) + return new Unknown64(machInst); + return new Rev64(machInst, rd, rn); + case 0x4: + return new Clz64(machInst, rd, rn); + case 0x5: + return new Cls64(machInst, rd, rn); + } + } + } + } + case 0x3: + { + if (bits(machInst, 30, 29) != 0x0 || + (bits(machInst, 23, 21) != 0 && bits(machInst, 31) == 0)) + return new Unknown64(machInst); + IntRegIndex rd = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex ra = (IntRegIndex)(uint8_t)bits(machInst, 14, 10); + IntRegIndex rm = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + switch (bits(machInst, 23, 21)) { + case 0x0: + if (bits(machInst, 15) == 0) + return new Madd64(machInst, rd, ra, rn, rm); + else + return new Msub64(machInst, rd, ra, rn, rm); + case 0x1: + if (bits(machInst, 15) == 
0) + return new Smaddl64(machInst, rd, ra, rn, rm); + else + return new Smsubl64(machInst, rd, ra, rn, rm); + case 0x2: + if (bits(machInst, 15) != 0) + return new Unknown64(machInst); + return new Smulh64(machInst, rd, rn, rm); + case 0x5: + if (bits(machInst, 15) == 0) + return new Umaddl64(machInst, rd, ra, rn, rm); + else + return new Umsubl64(machInst, rd, ra, rn, rm); + case 0x6: + if (bits(machInst, 15) != 0) + return new Unknown64(machInst); + return new Umulh64(machInst, rd, rn, rm); + default: + return new Unknown64(machInst); + } + } + } + return new FailUnimplemented("Unhandled Case2", machInst); + } +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeAdvSIMD(ExtMachInst machInst) + { + if (bits(machInst, 24) == 1) { + if (bits(machInst, 10) == 0) { + return decodeNeonIndexedElem(machInst); + } else if (bits(machInst, 23) == 1) { + return new Unknown64(machInst); + } else { + if (bits(machInst, 22, 19)) { + return decodeNeonShiftByImm(machInst); + } else { + return decodeNeonModImm(machInst); + } + } + } else if (bits(machInst, 21) == 1) { + if (bits(machInst, 10) == 1) { + return decodeNeon3Same(machInst); + } else if (bits(machInst, 11) == 0) { + return decodeNeon3Diff(machInst); + } else if (bits(machInst, 20, 17) == 0x0) { + return decodeNeon2RegMisc(machInst); + } else if (bits(machInst, 20, 17) == 0x8) { + return decodeNeonAcrossLanes(machInst); + } else { + return new Unknown64(machInst); + } + } else if (bits(machInst, 24) || + bits(machInst, 21) || + bits(machInst, 15)) { + return new Unknown64(machInst); + } else if (bits(machInst, 10) == 1) { + if (bits(machInst, 23, 22)) + return new Unknown64(machInst); + return decodeNeonCopy(machInst); + } else if (bits(machInst, 29) == 1) { + return decodeNeonExt(machInst); + } else if (bits(machInst, 11) == 1) { + return decodeNeonZipUzpTrn(machInst); + } else if (bits(machInst, 23, 22) == 0x0) { + return decodeNeonTblTbx(machInst); + } else { + return new Unknown64(machInst); + } + return new FailUnimplemented("Unhandled Case3", machInst); + } +} +}}; + + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + // bit 30=0, 28:25=1111 + decodeFp(ExtMachInst machInst) + { + if (bits(machInst, 24) == 1) { + if (bits(machInst, 31) || bits(machInst, 29)) + return new Unknown64(machInst); + IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex)(uint32_t)bits(machInst, 20, 16); + IntRegIndex ra = (IntRegIndex)(uint32_t)bits(machInst, 14, 10); + uint8_t switchVal = (bits(machInst, 23, 21) << 1) | + (bits(machInst, 15) << 0); + switch (switchVal) { + case 0x0: // FMADD Sd = Sa + Sn*Sm + return new FMAddS(machInst, rd, rn, rm, ra); + case 0x1: // FMSUB Sd = Sa + (-Sn)*Sm + return new FMSubS(machInst, rd, rn, rm, ra); + case 0x2: // FNMADD Sd = (-Sa) + (-Sn)*Sm + return new FNMAddS(machInst, rd, rn, rm, ra); + case 0x3: // FNMSUB Sd = (-Sa) + Sn*Sm + return new FNMSubS(machInst, rd, rn, rm, ra); + case 0x4: // FMADD Dd = Da + Dn*Dm + return new FMAddD(machInst, rd, rn, rm, ra); + case 0x5: // FMSUB Dd = Da + (-Dn)*Dm + return new FMSubD(machInst, rd, rn, rm, ra); + case 0x6: // FNMADD Dd = (-Da) + (-Dn)*Dm + return new FNMAddD(machInst, rd, rn, rm, ra); + case 0x7: // FNMSUB Dd = (-Da) + Dn*Dm + return new FNMSubD(machInst, rd, rn, rm, ra); + default: + return new Unknown64(machInst); + } + } else if (bits(machInst, 21) == 0) { + bool s = bits(machInst, 29); + if (s) + return new Unknown64(machInst); + uint8_t 
switchVal = bits(machInst, 20, 16); + uint8_t type = bits(machInst, 23, 22); + uint8_t scale = bits(machInst, 15, 10); + IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + if (bits(machInst, 18, 17) == 3 && scale != 0) + return new Unknown64(machInst); + // 30:24=0011110, 21=0 + switch (switchVal) { + case 0x00: + return new FailUnimplemented("fcvtns", machInst); + case 0x01: + return new FailUnimplemented("fcvtnu", machInst); + case 0x02: + switch ( (bits(machInst, 31) << 2) | type ) { + case 0: // SCVTF Sd = convertFromInt(Wn/(2^fbits)) + return new FcvtSFixedFpSW(machInst, rd, rn, scale); + case 1: // SCVTF Dd = convertFromInt(Wn/(2^fbits)) + return new FcvtSFixedFpDW(machInst, rd, rn, scale); + case 4: // SCVTF Sd = convertFromInt(Xn/(2^fbits)) + return new FcvtSFixedFpSX(machInst, rd, rn, scale); + case 5: // SCVTF Dd = convertFromInt(Xn/(2^fbits)) + return new FcvtSFixedFpDX(machInst, rd, rn, scale); + default: + return new Unknown64(machInst); + } + case 0x03: + switch ( (bits(machInst, 31) << 2) | type ) { + case 0: // UCVTF Sd = convertFromInt(Wn/(2^fbits)) + return new FcvtUFixedFpSW(machInst, rd, rn, scale); + case 1: // UCVTF Dd = convertFromInt(Wn/(2^fbits)) + return new FcvtUFixedFpDW(machInst, rd, rn, scale); + case 4: // UCVTF Sd = convertFromInt(Xn/(2^fbits)) + return new FcvtUFixedFpSX(machInst, rd, rn, scale); + case 5: // UCVTF Dd = convertFromInt(Xn/(2^fbits)) + return new FcvtUFixedFpDX(machInst, rd, rn, scale); + default: + return new Unknown64(machInst); + } + case 0x04: + return new FailUnimplemented("fcvtas", machInst); + case 0x05: + return new FailUnimplemented("fcvtau", machInst); + case 0x08: + return new FailUnimplemented("fcvtps", machInst); + case 0x09: + return new FailUnimplemented("fcvtpu", machInst); + case 0x0e: + return new FailUnimplemented("fmov elem. 
to 64", machInst); + case 0x0f: + return new FailUnimplemented("fmov 64 bit", machInst); + case 0x10: + return new FailUnimplemented("fcvtms", machInst); + case 0x11: + return new FailUnimplemented("fcvtmu", machInst); + case 0x18: + switch ( (bits(machInst, 31) << 2) | type ) { + case 0: // FCVTZS Wd = convertToIntExactTowardZero(Sn*(2^fbits)) + return new FcvtFpSFixedSW(machInst, rd, rn, scale); + case 1: // FCVTZS Wd = convertToIntExactTowardZero(Dn*(2^fbits)) + return new FcvtFpSFixedDW(machInst, rd, rn, scale); + case 4: // FCVTZS Xd = convertToIntExactTowardZero(Sn*(2^fbits)) + return new FcvtFpSFixedSX(machInst, rd, rn, scale); + case 5: // FCVTZS Xd = convertToIntExactTowardZero(Dn*(2^fbits)) + return new FcvtFpSFixedDX(machInst, rd, rn, scale); + default: + return new Unknown64(machInst); + } + case 0x19: + switch ( (bits(machInst, 31) << 2) | type ) { + case 0: // FCVTZU Wd = convertToIntExactTowardZero(Sn*(2^fbits)) + return new FcvtFpUFixedSW(machInst, rd, rn, scale); + case 1: // FCVTZU Wd = convertToIntExactTowardZero(Dn*(2^fbits)) + return new FcvtFpUFixedDW(machInst, rd, rn, scale); + case 4: // FCVTZU Xd = convertToIntExactTowardZero(Sn*(2^fbits)) + return new FcvtFpUFixedSX(machInst, rd, rn, scale); + case 5: // FCVTZU Xd = convertToIntExactTowardZero(Dn*(2^fbits)) + return new FcvtFpUFixedDX(machInst, rd, rn, scale); + default: + return new Unknown64(machInst); + } + } + } else { + // 30=0, 28:24=11110, 21=1 + uint8_t type = bits(machInst, 23, 22); + uint8_t imm8 = bits(machInst, 20, 13); + IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + switch (bits(machInst, 11, 10)) { + case 0x0: + if (bits(machInst, 12) == 1) { + if (bits(machInst, 31) || + bits(machInst, 29) || + bits(machInst, 9, 5)) { + return new Unknown64(machInst); + } + // 31:29=000, 28:24=11110, 21=1, 12:10=100 + if (type == 0) { + // FMOV S[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>,5) + // :imm8<5:0>:Zeros(19) + uint32_t imm = vfp_modified_imm(imm8, false); + return new FmovImmS(machInst, rd, imm); + } else if (type == 1) { + // FMOV D[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>,8) + // :imm8<5:0>:Zeros(48) + uint64_t imm = vfp_modified_imm(imm8, true); + return new FmovImmD(machInst, rd, imm); + } else { + return new Unknown64(machInst); + } + } else if (bits(machInst, 13) == 1) { + if (bits(machInst, 31) || + bits(machInst, 29) || + bits(machInst, 15, 14) || + bits(machInst, 23) || + bits(machInst, 2, 0)) { + return new Unknown64(machInst); + } + uint8_t switchVal = (bits(machInst, 4, 3) << 0) | + (bits(machInst, 22) << 2); + IntRegIndex rm = (IntRegIndex)(uint32_t) + bits(machInst, 20, 16); + // 28:23=000111100, 21=1, 15:10=001000, 2:0=000 + switch (switchVal) { + case 0x0: + // FCMP flags = compareQuiet(Sn,Sm) + return new FCmpRegS(machInst, rn, rm); + case 0x1: + // FCMP flags = compareQuiet(Sn,0.0) + return new FCmpImmS(machInst, rn, 0); + case 0x2: + // FCMPE flags = compareSignaling(Sn,Sm) + return new FCmpERegS(machInst, rn, rm); + case 0x3: + // FCMPE flags = compareSignaling(Sn,0.0) + return new FCmpEImmS(machInst, rn, 0); + case 0x4: + // FCMP flags = compareQuiet(Dn,Dm) + return new FCmpRegD(machInst, rn, rm); + case 0x5: + // FCMP flags = compareQuiet(Dn,0.0) + return new FCmpImmD(machInst, rn, 0); + case 0x6: + // FCMPE flags = compareSignaling(Dn,Dm) + return new FCmpERegD(machInst, rn, rm); + case 0x7: + // FCMPE flags = compareSignaling(Dn,0.0) + return new FCmpEImmD(machInst, rn, 0); + default: + return new 
Unknown64(machInst); + } + } else if (bits(machInst, 14) == 1) { + if (bits(machInst, 31) || bits(machInst, 29)) + return new Unknown64(machInst); + uint8_t opcode = bits(machInst, 20, 15); + // Bits 31:24=00011110, 21=1, 14:10=10000 + switch (opcode) { + case 0x0: + if (type == 0) + // FMOV Sd = Sn + return new FmovRegS(machInst, rd, rn); + else if (type == 1) + // FMOV Dd = Dn + return new FmovRegD(machInst, rd, rn); + break; + case 0x1: + if (type == 0) + // FABS Sd = abs(Sn) + return new FAbsS(machInst, rd, rn); + else if (type == 1) + // FABS Dd = abs(Dn) + return new FAbsD(machInst, rd, rn); + break; + case 0x2: + if (type == 0) + // FNEG Sd = -Sn + return new FNegS(machInst, rd, rn); + else if (type == 1) + // FNEG Dd = -Dn + return new FNegD(machInst, rd, rn); + break; + case 0x3: + if (type == 0) + // FSQRT Sd = sqrt(Sn) + return new FSqrtS(machInst, rd, rn); + else if (type == 1) + // FSQRT Dd = sqrt(Dn) + return new FSqrtD(machInst, rd, rn); + break; + case 0x4: + if (type == 1) + // FCVT Sd = convertFormat(Dn) + return new FcvtFpDFpS(machInst, rd, rn); + else if (type == 3) + // FCVT Sd = convertFormat(Hn) + return new FcvtFpHFpS(machInst, rd, rn); + break; + case 0x5: + if (type == 0) + // FCVT Dd = convertFormat(Sn) + return new FCvtFpSFpD(machInst, rd, rn); + else if (type == 3) + // FCVT Dd = convertFormat(Hn) + return new FcvtFpHFpD(machInst, rd, rn); + break; + case 0x7: + if (type == 0) + // FCVT Hd = convertFormat(Sn) + return new FcvtFpSFpH(machInst, rd, rn); + else if (type == 1) + // FCVT Hd = convertFormat(Dn) + return new FcvtFpDFpH(machInst, rd, rn); + break; + case 0x8: + if (type == 0) // FRINTN Sd = roundToIntegralTiesToEven(Sn) + return new FRIntNS(machInst, rd, rn); + else if (type == 1) // FRINTN Dd = roundToIntegralTiesToEven(Dn) + return new FRIntND(machInst, rd, rn); + break; + case 0x9: + if (type == 0) // FRINTP Sd = roundToIntegralTowardPlusInf(Sn) + return new FRIntPS(machInst, rd, rn); + else if (type == 1) // FRINTP Dd = roundToIntegralTowardPlusInf(Dn) + return new FRIntPD(machInst, rd, rn); + break; + case 0xa: + if (type == 0) // FRINTM Sd = roundToIntegralTowardMinusInf(Sn) + return new FRIntMS(machInst, rd, rn); + else if (type == 1) // FRINTM Dd = roundToIntegralTowardMinusInf(Dn) + return new FRIntMD(machInst, rd, rn); + break; + case 0xb: + if (type == 0) // FRINTZ Sd = roundToIntegralTowardZero(Sn) + return new FRIntZS(machInst, rd, rn); + else if (type == 1) // FRINTZ Dd = roundToIntegralTowardZero(Dn) + return new FRIntZD(machInst, rd, rn); + break; + case 0xc: + if (type == 0) // FRINTA Sd = roundToIntegralTiesToAway(Sn) + return new FRIntAS(machInst, rd, rn); + else if (type == 1) // FRINTA Dd = roundToIntegralTiesToAway(Dn) + return new FRIntAD(machInst, rd, rn); + break; + case 0xe: + if (type == 0) // FRINTX Sd = roundToIntegralExact(Sn) + return new FRIntXS(machInst, rd, rn); + else if (type == 1) // FRINTX Dd = roundToIntegralExact(Dn) + return new FRIntXD(machInst, rd, rn); + break; + case 0xf: + if (type == 0) // FRINTI Sd = roundToIntegral(Sn) + return new FRIntIS(machInst, rd, rn); + else if (type == 1) // FRINTI Dd = roundToIntegral(Dn) + return new FRIntID(machInst, rd, rn); + break; + default: + return new Unknown64(machInst); + } + return new Unknown64(machInst); + } else if (bits(machInst, 15) == 1) { + return new Unknown64(machInst); + } else { + if (bits(machInst, 29)) + return new Unknown64(machInst); + uint8_t rmode = bits(machInst, 20, 19); + uint8_t switchVal1 = bits(machInst, 18, 16); + uint8_t switchVal2 = (type << 
1) | bits(machInst, 31); + // 30:24=0011110, 21=1, 15:10=000000 + switch (switchVal1) { + case 0x0: + switch ((switchVal2 << 2) | rmode) { + case 0x0: //FCVTNS Wd = convertToIntExactTiesToEven(Sn) + return new FcvtFpSIntWSN(machInst, rd, rn); + case 0x1: //FCVTPS Wd = convertToIntExactTowardPlusInf(Sn) + return new FcvtFpSIntWSP(machInst, rd, rn); + case 0x2: //FCVTMS Wd = convertToIntExactTowardMinusInf(Sn) + return new FcvtFpSIntWSM(machInst, rd, rn); + case 0x3: //FCVTZS Wd = convertToIntExactTowardZero(Sn) + return new FcvtFpSIntWSZ(machInst, rd, rn); + case 0x4: //FCVTNS Xd = convertToIntExactTiesToEven(Sn) + return new FcvtFpSIntXSN(machInst, rd, rn); + case 0x5: //FCVTPS Xd = convertToIntExactTowardPlusInf(Sn) + return new FcvtFpSIntXSP(machInst, rd, rn); + case 0x6: //FCVTMS Xd = convertToIntExactTowardMinusInf(Sn) + return new FcvtFpSIntXSM(machInst, rd, rn); + case 0x7: //FCVTZS Xd = convertToIntExactTowardZero(Sn) + return new FcvtFpSIntXSZ(machInst, rd, rn); + case 0x8: //FCVTNS Wd = convertToIntExactTiesToEven(Dn) + return new FcvtFpSIntWDN(machInst, rd, rn); + case 0x9: //FCVTPS Wd = convertToIntExactTowardPlusInf(Dn) + return new FcvtFpSIntWDP(machInst, rd, rn); + case 0xA: //FCVTMS Wd = convertToIntExactTowardMinusInf(Dn) + return new FcvtFpSIntWDM(machInst, rd, rn); + case 0xB: //FCVTZS Wd = convertToIntExactTowardZero(Dn) + return new FcvtFpSIntWDZ(machInst, rd, rn); + case 0xC: //FCVTNS Xd = convertToIntExactTiesToEven(Dn) + return new FcvtFpSIntXDN(machInst, rd, rn); + case 0xD: //FCVTPS Xd = convertToIntExactTowardPlusInf(Dn) + return new FcvtFpSIntXDP(machInst, rd, rn); + case 0xE: //FCVTMS Xd = convertToIntExactTowardMinusInf(Dn) + return new FcvtFpSIntXDM(machInst, rd, rn); + case 0xF: //FCVTZS Xd = convertToIntExactTowardZero(Dn) + return new FcvtFpSIntXDZ(machInst, rd, rn); + default: + return new Unknown64(machInst); + } + case 0x1: + switch ((switchVal2 << 2) | rmode) { + case 0x0: //FCVTNU Wd = convertToIntExactTiesToEven(Sn) + return new FcvtFpUIntWSN(machInst, rd, rn); + case 0x1: //FCVTPU Wd = convertToIntExactTowardPlusInf(Sn) + return new FcvtFpUIntWSP(machInst, rd, rn); + case 0x2: //FCVTMU Wd = convertToIntExactTowardMinusInf(Sn) + return new FcvtFpUIntWSM(machInst, rd, rn); + case 0x3: //FCVTZU Wd = convertToIntExactTowardZero(Sn) + return new FcvtFpUIntWSZ(machInst, rd, rn); + case 0x4: //FCVTNU Xd = convertToIntExactTiesToEven(Sn) + return new FcvtFpUIntXSN(machInst, rd, rn); + case 0x5: //FCVTPU Xd = convertToIntExactTowardPlusInf(Sn) + return new FcvtFpUIntXSP(machInst, rd, rn); + case 0x6: //FCVTMU Xd = convertToIntExactTowardMinusInf(Sn) + return new FcvtFpUIntXSM(machInst, rd, rn); + case 0x7: //FCVTZU Xd = convertToIntExactTowardZero(Sn) + return new FcvtFpUIntXSZ(machInst, rd, rn); + case 0x8: //FCVTNU Wd = convertToIntExactTiesToEven(Dn) + return new FcvtFpUIntWDN(machInst, rd, rn); + case 0x9: //FCVTPU Wd = convertToIntExactTowardPlusInf(Dn) + return new FcvtFpUIntWDP(machInst, rd, rn); + case 0xA: //FCVTMU Wd = convertToIntExactTowardMinusInf(Dn) + return new FcvtFpUIntWDM(machInst, rd, rn); + case 0xB: //FCVTZU Wd = convertToIntExactTowardZero(Dn) + return new FcvtFpUIntWDZ(machInst, rd, rn); + case 0xC: //FCVTNU Xd = convertToIntExactTiesToEven(Dn) + return new FcvtFpUIntXDN(machInst, rd, rn); + case 0xD: //FCVTPU Xd = convertToIntExactTowardPlusInf(Dn) + return new FcvtFpUIntXDP(machInst, rd, rn); + case 0xE: //FCVTMU Xd = convertToIntExactTowardMinusInf(Dn) + return new FcvtFpUIntXDM(machInst, rd, rn); + case 0xF: //FCVTZU Xd = 
convertToIntExactTowardZero(Dn) + return new FcvtFpUIntXDZ(machInst, rd, rn); + default: + return new Unknown64(machInst); + } + case 0x2: + if (rmode != 0) + return new Unknown64(machInst); + switch (switchVal2) { + case 0: // SCVTF Sd = convertFromInt(Wn) + return new FcvtWSIntFpS(machInst, rd, rn); + case 1: // SCVTF Sd = convertFromInt(Xn) + return new FcvtXSIntFpS(machInst, rd, rn); + case 2: // SCVTF Dd = convertFromInt(Wn) + return new FcvtWSIntFpD(machInst, rd, rn); + case 3: // SCVTF Dd = convertFromInt(Xn) + return new FcvtXSIntFpD(machInst, rd, rn); + default: + return new Unknown64(machInst); + } + case 0x3: + switch (switchVal2) { + case 0: // UCVTF Sd = convertFromInt(Wn) + return new FcvtWUIntFpS(machInst, rd, rn); + case 1: // UCVTF Sd = convertFromInt(Xn) + return new FcvtXUIntFpS(machInst, rd, rn); + case 2: // UCVTF Dd = convertFromInt(Wn) + return new FcvtWUIntFpD(machInst, rd, rn); + case 3: // UCVTF Dd = convertFromInt(Xn) + return new FcvtXUIntFpD(machInst, rd, rn); + default: + return new Unknown64(machInst); + } + case 0x4: + if (rmode != 0) + return new Unknown64(machInst); + switch (switchVal2) { + case 0: // FCVTAS Wd = convertToIntExactTiesToAway(Sn) + return new FcvtFpSIntWSA(machInst, rd, rn); + case 1: // FCVTAS Xd = convertToIntExactTiesToAway(Sn) + return new FcvtFpSIntXSA(machInst, rd, rn); + case 2: // FCVTAS Wd = convertToIntExactTiesToAway(Dn) + return new FcvtFpSIntWDA(machInst, rd, rn); + case 3: // FCVTAS Xd = convertToIntExactTiesToAway(Dn) + return new FcvtFpSIntXDA(machInst, rd, rn); + default: + return new Unknown64(machInst); + } + case 0x5: + switch (switchVal2) { + case 0: // FCVTAU Wd = convertToIntExactTiesToAway(Sn) + return new FcvtFpUIntWSA(machInst, rd, rn); + case 1: // FCVTAU Xd = convertToIntExactTiesToAway(Sn) + return new FcvtFpUIntXSA(machInst, rd, rn); + case 2: // FCVTAU Wd = convertToIntExactTiesToAway(Dn) + return new FcvtFpUIntWDA(machInst, rd, rn); + case 3: // FCVTAU Xd = convertToIntExactTiesToAway(Dn) + return new FcvtFpUIntXDA(machInst, rd, rn); + default: + return new Unknown64(machInst); + } + case 0x06: + switch (switchVal2) { + case 0: // FMOV Wd = Sn + if (rmode != 0) + return new Unknown64(machInst); + return new FmovRegCoreW(machInst, rd, rn); + case 3: // FMOV Xd = Dn + if (rmode != 0) + return new Unknown64(machInst); + return new FmovRegCoreX(machInst, rd, rn); + case 5: // FMOV Xd = Vn<127:64> + if (rmode != 1) + return new Unknown64(machInst); + return new FmovURegCoreX(machInst, rd, rn); + default: + return new Unknown64(machInst); + } + break; + case 0x07: + switch (switchVal2) { + case 0: // FMOV Sd = Wn + if (rmode != 0) + return new Unknown64(machInst); + return new FmovCoreRegW(machInst, rd, rn); + case 3: // FMOV Dd = Xn + if (rmode != 0) + return new Unknown64(machInst); + return new FmovCoreRegX(machInst, rd, rn); + case 5: // FMOV Vd<127:64> = Xn + if (rmode != 1) + return new Unknown64(machInst); + return new FmovUCoreRegX(machInst, rd, rn); + default: + return new Unknown64(machInst); + } + break; + default: // Warning!
missing cases in switch statement above, that still need to be added + return new Unknown64(machInst); + } + } + case 0x1: + { + if (bits(machInst, 31) || + bits(machInst, 29) || + bits(machInst, 23)) { + return new Unknown64(machInst); + } + IntRegIndex rm = (IntRegIndex)(uint32_t) bits(machInst, 20, 16); + IntRegIndex rn = (IntRegIndex)(uint32_t) bits(machInst, 9, 5); + uint8_t imm = (IntRegIndex)(uint32_t) bits(machInst, 3, 0); + ConditionCode cond = + (ConditionCode)(uint8_t)(bits(machInst, 15, 12)); + uint8_t switchVal = (bits(machInst, 4) << 0) | + (bits(machInst, 22) << 1); + // 31:23=000111100, 21=1, 11:10=01 + switch (switchVal) { + case 0x0: + // FCCMP flags = if cond the compareQuiet(Sn,Sm) else #nzcv + return new FCCmpRegS(machInst, rn, rm, cond, imm); + case 0x1: + // FCCMP flags = if cond then compareSignaling(Sn,Sm) + // else #nzcv + return new FCCmpERegS(machInst, rn, rm, cond, imm); + case 0x2: + // FCCMP flags = if cond then compareQuiet(Dn,Dm) else #nzcv + return new FCCmpRegD(machInst, rn, rm, cond, imm); + case 0x3: + // FCCMP flags = if cond then compareSignaling(Dn,Dm) + // else #nzcv + return new FCCmpERegD(machInst, rn, rm, cond, imm); + default: + return new Unknown64(machInst); + } + } + case 0x2: + { + if (bits(machInst, 31) || + bits(machInst, 29) || + bits(machInst, 23)) { + return new Unknown64(machInst); + } + IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex)(uint32_t)bits(machInst, 20, 16); + uint8_t switchVal = (bits(machInst, 15, 12) << 0) | + (bits(machInst, 22) << 4); + switch (switchVal) { + case 0x00: // FMUL Sd = Sn * Sm + return new FMulS(machInst, rd, rn, rm); + case 0x10: // FMUL Dd = Dn * Dm + return new FMulD(machInst, rd, rn, rm); + case 0x01: // FDIV Sd = Sn / Sm + return new FDivS(machInst, rd, rn, rm); + case 0x11: // FDIV Dd = Dn / Dm + return new FDivD(machInst, rd, rn, rm); + case 0x02: // FADD Sd = Sn + Sm + return new FAddS(machInst, rd, rn, rm); + case 0x12: // FADD Dd = Dn + Dm + return new FAddD(machInst, rd, rn, rm); + case 0x03: // FSUB Sd = Sn - Sm + return new FSubS(machInst, rd, rn, rm); + case 0x13: // FSUB Dd = Dn - Dm + return new FSubD(machInst, rd, rn, rm); + case 0x04: // FMAX Sd = max(Sn, Sm) + return new FMaxS(machInst, rd, rn, rm); + case 0x14: // FMAX Dd = max(Dn, Dm) + return new FMaxD(machInst, rd, rn, rm); + case 0x05: // FMIN Sd = min(Sn, Sm) + return new FMinS(machInst, rd, rn, rm); + case 0x15: // FMIN Dd = min(Dn, Dm) + return new FMinD(machInst, rd, rn, rm); + case 0x06: // FMAXNM Sd = maxNum(Sn, Sm) + return new FMaxNMS(machInst, rd, rn, rm); + case 0x16: // FMAXNM Dd = maxNum(Dn, Dm) + return new FMaxNMD(machInst, rd, rn, rm); + case 0x07: // FMINNM Sd = minNum(Sn, Sm) + return new FMinNMS(machInst, rd, rn, rm); + case 0x17: // FMINNM Dd = minNum(Dn, Dm) + return new FMinNMD(machInst, rd, rn, rm); + case 0x08: // FNMUL Sd = -(Sn * Sm) + return new FNMulS(machInst, rd, rn, rm); + case 0x18: // FNMUL Dd = -(Dn * Dm) + return new FNMulD(machInst, rd, rn, rm); + default: + return new Unknown64(machInst); + } + } + case 0x3: + { + if (bits(machInst, 31) || bits(machInst, 29)) + return new Unknown64(machInst); + uint8_t type = bits(machInst, 23, 22); + IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex)(uint32_t)bits(machInst, 20, 16); + ConditionCode cond = + (ConditionCode)(uint8_t)(bits(machInst, 15, 
12)); + if (type == 0) // FCSEL Sd = if cond then Sn else Sm + return new FCSelS(machInst, rd, rn, rm, cond); + else if (type == 1) // FCSEL Dd = if cond then Dn else Dm + return new FCSelD(machInst, rd, rn, rm, cond); + else + return new Unknown64(machInst); + } + } + } + return new FailUnimplemented("Unhandled Case4", machInst); + } +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeAdvSIMDScalar(ExtMachInst machInst) + { + if (bits(machInst, 24) == 1) { + if (bits(machInst, 10) == 0) { + return decodeNeonScIndexedElem(machInst); + } else if (bits(machInst, 23) == 0) { + return decodeNeonScShiftByImm(machInst); + } + } else if (bits(machInst, 21) == 1) { + if (bits(machInst, 10) == 1) { + return decodeNeonSc3Same(machInst); + } else if (bits(machInst, 11) == 0) { + return decodeNeonSc3Diff(machInst); + } else if (bits(machInst, 20, 17) == 0x0) { + return decodeNeonSc2RegMisc(machInst); + } else if (bits(machInst, 20, 17) == 0x8) { + return decodeNeonScPwise(machInst); + } else { + return new Unknown64(machInst); + } + } else if (bits(machInst, 23, 22) == 0 && + bits(machInst, 15) == 0 && + bits(machInst, 10) == 1) { + return decodeNeonScCopy(machInst); + } else { + return new Unknown64(machInst); + } + return new FailUnimplemented("Unhandled Case6", machInst); + } +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeFpAdvSIMD(ExtMachInst machInst) + { + + if (bits(machInst, 28) == 0) { + if (bits(machInst, 31) == 0) { + return decodeAdvSIMD(machInst); + } else { + return new Unknown64(machInst); + } + } else if (bits(machInst, 30) == 0) { + return decodeFp(machInst); + } else if (bits(machInst, 31) == 0) { + return decodeAdvSIMDScalar(machInst); + } else { + return new Unknown64(machInst); + } + } +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeGem5Ops(ExtMachInst machInst) + { + const uint32_t m5func = bits(machInst, 23, 16); + switch (m5func) { + case 0x00: return new Arm(machInst); + case 0x01: return new Quiesce(machInst); + case 0x02: return new QuiesceNs64(machInst); + case 0x03: return new QuiesceCycles64(machInst); + case 0x04: return new QuiesceTime64(machInst); + case 0x07: return new Rpns64(machInst); + case 0x09: return new WakeCPU64(machInst); + case 0x10: return new Deprecated_ivlb(machInst); + case 0x11: return new Deprecated_ivle(machInst); + case 0x20: return new Deprecated_exit (machInst); + case 0x21: return new M5exit64(machInst); + case 0x31: return new Loadsymbol(machInst); + case 0x30: return new Initparam64(machInst); + case 0x40: return new Resetstats64(machInst); + case 0x41: return new Dumpstats64(machInst); + case 0x42: return new Dumpresetstats64(machInst); + case 0x43: return new M5checkpoint64(machInst); + case 0x4F: return new M5writefile64(machInst); + case 0x50: return new M5readfile64(machInst); + case 0x51: return new M5break(machInst); + case 0x52: return new M5switchcpu(machInst); + case 0x53: return new M5addsymbol64(machInst); + case 0x54: return new M5panic(machInst); + case 0x5a: return new M5workbegin64(machInst); + case 0x5b: return new M5workend64(machInst); + default: return new Unknown64(machInst); + } + } +} +}}; + +def format Aarch64() {{ + decode_block = ''' + { + using namespace Aarch64; + if (bits(machInst, 27) == 0x0) { + if (bits(machInst, 28) == 0x0) + return new Unknown64(machInst); + else if (bits(machInst, 26) == 0) + // bit 28:26=100 + return decodeDataProcImm(machInst); + else + // bit 28:26=101 + return decodeBranchExcSys(machInst); + } else if 
(bits(machInst, 25) == 0) { + // bit 27=1, 25=0 + return decodeLoadsStores(machInst); + } else if (bits(machInst, 26) == 0) { + // bit 27:25=101 + return decodeDataProcReg(machInst); + } else if (bits(machInst, 24) == 1 && + bits(machInst, 31, 28) == 0xF) { + return decodeGem5Ops(machInst); + } else { + // bit 27:25=111 + return decodeFpAdvSIMD(machInst); + } + } + ''' +}}; diff --git a/src/arch/arm/isa/formats/branch.isa b/src/arch/arm/isa/formats/branch.isa index f1b17ec90..513506d31 100644 --- a/src/arch/arm/isa/formats/branch.isa +++ b/src/arch/arm/isa/formats/branch.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010, 2012-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -101,7 +101,7 @@ def format Thumb16CondBranchAndSvc() {{ return new B(machInst, sext<9>(bits(machInst, 7, 0) << 1), (ConditionCode)(uint32_t)bits(machInst, 11, 8)); } else if (bits(machInst, 8)) { - return new Svc(machInst); + return new Svc(machInst, bits(machInst, 7, 0)); } else { // This space will not be allocated in the future. return new Unknown(machInst); @@ -127,7 +127,7 @@ def format Thumb32BranchesAndMiscCtrl() {{ // Permanently undefined. return new Unknown(machInst); } else { - return new WarnUnimplemented("smc", machInst); + return new Smc(machInst); } } else if ((op & 0x38) != 0x38) { const uint32_t s = bits(machInst, 26); @@ -141,20 +141,26 @@ def format Thumb32BranchesAndMiscCtrl() {{ return new B(machInst, imm, (ConditionCode)(uint32_t)bits(machInst, 25, 22)); } else { + // HIGH: 12-11=10, LOW: 15-14=00, 12=0 switch (op) { case 0x38: - { - const IntRegIndex rn = - (IntRegIndex)(uint32_t)bits(machInst, 19, 16); - const uint8_t byteMask = bits(machInst, 11, 8); - return new MsrCpsrReg(machInst, rn, byteMask); - } case 0x39: { const IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 19, 16); const uint8_t byteMask = bits(machInst, 11, 8); - return new MsrSpsrReg(machInst, rn, byteMask); + const bool r = bits(machInst, 20); + if (bits(machInst, 5)) { + const uint8_t sysM = (bits(machInst, 4) << 4) | + byteMask; + return new MsrBankedReg(machInst, rn, sysM, r); + } else { + if (r) { + return new MsrSpsrReg(machInst, rn, byteMask); + } else { + return new MsrCpsrReg(machInst, rn, byteMask); + } + } } case 0x3a: { @@ -196,11 +202,11 @@ def format Thumb32BranchesAndMiscCtrl() {{ case 0x2: return new Clrex(machInst); case 0x4: - return new Dsb(machInst); + return new Dsb(machInst, 0); case 0x5: - return new Dmb(machInst); + return new Dmb(machInst, 0); case 0x6: - return new Isb(machInst); + return new Isb(machInst, 0); default: break; } @@ -208,28 +214,44 @@ def format Thumb32BranchesAndMiscCtrl() {{ } case 0x3c: { - // On systems that don't support bxj, bxj == bx - return new BxReg(machInst, + return new BxjReg(machInst, (IntRegIndex)(uint32_t)bits(machInst, 19, 16), COND_UC); } case 0x3d: { const uint32_t imm32 = bits(machInst, 7, 0); - return new SubsImmPclr(machInst, INTREG_PC, INTREG_LR, - imm32, false); + if (imm32 == 0) { + return new Eret(machInst); + } else { + return new SubsImmPclr(machInst, INTREG_PC, + INTREG_LR, imm32, false); + } } case 0x3e: + case 0x3f: { + const IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 11, 8); - return new MrsCpsr(machInst, rd); + const bool r = bits(machInst, 20); + if (bits(machInst, 5)) { + const uint8_t sysM = (bits(machInst, 4) << 4) | + bits(machInst, 11, 8); + return new MrsBankedReg(machInst, rd, sysM, r); + } else { + if (r) { + 
return new MrsSpsr(machInst, rd); + } else { + return new MrsCpsr(machInst, rd); + } + } } - case 0x3f: + case 0xfe: { - const IntRegIndex rd = - (IntRegIndex)(uint32_t)bits(machInst, 11, 8); - return new MrsSpsr(machInst, rd); + uint32_t imm16 = (bits(machInst, 19, 16) << 12) | + (bits(machInst, 11, 0) << 0); + return new Hvc(machInst, imm16); } } break; diff --git a/src/arch/arm/isa/formats/formats.isa b/src/arch/arm/isa/formats/formats.isa index 90144c101..44e9c5b5e 100644 --- a/src/arch/arm/isa/formats/formats.isa +++ b/src/arch/arm/isa/formats/formats.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2011 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -44,6 +44,12 @@ //Include the basic format ##include "basic.isa" +//Include support for decoding AArch64 instructions +##include "aarch64.isa" + +//Include support for decoding AArch64 NEON instructions +##include "neon64.isa" + //Include support for predicated instructions ##include "pred.isa" diff --git a/src/arch/arm/isa/formats/fp.isa b/src/arch/arm/isa/formats/fp.isa index 6d779e541..ccd4589a3 100644 --- a/src/arch/arm/isa/formats/fp.isa +++ b/src/arch/arm/isa/formats/fp.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2011 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -151,8 +151,7 @@ let {{ if (singleAll) { size = bits(machInst, 7, 6); bool t = bits(machInst, 5); - unsigned eBytes = (1 << size); - align = (eBytes - 1) | TLB::AllowUnaligned; + align = size | TLB::AllowUnaligned; if (width == 1) { regs = t ? 2 : 1; inc = 1; @@ -164,7 +163,7 @@ let {{ case 1: case 2: if (bits(machInst, 4)) - align = width * eBytes - 1; + align = size + width - 1; break; case 3: break; @@ -173,20 +172,19 @@ let {{ if (bits(machInst, 4) == 0) return new Unknown(machInst); size = 2; - align = 0xf; + align = 0x4; } else if (size == 2) { if (bits(machInst, 4)) - align = 7; + align = 0x3; } else { if (bits(machInst, 4)) - align = 4 * eBytes - 1; + align = size + 2; } break; } } else { size = bits(machInst, 11, 10); - unsigned eBytes = (1 << size); - align = (eBytes - 1) | TLB::AllowUnaligned; + align = size | TLB::AllowUnaligned; regs = width; unsigned indexAlign = bits(machInst, 7, 4); // If width is 1, inc is always 1. That's overridden later. @@ -219,13 +217,13 @@ let {{ break; case 2: if (bits(indexAlign, 1, 0)) - align = 3; + align = 2; break; } break; case 2: if (bits(indexAlign, 0)) - align = (2 * eBytes) - 1; + align = size + 1; break; case 3: break; @@ -234,11 +232,11 @@ let {{ case 0: case 1: if (bits(indexAlign, 0)) - align = (4 * eBytes) - 1; + align = size + 2; break; case 2: if (bits(indexAlign, 0)) - align = (4 << bits(indexAlign, 1, 0)) - 1; + align = bits(indexAlign, 1, 0) + 2; break; } break; @@ -252,9 +250,9 @@ let {{ align = bits(machInst, 5, 4); if (align == 0) { // @align wasn't specified, so alignment can be turned off. 
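The alignment hunks in this fp.isa change replace the old byte-mask encoding of align ((1 << size) - 1, optionally OR'd with TLB::AllowUnaligned) with a log2-of-bytes encoding (the size field itself). A minimal stand-alone sketch of the relationship between the two encodings follows; the helper name and the flag value are placeholders for illustration, not gem5's actual TLB constants.

    #include <cstdint>
    #include <cassert>

    // Placeholder for TLB::AllowUnaligned; the real flag value lives in gem5's TLB code.
    static const uint32_t AllowUnaligned = 0x10;

    // With the new encoding the low bits of 'align' hold log2(alignment in bytes),
    // so the old-style byte mask can be recovered as (1 << n) - 1.
    static inline uint32_t alignLog2ToByteMask(uint32_t align)
    {
        uint32_t log2Bytes = align & ~AllowUnaligned;
        return (1u << log2Bytes) - 1;
    }

    int main()
    {
        assert(alignLog2ToByteMask(2) == 0x3);                   // 4-byte elements
        assert(alignLog2ToByteMask(4) == 0xf);                   // 16-byte alignment
        assert(alignLog2ToByteMask(3 | AllowUnaligned) == 0x7);  // 8 bytes, unaligned allowed
        return 0;
    }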
- align = ((1 << size) - 1) | TLB::AllowUnaligned; + align = size | TLB::AllowUnaligned; } else { - align = ((4 << align) - 1); + align = align + 2; } switch (width) { case 1: @@ -588,6 +586,23 @@ let {{ } } case 0xc: + if (b) { + if (!u) { + if (bits(c, 1) == 0) { + if (q) { + return new NVfmaQFp<float>(machInst, vd, vn, vm); + } else { + return new NVfmaDFp<float>(machInst, vd, vn, vm); + } + } else { + if (q) { + return new NVfmsQFp<float>(machInst, vd, vn, vm); + } else { + return new NVfmsDFp<float>(machInst, vd, vn, vm); + } + } + } + } return new Unknown(machInst); case 0xd: if (b) { @@ -1827,7 +1842,7 @@ let {{ break; case 0x1: { - if (offset == 0 || vd + offset/2 > NumFloatArchRegs) { + if (offset == 0 || vd + offset/2 > NumFloatV7ArchRegs) { break; } switch (bits(opcode, 1, 0)) { @@ -1951,8 +1966,9 @@ let {{ } else if (a == 0x7) { const IntRegIndex rt = (IntRegIndex)(uint32_t)bits(machInst, 15, 12); - uint32_t specReg = bits(machInst, 19, 16); - switch (specReg) { + uint32_t reg = bits(machInst, 19, 16); + uint32_t specReg; + switch (reg) { case 0: specReg = MISCREG_FPSID; break; @@ -1974,7 +1990,9 @@ let {{ if (specReg == MISCREG_FPSCR) { return new VmsrFpscr(machInst, (IntRegIndex)specReg, rt); } else { - return new Vmsr(machInst, (IntRegIndex)specReg, rt); + uint32_t iss = mcrMrcIssBuild(0, bits(machInst, 3, 0), rt, + reg, a, bits(machInst, 7, 5)); + return new Vmsr(machInst, (IntRegIndex)specReg, rt, iss); } } } else if (l == 0 && c == 1) { @@ -2041,8 +2059,9 @@ let {{ } else if (a == 7) { const IntRegIndex rt = (IntRegIndex)(uint32_t)bits(machInst, 15, 12); - uint32_t specReg = bits(machInst, 19, 16); - switch (specReg) { + uint32_t reg = bits(machInst, 19, 16); + uint32_t specReg; + switch (reg) { case 0: specReg = MISCREG_FPSID; break; @@ -2070,7 +2089,9 @@ let {{ } else if (specReg == MISCREG_FPSCR) { return new VmrsFpscr(machInst, rt, (IntRegIndex)specReg); } else { - return new Vmrs(machInst, rt, (IntRegIndex)specReg); + uint32_t iss = mcrMrcIssBuild(l, bits(machInst, 3, 0), rt, + reg, a, bits(machInst, 7, 5)); + return new Vmrs(machInst, rt, (IntRegIndex)specReg, iss); } } } else { @@ -2235,6 +2256,44 @@ let {{ } } break; + case 0x9: + if ((opc3 & 0x1) == 0) { + if (single) { + return decodeVfpRegRegRegOp<VfnmaS>( + machInst, vd, vn, vm, false); + } else { + return decodeVfpRegRegRegOp<VfnmaD>( + machInst, vd, vn, vm, true); + } + } else { + if (single) { + return decodeVfpRegRegRegOp<VfnmsS>( + machInst, vd, vn, vm, false); + } else { + return decodeVfpRegRegRegOp<VfnmsD>( + machInst, vd, vn, vm, true); + } + } + break; + case 0xa: + if ((opc3 & 0x1) == 0) { + if (single) { + return decodeVfpRegRegRegOp<VfmaS>( + machInst, vd, vn, vm, false); + } else { + return decodeVfpRegRegRegOp<VfmaD>( + machInst, vd, vn, vm, true); + } + } else { + if (single) { + return decodeVfpRegRegRegOp<VfmsS>( + machInst, vd, vn, vm, false); + } else { + return decodeVfpRegRegRegOp<VfmsD>( + machInst, vd, vn, vm, true); + } + } + break; case 0xb: if ((opc3 & 0x1) == 0) { const uint32_t baseImm = diff --git a/src/arch/arm/isa/formats/mem.isa b/src/arch/arm/isa/formats/mem.isa index f7830eff3..abac27021 100644 --- a/src/arch/arm/isa/formats/mem.isa +++ b/src/arch/arm/isa/formats/mem.isa @@ -282,7 +282,7 @@ def format Thumb32SrsRfe() {{ } } else { const uint32_t mode = bits(machInst, 4, 0); - if (badMode((OperatingMode)mode)) + if (badMode32((OperatingMode)mode)) return new Unknown(machInst); if (!add && !wb) { return new %(srs)s(machInst, mode, diff --git a/src/arch/arm/isa/formats/misc.isa 
b/src/arch/arm/isa/formats/misc.isa index 00a37d17b..647f9846d 100644 --- a/src/arch/arm/isa/formats/misc.isa +++ b/src/arch/arm/isa/formats/misc.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010-2012 ARM Limited +// Copyright (c) 2010-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -36,19 +36,42 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Authors: Gabe Black +// Giacomo Gabrielli + +def format ArmERet() {{ + decode_block = "return new Eret(machInst);" +}}; def format Svc() {{ - decode_block = "return new Svc(machInst);" + decode_block = "return new Svc(machInst, bits(machInst, 23, 0));" +}}; + +def format ArmSmcHyp() {{ + decode_block = ''' + { + if (bits(machInst, 21)) + { + return new Smc(machInst); + } else { + uint32_t imm16 = (bits(machInst, 19, 8) << 4) | + (bits(machInst, 3, 0) << 0); + return new Hvc(machInst, imm16); + } + } + ''' }}; def format ArmMsrMrs() {{ decode_block = ''' { const uint8_t byteMask = bits(machInst, 19, 16); + const uint8_t sysM = byteMask | (bits(machInst, 8) << 4); const IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 3, 0); const IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 15, 12); const uint32_t opcode = bits(machInst, 24, 21); const bool useImm = bits(machInst, 25); + const bool r = bits(machInst, 22); + const bool isBanked = bits(machInst, 9); const uint32_t unrotated = bits(machInst, 7, 0); const uint32_t rotation = (bits(machInst, 11, 8) << 1); @@ -56,20 +79,36 @@ def format ArmMsrMrs() {{ switch (opcode) { case 0x8: - return new MrsCpsr(machInst, rd); + if (isBanked) { + return new MrsBankedReg(machInst, rd, sysM, r!=0); + } else { + return new MrsCpsr(machInst, rd); + } case 0x9: if (useImm) { return new MsrCpsrImm(machInst, imm, byteMask); } else { - return new MsrCpsrReg(machInst, rn, byteMask); + if (isBanked) { + return new MsrBankedReg(machInst, rn, sysM, r!=0); + } else { + return new MsrCpsrReg(machInst, rn, byteMask); + } } case 0xa: - return new MrsSpsr(machInst, rd); + if (isBanked) { + return new MrsBankedReg(machInst, rd, sysM, r!=0); + } else { + return new MrsSpsr(machInst, rd); + } case 0xb: if (useImm) { return new MsrSpsrImm(machInst, imm, byteMask); } else { - return new MsrSpsrReg(machInst, rn, byteMask); + if (isBanked) { + return new MsrBankedReg(machInst, rn, sysM, r!=0); + } else { + return new MsrSpsrReg(machInst, rn, byteMask); + } } default: return new Unknown(machInst); @@ -99,16 +138,17 @@ let {{ switch (miscReg) { case MISCREG_NOP: return new NopInst(machInst); - case NUM_MISCREGS: + case MISCREG_CP14_UNIMPL: return new FailUnimplemented( csprintf("miscreg crn:%d opc1:%d crm:%d opc2:%d %s unknown", crn, opc1, crm, opc2, isRead ? 
"read" : "write").c_str(), machInst); default: + uint32_t iss = mcrMrcIssBuild(isRead, crm, rt, crn, opc1, opc2); if (isRead) { - return new Mrc14(machInst, rt, (IntRegIndex)miscReg); + return new Mrc14(machInst, rt, (IntRegIndex)miscReg, iss); } else { - return new Mcr14(machInst, (IntRegIndex)miscReg, rt); + return new Mcr14(machInst, (IntRegIndex)miscReg, rt, iss); } } } @@ -123,8 +163,8 @@ def format McrMrc14() {{ let {{ header_output = ''' - StaticInstPtr - decodeMcrMrc15(ExtMachInst machInst); + StaticInstPtr decodeMcrMrc14(ExtMachInst machInst); + StaticInstPtr decodeMcrMrc15(ExtMachInst machInst); ''' decoder_output = ''' StaticInstPtr @@ -136,107 +176,50 @@ let {{ const uint32_t crm = bits(machInst, 3, 0); const MiscRegIndex miscReg = decodeCP15Reg(crn, opc1, crm, opc2); const IntRegIndex rt = (IntRegIndex)(uint32_t)bits(machInst, 15, 12); - const bool isRead = bits(machInst, 20); + uint32_t iss = mcrMrcIssBuild(isRead, crm, rt, crn, opc1, opc2); switch (miscReg) { case MISCREG_NOP: return new NopInst(machInst); - case NUM_MISCREGS: + case MISCREG_CP15_UNIMPL: return new FailUnimplemented( csprintf("miscreg crn:%d opc1:%d crm:%d opc2:%d %s unknown", crn, opc1, crm, opc2, isRead ? "read" : "write").c_str(), machInst); - case MISCREG_DCCISW: - return new WarnUnimplemented( - isRead ? "mrc dccisw" : "mcr dcisw", machInst); - case MISCREG_DCCIMVAC: - return new WarnUnimplemented( - isRead ? "mrc dccimvac" : "mcr dccimvac", machInst); - case MISCREG_DCIMVAC: - return new WarnUnimplemented( - isRead ? "mrc dcimvac" : "mcr dcimvac", machInst); case MISCREG_DCCMVAC: return new FlushPipeInst( isRead ? "mrc dccmvac" : "mcr dccmvac", machInst); - case MISCREG_DCCMVAU: - return new WarnUnimplemented( - isRead ? "mrc dccmvau" : "mcr dccmvau", machInst); case MISCREG_CP15ISB: - return new Isb(machInst); + return new Isb(machInst, iss); case MISCREG_CP15DSB: - return new Dsb(machInst); + return new Dsb(machInst, iss); case MISCREG_CP15DMB: - return new Dmb(machInst); - case MISCREG_ICIALLUIS: - return new WarnUnimplemented( - isRead ? "mrc icialluis" : "mcr icialluis", machInst); - case MISCREG_ICIMVAU: - return new WarnUnimplemented( - isRead ? "mrc icimvau" : "mcr icimvau", machInst); - case MISCREG_BPIMVA: - return new WarnUnimplemented( - isRead ? "mrc bpimva" : "mcr bpimva", machInst); - case MISCREG_BPIALLIS: - return new WarnUnimplemented( - isRead ? "mrc bpiallis" : "mcr bpiallis", machInst); - case MISCREG_BPIALL: - return new WarnUnimplemented( - isRead ? "mrc bpiall" : "mcr bpiall", machInst); - case MISCREG_L2LATENCY: - return new WarnUnimplemented( - isRead ? "mrc l2latency" : "mcr l2latency", machInst); - case MISCREG_CRN15: - return new WarnUnimplemented( - isRead ? "mrc crn15" : "mcr crn15", machInst); - - // Write only. - case MISCREG_TLBIALLIS: - case MISCREG_TLBIMVAIS: - case MISCREG_TLBIASIDIS: - case MISCREG_TLBIMVAAIS: - case MISCREG_ITLBIALL: - case MISCREG_ITLBIMVA: - case MISCREG_ITLBIASID: - case MISCREG_DTLBIALL: - case MISCREG_DTLBIMVA: - case MISCREG_DTLBIASID: - case MISCREG_TLBIALL: - case MISCREG_TLBIMVA: - case MISCREG_TLBIASID: - case MISCREG_TLBIMVAA: - if (isRead) { - return new Unknown(machInst); - } else { - return new Mcr15(machInst, (IntRegIndex)miscReg, rt); - } + return new Dmb(machInst, iss); + default: + if (miscRegInfo[miscReg][MISCREG_WARN_NOT_FAIL]) { + std::string full_mnem = csprintf("%s %s", + isRead ? "mrc" : "mcr", miscRegName[miscReg]); + warn("\\tinstruction '%s' unimplemented\\n", full_mnem); - // Read only in user mode. 
- case MISCREG_TPIDRURO: - if (isRead) { - return new Mrc15User(machInst, rt, (IntRegIndex)miscReg); - } else { - return new Mcr15(machInst, (IntRegIndex)miscReg, rt); + // Remove the warn flag and set the implemented flag. This + // prevents the instruction warning a second time, it also + // means the instruction is actually generated. Actually + // creating the instruction to access an register that isn't + // implemented sounds a bit silly, but its required to get + // the correct behaviour for hyp traps and undef exceptions. + miscRegInfo[miscReg][MISCREG_IMPLEMENTED] = true; + miscRegInfo[miscReg][MISCREG_WARN_NOT_FAIL] = false; } - // Read/write in user mode. - case MISCREG_TPIDRURW: - if (isRead) { - return new Mrc15User(machInst, rt, (IntRegIndex)miscReg); + if (miscRegInfo[miscReg][MISCREG_IMPLEMENTED]) { + if (isRead) + return new Mrc15(machInst, rt, (IntRegIndex)miscReg, iss); + return new Mcr15(machInst, (IntRegIndex)miscReg, rt, iss); } else { - return new Mcr15User(machInst, (IntRegIndex)miscReg, rt); - } - - // Read/write, priveleged only. - default: - if (miscReg >= MISCREG_CP15_UNIMP_START) return new FailUnimplemented(csprintf("%s %s", isRead ? "mrc" : "mcr", miscRegName[miscReg]).c_str(), machInst); - if (isRead) { - return new Mrc15(machInst, rt, (IntRegIndex)miscReg); - } else { - return new Mcr15(machInst, (IntRegIndex)miscReg, rt); } } } @@ -248,3 +231,70 @@ def format McrMrc15() {{ return decodeMcrMrc15(machInst); ''' }}; + +let {{ + header_output = ''' + StaticInstPtr + decodeMcrrMrrc15(ExtMachInst machInst); + ''' + decoder_output = ''' + StaticInstPtr + decodeMcrrMrrc15(ExtMachInst machInst) + { + const uint32_t crm = bits(machInst, 3, 0); + const uint32_t opc1 = bits(machInst, 7, 4); + const MiscRegIndex miscReg = decodeCP15Reg64(crm, opc1); + const IntRegIndex rt = (IntRegIndex) (uint32_t) bits(machInst, 15, 12); + const IntRegIndex rt2 = (IntRegIndex) (uint32_t) bits(machInst, 19, 16); + + const bool isRead = bits(machInst, 20); + + switch (miscReg) { + case MISCREG_CP15_UNIMPL: + return new FailUnimplemented( + csprintf("miscreg crm:%d opc1:%d 64-bit %s unknown", + crm, opc1, isRead ? "read" : "write").c_str(), + machInst); + default: + if (miscRegInfo[miscReg][MISCREG_WARN_NOT_FAIL]) { + std::string full_mnem = csprintf("%s %s", + isRead ? "mrrc" : "mcrr", miscRegName[miscReg]); + warn("\\tinstruction '%s' unimplemented\\n", full_mnem); + + // Remove the warn flag and set the implemented flag. This + // prevents the instruction warning a second time, it also + // means the instruction is actually generated. Actually + // creating the instruction to access an register that isn't + // implemented sounds a bit silly, but its required to get + // the correct behaviour for hyp traps and undef exceptions. + miscRegInfo[miscReg][MISCREG_IMPLEMENTED] = true; + miscRegInfo[miscReg][MISCREG_WARN_NOT_FAIL] = false; + } + + if (miscRegInfo[miscReg][MISCREG_IMPLEMENTED]) { + uint32_t iss = mcrrMrrcIssBuild(isRead, crm, rt, rt2, opc1); + + if (isRead) + return new Mrrc15(machInst, (IntRegIndex) miscReg, rt2, rt, iss); + return new Mcrr15(machInst, rt2, rt, (IntRegIndex) miscReg, iss); + } else { + return new FailUnimplemented(csprintf("%s %s", + isRead ? 
"mrrc" : "mcrr", miscRegName[miscReg]).c_str(), + machInst); + } + } + } + ''' +}}; + +def format Mcrr15() {{ + decode_block = ''' + return decodeMcrrMrrc15(machInst); + ''' +}}; + +def format Mrrc15() {{ + decode_block = ''' + return decodeMcrrMrrc15(machInst); + ''' +}}; diff --git a/src/arch/arm/isa/formats/neon64.isa b/src/arch/arm/isa/formats/neon64.isa new file mode 100644 index 000000000..72bbd0c60 --- /dev/null +++ b/src/arch/arm/isa/formats/neon64.isa @@ -0,0 +1,2626 @@ +// Copyright (c) 2012-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Authors: Giacomo Gabrielli +// Mbou Eyole + +output header {{ +namespace Aarch64 +{ + // AdvSIMD three same + StaticInstPtr decodeNeon3Same(ExtMachInst machInst); + // AdvSIMD three different + StaticInstPtr decodeNeon3Diff(ExtMachInst machInst); + // AdvSIMD two-reg misc + StaticInstPtr decodeNeon2RegMisc(ExtMachInst machInst); + // AdvSIMD across lanes + StaticInstPtr decodeNeonAcrossLanes(ExtMachInst machInst); + // AdvSIMD copy + StaticInstPtr decodeNeonCopy(ExtMachInst machInst); + // AdvSIMD vector x indexed element + StaticInstPtr decodeNeonIndexedElem(ExtMachInst machInst); + // AdvSIMD modified immediate + StaticInstPtr decodeNeonModImm(ExtMachInst machInst); + // AdvSIMD shift by immediate + StaticInstPtr decodeNeonShiftByImm(ExtMachInst machInst); + // AdvSIMD TBL/TBX + StaticInstPtr decodeNeonTblTbx(ExtMachInst machInst); + // AdvSIMD ZIP/UZP/TRN + StaticInstPtr decodeNeonZipUzpTrn(ExtMachInst machInst); + // AdvSIMD EXT + StaticInstPtr decodeNeonExt(ExtMachInst machInst); + + // AdvSIMD scalar three same + StaticInstPtr decodeNeonSc3Same(ExtMachInst machInst); + // AdvSIMD scalar three different + StaticInstPtr decodeNeonSc3Diff(ExtMachInst machInst); + // AdvSIMD scalar two-reg misc + StaticInstPtr decodeNeonSc2RegMisc(ExtMachInst machInst); + // AdvSIMD scalar pairwise + StaticInstPtr decodeNeonScPwise(ExtMachInst machInst); + // AdvSIMD scalar copy + StaticInstPtr decodeNeonScCopy(ExtMachInst machInst); + // AdvSIMD scalar x indexed element + StaticInstPtr decodeNeonScIndexedElem(ExtMachInst machInst); + // AdvSIMD scalar shift by immediate + StaticInstPtr decodeNeonScShiftByImm(ExtMachInst machInst); + + // AdvSIMD load/store + StaticInstPtr decodeNeonMem(ExtMachInst machInst); +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeNeon3Same(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 15, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + uint8_t size_q = (size << 1) | q; + uint8_t sz_q = size_q & 0x3; + + switch (opcode) { + case 0x00: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UhaddDX, UhaddQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<ShaddDX, ShaddQX>( + q, size, machInst, vd, vn, vm); + case 0x01: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg<UqaddDX, UqaddQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg<SqaddDX, SqaddQX>( + q, size, machInst, vd, vn, vm); + case 0x02: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UrhaddDX, UrhaddQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SrhaddDX, SrhaddQX>( + q, size, machInst, vd, vn, vm); + case 0x03: + switch (size) { + case 0x0: + if (u) { + if (q) + return new EorQX<uint64_t>(machInst, vd, vn, vm); + else + return new EorDX<uint64_t>(machInst, vd, vn, vm); + } else { + if (q) + return new AndQX<uint64_t>(machInst, vd, vn, vm); + else + return new AndDX<uint64_t>(machInst, vd, vn, vm); + } + case 0x1: + if (u) { + if (q) + return new BslQX<uint64_t>(machInst, vd, vn, vm); + else + return new BslDX<uint64_t>(machInst, vd, vn, vm); + } else { + if (q) + return new BicQX<uint64_t>(machInst, vd, vn, 
vm); + else + return new BicDX<uint64_t>(machInst, vd, vn, vm); + } + case 0x2: + if (u) { + if (q) + return new BitQX<uint64_t>(machInst, vd, vn, vm); + else + return new BitDX<uint64_t>(machInst, vd, vn, vm); + } else { + if (q) + return new OrrQX<uint64_t>(machInst, vd, vn, vm); + else + return new OrrDX<uint64_t>(machInst, vd, vn, vm); + } + case 0x3: + if (u) { + if (q) + return new BifQX<uint64_t>(machInst, vd, vn, vm); + else + return new BifDX<uint64_t>(machInst, vd, vn, vm); + } else { + if (q) + return new OrnQX<uint64_t>(machInst, vd, vn, vm); + else + return new OrnDX<uint64_t>(machInst, vd, vn, vm); + } + } + case 0x04: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UhsubDX, UhsubQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<ShsubDX, ShsubQX>( + q, size, machInst, vd, vn, vm); + case 0x05: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg<UqsubDX, UqsubQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg<SqsubDX, SqsubQX>( + q, size, machInst, vd, vn, vm); + case 0x06: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg<CmhiDX, CmhiQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg<CmgtDX, CmgtQX>( + q, size, machInst, vd, vn, vm); + case 0x07: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg<CmhsDX, CmhsQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg<CmgeDX, CmgeQX>( + q, size, machInst, vd, vn, vm); + case 0x08: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg<UshlDX, UshlQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg<SshlDX, SshlQX>( + q, size, machInst, vd, vn, vm); + case 0x09: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg<UqshlDX, UqshlQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg<SqshlDX, SqshlQX>( + q, size, machInst, vd, vn, vm); + case 0x0a: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg<UrshlDX, UrshlQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg<SrshlDX, SrshlQX>( + q, size, machInst, vd, vn, vm); + case 0x0b: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg<UqrshlDX, UqrshlQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg<SqrshlDX, SqrshlQX>( + q, size, machInst, vd, vn, vm); + case 0x0c: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UmaxDX, UmaxQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SmaxDX, SmaxQX>( + q, size, machInst, vd, vn, vm); + case 0x0d: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UminDX, UminQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SminDX, SminQX>( + q, size, machInst, vd, vn, vm); + case 0x0e: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UabdDX, UabdQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SabdDX, SabdQX>( + q, size, machInst, vd, vn, vm); + case 0x0f: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UabaDX, UabaQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SabaDX, 
SabaQX>( + q, size, machInst, vd, vn, vm); + case 0x10: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg<SubDX, SubQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonUThreeXReg<AddDX, AddQX>( + q, size, machInst, vd, vn, vm); + case 0x11: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg<CmeqDX, CmeqQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonUThreeXReg<CmtstDX, CmtstQX>( + q, size, machInst, vd, vn, vm); + case 0x12: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<MlsDX, MlsQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonUThreeSReg<MlaDX, MlaQX>( + q, size, machInst, vd, vn, vm); + case 0x13: + if (size == 0x3 || (size != 0x0 && bits(machInst, 29))) + return new Unknown64(machInst); + if (u) { + if (q) + return new PmulQX<uint8_t>(machInst, vd, vn, vm); + else + return new PmulDX<uint8_t>(machInst, vd, vn, vm); + } else { + return decodeNeonUThreeSReg<MulDX, MulQX>( + q, size, machInst, vd, vn, vm); + } + case 0x14: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UmaxpDX, UmaxpQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SmaxpDX, SmaxpQX>( + q, size, machInst, vd, vn, vm); + case 0x15: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UminpDX, UminpQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SminpDX, SminpQX>( + q, size, machInst, vd, vn, vm); + case 0x16: + if (size == 0x3 || size == 0x0) + return new Unknown64(machInst); + if (u) { + if (q) + return decodeNeonSThreeHAndWReg<SqrdmulhQX>( + size, machInst, vd, vn, vm); + else + return decodeNeonSThreeHAndWReg<SqrdmulhDX>( + size, machInst, vd, vn, vm); + } else { + if (q) + return decodeNeonSThreeHAndWReg<SqdmulhQX>( + size, machInst, vd, vn, vm); + else + return decodeNeonSThreeHAndWReg<SqdmulhDX>( + size, machInst, vd, vn, vm); + } + case 0x17: + if (u || size_q == 0x6) + return new Unknown64(machInst); + else + return decodeNeonUThreeXReg<AddpDX, AddpQX>( + q, size, machInst, vd, vn, vm); + case 0x18: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) { + if (u) + return decodeNeonUThreeFpReg<FmaxnmpDX, FmaxnmpQX>( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg<FmaxnmDX, FmaxnmQX>( + q, size & 0x1, machInst, vd, vn, vm); + } else { + if (u) + return decodeNeonUThreeFpReg<FminnmpDX, FminnmpQX>( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg<FminnmDX, FminnmQX>( + q, size & 0x1, machInst, vd, vn, vm); + } + case 0x19: + if (size < 0x2) { + if (u || sz_q == 0x2) + return new Unknown64(machInst); + else + return decodeNeonUThreeFpReg<FmlaDX, FmlaQX>( + q, size & 0x1, machInst, vd, vn, vm); + } else { + if (u || sz_q == 0x2) + return new Unknown64(machInst); + else + return decodeNeonUThreeFpReg<FmlsDX, FmlsQX>( + q, size & 0x1, machInst, vd, vn, vm); + } + case 0x1a: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) { + if (u) + return decodeNeonUThreeFpReg<FaddpDX, FaddpQX>( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg<FaddDX, FaddQX>( + q, size & 0x1, machInst, vd, vn, vm); + } else { + if (u) + return decodeNeonUThreeFpReg<FabdDX, FabdQX>( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg<FsubDX, FsubQX>( + q, size & 0x1, machInst, vd, vn, vm); + } 
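The decodeNeonUThreeSReg / decodeNeonSThreeSReg style calls used throughout decodeNeon3Same fan a single opcode out to the D- or Q-register variant and the right element type; the helpers themselves are presumably defined in the accompanying NEON template files rather than in this hunk. A hypothetical sketch of the dispatch they perform (name and structure assumed, reusing the gem5 types already visible in this file) looks roughly like:

    // Hypothetical sketch, not gem5's actual helper: pick the D- or Q-register
    // class from q and the unsigned element type from size, then build the
    // instruction; unsupported sizes fall back to Unknown64.
    template <template <typename> class DX, template <typename> class QX>
    StaticInstPtr
    decodeUThreeSRegSketch(bool q, unsigned size, ExtMachInst machInst,
                           IntRegIndex vd, IntRegIndex vn, IntRegIndex vm)
    {
        switch (size) {
          case 0x0:
            if (q)
                return new QX<uint8_t>(machInst, vd, vn, vm);
            else
                return new DX<uint8_t>(machInst, vd, vn, vm);
          case 0x1:
            if (q)
                return new QX<uint16_t>(machInst, vd, vn, vm);
            else
                return new DX<uint16_t>(machInst, vd, vn, vm);
          case 0x2:
            if (q)
                return new QX<uint32_t>(machInst, vd, vn, vm);
            else
                return new DX<uint32_t>(machInst, vd, vn, vm);
          default:
            return new Unknown64(machInst);
        }
    }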
+ case 0x1b: + if (size < 0x2 && sz_q != 0x2) { + if (u) + return decodeNeonUThreeFpReg<FmulDX, FmulQX>( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg<FmulxDX, FmulxQX>( + q, size & 0x1, machInst, vd, vn, vm); + } else { + return new Unknown64(machInst); + } + case 0x1c: + if (size < 0x2) { + if (u) + return decodeNeonUThreeFpReg<FcmgeDX, FcmgeQX>( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg<FcmeqDX, FcmeqQX>( + q, size & 0x1, machInst, vd, vn, vm); + } else { + if (u) + return decodeNeonUThreeFpReg<FcmgtDX, FcmgtQX>( + q, size & 0x1, machInst, vd, vn, vm); + else + return new Unknown64(machInst); + } + case 0x1d: + if (size < 0x2) { + if (u) + return decodeNeonUThreeFpReg<FacgeDX, FacgeQX>( + q, size & 0x1, machInst, vd, vn, vm); + else + return new Unknown64(machInst); + } else { + if (u) + return decodeNeonUThreeFpReg<FacgtDX, FacgtQX>( + q, size & 0x1, machInst, vd, vn, vm); + else + return new Unknown64(machInst); + } + case 0x1e: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) { + if (u) + return decodeNeonUThreeFpReg<FmaxpDX, FmaxpQX>( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg<FmaxDX, FmaxQX>( + q, size & 0x1, machInst, vd, vn, vm); + } else { + if (u) + return decodeNeonUThreeFpReg<FminpDX, FminpQX>( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg<FminDX, FminQX>( + q, size & 0x1, machInst, vd, vn, vm); + } + case 0x1f: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) { + if (u) + return decodeNeonUThreeFpReg<FdivDX, FdivQX>( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg<FrecpsDX, FrecpsQX>( + q, size & 0x1, machInst, vd, vn, vm); + } else { + if (u) + return new Unknown64(machInst); + else + return decodeNeonUThreeFpReg<FrsqrtsDX, FrsqrtsQX>( + q, size & 0x1, machInst, vd, vn, vm); + } + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeon3Diff(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 15, 12); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + switch (opcode) { + case 0x0: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UaddlX, Uaddl2X>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SaddlX, Saddl2X>( + q, size, machInst, vd, vn, vm); + case 0x1: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UaddwX, Uaddw2X>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SaddwX, Saddw2X>( + q, size, machInst, vd, vn, vm); + case 0x2: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UsublX, Usubl2X>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SsublX, Ssubl2X>( + q, size, machInst, vd, vn, vm); + case 0x3: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UsubwX, Usubw2X>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SsubwX, Ssubw2X>( + q, size, machInst, vd, vn, vm); + case 0x4: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<RaddhnX, Raddhn2X>( + q, size, machInst, vd, vn, 
vm); + else + return decodeNeonUThreeSReg<AddhnX, Addhn2X>( + q, size, machInst, vd, vn, vm); + case 0x5: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UabalX, Uabal2X>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SabalX, Sabal2X>( + q, size, machInst, vd, vn, vm); + case 0x6: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<RsubhnX, Rsubhn2X>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonUThreeSReg<SubhnX, Subhn2X>( + q, size, machInst, vd, vn, vm); + case 0x7: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UabdlX, Uabdl2X>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SabdlX, Sabdl2X>( + q, size, machInst, vd, vn, vm); + case 0x8: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UmlalX, Umlal2X>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SmlalX, Smlal2X>( + q, size, machInst, vd, vn, vm); + case 0x9: + if (u || (size == 0x0 || size == 0x3)) { + return new Unknown64(machInst); + } else { + if (q) { + return decodeNeonSThreeHAndWReg<Sqdmlal2X>( + size, machInst, vd, vn, vm); + } else { + return decodeNeonSThreeHAndWReg<SqdmlalX>( + size, machInst, vd, vn, vm); + } + } + case 0xa: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UmlslX, Umlsl2X>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SmlslX, Smlsl2X>( + q, size, machInst, vd, vn, vm); + case 0xb: + if (u || (size == 0x0 || size == 0x3)) { + return new Unknown64(machInst); + } else { + if (q) { + return decodeNeonSThreeHAndWReg<Sqdmlsl2X>( + size, machInst, vd, vn, vm); + } else { + return decodeNeonSThreeHAndWReg<SqdmlslX>( + size, machInst, vd, vn, vm); + } + } + case 0xc: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UmullX, Umull2X>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SmullX, Smull2X>( + q, size, machInst, vd, vn, vm); + case 0xd: + if (u || (size == 0x0 || size == 0x3)) { + return new Unknown64(machInst); + } else { + if (q) { + return decodeNeonSThreeHAndWReg<Sqdmull2X>( + size, machInst, vd, vn, vm); + } else { + return decodeNeonSThreeHAndWReg<SqdmullX>( + size, machInst, vd, vn, vm); + } + } + case 0xe: + if (u || size != 0) { + return new Unknown64(machInst); + } else { + if (q) + return new Pmull2X<uint8_t>(machInst, vd, vn, vm); + else + return new PmullX<uint8_t>(machInst, vd, vn, vm); + } + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeon2RegMisc(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 16, 12); + + IntRegIndex vd = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + + uint8_t size_q = (size << 1) | q; + uint8_t sz_q = size_q & 0x3; + uint8_t op = (uint8_t)((bits(machInst, 12) << 1) | + bits(machInst, 29)); + uint8_t switchVal = opcode | ((u ? 
1 : 0) << 5); + + switch (switchVal) { + case 0x00: + if (op + size >= 3) + return new Unknown64(machInst); + return decodeNeonUTwoMiscSReg<Rev64DX, Rev64QX>( + q, size, machInst, vd, vn); + case 0x01: + if (op + size >= 3) + return new Unknown64(machInst); + if (q) + return new Rev16QX<uint8_t>(machInst, vd, vn); + else + return new Rev16DX<uint8_t>(machInst, vd, vn); + case 0x02: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonSTwoMiscSReg<SaddlpDX, SaddlpQX>( + q, size, machInst, vd, vn); + case 0x03: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonUTwoMiscXReg<SuqaddDX, SuqaddQX>( + q, size, machInst, vd, vn); + case 0x04: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonSTwoMiscSReg<ClsDX, ClsQX>( + q, size, machInst, vd, vn); + case 0x05: + if (size != 0x0) + return new Unknown64(machInst); + if (q) + return new CntQX<uint8_t>(machInst, vd, vn); + else + return new CntDX<uint8_t>(machInst, vd, vn); + case 0x06: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonSTwoMiscSReg<SadalpDX, SadalpQX>( + q, size, machInst, vd, vn); + case 0x07: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg<SqabsDX, SqabsQX>( + q, size, machInst, vd, vn); + case 0x08: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg<CmgtZeroDX, CmgtZeroQX>( + q, size, machInst, vd, vn); + case 0x09: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg<CmeqZeroDX, CmeqZeroQX>( + q, size, machInst, vd, vn); + case 0x0a: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg<CmltZeroDX, CmltZeroQX>( + q, size, machInst, vd, vn); + case 0x0b: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg<AbsDX, AbsQX>( + q, size, machInst, vd, vn); + case 0x0c: + if (size < 0x2 || sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg<FcmgtZeroDX, FcmgtZeroQX>( + q, size & 0x1, machInst, vd, vn); + case 0x0d: + if (size < 0x2 || sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg<FcmeqZeroDX, FcmeqZeroQX>( + q, size & 0x1, machInst, vd, vn); + case 0x0e: + if (size < 0x2 || sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg<FcmltZeroDX, FcmltZeroQX>( + q, size & 0x1, machInst, vd, vn); + case 0x0f: + if (size < 0x2 || sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg<FabsDX, FabsQX>( + q, size & 0x1, machInst, vd, vn); + case 0x12: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonUTwoMiscSReg<XtnX, Xtn2X>( + q, size, machInst, vd, vn); + case 0x14: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonSTwoMiscSReg<SqxtnX, Sqxtn2X>( + q, size, machInst, vd, vn); + case 0x16: + if (size > 0x1) + return new Unknown64(machInst); + if (q) { + if (size) + return new Fcvtn2X<uint32_t>(machInst, vd, vn); + else + return new Fcvtn2X<uint16_t>(machInst, vd, vn); + } else { + if (size) + return new FcvtnX<uint32_t>(machInst, vd, vn); + else + return new FcvtnX<uint16_t>(machInst, vd, vn); + } + case 0x17: + if (size > 0x1) + return new Unknown64(machInst); + if (q) { + if (size) + return new Fcvtl2X<uint32_t>(machInst, vd, vn); + else + return new Fcvtl2X<uint16_t>(machInst, vd, vn); + } else { + if (size) + return new FcvtlX<uint32_t>(machInst, vd, vn); + else + return new FcvtlX<uint16_t>(machInst, vd, vn); + } + case 0x18: + if 
(sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUTwoMiscFpReg<FrintnDX, FrintnQX>( + q, size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscFpReg<FrintpDX, FrintpQX>( + q, size & 0x1, machInst, vd, vn); + case 0x19: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUTwoMiscFpReg<FrintmDX, FrintmQX>( + q, size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscFpReg<FrintzDX, FrintzQX>( + q, size & 0x1, machInst, vd, vn); + case 0x1a: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUTwoMiscFpReg<FcvtnsDX, FcvtnsQX>( + q, size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscFpReg<FcvtpsDX, FcvtpsQX>( + q, size & 0x1, machInst, vd, vn); + case 0x1b: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUTwoMiscFpReg<FcvtmsDX, FcvtmsQX>( + q, size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscFpReg<FcvtzsIntDX, FcvtzsIntQX>( + q, size & 0x1, machInst, vd, vn); + case 0x1c: + if (size < 0x2) { + if (sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg<FcvtasDX, FcvtasQX>( + q, size & 0x1, machInst, vd, vn); + } else { + if (size & 0x1) + return new Unknown64(machInst); + if (q) + return new UrecpeQX<uint32_t>(machInst, vd, vn); + else + return new UrecpeDX<uint32_t>(machInst, vd, vn); + } + case 0x1d: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) { + if (q) { + if (size & 0x1) + return new ScvtfIntDQX<uint64_t>(machInst, vd, vn); + else + return new ScvtfIntSQX<uint32_t>(machInst, vd, vn); + } else { + if (size & 0x1) + return new Unknown(machInst); + else + return new ScvtfIntDX<uint32_t>(machInst, vd, vn); + } + } else { + return decodeNeonUTwoMiscFpReg<FrecpeDX, FrecpeQX>( + q, size & 0x1, machInst, vd, vn); + } + case 0x20: + if (op + size >= 3) + return new Unknown64(machInst); + if (q) { + if (size & 0x1) + return new Rev32QX<uint16_t>(machInst, vd, vn); + else + return new Rev32QX<uint8_t>(machInst, vd, vn); + } else { + if (size & 0x1) + return new Rev32DX<uint16_t>(machInst, vd, vn); + else + return new Rev32DX<uint8_t>(machInst, vd, vn); + } + case 0x22: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonUTwoMiscSReg<UaddlpDX, UaddlpQX>( + q, size, machInst, vd, vn); + case 0x23: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonUTwoMiscXReg<UsqaddDX, UsqaddQX>( + q, size, machInst, vd, vn); + return new Unknown64(machInst); + case 0x24: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonSTwoMiscSReg<ClzDX, ClzQX>( + q, size, machInst, vd, vn); + case 0x25: + if (size == 0x0) { + if (q) + return new MvnQX<uint64_t>(machInst, vd, vn); + else + return new MvnDX<uint64_t>(machInst, vd, vn); + } else if (size == 0x1) { + if (q) + return new RbitQX<uint8_t>(machInst, vd, vn); + else + return new RbitDX<uint8_t>(machInst, vd, vn); + } else { + return new Unknown64(machInst); + } + case 0x26: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonUTwoMiscSReg<UadalpDX, UadalpQX>( + q, size, machInst, vd, vn); + case 0x27: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg<SqnegDX, SqnegQX>( + q, size, machInst, vd, vn); + case 0x28: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg<CmgeZeroDX, CmgeZeroQX>( + q, size, machInst, vd, vn); + case 0x29: + if (size_q == 0x6) + return new 
Unknown64(machInst); + return decodeNeonSTwoMiscXReg<CmleZeroDX, CmleZeroQX>( + q, size, machInst, vd, vn); + case 0x2b: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg<NegDX, NegQX>( + q, size, machInst, vd, vn); + case 0x2c: + if (size < 0x2 || sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg<FcmgeZeroDX, FcmgeZeroQX>( + q, size & 0x1, machInst, vd, vn); + case 0x2d: + if (size < 0x2 || sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg<FcmleZeroDX, FcmleZeroQX>( + q, size & 0x1, machInst, vd, vn); + case 0x2f: + if (size < 0x2 || size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg<FnegDX, FnegQX>( + q, size & 0x1, machInst, vd, vn); + case 0x32: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonSTwoMiscSReg<SqxtunX, Sqxtun2X>( + q, size, machInst, vd, vn); + case 0x33: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonUTwoMiscSReg<ShllX, Shll2X>( + q, size, machInst, vd, vn); + case 0x34: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonUTwoMiscSReg<UqxtnX, Uqxtn2X>( + q, size, machInst, vd, vn); + case 0x36: + if (size != 0x1) + return new Unknown64(machInst); + if (q) + return new Fcvtxn2X<uint32_t>(machInst, vd, vn); + else + return new FcvtxnX<uint32_t>(machInst, vd, vn); + case 0x38: + if (size > 0x1 || sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg<FrintaDX, FrintaQX>( + q, size & 0x1, machInst, vd, vn); + case 0x39: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUTwoMiscFpReg<FrintxDX, FrintxQX>( + q, size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscFpReg<FrintiDX, FrintiQX>( + q, size & 0x1, machInst, vd, vn); + case 0x3a: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUTwoMiscFpReg<FcvtnuDX, FcvtnuQX>( + q, size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscFpReg<FcvtpuDX, FcvtpuQX>( + q, size & 0x1, machInst, vd, vn); + case 0x3b: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUTwoMiscFpReg<FcvtmuDX, FcvtmuQX>( + q, size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscFpReg<FcvtzuIntDX, FcvtzuIntQX>( + q, size & 0x1, machInst, vd, vn); + case 0x3c: + if (size < 0x2) { + return decodeNeonUTwoMiscFpReg<FcvtauDX, FcvtauQX>( + q, size & 0x1, machInst, vd, vn); + } else if (size == 0x2) { + if (q) + return new UrsqrteQX<uint32_t>(machInst, vd, vn); + else + return new UrsqrteDX<uint32_t>(machInst, vd, vn); + } else { + return new Unknown64(machInst); + } + case 0x3d: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUTwoMiscFpReg<UcvtfIntDX, UcvtfIntQX>( + q, size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscFpReg<FrsqrteDX, FrsqrteQX>( + q, size & 0x1, machInst, vd, vn); + case 0x3f: + if (size < 0x2 || sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg<FsqrtDX, FsqrtQX>( + q, size & 0x1, machInst, vd, vn); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonAcrossLanes(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 16, 12); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + 
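// A simplified sketch of the gating pattern these NEON decode helpers rely on
// (restating the locals extracted above, not a separate helper): the element
// size field and the Q bit are packed into one value so that reserved
// size/register-width combinations can be rejected with a single compare, and
// the U bit is folded into the opcode so signed and unsigned variants of the
// same operation land in one switch.
//
//     uint8_t size_q    = (size << 1) | q;   // size:Q packed into three bits
//     uint8_t switchVal = opcode | (u << 5); // U selects the 0x2x case block
//
//     // e.g. for the across-lanes group, 64-bit lanes (size == 0x3) are
//     // always reserved, as is a 32-bit reduction over a 64-bit source
//     // vector (size_q == 0x4, which would leave only two lanes):
//     if (size_q == 0x4 || size == 0x3)
//         return new Unknown64(machInst);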
+ uint8_t size_q = (size << 1) | q; + uint8_t sz_q = size_q & 0x3; + uint8_t switchVal = opcode | ((u ? 1 : 0) << 5); + + switch (switchVal) { + case 0x03: + if (size_q == 0x4 || size == 0x3) + return new Unknown64(machInst); + return decodeNeonSAcrossLanesLongReg<SaddlvDX, SaddlvQX, + SaddlvBQX>( + q, size, machInst, vd, vn); + case 0x0a: + if (size_q == 0x4 || size == 0x3) + return new Unknown64(machInst); + return decodeNeonSAcrossLanesReg<SmaxvDX, SmaxvQX>( + q, size, machInst, vd, vn); + case 0x1a: + if (size_q == 0x4 || size == 0x3) + return new Unknown64(machInst); + return decodeNeonSAcrossLanesReg<SminvDX, SminvQX>( + q, size, machInst, vd, vn); + case 0x1b: + if (size_q == 0x4 || size == 0x3) + return new Unknown64(machInst); + return decodeNeonUAcrossLanesReg<AddvDX, AddvQX>( + q, size, machInst, vd, vn); + case 0x23: + if (size_q == 0x4 || size == 0x3) + return new Unknown64(machInst); + return decodeNeonUAcrossLanesLongReg<UaddlvDX, UaddlvQX, + UaddlvBQX>( + q, size, machInst, vd, vn); + case 0x2a: + if (size_q == 0x4 || size == 0x3) + return new Unknown64(machInst); + return decodeNeonUAcrossLanesReg<UmaxvDX, UmaxvQX>( + q, size, machInst, vd, vn); + case 0x2c: + if (sz_q != 0x1) + return new Unknown64(machInst); + if (size < 0x2) { + if (q) + return new FmaxnmvQX<uint32_t>(machInst, vd, vn); + else + return new Unknown64(machInst); + } else { + if (q) + return new FminnmvQX<uint32_t>(machInst, vd, vn); + else + return new Unknown64(machInst); + } + case 0x2f: + if (sz_q != 0x1) + return new Unknown64(machInst); + if (size < 0x2) { + if (q) + return new FmaxvQX<uint32_t>(machInst, vd, vn); + else + return new Unknown64(machInst); + } else { + if (q) + return new FminvQX<uint32_t>(machInst, vd, vn); + else + return new Unknown64(machInst); + } + case 0x3a: + if (size_q == 0x4 || size == 0x3) + return new Unknown64(machInst); + return decodeNeonUAcrossLanesReg<UminvDX, UminvQX>( + q, size, machInst, vd, vn); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonCopy(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t op = bits(machInst, 29); + uint8_t imm5 = bits(machInst, 20, 16); + uint8_t imm4 = bits(machInst, 14, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + + uint8_t imm5_pos = findLsbSet(imm5); + uint8_t index1 = 0, index2 = 0; + + if (op) { + if (!q || (imm4 & mask(imm5_pos))) + return new Unknown64(machInst); + + index1 = bits(imm5, 4, imm5_pos + 1); // dst + index2 = bits(imm4, 3, imm5_pos); // src + + switch (imm5_pos) { + case 0: + return new InsElemX<uint8_t>(machInst, vd, vn, index1, index2); + case 1: + return new InsElemX<uint16_t>(machInst, vd, vn, index1, index2); + case 2: + return new InsElemX<uint32_t>(machInst, vd, vn, index1, index2); + case 3: + return new InsElemX<uint64_t>(machInst, vd, vn, index1, index2); + default: + return new Unknown64(machInst); + } + } + + switch (imm4) { + case 0x0: + index1 = bits(imm5, 4, imm5_pos + 1); + switch (imm5_pos) { + case 0: + if (q) + return new DupElemQX<uint8_t>(machInst, vd, vn, index1); + else + return new DupElemDX<uint8_t>(machInst, vd, vn, index1); + case 1: + if (q) + return new DupElemQX<uint16_t>(machInst, vd, vn, index1); + else + return new DupElemDX<uint16_t>(machInst, vd, vn, index1); + case 2: + if (q) + return new DupElemQX<uint32_t>(machInst, vd, vn, index1); + else + return new DupElemDX<uint32_t>(machInst, vd, vn, index1); + case 3: + if (q) + return new 
DupElemQX<uint64_t>(machInst, vd, vn, index1); + else + return new Unknown64(machInst); + default: + return new Unknown64(machInst); + } + case 0x1: + switch (imm5) { + case 0x1: + if (q) + return new DupGprWQX<uint8_t>(machInst, vd, vn); + else + return new DupGprWDX<uint8_t>(machInst, vd, vn); + case 0x2: + if (q) + return new DupGprWQX<uint16_t>(machInst, vd, vn); + else + return new DupGprWDX<uint16_t>(machInst, vd, vn); + case 0x4: + if (q) + return new DupGprWQX<uint32_t>(machInst, vd, vn); + else + return new DupGprWDX<uint32_t>(machInst, vd, vn); + case 0x8: + if (q) + return new DupGprXQX<uint64_t>(machInst, vd, vn); + else + return new Unknown64(machInst); + } + case 0x3: + index1 = imm5 >> (imm5_pos + 1); + switch (imm5_pos) { + case 0: + return new InsGprWX<uint8_t>(machInst, vd, vn, index1); + case 1: + return new InsGprWX<uint16_t>(machInst, vd, vn, index1); + case 2: + return new InsGprWX<uint32_t>(machInst, vd, vn, index1); + case 3: + return new InsGprXX<uint64_t>(machInst, vd, vn, index1); + default: + return new Unknown64(machInst); + } + case 0x5: + index1 = bits(imm5, 4, imm5_pos + 1); + switch (imm5_pos) { + case 0: + if (q) + return new SmovXX<int8_t>(machInst, vd, vn, index1); + else + return new SmovWX<int8_t>(machInst, vd, vn, index1); + case 1: + if (q) + return new SmovXX<int16_t>(machInst, vd, vn, index1); + else + return new SmovWX<int16_t>(machInst, vd, vn, index1); + case 2: + if (q) + return new SmovXX<int32_t>(machInst, vd, vn, index1); + else + return new Unknown64(machInst); + default: + return new Unknown64(machInst); + } + case 0x7: + index1 = imm5 >> (imm5_pos + 1); + + if ((q && imm5_pos != 3) || (!q && imm5_pos >= 3)) + return new Unknown64(machInst); + + switch (imm5_pos) { + case 0: + return new UmovWX<uint8_t>(machInst, vd, vn, index1); + case 1: + return new UmovWX<uint16_t>(machInst, vd, vn, index1); + case 2: + return new UmovWX<uint32_t>(machInst, vd, vn, index1); + case 3: + return new UmovXX<uint64_t>(machInst, vd, vn, index1); + default: + return new Unknown64(machInst); + } + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonIndexedElem(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t L = bits(machInst, 21); + uint8_t M = bits(machInst, 20); + uint8_t opcode = bits(machInst, 15, 12); + uint8_t H = bits(machInst, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm_bf = (IntRegIndex) (uint8_t) bits(machInst, 19, 16); + + uint8_t index = 0; + uint8_t index_fp = 0; + uint8_t vmh = 0; + uint8_t sz = size & 0x1; + uint8_t sz_q = (sz << 1) | bits(machInst, 30); + uint8_t sz_L = (sz << 1) | L; + + // Index and 2nd register operand for integer instructions + if (size == 0x1) { + index = (H << 2) | (L << 1) | M; + // vmh = 0; + } else if (size == 0x2) { + index = (H << 1) | L; + vmh = M; + } + IntRegIndex vm = (IntRegIndex) (uint8_t) (vmh << 4 | vm_bf); + + // Index and 2nd register operand for FP instructions + vmh = M; + if ((size & 0x1) == 0) { + index_fp = (H << 1) | L; + } else if (L == 0) { + index_fp = H; + } + IntRegIndex vm_fp = (IntRegIndex) (uint8_t) (vmh << 4 | vm_bf); + + switch (opcode) { + case 0x0: + if (!u || (size == 0x0 || size == 0x3)) + return new Unknown64(machInst); + else + return decodeNeonUThreeImmHAndWReg<MlaElemDX, MlaElemQX>( + q, size, machInst, vd, vn, vm, index); + case 0x1: + if (!u && size 
>= 2 && sz_q != 0x2 && sz_L != 0x3) + return decodeNeonUThreeImmFpReg<FmlaElemDX, FmlaElemQX>( + q, sz, machInst, vd, vn, vm_fp, index_fp); + else + return new Unknown64(machInst); + case 0x2: + if (size == 0x0 || size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeImmHAndWReg<UmlalElemX, UmlalElem2X>( + q, size, machInst, vd, vn, vm, index); + else + return decodeNeonSThreeImmHAndWReg<SmlalElemX, SmlalElem2X>( + q, size, machInst, vd, vn, vm, index); + case 0x3: + if (u || (size == 0x0 || size == 0x3)) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg<SqdmlalElemX, + SqdmlalElem2X>( + q, size, machInst, vd, vn, vm, index); + case 0x4: + if (u && !(size == 0x0 || size == 0x3)) + return decodeNeonUThreeImmHAndWReg<MlsElemDX, MlsElemQX>( + q, size, machInst, vd, vn, vm, index); + else + return new Unknown64(machInst); + case 0x5: + if (!u && size >= 0x2 && sz_L != 0x3 && sz_q != 0x2) + return decodeNeonUThreeImmFpReg<FmlsElemDX, FmlsElemQX>( + q, sz, machInst, vd, vn, vm_fp, index_fp); + else + return new Unknown64(machInst); + case 0x6: + if (size == 0x0 || size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeImmHAndWReg<UmlslElemX, UmlslElem2X>( + q, size, machInst, vd, vn, vm, index); + else + return decodeNeonSThreeImmHAndWReg<SmlslElemX, SmlslElem2X>( + q, size, machInst, vd, vn, vm, index); + case 0x7: + if (u || (size == 0x0 || size == 0x3)) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg<SqdmlslElemX, + SqdmlslElem2X>( + q, size, machInst, vd, vn, vm, index); + case 0x8: + if (u || (size == 0x0 || size == 0x3)) + return new Unknown64(machInst); + else + return decodeNeonUThreeImmHAndWReg<MulElemDX, MulElemQX>( + q, size, machInst, vd, vn, vm, index); + case 0x9: + if (size >= 2 && sz_q != 0x2 && sz_L != 0x3) { + if (u) + return decodeNeonUThreeImmFpReg<FmulxElemDX, FmulxElemQX>( + q, sz, machInst, vd, vn, vm_fp, index_fp); + else + return decodeNeonUThreeImmFpReg<FmulElemDX, FmulElemQX>( + q, sz, machInst, vd, vn, vm_fp, index_fp); + } else { + return new Unknown64(machInst); + } + case 0xa: + if (size == 0x0 || size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeImmHAndWReg<UmullElemX, UmullElem2X>( + q, size, machInst, vd, vn, vm, index); + else + return decodeNeonSThreeImmHAndWReg<SmullElemX, SmullElem2X>( + q, size, machInst, vd, vn, vm, index); + case 0xb: + if (u || (size == 0x0 || size == 0x3)) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg<SqdmullElemX, SqdmullElem2X>( + q, size, machInst, vd, vn, vm, index); + case 0xc: + if (u || (size == 0x0 || size == 0x3)) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg<SqdmulhElemDX, SqdmulhElemQX>( + q, size, machInst, vd, vn, vm, index); + case 0xd: + if (u || (size == 0x0 || size == 0x3)) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg<SqrdmulhElemDX, SqrdmulhElemQX>( + q, size, machInst, vd, vn, vm, index); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonModImm(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t op = bits(machInst, 29); + uint8_t abcdefgh = (bits(machInst, 18, 16) << 5) | + bits(machInst, 9, 5); + uint8_t cmode = bits(machInst, 15, 12); + uint8_t o2 = bits(machInst, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + + if (o2 == 0x1 || (op == 0x1 && cmode == 0xf && !q)) + return new Unknown64(machInst); + + bool 
immValid = true; + const uint64_t bigImm = simd_modified_imm(op, cmode, abcdefgh, + immValid, + true /* isAarch64 */); + if (!immValid) { + return new Unknown(machInst); + } + + if (op) { + if (bits(cmode, 3) == 0) { + if (bits(cmode, 0) == 0) { + if (q) + return new MvniQX<uint64_t>(machInst, vd, bigImm); + else + return new MvniDX<uint64_t>(machInst, vd, bigImm); + } else { + if (q) + return new BicImmQX<uint64_t>(machInst, vd, bigImm); + else + return new BicImmDX<uint64_t>(machInst, vd, bigImm); + } + } else { + if (bits(cmode, 2) == 1) { + switch (bits(cmode, 1, 0)) { + case 0: + case 1: + if (q) + return new MvniQX<uint64_t>(machInst, vd, bigImm); + else + return new MvniDX<uint64_t>(machInst, vd, bigImm); + case 2: + if (q) + return new MoviQX<uint64_t>(machInst, vd, bigImm); + else + return new MoviDX<uint64_t>(machInst, vd, bigImm); + case 3: + if (q) + return new FmovQX<uint64_t>(machInst, vd, bigImm); + else + return new MoviDX<uint64_t>(machInst, vd, bigImm); + } + } else { + if (bits(cmode, 0) == 0) { + if (q) + return new MvniQX<uint64_t>(machInst, vd, bigImm); + else + return new MvniDX<uint64_t>(machInst, vd, bigImm); + } else { + if (q) + return new BicImmQX<uint64_t>(machInst, vd, + bigImm); + else + return new BicImmDX<uint64_t>(machInst, vd, + bigImm); + } + } + } + } else { + if (bits(cmode, 3) == 0) { + if (bits(cmode, 0) == 0) { + if (q) + return new MoviQX<uint64_t>(machInst, vd, bigImm); + else + return new MoviDX<uint64_t>(machInst, vd, bigImm); + } else { + if (q) + return new OrrImmQX<uint64_t>(machInst, vd, bigImm); + else + return new OrrImmDX<uint64_t>(machInst, vd, bigImm); + } + } else { + if (bits(cmode, 2) == 1) { + if (bits(cmode, 1, 0) == 0x3) { + if (q) + return new FmovQX<uint32_t>(machInst, vd, bigImm); + else + return new FmovDX<uint32_t>(machInst, vd, bigImm); + } else { + if (q) + return new MoviQX<uint64_t>(machInst, vd, bigImm); + else + return new MoviDX<uint64_t>(machInst, vd, bigImm); + } + } else { + if (bits(cmode, 0) == 0) { + if (q) + return new MoviQX<uint64_t>(machInst, vd, bigImm); + else + return new MoviDX<uint64_t>(machInst, vd, bigImm); + } else { + if (q) + return new OrrImmQX<uint64_t>(machInst, vd, + bigImm); + else + return new OrrImmDX<uint64_t>(machInst, vd, bigImm); + } + } + } + } + return new Unknown(machInst); + } + + StaticInstPtr + decodeNeonShiftByImm(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t u = bits(machInst, 29); + uint8_t immh = bits(machInst, 22, 19); + uint8_t immb = bits(machInst, 18, 16); + uint8_t opcode = bits(machInst, 15, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + + uint8_t immh3 = bits(machInst, 22); + uint8_t immh3_q = (immh3 << 1) | q; + uint8_t op_u = (bits(machInst, 12) << 1) | u; + uint8_t size = findMsbSet(immh); + int shiftAmt = 0; + + switch (opcode) { + case 0x00: + if (immh3_q == 0x2) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftXReg<UshrDX, UshrQX>( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftXReg<SshrDX, SshrQX>( + q, size, machInst, vd, vn, shiftAmt); + case 0x02: + if (immh3_q == 0x2) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftXReg<UsraDX, UsraQX>( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftXReg<SsraDX, SsraQX>( + q, size, machInst, vd, vn, 
shiftAmt); + case 0x04: + if (immh3_q == 0x2) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftXReg<UrshrDX, UrshrQX>( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftXReg<SrshrDX, SrshrQX>( + q, size, machInst, vd, vn, shiftAmt); + case 0x06: + if (immh3_q == 0x2) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftXReg<UrsraDX, UrsraQX>( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftXReg<SrsraDX, SrsraQX>( + q, size, machInst, vd, vn, shiftAmt); + case 0x08: + if (u && !(immh3_q == 0x2)) { + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + return decodeNeonUTwoShiftXReg<SriDX, SriQX>( + q, size, machInst, vd, vn, shiftAmt); + } else { + return new Unknown64(machInst); + } + case 0x0a: + if (immh3_q == 0x2) + return new Unknown64(machInst); + shiftAmt = ((immh << 3) | immb) - (8 << size); + if (u) + return decodeNeonUTwoShiftXReg<SliDX, SliQX>( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonUTwoShiftXReg<ShlDX, ShlQX>( + q, size, machInst, vd, vn, shiftAmt); + case 0x0c: + if (u && !(immh3_q == 0x2 || op_u == 0x0)) { + shiftAmt = ((immh << 3) | immb) - (8 << size); + return decodeNeonSTwoShiftXReg<SqshluDX, SqshluQX>( + q, size, machInst, vd, vn, shiftAmt); + } else { + return new Unknown64(machInst); + } + case 0x0e: + if (immh3_q == 0x2 || op_u == 0x0) + return new Unknown64(machInst); + shiftAmt = ((immh << 3) | immb) - (8 << size); + if (u) + return decodeNeonUTwoShiftXReg<UqshlImmDX, UqshlImmQX>( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftXReg<SqshlImmDX, SqshlImmQX>( + q, size, machInst, vd, vn, shiftAmt); + case 0x10: + if (immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonSTwoShiftSReg<SqshrunX, Sqshrun2X>( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonUTwoShiftSReg<ShrnX, Shrn2X>( + q, size, machInst, vd, vn, shiftAmt); + case 0x11: + if (immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonSTwoShiftSReg<SqrshrunX, Sqrshrun2X>( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonUTwoShiftSReg<RshrnX, Rshrn2X>( + q, size, machInst, vd, vn, shiftAmt); + case 0x12: + if (immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftSReg<UqshrnX, Uqshrn2X>( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftSReg<SqshrnX, Sqshrn2X>( + q, size, machInst, vd, vn, shiftAmt); + case 0x13: + if (immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftSReg<UqrshrnX, Uqrshrn2X>( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftSReg<SqrshrnX, Sqrshrn2X>( + q, size, machInst, vd, vn, shiftAmt); + case 0x14: + if (immh3) + return new Unknown64(machInst); + shiftAmt = ((immh << 3) | immb) - (8 << size); + if (u) + return decodeNeonUTwoShiftSReg<UshllX, Ushll2X>( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftSReg<SshllX, Sshll2X>( + q, size, machInst, vd, vn, shiftAmt); + case 0x1c: + if (immh < 0x4 || immh3_q == 0x2) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) { + return 
decodeNeonUTwoShiftFpReg<UcvtfFixedDX, UcvtfFixedQX>( + q, size & 0x1, machInst, vd, vn, shiftAmt); + } else { + if (q) { + if (size & 0x1) + return new ScvtfFixedDQX<uint64_t>(machInst, vd, vn, + shiftAmt); + else + return new ScvtfFixedSQX<uint32_t>(machInst, vd, vn, + shiftAmt); + } else { + if (size & 0x1) + return new Unknown(machInst); + else + return new ScvtfFixedDX<uint32_t>(machInst, vd, vn, + shiftAmt); + } + } + case 0x1f: + if (immh < 0x4 || immh3_q == 0x2) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftFpReg<FcvtzuFixedDX, FcvtzuFixedQX>( + q, size & 0x1, machInst, vd, vn, shiftAmt); + else + return decodeNeonUTwoShiftFpReg<FcvtzsFixedDX, FcvtzsFixedQX>( + q, size & 0x1, machInst, vd, vn, shiftAmt); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonTblTbx(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + uint8_t switchVal = bits(machInst, 14, 12); + + switch (switchVal) { + case 0x0: + if (q) + return new Tbl1QX<uint8_t>(machInst, vd, vn, vm); + else + return new Tbl1DX<uint8_t>(machInst, vd, vn, vm); + case 0x1: + if (q) + return new Tbx1QX<uint8_t>(machInst, vd, vn, vm); + else + return new Tbx1DX<uint8_t>(machInst, vd, vn, vm); + case 0x2: + if (q) + return new Tbl2QX<uint8_t>(machInst, vd, vn, vm); + else + return new Tbl2DX<uint8_t>(machInst, vd, vn, vm); + case 0x3: + if (q) + return new Tbx2QX<uint8_t>(machInst, vd, vn, vm); + else + return new Tbx2DX<uint8_t>(machInst, vd, vn, vm); + case 0x4: + if (q) + return new Tbl3QX<uint8_t>(machInst, vd, vn, vm); + else + return new Tbl3DX<uint8_t>(machInst, vd, vn, vm); + case 0x5: + if (q) + return new Tbx3QX<uint8_t>(machInst, vd, vn, vm); + else + return new Tbx3DX<uint8_t>(machInst, vd, vn, vm); + case 0x6: + if (q) + return new Tbl4QX<uint8_t>(machInst, vd, vn, vm); + else + return new Tbl4DX<uint8_t>(machInst, vd, vn, vm); + case 0x7: + if (q) + return new Tbx4QX<uint8_t>(machInst, vd, vn, vm); + else + return new Tbx4DX<uint8_t>(machInst, vd, vn, vm); + default: + return new Unknown64(machInst); + } + + return new Unknown64(machInst); + } + + StaticInstPtr + decodeNeonZipUzpTrn(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 14, 12); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + switch (opcode) { + case 0x1: + return decodeNeonUThreeXReg<Uzp1DX, Uzp1QX>( + q, size, machInst, vd, vn, vm); + case 0x2: + return decodeNeonUThreeXReg<Trn1DX, Trn1QX>( + q, size, machInst, vd, vn, vm); + case 0x3: + return decodeNeonUThreeXReg<Zip1DX, Zip1QX>( + q, size, machInst, vd, vn, vm); + case 0x5: + return decodeNeonUThreeXReg<Uzp2DX, Uzp2QX>( + q, size, machInst, vd, vn, vm); + case 0x6: + return decodeNeonUThreeXReg<Trn2DX, Trn2QX>( + q, size, machInst, vd, vn, vm); + case 0x7: + return decodeNeonUThreeXReg<Zip2DX, Zip2QX>( + q, size, machInst, vd, vn, vm); + default: + return new Unknown64(machInst); + } + return new Unknown64(machInst); + } + + StaticInstPtr + decodeNeonExt(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t op2 = bits(machInst, 23, 22); + 
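// Rough sketch of the semantics being decoded below: EXT treats the byte-wise
// concatenation Vm:Vn as one double-length vector and returns the numBytes
// bytes starting at byte <index>. Approximately, for each destination byte i
// (Vn_bytes/Vm_bytes are purely illustrative byte views of the two sources):
//
//     dest[i] = (i + index < numBytes) ? Vn_bytes[i + index]
//                                      : Vm_bytes[i + index - numBytes];
//
// With a 64-bit destination (Q == 0) only indexes 0..7 are meaningful, which
// is why bit 3 of imm4 must be clear in that case and the index is masked
// with 0x7.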
uint8_t imm4 = bits(machInst, 14, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + if (op2 != 0 || (q == 0x0 && bits(imm4, 3) == 0x1)) + return new Unknown64(machInst); + + uint8_t index = q ? imm4 : imm4 & 0x7; + + if (q) { + return new ExtQX<uint8_t>(machInst, vd, vn, vm, index); + } else { + return new ExtDX<uint8_t>(machInst, vd, vn, vm, index); + } + } + + StaticInstPtr + decodeNeonSc3Same(ExtMachInst machInst) + { + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 15, 11); + uint8_t s = bits(machInst, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + switch (opcode) { + case 0x01: + if (u) + return decodeNeonUThreeUReg<UqaddScX>( + size, machInst, vd, vn, vm); + else + return decodeNeonSThreeUReg<SqaddScX>( + size, machInst, vd, vn, vm); + case 0x05: + if (u) + return decodeNeonUThreeUReg<UqsubScX>( + size, machInst, vd, vn, vm); + else + return decodeNeonSThreeUReg<SqsubScX>( + size, machInst, vd, vn, vm); + case 0x06: + if (size != 0x3) + return new Unknown64(machInst); + if (u) + return new CmhiDX<uint64_t>(machInst, vd, vn, vm); + else + return new CmgtDX<int64_t>(machInst, vd, vn, vm); + case 0x07: + if (size != 0x3) + return new Unknown64(machInst); + if (u) + return new CmhsDX<uint64_t>(machInst, vd, vn, vm); + else + return new CmgeDX<int64_t>(machInst, vd, vn, vm); + case 0x08: + if (!s && size != 0x3) + return new Unknown64(machInst); + if (u) + return new UshlDX<uint64_t>(machInst, vd, vn, vm); + else + return new SshlDX<int64_t>(machInst, vd, vn, vm); + case 0x09: + if (!s && size != 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeUReg<UqshlScX>( + size, machInst, vd, vn, vm); + else + return decodeNeonSThreeUReg<SqshlScX>( + size, machInst, vd, vn, vm); + case 0x0a: + if (!s && size != 0x3) + return new Unknown64(machInst); + if (u) + return new UrshlDX<uint64_t>(machInst, vd, vn, vm); + else + return new SrshlDX<int64_t>(machInst, vd, vn, vm); + case 0x0b: + if (!s && size != 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeUReg<UqrshlScX>( + size, machInst, vd, vn, vm); + else + return decodeNeonSThreeUReg<SqrshlScX>( + size, machInst, vd, vn, vm); + case 0x10: + if (size != 0x3) + return new Unknown64(machInst); + if (u) + return new SubDX<uint64_t>(machInst, vd, vn, vm); + else + return new AddDX<uint64_t>(machInst, vd, vn, vm); + case 0x11: + if (size != 0x3) + return new Unknown64(machInst); + if (u) + return new CmeqDX<uint64_t>(machInst, vd, vn, vm); + else + return new CmtstDX<uint64_t>(machInst, vd, vn, vm); + case 0x16: + if (size == 0x3 || size == 0x0) + return new Unknown64(machInst); + if (u) + return decodeNeonSThreeHAndWReg<SqrdmulhScX>( + size, machInst, vd, vn, vm); + else + return decodeNeonSThreeHAndWReg<SqdmulhScX>( + size, machInst, vd, vn, vm); + case 0x1a: + if (!u || size < 0x2) + return new Unknown64(machInst); + else + return decodeNeonUThreeScFpReg<FabdScX>( + size & 0x1, machInst, vd, vn, vm); + case 0x1b: + if (u || size > 0x1) + return new Unknown64(machInst); + else + return decodeNeonUThreeScFpReg<FmulxScX>( + size & 0x1, machInst, vd, vn, vm); + case 0x1c: + if (size < 0x2) { + if (u) + return 
decodeNeonUThreeScFpReg<FcmgeScX>( + size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeScFpReg<FcmeqScX>( + size & 0x1, machInst, vd, vn, vm); + } else { + if (u) + return decodeNeonUThreeScFpReg<FcmgtScX>( + size & 0x1, machInst, vd, vn, vm); + else + return new Unknown64(machInst); + } + case 0x1d: + if (!u) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUThreeScFpReg<FacgeScX>( + size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeScFpReg<FacgtScX>( + size & 0x1, machInst, vd, vn, vm); + case 0x1f: + if (u) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUThreeScFpReg<FrecpsScX>( + size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeScFpReg<FrsqrtsScX>( + size & 0x1, machInst, vd, vn, vm); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonSc3Diff(ExtMachInst machInst) + { + if (bits(machInst, 29)) + return new Unknown64(machInst); + + uint8_t size = bits(machInst, 23, 22); + if (size == 0x0 || size == 0x3) + return new Unknown64(machInst); + + uint8_t opcode = bits(machInst, 15, 12); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + switch (opcode) { + case 0x9: + return decodeNeonSThreeHAndWReg<SqdmlalScX>(size, machInst, vd, vn, vm); + case 0xb: + return decodeNeonSThreeHAndWReg<SqdmlslScX>(size, machInst, vd, vn, vm); + case 0xd: + return decodeNeonSThreeHAndWReg<SqdmullScX>(size, machInst, vd, vn, vm); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonSc2RegMisc(ExtMachInst machInst) + { + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 16, 12); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + + uint8_t switchVal = opcode | ((u ? 
1 : 0) << 5); + switch (switchVal) { + case 0x03: + return decodeNeonUTwoMiscUReg<SuqaddScX>(size, machInst, vd, vn); + case 0x07: + return decodeNeonSTwoMiscUReg<SqabsScX>(size, machInst, vd, vn); + case 0x08: + if (size != 0x3) + return new Unknown64(machInst); + else + return new CmgtZeroDX<int64_t>(machInst, vd, vn); + case 0x09: + if (size != 0x3) + return new Unknown64(machInst); + else + return new CmeqZeroDX<int64_t>(machInst, vd, vn); + case 0x0a: + if (size != 0x3) + return new Unknown64(machInst); + else + return new CmltZeroDX<int64_t>(machInst, vd, vn); + case 0x0b: + if (size != 0x3) + return new Unknown64(machInst); + else + return new AbsDX<int64_t>(machInst, vd, vn); + case 0x0c: + if (size < 0x2) + return new Unknown64(machInst); + else + return decodeNeonUTwoMiscScFpReg<FcmgtZeroScX>( + size & 0x1, machInst, vd, vn); + case 0x0d: + if (size < 0x2) + return new Unknown64(machInst); + else + return decodeNeonUTwoMiscScFpReg<FcmeqZeroScX>( + size & 0x1, machInst, vd, vn); + case 0x0e: + if (size < 0x2) + return new Unknown64(machInst); + else + return decodeNeonUTwoMiscScFpReg<FcmltZeroScX>( + size & 0x1, machInst, vd, vn); + case 0x14: + if (size == 0x3) { + return new Unknown64(machInst); + } else { + switch (size) { + case 0x0: + return new SqxtnScX<int8_t>(machInst, vd, vn); + case 0x1: + return new SqxtnScX<int16_t>(machInst, vd, vn); + case 0x2: + return new SqxtnScX<int32_t>(machInst, vd, vn); + } + } + case 0x1a: + if (size < 0x2) + return decodeNeonUTwoMiscScFpReg<FcvtnsScX>( + size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscScFpReg<FcvtpsScX>( + size & 0x1, machInst, vd, vn); + case 0x1b: + if (size < 0x2) + return decodeNeonUTwoMiscScFpReg<FcvtmsScX>( + size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscScFpReg<FcvtzsIntScX>( + size & 0x1, machInst, vd, vn); + case 0x1c: + if (size < 0x2) + return decodeNeonUTwoMiscScFpReg<FcvtasScX>( + size & 0x1, machInst, vd, vn); + else + return new Unknown64(machInst); + case 0x1d: + if (size < 0x2) { + if (size & 0x1) + return new ScvtfIntScDX<uint64_t>(machInst, vd, vn); + else + return new ScvtfIntScSX<uint32_t>(machInst, vd, vn); + } else { + return decodeNeonUTwoMiscScFpReg<FrecpeScX>( + size & 0x1, machInst, vd, vn); + } + case 0x1f: + if (size < 0x2) + return new Unknown64(machInst); + else + return decodeNeonUTwoMiscScFpReg<FrecpxX>( + size & 0x1, machInst, vd, vn); + case 0x23: + return decodeNeonUTwoMiscUReg<UsqaddScX>(size, machInst, vd, vn); + case 0x27: + return decodeNeonSTwoMiscUReg<SqnegScX>(size, machInst, vd, vn); + case 0x28: + if (size != 0x3) + return new Unknown64(machInst); + else + return new CmgeZeroDX<int64_t>(machInst, vd, vn); + case 0x29: + if (size != 0x3) + return new Unknown64(machInst); + else + return new CmleZeroDX<int64_t>(machInst, vd, vn); + case 0x2b: + if (size != 0x3) + return new Unknown64(machInst); + else + return new NegDX<int64_t>(machInst, vd, vn); + case 0x2c: + if (size < 0x2) + return new Unknown64(machInst); + else + return decodeNeonUTwoMiscScFpReg<FcmgeZeroScX>( + size & 0x1, machInst, vd, vn); + case 0x2d: + if (size < 0x2) + return new Unknown64(machInst); + else + return decodeNeonUTwoMiscScFpReg<FcmleZeroScX>( + size & 0x1, machInst, vd, vn); + case 0x32: + if (size == 0x3) { + return new Unknown64(machInst); + } else { + switch (size) { + case 0x0: + return new SqxtunScX<int8_t>(machInst, vd, vn); + case 0x1: + return new SqxtunScX<int16_t>(machInst, vd, vn); + case 0x2: + return new SqxtunScX<int32_t>(machInst, vd, vn); + } + } + case 
0x34: + if (size == 0x3) { + return new Unknown64(machInst); + } else { + switch (size) { + case 0x0: + return new UqxtnScX<uint8_t>(machInst, vd, vn); + case 0x1: + return new UqxtnScX<uint16_t>(machInst, vd, vn); + case 0x2: + return new UqxtnScX<uint32_t>(machInst, vd, vn); + } + } + case 0x36: + if (size != 0x1) { + return new Unknown64(machInst); + } else { + return new FcvtxnScX<uint32_t>(machInst, vd, vn); + } + case 0x3a: + if (size < 0x2) + return decodeNeonUTwoMiscScFpReg<FcvtnuScX>( + size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscScFpReg<FcvtpuScX>( + size & 0x1, machInst, vd, vn); + case 0x3b: + if (size < 0x2) + return decodeNeonUTwoMiscScFpReg<FcvtmuScX>( + size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscScFpReg<FcvtzuIntScX>( + size & 0x1, machInst, vd, vn); + case 0x3c: + if (size < 0x2) + return decodeNeonUTwoMiscScFpReg<FcvtauScX>( + size & 0x1, machInst, vd, vn); + else + return new Unknown64(machInst); + case 0x3d: + if (size < 0x2) + return decodeNeonUTwoMiscScFpReg<UcvtfIntScX>( + size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscScFpReg<FrsqrteScX>( + size & 0x1, machInst, vd, vn); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonScPwise(ExtMachInst machInst) + { + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 16, 12); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + + if (!u) { + if (opcode == 0x1b && size == 0x3) + return new AddpScQX<uint64_t>(machInst, vd, vn); + else + return new Unknown64(machInst); + } + + uint8_t switchVal = (opcode << 0) | (size << 5); + switch (switchVal) { + case 0x0c: + case 0x2c: + return decodeNeonUTwoMiscPwiseScFpReg<FmaxnmpScDX, FmaxnmpScQX>( + size & 0x1, machInst, vd, vn); + case 0x0d: + case 0x2d: + return decodeNeonUTwoMiscPwiseScFpReg<FaddpScDX, FaddpScQX>( + size & 0x1, machInst, vd, vn); + case 0x0f: + case 0x2f: + return decodeNeonUTwoMiscPwiseScFpReg<FmaxpScDX, FmaxpScQX>( + size & 0x1, machInst, vd, vn); + case 0x4c: + case 0x6c: + return decodeNeonUTwoMiscPwiseScFpReg<FminnmpScDX, FminnmpScQX>( + size & 0x1, machInst, vd, vn); + case 0x4f: + case 0x6f: + return decodeNeonUTwoMiscPwiseScFpReg<FminpScDX, FminpScQX>( + size & 0x1, machInst, vd, vn); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonScCopy(ExtMachInst machInst) + { + if (bits(machInst, 14, 11) != 0 || bits(machInst, 29)) + return new Unknown64(machInst); + + uint8_t imm5 = bits(machInst, 20, 16); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + + uint8_t size = findLsbSet(imm5); + uint8_t index = bits(imm5, 4, size + 1); + + return decodeNeonUTwoShiftUReg<DupElemScX>( + size, machInst, vd, vn, index); + } + + StaticInstPtr + decodeNeonScIndexedElem(ExtMachInst machInst) + { + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t L = bits(machInst, 21); + uint8_t M = bits(machInst, 20); + uint8_t opcode = bits(machInst, 15, 12); + uint8_t H = bits(machInst, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm_bf = (IntRegIndex) (uint8_t) bits(machInst, 19, 16); + + uint8_t index = 0; + uint8_t index_fp = 0; + uint8_t vmh = 0; + uint8_t sz_L = bits(machInst, 22, 21); + + // Index and 2nd 
register operand for integer instructions + if (size == 0x1) { + index = (H << 2) | (L << 1) | M; + // vmh = 0; + } else if (size == 0x2) { + index = (H << 1) | L; + vmh = M; + } else if (size == 0x3) { + index = H; + vmh = M; + } + IntRegIndex vm = (IntRegIndex) (uint8_t) (vmh << 4 | vm_bf); + + // Index and 2nd register operand for FP instructions + vmh = M; + if ((size & 0x1) == 0) { + index_fp = (H << 1) | L; + } else if (L == 0) { + index_fp = H; + } + IntRegIndex vm_fp = (IntRegIndex) (uint8_t) (vmh << 4 | vm_bf); + + if (u && opcode != 9) + return new Unknown64(machInst); + + switch (opcode) { + case 0x1: + if (size < 2 || sz_L == 0x3) + return new Unknown64(machInst); + else + return decodeNeonUThreeImmScFpReg<FmlaElemScX>( + size & 0x1, machInst, vd, vn, vm_fp, index_fp); + case 0x3: + if (size == 0x0 || size == 0x3) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg<SqdmlalElemScX>( + size, machInst, vd, vn, vm, index); + case 0x5: + if (size < 2 || sz_L == 0x3) + return new Unknown64(machInst); + else + return decodeNeonUThreeImmScFpReg<FmlsElemScX>( + size & 0x1, machInst, vd, vn, vm_fp, index_fp); + case 0x7: + if (size == 0x0 || size == 0x3) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg<SqdmlslElemScX>( + size, machInst, vd, vn, vm, index); + case 0x9: + if (size < 2 || sz_L == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeImmScFpReg<FmulxElemScX>( + size & 0x1, machInst, vd, vn, vm_fp, index_fp); + else + return decodeNeonUThreeImmScFpReg<FmulElemScX>( + size & 0x1, machInst, vd, vn, vm_fp, index_fp); + case 0xb: + if (size == 0x0 || size == 0x3) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg<SqdmullElemScX>( + size, machInst, vd, vn, vm, index); + case 0xc: + if (size == 0x0 || size == 0x3) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg<SqdmulhElemScX>( + size, machInst, vd, vn, vm, index); + case 0xd: + if (size == 0x0 || size == 0x3) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg<SqrdmulhElemScX>( + size, machInst, vd, vn, vm, index); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonScShiftByImm(ExtMachInst machInst) + { + bool u = bits(machInst, 29); + uint8_t immh = bits(machInst, 22, 19); + uint8_t immb = bits(machInst, 18, 16); + uint8_t opcode = bits(machInst, 15, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + + uint8_t immh3 = bits(machInst, 22); + uint8_t size = findMsbSet(immh); + int shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + + if (immh == 0x0) + return new Unknown64(machInst); + + switch (opcode) { + case 0x00: + if (!immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return new UshrDX<uint64_t>(machInst, vd, vn, shiftAmt); + else + return new SshrDX<int64_t>(machInst, vd, vn, shiftAmt); + case 0x02: + if (!immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return new UsraDX<uint64_t>(machInst, vd, vn, shiftAmt); + else + return new SsraDX<int64_t>(machInst, vd, vn, shiftAmt); + case 0x04: + if (!immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return new UrshrDX<uint64_t>(machInst, vd, vn, shiftAmt); + else + return new SrshrDX<int64_t>(machInst, vd, vn, shiftAmt); + case 0x06: + if 
(!immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return new UrsraDX<uint64_t>(machInst, vd, vn, shiftAmt); + else + return new SrsraDX<int64_t>(machInst, vd, vn, shiftAmt); + case 0x08: + if (!immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return new SriDX<uint64_t>(machInst, vd, vn, shiftAmt); + else + return new Unknown64(machInst); + case 0x0a: + if (!immh3) + return new Unknown64(machInst); + shiftAmt = ((immh << 3) | immb) - (8 << size); + if (u) + return new SliDX<uint64_t>(machInst, vd, vn, shiftAmt); + else + return new ShlDX<uint64_t>(machInst, vd, vn, shiftAmt); + case 0x0c: + if (u) { + shiftAmt = ((immh << 3) | immb) - (8 << size); + return decodeNeonSTwoShiftUReg<SqshluScX>( + size, machInst, vd, vn, shiftAmt); + } else { + return new Unknown64(machInst); + } + case 0x0e: + shiftAmt = ((immh << 3) | immb) - (8 << size); + if (u) + return decodeNeonUTwoShiftUReg<UqshlImmScX>( + size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftUReg<SqshlImmScX>( + size, machInst, vd, vn, shiftAmt); + case 0x10: + if (!u || immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + return decodeNeonSTwoShiftUSReg<SqshrunScX>( + size, machInst, vd, vn, shiftAmt); + case 0x11: + if (!u || immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + return decodeNeonSTwoShiftUSReg<SqrshrunScX>( + size, machInst, vd, vn, shiftAmt); + case 0x12: + if (immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftUSReg<UqshrnScX>( + size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftUSReg<SqshrnScX>( + size, machInst, vd, vn, shiftAmt); + case 0x13: + if (immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftUSReg<UqrshrnScX>( + size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftUSReg<SqrshrnScX>( + size, machInst, vd, vn, shiftAmt); + case 0x1c: + if (immh < 0x4) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) { + return decodeNeonUTwoShiftUFpReg<UcvtfFixedScX>( + size & 0x1, machInst, vd, vn, shiftAmt); + } else { + if (size & 0x1) + return new ScvtfFixedScDX<uint64_t>(machInst, vd, vn, + shiftAmt); + else + return new ScvtfFixedScSX<uint32_t>(machInst, vd, vn, + shiftAmt); + } + case 0x1f: + if (immh < 0x4) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftUFpReg<FcvtzuFixedScX>( + size & 0x1, machInst, vd, vn, shiftAmt); + else + return decodeNeonUTwoShiftUFpReg<FcvtzsFixedScX>( + size & 0x1, machInst, vd, vn, shiftAmt); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonMem(ExtMachInst machInst) + { + uint8_t dataSize = bits(machInst, 30) ? 
128 : 64; + bool multiple = bits(machInst, 24, 23) < 0x2; + bool load = bits(machInst, 22); + + uint8_t numStructElems = 0; + uint8_t numRegs = 0; + + if (multiple) { // AdvSIMD load/store multiple structures + uint8_t opcode = bits(machInst, 15, 12); + uint8_t eSize = bits(machInst, 11, 10); + bool wb = !(bits(machInst, 20, 16) == 0x0 && !bits(machInst, 23)); + + switch (opcode) { + case 0x0: // LD/ST4 (4 regs) + numStructElems = 4; + numRegs = 4; + break; + case 0x2: // LD/ST1 (4 regs) + numStructElems = 1; + numRegs = 4; + break; + case 0x4: // LD/ST3 (3 regs) + numStructElems = 3; + numRegs = 3; + break; + case 0x6: // LD/ST1 (3 regs) + numStructElems = 1; + numRegs = 3; + break; + case 0x7: // LD/ST1 (1 reg) + numStructElems = 1; + numRegs = 1; + break; + case 0x8: // LD/ST2 (2 regs) + numStructElems = 2; + numRegs = 2; + break; + case 0xa: // LD/ST1 (2 regs) + numStructElems = 1; + numRegs = 2; + break; + default: + return new Unknown64(machInst); + } + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + if (load) { + return new VldMult64(machInst, rn, vd, rm, eSize, dataSize, + numStructElems, numRegs, wb); + } else { + return new VstMult64(machInst, rn, vd, rm, eSize, dataSize, + numStructElems, numRegs, wb); + } + } else { // AdvSIMD load/store single structure + uint8_t scale = bits(machInst, 15, 14); + uint8_t numStructElems = (((uint8_t) bits(machInst, 13) << 1) | + (uint8_t) bits(machInst, 21)) + 1; + uint8_t index = 0; + bool wb = !(bits(machInst, 20, 16) == 0x0 && !bits(machInst, 23)); + bool replicate = false; + + switch (scale) { + case 0x0: + index = ((uint8_t) bits(machInst, 30) << 3) | + ((uint8_t) bits(machInst, 12) << 2) | + (uint8_t) bits(machInst, 11, 10); + break; + case 0x1: + index = ((uint8_t) bits(machInst, 30) << 2) | + ((uint8_t) bits(machInst, 12) << 1) | + (uint8_t) bits(machInst, 11); + break; + case 0x2: + if (bits(machInst, 10) == 0x0) { + index = ((uint8_t) bits(machInst, 30) << 1) | + bits(machInst, 12); + } else { + index = (uint8_t) bits(machInst, 30); + scale = 0x3; + } + break; + case 0x3: + scale = bits(machInst, 11, 10); + replicate = true; + break; + default: + return new Unknown64(machInst); + } + + uint8_t eSize = scale; + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + if (load) { + return new VldSingle64(machInst, rn, vd, rm, eSize, dataSize, + numStructElems, index, wb, replicate); + } else { + return new VstSingle64(machInst, rn, vd, rm, eSize, dataSize, + numStructElems, index, wb, replicate); + } + } + } +} +}}; diff --git a/src/arch/arm/isa/formats/uncond.isa b/src/arch/arm/isa/formats/uncond.isa index 4a18a55bb..c376cd9ce 100644 --- a/src/arch/arm/isa/formats/uncond.isa +++ b/src/arch/arm/isa/formats/uncond.isa @@ -99,11 +99,11 @@ def format ArmUnconditional() {{ case 0x1: return new Clrex(machInst); case 0x4: - return new Dsb(machInst); + return new Dsb(machInst, 0); case 0x5: - return new Dmb(machInst); + return new Dmb(machInst, 0); case 0x6: - return new Isb(machInst); + return new Isb(machInst, 0); } } } else if (bits(op2, 0) == 0) { @@ -166,7 +166,7 @@ def format ArmUnconditional() {{ const uint32_t val = ((machInst >> 20) & 0x5); if (val == 0x4) { const uint32_t mode = bits(machInst, 4, 0); - if (badMode((OperatingMode)mode)) + if 
(badMode32((OperatingMode)mode)) return new Unknown(machInst); switch (bits(machInst, 24, 21)) { case 0x2: @@ -250,17 +250,10 @@ def format ArmUnconditional() {{ "ldc, ldc2 (immediate)", machInst); } } - if (op1 == 0xC5) { - return new WarnUnimplemented( - "mrrc, mrrc2", machInst); - } } else { if (bits(op1, 4, 3) != 0 || bits(op1, 1) == 1) { return new WarnUnimplemented( "stc, stc2", machInst); - } else if (op1 == 0xC4) { - return new WarnUnimplemented( - "mcrr, mcrrc", machInst); } } } diff --git a/src/arch/arm/isa/formats/unimp.isa b/src/arch/arm/isa/formats/unimp.isa index 1c9a4b402..8e346112c 100644 --- a/src/arch/arm/isa/formats/unimp.isa +++ b/src/arch/arm/isa/formats/unimp.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010, 2012 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -85,6 +85,9 @@ output header {{ private: /// Have we warned on this instruction yet? mutable bool warned; + /// Full mnemonic for MRC and MCR instructions including the + /// coproc. register name + std::string fullMnemonic; public: /// Constructor @@ -96,6 +99,16 @@ output header {{ flags[IsNonSpeculative] = true; } + WarnUnimplemented(const char *_mnemonic, ExtMachInst _machInst, + const std::string& _fullMnemonic) + : ArmStaticInst(_mnemonic, _machInst, No_OpClass), warned(false), + fullMnemonic(_fullMnemonic) + { + // don't call execute() (which panics) if we're on a + // speculative path + flags[IsNonSpeculative] = true; + } + %(BasicExecDeclare)s std::string @@ -147,10 +160,7 @@ output exec {{ FailUnimplemented::execute(%(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const { - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(machInst, false, mnemonic); + return new UndefinedInstruction(machInst, false, mnemonic); } Fault @@ -158,7 +168,8 @@ output exec {{ Trace::InstRecord *traceData) const { if (!warned) { - warn("\tinstruction '%s' unimplemented\n", mnemonic); + warn("\tinstruction '%s' unimplemented\n", + fullMnemonic.size() ? 
fullMnemonic.c_str() : mnemonic); warned = true; } diff --git a/src/arch/arm/isa/includes.isa b/src/arch/arm/isa/includes.isa index 5dd13d623..a2ce84345 100644 --- a/src/arch/arm/isa/includes.isa +++ b/src/arch/arm/isa/includes.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010, 2012 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -50,10 +50,16 @@ output header {{ #include <sstream> #include "arch/arm/insts/branch.hh" +#include "arch/arm/insts/branch64.hh" +#include "arch/arm/insts/data64.hh" +#include "arch/arm/insts/fplib.hh" #include "arch/arm/insts/macromem.hh" #include "arch/arm/insts/mem.hh" +#include "arch/arm/insts/mem64.hh" #include "arch/arm/insts/misc.hh" +#include "arch/arm/insts/misc64.hh" #include "arch/arm/insts/mult.hh" +#include "arch/arm/insts/neon64_mem.hh" #include "arch/arm/insts/pred_inst.hh" #include "arch/arm/insts/static_inst.hh" #include "arch/arm/insts/vfp.hh" @@ -63,6 +69,7 @@ output header {{ }}; output decoder {{ +#include <string> #include "arch/arm/decoder.hh" #include "arch/arm/faults.hh" #include "arch/arm/intregs.hh" diff --git a/src/arch/arm/isa/insts/aarch64.isa b/src/arch/arm/isa/insts/aarch64.isa new file mode 100644 index 000000000..6fcf9b5d2 --- /dev/null +++ b/src/arch/arm/isa/insts/aarch64.isa @@ -0,0 +1,58 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Authors: Gabe Black + +let {{ + movzCode = 'Dest64 = ((uint64_t)imm1) << imm2;' + movzIop = InstObjParams("movz", "Movz", "RegImmImmOp", movzCode, []) + header_output += RegImmImmOpDeclare.subst(movzIop) + decoder_output += RegImmImmOpConstructor.subst(movzIop) + exec_output += BasicExecute.subst(movzIop) + + movkCode = 'Dest64 = insertBits(Dest64, imm2 + 15, imm2, imm1);' + movkIop = InstObjParams("movk", "Movk", "RegImmImmOp", movkCode, []) + header_output += RegImmImmOpDeclare.subst(movkIop) + decoder_output += RegImmImmOpConstructor.subst(movkIop) + exec_output += BasicExecute.subst(movkIop) + + movnCode = 'Dest64 = ~(((uint64_t)imm1) << imm2);' + movnIop = InstObjParams("movn", "Movn", "RegImmImmOp", movnCode, []) + header_output += RegImmImmOpDeclare.subst(movnIop) + decoder_output += RegImmImmOpConstructor.subst(movnIop) + exec_output += BasicExecute.subst(movnIop) +}}; diff --git a/src/arch/arm/isa/insts/branch.isa b/src/arch/arm/isa/insts/branch.isa index e360f4581..3ee9d88e4 100644 --- a/src/arch/arm/isa/insts/branch.isa +++ b/src/arch/arm/isa/insts/branch.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2012 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -48,7 +48,7 @@ let {{ bCode = ''' NPC = (uint32_t)(PC + imm); ''' - br_tgt_code = '''pcs.instNPC(branchPC.instPC() + imm);''' + br_tgt_code = '''pcs.instNPC((uint32_t)(branchPC.instPC() + imm));''' instFlags = ["IsDirectControl"] if (link): bCode += ''' @@ -86,9 +86,9 @@ let {{ Name += "Imm" # Since we're switching ISAs, the target ISA will be the opposite # of the current ISA. Thumb is whether the target is ARM. - newPC = '(Thumb ? (roundDown(PC, 4) + imm) : (PC + imm))' + newPC = '(uint32_t)(Thumb ? (roundDown(PC, 4) + imm) : (PC + imm))' br_tgt_code = ''' - pcs.instNPC((branchPC.thumb() ? (roundDown(branchPC.instPC(),4) + imm) : + pcs.instNPC((uint32_t)(branchPC.thumb() ? (roundDown(branchPC.instPC(),4) + imm) : (branchPC.instPC() + imm))); ''' base = "BranchImmCond" @@ -150,7 +150,26 @@ let {{ if imm: decoder_output += BranchTarget.subst(blxIop) - #Ignore BXJ for now + bxjcode = ''' + HSTR hstr = Hstr; + CPSR cpsr = Cpsr; + SCR scr = Scr; + + if (ArmSystem::haveVirtualization(xc->tcBase()) && hstr.tjdbx && + !inSecureState(scr, cpsr) && (cpsr.mode != MODE_HYP)) { + fault = new HypervisorTrap(machInst, op1, EC_TRAPPED_BXJ); + } + IWNPC = Op1; + ''' + + bxjIop = InstObjParams("bxj", "BxjReg", "BranchRegCond", + {"code": bxjcode, + "predicate_test": predicateTest, + "is_ras_pop": "op1 == INTREG_LR" }, + ["IsIndirectControl"]) + header_output += BranchRegCondDeclare.subst(bxjIop) + decoder_output += BranchRegCondConstructor.subst(bxjIop) + exec_output += PredOpExecute.subst(bxjIop) #CBNZ, CBZ. These are always unconditional as far as predicates for (mnem, test) in (("cbz", "=="), ("cbnz", "!=")): diff --git a/src/arch/arm/isa/insts/branch64.isa b/src/arch/arm/isa/insts/branch64.isa new file mode 100644 index 000000000..89cee6c22 --- /dev/null +++ b/src/arch/arm/isa/insts/branch64.isa @@ -0,0 +1,248 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. 
You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black +// Giacomo Gabrielli + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + # B, BL + for (mnem, link) in (("b", False), ("bl", True)): + bCode = ('NPC = purifyTaggedAddr(RawPC + imm, xc->tcBase(), ' + 'currEL(xc->tcBase()));\n') + instFlags = ['IsDirectControl', 'IsUncondControl'] + if (link): + bCode += 'XLR = RawPC + 4;\n' + instFlags += ['IsCall'] + + bIop = InstObjParams(mnem, mnem.capitalize() + "64", + "BranchImm64", bCode, instFlags) + header_output += BranchImm64Declare.subst(bIop) + decoder_output += BranchImm64Constructor.subst(bIop) + exec_output += BasicExecute.subst(bIop) + + # BR, BLR + for (mnem, link) in (("br", False), ("blr", True)): + bCode = ('NPC = purifyTaggedAddr(XOp1, xc->tcBase(), ' + 'currEL(xc->tcBase()));\n') + instFlags = ['IsIndirectControl', 'IsUncondControl'] + if (link): + bCode += 'XLR = RawPC + 4;\n' + instFlags += ['IsCall'] + + bIop = InstObjParams(mnem, mnem.capitalize() + "64", + "BranchReg64", bCode, instFlags) + header_output += BranchReg64Declare.subst(bIop) + decoder_output += BranchReg64Constructor.subst(bIop) + exec_output += BasicExecute.subst(bIop) + + # B conditional + bCode = ''' + if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) + NPC = purifyTaggedAddr(RawPC + imm, xc->tcBase(), + currEL(xc->tcBase())); + else + NPC = NPC; + ''' + bIop = InstObjParams("b", "BCond64", "BranchImmCond64", bCode, + ['IsCondControl', 'IsDirectControl']) + header_output += BranchImmCond64Declare.subst(bIop) + decoder_output += BranchImmCond64Constructor.subst(bIop) + exec_output += BasicExecute.subst(bIop) + + # RET + bCode = ('NPC = purifyTaggedAddr(XOp1, xc->tcBase(), ' + 'currEL(xc->tcBase()));\n') + instFlags = ['IsIndirectControl', 'IsUncondControl', 
'IsReturn'] + + bIop = InstObjParams('ret', 'Ret64', "BranchRet64", bCode, instFlags) + header_output += BranchReg64Declare.subst(bIop) + decoder_output += BranchReg64Constructor.subst(bIop) + exec_output += BasicExecute.subst(bIop) + + # ERET + bCode = '''Addr newPc; + CPSR cpsr = Cpsr; + CPSR spsr = Spsr; + + ExceptionLevel curr_el = opModeToEL((OperatingMode) (uint8_t) cpsr.mode); + switch (curr_el) { + case EL3: + newPc = xc->tcBase()->readMiscReg(MISCREG_ELR_EL3); + break; + case EL2: + newPc = xc->tcBase()->readMiscReg(MISCREG_ELR_EL2); + break; + case EL1: + newPc = xc->tcBase()->readMiscReg(MISCREG_ELR_EL1); + break; + default: + return new UndefinedInstruction(machInst, false, mnemonic); + break; + } + if (spsr.width && (newPc & mask(2))) { + // To avoid PC Alignment fault when returning to AArch32 + if (spsr.t) + newPc = newPc & ~mask(1); + else + newPc = newPc & ~mask(2); + } + spsr.q = 0; + spsr.it1 = 0; + spsr.j = 0; + spsr.res0_23_22 = 0; + spsr.ge = 0; + spsr.it2 = 0; + spsr.t = 0; + + OperatingMode mode = (OperatingMode) (uint8_t) spsr.mode; + bool illegal = false; + ExceptionLevel target_el; + if (badMode(mode)) { + illegal = true; + } else { + target_el = opModeToEL(mode); + if (((target_el == EL2) && + !ArmSystem::haveVirtualization(xc->tcBase())) || + (target_el > curr_el) || + (spsr.width == 1)) { + illegal = true; + } else { + bool known = true; + bool from32 = (spsr.width == 1); + bool to32 = false; + if (false) { // TODO: !haveAArch32EL + to32 = false; + } else if (!ArmSystem::highestELIs64(xc->tcBase())) { + to32 = true; + } else { + bool scr_rw, hcr_rw; + if (ArmSystem::haveSecurity(xc->tcBase())) { + SCR scr = xc->tcBase()->readMiscReg(MISCREG_SCR_EL3); + scr_rw = scr.rw; + } else { + scr_rw = true; + } + + if (ArmSystem::haveVirtualization(xc->tcBase())) { + HCR hcr = xc->tcBase()->readMiscReg(MISCREG_HCR_EL2); + hcr_rw = hcr.rw; + } else { + hcr_rw = scr_rw; + } + + switch (target_el) { + case EL3: + to32 = false; + break; + case EL2: + to32 = !scr_rw; + break; + case EL1: + to32 = !scr_rw || !hcr_rw; + break; + case EL0: + if (curr_el == EL0) { + to32 = cpsr.width; + } else if (!scr_rw || !hcr_rw) { + // EL0 using AArch32 if EL1 using AArch32 + to32 = true; + } else { + known = false; + to32 = false; + } + } + } + if (known) + illegal = (from32 != to32); + } + } + + if (illegal) { + uint8_t old_mode = cpsr.mode; + spsr.mode = old_mode; // Preserve old mode when invalid + spsr.il = 1; + } else { + if (cpsr.width != spsr.width) + panic("AArch32/AArch64 interprocessing not supported yet"); + } + Cpsr = spsr; + + CondCodesNZ = spsr.nz; + CondCodesC = spsr.c; + CondCodesV = spsr.v; + NPC = purifyTaggedAddr(newPc, xc->tcBase(), + opModeToEL((OperatingMode) (uint8_t) spsr.mode)); + LLSCLock = 0; // Clear exclusive monitor + SevMailbox = 1; //Set Event Register + ''' + instFlags = ['IsSerializeAfter', 'IsNonSpeculative', 'IsSquashAfter'] + bIop = InstObjParams('eret', 'Eret64', "BranchEret64", bCode, instFlags) + header_output += BasicDeclare.subst(bIop) + decoder_output += BasicConstructor64.subst(bIop) + exec_output += BasicExecute.subst(bIop) + + # CBNZ, CBZ + for (mnem, test) in (("cbz", "=="), ("cbnz", "!=")): + code = ('NPC = (Op164 %(test)s 0) ? 
' + 'purifyTaggedAddr(RawPC + imm, xc->tcBase(), ' + 'currEL(xc->tcBase())) : NPC;\n') + code = code % {"test": test} + iop = InstObjParams(mnem, mnem.capitalize() + "64", + "BranchImmReg64", code, + ['IsCondControl', 'IsDirectControl']) + header_output += BranchImmReg64Declare.subst(iop) + decoder_output += BranchImmReg64Constructor.subst(iop) + exec_output += BasicExecute.subst(iop) + + # TBNZ, TBZ + for (mnem, test) in (("tbz", "=="), ("tbnz", "!=")): + code = ('NPC = ((Op164 & imm1) %(test)s 0) ? ' + 'purifyTaggedAddr(RawPC + imm2, xc->tcBase(), ' + 'currEL(xc->tcBase())) : NPC;\n') + code = code % {"test": test} + iop = InstObjParams(mnem, mnem.capitalize() + "64", + "BranchImmImmReg64", code, + ['IsCondControl', 'IsDirectControl']) + header_output += BranchImmImmReg64Declare.subst(iop) + decoder_output += BranchImmImmReg64Constructor.subst(iop) + exec_output += BasicExecute.subst(iop) +}}; diff --git a/src/arch/arm/isa/insts/data.isa b/src/arch/arm/isa/insts/data.isa index be56554b0..881676496 100644 --- a/src/arch/arm/isa/insts/data.isa +++ b/src/arch/arm/isa/insts/data.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010, 2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -257,7 +257,8 @@ let {{ CPSR old_cpsr = Cpsr; CPSR new_cpsr = - cpsrWriteByInstr(old_cpsr, Spsr, 0xF, true, sctlr.nmfi); + cpsrWriteByInstr(old_cpsr, Spsr, Scr, Nsacr, 0xF, true, + sctlr.nmfi, xc->tcBase()); Cpsr = ~CondCodesMask & new_cpsr; CondCodesNZ = new_cpsr.nz; CondCodesC = new_cpsr.c; diff --git a/src/arch/arm/isa/insts/data64.isa b/src/arch/arm/isa/insts/data64.isa new file mode 100644 index 000000000..77d7541ca --- /dev/null +++ b/src/arch/arm/isa/insts/data64.isa @@ -0,0 +1,465 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + def createCcCode64(carry, overflow): + code = "" + code += ''' + uint16_t _iz, _in; + _in = bits(resTemp, intWidth - 1); + _iz = ((resTemp & mask(intWidth)) == 0); + CondCodesNZ = (_in << 1) | _iz; + DPRINTF(Arm, "(in, iz) = (%%d, %%d)\\n", _in, _iz); + ''' + if overflow and overflow != "none": + code += ''' + uint16_t _iv; + _iv = %s & 1; + CondCodesV = _iv; + DPRINTF(Arm, "(iv) = (%%d)\\n", _iv); + ''' % overflow + if carry and carry != "none": + code += ''' + uint16_t _ic; + _ic = %s & 1; + CondCodesC = _ic; + DPRINTF(Arm, "(ic) = (%%d)\\n", _ic); + ''' % carry + return code + + oldC = 'CondCodesC' + oldV = 'CondCodesV' + # Dicts of ways to set the carry flag. + carryCode64 = { + "none": "none", + "add": 'findCarry(intWidth, resTemp, Op164, secOp)', + "sub": 'findCarry(intWidth, resTemp, Op164, ~secOp)', + "logic": '0' + } + # Dict of ways to set the overflow flag. + overflowCode64 = { + "none": "none", + "add": 'findOverflow(intWidth, resTemp, Op164, secOp)', + "sub": 'findOverflow(intWidth, resTemp, Op164, ~secOp)', + "logic": '0' + } + + immOp2 = "uint64_t secOp M5_VAR_USED = imm;" + sRegOp2 = "uint64_t secOp M5_VAR_USED = " + \ + "shiftReg64(Op264, shiftAmt, shiftType, intWidth);" + eRegOp2 = "uint64_t secOp M5_VAR_USED = " + \ + "extendReg64(Op264, extendType, shiftAmt, intWidth);" + + def buildDataWork(mnem, code, flagType, suffix, buildCc, buildNonCc, + base, templateBase): + code = ''' + uint64_t resTemp M5_VAR_USED = 0; + ''' + code + ccCode = createCcCode64(carryCode64[flagType], overflowCode64[flagType]) + Name = mnem.capitalize() + suffix + iop = InstObjParams(mnem, Name, base, code) + iopCc = InstObjParams(mnem + "s", Name + "Cc", base, code + ccCode) + + def subst(iop): + global header_output, decoder_output, exec_output + header_output += eval(templateBase + "Declare").subst(iop) + decoder_output += eval(templateBase + "Constructor").subst(iop) + exec_output += BasicExecute.subst(iop) + + if buildNonCc: + subst(iop) + if buildCc: + subst(iopCc) + + def buildXImmDataInst(mnem, code, flagType = "logic", \ + buildCc = True, buildNonCc = True, \ + suffix = "XImm"): + buildDataWork(mnem, immOp2 + code, flagType, suffix, + buildCc, buildNonCc, "DataXImmOp", "DataXImm") + + def buildXSRegDataInst(mnem, code, flagType = "logic", \ + buildCc = True, buildNonCc = True, \ + suffix = "XSReg"): + buildDataWork(mnem, sRegOp2 + code, flagType, suffix, + buildCc, buildNonCc, "DataXSRegOp", "DataXSReg") + + def buildXERegDataInst(mnem, code, flagType = "logic", \ + buildCc = True, buildNonCc = True, \ + suffix = "XEReg"): + buildDataWork(mnem, eRegOp2 + code, flagType, suffix, + buildCc, buildNonCc, "DataXERegOp", "DataXEReg") + + def buildDataInst(mnem, code, flagType = "logic", + buildCc = True, buildNonCc = True): + buildXImmDataInst(mnem, code, flagType, buildCc, buildNonCc) + buildXSRegDataInst(mnem, code, flagType, buildCc, buildNonCc) + 
buildXERegDataInst(mnem, code, flagType, buildCc, buildNonCc) + + buildXImmDataInst("adr", "Dest64 = RawPC + imm", buildCc = False); + buildXImmDataInst("adrp", "Dest64 = (RawPC & ~mask(12)) + imm", + buildCc = False); + buildDataInst("and", "Dest64 = resTemp = Op164 & secOp;") + buildDataInst("eor", "Dest64 = Op164 ^ secOp;", buildCc = False) + buildXSRegDataInst("eon", "Dest64 = Op164 ^ ~secOp;", buildCc = False) + buildDataInst("sub", "Dest64 = resTemp = Op164 - secOp;", "sub") + buildDataInst("add", "Dest64 = resTemp = Op164 + secOp;", "add") + buildXSRegDataInst("adc", + "Dest64 = resTemp = Op164 + secOp + %s;" % oldC, "add") + buildXSRegDataInst("sbc", + "Dest64 = resTemp = Op164 - secOp - !%s;" % oldC, "sub") + buildDataInst("orr", "Dest64 = Op164 | secOp;", buildCc = False) + buildXSRegDataInst("orn", "Dest64 = Op164 | ~secOp;", buildCc = False) + buildXSRegDataInst("bic", "Dest64 = resTemp = Op164 & ~secOp;") + + def buildDataXImmInst(mnem, code, optArgs = []): + global header_output, decoder_output, exec_output + classNamePrefix = mnem[0].upper() + mnem[1:] + templateBase = "DataXImm" + iop = InstObjParams(mnem, classNamePrefix + "64", + templateBase + "Op", code, optArgs) + header_output += eval(templateBase + "Declare").subst(iop) + decoder_output += eval(templateBase + "Constructor").subst(iop) + exec_output += BasicExecute.subst(iop) + + def buildDataXRegInst(mnem, regOps, code, optArgs = [], + overrideOpClass=None): + global header_output, decoder_output, exec_output + templateBase = "DataX%dReg" % regOps + classNamePrefix = mnem[0].upper() + mnem[1:] + if overrideOpClass: + iop = InstObjParams(mnem, classNamePrefix + "64", + templateBase + "Op", + { 'code': code, 'op_class': overrideOpClass}, + optArgs) + else: + iop = InstObjParams(mnem, classNamePrefix + "64", + templateBase + "Op", code, optArgs) + header_output += eval(templateBase + "Declare").subst(iop) + decoder_output += eval(templateBase + "Constructor").subst(iop) + exec_output += BasicExecute.subst(iop) + + buildDataXRegInst("madd", 3, "Dest64 = Op164 + Op264 * Op364", + overrideOpClass="IntMultOp") + buildDataXRegInst("msub", 3, "Dest64 = Op164 - Op264 * Op364", + overrideOpClass="IntMultOp") + buildDataXRegInst("smaddl", 3, + "XDest = XOp1 + sext<32>(WOp2) * sext<32>(WOp3)", + overrideOpClass="IntMultOp") + buildDataXRegInst("smsubl", 3, + "XDest = XOp1 - sext<32>(WOp2) * sext<32>(WOp3)", + overrideOpClass="IntMultOp") + buildDataXRegInst("smulh", 2, ''' + uint64_t op1H = (int32_t)(XOp1 >> 32); + uint64_t op1L = (uint32_t)XOp1; + uint64_t op2H = (int32_t)(XOp2 >> 32); + uint64_t op2L = (uint32_t)XOp2; + uint64_t mid1 = ((op1L * op2L) >> 32) + op1H * op2L; + uint64_t mid2 = op1L * op2H; + uint64_t result = ((uint64_t)(uint32_t)mid1 + (uint32_t)mid2) >> 32; + result += shiftReg64(mid1, 32, ASR, intWidth); + result += shiftReg64(mid2, 32, ASR, intWidth); + XDest = result + op1H * op2H; + ''', overrideOpClass="IntMultOp") + buildDataXRegInst("umaddl", 3, "XDest = XOp1 + WOp2 * WOp3", + overrideOpClass="IntMultOp") + buildDataXRegInst("umsubl", 3, "XDest = XOp1 - WOp2 * WOp3", + overrideOpClass="IntMultOp") + buildDataXRegInst("umulh", 2, ''' + uint64_t op1H = (uint32_t)(XOp1 >> 32); + uint64_t op1L = (uint32_t)XOp1; + uint64_t op2H = (uint32_t)(XOp2 >> 32); + uint64_t op2L = (uint32_t)XOp2; + uint64_t mid1 = ((op1L * op2L) >> 32) + op1H * op2L; + uint64_t mid2 = op1L * op2H; + uint64_t result = ((uint64_t)(uint32_t)mid1 + (uint32_t)mid2) >> 32; + result += mid1 >> 32; + result += mid2 >> 32; + XDest = result + op1H 
* op2H; + ''', overrideOpClass="IntMultOp") + + buildDataXRegInst("asrv", 2, + "Dest64 = shiftReg64(Op164, Op264, ASR, intWidth)") + buildDataXRegInst("lslv", 2, + "Dest64 = shiftReg64(Op164, Op264, LSL, intWidth)") + buildDataXRegInst("lsrv", 2, + "Dest64 = shiftReg64(Op164, Op264, LSR, intWidth)") + buildDataXRegInst("rorv", 2, + "Dest64 = shiftReg64(Op164, Op264, ROR, intWidth)") + buildDataXRegInst("sdiv", 2, ''' + int64_t op1 = Op164; + int64_t op2 = Op264; + if (intWidth == 32) { + op1 = sext<32>(op1); + op2 = sext<32>(op2); + } + Dest64 = op2 == -1 ? -op1 : op2 ? op1 / op2 : 0; + ''', overrideOpClass="IntDivOp") + buildDataXRegInst("udiv", 2, "Dest64 = Op264 ? Op164 / Op264 : 0", + overrideOpClass="IntDivOp") + + buildDataXRegInst("cls", 1, ''' + uint64_t op1 = Op164; + if (bits(op1, intWidth - 1)) + op1 ^= mask(intWidth); + Dest64 = (op1 == 0) ? intWidth - 1 : (intWidth - 2 - findMsbSet(op1)); + ''') + buildDataXRegInst("clz", 1, ''' + Dest64 = (Op164 == 0) ? intWidth : (intWidth - 1 - findMsbSet(Op164)); + ''') + buildDataXRegInst("rbit", 1, ''' + uint64_t result = Op164; + uint64_t lBit = 1ULL << (intWidth - 1); + uint64_t rBit = 1ULL; + while (lBit > rBit) { + uint64_t maskBits = lBit | rBit; + uint64_t testBits = result & maskBits; + // If these bits are different, swap them by toggling them. + if (testBits && testBits != maskBits) + result ^= maskBits; + lBit >>= 1; rBit <<= 1; + } + Dest64 = result; + ''') + buildDataXRegInst("rev", 1, ''' + if (intWidth == 32) + Dest64 = betole<uint32_t>(Op164); + else + Dest64 = betole<uint64_t>(Op164); + ''') + buildDataXRegInst("rev16", 1, ''' + int count = intWidth / 16; + uint64_t result = 0; + for (unsigned i = 0; i < count; i++) { + uint16_t hw = Op164 >> (i * 16); + result |= (uint64_t)betole<uint16_t>(hw) << (i * 16); + } + Dest64 = result; + ''') + buildDataXRegInst("rev32", 1, ''' + int count = intWidth / 32; + uint64_t result = 0; + for (unsigned i = 0; i < count; i++) { + uint32_t hw = Op164 >> (i * 32); + result |= (uint64_t)betole<uint32_t>(hw) << (i * 32); + } + Dest64 = result; + ''') + + msrMrs64EnabledCheckCode = ''' + // Check for read/write access right + if (!can%sAArch64SysReg(flat_idx, Scr64, cpsr, xc->tcBase())) { + if (flat_idx == MISCREG_DAIF || + flat_idx == MISCREG_DC_ZVA_Xt || + flat_idx == MISCREG_DC_CVAC_Xt || + flat_idx == MISCREG_DC_CIVAC_Xt + ) + return new UndefinedInstruction(machInst, 0, EC_TRAPPED_MSR_MRS_64); + return new UndefinedInstruction(machInst, false, mnemonic); + } + + // Check for traps to supervisor (FP/SIMD regs) + if (el <= EL1 && msrMrs64TrapToSup(flat_idx, el, Cpacr64)) + return new SupervisorTrap(machInst, 0x1E00000, EC_TRAPPED_SIMD_FP); + + bool is_vfp_neon = false; + + // Check for traps to hypervisor + if ((ArmSystem::haveVirtualization(xc->tcBase()) && el <= EL2) && + msrMrs64TrapToHyp(flat_idx, %s, CptrEl264, Hcr64, &is_vfp_neon)) { + return new HypervisorTrap(machInst, is_vfp_neon ? 0x1E00000 : imm, + is_vfp_neon ? EC_TRAPPED_SIMD_FP : EC_TRAPPED_MSR_MRS_64); + } + + // Check for traps to secure monitor + if ((ArmSystem::haveSecurity(xc->tcBase()) && el <= EL3) && + msrMrs64TrapToMon(flat_idx, CptrEl364, el, &is_vfp_neon)) { + return new SecureMonitorTrap(machInst, + is_vfp_neon ? 0x1E00000 : imm, + is_vfp_neon ? 
EC_TRAPPED_SIMD_FP : EC_TRAPPED_MSR_MRS_64); + } + ''' + + buildDataXImmInst("mrs", ''' + MiscRegIndex flat_idx = (MiscRegIndex) xc->tcBase()-> + flattenMiscIndex(op1); + CPSR cpsr = Cpsr; + ExceptionLevel el = (ExceptionLevel) (uint8_t) cpsr.el; + %s + XDest = MiscOp1_ud; + ''' % (msrMrs64EnabledCheckCode % ('Read', 'true'),), + ["IsSerializeBefore"]) + + buildDataXRegInst("mrsNZCV", 1, ''' + CPSR cpsr = 0; + cpsr.nz = CondCodesNZ; + cpsr.c = CondCodesC; + cpsr.v = CondCodesV; + XDest = cpsr; + ''') + + buildDataXImmInst("msr", ''' + MiscRegIndex flat_idx = (MiscRegIndex) xc->tcBase()-> + flattenMiscIndex(dest); + CPSR cpsr = Cpsr; + ExceptionLevel el = (ExceptionLevel) (uint8_t) cpsr.el; + %s + MiscDest_ud = XOp1; + ''' % (msrMrs64EnabledCheckCode % ('Write', 'false'),), + ["IsSerializeAfter", "IsNonSpeculative"]) + + buildDataXRegInst("msrNZCV", 1, ''' + CPSR cpsr = XOp1; + CondCodesNZ = cpsr.nz; + CondCodesC = cpsr.c; + CondCodesV = cpsr.v; + ''') + + msrdczva_ea_code = ''' + MiscRegIndex flat_idx = (MiscRegIndex) xc->tcBase()->flattenMiscIndex(dest); + CPSR cpsr = Cpsr; + ExceptionLevel el = (ExceptionLevel) (uint8_t) cpsr.el; + ''' + + msrdczva_ea_code += msrMrs64EnabledCheckCode % ('Write', 'false') + msrdczva_ea_code += ''' + Request::Flags memAccessFlags = Request::CACHE_BLOCK_ZERO|ArmISA::TLB::MustBeOne; + EA = XBase; + assert(!(Dczid & 0x10)); + uint64_t op_size = power(2, Dczid + 2); + EA &= ~(op_size - 1); + + ''' + + msrDCZVAIop = InstObjParams("dczva", "Dczva", "SysDC64", + { "ea_code" : msrdczva_ea_code, + "memacc_code" : ";", "use_uops" : 0, + "op_wb" : ";", "fa_code" : ";"}, ['IsStore', 'IsMemRef']); + header_output += DCStore64Declare.subst(msrDCZVAIop); + decoder_output += DCStore64Constructor.subst(msrDCZVAIop); + exec_output += DCStore64Execute.subst(msrDCZVAIop); + exec_output += DCStore64InitiateAcc.subst(msrDCZVAIop); + exec_output += Store64CompleteAcc.subst(msrDCZVAIop); + + + + buildDataXImmInst("msrSP", ''' + if (!canWriteAArch64SysReg( + (MiscRegIndex) xc->tcBase()->flattenMiscIndex(dest), + Scr64, Cpsr, xc->tcBase())) { + return new UndefinedInstruction(machInst, false, mnemonic); + } + MiscDest_ud = imm; + ''', optArgs = ["IsSerializeAfter", "IsNonSpeculative"]) + + buildDataXImmInst("msrDAIFSet", ''' + if (!canWriteAArch64SysReg( + (MiscRegIndex) xc->tcBase()->flattenMiscIndex(dest), + Scr64, Cpsr, xc->tcBase())) { + return new UndefinedInstruction(machInst, 0, EC_TRAPPED_MSR_MRS_64); + } + CPSR cpsr = Cpsr; + cpsr.daif = cpsr.daif | imm; + Cpsr = cpsr; + ''', optArgs = ["IsSerializeAfter", "IsNonSpeculative"]) + + buildDataXImmInst("msrDAIFClr", ''' + if (!canWriteAArch64SysReg( + (MiscRegIndex) xc->tcBase()->flattenMiscIndex(dest), + Scr64, Cpsr, xc->tcBase())) { + return new UndefinedInstruction(machInst, 0, EC_TRAPPED_MSR_MRS_64); + } + CPSR cpsr = Cpsr; + cpsr.daif = cpsr.daif & ~imm; + Cpsr = cpsr; + ''', optArgs = ["IsSerializeAfter", "IsNonSpeculative"]) + + def buildDataXCompInst(mnem, instType, suffix, code): + global header_output, decoder_output, exec_output + templateBase = "DataXCond%s" % instType + iop = InstObjParams(mnem, mnem.capitalize() + suffix + "64", + templateBase + "Op", code) + header_output += eval(templateBase + "Declare").subst(iop) + decoder_output += eval(templateBase + "Constructor").subst(iop) + exec_output += BasicExecute.subst(iop) + + def buildDataXCondImmInst(mnem, code): + buildDataXCompInst(mnem, "CompImm", "Imm", code) + def buildDataXCondRegInst(mnem, code): + buildDataXCompInst(mnem, "CompReg", "Reg", code) + def 
buildDataXCondSelInst(mnem, code): + buildDataXCompInst(mnem, "Sel", "", code) + + def condCompCode(flagType, op, imm): + ccCode = createCcCode64(carryCode64[flagType], overflowCode64[flagType]) + opDecl = "uint64_t secOp M5_VAR_USED = imm;" + if not imm: + opDecl = "uint64_t secOp M5_VAR_USED = Op264;" + return opDecl + ''' + if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) { + uint64_t resTemp = Op164 ''' + op + ''' secOp; + ''' + ccCode + ''' + } else { + CondCodesNZ = (defCc >> 2) & 0x3; + CondCodesC = (defCc >> 1) & 0x1; + CondCodesV = defCc & 0x1; + } + ''' + + buildDataXCondImmInst("ccmn", condCompCode("add", "+", True)) + buildDataXCondImmInst("ccmp", condCompCode("sub", "-", True)) + buildDataXCondRegInst("ccmn", condCompCode("add", "+", False)) + buildDataXCondRegInst("ccmp", condCompCode("sub", "-", False)) + + condSelCode = ''' + if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) { + Dest64 = Op164; + } else { + Dest64 = %(altVal)s; + } + ''' + buildDataXCondSelInst("csel", condSelCode % {"altVal" : "Op264"}) + buildDataXCondSelInst("csinc", condSelCode % {"altVal" : "Op264 + 1"}) + buildDataXCondSelInst("csinv", condSelCode % {"altVal" : "~Op264"}) + buildDataXCondSelInst("csneg", condSelCode % {"altVal" : "-Op264"}) +}}; diff --git a/src/arch/arm/isa/insts/div.isa b/src/arch/arm/isa/insts/div.isa index 1ff6ef9e4..0896ea94f 100644 --- a/src/arch/arm/isa/insts/div.isa +++ b/src/arch/arm/isa/insts/div.isa @@ -40,12 +40,6 @@ let {{ sdivCode = ''' if (Op2_sw == 0) { - if (((SCTLR)Sctlr).dz) { - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(false, mnemonic); - } Dest_sw = 0; } else if (Op1_sw == INT_MIN && Op2_sw == -1) { Dest_sw = INT_MIN; @@ -63,12 +57,6 @@ let {{ udivCode = ''' if (Op2_uw == 0) { - if (((SCTLR)Sctlr).dz) { - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(false, mnemonic); - } Dest_uw = 0; } else { Dest_uw = Op1_uw / Op2_uw; diff --git a/src/arch/arm/isa/insts/fp.isa b/src/arch/arm/isa/insts/fp.isa index b701995f4..60f030c3d 100644 --- a/src/arch/arm/isa/insts/fp.isa +++ b/src/arch/arm/isa/insts/fp.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -191,14 +191,17 @@ let {{ decoder_output = "" exec_output = "" - vmsrIop = InstObjParams("vmsr", "Vmsr", "FpRegRegOp", - { "code": vmsrEnabledCheckCode + \ - "MiscDest = Op1;", + vmsrCode = vmsrEnabledCheckCode + ''' + MiscDest = Op1; + ''' + + vmsrIop = InstObjParams("vmsr", "Vmsr", "FpRegRegImmOp", + { "code": vmsrCode, "predicate_test": predicateTest, "op_class": "SimdFloatMiscOp" }, ["IsSerializeAfter","IsNonSpeculative"]) - header_output += FpRegRegOpDeclare.subst(vmsrIop); - decoder_output += FpRegRegOpConstructor.subst(vmsrIop); + header_output += FpRegRegImmOpDeclare.subst(vmsrIop); + decoder_output += FpRegRegImmOpConstructor.subst(vmsrIop); exec_output += PredOpExecute.subst(vmsrIop); vmsrFpscrCode = vmsrEnabledCheckCode + ''' @@ -215,14 +218,36 @@ let {{ decoder_output += FpRegRegOpConstructor.subst(vmsrFpscrIop); exec_output += PredOpExecute.subst(vmsrFpscrIop); - vmrsIop = InstObjParams("vmrs", "Vmrs", "FpRegRegOp", - { "code": vmrsEnabledCheckCode + \ - "Dest = MiscOp1;", + vmrsCode = vmrsEnabledCheckCode + ''' + CPSR cpsr = Cpsr; + SCR scr = Scr; + if (!inSecureState(scr, cpsr) && (cpsr.mode != MODE_HYP)) { + HCR hcr = 
Hcr; + bool hypTrap = false; + switch(xc->tcBase()->flattenMiscIndex(op1)) { + case MISCREG_FPSID: + hypTrap = hcr.tid0; + break; + case MISCREG_MVFR0: + case MISCREG_MVFR1: + hypTrap = hcr.tid3; + break; + } + if (hypTrap) { + return new HypervisorTrap(machInst, imm, + EC_TRAPPED_CP10_MRC_VMRS); + } + } + Dest = MiscOp1; + ''' + + vmrsIop = InstObjParams("vmrs", "Vmrs", "FpRegRegImmOp", + { "code": vmrsCode, "predicate_test": predicateTest, "op_class": "SimdFloatMiscOp" }, ["IsSerializeBefore"]) - header_output += FpRegRegOpDeclare.subst(vmrsIop); - decoder_output += FpRegRegOpConstructor.subst(vmrsIop); + header_output += FpRegRegImmOpDeclare.subst(vmrsIop); + decoder_output += FpRegRegImmOpConstructor.subst(vmrsIop); exec_output += PredOpExecute.subst(vmrsIop); vmrsFpscrIop = InstObjParams("vmrs", "VmrsFpscr", "FpRegRegOp", @@ -323,7 +348,7 @@ let {{ decoder_output += FpRegRegOpConstructor.subst(vmovRegQIop); exec_output += PredOpExecute.subst(vmovRegQIop); - vmovCoreRegBCode = vfpEnabledCheckCode + ''' + vmovCoreRegBCode = simdEnabledCheckCode + ''' FpDest_uw = insertBits(FpDest_uw, imm * 8 + 7, imm * 8, Op1_ub); ''' vmovCoreRegBIop = InstObjParams("vmov", "VmovCoreRegB", "FpRegRegImmOp", @@ -334,7 +359,7 @@ let {{ decoder_output += FpRegRegImmOpConstructor.subst(vmovCoreRegBIop); exec_output += PredOpExecute.subst(vmovCoreRegBIop); - vmovCoreRegHCode = vfpEnabledCheckCode + ''' + vmovCoreRegHCode = simdEnabledCheckCode + ''' FpDest_uw = insertBits(FpDest_uw, imm * 16 + 15, imm * 16, Op1_uh); ''' vmovCoreRegHIop = InstObjParams("vmov", "VmovCoreRegH", "FpRegRegImmOp", @@ -453,6 +478,17 @@ let {{ singleCode = singleSimpleCode + ''' FpscrExc = fpscr; ''' + singleTernOp = vfpEnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + VfpSavedState state = prepFpState(fpscr.rMode); + float cOp1 = FpOp1; + float cOp2 = FpOp2; + float cOp3 = FpDestP0; + FpDestP0 = ternaryOp(fpscr, %(palam)s, %(op)s, + fpscr.fz, fpscr.dn, fpscr.rMode); + finishVfp(fpscr, state, fpscr.fz); + FpscrExc = fpscr; + ''' singleBinOp = "binaryOp(fpscr, FpOp1, FpOp2," + \ "%(func)s, fpscr.fz, fpscr.dn, fpscr.rMode)" singleUnaryOp = "unaryOp(fpscr, FpOp1, %(func)s, fpscr.fz, fpscr.rMode)" @@ -463,6 +499,19 @@ let {{ FpDestP1_uw = dblHi(dest); FpscrExc = fpscr; ''' + doubleTernOp = vfpEnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + VfpSavedState state = prepFpState(fpscr.rMode); + double cOp1 = dbl(FpOp1P0_uw, FpOp1P1_uw); + double cOp2 = dbl(FpOp2P0_uw, FpOp2P1_uw); + double cOp3 = dbl(FpDestP0_uw, FpDestP1_uw); + double cDest = ternaryOp(fpscr, %(palam)s, %(op)s, + fpscr.fz, fpscr.dn, fpscr.rMode); + FpDestP0_uw = dblLow(cDest); + FpDestP1_uw = dblHi(cDest); + finishVfp(fpscr, state, fpscr.fz); + FpscrExc = fpscr; + ''' doubleBinOp = ''' binaryOp(fpscr, dbl(FpOp1P0_uw, FpOp1P1_uw), dbl(FpOp2P0_uw, FpOp2P1_uw), @@ -473,6 +522,37 @@ let {{ fpscr.fz, fpscr.rMode) ''' + def buildTernaryFpOp(Name, base, opClass, singleOp, doubleOp, paramStr): + global header_output, decoder_output, exec_output + + code = singleTernOp % { "op": singleOp, "palam": paramStr } + sIop = InstObjParams(Name.lower() + "s", Name + "S", base, + { "code": code, + "predicate_test": predicateTest, + "op_class": opClass }, []) + code = doubleTernOp % { "op": doubleOp, "palam": paramStr } + dIop = InstObjParams(Name.lower() + "d", Name + "D", base, + { "code": code, + "predicate_test": predicateTest, + "op_class": opClass }, []) + + declareTempl = eval(base + "Declare"); + constructorTempl = eval(base + "Constructor"); + + for iop in sIop, 
dIop: + header_output += declareTempl.subst(iop) + decoder_output += constructorTempl.subst(iop) + exec_output += PredOpExecute.subst(iop) + + buildTernaryFpOp("Vfma", "FpRegRegRegOp", "SimdFloatMultAccOp", + "fpMulAdd<float>", "fpMulAdd<double>", " cOp1, cOp2, cOp3" ) + buildTernaryFpOp("Vfms", "FpRegRegRegOp", "SimdFloatMultAccOp", + "fpMulAdd<float>", "fpMulAdd<double>", "-cOp1, cOp2, cOp3" ) + buildTernaryFpOp("Vfnma", "FpRegRegRegOp", "SimdFloatMultAccOp", + "fpMulAdd<float>", "fpMulAdd<double>", "-cOp1, cOp2, -cOp3" ) + buildTernaryFpOp("Vfnms", "FpRegRegRegOp", "SimdFloatMultAccOp", + "fpMulAdd<float>", "fpMulAdd<double>", " cOp1, cOp2, -cOp3" ) + def buildBinFpOp(name, Name, base, opClass, singleOp, doubleOp): global header_output, decoder_output, exec_output @@ -830,7 +910,7 @@ let {{ VfpSavedState state = prepFpState(fpscr.rMode); vfpFlushToZero(fpscr, FpOp1); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_uw = vfpFpSToFixed(FpOp1, false, false, 0, false); + FpDest_uw = vfpFpToFixed<float>(FpOp1, false, 32, 0, false); __asm__ __volatile__("" :: "m" (FpDest_uw)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -849,7 +929,7 @@ let {{ vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - uint64_t result = vfpFpDToFixed(cOp1, false, false, 0, false); + uint64_t result = vfpFpToFixed<double>(cOp1, false, 32, 0, false); __asm__ __volatile__("" :: "m" (result)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = result; @@ -868,7 +948,7 @@ let {{ VfpSavedState state = prepFpState(fpscr.rMode); vfpFlushToZero(fpscr, FpOp1); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_sw = vfpFpSToFixed(FpOp1, true, false, 0, false); + FpDest_sw = vfpFpToFixed<float>(FpOp1, true, 32, 0, false); __asm__ __volatile__("" :: "m" (FpDest_sw)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -887,7 +967,7 @@ let {{ vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - int64_t result = vfpFpDToFixed(cOp1, true, false, 0, false); + int64_t result = vfpFpToFixed<double>(cOp1, true, 32, 0, false); __asm__ __volatile__("" :: "m" (result)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = result; @@ -907,7 +987,7 @@ let {{ VfpSavedState state = prepFpState(fpscr.rMode); fesetround(FeRoundZero); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_uw = vfpFpSToFixed(FpOp1, false, false, 0); + FpDest_uw = vfpFpToFixed<float>(FpOp1, false, 32, 0); __asm__ __volatile__("" :: "m" (FpDest_uw)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -927,7 +1007,7 @@ let {{ VfpSavedState state = prepFpState(fpscr.rMode); fesetround(FeRoundZero); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - uint64_t result = vfpFpDToFixed(cOp1, false, false, 0); + uint64_t result = vfpFpToFixed<double>(cOp1, false, 32, 0); __asm__ __volatile__("" :: "m" (result)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = result; @@ -947,7 +1027,7 @@ let {{ VfpSavedState state = prepFpState(fpscr.rMode); fesetround(FeRoundZero); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_sw = vfpFpSToFixed(FpOp1, true, false, 0); + FpDest_sw = vfpFpToFixed<float>(FpOp1, true, 32, 0); __asm__ __volatile__("" :: "m" (FpDest_sw)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -967,7 +1047,7 @@ let {{ VfpSavedState state = prepFpState(fpscr.rMode); fesetround(FeRoundZero); __asm__ __volatile__("" : "=m" (cOp1) : "m" 
(cOp1)); - int64_t result = vfpFpDToFixed(cOp1, true, false, 0); + int64_t result = vfpFpToFixed<double>(cOp1, true, 32, 0); __asm__ __volatile__("" :: "m" (result)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = result; @@ -1333,7 +1413,7 @@ let {{ vfpFlushToZero(fpscr, FpOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_sw = vfpFpSToFixed(FpOp1, true, false, imm); + FpDest_sw = vfpFpToFixed<float>(FpOp1, true, 32, imm); __asm__ __volatile__("" :: "m" (FpDest_sw)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1352,7 +1432,7 @@ let {{ vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - uint64_t mid = vfpFpDToFixed(cOp1, true, false, imm); + uint64_t mid = vfpFpToFixed<double>(cOp1, true, 32, imm); __asm__ __volatile__("" :: "m" (mid)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = mid; @@ -1372,7 +1452,7 @@ let {{ vfpFlushToZero(fpscr, FpOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_uw = vfpFpSToFixed(FpOp1, false, false, imm); + FpDest_uw = vfpFpToFixed<float>(FpOp1, false, 32, imm); __asm__ __volatile__("" :: "m" (FpDest_uw)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1391,7 +1471,7 @@ let {{ vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - uint64_t mid = vfpFpDToFixed(cOp1, false, false, imm); + uint64_t mid = vfpFpToFixed<double>(cOp1, false, 32, imm); __asm__ __volatile__("" :: "m" (mid)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = mid; @@ -1410,7 +1490,7 @@ let {{ FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1_sw) : "m" (FpOp1_sw)); - FpDest = vfpSFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_sw, false, imm); + FpDest = vfpSFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_sw, 32, imm); __asm__ __volatile__("" :: "m" (FpDest)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1428,7 +1508,7 @@ let {{ uint64_t mid = ((uint64_t)FpOp1P0_uw | ((uint64_t)FpOp1P1_uw << 32)); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (mid) : "m" (mid)); - double cDest = vfpSFixedToFpD(fpscr.fz, fpscr.dn, mid, false, imm); + double cDest = vfpSFixedToFpD(fpscr.fz, fpscr.dn, mid, 32, imm); __asm__ __volatile__("" :: "m" (cDest)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = dblLow(cDest); @@ -1447,7 +1527,7 @@ let {{ FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1_uw) : "m" (FpOp1_uw)); - FpDest = vfpUFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_uw, false, imm); + FpDest = vfpUFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_uw, 32, imm); __asm__ __volatile__("" :: "m" (FpDest)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1465,7 +1545,7 @@ let {{ uint64_t mid = ((uint64_t)FpOp1P0_uw | ((uint64_t)FpOp1P1_uw << 32)); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (mid) : "m" (mid)); - double cDest = vfpUFixedToFpD(fpscr.fz, fpscr.dn, mid, false, imm); + double cDest = vfpUFixedToFpD(fpscr.fz, fpscr.dn, mid, 32, imm); __asm__ __volatile__("" :: "m" (cDest)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = dblLow(cDest); @@ -1485,7 +1565,7 @@ let {{ vfpFlushToZero(fpscr, FpOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - 
FpDest_sh = vfpFpSToFixed(FpOp1, true, true, imm); + FpDest_sh = vfpFpToFixed<float>(FpOp1, true, 16, imm); __asm__ __volatile__("" :: "m" (FpDest_sh)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1505,7 +1585,7 @@ let {{ vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - uint64_t result = vfpFpDToFixed(cOp1, true, true, imm); + uint64_t result = vfpFpToFixed<double>(cOp1, true, 16, imm); __asm__ __volatile__("" :: "m" (result)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = result; @@ -1526,7 +1606,7 @@ let {{ vfpFlushToZero(fpscr, FpOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_uh = vfpFpSToFixed(FpOp1, false, true, imm); + FpDest_uh = vfpFpToFixed<float>(FpOp1, false, 16, imm); __asm__ __volatile__("" :: "m" (FpDest_uh)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1546,7 +1626,7 @@ let {{ vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - uint64_t mid = vfpFpDToFixed(cOp1, false, true, imm); + uint64_t mid = vfpFpToFixed<double>(cOp1, false, 16, imm); __asm__ __volatile__("" :: "m" (mid)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = mid; @@ -1566,7 +1646,7 @@ let {{ FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1_sh) : "m" (FpOp1_sh)); - FpDest = vfpSFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_sh, true, imm); + FpDest = vfpSFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_sh, 16, imm); __asm__ __volatile__("" :: "m" (FpDest)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1585,7 +1665,7 @@ let {{ uint64_t mid = ((uint64_t)FpOp1P0_uw | ((uint64_t)FpOp1P1_uw << 32)); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (mid) : "m" (mid)); - double cDest = vfpSFixedToFpD(fpscr.fz, fpscr.dn, mid, true, imm); + double cDest = vfpSFixedToFpD(fpscr.fz, fpscr.dn, mid, 16, imm); __asm__ __volatile__("" :: "m" (cDest)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = dblLow(cDest); @@ -1605,7 +1685,7 @@ let {{ FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1_uh) : "m" (FpOp1_uh)); - FpDest = vfpUFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_uh, true, imm); + FpDest = vfpUFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_uh, 16, imm); __asm__ __volatile__("" :: "m" (FpDest)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1624,7 +1704,7 @@ let {{ uint64_t mid = ((uint64_t)FpOp1P0_uw | ((uint64_t)FpOp1P1_uw << 32)); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (mid) : "m" (mid)); - double cDest = vfpUFixedToFpD(fpscr.fz, fpscr.dn, mid, true, imm); + double cDest = vfpUFixedToFpD(fpscr.fz, fpscr.dn, mid, 16, imm); __asm__ __volatile__("" :: "m" (cDest)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = dblLow(cDest); diff --git a/src/arch/arm/isa/insts/fp64.isa b/src/arch/arm/isa/insts/fp64.isa new file mode 100644 index 000000000..95dec5062 --- /dev/null +++ b/src/arch/arm/isa/insts/fp64.isa @@ -0,0 +1,811 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2012-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the 
functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Authors: Thomas Grocutt +// Edmund Grimley Evans + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + fmovImmSCode = vfp64EnabledCheckCode + ''' + AA64FpDestP0_uw = bits(imm, 31, 0); + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + fmovImmSIop = InstObjParams("fmov", "FmovImmS", "FpRegImmOp", + { "code": fmovImmSCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegImmOpDeclare.subst(fmovImmSIop); + decoder_output += FpRegImmOpConstructor.subst(fmovImmSIop); + exec_output += BasicExecute.subst(fmovImmSIop); + + fmovImmDCode = vfp64EnabledCheckCode + ''' + AA64FpDestP0_uw = bits(imm, 31, 0); + AA64FpDestP1_uw = bits(imm, 63, 32); + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + fmovImmDIop = InstObjParams("fmov", "FmovImmD", "FpRegImmOp", + { "code": fmovImmDCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegImmOpDeclare.subst(fmovImmDIop); + decoder_output += AA64FpRegImmOpConstructor.subst(fmovImmDIop); + exec_output += BasicExecute.subst(fmovImmDIop); + + fmovRegSCode = vfp64EnabledCheckCode + ''' + AA64FpDestP0_uw = AA64FpOp1P0_uw; + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + fmovRegSIop = InstObjParams("fmov", "FmovRegS", "FpRegRegOp", + { "code": fmovRegSCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovRegSIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegSIop); + exec_output += BasicExecute.subst(fmovRegSIop); + + fmovRegDCode = vfp64EnabledCheckCode + ''' + AA64FpDestP0_uw = AA64FpOp1P0_uw; + AA64FpDestP1_uw = AA64FpOp1P1_uw; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + fmovRegDIop = InstObjParams("fmov", "FmovRegD", "FpRegRegOp", + { "code": fmovRegDCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovRegDIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegDIop); + exec_output += BasicExecute.subst(fmovRegDIop); + + fmovCoreRegWCode = vfp64EnabledCheckCode + ''' + AA64FpDestP0_uw = WOp1_uw; + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + fmovCoreRegWIop = InstObjParams("fmov", "FmovCoreRegW", "FpRegRegOp", + { "code": fmovCoreRegWCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovCoreRegWIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovCoreRegWIop); + exec_output += BasicExecute.subst(fmovCoreRegWIop); + + fmovCoreRegXCode = vfp64EnabledCheckCode + ''' + AA64FpDestP0_uw = XOp1_ud; + AA64FpDestP1_uw = XOp1_ud >> 32; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + fmovCoreRegXIop = InstObjParams("fmov", "FmovCoreRegX", "FpRegRegOp", + { "code": fmovCoreRegXCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovCoreRegXIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovCoreRegXIop); + exec_output += BasicExecute.subst(fmovCoreRegXIop); + + fmovUCoreRegXCode = vfp64EnabledCheckCode + ''' + AA64FpDestP2_uw = XOp1_ud; + AA64FpDestP3_uw = XOp1_ud >> 32; + ''' + fmovUCoreRegXIop = InstObjParams("fmov", "FmovUCoreRegX", "FpRegRegOp", + { "code": fmovUCoreRegXCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovUCoreRegXIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovUCoreRegXIop); + exec_output += BasicExecute.subst(fmovUCoreRegXIop); + + fmovRegCoreWCode = vfp64EnabledCheckCode + ''' + WDest = AA64FpOp1P0_uw; + ''' + fmovRegCoreWIop = InstObjParams("fmov", 
"FmovRegCoreW", "FpRegRegOp", + { "code": fmovRegCoreWCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovRegCoreWIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegCoreWIop); + exec_output += BasicExecute.subst(fmovRegCoreWIop); + + fmovRegCoreXCode = vfp64EnabledCheckCode + ''' + XDest = ( ((uint64_t) AA64FpOp1P1_uw) << 32) | AA64FpOp1P0_uw; + ''' + fmovRegCoreXIop = InstObjParams("fmov", "FmovRegCoreX", "FpRegRegOp", + { "code": fmovRegCoreXCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovRegCoreXIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegCoreXIop); + exec_output += BasicExecute.subst(fmovRegCoreXIop); + + fmovURegCoreXCode = vfp64EnabledCheckCode + ''' + XDest = ( ((uint64_t) AA64FpOp1P3_uw) << 32) | AA64FpOp1P2_uw; + ''' + fmovURegCoreXIop = InstObjParams("fmov", "FmovURegCoreX", "FpRegRegOp", + { "code": fmovURegCoreXCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovURegCoreXIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovURegCoreXIop); + exec_output += BasicExecute.subst(fmovURegCoreXIop); +}}; + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + singleIntConvCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + uint32_t cOp1 = AA64FpOp1P0_uw; + uint32_t cDest = %(op)s; + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + singleIntConvCode2 = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + uint32_t cOp1 = AA64FpOp1P0_uw; + uint32_t cOp2 = AA64FpOp2P0_uw; + uint32_t cDest = %(op)s; + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + singleBinOp = "binaryOp(fpscr, AA64FpOp1P0, AA64FpOp2P0," + \ + "%(func)s, fpscr.fz, fpscr.dn, fpscr.rMode)" + singleUnaryOp = "unaryOp(fpscr, AA64FpOp1P0, %(func)s, fpscr.fz, fpscr.rMode)" + + doubleIntConvCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + uint64_t cOp1 = ((uint64_t) AA64FpOp1P1_uw) << 32 | AA64FpOp1P0_uw; + uint64_t cDest = %(op)s; + AA64FpDestP0_uw = cDest & 0xFFFFFFFF; + AA64FpDestP1_uw = cDest >> 32; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + doubleIntConvCode2 = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + uint64_t cOp1 = ((uint64_t) AA64FpOp1P1_uw) << 32 | AA64FpOp1P0_uw; + uint64_t cOp2 = ((uint64_t) AA64FpOp2P1_uw) << 32 | AA64FpOp2P0_uw; + uint64_t cDest = %(op)s; + AA64FpDestP0_uw = cDest & 0xFFFFFFFF; + AA64FpDestP1_uw = cDest >> 32; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + doubleBinOp = ''' + binaryOp(fpscr, dbl(AA64FpOp1P0_uw, AA64FpOp1P1_uw), + dbl(AA64FpOp2P0_uw, AA64FpOp2P1_uw), + %(func)s, fpscr.fz, fpscr.dn, fpscr.rMode); + ''' + doubleUnaryOp = ''' + unaryOp(fpscr, dbl(AA64FpOp1P0_uw, AA64FpOp1P1_uw), %(func)s, + fpscr.fz, fpscr.rMode) + ''' + + def buildTernaryFpOp(name, opClass, sOp, dOp): + global header_output, decoder_output, exec_output + for isDouble in True, False: + code = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + ''' + if isDouble: + code += ''' + uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32; + uint64_t cOp2 = AA64FpOp2P0_uw | (uint64_t)AA64FpOp2P1_uw << 32; + uint64_t cOp3 = AA64FpOp3P0_uw | (uint64_t)AA64FpOp3P1_uw << 32; + uint64_t cDest; + ''' "cDest = " + dOp + ";" + ''' + 
AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = cDest >> 32; + ''' + else: + code += ''' + uint32_t cOp1 = AA64FpOp1P0_uw; + uint32_t cOp2 = AA64FpOp2P0_uw; + uint32_t cOp3 = AA64FpOp3P0_uw; + uint32_t cDest; + ''' "cDest = " + sOp + ";" + ''' + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = 0; + ''' + code += ''' + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + iop = InstObjParams(name.lower(), name + ("D" if isDouble else "S"), + "FpRegRegRegRegOp", + { "code": code, "op_class": opClass }, []) + + header_output += AA64FpRegRegRegRegOpDeclare.subst(iop) + decoder_output += AA64FpRegRegRegRegOpConstructor.subst(iop) + exec_output += BasicExecute.subst(iop) + + buildTernaryFpOp("FMAdd", "SimdFloatMultAccOp", + "fplibMulAdd<uint32_t>(cOp3, cOp1, cOp2, fpscr)", + "fplibMulAdd<uint64_t>(cOp3, cOp1, cOp2, fpscr)" ) + buildTernaryFpOp("FMSub", "SimdFloatMultAccOp", + "fplibMulAdd<uint32_t>(cOp3, fplibNeg<uint32_t>(cOp1), cOp2, fpscr)", + "fplibMulAdd<uint64_t>(cOp3, fplibNeg<uint64_t>(cOp1), cOp2, fpscr)" ) + buildTernaryFpOp("FNMAdd", "SimdFloatMultAccOp", + "fplibMulAdd<uint32_t>(fplibNeg<uint32_t>(cOp3), fplibNeg<uint32_t>(cOp1), cOp2, fpscr)", + "fplibMulAdd<uint64_t>(fplibNeg<uint64_t>(cOp3), fplibNeg<uint64_t>(cOp1), cOp2, fpscr)" ) + buildTernaryFpOp("FNMSub", "SimdFloatMultAccOp", + "fplibMulAdd<uint32_t>(fplibNeg<uint32_t>(cOp3), cOp1, cOp2, fpscr)", + "fplibMulAdd<uint64_t>(fplibNeg<uint64_t>(cOp3), cOp1, cOp2, fpscr)" ) + + def buildBinFpOp(name, Name, base, opClass, singleOp, doubleOp): + global header_output, decoder_output, exec_output + + code = singleIntConvCode2 % { "op": singleOp } + sIop = InstObjParams(name, Name + "S", base, + { "code": code, + "op_class": opClass }, []) + + code = doubleIntConvCode2 % { "op": doubleOp } + dIop = InstObjParams(name, Name + "D", base, + { "code": code, + "op_class": opClass }, []) + + declareTempl = eval( base + "Declare"); + constructorTempl = eval("AA64" + base + "Constructor"); + + for iop in sIop, dIop: + header_output += declareTempl.subst(iop) + decoder_output += constructorTempl.subst(iop) + exec_output += BasicExecute.subst(iop) + + buildBinFpOp("fadd", "FAdd", "FpRegRegRegOp", "SimdFloatAddOp", + "fplibAdd<uint32_t>(cOp1, cOp2, fpscr)", + "fplibAdd<uint64_t>(cOp1, cOp2, fpscr)") + buildBinFpOp("fsub", "FSub", "FpRegRegRegOp", "SimdFloatAddOp", + "fplibSub<uint32_t>(cOp1, cOp2, fpscr)", + "fplibSub<uint64_t>(cOp1, cOp2, fpscr)") + buildBinFpOp("fdiv", "FDiv", "FpRegRegRegOp", "SimdFloatDivOp", + "fplibDiv<uint32_t>(cOp1, cOp2, fpscr)", + "fplibDiv<uint64_t>(cOp1, cOp2, fpscr)") + buildBinFpOp("fmul", "FMul", "FpRegRegRegOp", "SimdFloatMultOp", + "fplibMul<uint32_t>(cOp1, cOp2, fpscr)", + "fplibMul<uint64_t>(cOp1, cOp2, fpscr)") + buildBinFpOp("fnmul", "FNMul", "FpRegRegRegOp", "SimdFloatMultOp", + "fplibNeg<uint32_t>(fplibMul<uint32_t>(cOp1, cOp2, fpscr))", + "fplibNeg<uint64_t>(fplibMul<uint64_t>(cOp1, cOp2, fpscr))") + buildBinFpOp("fmin", "FMin", "FpRegRegRegOp", "SimdFloatCmpOp", + "fplibMin<uint32_t>(cOp1, cOp2, fpscr)", + "fplibMin<uint64_t>(cOp1, cOp2, fpscr)") + buildBinFpOp("fmax", "FMax", "FpRegRegRegOp", "SimdFloatCmpOp", + "fplibMax<uint32_t>(cOp1, cOp2, fpscr)", + "fplibMax<uint64_t>(cOp1, cOp2, fpscr)") + buildBinFpOp("fminnm", "FMinNM", "FpRegRegRegOp", "SimdFloatCmpOp", + "fplibMinNum<uint32_t>(cOp1, cOp2, fpscr)", + "fplibMinNum<uint64_t>(cOp1, cOp2, fpscr)") + buildBinFpOp("fmaxnm", "FMaxNM", "FpRegRegRegOp", "SimdFloatCmpOp", + "fplibMaxNum<uint32_t>(cOp1, cOp2, fpscr)", + 
"fplibMaxNum<uint64_t>(cOp1, cOp2, fpscr)") + + def buildUnaryFpOp(name, Name, base, opClass, singleOp, doubleOp = None): + if doubleOp is None: + doubleOp = singleOp + global header_output, decoder_output, exec_output + + code = singleIntConvCode % { "op": singleOp } + sIop = InstObjParams(name, Name + "S", base, + { "code": code, + "op_class": opClass }, []) + code = doubleIntConvCode % { "op": doubleOp } + dIop = InstObjParams(name, Name + "D", base, + { "code": code, + "op_class": opClass }, []) + + declareTempl = eval( base + "Declare"); + constructorTempl = eval("AA64" + base + "Constructor"); + + for iop in sIop, dIop: + header_output += declareTempl.subst(iop) + decoder_output += constructorTempl.subst(iop) + exec_output += BasicExecute.subst(iop) + + buildUnaryFpOp("fsqrt", "FSqrt", "FpRegRegOp", "SimdFloatSqrtOp", + "fplibSqrt<uint32_t>(cOp1, fpscr)", "fplibSqrt<uint64_t>(cOp1, fpscr)") + + def buildSimpleUnaryFpOp(name, Name, base, opClass, singleOp, + doubleOp = None, isIntConv = True): + if doubleOp is None: + doubleOp = singleOp + global header_output, decoder_output, exec_output + + if isIntConv: + sCode = singleIntConvCode + dCode = doubleIntConvCode + else: + sCode = singleCode + dCode = doubleCode + + for code, op, suffix in [[sCode, singleOp, "S"], + [dCode, doubleOp, "D"]]: + iop = InstObjParams(name, Name + suffix, base, + { "code": code % { "op": op }, + "op_class": opClass }, []) + + declareTempl = eval( base + "Declare"); + constructorTempl = eval("AA64" + base + "Constructor"); + + header_output += declareTempl.subst(iop) + decoder_output += constructorTempl.subst(iop) + exec_output += BasicExecute.subst(iop) + + buildSimpleUnaryFpOp("fneg", "FNeg", "FpRegRegOp", "SimdFloatMiscOp", + "fplibNeg<uint32_t>(cOp1)", "fplibNeg<uint64_t>(cOp1)") + buildSimpleUnaryFpOp("fabs", "FAbs", "FpRegRegOp", "SimdFloatMiscOp", + "fplibAbs<uint32_t>(cOp1)", "fplibAbs<uint64_t>(cOp1)") + buildSimpleUnaryFpOp("frintn", "FRIntN", "FpRegRegOp", "SimdFloatMiscOp", + "fplibRoundInt<uint32_t>(cOp1, FPRounding_TIEEVEN, false, fpscr)", + "fplibRoundInt<uint64_t>(cOp1, FPRounding_TIEEVEN, false, fpscr)") + buildSimpleUnaryFpOp("frintp", "FRIntP", "FpRegRegOp", "SimdFloatMiscOp", + "fplibRoundInt<uint32_t>(cOp1, FPRounding_POSINF, false, fpscr)", + "fplibRoundInt<uint64_t>(cOp1, FPRounding_POSINF, false, fpscr)") + buildSimpleUnaryFpOp("frintm", "FRIntM", "FpRegRegOp", "SimdFloatMiscOp", + "fplibRoundInt<uint32_t>(cOp1, FPRounding_NEGINF, false, fpscr)", + "fplibRoundInt<uint64_t>(cOp1, FPRounding_NEGINF, false, fpscr)") + buildSimpleUnaryFpOp("frintz", "FRIntZ", "FpRegRegOp", "SimdFloatMiscOp", + "fplibRoundInt<uint32_t>(cOp1, FPRounding_ZERO, false, fpscr)", + "fplibRoundInt<uint64_t>(cOp1, FPRounding_ZERO, false, fpscr)") + buildSimpleUnaryFpOp("frinta", "FRIntA", "FpRegRegOp", "SimdFloatMiscOp", + "fplibRoundInt<uint32_t>(cOp1, FPRounding_TIEAWAY, false, fpscr)", + "fplibRoundInt<uint64_t>(cOp1, FPRounding_TIEAWAY, false, fpscr)") + buildSimpleUnaryFpOp("frinti", "FRIntI", "FpRegRegOp", "SimdFloatMiscOp", + "fplibRoundInt<uint32_t>(cOp1, FPCRRounding(fpscr), false, fpscr)", + "fplibRoundInt<uint64_t>(cOp1, FPCRRounding(fpscr), false, fpscr)") + buildSimpleUnaryFpOp("frintx", "FRIntX", "FpRegRegOp", "SimdFloatMiscOp", + "fplibRoundInt<uint32_t>(cOp1, FPCRRounding(fpscr), true, fpscr)", + "fplibRoundInt<uint64_t>(cOp1, FPCRRounding(fpscr), true, fpscr)") +}}; + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + # Creates the integer to floating point instructions, 
including variants for + # signed/unsigned, float/double, etc + for regL, regOpL, width in [["W", "w", 32], + ["X", "d", 64]]: + for isDouble in True, False: + for us, usCode in [["U", "uint%d_t cSrc = %sOp1_u%s;" %(width, regL, regOpL)], + ["S", "int%d_t cSrc = %sOp1_u%s;" %(width, regL, regOpL)]]: + fcvtIntFpDCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + %s + ''' %(usCode) + + if isDouble: + fcvtIntFpDCode += ''' + uint64_t cDest = fplibFixedToFP<uint64_t>(cSrc, 0, + %s, FPCRRounding(fpscr), fpscr); + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = cDest >> 32; + ''' % ("true" if us == "U" else "false") + else: + fcvtIntFpDCode += ''' + uint32_t cDest = fplibFixedToFP<uint32_t>(cSrc, 0, + %s, FPCRRounding(fpscr), fpscr); + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = 0; + ''' % ("true" if us == "U" else "false") + fcvtIntFpDCode += ''' + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + instName = "Fcvt%s%sIntFp%s" %(regL, us, "D" if isDouble else "S") + mnem = "%scvtf" %(us.lower()) + fcvtIntFpDIop = InstObjParams(mnem, instName, "FpRegRegOp", + { "code": fcvtIntFpDCode, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegOpDeclare.subst(fcvtIntFpDIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fcvtIntFpDIop); + exec_output += BasicExecute.subst(fcvtIntFpDIop); + + # Generates the floating point to integer conversion instructions in various + # variants, eg signed/unsigned + def buildFpCvtIntOp(isDouble, isSigned, isXReg): + global header_output, decoder_output, exec_output + + for rmode, roundingMode in [["N", "FPRounding_TIEEVEN"], + ["P", "FPRounding_POSINF"], + ["M", "FPRounding_NEGINF"], + ["Z", "FPRounding_ZERO"], + ["A", "FPRounding_TIEAWAY"]]: + fcvtFpIntCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc;''' + if isDouble: + fcvtFpIntCode += ''' + uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32; + ''' + else: + fcvtFpIntCode += "uint32_t cOp1 = AA64FpOp1P0_uw;" + + fcvtFpIntCode += ''' + %sDest = fplibFPToFixed<uint%s_t, uint%s_t>(cOp1, 0, %s, %s, fpscr); + FpscrExc = fpscr; + ''' %("X" if isXReg else "W", + "64" if isDouble else "32", + "64" if isXReg else "32", + "false" if isSigned else "true", + roundingMode) + + instName = "FcvtFp%sInt%s%s%s" %("S" if isSigned else "U", + "X" if isXReg else "W", + "D" if isDouble else "S", rmode) + mnem = "fcvt%s%s" %(rmode, "s" if isSigned else "u") + fcvtFpIntIop = InstObjParams(mnem, instName, "FpRegRegOp", + { "code": fcvtFpIntCode, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegOpDeclare.subst(fcvtFpIntIop); + decoder_output += FpRegRegOpConstructor.subst(fcvtFpIntIop); + exec_output += BasicExecute.subst(fcvtFpIntIop); + + # Now actually do the building with the different variants + for isDouble in True, False: + for isSigned in True, False: + for isXReg in True, False: + buildFpCvtIntOp(isDouble, isSigned, isXReg) + + fcvtFpSFpDCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + uint64_t cDest = fplibConvert<uint32_t, uint64_t>(AA64FpOp1P0_uw, + FPCRRounding(fpscr), fpscr); + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = cDest >> 32; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + fcvtFpSFpDIop = InstObjParams("fcvt", "FCvtFpSFpD", "FpRegRegOp", + { "code": fcvtFpSFpDCode, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegOpDeclare.subst(fcvtFpSFpDIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpSFpDIop); + exec_output += 
BasicExecute.subst(fcvtFpSFpDIop); + + fcvtFpDFpSCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32; + AA64FpDestP0_uw = fplibConvert<uint64_t, uint32_t>(cOp1, + FPCRRounding(fpscr), fpscr); + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + fcvtFpDFpSIop = InstObjParams("fcvt", "FcvtFpDFpS", "FpRegRegOp", + {"code": fcvtFpDFpSCode, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegOpDeclare.subst(fcvtFpDFpSIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpDFpSIop); + exec_output += BasicExecute.subst(fcvtFpDFpSIop); + + # Half precision to single or double precision conversion + for isDouble in True, False: + code = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + %s cDest = fplibConvert<uint16_t, uint%s_t>(AA64FpOp1P0_uw, + FPCRRounding(fpscr), fpscr); + ''' % ("uint64_t" if isDouble else "uint32_t", + "64" if isDouble else "32") + if isDouble: + code += ''' + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = cDest >> 32; + ''' + else: + code += ''' + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = 0; + ''' + code += ''' + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + instName = "FcvtFpHFp%s" %("D" if isDouble else "S") + fcvtFpHFpIop = InstObjParams("fcvt", instName, "FpRegRegOp", + { "code": code, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegOpDeclare.subst(fcvtFpHFpIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpHFpIop); + exec_output += BasicExecute.subst(fcvtFpHFpIop); + + # single or double precision to Half precision conversion + for isDouble in True, False: + code = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + %s; + AA64FpDestP0_uw = fplibConvert<uint%s_t, uint16_t>(cOp1, + FPCRRounding(fpscr), fpscr); + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' % ("uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32" + if isDouble else "uint32_t cOp1 = AA64FpOp1P0_uw", + "64" if isDouble else "32") + + instName = "FcvtFp%sFpH" %("D" if isDouble else "S") + fcvtFpFpHIop = InstObjParams("fcvt", instName, "FpRegRegOp", + { "code": code, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegOpDeclare.subst(fcvtFpFpHIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpFpHIop); + exec_output += BasicExecute.subst(fcvtFpFpHIop); + + # Build the various versions of the floating point compare instructions + def buildFCmpOp(isQuiet, isDouble, isImm): + global header_output, decoder_output, exec_output + + fcmpCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + %s cOp1 = %s; + ''' % ("uint64_t" if isDouble else "uint32_t", + "AA64FpDestP0_uw | (uint64_t)AA64FpDestP1_uw << 32" + if isDouble else "AA64FpDestP0_uw") + if isImm: + fcmpCode += ''' + %s cOp2 = imm; + ''' % ("uint64_t" if isDouble else "uint32_t") + else: + fcmpCode += ''' + %s cOp2 = %s; + ''' % ("uint64_t" if isDouble else "uint32_t", + "AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32" + if isDouble else "AA64FpOp1P0_uw") + fcmpCode += ''' + int cc = fplibCompare<uint%s_t>(cOp1, cOp2, %s, fpscr); + CondCodesNZ = cc >> 2 & 3; + CondCodesC = cc >> 1 & 1; + CondCodesV = cc & 1; + FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; + ''' % ("64" if isDouble else "32", "false" if isQuiet else "true") + + typeName = "Imm" if isImm else "Reg" + instName = "FCmp%s%s%s" %("" if isQuiet else "E", 
typeName, + "D" if isDouble else "S") + fcmpIop = InstObjParams("fcmp%s" %("" if isQuiet else "e"), instName, + "FpReg%sOp" %(typeName), + {"code": fcmpCode, + "op_class": "SimdFloatCmpOp"}, []) + + declareTemp = eval("FpReg%sOpDeclare" %(typeName)); + constructorTemp = eval("AA64FpReg%sOpConstructor" %(typeName)); + header_output += declareTemp.subst(fcmpIop); + decoder_output += constructorTemp.subst(fcmpIop); + exec_output += BasicExecute.subst(fcmpIop); + + for isQuiet in True, False: + for isDouble in True, False: + for isImm in True, False: + buildFCmpOp(isQuiet, isDouble, isImm) + + # Build the various versions of the conditional floating point compare + # instructions + def buildFCCmpOp(isQuiet, isDouble): + global header_output, decoder_output, exec_output + + fccmpCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) { + %s cOp1 = %s; + %s cOp2 = %s; + int cc = fplibCompare<uint%s_t>(cOp1, cOp2, %s, fpscr); + CondCodesNZ = cc >> 2 & 3; + CondCodesC = cc >> 1 & 1; + CondCodesV = cc & 1; + } else { + CondCodesNZ = (defCc >> 2) & 0x3; + CondCodesC = (defCc >> 1) & 0x1; + CondCodesV = defCc & 0x1; + } + FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; + ''' % ("uint64_t" if isDouble else "uint32_t", + "AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32" + if isDouble else "AA64FpOp1P0_uw", + "uint64_t" if isDouble else "uint32_t", + "AA64FpOp2P0_uw | (uint64_t)AA64FpOp2P1_uw << 32" + if isDouble else "AA64FpOp2P0_uw", + "64" if isDouble else "32", "false" if isQuiet else "true") + + instName = "FCCmp%sReg%s" %("" if isQuiet else "E", + "D" if isDouble else "S") + fccmpIop = InstObjParams("fccmp%s" %("" if isQuiet else "e"), + instName, "FpCondCompRegOp", + {"code": fccmpCode, + "op_class": "SimdFloatCmpOp"}, []) + header_output += DataXCondCompRegDeclare.subst(fccmpIop); + decoder_output += DataXCondCompRegConstructor.subst(fccmpIop); + exec_output += BasicExecute.subst(fccmpIop); + + for isQuiet in True, False: + for isDouble in True, False: + buildFCCmpOp(isQuiet, isDouble) + +}}; + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + # Generates the variants of the floating to fixed point instructions + def buildFpCvtFixedOp(isSigned, isDouble, isXReg): + global header_output, decoder_output, exec_output + + fcvtFpFixedCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + ''' + if isDouble: + fcvtFpFixedCode += ''' + uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32; + ''' + else: + fcvtFpFixedCode += "uint32_t cOp1 = AA64FpOp1P0_uw;" + fcvtFpFixedCode += ''' + %sDest = fplibFPToFixed<uint%s_t, uint%s_t>(cOp1, 64 - imm, %s, + FPRounding_ZERO, fpscr); + FpscrExc = fpscr; + ''' %("X" if isXReg else "W", + "64" if isDouble else "32", + "64" if isXReg else "32", + "false" if isSigned else "true") + + instName = "FcvtFp%sFixed%s%s" %("S" if isSigned else "U", + "D" if isDouble else "S", + "X" if isXReg else "W") + mnem = "fcvtz%s" %("s" if isSigned else "u") + fcvtFpFixedIop = InstObjParams(mnem, instName, "FpRegRegImmOp", + { "code": fcvtFpFixedCode, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegImmOpDeclare.subst(fcvtFpFixedIop); + decoder_output += AA64FpRegRegImmOpConstructor.subst(fcvtFpFixedIop); + exec_output += BasicExecute.subst(fcvtFpFixedIop); + + # Generates the variants of the fixed to floating point instructions + def buildFixedCvtFpOp(isSigned, isDouble, isXReg): + global header_output, decoder_output, 
exec_output + + srcRegType = "X" if isXReg else "W" + fcvtFixedFpCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + %s result = fplibFixedToFP<uint%s_t>((%s%s_t)%sOp1, 64 - imm, + %s, FPCRRounding(fpscr), fpscr); + ''' %("uint64_t" if isDouble else "uint32_t", + "64" if isDouble else "32", + "int" if isSigned else "uint", "64" if isXReg else "32", + srcRegType, + "false" if isSigned else "true") + if isDouble: + fcvtFixedFpCode += ''' + AA64FpDestP0_uw = result; + AA64FpDestP1_uw = result >> 32; + ''' + else: + fcvtFixedFpCode += ''' + AA64FpDestP0_uw = result; + AA64FpDestP1_uw = 0; + ''' + fcvtFixedFpCode += ''' + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + instName = "Fcvt%sFixedFp%s%s" %("S" if isSigned else "U", + "D" if isDouble else "S", + srcRegType) + mnem = "%scvtf" %("s" if isSigned else "u") + fcvtFixedFpIop = InstObjParams(mnem, instName, "FpRegRegImmOp", + { "code": fcvtFixedFpCode, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegImmOpDeclare.subst(fcvtFixedFpIop); + decoder_output += FpRegRegImmOpConstructor.subst(fcvtFixedFpIop); + exec_output += BasicExecute.subst(fcvtFixedFpIop); + + # loop over the variants building the instructions for each + for isXReg in True, False: + for isDouble in True, False: + for isSigned in True, False: + buildFpCvtFixedOp(isSigned, isDouble, isXReg) + buildFixedCvtFpOp(isSigned, isDouble, isXReg) +}}; + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + for isDouble in True, False: + code = ''' + if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) { + AA64FpDestP0_uw = AA64FpOp1P0_uw; + ''' + if isDouble: + code += ''' + AA64FpDestP1_uw = AA64FpOp1P1_uw; + } else { + AA64FpDestP0_uw = AA64FpOp2P0_uw; + AA64FpDestP1_uw = AA64FpOp2P1_uw; + } + ''' + else: + code += ''' + } else { + AA64FpDestP0_uw = AA64FpOp2P0_uw; + } + AA64FpDestP1_uw = 0; + ''' + code += ''' + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + + iop = InstObjParams("fcsel", "FCSel%s" %("D" if isDouble else "S"), + "FpCondSelOp", code) + header_output += DataXCondSelDeclare.subst(iop) + decoder_output += DataXCondSelConstructor.subst(iop) + exec_output += BasicExecute.subst(iop) +}}; diff --git a/src/arch/arm/isa/insts/insts.isa b/src/arch/arm/isa/insts/insts.isa index c01e87df8..9d90f7779 100644 --- a/src/arch/arm/isa/insts/insts.isa +++ b/src/arch/arm/isa/insts/insts.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2012 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -37,6 +37,9 @@ // // Authors: Gabe Black +//AArch64 instructions +##include "aarch64.isa" + //Basic forms of various templates ##include "basic.isa" @@ -46,8 +49,15 @@ //Loads of a single item ##include "ldr.isa" +//Loads of a single item, AArch64 +##include "ldr64.isa" + //Miscellaneous instructions that don't fit elsewhere ##include "misc.isa" +##include "misc64.isa" + +//Stores of a single item, AArch64 +##include "str64.isa" //Stores of a single item ##include "str.isa" @@ -61,8 +71,12 @@ //Data processing instructions ##include "data.isa" +//AArch64 data processing instructions +##include "data64.isa" + //Branches ##include "branch.isa" +##include "branch64.isa" //Multiply ##include "mult.isa" @@ -72,9 +86,14 @@ //VFP ##include "fp.isa" +##include "fp64.isa" //Neon ##include "neon.isa" +//AArch64 Neon +##include "neon64.isa" +##include "neon64_mem.isa" + //m5 Psuedo-ops ##include 
"m5ops.isa" diff --git a/src/arch/arm/isa/insts/ldr.isa b/src/arch/arm/isa/insts/ldr.isa index f599fa4b9..6bfe40118 100644 --- a/src/arch/arm/isa/insts/ldr.isa +++ b/src/arch/arm/isa/insts/ldr.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2011 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -38,6 +38,7 @@ // Authors: Gabe Black let {{ + import math header_output = "" decoder_output = "" @@ -78,7 +79,8 @@ let {{ newDecoder, newExec) = self.fillTemplates(self.name, self.Name, codeBlobs, self.memFlags, instFlags, base, - wbDecl, pcDecl, self.rasPop) + wbDecl, pcDecl, self.rasPop, + self.size, self.sign) header_output += newHeader decoder_output += newDecoder @@ -160,7 +162,7 @@ let {{ self.size, self.sign, self.user) # Add memory request flags where necessary - self.memFlags.append("%d" % (self.size - 1)) + self.memFlags.append("%d" % int(math.log(self.size, 2))) if self.user: self.memFlags.append("ArmISA::TLB::UserMode") diff --git a/src/arch/arm/isa/insts/ldr64.isa b/src/arch/arm/isa/insts/ldr64.isa new file mode 100644 index 000000000..78460f661 --- /dev/null +++ b/src/arch/arm/isa/insts/ldr64.isa @@ -0,0 +1,446 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Authors: Gabe Black + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + class LoadInst64(LoadStoreInst): + execBase = 'Load64' + micro = False + + def __init__(self, mnem, Name, size=4, sign=False, user=False, + literal=False, flavor="normal", top=False): + super(LoadInst64, self).__init__() + + self.name = mnem + self.Name = Name + self.size = size + self.sign = sign + self.user = user + self.literal = literal + self.flavor = flavor + self.top = top + + self.memFlags = ["ArmISA::TLB::MustBeOne"] + self.instFlags = [] + self.codeBlobs = {"postacc_code" : ""} + + # Add memory request flags where necessary + if self.user: + self.memFlags.append("ArmISA::TLB::UserMode") + + if self.flavor == "dprefetch": + self.memFlags.append("Request::PREFETCH") + self.instFlags = ['IsDataPrefetch'] + elif self.flavor == "iprefetch": + self.memFlags.append("Request::PREFETCH") + self.instFlags = ['IsInstPrefetch'] + if self.micro: + self.instFlags.append("IsMicroop") + + if self.flavor in ("acexp", "exp"): + # For exclusive pair ops alignment check is based on total size + self.memFlags.append("%d" % int(math.log(self.size, 2) + 1)) + elif not (self.size == 16 and self.top): + # Only the first microop should perform alignment checking. + self.memFlags.append("%d" % int(math.log(self.size, 2))) + + if self.flavor not in ("acquire", "acex", "exclusive", + "acexp", "exp"): + self.memFlags.append("ArmISA::TLB::AllowUnaligned") + + if self.flavor in ("acquire", "acex", "acexp"): + self.instFlags.extend(["IsMemBarrier", + "IsWriteBarrier", + "IsReadBarrier"]) + if self.flavor in ("acex", "exclusive", "exp", "acexp"): + self.memFlags.append("Request::LLSC") + + def buildEACode(self): + # Address computation code + eaCode = "" + if self.flavor == "fp": + eaCode += vfp64EnabledCheckCode + + if self.literal: + eaCode += "EA = RawPC" + else: + eaCode += SPAlignmentCheckCode + "EA = XBase" + + if self.size == 16: + if self.top: + eaCode += " + (isBigEndian64(xc->tcBase()) ? 0 : 8)" + else: + eaCode += " + (isBigEndian64(xc->tcBase()) ? 8 : 0)" + if not self.post: + eaCode += self.offset + eaCode += ";" + + self.codeBlobs["ea_code"] = eaCode + + def emitHelper(self, base='Memory64', wbDecl=None): + global header_output, decoder_output, exec_output + + # If this is a microop itself, don't allow anything that would + # require further microcoding. 
+ if self.micro: + assert not wbDecl + + fa_code = None + if not self.micro and self.flavor in ("normal", "widen", "acquire"): + fa_code = ''' + fault->annotate(ArmFault::SAS, %s); + fault->annotate(ArmFault::SSE, %s); + fault->annotate(ArmFault::SRT, dest); + fault->annotate(ArmFault::SF, %s); + fault->annotate(ArmFault::AR, %s); + ''' % ("0" if self.size == 1 else + "1" if self.size == 2 else + "2" if self.size == 4 else "3", + "true" if self.sign else "false", + "true" if (self.size == 8 or + self.flavor == "widen") else "false", + "true" if self.flavor == "acquire" else "false") + + (newHeader, newDecoder, newExec) = \ + self.fillTemplates(self.name, self.Name, self.codeBlobs, + self.memFlags, self.instFlags, + base, wbDecl, faCode=fa_code) + + header_output += newHeader + decoder_output += newDecoder + exec_output += newExec + + class LoadImmInst64(LoadInst64): + def __init__(self, *args, **kargs): + super(LoadImmInst64, self).__init__(*args, **kargs) + self.offset = " + imm" + + self.wbDecl = "MicroAddXiUop(machInst, base, base, imm);" + + class LoadRegInst64(LoadInst64): + def __init__(self, *args, **kargs): + super(LoadRegInst64, self).__init__(*args, **kargs) + self.offset = " + extendReg64(XOffset, type, shiftAmt, 64)" + + self.wbDecl = \ + "MicroAddXERegUop(machInst, base, base, " + \ + " offset, type, shiftAmt);" + + class LoadRawRegInst64(LoadInst64): + def __init__(self, *args, **kargs): + super(LoadRawRegInst64, self).__init__(*args, **kargs) + self.offset = "" + + class LoadSingle64(LoadInst64): + def emit(self): + self.buildEACode() + + # Code that actually handles the access + if self.flavor in ("dprefetch", "iprefetch"): + accCode = 'uint64_t temp M5_VAR_USED = Mem%s;' + elif self.flavor == "fp": + if self.size in (1, 2, 4): + accCode = ''' + AA64FpDestP0_uw = cSwap(Mem%s, + isBigEndian64(xc->tcBase())); + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + elif self.size == 8 or (self.size == 16 and not self.top): + accCode = ''' + uint64_t data = cSwap(Mem%s, + isBigEndian64(xc->tcBase())); + AA64FpDestP0_uw = (uint32_t)data; + AA64FpDestP1_uw = (data >> 32); + ''' + # Only zero out the other half if this isn't part of a + # pair of 8 byte loads implementing a 16 byte load. 
+ if self.size == 8: + accCode += ''' + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + elif self.size == 16 and self.top: + accCode = ''' + uint64_t data = cSwap(Mem%s, + isBigEndian64(xc->tcBase())); + AA64FpDestP2_uw = (uint32_t)data; + AA64FpDestP3_uw = (data >> 32); + ''' + elif self.flavor == "widen" or self.size == 8: + accCode = "XDest = cSwap(Mem%s, isBigEndian64(xc->tcBase()));" + else: + accCode = "WDest = cSwap(Mem%s, isBigEndian64(xc->tcBase()));" + if self.size == 16: + accCode = accCode % buildMemSuffix(self.sign, 8) + else: + accCode = accCode % buildMemSuffix(self.sign, self.size) + + self.codeBlobs["memacc_code"] = accCode + + # Push it out to the output files + wbDecl = None + if self.writeback and not self.micro: + wbDecl = self.wbDecl + self.emitHelper(self.base, wbDecl) + + class LoadDouble64(LoadInst64): + def emit(self): + self.buildEACode() + + # Code that actually handles the access + if self.flavor == "fp": + accCode = ''' + uint64_t data = cSwap(Mem_ud, isBigEndian64(xc->tcBase())); + AA64FpDestP0_uw = (uint32_t)data; + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + AA64FpDest2P0_uw = (data >> 32); + AA64FpDest2P1_uw = 0; + AA64FpDest2P2_uw = 0; + AA64FpDest2P3_uw = 0; + ''' + else: + if self.sign: + if self.size == 4: + accCode = ''' + uint64_t data = cSwap(Mem_ud, + isBigEndian64(xc->tcBase())); + XDest = sext<32>((uint32_t)data); + XDest2 = sext<32>(data >> 32); + ''' + elif self.size == 8: + accCode = ''' + XDest = sext<64>(Mem_tud.a); + XDest2 = sext<64>(Mem_tud.b); + ''' + else: + if self.size == 4: + accCode = ''' + uint64_t data = cSwap(Mem_ud, + isBigEndian64(xc->tcBase())); + XDest = (uint32_t)data; + XDest2 = data >> 32; + ''' + elif self.size == 8: + accCode = ''' + XDest = Mem_tud.a; + XDest2 = Mem_tud.b; + ''' + self.codeBlobs["memacc_code"] = accCode + + # Push it out to the output files + wbDecl = None + if self.writeback and not self.micro: + wbDecl = self.wbDecl + self.emitHelper(self.base, wbDecl) + + class LoadImm64(LoadImmInst64, LoadSingle64): + decConstBase = 'LoadStoreImm64' + base = 'ArmISA::MemoryImm64' + writeback = False + post = False + + class LoadPre64(LoadImmInst64, LoadSingle64): + decConstBase = 'LoadStoreImm64' + base = 'ArmISA::MemoryPreIndex64' + writeback = True + post = False + + class LoadPost64(LoadImmInst64, LoadSingle64): + decConstBase = 'LoadStoreImm64' + base = 'ArmISA::MemoryPostIndex64' + writeback = True + post = True + + class LoadReg64(LoadRegInst64, LoadSingle64): + decConstBase = 'LoadStoreReg64' + base = 'ArmISA::MemoryReg64' + writeback = False + post = False + + class LoadRaw64(LoadRawRegInst64, LoadSingle64): + decConstBase = 'LoadStoreRaw64' + base = 'ArmISA::MemoryRaw64' + writeback = False + post = False + + class LoadEx64(LoadRawRegInst64, LoadSingle64): + decConstBase = 'LoadStoreEx64' + base = 'ArmISA::MemoryEx64' + writeback = False + post = False + + class LoadLit64(LoadImmInst64, LoadSingle64): + decConstBase = 'LoadStoreLit64' + base = 'ArmISA::MemoryLiteral64' + writeback = False + post = False + + def buildLoads64(mnem, NameBase, size, sign, flavor="normal"): + LoadImm64(mnem, NameBase + "_IMM", size, sign, flavor=flavor).emit() + LoadPre64(mnem, NameBase + "_PRE", size, sign, flavor=flavor).emit() + LoadPost64(mnem, NameBase + "_POST", size, sign, flavor=flavor).emit() + LoadReg64(mnem, NameBase + "_REG", size, sign, flavor=flavor).emit() + + buildLoads64("ldrb", "LDRB64", 1, False) + buildLoads64("ldrsb", "LDRSBW64", 1, True) + buildLoads64("ldrsb", "LDRSBX64", 1, 
True, flavor="widen") + buildLoads64("ldrh", "LDRH64", 2, False) + buildLoads64("ldrsh", "LDRSHW64", 2, True) + buildLoads64("ldrsh", "LDRSHX64", 2, True, flavor="widen") + buildLoads64("ldrsw", "LDRSW64", 4, True, flavor="widen") + buildLoads64("ldr", "LDRW64", 4, False) + buildLoads64("ldr", "LDRX64", 8, False) + buildLoads64("ldr", "LDRBFP64", 1, False, flavor="fp") + buildLoads64("ldr", "LDRHFP64", 2, False, flavor="fp") + buildLoads64("ldr", "LDRSFP64", 4, False, flavor="fp") + buildLoads64("ldr", "LDRDFP64", 8, False, flavor="fp") + + LoadImm64("prfm", "PRFM64_IMM", 8, flavor="dprefetch").emit() + LoadReg64("prfm", "PRFM64_REG", 8, flavor="dprefetch").emit() + LoadLit64("prfm", "PRFM64_LIT", 8, literal=True, flavor="dprefetch").emit() + LoadImm64("prfum", "PRFUM64_IMM", 8, flavor="dprefetch").emit() + + LoadImm64("ldurb", "LDURB64_IMM", 1, False).emit() + LoadImm64("ldursb", "LDURSBW64_IMM", 1, True).emit() + LoadImm64("ldursb", "LDURSBX64_IMM", 1, True, flavor="widen").emit() + LoadImm64("ldurh", "LDURH64_IMM", 2, False).emit() + LoadImm64("ldursh", "LDURSHW64_IMM", 2, True).emit() + LoadImm64("ldursh", "LDURSHX64_IMM", 2, True, flavor="widen").emit() + LoadImm64("ldursw", "LDURSW64_IMM", 4, True, flavor="widen").emit() + LoadImm64("ldur", "LDURW64_IMM", 4, False).emit() + LoadImm64("ldur", "LDURX64_IMM", 8, False).emit() + LoadImm64("ldur", "LDURBFP64_IMM", 1, flavor="fp").emit() + LoadImm64("ldur", "LDURHFP64_IMM", 2, flavor="fp").emit() + LoadImm64("ldur", "LDURSFP64_IMM", 4, flavor="fp").emit() + LoadImm64("ldur", "LDURDFP64_IMM", 8, flavor="fp").emit() + + LoadImm64("ldtrb", "LDTRB64_IMM", 1, False, True).emit() + LoadImm64("ldtrsb", "LDTRSBW64_IMM", 1, True, True).emit() + LoadImm64("ldtrsb", "LDTRSBX64_IMM", 1, True, True, flavor="widen").emit() + LoadImm64("ldtrh", "LDTRH64_IMM", 2, False, True).emit() + LoadImm64("ldtrsh", "LDTRSHW64_IMM", 2, True, True).emit() + LoadImm64("ldtrsh", "LDTRSHX64_IMM", 2, True, True, flavor="widen").emit() + LoadImm64("ldtrsw", "LDTRSW64_IMM", 4, True, flavor="widen").emit() + LoadImm64("ldtr", "LDTRW64_IMM", 4, False, True).emit() + LoadImm64("ldtr", "LDTRX64_IMM", 8, False, True).emit() + + LoadLit64("ldrsw", "LDRSWL64_LIT", 4, True, \ + literal=True, flavor="widen").emit() + LoadLit64("ldr", "LDRWL64_LIT", 4, False, literal=True).emit() + LoadLit64("ldr", "LDRXL64_LIT", 8, False, literal=True).emit() + LoadLit64("ldr", "LDRSFP64_LIT", 4, literal=True, flavor="fp").emit() + LoadLit64("ldr", "LDRDFP64_LIT", 8, literal=True, flavor="fp").emit() + + LoadRaw64("ldar", "LDARX64", 8, flavor="acquire").emit() + LoadRaw64("ldar", "LDARW64", 4, flavor="acquire").emit() + LoadRaw64("ldarh", "LDARH64", 2, flavor="acquire").emit() + LoadRaw64("ldarb", "LDARB64", 1, flavor="acquire").emit() + + LoadEx64("ldaxr", "LDAXRX64", 8, flavor="acex").emit() + LoadEx64("ldaxr", "LDAXRW64", 4, flavor="acex").emit() + LoadEx64("ldaxrh", "LDAXRH64", 2, flavor="acex").emit() + LoadEx64("ldaxrb", "LDAXRB64", 1, flavor="acex").emit() + + LoadEx64("ldxr", "LDXRX64", 8, flavor="exclusive").emit() + LoadEx64("ldxr", "LDXRW64", 4, flavor="exclusive").emit() + LoadEx64("ldxrh", "LDXRH64", 2, flavor="exclusive").emit() + LoadEx64("ldxrb", "LDXRB64", 1, flavor="exclusive").emit() + + class LoadImmU64(LoadImm64): + decConstBase = 'LoadStoreImmU64' + micro = True + + class LoadImmDU64(LoadImmInst64, LoadDouble64): + decConstBase = 'LoadStoreImmDU64' + base = 'ArmISA::MemoryDImm64' + micro = True + post = False + writeback = False + + class LoadImmDouble64(LoadImmInst64, 
LoadDouble64): + decConstBase = 'LoadStoreImmDU64' + base = 'ArmISA::MemoryDImm64' + micro = False + post = False + writeback = False + + class LoadRegU64(LoadReg64): + decConstBase = 'LoadStoreRegU64' + micro = True + + class LoadLitU64(LoadLit64): + decConstBase = 'LoadStoreLitU64' + micro = True + + LoadImmDouble64("ldaxp", "LDAXPW64", 4, flavor="acexp").emit() + LoadImmDouble64("ldaxp", "LDAXPX64", 8, flavor="acexp").emit() + LoadImmDouble64("ldxp", "LDXPW64", 4, flavor="exp").emit() + LoadImmDouble64("ldxp", "LDXPX64", 8, flavor="exp").emit() + + LoadImmU64("ldrxi_uop", "MicroLdrXImmUop", 8).emit() + LoadRegU64("ldrxr_uop", "MicroLdrXRegUop", 8).emit() + LoadLitU64("ldrxl_uop", "MicroLdrXLitUop", 8, literal=True).emit() + LoadImmU64("ldrfpxi_uop", "MicroLdrFpXImmUop", 8, flavor="fp").emit() + LoadRegU64("ldrfpxr_uop", "MicroLdrFpXRegUop", 8, flavor="fp").emit() + LoadLitU64("ldrfpxl_uop", "MicroLdrFpXLitUop", 8, literal=True, + flavor="fp").emit() + LoadImmU64("ldrqbfpxi_uop", "MicroLdrQBFpXImmUop", + 16, flavor="fp", top = False).emit() + LoadRegU64("ldrqbfpxr_uop", "MicroLdrQBFpXRegUop", + 16, flavor="fp", top = False).emit() + LoadLitU64("ldrqbfpxl_uop", "MicroLdrQBFpXLitUop", + 16, literal=True, flavor="fp", top = False).emit() + LoadImmU64("ldrqtfpxi_uop", "MicroLdrQTFpXImmUop", + 16, flavor="fp", top = True).emit() + LoadRegU64("ldrqtfpxr_uop", "MicroLdrQTFpXRegUop", + 16, flavor="fp", top = True).emit() + LoadLitU64("ldrqtfpxl_uop", "MicroLdrQTFpXLitUop", + 16, literal=True, flavor="fp", top = True).emit() + LoadImmDU64("ldrduxi_uop", "MicroLdrDUXImmUop", 4, sign=False).emit() + LoadImmDU64("ldrdsxi_uop", "MicroLdrDSXImmUop", 4, sign=True).emit() + LoadImmDU64("ldrdfpxi_uop", "MicroLdrDFpXImmUop", 4, flavor="fp").emit() +}}; diff --git a/src/arch/arm/isa/insts/m5ops.isa b/src/arch/arm/isa/insts/m5ops.isa index 06ed34af8..928d1be0d 100644 --- a/src/arch/arm/isa/insts/m5ops.isa +++ b/src/arch/arm/isa/insts/m5ops.isa @@ -1,5 +1,5 @@ // -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010, 2012-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -58,6 +58,7 @@ let {{ armCode = ''' PseudoInst::arm(xc->tcBase()); ''' + armIop = InstObjParams("arm", "Arm", "PredOp", { "code": armCode, "predicate_test": predicateTest }, @@ -69,6 +70,7 @@ let {{ quiesceCode = ''' PseudoInst::quiesce(xc->tcBase()); ''' + quiesceIop = InstObjParams("quiesce", "Quiesce", "PredOp", { "code": quiesceCode, "predicate_test": predicateTest }, @@ -81,6 +83,10 @@ let {{ PseudoInst::quiesceNs(xc->tcBase(), join32to64(R1, R0)); ''' + quiesceNsCode64 = ''' + PseudoInst::quiesceNs(xc->tcBase(), X0); + ''' + quiesceNsIop = InstObjParams("quiesceNs", "QuiesceNs", "PredOp", { "code": quiesceNsCode, "predicate_test": predicateTest }, @@ -89,10 +95,22 @@ let {{ decoder_output += BasicConstructor.subst(quiesceNsIop) exec_output += QuiescePredOpExecute.subst(quiesceNsIop) + quiesceNsIop = InstObjParams("quiesceNs", "QuiesceNs64", "PredOp", + { "code": quiesceNsCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsQuiesce"]) + header_output += BasicDeclare.subst(quiesceNsIop) + decoder_output += BasicConstructor.subst(quiesceNsIop) + exec_output += QuiescePredOpExecute.subst(quiesceNsIop) + quiesceCyclesCode = ''' PseudoInst::quiesceCycles(xc->tcBase(), join32to64(R1, R0)); ''' + quiesceCyclesCode64 = ''' + PseudoInst::quiesceCycles(xc->tcBase(), X0); + ''' + quiesceCyclesIop = InstObjParams("quiesceCycles", "QuiesceCycles", "PredOp", { 
"code": quiesceCyclesCode, "predicate_test": predicateTest }, @@ -101,12 +119,23 @@ let {{ decoder_output += BasicConstructor.subst(quiesceCyclesIop) exec_output += QuiescePredOpExecute.subst(quiesceCyclesIop) + quiesceCyclesIop = InstObjParams("quiesceCycles", "QuiesceCycles64", "PredOp", + { "code": quiesceCyclesCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsQuiesce", "IsUnverifiable"]) + header_output += BasicDeclare.subst(quiesceCyclesIop) + decoder_output += BasicConstructor.subst(quiesceCyclesIop) + exec_output += QuiescePredOpExecute.subst(quiesceCyclesIop) + quiesceTimeCode = ''' uint64_t qt_val = PseudoInst::quiesceTime(xc->tcBase()); R0 = bits(qt_val, 31, 0); R1 = bits(qt_val, 63, 32); ''' + quiesceTimeCode64 = ''' + X0 = PseudoInst::quiesceTime(xc->tcBase()); + ''' quiesceTimeIop = InstObjParams("quiesceTime", "QuiesceTime", "PredOp", { "code": quiesceTimeCode, "predicate_test": predicateTest }, @@ -115,12 +144,23 @@ let {{ decoder_output += BasicConstructor.subst(quiesceTimeIop) exec_output += PredOpExecute.subst(quiesceTimeIop) + quiesceTimeIop = InstObjParams("quiesceTime", "QuiesceTime64", "PredOp", + { "code": quiesceTimeCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsUnverifiable"]) + header_output += BasicDeclare.subst(quiesceTimeIop) + decoder_output += BasicConstructor.subst(quiesceTimeIop) + exec_output += PredOpExecute.subst(quiesceTimeIop) + rpnsCode = ''' uint64_t rpns_val = PseudoInst::rpns(xc->tcBase()); R0 = bits(rpns_val, 31, 0); R1 = bits(rpns_val, 63, 32); ''' + rpnsCode64 = ''' + X0 = PseudoInst::rpns(xc->tcBase()); + ''' rpnsIop = InstObjParams("rpns", "Rpns", "PredOp", { "code": rpnsCode, "predicate_test": predicateTest }, @@ -129,10 +169,22 @@ let {{ decoder_output += BasicConstructor.subst(rpnsIop) exec_output += PredOpExecute.subst(rpnsIop) + rpnsIop = InstObjParams("rpns", "Rpns64", "PredOp", + { "code": rpnsCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsUnverifiable"]) + header_output += BasicDeclare.subst(rpnsIop) + decoder_output += BasicConstructor.subst(rpnsIop) + exec_output += PredOpExecute.subst(rpnsIop) + wakeCpuCode = ''' PseudoInst::wakeCPU(xc->tcBase(), join32to64(R1,R0)); ''' + wakeCpuCode64 = ''' + PseudoInst::wakeCPU(xc->tcBase(), X0); + ''' + wakeCPUIop = InstObjParams("wakeCPU", "WakeCPU", "PredOp", { "code": wakeCpuCode, "predicate_test": predicateTest }, @@ -141,6 +193,14 @@ let {{ decoder_output += BasicConstructor.subst(wakeCPUIop) exec_output += PredOpExecute.subst(wakeCPUIop) + wakeCPUIop = InstObjParams("wakeCPU", "WakeCPU64", "PredOp", + { "code": wakeCpuCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsUnverifiable"]) + header_output += BasicDeclare.subst(wakeCPUIop) + decoder_output += BasicConstructor.subst(wakeCPUIop) + exec_output += PredOpExecute.subst(wakeCPUIop) + deprecated_ivlbIop = InstObjParams("deprecated_ivlb", "Deprecated_ivlb", "PredOp", { "code": '''warn_once("Obsolete M5 ivlb instruction encountered.\\n");''', "predicate_test": predicateTest }) @@ -171,6 +231,11 @@ let {{ m5exit_code = ''' PseudoInst::m5exit(xc->tcBase(), join32to64(R1, R0)); ''' + + m5exit_code64 = ''' + PseudoInst::m5exit(xc->tcBase(), X0); + ''' + m5exitIop = InstObjParams("m5exit", "M5exit", "PredOp", { "code": m5exit_code, "predicate_test": predicateTest }, @@ -190,6 +255,14 @@ let {{ decoder_output += BasicConstructor.subst(m5failIop) exec_output += PredOpExecute.subst(m5failIop) + m5exitIop = InstObjParams("m5exit", "M5exit64", "PredOp", + { 
"code": m5exit_code64, + "predicate_test": predicateTest }, + ["No_OpClass", "IsNonSpeculative"]) + header_output += BasicDeclare.subst(m5exitIop) + decoder_output += BasicConstructor.subst(m5exitIop) + exec_output += PredOpExecute.subst(m5exitIop) + loadsymbolCode = ''' PseudoInst::loadsymbol(xc->tcBase()); ''' @@ -208,6 +281,10 @@ let {{ R1 = bits(ip_val, 63, 32); ''' + initparamCode64 = ''' + X0 = PseudoInst::initParam(xc->tcBase()); + ''' + initparamIop = InstObjParams("initparam", "Initparam", "PredOp", { "code": initparamCode, "predicate_test": predicateTest }, @@ -216,10 +293,21 @@ let {{ decoder_output += BasicConstructor.subst(initparamIop) exec_output += PredOpExecute.subst(initparamIop) + initparamIop = InstObjParams("initparam", "Initparam64", "PredOp", + { "code": initparamCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(initparamIop) + decoder_output += BasicConstructor.subst(initparamIop) + exec_output += PredOpExecute.subst(initparamIop) + resetstats_code = ''' PseudoInst::resetstats(xc->tcBase(), join32to64(R1, R0), join32to64(R3, R2)); ''' + resetstats_code64 = ''' + PseudoInst::resetstats(xc->tcBase(), X0, X1); + ''' resetstatsIop = InstObjParams("resetstats", "Resetstats", "PredOp", { "code": resetstats_code, "predicate_test": predicateTest }, @@ -228,9 +316,22 @@ let {{ decoder_output += BasicConstructor.subst(resetstatsIop) exec_output += PredOpExecute.subst(resetstatsIop) + resetstatsIop = InstObjParams("resetstats", "Resetstats64", "PredOp", + { "code": resetstats_code64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(resetstatsIop) + decoder_output += BasicConstructor.subst(resetstatsIop) + exec_output += PredOpExecute.subst(resetstatsIop) + dumpstats_code = ''' PseudoInst::dumpstats(xc->tcBase(), join32to64(R1, R0), join32to64(R3, R2)); ''' + + dumpstats_code64 = ''' + PseudoInst::dumpstats(xc->tcBase(), X0, X1); + ''' + dumpstatsIop = InstObjParams("dumpstats", "Dumpstats", "PredOp", { "code": dumpstats_code, "predicate_test": predicateTest }, @@ -239,9 +340,22 @@ let {{ decoder_output += BasicConstructor.subst(dumpstatsIop) exec_output += PredOpExecute.subst(dumpstatsIop) + dumpstatsIop = InstObjParams("dumpstats", "Dumpstats64", "PredOp", + { "code": dumpstats_code64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(dumpstatsIop) + decoder_output += BasicConstructor.subst(dumpstatsIop) + exec_output += PredOpExecute.subst(dumpstatsIop) + dumpresetstats_code = ''' PseudoInst::dumpresetstats(xc->tcBase(), join32to64(R1, R0), join32to64(R3, R2)); ''' + + dumpresetstats_code64 = ''' + PseudoInst::dumpresetstats(xc->tcBase(), X0, X1); + ''' + dumpresetstatsIop = InstObjParams("dumpresetstats", "Dumpresetstats", "PredOp", { "code": dumpresetstats_code, "predicate_test": predicateTest }, @@ -250,9 +364,22 @@ let {{ decoder_output += BasicConstructor.subst(dumpresetstatsIop) exec_output += PredOpExecute.subst(dumpresetstatsIop) + dumpresetstatsIop = InstObjParams("dumpresetstats", "Dumpresetstats64", "PredOp", + { "code": dumpresetstats_code64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(dumpresetstatsIop) + decoder_output += BasicConstructor.subst(dumpresetstatsIop) + exec_output += PredOpExecute.subst(dumpresetstatsIop) + m5checkpoint_code = ''' PseudoInst::m5checkpoint(xc->tcBase(), join32to64(R1, R0), join32to64(R3, R2)); ''' + + m5checkpoint_code64 = 
''' + PseudoInst::m5checkpoint(xc->tcBase(), X0, X1); + ''' + m5checkpointIop = InstObjParams("m5checkpoint", "M5checkpoint", "PredOp", { "code": m5checkpoint_code, "predicate_test": predicateTest }, @@ -261,11 +388,27 @@ let {{ decoder_output += BasicConstructor.subst(m5checkpointIop) exec_output += PredOpExecute.subst(m5checkpointIop) + m5checkpointIop = InstObjParams("m5checkpoint", "M5checkpoint64", "PredOp", + { "code": m5checkpoint_code64, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsUnverifiable"]) + header_output += BasicDeclare.subst(m5checkpointIop) + decoder_output += BasicConstructor.subst(m5checkpointIop) + exec_output += PredOpExecute.subst(m5checkpointIop) + m5readfileCode = ''' int n = 4; uint64_t offset = getArgument(xc->tcBase(), n, sizeof(uint64_t), false); R0 = PseudoInst::readfile(xc->tcBase(), R0, join32to64(R3,R2), offset); ''' + + m5readfileCode64 = ''' + int n = 4; + uint64_t offset = getArgument(xc->tcBase(), n, sizeof(uint64_t), false); + n = 6; + X0 = PseudoInst::readfile(xc->tcBase(), (uint32_t)X0, X1, offset); + ''' + m5readfileIop = InstObjParams("m5readfile", "M5readfile", "PredOp", { "code": m5readfileCode, "predicate_test": predicateTest }, @@ -274,6 +417,14 @@ let {{ decoder_output += BasicConstructor.subst(m5readfileIop) exec_output += PredOpExecute.subst(m5readfileIop) + m5readfileIop = InstObjParams("m5readfile", "M5readfile64", "PredOp", + { "code": m5readfileCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsUnverifiable"]) + header_output += BasicDeclare.subst(m5readfileIop) + decoder_output += BasicConstructor.subst(m5readfileIop) + exec_output += PredOpExecute.subst(m5readfileIop) + m5writefileCode = ''' int n = 4; uint64_t offset = getArgument(xc->tcBase(), n, sizeof(uint64_t), false); @@ -282,6 +433,16 @@ let {{ R0 = PseudoInst::writefile(xc->tcBase(), R0, join32to64(R3,R2), offset, filenameAddr); ''' + + m5writefileCode64 = ''' + int n = 4; + uint64_t offset = getArgument(xc->tcBase(), n, sizeof(uint64_t), false); + n = 6; + Addr filenameAddr = getArgument(xc->tcBase(), n, sizeof(Addr), false); + X0 = PseudoInst::writefile(xc->tcBase(), (uint32_t)X0, X1, offset, + filenameAddr); + ''' + m5writefileIop = InstObjParams("m5writefile", "M5writefile", "PredOp", { "code": m5writefileCode, "predicate_test": predicateTest }, @@ -290,6 +451,14 @@ let {{ decoder_output += BasicConstructor.subst(m5writefileIop) exec_output += PredOpExecute.subst(m5writefileIop) + m5writefileIop = InstObjParams("m5writefile", "M5writefile64", "PredOp", + { "code": m5writefileCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(m5writefileIop) + decoder_output += BasicConstructor.subst(m5writefileIop) + exec_output += PredOpExecute.subst(m5writefileIop) + m5breakIop = InstObjParams("m5break", "M5break", "PredOp", { "code": "PseudoInst::debugbreak(xc->tcBase());", "predicate_test": predicateTest }, @@ -309,6 +478,9 @@ let {{ m5addsymbolCode = ''' PseudoInst::addsymbol(xc->tcBase(), join32to64(R1, R0), R2); ''' + m5addsymbolCode64 = ''' + PseudoInst::addsymbol(xc->tcBase(), X0, (uint32_t)X1); + ''' m5addsymbolIop = InstObjParams("m5addsymbol", "M5addsymbol", "PredOp", { "code": m5addsymbolCode, "predicate_test": predicateTest }, @@ -317,8 +489,17 @@ let {{ decoder_output += BasicConstructor.subst(m5addsymbolIop) exec_output += PredOpExecute.subst(m5addsymbolIop) + m5addsymbolIop = InstObjParams("m5addsymbol", "M5addsymbol64", "PredOp", + { "code": m5addsymbolCode64, + "predicate_test": 
predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(m5addsymbolIop) + decoder_output += BasicConstructor.subst(m5addsymbolIop) + exec_output += PredOpExecute.subst(m5addsymbolIop) + m5panicCode = '''panic("M5 panic instruction called at pc=%#x.", xc->pcState().pc());''' + m5panicIop = InstObjParams("m5panic", "M5panic", "PredOp", { "code": m5panicCode, "predicate_test": predicateTest }, @@ -332,6 +513,13 @@ let {{ join32to64(R1, R0), join32to64(R3, R2) );''' + + m5workbeginCode64 = '''PseudoInst::workbegin( + xc->tcBase(), + X0, + X1 + );''' + m5workbeginIop = InstObjParams("m5workbegin", "M5workbegin", "PredOp", { "code": m5workbeginCode, "predicate_test": predicateTest }, @@ -340,11 +528,26 @@ let {{ decoder_output += BasicConstructor.subst(m5workbeginIop) exec_output += PredOpExecute.subst(m5workbeginIop) + m5workbeginIop = InstObjParams("m5workbegin", "M5workbegin64", "PredOp", + { "code": m5workbeginCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(m5workbeginIop) + decoder_output += BasicConstructor.subst(m5workbeginIop) + exec_output += PredOpExecute.subst(m5workbeginIop) + m5workendCode = '''PseudoInst::workend( xc->tcBase(), join32to64(R1, R0), join32to64(R3, R2) );''' + + m5workendCode64 = '''PseudoInst::workend( + xc->tcBase(), + X0, + X1 + );''' + m5workendIop = InstObjParams("m5workend", "M5workend", "PredOp", { "code": m5workendCode, "predicate_test": predicateTest }, @@ -353,4 +556,11 @@ let {{ decoder_output += BasicConstructor.subst(m5workendIop) exec_output += PredOpExecute.subst(m5workendIop) + m5workendIop = InstObjParams("m5workend", "M5workend64", "PredOp", + { "code": m5workendCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(m5workendIop) + decoder_output += BasicConstructor.subst(m5workendIop) + exec_output += PredOpExecute.subst(m5workendIop) }}; diff --git a/src/arch/arm/isa/insts/macromem.isa b/src/arch/arm/isa/insts/macromem.isa index db36a3fff..f164595dd 100644 --- a/src/arch/arm/isa/insts/macromem.isa +++ b/src/arch/arm/isa/insts/macromem.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -91,7 +91,8 @@ let {{ SCTLR sctlr = Sctlr; CPSR new_cpsr = - cpsrWriteByInstr(old_cpsr, Spsr, 0xF, true, sctlr.nmfi); + cpsrWriteByInstr(old_cpsr, Spsr, Scr, Nsacr, 0xF, true, + sctlr.nmfi, xc->tcBase()); Cpsr = ~CondCodesMask & new_cpsr; CondCodesNZ = new_cpsr.nz; CondCodesC = new_cpsr.c; @@ -158,8 +159,8 @@ let {{ header_output = decoder_output = exec_output = '' - loadIops = (microLdrUopIop, microLdrRetUopIop, - microLdrFpUopIop, microLdrDBFpUopIop, microLdrDTFpUopIop) + loadIops = (microLdrUopIop, microLdrRetUopIop, microLdrFpUopIop, + microLdrDBFpUopIop, microLdrDTFpUopIop) storeIops = (microStrUopIop, microStrFpUopIop, microStrDBFpUopIop, microStrDTFpUopIop) for iop in loadIops + storeIops: @@ -178,7 +179,7 @@ let {{ let {{ exec_output = header_output = '' - eaCode = 'EA = URa + imm;' + eaCode = 'EA = XURa + imm;' for size in (1, 2, 3, 4, 6, 8, 12, 16): # Set up the memory access. 
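The hunks before and after this point all follow the same code-generation idiom: a Python let-block loops over the instruction variants (size, signedness, S/D precision, rounding mode, microop vs. full instruction), fills per-variant C++ snippets into declare/constructor/execute templates, and appends the results to header_output, decoder_output and exec_output. The following stand-alone sketch models that idiom in plain Python; the two template strings are invented stand-ins, not gem5's real InstObjParams/*Declare/*Constructor machinery, and only the fplibNeg calls are taken from the diff itself.

# Illustration only: a minimal model of the variant-building pattern above.
declare_templ = "class %(Name)s : public FpRegRegOp { /* declaration */ };\n"
exec_templ    = "Fault %(Name)s::execute(...) { %(code)s return fault; }\n"

header_output = ""
exec_output = ""

def build_unary_fp_op(name, Name, single_op, double_op):
    """Emit an S (single) and a D (double) variant of one fp instruction."""
    global header_output, exec_output
    for op, suffix in ((single_op, "S"), (double_op, "D")):
        subs = {"Name": Name + suffix, "code": "cDest = %s;" % op}
        header_output += declare_templ % subs
        exec_output += exec_templ % subs

build_unary_fp_op("fneg", "FNeg",
                  "fplibNeg<uint32_t>(cOp1)", "fplibNeg<uint64_t>(cOp1)")

Running this accumulates one generated class and execute body per variant, which is exactly how the real let-blocks build up the decoder and exec output for the dozens of AArch64 variants added in this patch.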
@@ -592,6 +593,26 @@ let {{ URa = URb + shift_rm_imm(URc, shiftAmt, shiftType, OptShiftRmCondCodesC); ''' + microAddXiUopIop = InstObjParams('addxi_uop', 'MicroAddXiUop', + 'MicroIntImmXOp', + 'XURa = XURb + imm;', + ['IsMicroop']) + + microAddXiSpAlignUopIop = InstObjParams('addxi_uop', 'MicroAddXiSpAlignUop', + 'MicroIntImmXOp', ''' + if (isSP((IntRegIndex) urb) && bits(XURb, 3, 0) && + SPAlignmentCheckEnabled(xc->tcBase())) { + return new SPAlignmentFault(); + } + XURa = XURb + imm; + ''', ['IsMicroop']) + + microAddXERegUopIop = InstObjParams('addxr_uop', 'MicroAddXERegUop', + 'MicroIntRegXOp', + 'XURa = XURb + ' + \ + 'extendReg64(XURc, type, shiftAmt, 64);', + ['IsMicroop']) + microAddUopIop = InstObjParams('add_uop', 'MicroAddUop', 'MicroIntRegOp', {'code': microAddUopCode, @@ -604,6 +625,11 @@ let {{ 'predicate_test': predicateTest}, ['IsMicroop']) + microSubXiUopIop = InstObjParams('subxi_uop', 'MicroSubXiUop', + 'MicroIntImmXOp', + 'XURa = XURb - imm;', + ['IsMicroop']) + microSubUopCode = ''' URa = URb - shift_rm_imm(URc, shiftAmt, shiftType, OptShiftRmCondCodesC); ''' @@ -631,8 +657,8 @@ let {{ SCTLR sctlr = Sctlr; pNPC = URa; CPSR new_cpsr = - cpsrWriteByInstr(cpsrOrCondCodes, URb, - 0xF, true, sctlr.nmfi); + cpsrWriteByInstr(cpsrOrCondCodes, URb, Scr, Nsacr, + 0xF, true, sctlr.nmfi, xc->tcBase()); Cpsr = ~CondCodesMask & new_cpsr; NextThumb = new_cpsr.t; NextJazelle = new_cpsr.j; @@ -651,25 +677,37 @@ let {{ ['IsMicroop']) header_output = MicroIntImmDeclare.subst(microAddiUopIop) + \ + MicroIntImmDeclare.subst(microAddXiUopIop) + \ + MicroIntImmDeclare.subst(microAddXiSpAlignUopIop) + \ MicroIntImmDeclare.subst(microSubiUopIop) + \ + MicroIntImmDeclare.subst(microSubXiUopIop) + \ MicroIntRegDeclare.subst(microAddUopIop) + \ MicroIntRegDeclare.subst(microSubUopIop) + \ + MicroIntXERegDeclare.subst(microAddXERegUopIop) + \ MicroIntMovDeclare.subst(microUopRegMovIop) + \ MicroIntMovDeclare.subst(microUopRegMovRetIop) + \ MicroSetPCCPSRDeclare.subst(microUopSetPCCPSRIop) decoder_output = MicroIntImmConstructor.subst(microAddiUopIop) + \ + MicroIntImmXConstructor.subst(microAddXiUopIop) + \ + MicroIntImmXConstructor.subst(microAddXiSpAlignUopIop) + \ MicroIntImmConstructor.subst(microSubiUopIop) + \ + MicroIntImmXConstructor.subst(microSubXiUopIop) + \ MicroIntRegConstructor.subst(microAddUopIop) + \ MicroIntRegConstructor.subst(microSubUopIop) + \ + MicroIntXERegConstructor.subst(microAddXERegUopIop) + \ MicroIntMovConstructor.subst(microUopRegMovIop) + \ MicroIntMovConstructor.subst(microUopRegMovRetIop) + \ MicroSetPCCPSRConstructor.subst(microUopSetPCCPSRIop) exec_output = PredOpExecute.subst(microAddiUopIop) + \ + BasicExecute.subst(microAddXiUopIop) + \ + BasicExecute.subst(microAddXiSpAlignUopIop) + \ PredOpExecute.subst(microSubiUopIop) + \ + BasicExecute.subst(microSubXiUopIop) + \ PredOpExecute.subst(microAddUopIop) + \ PredOpExecute.subst(microSubUopIop) + \ + BasicExecute.subst(microAddXERegUopIop) + \ PredOpExecute.subst(microUopRegMovIop) + \ PredOpExecute.subst(microUopRegMovRetIop) + \ PredOpExecute.subst(microUopSetPCCPSRIop) @@ -681,6 +719,25 @@ let {{ header_output = MacroMemDeclare.subst(iop) decoder_output = MacroMemConstructor.subst(iop) + iop = InstObjParams("ldpstp", "LdpStp", 'PairMemOp', "", []) + header_output += PairMemDeclare.subst(iop) + decoder_output += PairMemConstructor.subst(iop) + + iopImm = InstObjParams("bigfpmemimm", "BigFpMemImm", "BigFpMemImmOp", "") + iopPre = InstObjParams("bigfpmempre", "BigFpMemPre", "BigFpMemPreOp", "") + iopPost = 
InstObjParams("bigfpmempost", "BigFpMemPost", "BigFpMemPostOp", "") + for iop in (iopImm, iopPre, iopPost): + header_output += BigFpMemImmDeclare.subst(iop) + decoder_output += BigFpMemImmConstructor.subst(iop) + + iop = InstObjParams("bigfpmemreg", "BigFpMemReg", "BigFpMemRegOp", "") + header_output += BigFpMemRegDeclare.subst(iop) + decoder_output += BigFpMemRegConstructor.subst(iop) + + iop = InstObjParams("bigfpmemlit", "BigFpMemLit", "BigFpMemLitOp", "") + header_output += BigFpMemLitDeclare.subst(iop) + decoder_output += BigFpMemLitConstructor.subst(iop) + iop = InstObjParams("vldmult", "VldMult", 'VldMultOp', "", []) header_output += VMemMultDeclare.subst(iop) decoder_output += VMemMultConstructor.subst(iop) diff --git a/src/arch/arm/isa/insts/mem.isa b/src/arch/arm/isa/insts/mem.isa index c39f1b14f..aed6bab0d 100644 --- a/src/arch/arm/isa/insts/mem.isa +++ b/src/arch/arm/isa/insts/mem.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2012 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -48,8 +48,8 @@ let {{ self.constructTemplate = eval(self.decConstBase + 'Constructor') def fillTemplates(self, name, Name, codeBlobs, memFlags, instFlags, - base = 'Memory', wbDecl = None, pcDecl = None, - rasPop = False): + base='Memory', wbDecl=None, pcDecl=None, + rasPop=False, size=4, sign=False, faCode=None): # Make sure flags are in lists (convert to lists if not). memFlags = makeList(memFlags) instFlags = makeList(instFlags) @@ -63,6 +63,22 @@ let {{ codeBlobs["ea_code"] = eaCode + if faCode: + # For AArch64 the fa_code snippet comes already assembled here + codeBlobs["fa_code"] = faCode + elif wbDecl == None: + codeBlobs["fa_code"] = ''' + if (dest != INTREG_PC) { + fault->annotate(ArmFault::SAS, %s); + fault->annotate(ArmFault::SSE, %s); + fault->annotate(ArmFault::SRT, dest); + } + ''' %("0" if size == 1 else + "1" if size == 2 else "2", + "true" if sign else "false") + else: + codeBlobs["fa_code"] = '' + macroName = Name instFlagsCopy = list(instFlags) codeBlobsCopy = dict(codeBlobs) @@ -108,6 +124,7 @@ let {{ "use_uops" : use_uops, "use_pc" : use_pc, "use_wb" : use_wb, + "fa_code" : '', "is_ras_pop" : is_ras_pop }, ['IsMacroop']) header_output += self.declareTemplate.subst(iop) @@ -176,8 +193,13 @@ let {{ return Name def buildMemSuffix(sign, size): - if size == 4: - memSuffix = '' + if size == 8: + memSuffix = '_ud' + elif size == 4: + if sign: + memSuffix = '_sw' + else: + memSuffix = '_uw' elif size == 2: if sign: memSuffix = '_sh' diff --git a/src/arch/arm/isa/insts/misc.isa b/src/arch/arm/isa/insts/misc.isa index b8425a240..678a125fb 100644 --- a/src/arch/arm/isa/insts/misc.isa +++ b/src/arch/arm/isa/insts/misc.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010-2012 ARM Limited +// Copyright (c) 2010-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -40,21 +40,102 @@ let {{ svcCode = ''' - if (FullSystem) { - fault = new SupervisorCall; - } else { - fault = new SupervisorCall(machInst); - } + fault = new SupervisorCall(machInst, imm); ''' - svcIop = InstObjParams("svc", "Svc", "PredOp", + svcIop = InstObjParams("svc", "Svc", "ImmOp", { "code": svcCode, "predicate_test": predicateTest }, ["IsSyscall", "IsNonSpeculative", "IsSerializeAfter"]) - header_output = BasicDeclare.subst(svcIop) - decoder_output = BasicConstructor.subst(svcIop) + header_output = ImmOpDeclare.subst(svcIop) + 
decoder_output = ImmOpConstructor.subst(svcIop) exec_output = PredOpExecute.subst(svcIop) + smcCode = ''' + HCR hcr = Hcr; + CPSR cpsr = Cpsr; + SCR scr = Scr; + + if ((cpsr.mode != MODE_USER) && FullSystem) { + if (ArmSystem::haveVirtualization(xc->tcBase()) && + !inSecureState(scr, cpsr) && (cpsr.mode != MODE_HYP) && hcr.tsc) { + fault = new HypervisorTrap(machInst, 0, EC_SMC_TO_HYP); + } else { + if (scr.scd) { + fault = disabledFault(); + } else { + fault = new SecureMonitorCall(machInst); + } + } + } else { + fault = disabledFault(); + } + ''' + + smcIop = InstObjParams("smc", "Smc", "PredOp", + { "code": smcCode, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsSerializeAfter"]) + header_output += BasicDeclare.subst(smcIop) + decoder_output += BasicConstructor.subst(smcIop) + exec_output += PredOpExecute.subst(smcIop) + + hvcCode = ''' + CPSR cpsr = Cpsr; + SCR scr = Scr; + + // Filter out the various cases where this instruction isn't defined + if (!FullSystem || !ArmSystem::haveVirtualization(xc->tcBase()) || + (cpsr.mode == MODE_USER) || + (ArmSystem::haveSecurity(xc->tcBase()) && (!scr.ns || !scr.hce))) { + fault = disabledFault(); + } else { + fault = new HypervisorCall(machInst, imm); + } + ''' + + hvcIop = InstObjParams("hvc", "Hvc", "ImmOp", + { "code": hvcCode, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsSerializeAfter"]) + header_output += ImmOpDeclare.subst(hvcIop) + decoder_output += ImmOpConstructor.subst(hvcIop) + exec_output += PredOpExecute.subst(hvcIop) + + eretCode = ''' + SCTLR sctlr = Sctlr; + CPSR old_cpsr = Cpsr; + old_cpsr.nz = CondCodesNZ; + old_cpsr.c = CondCodesC; + old_cpsr.v = CondCodesV; + old_cpsr.ge = CondCodesGE; + + CPSR new_cpsr = cpsrWriteByInstr(old_cpsr, Spsr, Scr, Nsacr, 0xF, + true, sctlr.nmfi, xc->tcBase()); + Cpsr = ~CondCodesMask & new_cpsr; + CondCodesNZ = new_cpsr.nz; + CondCodesC = new_cpsr.c; + CondCodesV = new_cpsr.v; + CondCodesGE = new_cpsr.ge; + + NextThumb = (new_cpsr).t; + NextJazelle = (new_cpsr).j; + NextItState = (((new_cpsr).it2 << 2) & 0xFC) + | ((new_cpsr).it1 & 0x3); + + NPC = (old_cpsr.mode == MODE_HYP) ? 
ElrHyp : LR; + ''' + + eretIop = InstObjParams("eret", "Eret", "PredOp", + { "code": eretCode, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsSerializeAfter"]) + header_output += BasicDeclare.subst(eretIop) + decoder_output += BasicConstructor.subst(eretIop) + exec_output += PredOpExecute.subst(eretIop) + + + }}; let {{ @@ -87,6 +168,59 @@ let {{ decoder_output += MrsConstructor.subst(mrsSpsrIop) exec_output += PredOpExecute.subst(mrsSpsrIop) + mrsBankedRegCode = ''' + bool isIntReg; + int regIdx; + + if (decodeMrsMsrBankedReg(byteMask, r, isIntReg, regIdx, Cpsr, Scr, Nsacr)) { + if (isIntReg) { + Dest = DecodedBankedIntReg; + } else { + Dest = xc->readMiscReg(regIdx); + } + } else { + return new UndefinedInstruction(machInst, false, mnemonic); + } + ''' + mrsBankedRegIop = InstObjParams("mrs", "MrsBankedReg", "MrsOp", + { "code": mrsBankedRegCode, + "predicate_test": predicateTest }, + ["IsSerializeBefore"]) + header_output += MrsBankedRegDeclare.subst(mrsBankedRegIop) + decoder_output += MrsBankedRegConstructor.subst(mrsBankedRegIop) + exec_output += PredOpExecute.subst(mrsBankedRegIop) + + msrBankedRegCode = ''' + bool isIntReg; + int regIdx; + + if (decodeMrsMsrBankedReg(byteMask, r, isIntReg, regIdx, Cpsr, Scr, Nsacr)) { + if (isIntReg) { + // This is a bit nasty, you would have thought that + // DecodedBankedIntReg wouldn't be written to unless the + // conditions on the IF statements above are met, however if + // you look at the generated C code you'll find that they are. + // However this is safe as DecodedBankedIntReg (which is used + // in operands.isa to get the index of DecodedBankedIntReg) + // will return INTREG_DUMMY if its not a valid integer + // register, so redirecting the write to somewhere we don't + // care about. 
+ DecodedBankedIntReg = Op1; + } else { + xc->setMiscReg(regIdx, Op1); + } + } else { + return new UndefinedInstruction(machInst, false, mnemonic); + } + ''' + msrBankedRegIop = InstObjParams("msr", "MsrBankedReg", "MsrRegOp", + { "code": msrBankedRegCode, + "predicate_test": predicateTest }, + ["IsSerializeAfter"]) + header_output += MsrBankedRegDeclare.subst(msrBankedRegIop) + decoder_output += MsrBankedRegConstructor.subst(msrBankedRegIop) + exec_output += PredOpExecute.subst(msrBankedRegIop) + msrCpsrRegCode = ''' SCTLR sctlr = Sctlr; CPSR old_cpsr = Cpsr; @@ -96,7 +230,8 @@ let {{ old_cpsr.ge = CondCodesGE; CPSR new_cpsr = - cpsrWriteByInstr(old_cpsr, Op1, byteMask, false, sctlr.nmfi); + cpsrWriteByInstr(old_cpsr, Op1, Scr, Nsacr, byteMask, false, + sctlr.nmfi, xc->tcBase()); Cpsr = ~CondCodesMask & new_cpsr; CondCodesNZ = new_cpsr.nz; CondCodesC = new_cpsr.c; @@ -128,7 +263,8 @@ let {{ old_cpsr.v = CondCodesV; old_cpsr.ge = CondCodesGE; CPSR new_cpsr = - cpsrWriteByInstr(old_cpsr, imm, byteMask, false, sctlr.nmfi); + cpsrWriteByInstr(old_cpsr, imm, Scr, Nsacr, byteMask, false, + sctlr.nmfi, xc->tcBase()); Cpsr = ~CondCodesMask & new_cpsr; CondCodesNZ = new_cpsr.nz; CondCodesC = new_cpsr.c; @@ -488,12 +624,10 @@ let {{ decoder_output += BasicConstructor.subst(bkptIop) exec_output += BasicExecute.subst(bkptIop) - nopIop = InstObjParams("nop", "NopInst", "PredOp", \ - { "code" : "", "predicate_test" : predicateTest }, - ['IsNop']) + nopIop = InstObjParams("nop", "NopInst", "ArmStaticInst", "", ['IsNop']) header_output += BasicDeclare.subst(nopIop) - decoder_output += BasicConstructor.subst(nopIop) - exec_output += PredOpExecute.subst(nopIop) + decoder_output += BasicConstructor64.subst(nopIop) + exec_output += BasicExecute.subst(nopIop) yieldIop = InstObjParams("yield", "YieldInst", "PredOp", \ { "code" : "", "predicate_test" : predicateTest }) @@ -502,14 +636,31 @@ let {{ exec_output += PredOpExecute.subst(yieldIop) wfeCode = ''' - // WFE Sleeps if SevMailbox==0 and no unmasked interrupts are pending + HCR hcr = Hcr; + CPSR cpsr = Cpsr; + SCR scr = Scr64; + SCTLR sctlr = Sctlr; + + // WFE Sleeps if SevMailbox==0 and no unmasked interrupts are pending, + ThreadContext *tc = xc->tcBase(); if (SevMailbox == 1) { SevMailbox = 0; - PseudoInst::quiesceSkip(xc->tcBase()); - } else if (xc->tcBase()->getCpuPtr()->getInterruptController()->checkInterrupts(xc->tcBase())) { - PseudoInst::quiesceSkip(xc->tcBase()); + PseudoInst::quiesceSkip(tc); + } else if (tc->getCpuPtr()->getInterruptController()->checkInterrupts(tc)) { + PseudoInst::quiesceSkip(tc); + } else if (cpsr.el == EL0 && !sctlr.ntwe) { + PseudoInst::quiesceSkip(tc); + fault = new SupervisorTrap(machInst, 0x1E00001, EC_TRAPPED_WFI_WFE); + } else if (ArmSystem::haveVirtualization(tc) && + !inSecureState(scr, cpsr) && (cpsr.mode != MODE_HYP) && + hcr.twe) { + PseudoInst::quiesceSkip(tc); + fault = new HypervisorTrap(machInst, 0x1E00001, EC_TRAPPED_WFI_WFE); + } else if (ArmSystem::haveSecurity(tc) && cpsr.el != EL3 && scr.twe) { + PseudoInst::quiesceSkip(tc); + fault = new SecureMonitorTrap(machInst, 0x1E00001, EC_TRAPPED_WFI_WFE); } else { - PseudoInst::quiesce(xc->tcBase()); + PseudoInst::quiesce(tc); } ''' wfePredFixUpCode = ''' @@ -528,12 +679,30 @@ let {{ exec_output += QuiescePredOpExecuteWithFixup.subst(wfeIop) wfiCode = ''' + HCR hcr = Hcr; + CPSR cpsr = Cpsr; + SCR scr = Scr64; + SCTLR sctlr = Sctlr; + // WFI doesn't sleep if interrupts are pending (masked or not) - if 
(xc->tcBase()->getCpuPtr()->getInterruptController()->checkRaw()) { - PseudoInst::quiesceSkip(xc->tcBase()); + ThreadContext *tc = xc->tcBase(); + if (tc->getCpuPtr()->getInterruptController()->checkWfiWake(hcr, cpsr, + scr)) { + PseudoInst::quiesceSkip(tc); + } else if (cpsr.el == EL0 && !sctlr.ntwi) { + PseudoInst::quiesceSkip(tc); + fault = new SupervisorTrap(machInst, 0x1E00000, EC_TRAPPED_WFI_WFE); + } else if (ArmSystem::haveVirtualization(tc) && hcr.twi && + (cpsr.mode != MODE_HYP) && !inSecureState(scr, cpsr)) { + PseudoInst::quiesceSkip(tc); + fault = new HypervisorTrap(machInst, 0x1E00000, EC_TRAPPED_WFI_WFE); + } else if (ArmSystem::haveSecurity(tc) && cpsr.el != EL3 && scr.twi) { + PseudoInst::quiesceSkip(tc); + fault = new SecureMonitorTrap(machInst, 0x1E00000, EC_TRAPPED_WFI_WFE); } else { - PseudoInst::quiesce(xc->tcBase()); + PseudoInst::quiesce(tc); } + tc->getCpuPtr()->clearInterrupt(INT_ABT, 0); ''' wfiIop = InstObjParams("wfi", "WfiInst", "PredOp", \ { "code" : wfiCode, "predicate_test" : predicateTest }, @@ -564,6 +733,16 @@ let {{ decoder_output += BasicConstructor.subst(sevIop) exec_output += PredOpExecute.subst(sevIop) + sevlCode = ''' + SevMailbox = 1; + ''' + sevlIop = InstObjParams("sevl", "SevlInst", "PredOp", \ + { "code" : sevlCode, "predicate_test" : predicateTest }, + ["IsNonSpeculative", "IsSquashAfter", "IsUnverifiable"]) + header_output += BasicDeclare.subst(sevlIop) + decoder_output += BasicConstructor.subst(sevlIop) + exec_output += BasicExecute.subst(sevlIop) + itIop = InstObjParams("it", "ItInst", "PredOp", \ { "code" : ";", "predicate_test" : predicateTest }, []) @@ -571,10 +750,7 @@ let {{ decoder_output += BasicConstructor.subst(itIop) exec_output += PredOpExecute.subst(itIop) unknownCode = ''' - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(machInst, true); + return new UndefinedInstruction(machInst, true); ''' unknownIop = InstObjParams("unknown", "Unknown", "UnknownOp", \ { "code": unknownCode, @@ -626,108 +802,152 @@ let {{ exec_output += PredOpExecute.subst(bfiIop) mrc14code = ''' - CPSR cpsr = Cpsr; - if (cpsr.mode == MODE_USER) { - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(false, mnemonic); + MiscRegIndex miscReg = (MiscRegIndex) xc->tcBase()->flattenMiscIndex(op1); + if (!canReadCoprocReg(miscReg, Scr, Cpsr, xc->tcBase())) { + return new UndefinedInstruction(machInst, false, mnemonic); + } + if (mcrMrc14TrapToHyp((const MiscRegIndex) op1, Hcr, Cpsr, Scr, Hdcr, + Hstr, Hcptr, imm)) { + return new HypervisorTrap(machInst, imm, EC_TRAPPED_CP14_MCR_MRC); } Dest = MiscOp1; ''' - mrc14Iop = InstObjParams("mrc", "Mrc14", "RegRegOp", + mrc14Iop = InstObjParams("mrc", "Mrc14", "RegRegImmOp", { "code": mrc14code, "predicate_test": predicateTest }, []) - header_output += RegRegOpDeclare.subst(mrc14Iop) - decoder_output += RegRegOpConstructor.subst(mrc14Iop) + header_output += RegRegImmOpDeclare.subst(mrc14Iop) + decoder_output += RegRegImmOpConstructor.subst(mrc14Iop) exec_output += PredOpExecute.subst(mrc14Iop) mcr14code = ''' - CPSR cpsr = Cpsr; - if (cpsr.mode == MODE_USER) { - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(false, mnemonic); + MiscRegIndex miscReg = (MiscRegIndex) xc->tcBase()->flattenMiscIndex(dest); + if (!canWriteCoprocReg(miscReg, Scr, Cpsr, xc->tcBase())) { + return new UndefinedInstruction(machInst, false, mnemonic); + } + if (mcrMrc14TrapToHyp(miscReg, Hcr, Cpsr, Scr, Hdcr, + Hstr, 
Hcptr, imm)) { + return new HypervisorTrap(machInst, imm, EC_TRAPPED_CP14_MCR_MRC); } MiscDest = Op1; ''' - mcr14Iop = InstObjParams("mcr", "Mcr14", "RegRegOp", + mcr14Iop = InstObjParams("mcr", "Mcr14", "RegRegImmOp", { "code": mcr14code, "predicate_test": predicateTest }, ["IsSerializeAfter","IsNonSpeculative"]) - header_output += RegRegOpDeclare.subst(mcr14Iop) - decoder_output += RegRegOpConstructor.subst(mcr14Iop) + header_output += RegRegImmOpDeclare.subst(mcr14Iop) + decoder_output += RegRegImmOpConstructor.subst(mcr14Iop) exec_output += PredOpExecute.subst(mcr14Iop) - mrc14UserIop = InstObjParams("mrc", "Mrc14User", "RegRegOp", - { "code": "Dest = MiscOp1;", - "predicate_test": predicateTest }, []) - header_output += RegRegOpDeclare.subst(mrc14UserIop) - decoder_output += RegRegOpConstructor.subst(mrc14UserIop) - exec_output += PredOpExecute.subst(mrc14UserIop) - - mcr14UserIop = InstObjParams("mcr", "Mcr14User", "RegRegOp", - { "code": "MiscDest = Op1", - "predicate_test": predicateTest }, - ["IsSerializeAfter","IsNonSpeculative"]) - header_output += RegRegOpDeclare.subst(mcr14UserIop) - decoder_output += RegRegOpConstructor.subst(mcr14UserIop) - exec_output += PredOpExecute.subst(mcr14UserIop) - mrc15code = ''' - CPSR cpsr = Cpsr; - if (cpsr.mode == MODE_USER) { - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(false, mnemonic); + int preFlatOp1 = flattenMiscRegNsBanked(op1, xc->tcBase()); + MiscRegIndex miscReg = (MiscRegIndex) + xc->tcBase()->flattenMiscIndex(preFlatOp1); + bool hypTrap = mcrMrc15TrapToHyp(miscReg, Hcr, Cpsr, Scr, Hdcr, Hstr, + Hcptr, imm); + bool canRead = canReadCoprocReg(miscReg, Scr, Cpsr, xc->tcBase()); + + // if we're in non-secure PL1 mode then we can trap regardless of whether + // the register is accessible, in other modes we trap only if the register + // IS accessible. + if (!canRead & !(hypTrap & !inUserMode(Cpsr) & !inSecureState(Scr, Cpsr))) { + return new UndefinedInstruction(machInst, false, mnemonic); } - Dest = MiscOp1; + if (hypTrap) { + return new HypervisorTrap(machInst, imm, EC_TRAPPED_CP15_MCR_MRC); + } + Dest = MiscNsBankedOp1; ''' - mrc15Iop = InstObjParams("mrc", "Mrc15", "RegRegOp", + mrc15Iop = InstObjParams("mrc", "Mrc15", "RegRegImmOp", { "code": mrc15code, "predicate_test": predicateTest }, []) - header_output += RegRegOpDeclare.subst(mrc15Iop) - decoder_output += RegRegOpConstructor.subst(mrc15Iop) + header_output += RegRegImmOpDeclare.subst(mrc15Iop) + decoder_output += RegRegImmOpConstructor.subst(mrc15Iop) exec_output += PredOpExecute.subst(mrc15Iop) mcr15code = ''' - CPSR cpsr = Cpsr; - if (cpsr.mode == MODE_USER) { - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(false, mnemonic); + int preFlatDest = flattenMiscRegNsBanked(dest, xc->tcBase()); + MiscRegIndex miscReg = (MiscRegIndex) + xc->tcBase()->flattenMiscIndex(preFlatDest); + bool hypTrap = mcrMrc15TrapToHyp(miscReg, Hcr, Cpsr, Scr, Hdcr, Hstr, + Hcptr, imm); + bool canWrite = canWriteCoprocReg(miscReg, Scr, Cpsr, xc->tcBase()); + + // if we're in non-secure PL1 mode then we can trap regardless of whether + // the register is accessible, in other modes we trap only if the register + // IS accessible. 
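The rewritten mrc15/mcr15 bodies above order their checks so that an inaccessible register normally raises an undefined-instruction fault, except that a hypervisor trap taken from non-secure PL1 wins even when the register is not accessible; only after that does the move itself happen. A compact model of that priority, with plain booleans standing in for canReadCoprocReg/canWriteCoprocReg, mcrMrc15TrapToHyp, inUserMode and inSecureState (a sketch only, not the gem5 helpers themselves):

    #include <cstdio>

    enum class CoprocFault { None, Undefined, HypTrap };

    // accessible ~ canReadCoprocReg/canWriteCoprocReg, hypTrap ~ mcrMrc15TrapToHyp,
    // userMode ~ inUserMode(Cpsr), secure ~ inSecureState(Scr, Cpsr).
    CoprocFault checkCp15Access(bool accessible, bool hypTrap,
                                bool userMode, bool secure)
    {
        // From non-secure PL1 the hyp trap applies regardless of accessibility;
        // in every other mode an inaccessible register is simply UNDEFINED.
        bool hypTrapWins = hypTrap && !userMode && !secure;
        if (!accessible && !hypTrapWins)
            return CoprocFault::Undefined;
        if (hypTrap)
            return CoprocFault::HypTrap;
        return CoprocFault::None;      // perform the actual register move
    }

    int main()
    {
        // Non-secure PL1, register inaccessible, trap configured: the
        // hypervisor trap takes priority over the undefined fault.
        auto f = checkCp15Access(false, true, false, false);
        std::printf("fault = %d (2 == HypTrap)\n", static_cast<int>(f));
        return 0;
    }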
+ if (!canWrite & !(hypTrap & !inUserMode(Cpsr) & !inSecureState(Scr, Cpsr))) { + return new UndefinedInstruction(machInst, false, mnemonic); } - MiscDest = Op1; + if (hypTrap) { + return new HypervisorTrap(machInst, imm, EC_TRAPPED_CP15_MCR_MRC); + } + MiscNsBankedDest = Op1; ''' - mcr15Iop = InstObjParams("mcr", "Mcr15", "RegRegOp", + mcr15Iop = InstObjParams("mcr", "Mcr15", "RegRegImmOp", { "code": mcr15code, "predicate_test": predicateTest }, ["IsSerializeAfter","IsNonSpeculative"]) - header_output += RegRegOpDeclare.subst(mcr15Iop) - decoder_output += RegRegOpConstructor.subst(mcr15Iop) + header_output += RegRegImmOpDeclare.subst(mcr15Iop) + decoder_output += RegRegImmOpConstructor.subst(mcr15Iop) exec_output += PredOpExecute.subst(mcr15Iop) - mrc15UserIop = InstObjParams("mrc", "Mrc15User", "RegRegOp", - { "code": "Dest = MiscOp1;", - "predicate_test": predicateTest }, []) - header_output += RegRegOpDeclare.subst(mrc15UserIop) - decoder_output += RegRegOpConstructor.subst(mrc15UserIop) - exec_output += PredOpExecute.subst(mrc15UserIop) - - mcr15UserIop = InstObjParams("mcr", "Mcr15User", "RegRegOp", - { "code": "MiscDest = Op1", - "predicate_test": predicateTest }, - ["IsSerializeAfter","IsNonSpeculative"]) - header_output += RegRegOpDeclare.subst(mcr15UserIop) - decoder_output += RegRegOpConstructor.subst(mcr15UserIop) - exec_output += PredOpExecute.subst(mcr15UserIop) + + mrrc15code = ''' + int preFlatOp1 = flattenMiscRegNsBanked(op1, xc->tcBase()); + MiscRegIndex miscReg = (MiscRegIndex) + xc->tcBase()->flattenMiscIndex(preFlatOp1); + bool hypTrap = mcrrMrrc15TrapToHyp(miscReg, Cpsr, Scr, Hstr, Hcr, imm); + bool canRead = canReadCoprocReg(miscReg, Scr, Cpsr, xc->tcBase()); + + // if we're in non-secure PL1 mode then we can trap regardless of whether + // the register is accessible, in other modes we trap only if the register + // IS accessible. + if (!canRead & !(hypTrap & !inUserMode(Cpsr) & !inSecureState(Scr, Cpsr))) { + return new UndefinedInstruction(machInst, false, mnemonic); + } + if (hypTrap) { + return new HypervisorTrap(machInst, imm, EC_TRAPPED_CP15_MCRR_MRRC); + } + Dest = bits(MiscNsBankedOp164, 63, 32); + Dest2 = bits(MiscNsBankedOp164, 31, 0); + ''' + mrrc15Iop = InstObjParams("mrrc", "Mrrc15", "MrrcOp", + { "code": mrrc15code, + "predicate_test": predicateTest }, []) + header_output += MrrcOpDeclare.subst(mrrc15Iop) + decoder_output += MrrcOpConstructor.subst(mrrc15Iop) + exec_output += PredOpExecute.subst(mrrc15Iop) + + + mcrr15code = ''' + int preFlatDest = flattenMiscRegNsBanked(dest, xc->tcBase()); + MiscRegIndex miscReg = (MiscRegIndex) + xc->tcBase()->flattenMiscIndex(preFlatDest); + bool hypTrap = mcrrMrrc15TrapToHyp(miscReg, Cpsr, Scr, Hstr, Hcr, imm); + bool canWrite = canWriteCoprocReg(miscReg, Scr, Cpsr, xc->tcBase()); + + // if we're in non-secure PL1 mode then we can trap regardless of whether + // the register is accessible, in other modes we trap only if the register + // IS accessible. 
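The new MRRC/MCRR forms move a 64-bit CP15 register through a pair of 32-bit GPRs: the mrrc15 body above splits the value with bits(..., 63, 32) and bits(..., 31, 0), and the mcrr15 body reassembles it with a shift-and-or. A self-contained round trip of that packing, assuming a bits(val, hi, lo) helper with the usual gem5-style semantics:

    #include <cassert>
    #include <cstdint>

    // Extract bits hi..lo of val (assumed semantics of the bits() helper,
    // simplified here to 64-bit values with hi >= lo).
    static inline uint64_t bits(uint64_t val, int hi, int lo)
    {
        int width = hi - lo + 1;
        uint64_t mask = (width >= 64) ? ~0ULL : ((1ULL << width) - 1);
        return (val >> lo) & mask;
    }

    int main()
    {
        uint64_t miscReg64 = 0x0123456789abcdefULL;   // 64-bit CP15 register value

        // MRRC direction: the first destination takes the high word, the
        // second the low word, mirroring Dest/Dest2 in mrrc15code above.
        uint32_t rt  = bits(miscReg64, 63, 32);
        uint32_t rt2 = bits(miscReg64, 31, 0);

        // MCRR direction: rebuild the 64-bit value, as in mcrr15code below.
        uint64_t rebuilt = (static_cast<uint64_t>(rt) << 32) | rt2;
        assert(rebuilt == miscReg64);
        return 0;
    }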
+ if (!canWrite & !(hypTrap & !inUserMode(Cpsr) & !inSecureState(Scr, Cpsr))) { + return new UndefinedInstruction(machInst, false, mnemonic); + } + if (hypTrap) { + return new HypervisorTrap(machInst, imm, EC_TRAPPED_CP15_MCRR_MRRC); + } + MiscNsBankedDest64 = ((uint64_t) Op1 << 32) | Op2; + ''' + mcrr15Iop = InstObjParams("mcrr", "Mcrr15", "McrrOp", + { "code": mcrr15code, + "predicate_test": predicateTest }, []) + header_output += McrrOpDeclare.subst(mcrr15Iop) + decoder_output += McrrOpConstructor.subst(mcrr15Iop) + exec_output += PredOpExecute.subst(mcrr15Iop) + enterxCode = ''' NextThumb = true; @@ -775,35 +995,53 @@ let {{ exec_output += PredOpExecute.subst(clrexIop) isbCode = ''' + // If the barrier is due to a CP15 access check for hyp traps + if ((imm != 0) && mcrMrc15TrapToHyp(MISCREG_CP15ISB, Hcr, Cpsr, Scr, + Hdcr, Hstr, Hcptr, imm)) { + return new HypervisorTrap(machInst, imm, + EC_TRAPPED_CP15_MCR_MRC); + } fault = new FlushPipe; ''' - isbIop = InstObjParams("isb", "Isb", "PredOp", + isbIop = InstObjParams("isb", "Isb", "ImmOp", {"code": isbCode, "predicate_test": predicateTest}, ['IsSerializeAfter']) - header_output += BasicDeclare.subst(isbIop) - decoder_output += BasicConstructor.subst(isbIop) + header_output += ImmOpDeclare.subst(isbIop) + decoder_output += ImmOpConstructor.subst(isbIop) exec_output += PredOpExecute.subst(isbIop) dsbCode = ''' + // If the barrier is due to a CP15 access check for hyp traps + if ((imm != 0) && mcrMrc15TrapToHyp(MISCREG_CP15DSB, Hcr, Cpsr, Scr, + Hdcr, Hstr, Hcptr, imm)) { + return new HypervisorTrap(machInst, imm, + EC_TRAPPED_CP15_MCR_MRC); + } fault = new FlushPipe; ''' - dsbIop = InstObjParams("dsb", "Dsb", "PredOp", + dsbIop = InstObjParams("dsb", "Dsb", "ImmOp", {"code": dsbCode, "predicate_test": predicateTest}, ['IsMemBarrier', 'IsSerializeAfter']) - header_output += BasicDeclare.subst(dsbIop) - decoder_output += BasicConstructor.subst(dsbIop) + header_output += ImmOpDeclare.subst(dsbIop) + decoder_output += ImmOpConstructor.subst(dsbIop) exec_output += PredOpExecute.subst(dsbIop) dmbCode = ''' + // If the barrier is due to a CP15 access check for hyp traps + if ((imm != 0) && mcrMrc15TrapToHyp(MISCREG_CP15DMB, Hcr, Cpsr, Scr, + Hdcr, Hstr, Hcptr, imm)) { + return new HypervisorTrap(machInst, imm, + EC_TRAPPED_CP15_MCR_MRC); + } ''' - dmbIop = InstObjParams("dmb", "Dmb", "PredOp", + dmbIop = InstObjParams("dmb", "Dmb", "ImmOp", {"code": dmbCode, "predicate_test": predicateTest}, ['IsMemBarrier']) - header_output += BasicDeclare.subst(dmbIop) - decoder_output += BasicConstructor.subst(dmbIop) + header_output += ImmOpDeclare.subst(dmbIop) + decoder_output += ImmOpConstructor.subst(dmbIop) exec_output += PredOpExecute.subst(dmbIop) dbgCode = ''' diff --git a/src/arch/arm/isa/insts/misc64.isa b/src/arch/arm/isa/insts/misc64.isa new file mode 100644 index 000000000..6ebbcc2ba --- /dev/null +++ b/src/arch/arm/isa/insts/misc64.isa @@ -0,0 +1,147 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. 
You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +let {{ + svcCode = ''' + fault = new SupervisorCall(machInst, bits(machInst, 20, 5)); + ''' + + svcIop = InstObjParams("svc", "Svc64", "ArmStaticInst", + svcCode, ["IsSyscall", "IsNonSpeculative", + "IsSerializeAfter"]) + header_output = BasicDeclare.subst(svcIop) + decoder_output = BasicConstructor64.subst(svcIop) + exec_output = BasicExecute.subst(svcIop) + + # @todo: extend to take into account Virtualization. 
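The Smc64 definition that follows only delivers a Secure Monitor Call when the Security Extensions are implemented, the caller is privileged, and SCR.SMD has not disabled SMC; any other combination falls back to a disabled fault. A small decision sketch of that gating, with plain booleans in place of ArmSystem::haveSecurity(), inUserMode() and the SCR.SMD bit:

    #include <cstdio>

    // haveSecurity ~ ArmSystem::haveSecurity(tc), userMode ~ inUserMode(cpsr),
    // smd ~ scr.smd in the smcCode below.
    bool smcAllowed(bool haveSecurity, bool userMode, bool smd)
    {
        // Missing Security Extensions, a user-mode caller, or SMD set each
        // turn SMC into a disabled fault instead of a monitor call.
        return haveSecurity && !userMode && !smd;
    }

    int main()
    {
        std::printf("privileged, SMD clear: %d\n", smcAllowed(true, false, false)); // 1
        std::printf("user mode            : %d\n", smcAllowed(true, true,  false)); // 0
        std::printf("SCR.SMD set          : %d\n", smcAllowed(true, false, true));  // 0
        return 0;
    }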
+ smcCode = ''' + SCR scr = Scr64; + CPSR cpsr = Cpsr; + + if (!ArmSystem::haveSecurity(xc->tcBase()) || inUserMode(cpsr) || scr.smd) { + fault = disabledFault(); + } else { + fault = new SecureMonitorCall(machInst); + } + ''' + + smcIop = InstObjParams("smc", "Smc64", "ArmStaticInst", + smcCode, ["IsNonSpeculative", "IsSerializeAfter"]) + header_output += BasicDeclare.subst(smcIop) + decoder_output += BasicConstructor64.subst(smcIop) + exec_output += BasicExecute.subst(smcIop) + + def subst(templateBase, iop): + global header_output, decoder_output, exec_output + header_output += eval(templateBase + "Declare").subst(iop) + decoder_output += eval(templateBase + "Constructor").subst(iop) + exec_output += BasicExecute.subst(iop) + + bfmMaskCode = ''' + uint64_t bitMask; + int diff = imm2 - imm1; + if (imm1 <= imm2) { + bitMask = mask(diff + 1); + } else { + bitMask = mask(imm2 + 1); + bitMask = (bitMask >> imm1) | (bitMask << (intWidth - imm1)); + diff += intWidth; + } + uint64_t topBits M5_VAR_USED = ~mask(diff+1); + uint64_t result = (Op164 >> imm1) | (Op164 << (intWidth - imm1)); + result &= bitMask; + ''' + + bfmCode = bfmMaskCode + 'Dest64 = result | (Dest64 & ~bitMask);' + bfmIop = InstObjParams("bfm", "Bfm64", "RegRegImmImmOp64", bfmCode); + subst("RegRegImmImmOp64", bfmIop) + + ubfmCode = bfmMaskCode + 'Dest64 = result;' + ubfmIop = InstObjParams("ubfm", "Ubfm64", "RegRegImmImmOp64", ubfmCode); + subst("RegRegImmImmOp64", ubfmIop) + + sbfmCode = bfmMaskCode + \ + 'Dest64 = result | (bits(Op164, imm2) ? topBits : 0);' + sbfmIop = InstObjParams("sbfm", "Sbfm64", "RegRegImmImmOp64", sbfmCode); + subst("RegRegImmImmOp64", sbfmIop) + + extrCode = ''' + if (imm == 0) { + Dest64 = Op264; + } else { + Dest64 = (Op164 << (intWidth - imm)) | (Op264 >> imm); + } + ''' + extrIop = InstObjParams("extr", "Extr64", "RegRegRegImmOp64", extrCode); + subst("RegRegRegImmOp64", extrIop); + + unknownCode = ''' + return new UndefinedInstruction(machInst, true); + ''' + unknown64Iop = InstObjParams("unknown", "Unknown64", "UnknownOp64", + unknownCode) + header_output += BasicDeclare.subst(unknown64Iop) + decoder_output += BasicConstructor64.subst(unknown64Iop) + exec_output += BasicExecute.subst(unknown64Iop) + + isbIop = InstObjParams("isb", "Isb64", "ArmStaticInst", + "fault = new FlushPipe;", ['IsSerializeAfter']) + header_output += BasicDeclare.subst(isbIop) + decoder_output += BasicConstructor64.subst(isbIop) + exec_output += BasicExecute.subst(isbIop) + + dsbIop = InstObjParams("dsb", "Dsb64", "ArmStaticInst", + "fault = new FlushPipe;", + ['IsMemBarrier', 'IsSerializeAfter']) + header_output += BasicDeclare.subst(dsbIop) + decoder_output += BasicConstructor64.subst(dsbIop) + exec_output += BasicExecute.subst(dsbIop) + + dmbIop = InstObjParams("dmb", "Dmb64", "ArmStaticInst", "", + ['IsMemBarrier']) + header_output += BasicDeclare.subst(dmbIop) + decoder_output += BasicConstructor64.subst(dmbIop) + exec_output += BasicExecute.subst(dmbIop) + + clrexIop = InstObjParams("clrex", "Clrex64", "ArmStaticInst", + "LLSCLock = 0;") + header_output += BasicDeclare.subst(clrexIop) + decoder_output += BasicConstructor64.subst(clrexIop) + exec_output += BasicExecute.subst(clrexIop) +}}; diff --git a/src/arch/arm/isa/insts/neon.isa b/src/arch/arm/isa/insts/neon.isa index 876bb3bb7..ca5c3038c 100644 --- a/src/arch/arm/isa/insts/neon.isa +++ b/src/arch/arm/isa/insts/neon.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2011 ARM Limited // All rights reserved // // The 
license below extends only to copyright in the software and shall @@ -94,8 +94,8 @@ output header {{ template <template <typename T> class Base> StaticInstPtr decodeNeonUThreeUSReg(unsigned size, - ExtMachInst machInst, IntRegIndex dest, - IntRegIndex op1, IntRegIndex op2) + ExtMachInst machInst, IntRegIndex dest, + IntRegIndex op1, IntRegIndex op2) { switch (size) { case 0: @@ -112,8 +112,8 @@ output header {{ template <template <typename T> class Base> StaticInstPtr decodeNeonSThreeUSReg(unsigned size, - ExtMachInst machInst, IntRegIndex dest, - IntRegIndex op1, IntRegIndex op2) + ExtMachInst machInst, IntRegIndex dest, + IntRegIndex op1, IntRegIndex op2) { switch (size) { case 0: @@ -129,6 +129,38 @@ output header {{ template <template <typename T> class Base> StaticInstPtr + decodeNeonSThreeHAndWReg(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, + IntRegIndex op2) + { + switch (size) { + case 1: + return new Base<int16_t>(machInst, dest, op1, op2); + case 2: + return new Base<int32_t>(machInst, dest, op1, op2); + default: + return new Unknown(machInst); + } + } + + template <template <typename T> class Base> + StaticInstPtr + decodeNeonSThreeImmHAndWReg(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, + IntRegIndex op2, uint64_t imm) + { + switch (size) { + case 1: + return new Base<int16_t>(machInst, dest, op1, op2, imm); + case 2: + return new Base<int32_t>(machInst, dest, op1, op2, imm); + default: + return new Unknown(machInst); + } + } + + template <template <typename T> class Base> + StaticInstPtr decodeNeonUSThreeUSReg(bool notSigned, unsigned size, ExtMachInst machInst, IntRegIndex dest, IntRegIndex op1, IntRegIndex op2) @@ -177,6 +209,38 @@ output header {{ template <template <typename T> class BaseD, template <typename T> class BaseQ> StaticInstPtr + decodeNeonSThreeXReg(bool q, unsigned size, + ExtMachInst machInst, IntRegIndex dest, + IntRegIndex op1, IntRegIndex op2) + { + if (q) { + return decodeNeonSThreeUReg<BaseQ>( + size, machInst, dest, op1, op2); + } else { + return decodeNeonSThreeUSReg<BaseD>( + size, machInst, dest, op1, op2); + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonUThreeXReg(bool q, unsigned size, + ExtMachInst machInst, IntRegIndex dest, + IntRegIndex op1, IntRegIndex op2) + { + if (q) { + return decodeNeonUThreeUReg<BaseQ>( + size, machInst, dest, op1, op2); + } else { + return decodeNeonUThreeUSReg<BaseD>( + size, machInst, dest, op1, op2); + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size, ExtMachInst machInst, IntRegIndex dest, IntRegIndex op1, IntRegIndex op2) @@ -241,6 +305,124 @@ output header {{ template <template <typename T> class BaseD, template <typename T> class BaseQ> StaticInstPtr + decodeNeonUThreeFpReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, IntRegIndex op2) + { + if (q) { + if (size) + return new BaseQ<uint64_t>(machInst, dest, op1, op2); + else + return new BaseQ<uint32_t>(machInst, dest, op1, op2); + } else { + if (size) + return new Unknown(machInst); + else + return new BaseD<uint32_t>(machInst, dest, op1, op2); + } + } + + template <template <typename T> class Base> + StaticInstPtr + decodeNeonUThreeScFpReg(bool size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, IntRegIndex op2) + { + if (size) + return new Base<uint64_t>(machInst, 
dest, op1, op2); + else + return new Base<uint32_t>(machInst, dest, op1, op2); + } + + template <template <typename T> class Base> + StaticInstPtr + decodeNeonUThreeImmScFpReg(bool size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, + IntRegIndex op2, uint64_t imm) + { + if (size) + return new Base<uint64_t>(machInst, dest, op1, op2, imm); + else + return new Base<uint32_t>(machInst, dest, op1, op2, imm); + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonUThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, + IntRegIndex op2, uint64_t imm) + { + if (q) { + switch (size) { + case 1: + return new BaseQ<uint16_t>(machInst, dest, op1, op2, imm); + case 2: + return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm); + default: + return new Unknown(machInst); + } + } else { + switch (size) { + case 1: + return new BaseD<uint16_t>(machInst, dest, op1, op2, imm); + case 2: + return new BaseD<uint32_t>(machInst, dest, op1, op2, imm); + default: + return new Unknown(machInst); + } + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonSThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, + IntRegIndex op2, uint64_t imm) + { + if (q) { + switch (size) { + case 1: + return new BaseQ<int16_t>(machInst, dest, op1, op2, imm); + case 2: + return new BaseQ<int32_t>(machInst, dest, op1, op2, imm); + default: + return new Unknown(machInst); + } + } else { + switch (size) { + case 1: + return new BaseD<int16_t>(machInst, dest, op1, op2, imm); + case 2: + return new BaseD<int32_t>(machInst, dest, op1, op2, imm); + default: + return new Unknown(machInst); + } + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonUThreeImmFpReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, + IntRegIndex op2, uint64_t imm) + { + if (q) { + if (size) + return new BaseQ<uint64_t>(machInst, dest, op1, op2, imm); + else + return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm); + } else { + if (size) + return new Unknown(machInst); + else + return new BaseD<uint32_t>(machInst, dest, op1, op2, imm); + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr decodeNeonUTwoShiftReg(bool q, unsigned size, ExtMachInst machInst, IntRegIndex dest, IntRegIndex op1, uint64_t imm) @@ -345,6 +527,46 @@ output header {{ } } + template <template <typename T> class Base> + StaticInstPtr + decodeNeonUTwoShiftUReg(unsigned size, + ExtMachInst machInst, IntRegIndex dest, + IntRegIndex op1, uint64_t imm) + { + switch (size) { + case 0: + return new Base<uint8_t>(machInst, dest, op1, imm); + case 1: + return new Base<uint16_t>(machInst, dest, op1, imm); + case 2: + return new Base<uint32_t>(machInst, dest, op1, imm); + case 3: + return new Base<uint64_t>(machInst, dest, op1, imm); + default: + return new Unknown(machInst); + } + } + + template <template <typename T> class Base> + StaticInstPtr + decodeNeonSTwoShiftUReg(unsigned size, + ExtMachInst machInst, IntRegIndex dest, + IntRegIndex op1, uint64_t imm) + { + switch (size) { + case 0: + return new Base<int8_t>(machInst, dest, op1, imm); + case 1: + return new Base<int16_t>(machInst, dest, op1, imm); + case 2: + return new Base<int32_t>(machInst, dest, op1, imm); + case 3: + return new Base<int64_t>(machInst, 
dest, op1, imm); + default: + return new Unknown(machInst); + } + } + template <template <typename T> class BaseD, template <typename T> class BaseQ> StaticInstPtr @@ -411,6 +633,66 @@ output header {{ } } + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonUTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, uint64_t imm) + { + if (q) { + return decodeNeonUTwoShiftUReg<BaseQ>( + size, machInst, dest, op1, imm); + } else { + return decodeNeonUTwoShiftUSReg<BaseD>( + size, machInst, dest, op1, imm); + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonSTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, uint64_t imm) + { + if (q) { + return decodeNeonSTwoShiftUReg<BaseQ>( + size, machInst, dest, op1, imm); + } else { + return decodeNeonSTwoShiftUSReg<BaseD>( + size, machInst, dest, op1, imm); + } + } + + template <template <typename T> class Base> + StaticInstPtr + decodeNeonUTwoShiftUFpReg(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, uint64_t imm) + { + if (size) + return new Base<uint64_t>(machInst, dest, op1, imm); + else + return new Base<uint32_t>(machInst, dest, op1, imm); + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonUTwoShiftFpReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, uint64_t imm) + { + if (q) { + if (size) + return new BaseQ<uint64_t>(machInst, dest, op1, imm); + else + return new BaseQ<uint32_t>(machInst, dest, op1, imm); + } else { + if (size) + return new Unknown(machInst); + else + return new BaseD<uint32_t>(machInst, dest, op1, imm); + } + } + template <template <typename T> class Base> StaticInstPtr decodeNeonUTwoMiscUSReg(unsigned size, @@ -451,8 +733,8 @@ output header {{ template <typename T> class BaseQ> StaticInstPtr decodeNeonUTwoMiscSReg(bool q, unsigned size, - ExtMachInst machInst, IntRegIndex dest, - IntRegIndex op1) + ExtMachInst machInst, IntRegIndex dest, + IntRegIndex op1) { if (q) { return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1); @@ -465,8 +747,8 @@ output header {{ template <typename T> class BaseQ> StaticInstPtr decodeNeonSTwoMiscSReg(bool q, unsigned size, - ExtMachInst machInst, IntRegIndex dest, - IntRegIndex op1) + ExtMachInst machInst, IntRegIndex dest, + IntRegIndex op1) { if (q) { return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1); @@ -498,8 +780,8 @@ output header {{ template <template <typename T> class Base> StaticInstPtr decodeNeonSTwoMiscUReg(unsigned size, - ExtMachInst machInst, IntRegIndex dest, - IntRegIndex op1) + ExtMachInst machInst, IntRegIndex dest, + IntRegIndex op1) { switch (size) { case 0: @@ -559,6 +841,221 @@ output header {{ } } + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonUTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1) + { + if (q) { + return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1); + } else { + return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1); + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonSTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1) + { + if (q) { + return 
decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1); + } else { + return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1); + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonUTwoMiscFpReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1) + { + if (q) { + if (size) + return new BaseQ<uint64_t>(machInst, dest, op1); + else + return new BaseQ<uint32_t>(machInst, dest, op1); + } else { + if (size) + return new Unknown(machInst); + else + return new BaseD<uint32_t>(machInst, dest, op1); + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonUTwoMiscPwiseScFpReg(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1) + { + if (size) + return new BaseQ<uint64_t>(machInst, dest, op1); + else + return new BaseD<uint32_t>(machInst, dest, op1); + } + + template <template <typename T> class Base> + StaticInstPtr + decodeNeonUTwoMiscScFpReg(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1) + { + if (size) + return new Base<uint64_t>(machInst, dest, op1); + else + return new Base<uint32_t>(machInst, dest, op1); + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1) + { + if (q) { + switch (size) { + case 0x0: + return new BaseQ<uint8_t>(machInst, dest, op1); + case 0x1: + return new BaseQ<uint16_t>(machInst, dest, op1); + case 0x2: + return new BaseQ<uint32_t>(machInst, dest, op1); + default: + return new Unknown(machInst); + } + } else { + switch (size) { + case 0x0: + return new BaseD<uint8_t>(machInst, dest, op1); + case 0x1: + return new BaseD<uint16_t>(machInst, dest, op1); + default: + return new Unknown(machInst); + } + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ, + template <typename T> class BaseBQ> + StaticInstPtr + decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1) + { + if (q) { + switch (size) { + case 0x0: + return new BaseQ<uint8_t>(machInst, dest, op1); + case 0x1: + return new BaseQ<uint16_t>(machInst, dest, op1); + case 0x2: + return new BaseBQ<uint32_t>(machInst, dest, op1); + default: + return new Unknown(machInst); + } + } else { + switch (size) { + case 0x0: + return new BaseD<uint8_t>(machInst, dest, op1); + case 0x1: + return new BaseD<uint16_t>(machInst, dest, op1); + default: + return new Unknown(machInst); + } + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonSAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1) + { + if (q) { + switch (size) { + case 0x0: + return new BaseQ<int8_t>(machInst, dest, op1); + case 0x1: + return new BaseQ<int16_t>(machInst, dest, op1); + case 0x2: + return new BaseQ<int32_t>(machInst, dest, op1); + default: + return new Unknown(machInst); + } + } else { + switch (size) { + case 0x0: + return new BaseD<int8_t>(machInst, dest, op1); + case 0x1: + return new BaseD<int16_t>(machInst, dest, op1); + default: + return new Unknown(machInst); + } + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ, + template <typename T> class BaseBQ> + StaticInstPtr + decodeNeonUAcrossLanesLongReg(bool q, unsigned size, ExtMachInst 
machInst, + IntRegIndex dest, IntRegIndex op1) + { + if (q) { + switch (size) { + case 0x0: + return new BaseQ<uint8_t>(machInst, dest, op1); + case 0x1: + return new BaseQ<uint16_t>(machInst, dest, op1); + case 0x2: + return new BaseBQ<uint32_t>(machInst, dest, op1); + default: + return new Unknown(machInst); + } + } else { + switch (size) { + case 0x0: + return new BaseD<uint8_t>(machInst, dest, op1); + case 0x1: + return new BaseD<uint16_t>(machInst, dest, op1); + default: + return new Unknown(machInst); + } + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ, + template <typename T> class BaseBQ> + StaticInstPtr + decodeNeonSAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1) + { + if (q) { + switch (size) { + case 0x0: + return new BaseQ<int8_t>(machInst, dest, op1); + case 0x1: + return new BaseQ<int16_t>(machInst, dest, op1); + case 0x2: + return new BaseBQ<int32_t>(machInst, dest, op1); + default: + return new Unknown(machInst); + } + } else { + switch (size) { + case 0x0: + return new BaseD<int8_t>(machInst, dest, op1); + case 0x1: + return new BaseD<int16_t>(machInst, dest, op1); + default: + return new Unknown(machInst); + } + } + } }}; output exec {{ @@ -872,10 +1369,7 @@ let {{ readDestCode = 'destElem = gtoh(destReg.elements[i]);' eWalkCode += ''' if (imm < 0 && imm >= eCount) { - if (FullSystem) - fault = new UndefinedInstruction; - else - fault = new UndefinedInstruction(false, mnemonic); + fault = new UndefinedInstruction(machInst, false, mnemonic); } else { for (unsigned i = 0; i < eCount; i++) { Element srcElem1 = gtoh(srcReg1.elements[i]); @@ -926,10 +1420,7 @@ let {{ readDestCode = 'destElem = gtoh(destReg.elements[i]);' eWalkCode += ''' if (imm < 0 && imm >= eCount) { - if (FullSystem) - fault = new UndefinedInstruction; - else - fault = new UndefinedInstruction(false, mnemonic); + fault = new UndefinedInstruction(machInst, false, mnemonic); } else { for (unsigned i = 0; i < eCount; i++) { Element srcElem1 = gtoh(srcReg1.elements[i]); @@ -978,10 +1469,7 @@ let {{ readDestCode = 'destReg = destRegs[i];' eWalkCode += ''' if (imm < 0 && imm >= eCount) { - if (FullSystem) - fault = new UndefinedInstruction; - else - fault = new UndefinedInstruction(false, mnemonic); + fault = new UndefinedInstruction(machInst, false, mnemonic); } else { for (unsigned i = 0; i < rCount; i++) { FloatReg srcReg1 = srcRegs1[i]; @@ -2156,7 +2644,7 @@ let {{ bool done; destReg = processNans(fpscr, done, true, srcReg1, srcReg2); if (!done) { - destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS, + destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMax<float>, true, true, VfpRoundNearest); } else if (flushToZero(srcReg1, srcReg2)) { fpscr.idc = 1; @@ -2171,7 +2659,7 @@ let {{ bool done; destReg = processNans(fpscr, done, true, srcReg1, srcReg2); if (!done) { - destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS, + destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMin<float>, true, true, VfpRoundNearest); } else if (flushToZero(srcReg1, srcReg2)) { fpscr.idc = 1; @@ -2234,6 +2722,24 @@ let {{ threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True) threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True) + vfmafpCode = ''' + FPSCR fpscr = (FPSCR) FpscrExc; + destReg = ternaryOp(fpscr, srcReg1, srcReg2, destReg, fpMulAdd<float>, + true, true, VfpRoundNearest); + FpscrExc = fpscr; + ''' + threeEqualRegInstFp("vfma", "NVfmaDFp", 
"SimdFloatMultAccOp", ("float",), 2, vfmafpCode, True) + threeEqualRegInstFp("vfma", "NVfmaQFp", "SimdFloatMultAccOp", ("float",), 4, vfmafpCode, True) + + vfmsfpCode = ''' + FPSCR fpscr = (FPSCR) FpscrExc; + destReg = ternaryOp(fpscr, -srcReg1, srcReg2, destReg, fpMulAdd<float>, + true, true, VfpRoundNearest); + FpscrExc = fpscr; + ''' + threeEqualRegInstFp("vfms", "NVfmsDFp", "SimdFloatMultAccOp", ("float",), 2, vfmsfpCode, True) + threeEqualRegInstFp("vfms", "NVfmsQFp", "SimdFloatMultAccOp", ("float",), 4, vfmsfpCode, True) + vmlsfpCode = ''' FPSCR fpscr = (FPSCR) FpscrExc; float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS, @@ -2765,7 +3271,7 @@ let {{ fpscr.idc = 1; VfpSavedState state = prepFpState(VfpRoundNearest); __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1)); - destReg = vfpFpSToFixed(srcElem1, false, false, imm); + destReg = vfpFpToFixed<float>(srcElem1, false, 32, imm); __asm__ __volatile__("" :: "m" (destReg)); finishVfp(fpscr, state, true); FpscrExc = fpscr; @@ -2781,7 +3287,7 @@ let {{ fpscr.idc = 1; VfpSavedState state = prepFpState(VfpRoundNearest); __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1)); - destReg = vfpFpSToFixed(srcElem1, true, false, imm); + destReg = vfpFpToFixed<float>(srcElem1, true, 32, imm); __asm__ __volatile__("" :: "m" (destReg)); finishVfp(fpscr, state, true); FpscrExc = fpscr; @@ -2795,7 +3301,7 @@ let {{ FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(VfpRoundNearest); __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1)); - destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm); + destElem = vfpUFixedToFpS(true, true, srcReg1, 32, imm); __asm__ __volatile__("" :: "m" (destElem)); finishVfp(fpscr, state, true); FpscrExc = fpscr; @@ -2809,7 +3315,7 @@ let {{ FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(VfpRoundNearest); __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1)); - destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm); + destElem = vfpSFixedToFpS(true, true, srcReg1, 32, imm); __asm__ __volatile__("" :: "m" (destElem)); finishVfp(fpscr, state, true); FpscrExc = fpscr; @@ -3296,10 +3802,7 @@ let {{ } else { index -= eCount; if (index >= eCount) { - if (FullSystem) - fault = new UndefinedInstruction; - else - fault = new UndefinedInstruction(false, mnemonic); + fault = new UndefinedInstruction(machInst, false, mnemonic); } else { destReg.elements[i] = srcReg2.elements[index]; } diff --git a/src/arch/arm/isa/insts/neon64.isa b/src/arch/arm/isa/insts/neon64.isa new file mode 100644 index 000000000..e065761f4 --- /dev/null +++ b/src/arch/arm/isa/insts/neon64.isa @@ -0,0 +1,3355 @@ +// -*- mode: c++ -*- + +// Copyright (c) 2012-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Giacomo Gabrielli +// Mbou Eyole + +let {{ + + header_output = "" + exec_output = "" + + # FP types (FP operations always work with unsigned representations) + floatTypes = ("uint32_t", "uint64_t") + smallFloatTypes = ("uint32_t",) + + def threeEqualRegInstX(name, Name, opClass, types, rCount, op, + readDest=False, pairwise=False, scalar=False, + byElem=False): + assert (not pairwise) or ((not byElem) and (not scalar)) + global header_output, exec_output + eWalkCode = simd64EnabledCheckCode + ''' + RegVect srcReg1, destReg; + ''' + if byElem: + # 2nd register operand has to be read fully + eWalkCode += ''' + FullRegVect srcReg2; + ''' + else: + eWalkCode += ''' + RegVect srcReg2; + ''' + for reg in range(rCount): + eWalkCode += ''' + srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); + srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); + ''' % { "reg" : reg } + if readDest: + eWalkCode += ''' + destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); + ''' % { "reg" : reg } + if byElem: + # 2nd operand has to be read fully + for reg in range(rCount, 4): + eWalkCode += ''' + srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); + ''' % { "reg" : reg } + readDestCode = '' + if readDest: + readDestCode = 'destElem = gtoh(destReg.elements[i]);' + if pairwise: + eWalkCode += ''' + for (unsigned i = 0; i < eCount; i++) { + Element srcElem1 = gtoh(2 * i < eCount ? + srcReg1.elements[2 * i] : + srcReg2.elements[2 * i - eCount]); + Element srcElem2 = gtoh(2 * i < eCount ? 
+ srcReg1.elements[2 * i + 1] : + srcReg2.elements[2 * i + 1 - eCount]); + Element destElem; + %(readDest)s + %(op)s + destReg.elements[i] = htog(destElem); + } + ''' % { "op" : op, "readDest" : readDestCode } + else: + scalarCheck = ''' + if (i != 0) { + destReg.elements[i] = 0; + continue; + } + ''' + eWalkCode += ''' + for (unsigned i = 0; i < eCount; i++) { + %(scalarCheck)s + Element srcElem1 = gtoh(srcReg1.elements[i]); + Element srcElem2 = gtoh(srcReg2.elements[%(src2Index)s]); + Element destElem; + %(readDest)s + %(op)s + destReg.elements[i] = htog(destElem); + } + ''' % { "op" : op, "readDest" : readDestCode, + "scalarCheck" : scalarCheck if scalar else "", + "src2Index" : "imm" if byElem else "i" } + for reg in range(rCount): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + if rCount < 4: # zero upper half + for reg in range(rCount, 4): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = 0; + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX2RegImmOp" if byElem else "DataX2RegOp", + { "code": eWalkCode, + "r_count": rCount, + "op_class": opClass }, []) + if byElem: + header_output += NeonX2RegImmOpDeclare.subst(iop) + else: + header_output += NeonX2RegOpDeclare.subst(iop) + exec_output += NeonXEqualRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def threeUnequalRegInstX(name, Name, opClass, types, op, + bigSrc1, bigSrc2, bigDest, readDest, scalar=False, + byElem=False, hi=False): + assert not (scalar and hi) + global header_output, exec_output + src1Cnt = src2Cnt = destCnt = 2 + src1Prefix = src2Prefix = destPrefix = '' + if bigSrc1: + src1Cnt = 4 + src1Prefix = 'Big' + if bigSrc2: + src2Cnt = 4 + src2Prefix = 'Big' + if bigDest: + destCnt = 4 + destPrefix = 'Big' + if byElem: + src2Prefix = 'Full' + eWalkCode = simd64EnabledCheckCode + ''' + %sRegVect srcReg1; + %sRegVect srcReg2; + %sRegVect destReg; + ''' % (src1Prefix, src2Prefix, destPrefix) + srcReg1 = 0 + if hi and not bigSrc1: # long/widening operations + srcReg1 = 2 + for reg in range(src1Cnt): + eWalkCode += ''' + srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(srcReg1)d_uw); + ''' % { "reg" : reg, "srcReg1" : srcReg1 } + srcReg1 += 1 + srcReg2 = 0 + if (not byElem) and (hi and not bigSrc2): # long/widening operations + srcReg2 = 2 + for reg in range(src2Cnt): + eWalkCode += ''' + srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(srcReg2)d_uw); + ''' % { "reg" : reg, "srcReg2" : srcReg2 } + srcReg2 += 1 + if byElem: + # 2nd operand has to be read fully + for reg in range(src2Cnt, 4): + eWalkCode += ''' + srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); + ''' % { "reg" : reg } + if readDest: + for reg in range(destCnt): + eWalkCode += ''' + destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); + ''' % { "reg" : reg } + readDestCode = '' + if readDest: + readDestCode = 'destElem = gtoh(destReg.elements[i]);' + scalarCheck = ''' + if (i != 0) { + destReg.elements[i] = 0; + continue; + } + ''' + eWalkCode += ''' + for (unsigned i = 0; i < eCount; i++) { + %(scalarCheck)s + %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]); + %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[%(src2Index)s]); + %(destPrefix)sElement destElem; + %(readDest)s + %(op)s + destReg.elements[i] = htog(destElem); + } + ''' % { "op" : op, "readDest" : readDestCode, + "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix, + "destPrefix" : destPrefix, + "scalarCheck" : scalarCheck if scalar 
else "", + "src2Index" : "imm" if byElem else "i" } + destReg = 0 + if hi and not bigDest: + # narrowing operations + destReg = 2 + for reg in range(destCnt): + eWalkCode += ''' + AA64FpDestP%(destReg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg, "destReg": destReg } + destReg += 1 + if destCnt < 4 and not hi: # zero upper half + for reg in range(destCnt, 4): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = 0; + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX2RegImmOp" if byElem else "DataX2RegOp", + { "code": eWalkCode, + "r_count": 2, + "op_class": opClass }, []) + if byElem: + header_output += NeonX2RegImmOpDeclare.subst(iop) + else: + header_output += NeonX2RegOpDeclare.subst(iop) + exec_output += NeonXUnequalRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def threeRegNarrowInstX(name, Name, opClass, types, op, readDest=False, + scalar=False, byElem=False, hi=False): + assert not byElem + threeUnequalRegInstX(name, Name, opClass, types, op, + True, True, False, readDest, scalar, byElem, hi) + + def threeRegLongInstX(name, Name, opClass, types, op, readDest=False, + scalar=False, byElem=False, hi=False): + threeUnequalRegInstX(name, Name, opClass, types, op, + False, False, True, readDest, scalar, byElem, hi) + + def threeRegWideInstX(name, Name, opClass, types, op, readDest=False, + scalar=False, byElem=False, hi=False): + assert not byElem + threeUnequalRegInstX(name, Name, opClass, types, op, + True, False, True, readDest, scalar, byElem, hi) + + def twoEqualRegInstX(name, Name, opClass, types, rCount, op, + readDest=False, scalar=False, byElem=False, + hasImm=False, isDup=False): + global header_output, exec_output + assert (not isDup) or byElem + if byElem: + hasImm = True + if isDup: + eWalkCode = simd64EnabledCheckCode + ''' + FullRegVect srcReg1; + RegVect destReg; + ''' + else: + eWalkCode = simd64EnabledCheckCode + ''' + RegVect srcReg1, destReg; + ''' + for reg in range(4 if isDup else rCount): + eWalkCode += ''' + srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); + ''' % { "reg" : reg } + if readDest: + eWalkCode += ''' + destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); + ''' % { "reg" : reg } + readDestCode = '' + if readDest: + readDestCode = 'destElem = gtoh(destReg.elements[i]);' + scalarCheck = ''' + if (i != 0) { + destReg.elements[i] = 0; + continue; + } + ''' + eWalkCode += ''' + for (unsigned i = 0; i < eCount; i++) { + %(scalarCheck)s + unsigned j = i; + Element srcElem1 = gtoh(srcReg1.elements[%(src1Index)s]); + Element destElem; + %(readDest)s + %(op)s + destReg.elements[j] = htog(destElem); + } + ''' % { "op" : op, "readDest" : readDestCode, + "scalarCheck" : scalarCheck if scalar else "", + "src1Index" : "imm" if byElem else "i" } + for reg in range(rCount): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + if rCount < 4: # zero upper half + for reg in range(rCount, 4): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = 0; + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX1RegImmOp" if hasImm else "DataX1RegOp", + { "code": eWalkCode, + "r_count": rCount, + "op_class": opClass }, []) + if hasImm: + header_output += NeonX1RegImmOpDeclare.subst(iop) + else: + header_output += NeonX1RegOpDeclare.subst(iop) + exec_output += NeonXEqualRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += 
NeonXExecDeclare.subst(substDict) + + def twoRegLongInstX(name, Name, opClass, types, op, readDest=False, + hi=False, hasImm=False): + global header_output, exec_output + eWalkCode = simd64EnabledCheckCode + ''' + RegVect srcReg1; + BigRegVect destReg; + ''' + destReg = 0 if not hi else 2 + for reg in range(2): + eWalkCode += ''' + srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(destReg)d_uw); + ''' % { "reg" : reg, "destReg": destReg } + destReg += 1 + destReg = 0 if not hi else 2 + if readDest: + for reg in range(4): + eWalkCode += ''' + destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); + ''' % { "reg" : reg } + destReg += 1 + readDestCode = '' + if readDest: + readDestCode = 'destReg = gtoh(destReg.elements[i]);' + eWalkCode += ''' + for (unsigned i = 0; i < eCount; i++) { + Element srcElem1 = gtoh(srcReg1.elements[i]); + BigElement destElem; + %(readDest)s + %(op)s + destReg.elements[i] = htog(destElem); + } + ''' % { "op" : op, "readDest" : readDestCode } + for reg in range(4): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX1RegImmOp" if hasImm else "DataX1RegOp", + { "code": eWalkCode, + "r_count": 2, + "op_class": opClass }, []) + if hasImm: + header_output += NeonX1RegImmOpDeclare.subst(iop) + else: + header_output += NeonX1RegOpDeclare.subst(iop) + exec_output += NeonXUnequalRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def twoRegNarrowInstX(name, Name, opClass, types, op, readDest=False, + scalar=False, hi=False, hasImm=False): + global header_output, exec_output + eWalkCode = simd64EnabledCheckCode + ''' + BigRegVect srcReg1; + RegVect destReg; + ''' + for reg in range(4): + eWalkCode += ''' + srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); + ''' % { "reg" : reg } + if readDest: + for reg in range(2): + eWalkCode += ''' + destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); + ''' % { "reg" : reg } + else: + eWalkCode += ''' + destReg.elements[0] = 0; + ''' % { "reg" : reg } + readDestCode = '' + if readDest: + readDestCode = 'destElem = gtoh(destReg.elements[i]);' + scalarCheck = ''' + if (i != 0) { + destReg.elements[i] = 0; + continue; + } + ''' + eWalkCode += ''' + for (unsigned i = 0; i < eCount; i++) { + %(scalarCheck)s + BigElement srcElem1 = gtoh(srcReg1.elements[i]); + Element destElem; + %(readDest)s + %(op)s + destReg.elements[i] = htog(destElem); + } + ''' % { "op" : op, "readDest" : readDestCode, + "scalarCheck" : scalarCheck if scalar else "" } + destReg = 0 if not hi else 2 + for reg in range(2): + eWalkCode += ''' + AA64FpDestP%(destReg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg, "destReg": destReg } + destReg += 1 + if not hi: + for reg in range(2, 4): # zero upper half + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = 0; + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX1RegImmOp" if hasImm else "DataX1RegOp", + { "code": eWalkCode, + "r_count": 2, + "op_class": opClass }, []) + if hasImm: + header_output += NeonX1RegImmOpDeclare.subst(iop) + else: + header_output += NeonX1RegOpDeclare.subst(iop) + exec_output += NeonXUnequalRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def threeRegScrambleInstX(name, Name, opClass, types, rCount, op): + global header_output, exec_output + eWalkCode = simd64EnabledCheckCode + ''' + RegVect 
srcReg1, srcReg2, destReg; + ''' + for reg in range(rCount): + eWalkCode += ''' + srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); + srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); + ''' % { "reg" : reg } + eWalkCode += op + for reg in range(rCount): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + if rCount < 4: + for reg in range(rCount, 4): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = 0; + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX2RegOp", + { "code": eWalkCode, + "r_count": rCount, + "op_class": opClass }, []) + header_output += NeonX2RegOpDeclare.subst(iop) + exec_output += NeonXEqualRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def insFromVecElemInstX(name, Name, opClass, types, rCount): + global header_output, exec_output + eWalkCode = simd64EnabledCheckCode + ''' + FullRegVect srcReg1; + RegVect destReg; + ''' + for reg in range(4): + eWalkCode += ''' + srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); + ''' % { "reg" : reg } + for reg in range(rCount): + eWalkCode += ''' + destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); + ''' % { "reg" : reg } + eWalkCode += ''' + Element srcElem1 = gtoh(srcReg1.elements[imm2]); + Element destElem = srcElem1; + destReg.elements[imm1] = htog(destElem); + ''' + for reg in range(rCount): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX1Reg2ImmOp", + { "code": eWalkCode, + "r_count": rCount, + "op_class": opClass }, []) + header_output += NeonX1Reg2ImmOpDeclare.subst(iop) + exec_output += NeonXEqualRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def twoRegPairwiseScInstX(name, Name, opClass, types, rCount, op): + global header_output, exec_output + eWalkCode = simd64EnabledCheckCode + ''' + RegVect srcReg1, destReg; + ''' + for reg in range(rCount): + eWalkCode += ''' + srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); + ''' % { "reg" : reg } + eWalkCode += ''' + Element srcElem1 = gtoh(srcReg1.elements[0]); + Element srcElem2 = gtoh(srcReg1.elements[1]); + Element destElem; + %(op)s + destReg.elements[0] = htog(destElem); + ''' % { "op" : op } + destCnt = rCount / 2 + for reg in range(destCnt): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + for reg in range(destCnt, 4): # zero upper half + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = 0; + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX1RegOp", + { "code": eWalkCode, + "r_count": rCount, + "op_class": opClass }, []) + header_output += NeonX1RegOpDeclare.subst(iop) + exec_output += NeonXEqualRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def twoRegAcrossInstX(name, Name, opClass, types, rCount, op, + doubleDest=False, long=False): + global header_output, exec_output + destPrefix = "Big" if long else "" + eWalkCode = simd64EnabledCheckCode + ''' + RegVect srcReg1; + %sRegVect destReg; + ''' % destPrefix + for reg in range(rCount): + eWalkCode += ''' + srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); + ''' % { "reg" : reg } + eWalkCode += ''' + destReg.regs[0] = 0; + %(destPrefix)sElement destElem = 0; + for (unsigned i = 0; i < eCount; 
i++) { + Element srcElem1 = gtoh(srcReg1.elements[i]); + if (i == 0) { + destElem = srcElem1; + } else { + %(op)s + } + } + destReg.elements[0] = htog(destElem); + ''' % { "op" : op, "destPrefix" : destPrefix } + destCnt = 2 if doubleDest else 1 + for reg in range(destCnt): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + for reg in range(destCnt, 4): # zero upper half + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = 0; + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX1RegOp", + { "code": eWalkCode, + "r_count": rCount, + "op_class": opClass }, []) + header_output += NeonX1RegOpDeclare.subst(iop) + if long: + exec_output += NeonXUnequalRegOpExecute.subst(iop) + else: + exec_output += NeonXEqualRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def twoRegCondenseInstX(name, Name, opClass, types, rCount, op, + readDest=False): + global header_output, exec_output + eWalkCode = simd64EnabledCheckCode + ''' + RegVect srcRegs; + BigRegVect destReg; + ''' + for reg in range(rCount): + eWalkCode += ''' + srcRegs.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); + ''' % { "reg" : reg } + if readDest: + eWalkCode += ''' + destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); + ''' % { "reg" : reg } + readDestCode = '' + if readDest: + readDestCode = 'destElem = gtoh(destReg.elements[i]);' + eWalkCode += ''' + for (unsigned i = 0; i < eCount / 2; i++) { + Element srcElem1 = gtoh(srcRegs.elements[2 * i]); + Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]); + BigElement destElem; + %(readDest)s + %(op)s + destReg.elements[i] = htog(destElem); + } + ''' % { "op" : op, "readDest" : readDestCode } + for reg in range(rCount): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + if rCount < 4: # zero upper half + for reg in range(rCount, 4): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = 0; + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX1RegOp", + { "code": eWalkCode, + "r_count": rCount, + "op_class": opClass }, []) + header_output += NeonX1RegOpDeclare.subst(iop) + exec_output += NeonXUnequalRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def oneRegImmInstX(name, Name, opClass, types, rCount, op, readDest=False): + global header_output, exec_output + eWalkCode = simd64EnabledCheckCode + ''' + RegVect destReg; + ''' + if readDest: + for reg in range(rCount): + eWalkCode += ''' + destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); + ''' % { "reg" : reg } + readDestCode = '' + if readDest: + readDestCode = 'destElem = gtoh(destReg.elements[i]);' + eWalkCode += ''' + for (unsigned i = 0; i < eCount; i++) { + Element destElem; + %(readDest)s + %(op)s + destReg.elements[i] = htog(destElem); + } + ''' % { "op" : op, "readDest" : readDestCode } + for reg in range(rCount): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + if rCount < 4: # zero upper half + for reg in range(rCount, 4): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = 0; + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataXImmOnlyOp", + { "code": eWalkCode, + "r_count": rCount, + "op_class": opClass }, []) + header_output += NeonX1RegImmOnlyOpDeclare.subst(iop) + exec_output += NeonXEqualRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : 
type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def dupGprInstX(name, Name, opClass, types, rCount, gprSpec): + global header_output, exec_output + eWalkCode = simd64EnabledCheckCode + ''' + RegVect destReg; + for (unsigned i = 0; i < eCount; i++) { + destReg.elements[i] = htog((Element) %sOp1); + } + ''' % gprSpec + for reg in range(rCount): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + if rCount < 4: # zero upper half + for reg in range(rCount, 4): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = 0; + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX1RegOp", + { "code": eWalkCode, + "r_count": rCount, + "op_class": opClass }, []) + header_output += NeonX1RegOpDeclare.subst(iop) + exec_output += NeonXEqualRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def extInstX(name, Name, opClass, types, rCount, op): + global header_output, exec_output + eWalkCode = simd64EnabledCheckCode + ''' + RegVect srcReg1, srcReg2, destReg; + ''' + for reg in range(rCount): + eWalkCode += ''' + srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); + srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); + ''' % { "reg" : reg } + eWalkCode += op + for reg in range(rCount): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + if rCount < 4: # zero upper half + for reg in range(rCount, 4): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = 0; + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX2RegImmOp", + { "code": eWalkCode, + "r_count": rCount, + "op_class": opClass }, []) + header_output += NeonX2RegImmOpDeclare.subst(iop) + exec_output += NeonXEqualRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def insFromGprInstX(name, Name, opClass, types, rCount, gprSpec): + global header_output, exec_output + eWalkCode = simd64EnabledCheckCode + ''' + RegVect destReg; + ''' + for reg in range(rCount): + eWalkCode += ''' + destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); + ''' % { "reg" : reg } + eWalkCode += ''' + destReg.elements[imm] = htog((Element) %sOp1); + ''' % gprSpec + for reg in range(rCount): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX1RegImmOp", + { "code": eWalkCode, + "r_count": rCount, + "op_class": opClass }, []) + header_output += NeonX1RegImmOpDeclare.subst(iop) + exec_output += NeonXEqualRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def insToGprInstX(name, Name, opClass, types, rCount, gprSpec, + signExt=False): + global header_output, exec_output + eWalkCode = simd64EnabledCheckCode + ''' + FullRegVect srcReg; + ''' + for reg in range(4): + eWalkCode += ''' + srcReg.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); + ''' % { "reg" : reg } + if signExt: + eWalkCode += ''' + %sDest = sext<sizeof(Element) * 8>(srcReg.elements[imm]); + ''' % gprSpec + else: + eWalkCode += ''' + %sDest = srcReg.elements[imm]; + ''' % gprSpec + iop = InstObjParams(name, Name, + "DataX1RegImmOp", + { "code": eWalkCode, + "r_count": rCount, + "op_class": opClass }, []) + header_output += NeonX1RegImmOpDeclare.subst(iop) + exec_output += 
NeonXEqualRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def tbxTblInstX(name, Name, opClass, types, length, isTbl, rCount): + global header_output, decoder_output, exec_output + code = simd64EnabledCheckCode + ''' + union + { + uint8_t bytes[64]; + FloatRegBits regs[16]; + } table; + + union + { + uint8_t bytes[%(rCount)d * 4]; + FloatRegBits regs[%(rCount)d]; + } destReg, srcReg2; + + const unsigned length = %(length)d; + const bool isTbl = %(isTbl)s; + ''' % { "rCount" : rCount, "length" : length, "isTbl" : isTbl } + for reg in range(rCount): + code += ''' + srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); + destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); + ''' % { "reg" : reg } + for reg in range(16): + if reg < length * 4: + code += ''' + table.regs[%(reg)d] = htog(AA64FpOp1P%(p)dV%(v)dS_uw); + ''' % { "reg" : reg, "p" : reg % 4, "v" : reg / 4 } + else: + code += ''' + table.regs[%(reg)d] = 0; + ''' % { "reg" : reg } + code += ''' + for (unsigned i = 0; i < sizeof(destReg); i++) { + uint8_t index = srcReg2.bytes[i]; + if (index < 16 * length) { + destReg.bytes[i] = table.bytes[index]; + } else { + if (isTbl) + destReg.bytes[i] = 0; + // else destReg.bytes[i] unchanged + } + } + ''' + for reg in range(rCount): + code += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + if rCount < 4: # zero upper half + for reg in range(rCount, 4): + code += ''' + AA64FpDestP%(reg)d_uw = 0; + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX2RegOp", + { "code": code, + "r_count": rCount, + "op_class": opClass }, []) + header_output += NeonX2RegOpDeclare.subst(iop) + exec_output += NeonXEqualRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + # ABS + absCode = ''' + if (srcElem1 < 0) { + destElem = -srcElem1; + } else { + destElem = srcElem1; + } + ''' + twoEqualRegInstX("abs", "AbsDX", "SimdAluOp", signedTypes, 2, absCode) + twoEqualRegInstX("abs", "AbsQX", "SimdAluOp", signedTypes, 4, absCode) + # ADD + addCode = "destElem = srcElem1 + srcElem2;" + threeEqualRegInstX("add", "AddDX", "SimdAddOp", unsignedTypes, 2, addCode) + threeEqualRegInstX("add", "AddQX", "SimdAddOp", unsignedTypes, 4, addCode) + # ADDHN, ADDHN2 + addhnCode = ''' + destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >> + (sizeof(Element) * 8); + ''' + threeRegNarrowInstX("addhn", "AddhnX", "SimdAddOp", smallUnsignedTypes, + addhnCode) + threeRegNarrowInstX("addhn2", "Addhn2X", "SimdAddOp", smallUnsignedTypes, + addhnCode, hi=True) + # ADDP (scalar) + twoRegPairwiseScInstX("addp", "AddpScQX", "SimdAddOp", ("uint64_t",), 4, + addCode) + # ADDP (vector) + threeEqualRegInstX("addp", "AddpDX", "SimdAddOp", smallUnsignedTypes, 2, + addCode, pairwise=True) + threeEqualRegInstX("addp", "AddpQX", "SimdAddOp", unsignedTypes, 4, + addCode, pairwise=True) + # ADDV + # Note: SimdAddOp can be a bit optimistic here + addAcrossCode = "destElem += srcElem1;" + twoRegAcrossInstX("addv", "AddvDX", "SimdAddOp", ("uint8_t", "uint16_t"), + 2, addAcrossCode) + twoRegAcrossInstX("addv", "AddvQX", "SimdAddOp", smallUnsignedTypes, 4, + addAcrossCode) + # AND + andCode = "destElem = srcElem1 & srcElem2;" + threeEqualRegInstX("and", "AndDX", "SimdAluOp", ("uint64_t",), 2, andCode) + threeEqualRegInstX("and", "AndQX", "SimdAluOp", ("uint64_t",), 4, andCode) + # BIC (immediate) + bicImmCode 
= "destElem &= ~imm;" + oneRegImmInstX("bic", "BicImmDX", "SimdAluOp", ("uint64_t",), 2, + bicImmCode, True) + oneRegImmInstX("bic", "BicImmQX", "SimdAluOp", ("uint64_t",), 4, + bicImmCode, True) + # BIC (register) + bicCode = "destElem = srcElem1 & ~srcElem2;" + threeEqualRegInstX("bic", "BicDX", "SimdAluOp", ("uint64_t",), 2, bicCode) + threeEqualRegInstX("bic", "BicQX", "SimdAluOp", ("uint64_t",), 4, bicCode) + # BIF + bifCode = "destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);" + threeEqualRegInstX("bif", "BifDX", "SimdAluOp", ("uint64_t",), 2, bifCode, + True) + threeEqualRegInstX("bif", "BifQX", "SimdAluOp", ("uint64_t",), 4, bifCode, + True) + # BIT + bitCode = "destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);" + threeEqualRegInstX("bit", "BitDX", "SimdAluOp", ("uint64_t",), 2, bitCode, + True) + threeEqualRegInstX("bit", "BitQX", "SimdAluOp", ("uint64_t",), 4, bitCode, + True) + # BSL + bslCode = "destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);" + threeEqualRegInstX("bsl", "BslDX", "SimdAluOp", ("uint64_t",), 2, bslCode, + True) + threeEqualRegInstX("bsl", "BslQX", "SimdAluOp", ("uint64_t",), 4, bslCode, + True) + # CLS + clsCode = ''' + unsigned count = 0; + if (srcElem1 < 0) { + srcElem1 <<= 1; + while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) { + count++; + srcElem1 <<= 1; + } + } else { + srcElem1 <<= 1; + while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) { + count++; + srcElem1 <<= 1; + } + } + destElem = count; + ''' + twoEqualRegInstX("cls", "ClsDX", "SimdAluOp", smallSignedTypes, 2, clsCode) + twoEqualRegInstX("cls", "ClsQX", "SimdAluOp", smallSignedTypes, 4, clsCode) + # CLZ + clzCode = ''' + unsigned count = 0; + while (srcElem1 >= 0 && count < sizeof(Element) * 8) { + count++; + srcElem1 <<= 1; + } + destElem = count; + ''' + twoEqualRegInstX("clz", "ClzDX", "SimdAluOp", smallSignedTypes, 2, clzCode) + twoEqualRegInstX("clz", "ClzQX", "SimdAluOp", smallSignedTypes, 4, clzCode) + # CMEQ (register) + cmeqCode = "destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0;" + threeEqualRegInstX("cmeq", "CmeqDX", "SimdCmpOp", unsignedTypes, 2, + cmeqCode) + threeEqualRegInstX("cmeq", "CmeqQX", "SimdCmpOp", unsignedTypes, 4, + cmeqCode) + # CMEQ (zero) + cmeqZeroCode = "destElem = (srcElem1 == 0) ? (Element)(-1) : 0;" + twoEqualRegInstX("cmeq", "CmeqZeroDX", "SimdCmpOp", signedTypes, 2, + cmeqZeroCode) + twoEqualRegInstX("cmeq", "CmeqZeroQX", "SimdCmpOp", signedTypes, 4, + cmeqZeroCode) + # CMGE (register) + cmgeCode = "destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0;" + threeEqualRegInstX("cmge", "CmgeDX", "SimdCmpOp", signedTypes, 2, cmgeCode) + threeEqualRegInstX("cmge", "CmgeQX", "SimdCmpOp", signedTypes, 4, cmgeCode) + # CMGE (zero) + cmgeZeroCode = "destElem = (srcElem1 >= 0) ? (Element)(-1) : 0;" + twoEqualRegInstX("cmge", "CmgeZeroDX", "SimdCmpOp", signedTypes, 2, + cmgeZeroCode) + twoEqualRegInstX("cmge", "CmgeZeroQX", "SimdCmpOp", signedTypes, 4, + cmgeZeroCode) + # CMGT (register) + cmgtCode = "destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;" + threeEqualRegInstX("cmgt", "CmgtDX", "SimdCmpOp", signedTypes, 2, cmgtCode) + threeEqualRegInstX("cmgt", "CmgtQX", "SimdCmpOp", signedTypes, 4, cmgtCode) + # CMGT (zero) + cmgtZeroCode = "destElem = (srcElem1 > 0) ? 
(Element)(-1) : 0;" + twoEqualRegInstX("cmgt", "CmgtZeroDX", "SimdCmpOp", signedTypes, 2, + cmgtZeroCode) + twoEqualRegInstX("cmgt", "CmgtZeroQX", "SimdCmpOp", signedTypes, 4, + cmgtZeroCode) + # CMHI (register) + threeEqualRegInstX("cmhi", "CmhiDX", "SimdCmpOp", unsignedTypes, 2, + cmgtCode) + threeEqualRegInstX("cmhi", "CmhiQX", "SimdCmpOp", unsignedTypes, 4, + cmgtCode) + # CMHS (register) + threeEqualRegInstX("cmhs", "CmhsDX", "SimdCmpOp", unsignedTypes, 2, + cmgeCode) + threeEqualRegInstX("cmhs", "CmhsQX", "SimdCmpOp", unsignedTypes, 4, + cmgeCode) + # CMLE (zero) + cmleZeroCode = "destElem = (srcElem1 <= 0) ? (Element)(-1) : 0;" + twoEqualRegInstX("cmle", "CmleZeroDX", "SimdCmpOp", signedTypes, 2, + cmleZeroCode) + twoEqualRegInstX("cmle", "CmleZeroQX", "SimdCmpOp", signedTypes, 4, + cmleZeroCode) + # CMLT (zero) + cmltZeroCode = "destElem = (srcElem1 < 0) ? (Element)(-1) : 0;" + twoEqualRegInstX("cmlt", "CmltZeroDX", "SimdCmpOp", signedTypes, 2, + cmltZeroCode) + twoEqualRegInstX("cmlt", "CmltZeroQX", "SimdCmpOp", signedTypes, 4, + cmltZeroCode) + # CMTST (register) + tstCode = "destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;" + threeEqualRegInstX("cmtst", "CmtstDX", "SimdAluOp", unsignedTypes, 2, + tstCode) + threeEqualRegInstX("cmtst", "CmtstQX", "SimdAluOp", unsignedTypes, 4, + tstCode) + # CNT + cntCode = ''' + unsigned count = 0; + while (srcElem1 && count < sizeof(Element) * 8) { + count += srcElem1 & 0x1; + srcElem1 >>= 1; + } + destElem = count; + ''' + twoEqualRegInstX("cnt", "CntDX", "SimdAluOp", ("uint8_t",), 2, cntCode) + twoEqualRegInstX("cnt", "CntQX", "SimdAluOp", ("uint8_t",), 4, cntCode) + # DUP (element) + dupCode = "destElem = srcElem1;" + twoEqualRegInstX("dup", "DupElemDX", "SimdMiscOp", smallUnsignedTypes, 2, + dupCode, isDup=True, byElem=True) + twoEqualRegInstX("dup", "DupElemQX", "SimdMiscOp", unsignedTypes, 4, + dupCode, isDup=True, byElem=True) + twoEqualRegInstX("dup", "DupElemScX", "SimdMiscOp", unsignedTypes, 4, + dupCode, isDup=True, byElem=True, scalar=True) + # DUP (general register) + dupGprInstX("dup", "DupGprWDX", "SimdMiscOp", smallUnsignedTypes, 2, 'W') + dupGprInstX("dup", "DupGprWQX", "SimdMiscOp", smallUnsignedTypes, 4, 'W') + dupGprInstX("dup", "DupGprXQX", "SimdMiscOp", ("uint64_t",), 4, 'X') + # EOR + eorCode = "destElem = srcElem1 ^ srcElem2;" + threeEqualRegInstX("eor", "EorDX", "SimdAluOp", ("uint64_t",), 2, eorCode) + threeEqualRegInstX("eor", "EorQX", "SimdAluOp", ("uint64_t",), 4, eorCode) + # EXT + extCode = ''' + for (unsigned i = 0; i < eCount; i++) { + unsigned index = i + imm; + if (index < eCount) { + destReg.elements[i] = srcReg1.elements[index]; + } else { + index -= eCount; + if (index >= eCount) { + fault = new UndefinedInstruction(machInst, false, mnemonic); + } else { + destReg.elements[i] = srcReg2.elements[index]; + } + } + } + ''' + extInstX("Ext", "ExtDX", "SimdMiscOp", ("uint8_t",), 2, extCode) + extInstX("Ext", "ExtQX", "SimdMiscOp", ("uint8_t",), 4, extCode) + # FABD + fpOp = ''' + FPSCR fpscr = (FPSCR) FpscrExc; + destElem = %s; + FpscrExc = fpscr; + ''' + fabdCode = fpOp % "fplibAbs<Element>(fplibSub(srcElem1, srcElem2, fpscr))" + threeEqualRegInstX("fabd", "FabdDX", "SimdFloatAddOp", smallFloatTypes, 2, + fabdCode) + threeEqualRegInstX("fabd", "FabdQX", "SimdFloatAddOp", floatTypes, 4, + fabdCode) + threeEqualRegInstX("fabd", "FabdScX", "SimdFloatAddOp", floatTypes, 4, + fabdCode, scalar=True) + # FABS + fabsCode = fpOp % "fplibAbs<Element>(srcElem1)" + twoEqualRegInstX("Abs", "FabsDX", "SimdFloatAluOp", 
smallFloatTypes, 2, + fabsCode) + twoEqualRegInstX("Abs", "FabsQX", "SimdFloatAluOp", floatTypes, 4, + fabsCode) + # FACGE + fpCmpAbsOp = fpOp % ("fplibCompare%s<Element>(fplibAbs<Element>(srcElem1)," + " fplibAbs<Element>(srcElem2), fpscr) ? -1 : 0") + facgeCode = fpCmpAbsOp % "GE" + threeEqualRegInstX("facge", "FacgeDX", "SimdFloatCmpOp", smallFloatTypes, + 2, facgeCode) + threeEqualRegInstX("facge", "FacgeQX", "SimdFloatCmpOp", floatTypes, 4, + facgeCode) + threeEqualRegInstX("facge", "FacgeScX", "SimdFloatCmpOp", floatTypes, 4, + facgeCode, scalar=True) + # FACGT + facgtCode = fpCmpAbsOp % "GT" + threeEqualRegInstX("facgt", "FacgtDX", "SimdFloatCmpOp", smallFloatTypes, + 2, facgtCode) + threeEqualRegInstX("facgt", "FacgtQX", "SimdFloatCmpOp", floatTypes, 4, + facgtCode) + threeEqualRegInstX("facgt", "FacgtScX", "SimdFloatCmpOp", floatTypes, 4, + facgtCode, scalar=True) + # FADD + fpBinOp = fpOp % "fplib%s<Element>(srcElem1, srcElem2, fpscr)" + faddCode = fpBinOp % "Add" + threeEqualRegInstX("fadd", "FaddDX", "SimdFloatAddOp", smallFloatTypes, 2, + faddCode) + threeEqualRegInstX("fadd", "FaddQX", "SimdFloatAddOp", floatTypes, 4, + faddCode) + # FADDP (scalar) + twoRegPairwiseScInstX("faddp", "FaddpScDX", "SimdFloatAddOp", + ("uint32_t",), 2, faddCode) + twoRegPairwiseScInstX("faddp", "FaddpScQX", "SimdFloatAddOp", + ("uint64_t",), 4, faddCode) + # FADDP (vector) + threeEqualRegInstX("faddp", "FaddpDX", "SimdFloatAddOp", smallFloatTypes, + 2, faddCode, pairwise=True) + threeEqualRegInstX("faddp", "FaddpQX", "SimdFloatAddOp", floatTypes, 4, + faddCode, pairwise=True) + # FCMEQ (register) + fpCmpOp = fpOp % ("fplibCompare%s<Element>(srcElem1, srcElem2, fpscr) ?" + " -1 : 0") + fcmeqCode = fpCmpOp % "EQ" + threeEqualRegInstX("fcmeq", "FcmeqDX", "SimdFloatCmpOp", smallFloatTypes, + 2, fcmeqCode) + threeEqualRegInstX("fcmeq", "FcmeqQX", "SimdFloatCmpOp", floatTypes, 4, + fcmeqCode) + threeEqualRegInstX("fcmeq", "FcmeqScX", "SimdFloatCmpOp", floatTypes, 4, + fcmeqCode, scalar=True) + # FCMEQ (zero) + fpCmpZeroOp = fpOp % "fplibCompare%s<Element>(srcElem1, 0, fpscr) ? 
-1 : 0" + fcmeqZeroCode = fpCmpZeroOp % "EQ" + twoEqualRegInstX("fcmeq", "FcmeqZeroDX", "SimdFloatCmpOp", smallFloatTypes, + 2, fcmeqZeroCode) + twoEqualRegInstX("fcmeq", "FcmeqZeroQX", "SimdFloatCmpOp", floatTypes, 4, + fcmeqZeroCode) + twoEqualRegInstX("fcmeq", "FcmeqZeroScX", "SimdFloatCmpOp", floatTypes, 4, + fcmeqZeroCode, scalar=True) + # FCMGE (register) + fcmgeCode = fpCmpOp % "GE" + threeEqualRegInstX("fcmge", "FcmgeDX", "SimdFloatCmpOp", smallFloatTypes, + 2, fcmgeCode) + threeEqualRegInstX("fcmge", "FcmgeQX", "SimdFloatCmpOp", floatTypes, 4, + fcmgeCode) + threeEqualRegInstX("fcmge", "FcmgeScX", "SimdFloatCmpOp", floatTypes, 4, + fcmgeCode, scalar=True) + # FCMGE (zero) + fcmgeZeroCode = fpCmpZeroOp % "GE" + twoEqualRegInstX("fcmge", "FcmgeZeroDX", "SimdFloatCmpOp", smallFloatTypes, + 2, fcmgeZeroCode) + twoEqualRegInstX("fcmge", "FcmgeZeroQX", "SimdFloatCmpOp", floatTypes, 4, + fcmgeZeroCode) + twoEqualRegInstX("fcmge", "FcmgeZeroScX", "SimdFloatCmpOp", floatTypes, 4, + fcmgeZeroCode, scalar=True) + # FCMGT (register) + fcmgtCode = fpCmpOp % "GT" + threeEqualRegInstX("fcmgt", "FcmgtDX", "SimdFloatCmpOp", smallFloatTypes, + 2, fcmgtCode) + threeEqualRegInstX("fcmgt", "FcmgtQX", "SimdFloatCmpOp", floatTypes, 4, + fcmgtCode) + threeEqualRegInstX("fcmgt", "FcmgtScX", "SimdFloatCmpOp", floatTypes, 4, + fcmgtCode, scalar=True) + # FCMGT (zero) + fcmgtZeroCode = fpCmpZeroOp % "GT" + twoEqualRegInstX("fcmgt", "FcmgtZeroDX", "SimdFloatCmpOp", smallFloatTypes, + 2, fcmgtZeroCode) + twoEqualRegInstX("fcmgt", "FcmgtZeroQX", "SimdFloatCmpOp", floatTypes, 4, + fcmgtZeroCode) + twoEqualRegInstX("fcmgt", "FcmgtZeroScX", "SimdFloatCmpOp", floatTypes, 4, + fcmgtZeroCode, scalar=True) + # FCMLE (zero) + fpCmpRevZeroOp = fpOp % ("fplibCompare%s<Element>(0, srcElem1, fpscr) ?" 
+ " -1 : 0") + fcmleZeroCode = fpCmpRevZeroOp % "GE" + twoEqualRegInstX("fcmle", "FcmleZeroDX", "SimdFloatCmpOp", smallFloatTypes, + 2, fcmleZeroCode) + twoEqualRegInstX("fcmle", "FcmleZeroQX", "SimdFloatCmpOp", floatTypes, 4, + fcmleZeroCode) + twoEqualRegInstX("fcmle", "FcmleZeroScX", "SimdFloatCmpOp", floatTypes, 4, + fcmleZeroCode, scalar=True) + # FCMLT (zero) + fcmltZeroCode = fpCmpRevZeroOp % "GT" + twoEqualRegInstX("fcmlt", "FcmltZeroDX", "SimdFloatCmpOp", smallFloatTypes, + 2, fcmltZeroCode) + twoEqualRegInstX("fcmlt", "FcmltZeroQX", "SimdFloatCmpOp", floatTypes, 4, + fcmltZeroCode) + twoEqualRegInstX("fcmlt", "FcmltZeroScX", "SimdFloatCmpOp", floatTypes, 4, + fcmltZeroCode, scalar=True) + # FCVTAS + fcvtCode = fpOp % ("fplibFPToFixed<Element, Element>(" + "srcElem1, %s, %s, %s, fpscr)") + fcvtasCode = fcvtCode % ("0", "false", "FPRounding_TIEAWAY") + twoEqualRegInstX("fcvtas", "FcvtasDX", "SimdCvtOp", smallFloatTypes, 2, + fcvtasCode) + twoEqualRegInstX("fcvtas", "FcvtasQX", "SimdCvtOp", floatTypes, 4, + fcvtasCode) + twoEqualRegInstX("fcvtas", "FcvtasScX", "SimdCvtOp", floatTypes, 4, + fcvtasCode, scalar=True) + # FCVTAU + fcvtauCode = fcvtCode % ("0", "true", "FPRounding_TIEAWAY") + twoEqualRegInstX("fcvtau", "FcvtauDX", "SimdCvtOp", smallFloatTypes, 2, + fcvtauCode) + twoEqualRegInstX("fcvtau", "FcvtauQX", "SimdCvtOp", floatTypes, 4, + fcvtauCode) + twoEqualRegInstX("fcvtau", "FcvtauScX", "SimdCvtOp", floatTypes, 4, + fcvtauCode, scalar=True) + # FCVTL, FCVTL2 + fcvtlCode = fpOp % ("fplibConvert<Element, BigElement>(" + "srcElem1, FPCRRounding(fpscr), fpscr)") + twoRegLongInstX("fcvtl", "FcvtlX", "SimdCvtOp", ("uint16_t", "uint32_t"), + fcvtlCode) + twoRegLongInstX("fcvtl", "Fcvtl2X", "SimdCvtOp", ("uint16_t", "uint32_t"), + fcvtlCode, hi=True) + # FCVTMS + fcvtmsCode = fcvtCode % ("0", "false", "FPRounding_NEGINF") + twoEqualRegInstX("fcvtms", "FcvtmsDX", "SimdCvtOp", smallFloatTypes, 2, + fcvtmsCode) + twoEqualRegInstX("fcvtms", "FcvtmsQX", "SimdCvtOp", floatTypes, 4, + fcvtmsCode) + twoEqualRegInstX("fcvtms", "FcvtmsScX", "SimdCvtOp", floatTypes, 4, + fcvtmsCode, scalar=True) + # FCVTMU + fcvtmuCode = fcvtCode % ("0", "true", "FPRounding_NEGINF") + twoEqualRegInstX("fcvtmu", "FcvtmuDX", "SimdCvtOp", smallFloatTypes, 2, + fcvtmuCode) + twoEqualRegInstX("fcvtmu", "FcvtmuQX", "SimdCvtOp", floatTypes, 4, + fcvtmuCode) + twoEqualRegInstX("fcvtmu", "FcvtmuScX", "SimdCvtOp", floatTypes, 4, + fcvtmuCode, scalar=True) + # FCVTN, FCVTN2 + fcvtnCode = fpOp % ("fplibConvert<BigElement, Element>(" + "srcElem1, FPCRRounding(fpscr), fpscr)") + twoRegNarrowInstX("fcvtn", "FcvtnX", "SimdCvtOp", + ("uint16_t", "uint32_t"), fcvtnCode) + twoRegNarrowInstX("fcvtn", "Fcvtn2X", "SimdCvtOp", + ("uint16_t", "uint32_t"), fcvtnCode, hi=True) + # FCVTNS + fcvtnsCode = fcvtCode % ("0", "false", "FPRounding_TIEEVEN") + twoEqualRegInstX("fcvtns", "FcvtnsDX", "SimdCvtOp", smallFloatTypes, 2, + fcvtnsCode) + twoEqualRegInstX("fcvtns", "FcvtnsQX", "SimdCvtOp", floatTypes, 4, + fcvtnsCode) + twoEqualRegInstX("fcvtns", "FcvtnsScX", "SimdCvtOp", floatTypes, 4, + fcvtnsCode, scalar=True) + # FCVTNU + fcvtnuCode = fcvtCode % ("0", "true", "FPRounding_TIEEVEN") + twoEqualRegInstX("fcvtnu", "FcvtnuDX", "SimdCvtOp", smallFloatTypes, 2, + fcvtnuCode) + twoEqualRegInstX("fcvtnu", "FcvtnuQX", "SimdCvtOp", floatTypes, 4, + fcvtnuCode) + twoEqualRegInstX("fcvtnu", "FcvtnuScX", "SimdCvtOp", floatTypes, 4, + fcvtnuCode, scalar=True) + # FCVTPS + fcvtpsCode = fcvtCode % ("0", "false", "FPRounding_POSINF") + 
twoEqualRegInstX("fcvtps", "FcvtpsDX", "SimdCvtOp", smallFloatTypes, 2, + fcvtpsCode) + twoEqualRegInstX("fcvtps", "FcvtpsQX", "SimdCvtOp", floatTypes, 4, + fcvtpsCode) + twoEqualRegInstX("fcvtps", "FcvtpsScX", "SimdCvtOp", floatTypes, 4, + fcvtpsCode, scalar=True) + # FCVTPU + fcvtpuCode = fcvtCode % ("0", "true", "FPRounding_POSINF") + twoEqualRegInstX("fcvtpu", "FcvtpuDX", "SimdCvtOp", smallFloatTypes, 2, + fcvtpuCode) + twoEqualRegInstX("fcvtpu", "FcvtpuQX", "SimdCvtOp", floatTypes, 4, + fcvtpuCode) + twoEqualRegInstX("fcvtpu", "FcvtpuScX", "SimdCvtOp", floatTypes, 4, + fcvtpuCode, scalar=True) + # FCVTXN, FCVTXN2 + fcvtxnCode = fpOp % ("fplibConvert<BigElement, Element>(" + "srcElem1, FPRounding_ODD, fpscr)") + twoRegNarrowInstX("fcvtxn", "FcvtxnX", "SimdCvtOp", smallFloatTypes, + fcvtxnCode) + twoRegNarrowInstX("fcvtxn", "Fcvtxn2X", "SimdCvtOp", smallFloatTypes, + fcvtxnCode, hi=True) + twoRegNarrowInstX("fcvtxn", "FcvtxnScX", "SimdCvtOp", smallFloatTypes, + fcvtxnCode, scalar=True) + # FCVTZS (fixed-point) + fcvtzsCode = fcvtCode % ("imm", "false", "FPRounding_ZERO") + twoEqualRegInstX("fcvtzs", "FcvtzsFixedDX", "SimdCvtOp", smallFloatTypes, + 2, fcvtzsCode, hasImm=True) + twoEqualRegInstX("fcvtzs", "FcvtzsFixedQX", "SimdCvtOp", floatTypes, 4, + fcvtzsCode, hasImm=True) + twoEqualRegInstX("fcvtzs", "FcvtzsFixedScX", "SimdCvtOp", floatTypes, 4, + fcvtzsCode, hasImm=True, scalar=True) + # FCVTZS (integer) + fcvtzsIntCode = fcvtCode % ("0", "false", "FPRounding_ZERO") + twoEqualRegInstX("fcvtzs", "FcvtzsIntDX", "SimdCvtOp", smallFloatTypes, + 2, fcvtzsIntCode) + twoEqualRegInstX("fcvtzs", "FcvtzsIntQX", "SimdCvtOp", floatTypes, 4, + fcvtzsIntCode) + twoEqualRegInstX("fcvtzs", "FcvtzsIntScX", "SimdCvtOp", floatTypes, 4, + fcvtzsIntCode, scalar=True) + # FCVTZU (fixed-point) + fcvtzuCode = fcvtCode % ("imm", "true", "FPRounding_ZERO") + twoEqualRegInstX("fcvtzu", "FcvtzuFixedDX", "SimdCvtOp", smallFloatTypes, + 2, fcvtzuCode, hasImm=True) + twoEqualRegInstX("fcvtzu", "FcvtzuFixedQX", "SimdCvtOp", floatTypes, 4, + fcvtzuCode, hasImm=True) + twoEqualRegInstX("fcvtzu", "FcvtzuFixedScX", "SimdCvtOp", floatTypes, 4, + fcvtzuCode, hasImm=True, scalar=True) + # FCVTZU (integer) + fcvtzuIntCode = fcvtCode % ("0", "true", "FPRounding_ZERO") + twoEqualRegInstX("fcvtzu", "FcvtzuIntDX", "SimdCvtOp", smallFloatTypes, 2, + fcvtzuIntCode) + twoEqualRegInstX("fcvtzu", "FcvtzuIntQX", "SimdCvtOp", floatTypes, 4, + fcvtzuIntCode) + twoEqualRegInstX("fcvtzu", "FcvtzuIntScX", "SimdCvtOp", floatTypes, 4, + fcvtzuIntCode, scalar=True) + # FDIV + fdivCode = fpBinOp % "Div" + threeEqualRegInstX("fdiv", "FdivDX", "SimdFloatDivOp", smallFloatTypes, 2, + fdivCode) + threeEqualRegInstX("fdiv", "FdivQX", "SimdFloatDivOp", floatTypes, 4, + fdivCode) + # FMAX + fmaxCode = fpBinOp % "Max" + threeEqualRegInstX("fmax", "FmaxDX", "SimdFloatCmpOp", smallFloatTypes, 2, + fmaxCode) + threeEqualRegInstX("fmax", "FmaxQX", "SimdFloatCmpOp", floatTypes, 4, + fmaxCode) + # FMAXNM + fmaxnmCode = fpBinOp % "MaxNum" + threeEqualRegInstX("fmaxnm", "FmaxnmDX", "SimdFloatCmpOp", smallFloatTypes, + 2, fmaxnmCode) + threeEqualRegInstX("fmaxnm", "FmaxnmQX", "SimdFloatCmpOp", floatTypes, 4, + fmaxnmCode) + # FMAXNMP (scalar) + twoRegPairwiseScInstX("fmaxnmp", "FmaxnmpScDX", "SimdFloatCmpOp", + ("uint32_t",), 2, fmaxnmCode) + twoRegPairwiseScInstX("fmaxnmp", "FmaxnmpScQX", "SimdFloatCmpOp", + ("uint64_t",), 4, fmaxnmCode) + # FMAXNMP (vector) + threeEqualRegInstX("fmaxnmp", "FmaxnmpDX", "SimdFloatCmpOp", + smallFloatTypes, 2, fmaxnmCode, 
pairwise=True) + threeEqualRegInstX("fmaxnmp", "FmaxnmpQX", "SimdFloatCmpOp", floatTypes, 4, + fmaxnmCode, pairwise=True) + # FMAXNMV + # Note: SimdFloatCmpOp can be a bit optimistic here + fpAcrossOp = fpOp % "fplib%s<Element>(destElem, srcElem1, fpscr)" + fmaxnmAcrossCode = fpAcrossOp % "MaxNum" + twoRegAcrossInstX("fmaxnmv", "FmaxnmvQX", "SimdFloatCmpOp", ("uint32_t",), + 4, fmaxnmAcrossCode) + # FMAXP (scalar) + twoRegPairwiseScInstX("fmaxp", "FmaxpScDX", "SimdFloatCmpOp", + ("uint32_t",), 2, fmaxCode) + twoRegPairwiseScInstX("fmaxp", "FmaxpScQX", "SimdFloatCmpOp", + ("uint64_t",), 4, fmaxCode) + # FMAXP (vector) + threeEqualRegInstX("fmaxp", "FmaxpDX", "SimdFloatCmpOp", smallFloatTypes, + 2, fmaxCode, pairwise=True) + threeEqualRegInstX("fmaxp", "FmaxpQX", "SimdFloatCmpOp", floatTypes, 4, + fmaxCode, pairwise=True) + # FMAXV + # Note: SimdFloatCmpOp can be a bit optimistic here + fmaxAcrossCode = fpAcrossOp % "Max" + twoRegAcrossInstX("fmaxv", "FmaxvQX", "SimdFloatCmpOp", ("uint32_t",), 4, + fmaxAcrossCode) + # FMIN + fminCode = fpBinOp % "Min" + threeEqualRegInstX("fmin", "FminDX", "SimdFloatCmpOp", smallFloatTypes, 2, + fminCode) + threeEqualRegInstX("fmin", "FminQX", "SimdFloatCmpOp", floatTypes, 4, + fminCode) + # FMINNM + fminnmCode = fpBinOp % "MinNum" + threeEqualRegInstX("fminnm", "FminnmDX", "SimdFloatCmpOp", smallFloatTypes, + 2, fminnmCode) + threeEqualRegInstX("fminnm", "FminnmQX", "SimdFloatCmpOp", floatTypes, 4, + fminnmCode) + # FMINNMP (scalar) + twoRegPairwiseScInstX("fminnmp", "FminnmpScDX", "SimdFloatCmpOp", + ("uint32_t",), 2, fminnmCode) + twoRegPairwiseScInstX("fminnmp", "FminnmpScQX", "SimdFloatCmpOp", + ("uint64_t",), 4, fminnmCode) + # FMINNMP (vector) + threeEqualRegInstX("fminnmp", "FminnmpDX", "SimdFloatCmpOp", + smallFloatTypes, 2, fminnmCode, pairwise=True) + threeEqualRegInstX("fminnmp", "FminnmpQX", "SimdFloatCmpOp", floatTypes, 4, + fminnmCode, pairwise=True) + # FMINNMV + # Note: SimdFloatCmpOp can be a bit optimistic here + fminnmAcrossCode = fpAcrossOp % "MinNum" + twoRegAcrossInstX("fminnmv", "FminnmvQX", "SimdFloatCmpOp", ("uint32_t",), + 4, fminnmAcrossCode) + # FMINP (scalar) + twoRegPairwiseScInstX("fminp", "FminpScDX", "SimdFloatCmpOp", + ("uint32_t",), 2, fminCode) + twoRegPairwiseScInstX("fminp", "FminpScQX", "SimdFloatCmpOp", + ("uint64_t",), 4, fminCode) + # FMINP (vector) + threeEqualRegInstX("fminp", "FminpDX", "SimdFloatCmpOp", smallFloatTypes, + 2, fminCode, pairwise=True) + threeEqualRegInstX("fminp", "FminpQX", "SimdFloatCmpOp", floatTypes, 4, + fminCode, pairwise=True) + # FMINV + # Note: SimdFloatCmpOp can be a bit optimistic here + fminAcrossCode = fpAcrossOp % "Min" + twoRegAcrossInstX("fminv", "FminvQX", "SimdFloatCmpOp", ("uint32_t",), 4, + fminAcrossCode) + # FMLA (by element) + fmlaCode = fpOp % ("fplibMulAdd<Element>(" + "destElem, srcElem1, srcElem2, fpscr)") + threeEqualRegInstX("fmla", "FmlaElemDX", "SimdFloatMultAccOp", + smallFloatTypes, 2, fmlaCode, True, byElem=True) + threeEqualRegInstX("fmla", "FmlaElemQX", "SimdFloatMultAccOp", floatTypes, + 4, fmlaCode, True, byElem=True) + threeEqualRegInstX("fmla", "FmlaElemScX", "SimdFloatMultAccOp", floatTypes, + 4, fmlaCode, True, byElem=True, scalar=True) + # FMLA (vector) + threeEqualRegInstX("fmla", "FmlaDX", "SimdFloatMultAccOp", smallFloatTypes, + 2, fmlaCode, True) + threeEqualRegInstX("fmla", "FmlaQX", "SimdFloatMultAccOp", floatTypes, 4, + fmlaCode, True) + # FMLS (by element) + fmlsCode = fpOp % ("fplibMulAdd<Element>(destElem," + " fplibNeg<Element>(srcElem1), 
srcElem2, fpscr)") + threeEqualRegInstX("fmls", "FmlsElemDX", "SimdFloatMultAccOp", + smallFloatTypes, 2, fmlsCode, True, byElem=True) + threeEqualRegInstX("fmls", "FmlsElemQX", "SimdFloatMultAccOp", floatTypes, + 4, fmlsCode, True, byElem=True) + threeEqualRegInstX("fmls", "FmlsElemScX", "SimdFloatMultAccOp", floatTypes, + 4, fmlsCode, True, byElem=True, scalar=True) + # FMLS (vector) + threeEqualRegInstX("fmls", "FmlsDX", "SimdFloatMultAccOp", smallFloatTypes, + 2, fmlsCode, True) + threeEqualRegInstX("fmls", "FmlsQX", "SimdFloatMultAccOp", floatTypes, 4, + fmlsCode, True) + # FMOV + fmovCode = 'destElem = imm;' + oneRegImmInstX("fmov", "FmovDX", "SimdMiscOp", smallFloatTypes, 2, + fmovCode) + oneRegImmInstX("fmov", "FmovQX", "SimdMiscOp", floatTypes, 4, fmovCode) + # FMUL (by element) + fmulCode = fpBinOp % "Mul" + threeEqualRegInstX("fmul", "FmulElemDX", "SimdFloatMultOp", + smallFloatTypes, 2, fmulCode, byElem=True) + threeEqualRegInstX("fmul", "FmulElemQX", "SimdFloatMultOp", floatTypes, 4, + fmulCode, byElem=True) + threeEqualRegInstX("fmul", "FmulElemScX", "SimdFloatMultOp", floatTypes, 4, + fmulCode, byElem=True, scalar=True) + # FMUL (vector) + threeEqualRegInstX("fmul", "FmulDX", "SimdFloatMultOp", smallFloatTypes, 2, + fmulCode) + threeEqualRegInstX("fmul", "FmulQX", "SimdFloatMultOp", floatTypes, 4, + fmulCode) + # FMULX + fmulxCode = fpBinOp % "MulX" + threeEqualRegInstX("fmulx", "FmulxDX", "SimdFloatMultOp", smallFloatTypes, + 2, fmulxCode) + threeEqualRegInstX("fmulx", "FmulxQX", "SimdFloatMultOp", floatTypes, 4, + fmulxCode) + threeEqualRegInstX("fmulx", "FmulxScX", "SimdFloatMultOp", floatTypes, 4, + fmulxCode, scalar=True) + # FMULX (by element) + threeEqualRegInstX("fmulx", "FmulxElemDX", "SimdFloatMultOp", + smallFloatTypes, 2, fmulxCode, byElem=True) + threeEqualRegInstX("fmulx", "FmulxElemQX", "SimdFloatMultOp", floatTypes, + 4, fmulxCode, byElem=True) + threeEqualRegInstX("fmulx", "FmulxElemScX", "SimdFloatMultOp", floatTypes, + 4, fmulxCode, byElem=True, scalar=True) + # FNEG + fnegCode = fpOp % "fplibNeg<Element>(srcElem1)" + twoEqualRegInstX("Neg", "FnegDX", "SimdFloatAluOp", smallFloatTypes, 2, + fnegCode) + twoEqualRegInstX("Neg", "FnegQX", "SimdFloatAluOp", floatTypes, 4, + fnegCode) + # FRECPE + frecpeCode = fpOp % "fplibRecipEstimate<Element>(srcElem1, fpscr)" + twoEqualRegInstX("frecpe", "FrecpeDX", "SimdFloatMultAccOp", + smallFloatTypes, 2, frecpeCode) + twoEqualRegInstX("frecpe", "FrecpeQX", "SimdFloatMultAccOp", floatTypes, 4, + frecpeCode) + twoEqualRegInstX("frecpe", "FrecpeScX", "SimdFloatMultAccOp", floatTypes, + 4, frecpeCode, scalar=True) + # FRECPS + frecpsCode = fpBinOp % "RecipStepFused" + threeEqualRegInstX("frecps", "FrecpsDX", "SimdFloatMultAccOp", + smallFloatTypes, 2, frecpsCode) + threeEqualRegInstX("frecps", "FrecpsQX", "SimdFloatMultAccOp", floatTypes, + 4, frecpsCode) + threeEqualRegInstX("frecps", "FrecpsScX", "SimdFloatMultAccOp", floatTypes, + 4, frecpsCode, scalar=True) + # FRECPX + frecpxCode = fpOp % "fplibRecpX<Element>(srcElem1, fpscr)" + twoEqualRegInstX("frecpx", "FrecpxX", "SimdFloatMultAccOp", floatTypes, 4, + frecpxCode, scalar=True) + # FRINTA + frintCode = fpOp % "fplibRoundInt<Element>(srcElem1, %s, %s, fpscr)" + frintaCode = frintCode % ("FPRounding_TIEAWAY", "false") + twoEqualRegInstX("frinta", "FrintaDX", "SimdCvtOp", smallFloatTypes, 2, + frintaCode) + twoEqualRegInstX("frinta", "FrintaQX", "SimdCvtOp", floatTypes, 4, + frintaCode) + # FRINTI + frintiCode = frintCode % ("FPCRRounding(fpscr)", "false") + 
twoEqualRegInstX("frinti", "FrintiDX", "SimdCvtOp", smallFloatTypes, 2, + frintiCode) + twoEqualRegInstX("frinti", "FrintiQX", "SimdCvtOp", floatTypes, 4, + frintiCode) + # FRINTM + frintmCode = frintCode % ("FPRounding_NEGINF", "false") + twoEqualRegInstX("frintm", "FrintmDX", "SimdCvtOp", smallFloatTypes, 2, + frintmCode) + twoEqualRegInstX("frintm", "FrintmQX", "SimdCvtOp", floatTypes, 4, + frintmCode) + # FRINTN + frintnCode = frintCode % ("FPRounding_TIEEVEN", "false") + twoEqualRegInstX("frintn", "FrintnDX", "SimdCvtOp", smallFloatTypes, 2, + frintnCode) + twoEqualRegInstX("frintn", "FrintnQX", "SimdCvtOp", floatTypes, 4, + frintnCode) + # FRINTP + frintpCode = frintCode % ("FPRounding_POSINF", "false") + twoEqualRegInstX("frintp", "FrintpDX", "SimdCvtOp", smallFloatTypes, 2, + frintpCode) + twoEqualRegInstX("frintp", "FrintpQX", "SimdCvtOp", floatTypes, 4, + frintpCode) + # FRINTX + frintxCode = frintCode % ("FPCRRounding(fpscr)", "true") + twoEqualRegInstX("frintx", "FrintxDX", "SimdCvtOp", smallFloatTypes, 2, + frintxCode) + twoEqualRegInstX("frintx", "FrintxQX", "SimdCvtOp", floatTypes, 4, + frintxCode) + # FRINTZ + frintzCode = frintCode % ("FPRounding_ZERO", "false") + twoEqualRegInstX("frintz", "FrintzDX", "SimdCvtOp", smallFloatTypes, 2, + frintzCode) + twoEqualRegInstX("frintz", "FrintzQX", "SimdCvtOp", floatTypes, 4, + frintzCode) + # FRSQRTE + frsqrteCode = fpOp % "fplibRSqrtEstimate<Element>(srcElem1, fpscr)" + twoEqualRegInstX("frsqrte", "FrsqrteDX", "SimdFloatSqrtOp", + smallFloatTypes, 2, frsqrteCode) + twoEqualRegInstX("frsqrte", "FrsqrteQX", "SimdFloatSqrtOp", floatTypes, 4, + frsqrteCode) + twoEqualRegInstX("frsqrte", "FrsqrteScX", "SimdFloatSqrtOp", floatTypes, 4, + frsqrteCode, scalar=True) + # FRSQRTS + frsqrtsCode = fpBinOp % "RSqrtStepFused" + threeEqualRegInstX("frsqrts", "FrsqrtsDX", "SimdFloatMiscOp", + smallFloatTypes, 2, frsqrtsCode) + threeEqualRegInstX("frsqrts", "FrsqrtsQX", "SimdFloatMiscOp", floatTypes, + 4, frsqrtsCode) + threeEqualRegInstX("frsqrts", "FrsqrtsScX", "SimdFloatMiscOp", floatTypes, + 4, frsqrtsCode, scalar=True) + # FSQRT + fsqrtCode = fpOp % "fplibSqrt<Element>(srcElem1, fpscr)" + twoEqualRegInstX("fsqrt", "FsqrtDX", "SimdFloatSqrtOp", smallFloatTypes, 2, + fsqrtCode) + twoEqualRegInstX("fsqrt", "FsqrtQX", "SimdFloatSqrtOp", floatTypes, 4, + fsqrtCode) + # FSUB + fsubCode = fpBinOp % "Sub" + threeEqualRegInstX("fsub", "FsubDX", "SimdFloatAddOp", smallFloatTypes, 2, + fsubCode) + threeEqualRegInstX("fsub", "FsubQX", "SimdFloatAddOp", floatTypes, 4, + fsubCode) + # INS (element) + insFromVecElemInstX("ins", "InsElemX", "SimdMiscOp", unsignedTypes, 4) + # INS (general register) + insFromGprInstX("ins", "InsGprWX", "SimdMiscOp", smallUnsignedTypes, 4, + 'W') + insFromGprInstX("ins", "InsGprXX", "SimdMiscOp", unsignedTypes, 4, 'X') + # MLA (by element) + mlaCode = "destElem += srcElem1 * srcElem2;" + threeEqualRegInstX("mla", "MlaElemDX", "SimdMultAccOp", + ("uint16_t", "uint32_t"), 2, mlaCode, True, byElem=True) + threeEqualRegInstX("mla", "MlaElemQX", "SimdMultAccOp", + ("uint16_t", "uint32_t"), 4, mlaCode, True, byElem=True) + # MLA (vector) + threeEqualRegInstX("mla", "MlaDX", "SimdMultAccOp", smallUnsignedTypes, 2, + mlaCode, True) + threeEqualRegInstX("mla", "MlaQX", "SimdMultAccOp", smallUnsignedTypes, 4, + mlaCode, True) + # MLS (by element) + mlsCode = "destElem -= srcElem1 * srcElem2;" + threeEqualRegInstX("mls", "MlsElemDX", "SimdMultAccOp", + ("uint16_t", "uint32_t"), 2, mlsCode, True, byElem=True) + threeEqualRegInstX("mls", 
"MlsElemQX", "SimdMultAccOp", + ("uint16_t", "uint32_t"), 4, mlsCode, True, byElem=True) + # MLS (vector) + threeEqualRegInstX("mls", "MlsDX", "SimdMultAccOp", smallUnsignedTypes, 2, + mlsCode, True) + threeEqualRegInstX("mls", "MlsQX", "SimdMultAccOp", smallUnsignedTypes, 4, + mlsCode, True) + # MOV (element) -> alias to INS (element) + # MOV (from general) -> alias to INS (general register) + # MOV (scalar) -> alias to DUP (element) + # MOV (to general) -> alias to UMOV + # MOV (vector) -> alias to ORR (register) + # MOVI + movImmCode = "destElem = imm;" + oneRegImmInstX("movi", "MoviDX", "SimdMiscOp", ("uint64_t",), 2, + movImmCode) + oneRegImmInstX("movi", "MoviQX", "SimdMiscOp", ("uint64_t",), 4, + movImmCode) + # MUL (by element) + mulCode = "destElem = srcElem1 * srcElem2;" + threeEqualRegInstX("mul", "MulElemDX", "SimdMultOp", + ("uint16_t", "uint32_t"), 2, mulCode, byElem=True) + threeEqualRegInstX("mul", "MulElemQX", "SimdMultOp", + ("uint16_t", "uint32_t"), 4, mulCode, byElem=True) + # MUL (vector) + threeEqualRegInstX("mul", "MulDX", "SimdMultOp", smallUnsignedTypes, 2, + mulCode) + threeEqualRegInstX("mul", "MulQX", "SimdMultOp", smallUnsignedTypes, 4, + mulCode) + # MVN + mvnCode = "destElem = ~srcElem1;" + twoEqualRegInstX("mvn", "MvnDX", "SimdAluOp", ("uint64_t",), 2, mvnCode) + twoEqualRegInstX("mvn", "MvnQX", "SimdAluOp", ("uint64_t",), 4, mvnCode) + # MVNI + mvniCode = "destElem = ~imm;" + oneRegImmInstX("mvni", "MvniDX", "SimdAluOp", ("uint64_t",), 2, mvniCode) + oneRegImmInstX("mvni", "MvniQX", "SimdAluOp", ("uint64_t",), 4, mvniCode) + # NEG + negCode = "destElem = -srcElem1;" + twoEqualRegInstX("neg", "NegDX", "SimdAluOp", signedTypes, 2, negCode) + twoEqualRegInstX("neg", "NegQX", "SimdAluOp", signedTypes, 4, negCode) + # NOT -> alias to MVN + # ORN + ornCode = "destElem = srcElem1 | ~srcElem2;" + threeEqualRegInstX("orn", "OrnDX", "SimdAluOp", ("uint64_t",), 2, ornCode) + threeEqualRegInstX("orn", "OrnQX", "SimdAluOp", ("uint64_t",), 4, ornCode) + # ORR (immediate) + orrImmCode = "destElem |= imm;" + oneRegImmInstX("orr", "OrrImmDX", "SimdAluOp", ("uint64_t",), 2, + orrImmCode, True) + oneRegImmInstX("orr", "OrrImmQX", "SimdAluOp", ("uint64_t",), 4, + orrImmCode, True) + # ORR (register) + orrCode = "destElem = srcElem1 | srcElem2;" + threeEqualRegInstX("orr", "OrrDX", "SimdAluOp", ("uint64_t",), 2, orrCode) + threeEqualRegInstX("orr", "OrrQX", "SimdAluOp", ("uint64_t",), 4, orrCode) + # PMUL + pmulCode = ''' + destElem = 0; + for (unsigned j = 0; j < sizeof(Element) * 8; j++) { + if (bits(srcElem2, j)) + destElem ^= srcElem1 << j; + } + ''' + threeEqualRegInstX("pmul", "PmulDX", "SimdMultOp", ("uint8_t",), 2, + pmulCode) + threeEqualRegInstX("pmul", "PmulQX", "SimdMultOp", ("uint8_t",), 4, + pmulCode) + # PMULL, PMULL2 + # Note: 64-bit PMULL is not available (Crypto. 
Extension) + pmullCode = ''' + destElem = 0; + for (unsigned j = 0; j < sizeof(Element) * 8; j++) { + if (bits(srcElem2, j)) + destElem ^= (BigElement)srcElem1 << j; + } + ''' + threeRegLongInstX("pmull", "PmullX", "SimdMultOp", ("uint8_t",), pmullCode) + threeRegLongInstX("pmull", "Pmull2X", "SimdMultOp", ("uint8_t",), + pmullCode, hi=True) + # RADDHN, RADDHN2 + raddhnCode = ''' + destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 + + ((BigElement)1 << (sizeof(Element) * 8 - 1))) >> + (sizeof(Element) * 8); + ''' + threeRegNarrowInstX("raddhn", "RaddhnX", "SimdAddOp", smallUnsignedTypes, + raddhnCode) + threeRegNarrowInstX("raddhn2", "Raddhn2X", "SimdAddOp", smallUnsignedTypes, + raddhnCode, hi=True) + # RBIT + rbitCode = ''' + destElem = 0; + Element temp = srcElem1; + for (int i = 0; i < 8 * sizeof(Element); i++) { + destElem = destElem | ((temp & 0x1) << + (8 * sizeof(Element) - 1 - i)); + temp >>= 1; + } + ''' + twoEqualRegInstX("rbit", "RbitDX", "SimdAluOp", ("uint8_t",), 2, rbitCode) + twoEqualRegInstX("rbit", "RbitQX", "SimdAluOp", ("uint8_t",), 4, rbitCode) + # REV16 + rev16Code = ''' + destElem = srcElem1; + unsigned groupSize = ((1 << 1) / sizeof(Element)); + unsigned reverseMask = (groupSize - 1); + j = i ^ reverseMask; + ''' + twoEqualRegInstX("rev16", "Rev16DX", "SimdAluOp", ("uint8_t",), 2, + rev16Code) + twoEqualRegInstX("rev16", "Rev16QX", "SimdAluOp", ("uint8_t",), 4, + rev16Code) + # REV32 + rev32Code = ''' + destElem = srcElem1; + unsigned groupSize = ((1 << 2) / sizeof(Element)); + unsigned reverseMask = (groupSize - 1); + j = i ^ reverseMask; + ''' + twoEqualRegInstX("rev32", "Rev32DX", "SimdAluOp", ("uint8_t", "uint16_t"), + 2, rev32Code) + twoEqualRegInstX("rev32", "Rev32QX", "SimdAluOp", ("uint8_t", "uint16_t"), + 4, rev32Code) + # REV64 + rev64Code = ''' + destElem = srcElem1; + unsigned groupSize = ((1 << 3) / sizeof(Element)); + unsigned reverseMask = (groupSize - 1); + j = i ^ reverseMask; + ''' + twoEqualRegInstX("rev64", "Rev64DX", "SimdAluOp", smallUnsignedTypes, 2, + rev64Code) + twoEqualRegInstX("rev64", "Rev64QX", "SimdAluOp", smallUnsignedTypes, 4, + rev64Code) + # RSHRN, RSHRN2 + rshrnCode = ''' + if (imm > sizeof(srcElem1) * 8) { + destElem = 0; + } else if (imm) { + Element rBit = bits(srcElem1, imm - 1); + destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit; + } else { + destElem = srcElem1; + } + ''' + twoRegNarrowInstX("rshrn", "RshrnX", "SimdShiftOp", smallUnsignedTypes, + rshrnCode, hasImm=True) + twoRegNarrowInstX("rshrn2", "Rshrn2X", "SimdShiftOp", smallUnsignedTypes, + rshrnCode, hasImm=True, hi=True) + # RSUBHN, RSUBHN2 + rsubhnCode = ''' + destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 + + ((BigElement)1 << (sizeof(Element) * 8 - 1))) >> + (sizeof(Element) * 8); + ''' + threeRegNarrowInstX("rsubhn", "RsubhnX", "SimdAddOp", smallTypes, + rsubhnCode) + threeRegNarrowInstX("rsubhn2", "Rsubhn2X", "SimdAddOp", smallTypes, + rsubhnCode, hi=True) + # SABA + abaCode = ''' + destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) : + (srcElem2 - srcElem1); + ''' + threeEqualRegInstX("saba", "SabaDX", "SimdAddAccOp", smallSignedTypes, 2, + abaCode, True) + threeEqualRegInstX("saba", "SabaQX", "SimdAddAccOp", smallSignedTypes, 4, + abaCode, True) + # SABAL, SABAL2 + abalCode = ''' + destElem += (srcElem1 > srcElem2) ? 
+ ((BigElement)srcElem1 - (BigElement)srcElem2) : + ((BigElement)srcElem2 - (BigElement)srcElem1); + ''' + threeRegLongInstX("sabal", "SabalX", "SimdAddAccOp", smallSignedTypes, + abalCode, True) + threeRegLongInstX("sabal2", "Sabal2X", "SimdAddAccOp", smallSignedTypes, + abalCode, True, hi=True) + # SABD + abdCode = ''' + destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) : + (srcElem2 - srcElem1); + ''' + threeEqualRegInstX("sabd", "SabdDX", "SimdAddOp", smallSignedTypes, 2, + abdCode) + threeEqualRegInstX("sabd", "SabdQX", "SimdAddOp", smallSignedTypes, 4, + abdCode) + # SABDL, SABDL2 + abdlCode = ''' + destElem = (srcElem1 > srcElem2) ? + ((BigElement)srcElem1 - (BigElement)srcElem2) : + ((BigElement)srcElem2 - (BigElement)srcElem1); + ''' + threeRegLongInstX("sabdl", "SabdlX", "SimdAddAccOp", smallSignedTypes, + abdlCode, True) + threeRegLongInstX("sabdl2", "Sabdl2X", "SimdAddAccOp", smallSignedTypes, + abdlCode, True, hi=True) + # SADALP + adalpCode = "destElem += (BigElement)srcElem1 + (BigElement)srcElem2;" + twoRegCondenseInstX("sadalp", "SadalpDX", "SimdAddOp", smallSignedTypes, 2, + adalpCode, True) + twoRegCondenseInstX("sadalp", "SadalpQX", "SimdAddOp", smallSignedTypes, 4, + adalpCode, True) + # SADDL, SADDL2 + addlwCode = "destElem = (BigElement)srcElem1 + (BigElement)srcElem2;" + threeRegLongInstX("saddl", "SaddlX", "SimdAddAccOp", smallSignedTypes, + addlwCode) + threeRegLongInstX("saddl2", "Saddl2X", "SimdAddAccOp", smallSignedTypes, + addlwCode, hi=True) + # SADDLP + twoRegCondenseInstX("saddlp", "SaddlpDX", "SimdAddOp", smallSignedTypes, 2, + addlwCode) + twoRegCondenseInstX("saddlp", "SaddlpQX", "SimdAddOp", smallSignedTypes, 4, + addlwCode) + # SADDLV + # Note: SimdAddOp can be a bit optimistic here + addAcrossLongCode = "destElem += (BigElement)srcElem1;" + twoRegAcrossInstX("saddlv", "SaddlvDX", "SimdAddOp", ("int8_t", "int16_t"), + 2, addAcrossLongCode, long=True) + twoRegAcrossInstX("saddlv", "SaddlvQX", "SimdAddOp", ("int8_t", "int16_t"), + 4, addAcrossLongCode, long=True) + twoRegAcrossInstX("saddlv", "SaddlvBQX", "SimdAddOp", ("int32_t",), 4, + addAcrossLongCode, doubleDest=True, long=True) + # SADDW, SADDW2 + threeRegWideInstX("saddw", "SaddwX", "SimdAddAccOp", smallSignedTypes, + addlwCode) + threeRegWideInstX("saddw2", "Saddw2X", "SimdAddAccOp", smallSignedTypes, + addlwCode, hi=True) + # SCVTF (fixed-point) + scvtfFixedCode = fpOp % ("fplibFixedToFP<Element>((int%d_t) srcElem1, imm," + " false, FPCRRounding(fpscr), fpscr)") + twoEqualRegInstX("scvtf", "ScvtfFixedDX", "SimdCvtOp", smallFloatTypes, 2, + scvtfFixedCode % 32, hasImm=True) + twoEqualRegInstX("scvtf", "ScvtfFixedSQX", "SimdCvtOp", smallFloatTypes, 4, + scvtfFixedCode % 32, hasImm=True) + twoEqualRegInstX("scvtf", "ScvtfFixedDQX", "SimdCvtOp", ("uint64_t",), 4, + scvtfFixedCode % 64, hasImm=True) + twoEqualRegInstX("scvtf", "ScvtfFixedScSX", "SimdCvtOp", smallFloatTypes, + 4, scvtfFixedCode % 32, hasImm=True, scalar=True) + twoEqualRegInstX("scvtf", "ScvtfFixedScDX", "SimdCvtOp", ("uint64_t",), 4, + scvtfFixedCode % 64, hasImm=True, scalar=True) + # SCVTF (integer) + scvtfIntCode = fpOp % ("fplibFixedToFP<Element>((int%d_t) srcElem1, 0," + " false, FPCRRounding(fpscr), fpscr)") + twoEqualRegInstX("scvtf", "ScvtfIntDX", "SimdCvtOp", smallFloatTypes, 2, + scvtfIntCode % 32) + twoEqualRegInstX("scvtf", "ScvtfIntSQX", "SimdCvtOp", smallFloatTypes, 4, + scvtfIntCode % 32) + twoEqualRegInstX("scvtf", "ScvtfIntDQX", "SimdCvtOp", ("uint64_t",), 4, + scvtfIntCode % 64) + twoEqualRegInstX("scvtf", 
"ScvtfIntScSX", "SimdCvtOp", smallFloatTypes, 4, + scvtfIntCode % 32, scalar=True) + twoEqualRegInstX("scvtf", "ScvtfIntScDX", "SimdCvtOp", ("uint64_t",), 4, + scvtfIntCode % 64, scalar=True) + # SHADD + haddCode = ''' + Element carryBit = + (((unsigned)srcElem1 & 0x1) + + ((unsigned)srcElem2 & 0x1)) >> 1; + // Use division instead of a shift to ensure the sign extension works + // right. The compiler will figure out if it can be a shift. Mask the + // inputs so they get truncated correctly. + destElem = (((srcElem1 & ~(Element)1) / 2) + + ((srcElem2 & ~(Element)1) / 2)) + carryBit; + ''' + threeEqualRegInstX("shadd", "ShaddDX", "SimdAddOp", smallSignedTypes, 2, + haddCode) + threeEqualRegInstX("shadd", "ShaddQX", "SimdAddOp", smallSignedTypes, 4, + haddCode) + # SHL + shlCode = ''' + if (imm >= sizeof(Element) * 8) + destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1; + else + destElem = srcElem1 << imm; + ''' + twoEqualRegInstX("shl", "ShlDX", "SimdShiftOp", unsignedTypes, 2, shlCode, + hasImm=True) + twoEqualRegInstX("shl", "ShlQX", "SimdShiftOp", unsignedTypes, 4, shlCode, + hasImm=True) + # SHLL, SHLL2 + shllCode = "destElem = ((BigElement)srcElem1) << (sizeof(Element) * 8);" + twoRegLongInstX("shll", "ShllX", "SimdShiftOp", smallTypes, shllCode) + twoRegLongInstX("shll", "Shll2X", "SimdShiftOp", smallTypes, shllCode, + hi=True) + # SHRN, SHRN2 + shrnCode = ''' + if (imm >= sizeof(srcElem1) * 8) { + destElem = 0; + } else { + destElem = srcElem1 >> imm; + } + ''' + twoRegNarrowInstX("shrn", "ShrnX", "SimdShiftOp", smallUnsignedTypes, + shrnCode, hasImm=True) + twoRegNarrowInstX("shrn2", "Shrn2X", "SimdShiftOp", smallUnsignedTypes, + shrnCode, hasImm=True, hi=True) + # SHSUB + hsubCode = ''' + Element borrowBit = + (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1; + // Use division instead of a shift to ensure the sign extension works + // right. The compiler will figure out if it can be a shift. Mask the + // inputs so they get truncated correctly. + destElem = (((srcElem1 & ~(Element)1) / 2) - + ((srcElem2 & ~(Element)1) / 2)) - borrowBit; + ''' + threeEqualRegInstX("shsub", "ShsubDX", "SimdAddOp", smallSignedTypes, 2, + hsubCode) + threeEqualRegInstX("shsub", "ShsubQX", "SimdAddOp", smallSignedTypes, 4, + hsubCode) + # SLI + sliCode = ''' + if (imm >= sizeof(Element) * 8) + destElem = destElem; + else + destElem = (srcElem1 << imm) | (destElem & mask(imm)); + ''' + twoEqualRegInstX("sli", "SliDX", "SimdShiftOp", unsignedTypes, 2, sliCode, + True, hasImm=True) + twoEqualRegInstX("sli", "SliQX", "SimdShiftOp", unsignedTypes, 4, sliCode, + True, hasImm=True) + # SMAX + maxCode = "destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;" + threeEqualRegInstX("smax", "SmaxDX", "SimdCmpOp", smallSignedTypes, 2, + maxCode) + threeEqualRegInstX("smax", "SmaxQX", "SimdCmpOp", smallSignedTypes, 4, + maxCode) + # SMAXP + threeEqualRegInstX("smaxp", "SmaxpDX", "SimdCmpOp", smallSignedTypes, 2, + maxCode, pairwise=True) + threeEqualRegInstX("smaxp", "SmaxpQX", "SimdCmpOp", smallSignedTypes, 4, + maxCode, pairwise=True) + # SMAXV + maxAcrossCode = ''' + if (i == 0 || srcElem1 > destElem) + destElem = srcElem1; + ''' + twoRegAcrossInstX("smaxv", "SmaxvDX", "SimdCmpOp", ("int8_t", "int16_t"), + 2, maxAcrossCode) + twoRegAcrossInstX("smaxv", "SmaxvQX", "SimdCmpOp", smallSignedTypes, 4, + maxAcrossCode) + # SMIN + minCode = "destElem = (srcElem1 < srcElem2) ? 
srcElem1 : srcElem2;" + threeEqualRegInstX("smin", "SminDX", "SimdCmpOp", smallSignedTypes, 2, + minCode) + threeEqualRegInstX("smin", "SminQX", "SimdCmpOp", smallSignedTypes, 4, + minCode) + # SMINP + threeEqualRegInstX("sminp", "SminpDX", "SimdCmpOp", smallSignedTypes, 2, + minCode, pairwise=True) + threeEqualRegInstX("sminp", "SminpQX", "SimdCmpOp", smallSignedTypes, 4, + minCode, pairwise=True) + # SMINV + minAcrossCode = ''' + if (i == 0 || srcElem1 < destElem) + destElem = srcElem1; + ''' + twoRegAcrossInstX("sminv", "SminvDX", "SimdCmpOp", ("int8_t", "int16_t"), + 2, minAcrossCode) + twoRegAcrossInstX("sminv", "SminvQX", "SimdCmpOp", smallSignedTypes, 4, + minAcrossCode) + # SMLAL, SMLAL2 (by element) + mlalCode = "destElem += (BigElement)srcElem1 * (BigElement)srcElem2;" + threeRegLongInstX("smlal", "SmlalElemX", "SimdMultAccOp", + ("int16_t", "int32_t"), mlalCode, True, byElem=True) + threeRegLongInstX("smlal", "SmlalElem2X", "SimdMultAccOp", + ("int16_t", "int32_t"), mlalCode, True, byElem=True, + hi=True) + # SMLAL, SMLAL2 (vector) + threeRegLongInstX("smlal", "SmlalX", "SimdMultAccOp", smallSignedTypes, + mlalCode, True) + threeRegLongInstX("smlal", "Smlal2X", "SimdMultAccOp", smallSignedTypes, + mlalCode, True, hi=True) + # SMLSL, SMLSL2 (by element) + mlslCode = "destElem -= (BigElement)srcElem1 * (BigElement)srcElem2;" + threeRegLongInstX("smlsl", "SmlslElemX", "SimdMultAccOp", smallSignedTypes, + mlslCode, True, byElem=True) + threeRegLongInstX("smlsl", "SmlslElem2X", "SimdMultAccOp", + smallSignedTypes, mlslCode, True, byElem=True, hi=True) + # SMLSL, SMLSL2 (vector) + threeRegLongInstX("smlsl", "SmlslX", "SimdMultAccOp", smallSignedTypes, + mlslCode, True) + threeRegLongInstX("smlsl", "Smlsl2X", "SimdMultAccOp", smallSignedTypes, + mlslCode, True, hi=True) + # SMOV + insToGprInstX("smov", "SmovWX", "SimdMiscOp", ("int8_t", "int16_t"), 4, + 'W', True) + insToGprInstX("smov", "SmovXX", "SimdMiscOp", smallSignedTypes, 4, 'X', + True) + # SMULL, SMULL2 (by element) + mullCode = "destElem = (BigElement)srcElem1 * (BigElement)srcElem2;" + threeRegLongInstX("smull", "SmullElemX", "SimdMultOp", smallSignedTypes, + mullCode, byElem=True) + threeRegLongInstX("smull", "SmullElem2X", "SimdMultOp", smallSignedTypes, + mullCode, byElem=True, hi=True) + # SMULL, SMULL2 (vector) + threeRegLongInstX("smull", "SmullX", "SimdMultOp", smallSignedTypes, + mullCode) + threeRegLongInstX("smull", "Smull2X", "SimdMultOp", smallSignedTypes, + mullCode, hi=True) + # SQABS + sqabsCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) { + fpscr.qc = 1; + destElem = ~srcElem1; + } else if (srcElem1 < 0) { + destElem = -srcElem1; + } else { + destElem = srcElem1; + } + FpscrQc = fpscr; + ''' + twoEqualRegInstX("sqabs", "SqabsDX", "SimdAluOp", smallSignedTypes, 2, + sqabsCode) + twoEqualRegInstX("sqabs", "SqabsQX", "SimdAluOp", signedTypes, 4, + sqabsCode) + twoEqualRegInstX("sqabs", "SqabsScX", "SimdAluOp", signedTypes, 4, + sqabsCode, scalar=True) + # SQADD + sqaddCode = ''' + destElem = srcElem1 + srcElem2; + FPSCR fpscr = (FPSCR) FpscrQc; + bool negDest = (destElem < 0); + bool negSrc1 = (srcElem1 < 0); + bool negSrc2 = (srcElem2 < 0); + if ((negDest != negSrc1) && (negSrc1 == negSrc2)) { + destElem = (Element)1 << (sizeof(Element) * 8 - 1); + if (negDest) + destElem -= 1; + fpscr.qc = 1; + } + FpscrQc = fpscr; + ''' + threeEqualRegInstX("sqadd", "SqaddDX", "SimdAddOp", smallSignedTypes, 2, + sqaddCode) + threeEqualRegInstX("sqadd", 
"SqaddQX", "SimdAddOp", signedTypes, 4, + sqaddCode) + threeEqualRegInstX("sqadd", "SqaddScX", "SimdAddOp", signedTypes, 4, + sqaddCode, scalar=True) + # SQDMLAL, SQDMLAL2 (by element) + qdmlalCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2); + Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1); + Element halfNeg = maxNeg / 2; + if ((srcElem1 == maxNeg && srcElem2 == maxNeg) || + (srcElem1 == halfNeg && srcElem2 == maxNeg) || + (srcElem1 == maxNeg && srcElem2 == halfNeg)) { + midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8)); + fpscr.qc = 1; + } + bool negPreDest = ltz(destElem); + destElem += midElem; + bool negDest = ltz(destElem); + bool negMid = ltz(midElem); + if (negPreDest == negMid && negMid != negDest) { + destElem = mask(sizeof(BigElement) * 8 - 1); + if (negPreDest) + destElem = ~destElem; + fpscr.qc = 1; + } + FpscrQc = fpscr; + ''' + threeRegLongInstX("sqdmlal", "SqdmlalElemX", "SimdMultAccOp", + ("int16_t", "int32_t"), qdmlalCode, True, byElem=True) + threeRegLongInstX("sqdmlal", "SqdmlalElem2X", "SimdMultAccOp", + ("int16_t", "int32_t"), qdmlalCode, True, byElem=True, + hi=True) + threeRegLongInstX("sqdmlal", "SqdmlalElemScX", "SimdMultAccOp", + ("int16_t", "int32_t"), qdmlalCode, True, byElem=True, + scalar=True) + # SQDMLAL, SQDMLAL2 (vector) + threeRegLongInstX("sqdmlal", "SqdmlalX", "SimdMultAccOp", + ("int16_t", "int32_t"), qdmlalCode, True) + threeRegLongInstX("sqdmlal", "Sqdmlal2X", "SimdMultAccOp", + ("int16_t", "int32_t"), qdmlalCode, True, hi=True) + threeRegLongInstX("sqdmlal", "SqdmlalScX", "SimdMultAccOp", + ("int16_t", "int32_t"), qdmlalCode, True, scalar=True) + # SQDMLSL, SQDMLSL2 (by element) + qdmlslCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2); + Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1); + Element halfNeg = maxNeg / 2; + if ((srcElem1 == maxNeg && srcElem2 == maxNeg) || + (srcElem1 == halfNeg && srcElem2 == maxNeg) || + (srcElem1 == maxNeg && srcElem2 == halfNeg)) { + midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8)); + fpscr.qc = 1; + } + bool negPreDest = ltz(destElem); + destElem -= midElem; + bool negDest = ltz(destElem); + bool posMid = ltz((BigElement)-midElem); + if (negPreDest == posMid && posMid != negDest) { + destElem = mask(sizeof(BigElement) * 8 - 1); + if (negPreDest) + destElem = ~destElem; + fpscr.qc = 1; + } + FpscrQc = fpscr; + ''' + threeRegLongInstX("sqdmlsl", "SqdmlslElemX", "SimdMultAccOp", + ("int16_t", "int32_t"), qdmlslCode, True, byElem=True) + threeRegLongInstX("sqdmlsl", "SqdmlslElem2X", "SimdMultAccOp", + ("int16_t", "int32_t"), qdmlslCode, True, byElem=True, + hi=True) + threeRegLongInstX("sqdmlsl", "SqdmlslElemScX", "SimdMultAccOp", + ("int16_t", "int32_t"), qdmlslCode, True, byElem=True, + scalar=True) + # SQDMLSL, SQDMLSL2 (vector) + threeRegLongInstX("sqdmlsl", "SqdmlslX", "SimdMultAccOp", + ("int16_t", "int32_t"), qdmlslCode, True) + threeRegLongInstX("sqdmlsl", "Sqdmlsl2X", "SimdMultAccOp", + ("int16_t", "int32_t"), qdmlslCode, True, hi=True) + threeRegLongInstX("sqdmlsl", "SqdmlslScX", "SimdMultAccOp", + ("int16_t", "int32_t"), qdmlslCode, True, scalar=True) + # SQDMULH (by element) + sqdmulhCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >> + (sizeof(Element) * 8); + if (srcElem1 == srcElem2 && + srcElem1 == (Element)((Element)1 << + (sizeof(Element) * 8 - 1))) { + destElem = ~srcElem1; + fpscr.qc = 1; + } 
+ FpscrQc = fpscr; + ''' + threeEqualRegInstX("sqdmulh", "SqdmulhElemDX", "SimdMultOp", + ("int16_t", "int32_t"), 2, sqdmulhCode, byElem=True) + threeEqualRegInstX("sqdmulh", "SqdmulhElemQX", "SimdMultOp", + ("int16_t", "int32_t"), 4, sqdmulhCode, byElem=True) + threeEqualRegInstX("sqdmulh", "SqdmulhElemScX", "SimdMultOp", + ("int16_t", "int32_t"), 4, sqdmulhCode, byElem=True, + scalar=True) + # SQDMULH (vector) + threeEqualRegInstX("sqdmulh", "SqdmulhDX", "SimdMultOp", + ("int16_t", "int32_t"), 2, sqdmulhCode) + threeEqualRegInstX("sqdmulh", "SqdmulhQX", "SimdMultOp", + ("int16_t", "int32_t"), 4, sqdmulhCode) + threeEqualRegInstX("sqdmulh", "SqdmulhScX", "SimdMultOp", + ("int16_t", "int32_t"), 4, sqdmulhCode, scalar=True) + # SQDMULL, SQDMULL2 (by element) + qdmullCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2); + if (srcElem1 == srcElem2 && + srcElem1 == (Element)((Element)1 << + (Element)(sizeof(Element) * 8 - 1))) { + destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8)); + fpscr.qc = 1; + } + FpscrQc = fpscr; + ''' + threeRegLongInstX("sqdmull", "SqdmullElemX", "SimdMultOp", + ("int16_t", "int32_t"), qdmullCode, True, byElem=True) + threeRegLongInstX("sqdmull", "SqdmullElem2X", "SimdMultOp", + ("int16_t", "int32_t"), qdmullCode, True, byElem=True, + hi=True) + threeRegLongInstX("sqdmull", "SqdmullElemScX", "SimdMultOp", + ("int16_t", "int32_t"), qdmullCode, True, byElem=True, + scalar=True) + # SQDMULL, SQDMULL2 (vector) + threeRegLongInstX("sqdmull", "SqdmullX", "SimdMultOp", + ("int16_t", "int32_t"), qdmullCode, True) + threeRegLongInstX("sqdmull", "Sqdmull2X", "SimdMultOp", + ("int16_t", "int32_t"), qdmullCode, True, hi=True) + threeRegLongInstX("sqdmull", "SqdmullScX", "SimdMultOp", + ("int16_t", "int32_t"), qdmullCode, True, scalar=True) + # SQNEG + sqnegCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) { + fpscr.qc = 1; + destElem = ~srcElem1; + } else { + destElem = -srcElem1; + } + FpscrQc = fpscr; + ''' + twoEqualRegInstX("sqneg", "SqnegDX", "SimdAluOp", smallSignedTypes, 2, + sqnegCode) + twoEqualRegInstX("sqneg", "SqnegQX", "SimdAluOp", signedTypes, 4, + sqnegCode) + twoEqualRegInstX("sqneg", "SqnegScX", "SimdAluOp", signedTypes, 4, + sqnegCode, scalar=True) + # SQRDMULH (by element) + sqrdmulhCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 + + ((int64_t)1 << (sizeof(Element) * 8 - 1))) >> + (sizeof(Element) * 8); + Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1); + Element halfNeg = maxNeg / 2; + if ((srcElem1 == maxNeg && srcElem2 == maxNeg) || + (srcElem1 == halfNeg && srcElem2 == maxNeg) || + (srcElem1 == maxNeg && srcElem2 == halfNeg)) { + if (destElem < 0) { + destElem = mask(sizeof(Element) * 8 - 1); + } else { + destElem = (Element)1 << (sizeof(Element) * 8 - 1); + } + fpscr.qc = 1; + } + FpscrQc = fpscr; + ''' + threeEqualRegInstX("sqrdmulh", "SqrdmulhElemDX", "SimdMultOp", + ("int16_t", "int32_t"), 2, sqrdmulhCode, byElem=True) + threeEqualRegInstX("sqrdmulh", "SqrdmulhElemQX", "SimdMultOp", + ("int16_t", "int32_t"), 4, sqrdmulhCode, byElem=True) + threeEqualRegInstX("sqrdmulh", "SqrdmulhElemScX", "SimdMultOp", + ("int16_t", "int32_t"), 4, sqrdmulhCode, byElem=True, + scalar=True) + # SQRDMULH (vector) + threeEqualRegInstX("sqrdmulh", "SqrdmulhDX", "SimdMultOp", + ("int16_t", "int32_t"), 2, sqrdmulhCode) + threeEqualRegInstX("sqrdmulh", "SqrdmulhQX", "SimdMultOp", + ("int16_t", 
"int32_t"), 4, sqrdmulhCode) + threeEqualRegInstX("sqrdmulh", "SqrdmulhScX", "SimdMultOp", + ("int16_t", "int32_t"), 4, sqrdmulhCode, scalar=True) + # SQRSHL + sqrshlCode = ''' + int16_t shiftAmt = (int8_t)srcElem2; + FPSCR fpscr = (FPSCR) FpscrQc; + if (shiftAmt < 0) { + shiftAmt = -shiftAmt; + Element rBit = 0; + if (shiftAmt <= sizeof(Element) * 8) + rBit = bits(srcElem1, shiftAmt - 1); + if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0) + rBit = 1; + if (shiftAmt >= sizeof(Element) * 8) { + shiftAmt = sizeof(Element) * 8 - 1; + destElem = 0; + } else { + destElem = (srcElem1 >> shiftAmt); + } + // Make sure the right shift sign extended when it should. + if (srcElem1 < 0 && destElem >= 0) { + destElem |= -((Element)1 << (sizeof(Element) * 8 - + 1 - shiftAmt)); + } + destElem += rBit; + } else if (shiftAmt > 0) { + bool sat = false; + if (shiftAmt >= sizeof(Element) * 8) { + if (srcElem1 != 0) + sat = true; + else + destElem = 0; + } else { + if (bits((uint64_t) srcElem1, sizeof(Element) * 8 - 1, + sizeof(Element) * 8 - 1 - shiftAmt) != + ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) { + sat = true; + } else { + destElem = srcElem1 << shiftAmt; + } + } + if (sat) { + fpscr.qc = 1; + destElem = mask(sizeof(Element) * 8 - 1); + if (srcElem1 < 0) + destElem = ~destElem; + } + } else { + destElem = srcElem1; + } + FpscrQc = fpscr; + ''' + threeEqualRegInstX("sqrshl", "SqrshlDX", "SimdCmpOp", smallSignedTypes, 2, + sqrshlCode) + threeEqualRegInstX("sqrshl", "SqrshlQX", "SimdCmpOp", signedTypes, 4, + sqrshlCode) + threeEqualRegInstX("sqrshl", "SqrshlScX", "SimdCmpOp", signedTypes, 4, + sqrshlCode, scalar=True) + # SQRSHRN, SQRSHRN2 + sqrshrnCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + if (imm > sizeof(srcElem1) * 8) { + if (srcElem1 != 0 && srcElem1 != -1) + fpscr.qc = 1; + destElem = 0; + } else if (imm) { + BigElement mid = (srcElem1 >> (imm - 1)); + uint64_t rBit = mid & 0x1; + mid >>= 1; + mid |= -(mid & ((BigElement)1 << + (sizeof(BigElement) * 8 - 1 - imm))); + mid += rBit; + if (mid != (Element)mid) { + destElem = mask(sizeof(Element) * 8 - 1); + if (srcElem1 < 0) + destElem = ~destElem; + fpscr.qc = 1; + } else { + destElem = mid; + } + } else { + if (srcElem1 != (Element)srcElem1) { + destElem = mask(sizeof(Element) * 8 - 1); + if (srcElem1 < 0) + destElem = ~destElem; + fpscr.qc = 1; + } else { + destElem = srcElem1; + } + } + FpscrQc = fpscr; + ''' + twoRegNarrowInstX("sqrshrn", "SqrshrnX", "SimdShiftOp", smallSignedTypes, + sqrshrnCode, hasImm=True) + twoRegNarrowInstX("sqrshrn2", "Sqrshrn2X", "SimdShiftOp", smallSignedTypes, + sqrshrnCode, hasImm=True, hi=True) + twoRegNarrowInstX("sqrshrn", "SqrshrnScX", "SimdShiftOp", smallSignedTypes, + sqrshrnCode, hasImm=True, scalar=True) + # SQRSHRUN, SQRSHRUN2 + sqrshrunCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + if (imm > sizeof(srcElem1) * 8) { + if (srcElem1 != 0) + fpscr.qc = 1; + destElem = 0; + } else if (imm) { + BigElement mid = (srcElem1 >> (imm - 1)); + uint64_t rBit = mid & 0x1; + mid >>= 1; + mid |= -(mid & ((BigElement)1 << + (sizeof(BigElement) * 8 - 1 - imm))); + mid += rBit; + if (bits(mid, sizeof(BigElement) * 8 - 1, + sizeof(Element) * 8) != 0) { + if (srcElem1 < 0) { + destElem = 0; + } else { + destElem = mask(sizeof(Element) * 8); + } + fpscr.qc = 1; + } else { + destElem = mid; + } + } else { + if (srcElem1 < 0) { + fpscr.qc = 1; + destElem = 0; + } else { + destElem = srcElem1; + } + } + FpscrQc = fpscr; + ''' + twoRegNarrowInstX("sqrshrun", "SqrshrunX", "SimdShiftOp", smallSignedTypes, + sqrshrunCode, 
hasImm=True) + twoRegNarrowInstX("sqrshrun", "Sqrshrun2X", "SimdShiftOp", + smallSignedTypes, sqrshrunCode, hasImm=True, hi=True) + twoRegNarrowInstX("sqrshrun", "SqrshrunScX", "SimdShiftOp", + smallSignedTypes, sqrshrunCode, hasImm=True, scalar=True) + # SQSHL (immediate) + sqshlImmCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + if (imm >= sizeof(Element) * 8) { + if (srcElem1 != 0) { + destElem = (Element)1 << (sizeof(Element) * 8 - 1); + if (srcElem1 > 0) + destElem = ~destElem; + fpscr.qc = 1; + } else { + destElem = 0; + } + } else if (imm) { + destElem = (srcElem1 << imm); + uint64_t topBits = bits((uint64_t)srcElem1, + sizeof(Element) * 8 - 1, + sizeof(Element) * 8 - 1 - imm); + if (topBits != 0 && topBits != mask(imm + 1)) { + destElem = (Element)1 << (sizeof(Element) * 8 - 1); + if (srcElem1 > 0) + destElem = ~destElem; + fpscr.qc = 1; + } + } else { + destElem = srcElem1; + } + FpscrQc = fpscr; + ''' + twoEqualRegInstX("sqshl", "SqshlImmDX", "SimdAluOp", smallSignedTypes, 2, + sqshlImmCode, hasImm=True) + twoEqualRegInstX("sqshl", "SqshlImmQX", "SimdAluOp", signedTypes, 4, + sqshlImmCode, hasImm=True) + twoEqualRegInstX("sqshl", "SqshlImmScX", "SimdAluOp", signedTypes, 4, + sqshlImmCode, hasImm=True, scalar=True) + # SQSHL (register) + sqshlCode = ''' + int16_t shiftAmt = (int8_t)srcElem2; + FPSCR fpscr = (FPSCR) FpscrQc; + if (shiftAmt < 0) { + shiftAmt = -shiftAmt; + if (shiftAmt >= sizeof(Element) * 8) { + shiftAmt = sizeof(Element) * 8 - 1; + destElem = 0; + } else { + destElem = (srcElem1 >> shiftAmt); + } + // Make sure the right shift sign extended when it should. + if (srcElem1 < 0 && destElem >= 0) { + destElem |= -((Element)1 << (sizeof(Element) * 8 - + 1 - shiftAmt)); + } + } else if (shiftAmt > 0) { + bool sat = false; + if (shiftAmt >= sizeof(Element) * 8) { + if (srcElem1 != 0) + sat = true; + else + destElem = 0; + } else { + if (bits((uint64_t) srcElem1, sizeof(Element) * 8 - 1, + sizeof(Element) * 8 - 1 - shiftAmt) != + ((srcElem1 < 0) ? 
mask(shiftAmt + 1) : 0)) { + sat = true; + } else { + destElem = srcElem1 << shiftAmt; + } + } + if (sat) { + fpscr.qc = 1; + destElem = mask(sizeof(Element) * 8 - 1); + if (srcElem1 < 0) + destElem = ~destElem; + } + } else { + destElem = srcElem1; + } + FpscrQc = fpscr; + ''' + threeEqualRegInstX("sqshl", "SqshlDX", "SimdAluOp", smallSignedTypes, 2, + sqshlCode) + threeEqualRegInstX("sqshl", "SqshlQX", "SimdAluOp", signedTypes, 4, + sqshlCode) + threeEqualRegInstX("sqshl", "SqshlScX", "SimdAluOp", signedTypes, 4, + sqshlCode, scalar=True) + # SQSHLU + sqshluCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + if (imm >= sizeof(Element) * 8) { + if (srcElem1 < 0) { + destElem = 0; + fpscr.qc = 1; + } else if (srcElem1 > 0) { + destElem = mask(sizeof(Element) * 8); + fpscr.qc = 1; + } else { + destElem = 0; + } + } else if (imm) { + destElem = (srcElem1 << imm); + uint64_t topBits = bits((uint64_t)srcElem1, + sizeof(Element) * 8 - 1, + sizeof(Element) * 8 - imm); + if (srcElem1 < 0) { + destElem = 0; + fpscr.qc = 1; + } else if (topBits != 0) { + destElem = mask(sizeof(Element) * 8); + fpscr.qc = 1; + } + } else { + if (srcElem1 < 0) { + fpscr.qc = 1; + destElem = 0; + } else { + destElem = srcElem1; + } + } + FpscrQc = fpscr; + ''' + twoEqualRegInstX("sqshlu", "SqshluDX", "SimdAluOp", smallSignedTypes, 2, + sqshluCode, hasImm=True) + twoEqualRegInstX("sqshlu", "SqshluQX", "SimdAluOp", signedTypes, 4, + sqshluCode, hasImm=True) + twoEqualRegInstX("sqshlu", "SqshluScX", "SimdAluOp", signedTypes, 4, + sqshluCode, hasImm=True, scalar=True) + # SQSHRN, SQSHRN2 + sqshrnCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + if (imm > sizeof(srcElem1) * 8) { + if (srcElem1 != 0 && srcElem1 != -1) + fpscr.qc = 1; + destElem = 0; + } else if (imm) { + BigElement mid = ((srcElem1 >> (imm - 1)) >> 1); + mid |= -(mid & ((BigElement)1 << + (sizeof(BigElement) * 8 - 1 - imm))); + if (mid != (Element)mid) { + destElem = mask(sizeof(Element) * 8 - 1); + if (srcElem1 < 0) + destElem = ~destElem; + fpscr.qc = 1; + } else { + destElem = mid; + } + } else { + destElem = srcElem1; + } + FpscrQc = fpscr; + ''' + twoRegNarrowInstX("sqshrn", "SqshrnX", "SimdShiftOp", smallSignedTypes, + sqshrnCode, hasImm=True) + twoRegNarrowInstX("sqshrn2", "Sqshrn2X", "SimdShiftOp", smallSignedTypes, + sqshrnCode, hasImm=True, hi=True) + twoRegNarrowInstX("sqshrn", "SqshrnScX", "SimdShiftOp", smallSignedTypes, + sqshrnCode, hasImm=True, scalar=True) + # SQSHRUN, SQSHRUN2 + sqshrunCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + if (imm > sizeof(srcElem1) * 8) { + if (srcElem1 != 0) + fpscr.qc = 1; + destElem = 0; + } else if (imm) { + BigElement mid = ((srcElem1 >> (imm - 1)) >> 1); + if (bits(mid, sizeof(BigElement) * 8 - 1, + sizeof(Element) * 8) != 0) { + if (srcElem1 < 0) { + destElem = 0; + } else { + destElem = mask(sizeof(Element) * 8); + } + fpscr.qc = 1; + } else { + destElem = mid; + } + } else { + destElem = srcElem1; + } + FpscrQc = fpscr; + ''' + twoRegNarrowInstX("sqshrun", "SqshrunX", "SimdShiftOp", smallSignedTypes, + sqshrunCode, hasImm=True) + twoRegNarrowInstX("sqshrun", "Sqshrun2X", "SimdShiftOp", smallSignedTypes, + sqshrunCode, hasImm=True, hi=True) + twoRegNarrowInstX("sqshrun", "SqshrunScX", "SimdShiftOp", smallSignedTypes, + sqshrunCode, hasImm=True, scalar=True) + # SQSUB + sqsubCode = ''' + destElem = srcElem1 - srcElem2; + FPSCR fpscr = (FPSCR) FpscrQc; + bool negDest = (destElem < 0); + bool negSrc1 = (srcElem1 < 0); + bool posSrc2 = (srcElem2 >= 0); + if ((negDest != negSrc1) && (negSrc1 == posSrc2)) { + destElem = 
(Element)1 << (sizeof(Element) * 8 - 1); + if (negDest) + destElem -= 1; + fpscr.qc = 1; + } + FpscrQc = fpscr; + ''' + threeEqualRegInstX("sqsub", "SqsubDX", "SimdAddOp", smallSignedTypes, 2, + sqsubCode) + threeEqualRegInstX("sqsub", "SqsubQX", "SimdAddOp", signedTypes, 4, + sqsubCode) + threeEqualRegInstX("sqsub", "SqsubScX", "SimdAddOp", signedTypes, 4, + sqsubCode, scalar=True) + # SQXTN, SQXTN2 + sqxtnCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + destElem = srcElem1; + if ((BigElement)destElem != srcElem1) { + fpscr.qc = 1; + destElem = mask(sizeof(Element) * 8 - 1); + if (srcElem1 < 0) + destElem = ~destElem; + } + FpscrQc = fpscr; + ''' + twoRegNarrowInstX("sqxtn", "SqxtnX", "SimdMiscOp", smallSignedTypes, + sqxtnCode) + twoRegNarrowInstX("sqxtn", "Sqxtn2X", "SimdMiscOp", smallSignedTypes, + sqxtnCode, hi=True) + twoRegNarrowInstX("sqxtn", "SqxtnScX", "SimdMiscOp", smallSignedTypes, + sqxtnCode, scalar=True) + # SQXTUN, SQXTUN2 + sqxtunCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + destElem = srcElem1; + if (srcElem1 < 0 || + ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) { + fpscr.qc = 1; + destElem = mask(sizeof(Element) * 8); + if (srcElem1 < 0) + destElem = ~destElem; + } + FpscrQc = fpscr; + ''' + twoRegNarrowInstX("sqxtun", "SqxtunX", "SimdMiscOp", smallSignedTypes, + sqxtunCode) + twoRegNarrowInstX("sqxtun", "Sqxtun2X", "SimdMiscOp", smallSignedTypes, + sqxtunCode, hi=True) + twoRegNarrowInstX("sqxtun", "SqxtunScX", "SimdMiscOp", smallSignedTypes, + sqxtunCode, scalar=True) + # SRHADD + rhaddCode = ''' + Element carryBit = + (((unsigned)srcElem1 & 0x1) + + ((unsigned)srcElem2 & 0x1) + 1) >> 1; + // Use division instead of a shift to ensure the sign extension works + // right. The compiler will figure out if it can be a shift. Mask the + // inputs so they get truncated correctly. + destElem = (((srcElem1 & ~(Element)1) / 2) + + ((srcElem2 & ~(Element)1) / 2)) + carryBit; + ''' + threeEqualRegInstX("srhadd", "SrhaddDX", "SimdAddOp", smallSignedTypes, 2, + rhaddCode) + threeEqualRegInstX("srhadd", "SrhaddQX", "SimdAddOp", smallSignedTypes, 4, + rhaddCode) + # SRI + sriCode = ''' + if (imm >= sizeof(Element) * 8) + destElem = destElem; + else + destElem = (srcElem1 >> imm) | + (destElem & ~mask(sizeof(Element) * 8 - imm)); + ''' + twoEqualRegInstX("sri", "SriDX", "SimdShiftOp", unsignedTypes, 2, sriCode, + True, hasImm=True) + twoEqualRegInstX("sri", "SriQX", "SimdShiftOp", unsignedTypes, 4, sriCode, + True, hasImm=True) + # SRSHL + rshlCode = ''' + int16_t shiftAmt = (int8_t)srcElem2; + if (shiftAmt < 0) { + shiftAmt = -shiftAmt; + Element rBit = 0; + if (shiftAmt <= sizeof(Element) * 8) + rBit = bits(srcElem1, shiftAmt - 1); + if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1)) + rBit = 1; + if (shiftAmt >= sizeof(Element) * 8) { + shiftAmt = sizeof(Element) * 8 - 1; + destElem = 0; + } else { + destElem = (srcElem1 >> shiftAmt); + } + // Make sure the right shift sign extended when it should. 
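+            // (Right-shifting a negative signed value is implementation-
+            // defined in C++, so the sign bits are OR-ed back in explicitly
+            // below rather than relying on an arithmetic shift.)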
+ if (ltz(srcElem1) && !ltz(destElem)) { + destElem |= -((Element)1 << (sizeof(Element) * 8 - + 1 - shiftAmt)); + } + destElem += rBit; + } else if (shiftAmt > 0) { + if (shiftAmt >= sizeof(Element) * 8) { + destElem = 0; + } else { + destElem = srcElem1 << shiftAmt; + } + } else { + destElem = srcElem1; + } + ''' + threeEqualRegInstX("srshl", "SrshlDX", "SimdShiftOp", signedTypes, 2, + rshlCode) + threeEqualRegInstX("srshl", "SrshlQX", "SimdShiftOp", signedTypes, 4, + rshlCode) + # SRSHR + rshrCode = ''' + if (imm > sizeof(srcElem1) * 8) { + destElem = 0; + } else if (imm) { + Element rBit = bits(srcElem1, imm - 1); + destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit; + } else { + destElem = srcElem1; + } + ''' + twoEqualRegInstX("srshr", "SrshrDX", "SimdShiftOp", signedTypes, 2, + rshrCode, hasImm=True) + twoEqualRegInstX("srshr", "SrshrQX", "SimdShiftOp", signedTypes, 4, + rshrCode, hasImm=True) + # SRSRA + rsraCode = ''' + if (imm > sizeof(srcElem1) * 8) { + destElem += 0; + } else if (imm) { + Element rBit = bits(srcElem1, imm - 1); + destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit; + } else { + destElem += srcElem1; + } + ''' + twoEqualRegInstX("srsra", "SrsraDX", "SimdShiftOp", signedTypes, 2, + rsraCode, True, hasImm=True) + twoEqualRegInstX("srsra", "SrsraQX", "SimdShiftOp", signedTypes, 4, + rsraCode, True, hasImm=True) + # SSHL + shlCode = ''' + int16_t shiftAmt = (int8_t)srcElem2; + if (shiftAmt < 0) { + shiftAmt = -shiftAmt; + if (shiftAmt >= sizeof(Element) * 8) { + shiftAmt = sizeof(Element) * 8 - 1; + destElem = 0; + } else { + destElem = (srcElem1 >> shiftAmt); + } + // Make sure the right shift sign extended when it should. + if (ltz(srcElem1) && !ltz(destElem)) { + destElem |= -((Element)1 << (sizeof(Element) * 8 - + 1 - shiftAmt)); + } + } else { + if (shiftAmt >= sizeof(Element) * 8) { + destElem = 0; + } else { + destElem = srcElem1 << shiftAmt; + } + } + ''' + threeEqualRegInstX("sshl", "SshlDX", "SimdShiftOp", signedTypes, 2, + shlCode) + threeEqualRegInstX("sshl", "SshlQX", "SimdShiftOp", signedTypes, 4, + shlCode) + # SSHLL, SSHLL2 + shllCode = ''' + if (imm >= sizeof(destElem) * 8) { + destElem = 0; + } else { + destElem = (BigElement)srcElem1 << imm; + } + ''' + twoRegLongInstX("sshll", "SshllX", "SimdShiftOp", smallSignedTypes, + shllCode, hasImm=True) + twoRegLongInstX("sshll", "Sshll2X", "SimdShiftOp", smallSignedTypes, + shllCode, hasImm=True, hi=True) + # SSHR + shrCode = ''' + if (imm >= sizeof(srcElem1) * 8) { + if (ltz(srcElem1)) + destElem = -1; + else + destElem = 0; + } else { + destElem = srcElem1 >> imm; + } + ''' + twoEqualRegInstX("sshr", "SshrDX", "SimdShiftOp", signedTypes, 2, shrCode, + hasImm=True) + twoEqualRegInstX("sshr", "SshrQX", "SimdShiftOp", signedTypes, 4, shrCode, + hasImm=True) + # SSRA + sraCode = ''' + Element mid;; + if (imm >= sizeof(srcElem1) * 8) { + mid = ltz(srcElem1) ? 
-1 : 0; + } else { + mid = srcElem1 >> imm; + if (ltz(srcElem1) && !ltz(mid)) { + mid |= -(mid & ((Element)1 << + (sizeof(Element) * 8 - 1 - imm))); + } + } + destElem += mid; + ''' + twoEqualRegInstX("ssra", "SsraDX", "SimdShiftOp", signedTypes, 2, sraCode, + True, hasImm=True) + twoEqualRegInstX("ssra", "SsraQX", "SimdShiftOp", signedTypes, 4, sraCode, + True, hasImm=True) + # SSUBL + sublwCode = "destElem = (BigElement)srcElem1 - (BigElement)srcElem2;" + threeRegLongInstX("ssubl", "SsublX", "SimdAddOp", smallSignedTypes, + sublwCode) + threeRegLongInstX("ssubl2", "Ssubl2X", "SimdAddOp", smallSignedTypes, + sublwCode, hi=True) + # SSUBW + threeRegWideInstX("ssubw", "SsubwX", "SimdAddOp", smallSignedTypes, + sublwCode) + threeRegWideInstX("ssubw2", "Ssubw2X", "SimdAddOp", smallSignedTypes, + sublwCode, hi=True) + # SUB + subCode = "destElem = srcElem1 - srcElem2;" + threeEqualRegInstX("sub", "SubDX", "SimdAddOp", unsignedTypes, 2, subCode) + threeEqualRegInstX("sub", "SubQX", "SimdAddOp", unsignedTypes, 4, subCode) + # SUBHN, SUBHN2 + subhnCode = ''' + destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >> + (sizeof(Element) * 8); + ''' + threeRegNarrowInstX("subhn", "SubhnX", "SimdAddOp", smallUnsignedTypes, + subhnCode) + threeRegNarrowInstX("subhn2", "Subhn2X", "SimdAddOp", smallUnsignedTypes, + subhnCode, hi=True) + # SUQADD + suqaddCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + Element tmp = destElem + srcElem1; + if (bits(destElem, sizeof(Element) * 8 - 1) == 0) { + if (bits(tmp, sizeof(Element) * 8 - 1) == 1 || + tmp < srcElem1 || tmp < destElem) { + destElem = (((Element) 1) << (sizeof(Element) * 8 - 1)) - 1; + fpscr.qc = 1; + } else { + destElem = tmp; + } + } else { + Element absDestElem = (~destElem) + 1; + if (absDestElem < srcElem1) { + // Still check for positive sat., no need to check for negative sat. 
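+                // (The accumulator is negative here and the unsigned addend
+                // exceeds its magnitude, so the sum can only overflow towards
+                // the positive bound; underflow is impossible.)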
+ if (bits(tmp, sizeof(Element) * 8 - 1) == 1) { + destElem = (((Element) 1) << (sizeof(Element) * 8 - 1)) - 1; + fpscr.qc = 1; + } else { + destElem = tmp; + } + } else { + destElem = tmp; + } + } + FpscrQc = fpscr; + ''' + twoEqualRegInstX("suqadd", "SuqaddDX", "SimdAddOp", smallUnsignedTypes, 2, + suqaddCode, True) + twoEqualRegInstX("suqadd", "SuqaddQX", "SimdAddOp", unsignedTypes, 4, + suqaddCode, True) + twoEqualRegInstX("suqadd", "SuqaddScX", "SimdAddOp", unsignedTypes, 4, + suqaddCode, True, scalar=True) + # SXTL -> alias to SSHLL + # TBL + tbxTblInstX("tbl", "Tbl1DX", "SimdMiscOp", ("uint8_t",), 1, "true", 2) + tbxTblInstX("tbl", "Tbl1QX", "SimdMiscOp", ("uint8_t",), 1, "true", 4) + tbxTblInstX("tbl", "Tbl2DX", "SimdMiscOp", ("uint8_t",), 2, "true", 2) + tbxTblInstX("tbl", "Tbl2QX", "SimdMiscOp", ("uint8_t",), 2, "true", 4) + tbxTblInstX("tbl", "Tbl3DX", "SimdMiscOp", ("uint8_t",), 3, "true", 2) + tbxTblInstX("tbl", "Tbl3QX", "SimdMiscOp", ("uint8_t",), 3, "true", 4) + tbxTblInstX("tbl", "Tbl4DX", "SimdMiscOp", ("uint8_t",), 4, "true", 2) + tbxTblInstX("tbl", "Tbl4QX", "SimdMiscOp", ("uint8_t",), 4, "true", 4) + # TBX + tbxTblInstX("tbx", "Tbx1DX", "SimdMiscOp", ("uint8_t",), 1, "false", 2) + tbxTblInstX("tbx", "Tbx1QX", "SimdMiscOp", ("uint8_t",), 1, "false", 4) + tbxTblInstX("tbx", "Tbx2DX", "SimdMiscOp", ("uint8_t",), 2, "false", 2) + tbxTblInstX("tbx", "Tbx2QX", "SimdMiscOp", ("uint8_t",), 2, "false", 4) + tbxTblInstX("tbx", "Tbx3DX", "SimdMiscOp", ("uint8_t",), 3, "false", 2) + tbxTblInstX("tbx", "Tbx3QX", "SimdMiscOp", ("uint8_t",), 3, "false", 4) + tbxTblInstX("tbx", "Tbx4DX", "SimdMiscOp", ("uint8_t",), 4, "false", 2) + tbxTblInstX("tbx", "Tbx4QX", "SimdMiscOp", ("uint8_t",), 4, "false", 4) + # TRN1 + trnCode = ''' + unsigned part = %s; + for (unsigned i = 0; i < eCount / 2; i++) { + destReg.elements[2 * i] = srcReg1.elements[2 * i + part]; + destReg.elements[2 * i + 1] = srcReg2.elements[2 * i + part]; + } + ''' + threeRegScrambleInstX("trn1", "Trn1DX", "SimdAluOp", smallUnsignedTypes, 2, + trnCode % "0") + threeRegScrambleInstX("trn1", "Trn1QX", "SimdAluOp", unsignedTypes, 4, + trnCode % "0") + # TRN2 + threeRegScrambleInstX("trn2", "Trn2DX", "SimdAluOp", smallUnsignedTypes, 2, + trnCode % "1") + threeRegScrambleInstX("trn2", "Trn2QX", "SimdAluOp", unsignedTypes, 4, + trnCode % "1") + # UABA + threeEqualRegInstX("uaba", "UabaDX", "SimdAddAccOp", smallUnsignedTypes, 2, + abaCode, True) + threeEqualRegInstX("uaba", "UabaQX", "SimdAddAccOp", smallUnsignedTypes, 4, + abaCode, True) + # UABAL, UABAL2 + threeRegLongInstX("uabal", "UabalX", "SimdAddAccOp", smallUnsignedTypes, + abalCode, True) + threeRegLongInstX("uabal2", "Uabal2X", "SimdAddAccOp", smallUnsignedTypes, + abalCode, True, hi=True) + # UABD + threeEqualRegInstX("uabd", "UabdDX", "SimdAddOp", smallUnsignedTypes, 2, + abdCode) + threeEqualRegInstX("uabd", "UabdQX", "SimdAddOp", smallUnsignedTypes, 4, + abdCode) + # UABDL, UABDL2 + threeRegLongInstX("uabdl", "UabdlX", "SimdAddAccOp", smallUnsignedTypes, + abdlCode, True) + threeRegLongInstX("uabdl2", "Uabdl2X", "SimdAddAccOp", smallUnsignedTypes, + abdlCode, True, hi=True) + # UADALP + twoRegCondenseInstX("uadalp", "UadalpDX", "SimdAddOp", smallUnsignedTypes, + 2, adalpCode, True) + twoRegCondenseInstX("uadalp", "UadalpQX", "SimdAddOp", smallUnsignedTypes, + 4, adalpCode, True) + # UADDL, UADDL2 + threeRegLongInstX("uaddl", "UaddlX", "SimdAddAccOp", smallUnsignedTypes, + addlwCode) + threeRegLongInstX("uaddl2", "Uaddl2X", "SimdAddAccOp", smallUnsignedTypes, + 
addlwCode, hi=True) + # UADDLP + twoRegCondenseInstX("uaddlp", "UaddlpDX", "SimdAddOp", smallUnsignedTypes, + 2, addlwCode) + twoRegCondenseInstX("uaddlp", "UaddlpQX", "SimdAddOp", smallUnsignedTypes, + 4, addlwCode) + # UADDLV + twoRegAcrossInstX("uaddlv", "UaddlvDX", "SimdAddOp", + ("uint8_t", "uint16_t"), 2, addAcrossLongCode, long=True) + twoRegAcrossInstX("uaddlv", "UaddlvQX", "SimdAddOp", + ("uint8_t", "uint16_t"), 4, addAcrossLongCode, long=True) + twoRegAcrossInstX("uaddlv", "UaddlvBQX", "SimdAddOp", ("uint32_t",), 4, + addAcrossLongCode, doubleDest=True, long=True) + # UADDW + threeRegWideInstX("uaddw", "UaddwX", "SimdAddAccOp", smallUnsignedTypes, + addlwCode) + threeRegWideInstX("uaddw2", "Uaddw2X", "SimdAddAccOp", smallUnsignedTypes, + addlwCode, hi=True) + # UCVTF (fixed-point) + ucvtfFixedCode = fpOp % ("fplibFixedToFP<Element>(srcElem1, imm, true," + " FPCRRounding(fpscr), fpscr)") + twoEqualRegInstX("ucvtf", "UcvtfFixedDX", "SimdCvtOp", smallFloatTypes, 2, + ucvtfFixedCode, hasImm=True) + twoEqualRegInstX("ucvtf", "UcvtfFixedQX", "SimdCvtOp", floatTypes, 4, + ucvtfFixedCode, hasImm=True) + twoEqualRegInstX("ucvtf", "UcvtfFixedScX", "SimdCvtOp", floatTypes, 4, + ucvtfFixedCode, hasImm=True, scalar=True) + # UCVTF (integer) + ucvtfIntCode = fpOp % ("fplibFixedToFP<Element>(srcElem1, 0, true," + " FPCRRounding(fpscr), fpscr)") + twoEqualRegInstX("ucvtf", "UcvtfIntDX", "SimdCvtOp", smallFloatTypes, 2, + ucvtfIntCode) + twoEqualRegInstX("ucvtf", "UcvtfIntQX", "SimdCvtOp", floatTypes, 4, + ucvtfIntCode) + twoEqualRegInstX("ucvtf", "UcvtfIntScX", "SimdCvtOp", floatTypes, 4, + ucvtfIntCode, scalar=True) + # UHADD + threeEqualRegInstX("uhadd", "UhaddDX", "SimdAddOp", smallUnsignedTypes, 2, + haddCode) + threeEqualRegInstX("uhadd", "UhaddQX", "SimdAddOp", smallUnsignedTypes, 4, + haddCode) + # UHSUB + threeEqualRegInstX("uhsub", "UhsubDX", "SimdAddOp", smallUnsignedTypes, 2, + hsubCode) + threeEqualRegInstX("uhsub", "UhsubQX", "SimdAddOp", smallUnsignedTypes, 4, + hsubCode) + # UMAX + threeEqualRegInstX("umax", "UmaxDX", "SimdCmpOp", smallUnsignedTypes, 2, + maxCode) + threeEqualRegInstX("umax", "UmaxQX", "SimdCmpOp", smallUnsignedTypes, 4, + maxCode) + # UMAXP + threeEqualRegInstX("umaxp", "UmaxpDX", "SimdCmpOp", smallUnsignedTypes, 2, + maxCode, pairwise=True) + threeEqualRegInstX("umaxp", "UmaxpQX", "SimdCmpOp", smallUnsignedTypes, 4, + maxCode, pairwise=True) + # UMAXV + twoRegAcrossInstX("umaxv", "UmaxvDX", "SimdCmpOp", ("uint8_t", "uint16_t"), + 2, maxAcrossCode) + twoRegAcrossInstX("umaxv", "UmaxvQX", "SimdCmpOp", smallUnsignedTypes, 4, + maxAcrossCode) + # UMIN + threeEqualRegInstX("umin", "UminDX", "SimdCmpOp", smallUnsignedTypes, 2, + minCode) + threeEqualRegInstX("umin", "UminQX", "SimdCmpOp", smallUnsignedTypes, 4, + minCode) + # UMINP + threeEqualRegInstX("uminp", "UminpDX", "SimdCmpOp", smallUnsignedTypes, 2, + minCode, pairwise=True) + threeEqualRegInstX("uminp", "UminpQX", "SimdCmpOp", smallUnsignedTypes, 4, + minCode, pairwise=True) + # UMINV + twoRegAcrossInstX("uminv", "UminvDX", "SimdCmpOp", ("uint8_t", "uint16_t"), + 2, minAcrossCode) + twoRegAcrossInstX("uminv", "UminvQX", "SimdCmpOp", smallUnsignedTypes, 4, + minAcrossCode) + # UMLAL (by element) + threeRegLongInstX("umlal", "UmlalElemX", "SimdMultAccOp", + smallUnsignedTypes, mlalCode, True, byElem=True) + threeRegLongInstX("umlal", "UmlalElem2X", "SimdMultAccOp", + smallUnsignedTypes, mlalCode, True, byElem=True, hi=True) + # UMLAL (vector) + threeRegLongInstX("umlal", "UmlalX", "SimdMultAccOp", 
smallUnsignedTypes, + mlalCode, True) + threeRegLongInstX("umlal", "Umlal2X", "SimdMultAccOp", smallUnsignedTypes, + mlalCode, True, hi=True) + # UMLSL (by element) + threeRegLongInstX("umlsl", "UmlslElemX", "SimdMultAccOp", + smallUnsignedTypes, mlslCode, True, byElem=True) + threeRegLongInstX("umlsl", "UmlslElem2X", "SimdMultAccOp", + smallUnsignedTypes, mlslCode, True, byElem=True, hi=True) + # UMLSL (vector) + threeRegLongInstX("umlsl", "UmlslX", "SimdMultAccOp", smallUnsignedTypes, + mlslCode, True) + threeRegLongInstX("umlsl", "Umlsl2X", "SimdMultAccOp", smallUnsignedTypes, + mlslCode, True, hi=True) + # UMOV + insToGprInstX("umov", "UmovWX", "SimdMiscOp", smallUnsignedTypes, 4, 'W') + insToGprInstX("umov", "UmovXX", "SimdMiscOp", ("uint64_t",), 4, 'X') + # UMULL, UMULL2 (by element) + threeRegLongInstX("umull", "UmullElemX", "SimdMultOp", smallUnsignedTypes, + mullCode, byElem=True) + threeRegLongInstX("umull", "UmullElem2X", "SimdMultOp", smallUnsignedTypes, + mullCode, byElem=True, hi=True) + # UMULL, UMULL2 (vector) + threeRegLongInstX("umull", "UmullX", "SimdMultOp", smallUnsignedTypes, + mullCode) + threeRegLongInstX("umull", "Umull2X", "SimdMultOp", smallUnsignedTypes, + mullCode, hi=True) + # UQADD + uqaddCode = ''' + destElem = srcElem1 + srcElem2; + FPSCR fpscr = (FPSCR) FpscrQc; + if (destElem < srcElem1 || destElem < srcElem2) { + destElem = (Element)(-1); + fpscr.qc = 1; + } + FpscrQc = fpscr; + ''' + threeEqualRegInstX("uqadd", "UqaddDX", "SimdAddOp", smallUnsignedTypes, 2, + uqaddCode) + threeEqualRegInstX("uqadd", "UqaddQX", "SimdAddOp", unsignedTypes, 4, + uqaddCode) + threeEqualRegInstX("uqadd", "UqaddScX", "SimdAddOp", unsignedTypes, 4, + uqaddCode, scalar=True) + # UQRSHL + uqrshlCode = ''' + int16_t shiftAmt = (int8_t)srcElem2; + FPSCR fpscr = (FPSCR) FpscrQc; + if (shiftAmt < 0) { + shiftAmt = -shiftAmt; + Element rBit = 0; + if (shiftAmt <= sizeof(Element) * 8) + rBit = bits(srcElem1, shiftAmt - 1); + if (shiftAmt >= sizeof(Element) * 8) { + shiftAmt = sizeof(Element) * 8 - 1; + destElem = 0; + } else { + destElem = (srcElem1 >> shiftAmt); + } + destElem += rBit; + } else { + if (shiftAmt >= sizeof(Element) * 8) { + if (srcElem1 != 0) { + destElem = mask(sizeof(Element) * 8); + fpscr.qc = 1; + } else { + destElem = 0; + } + } else { + if (bits(srcElem1, sizeof(Element) * 8 - 1, + sizeof(Element) * 8 - shiftAmt)) { + destElem = mask(sizeof(Element) * 8); + fpscr.qc = 1; + } else { + destElem = srcElem1 << shiftAmt; + } + } + } + FpscrQc = fpscr; + ''' + threeEqualRegInstX("uqrshl", "UqrshlDX", "SimdCmpOp", smallUnsignedTypes, + 2, uqrshlCode) + threeEqualRegInstX("uqrshl", "UqrshlQX", "SimdCmpOp", unsignedTypes, 4, + uqrshlCode) + threeEqualRegInstX("uqrshl", "UqrshlScX", "SimdCmpOp", unsignedTypes, 4, + uqrshlCode, scalar=True) + # UQRSHRN + uqrshrnCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + if (imm > sizeof(srcElem1) * 8) { + if (srcElem1 != 0) + fpscr.qc = 1; + destElem = 0; + } else if (imm) { + BigElement mid = (srcElem1 >> (imm - 1)); + uint64_t rBit = mid & 0x1; + mid >>= 1; + mid += rBit; + if (mid != (Element)mid) { + destElem = mask(sizeof(Element) * 8); + fpscr.qc = 1; + } else { + destElem = mid; + } + } else { + if (srcElem1 != (Element)srcElem1) { + destElem = mask(sizeof(Element) * 8 - 1); + fpscr.qc = 1; + } else { + destElem = srcElem1; + } + } + FpscrQc = fpscr; + ''' + twoRegNarrowInstX("uqrshrn", "UqrshrnX", "SimdShiftOp", smallUnsignedTypes, + uqrshrnCode, hasImm=True) + twoRegNarrowInstX("uqrshrn2", "Uqrshrn2X", "SimdShiftOp", + 
smallUnsignedTypes, uqrshrnCode, hasImm=True, hi=True) + twoRegNarrowInstX("uqrshrn", "UqrshrnScX", "SimdShiftOp", + smallUnsignedTypes, uqrshrnCode, hasImm=True, + scalar=True) + # UQSHL (immediate) + uqshlImmCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + if (imm >= sizeof(Element) * 8) { + if (srcElem1 != 0) { + destElem = mask(sizeof(Element) * 8); + fpscr.qc = 1; + } else { + destElem = 0; + } + } else if (imm) { + destElem = (srcElem1 << imm); + uint64_t topBits = bits((uint64_t)srcElem1, + sizeof(Element) * 8 - 1, + sizeof(Element) * 8 - imm); + if (topBits != 0) { + destElem = mask(sizeof(Element) * 8); + fpscr.qc = 1; + } + } else { + destElem = srcElem1; + } + FpscrQc = fpscr; + ''' + twoEqualRegInstX("uqshl", "UqshlImmDX", "SimdAluOp", smallUnsignedTypes, 2, + uqshlImmCode, hasImm=True) + twoEqualRegInstX("uqshl", "UqshlImmQX", "SimdAluOp", unsignedTypes, 4, + uqshlImmCode, hasImm=True) + twoEqualRegInstX("uqshl", "UqshlImmScX", "SimdAluOp", unsignedTypes, 4, + uqshlImmCode, hasImm=True, scalar=True) + # UQSHL (register) + uqshlCode = ''' + int16_t shiftAmt = (int8_t)srcElem2; + FPSCR fpscr = (FPSCR) FpscrQc; + if (shiftAmt < 0) { + shiftAmt = -shiftAmt; + if (shiftAmt >= sizeof(Element) * 8) { + shiftAmt = sizeof(Element) * 8 - 1; + destElem = 0; + } else { + destElem = (srcElem1 >> shiftAmt); + } + } else if (shiftAmt > 0) { + if (shiftAmt >= sizeof(Element) * 8) { + if (srcElem1 != 0) { + destElem = mask(sizeof(Element) * 8); + fpscr.qc = 1; + } else { + destElem = 0; + } + } else { + if (bits(srcElem1, sizeof(Element) * 8 - 1, + sizeof(Element) * 8 - shiftAmt)) { + destElem = mask(sizeof(Element) * 8); + fpscr.qc = 1; + } else { + destElem = srcElem1 << shiftAmt; + } + } + } else { + destElem = srcElem1; + } + FpscrQc = fpscr; + ''' + threeEqualRegInstX("uqshl", "UqshlDX", "SimdAluOp", smallUnsignedTypes, 2, + uqshlCode) + threeEqualRegInstX("uqshl", "UqshlQX", "SimdAluOp", unsignedTypes, 4, + uqshlCode) + threeEqualRegInstX("uqshl", "UqshlScX", "SimdAluOp", unsignedTypes, 4, + uqshlCode, scalar=True) + # UQSHRN, UQSHRN2 + uqshrnCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + if (imm > sizeof(srcElem1) * 8) { + if (srcElem1 != 0) + fpscr.qc = 1; + destElem = 0; + } else if (imm) { + BigElement mid = ((srcElem1 >> (imm - 1)) >> 1); + if (mid != (Element)mid) { + destElem = mask(sizeof(Element) * 8); + fpscr.qc = 1; + } else { + destElem = mid; + } + } else { + destElem = srcElem1; + } + FpscrQc = fpscr; + ''' + twoRegNarrowInstX("uqshrn", "UqshrnX", "SimdShiftOp", smallUnsignedTypes, + uqshrnCode, hasImm=True) + twoRegNarrowInstX("uqshrn2", "Uqshrn2X", "SimdShiftOp", smallUnsignedTypes, + uqshrnCode, hasImm=True, hi=True) + twoRegNarrowInstX("uqshrn", "UqshrnScX", "SimdShiftOp", smallUnsignedTypes, + uqshrnCode, hasImm=True, scalar=True) + # UQSUB + uqsubCode = ''' + destElem = srcElem1 - srcElem2; + FPSCR fpscr = (FPSCR) FpscrQc; + if (destElem > srcElem1) { + destElem = 0; + fpscr.qc = 1; + } + FpscrQc = fpscr; + ''' + threeEqualRegInstX("uqsub", "UqsubDX", "SimdAddOp", smallUnsignedTypes, 2, + uqsubCode) + threeEqualRegInstX("uqsub", "UqsubQX", "SimdAddOp", unsignedTypes, 4, + uqsubCode) + threeEqualRegInstX("uqsub", "UqsubScX", "SimdAddOp", unsignedTypes, 4, + uqsubCode, scalar=True) + # UQXTN + uqxtnCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + destElem = srcElem1; + if ((BigElement)destElem != srcElem1) { + fpscr.qc = 1; + destElem = mask(sizeof(Element) * 8); + } + FpscrQc = fpscr; + ''' + twoRegNarrowInstX("uqxtn", "UqxtnX", "SimdMiscOp", smallUnsignedTypes, + uqxtnCode) 
+ twoRegNarrowInstX("uqxtn", "Uqxtn2X", "SimdMiscOp", smallUnsignedTypes, + uqxtnCode, hi=True) + twoRegNarrowInstX("uqxtn", "UqxtnScX", "SimdMiscOp", smallUnsignedTypes, + uqxtnCode, scalar=True) + # URECPE + urecpeCode = "destElem = unsignedRecipEstimate(srcElem1);" + twoEqualRegInstX("urecpe", "UrecpeDX", "SimdMultAccOp", ("uint32_t",), 2, + urecpeCode) + twoEqualRegInstX("urecpe", "UrecpeQX", "SimdMultAccOp", ("uint32_t",), 4, + urecpeCode) + # URHADD + threeEqualRegInstX("urhadd", "UrhaddDX", "SimdAddOp", smallUnsignedTypes, + 2, rhaddCode) + threeEqualRegInstX("urhadd", "UrhaddQX", "SimdAddOp", smallUnsignedTypes, + 4, rhaddCode) + # URSHL + threeEqualRegInstX("urshl", "UrshlDX", "SimdShiftOp", unsignedTypes, 2, + rshlCode) + threeEqualRegInstX("urshl", "UrshlQX", "SimdShiftOp", unsignedTypes, 4, + rshlCode) + # URSHR + twoEqualRegInstX("urshr", "UrshrDX", "SimdShiftOp", unsignedTypes, 2, + rshrCode, hasImm=True) + twoEqualRegInstX("urshr", "UrshrQX", "SimdShiftOp", unsignedTypes, 4, + rshrCode, hasImm=True) + # URSQRTE + ursqrteCode = "destElem = unsignedRSqrtEstimate(srcElem1);" + twoEqualRegInstX("ursqrte", "UrsqrteDX", "SimdSqrtOp", ("uint32_t",), 2, + ursqrteCode) + twoEqualRegInstX("ursqrte", "UrsqrteQX", "SimdSqrtOp", ("uint32_t",), 4, + ursqrteCode) + # URSRA + twoEqualRegInstX("ursra", "UrsraDX", "SimdShiftOp", unsignedTypes, 2, + rsraCode, True, hasImm=True) + twoEqualRegInstX("ursra", "UrsraQX", "SimdShiftOp", unsignedTypes, 4, + rsraCode, True, hasImm=True) + # USHL + threeEqualRegInstX("ushl", "UshlDX", "SimdShiftOp", unsignedTypes, 2, + shlCode) + threeEqualRegInstX("ushl", "UshlQX", "SimdShiftOp", unsignedTypes, 4, + shlCode) + # USHLL, USHLL2 + twoRegLongInstX("ushll", "UshllX", "SimdShiftOp", smallUnsignedTypes, + shllCode, hasImm=True) + twoRegLongInstX("ushll", "Ushll2X", "SimdShiftOp", smallUnsignedTypes, + shllCode, hi=True, hasImm=True) + # USHR + twoEqualRegInstX("ushr", "UshrDX", "SimdShiftOp", unsignedTypes, 2, + shrCode, hasImm=True) + twoEqualRegInstX("ushr", "UshrQX", "SimdShiftOp", unsignedTypes, 4, + shrCode, hasImm=True) + # USQADD + usqaddCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + Element tmp = destElem + srcElem1; + if (bits(srcElem1, sizeof(Element) * 8 - 1) == 0) { + if (tmp < srcElem1 || tmp < destElem) { + destElem = (Element)(-1); + fpscr.qc = 1; + } else { + destElem = tmp; + } + } else { + Element absSrcElem1 = (~srcElem1) + 1; + if (absSrcElem1 > destElem) { + destElem = 0; + fpscr.qc = 1; + } else { + destElem = tmp; + } + } + FpscrQc = fpscr; + ''' + twoEqualRegInstX("usqadd", "UsqaddDX", "SimdAddOp", smallUnsignedTypes, 2, + usqaddCode, True) + twoEqualRegInstX("usqadd", "UsqaddQX", "SimdAddOp", unsignedTypes, 4, + usqaddCode, True) + twoEqualRegInstX("usqadd", "UsqaddScX", "SimdAddOp", unsignedTypes, 4, + usqaddCode, True, scalar=True) + # USRA + twoEqualRegInstX("usra", "UsraDX", "SimdShiftOp", unsignedTypes, 2, + sraCode, True, hasImm=True) + twoEqualRegInstX("usra", "UsraQX", "SimdShiftOp", unsignedTypes, 4, + sraCode, True, hasImm=True) + # USUBL + threeRegLongInstX("usubl", "UsublX", "SimdAddOp", smallUnsignedTypes, + sublwCode) + threeRegLongInstX("usubl2", "Usubl2X", "SimdAddOp", smallUnsignedTypes, + sublwCode, hi=True) + # USUBW + threeRegWideInstX("usubw", "UsubwX", "SimdAddOp", smallUnsignedTypes, + sublwCode) + threeRegWideInstX("usubw2", "Usubw2X", "SimdAddOp", smallUnsignedTypes, + sublwCode, hi=True) + # UXTL -> alias to USHLL + # UZP1 + uzpCode = ''' + unsigned part = %s; + for (unsigned i = 0; i < eCount / 2; i++) { + 
destReg.elements[i] = srcReg1.elements[2 * i + part]; + destReg.elements[eCount / 2 + i] = srcReg2.elements[2 * i + part]; + } + ''' + threeRegScrambleInstX("Uzp1", "Uzp1DX", "SimdAluOp", smallUnsignedTypes, 2, + uzpCode % "0") + threeRegScrambleInstX("Uzp1", "Uzp1QX", "SimdAluOp", unsignedTypes, 4, + uzpCode % "0") + # UZP2 + threeRegScrambleInstX("Uzp2", "Uzp2DX", "SimdAluOp", smallUnsignedTypes, 2, + uzpCode % "1") + threeRegScrambleInstX("Uzp2", "Uzp2QX", "SimdAluOp", unsignedTypes, 4, + uzpCode % "1") + # XTN, XTN2 + xtnCode = "destElem = srcElem1;" + twoRegNarrowInstX("Xtn", "XtnX", "SimdMiscOp", smallUnsignedTypes, xtnCode) + twoRegNarrowInstX("Xtn", "Xtn2X", "SimdMiscOp", smallUnsignedTypes, + xtnCode, hi=True) + # ZIP1 + zipCode = ''' + unsigned base = %s; + for (unsigned i = 0; i < eCount / 2; i++) { + destReg.elements[2 * i] = srcReg1.elements[base + i]; + destReg.elements[2 * i + 1] = srcReg2.elements[base + i]; + } + ''' + threeRegScrambleInstX("zip1", "Zip1DX", "SimdAluOp", smallUnsignedTypes, 2, + zipCode % "0") + threeRegScrambleInstX("zip1", "Zip1QX", "SimdAluOp", unsignedTypes, 4, + zipCode % "0") + # ZIP2 + threeRegScrambleInstX("zip2", "Zip2DX", "SimdAluOp", smallUnsignedTypes, 2, + zipCode % "eCount / 2") + threeRegScrambleInstX("zip2", "Zip2QX", "SimdAluOp", unsignedTypes, 4, + zipCode % "eCount / 2") + +}}; diff --git a/src/arch/arm/isa/insts/neon64_mem.isa b/src/arch/arm/isa/insts/neon64_mem.isa new file mode 100644 index 000000000..32a37f87e --- /dev/null +++ b/src/arch/arm/isa/insts/neon64_mem.isa @@ -0,0 +1,471 @@ +// -*- mode: c++ -*- + +// Copyright (c) 2012-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Mbou Eyole +// Giacomo Gabrielli + +let {{ + + header_output = '' + decoder_output = '' + exec_output = '' + + def mkMemAccMicroOp(name): + global header_output, decoder_output, exec_output + SPAlignmentCheckCodeNeon = ''' + if (baseIsSP && bits(XURa, 3, 0) && + SPAlignmentCheckEnabled(xc->tcBase())) { + return new SPAlignmentFault(); + } + ''' + eaCode = SPAlignmentCheckCodeNeon + ''' + EA = XURa + imm; + ''' + memDecl = ''' + const int MaxNumBytes = 16; + union MemUnion { + uint8_t bytes[MaxNumBytes]; + uint32_t floatRegBits[MaxNumBytes / 4]; + }; + ''' + + # Do endian conversion for all the elements + convCode = ''' + VReg x = {0, 0}; + + x.lo = (((XReg) memUnion.floatRegBits[1]) << 32) | + (XReg) memUnion.floatRegBits[0]; + x.hi = (((XReg) memUnion.floatRegBits[3]) << 32) | + (XReg) memUnion.floatRegBits[2]; + + const unsigned eCount = 16 / (1 << eSize); + + if (isBigEndian64(xc->tcBase())) { + for (unsigned i = 0; i < eCount; i++) { + switch (eSize) { + case 0x3: // 64-bit + writeVecElem(&x, (XReg) gtobe( + (uint64_t) readVecElem(x, i, eSize)), i, eSize); + break; + case 0x2: // 32-bit + writeVecElem(&x, (XReg) gtobe( + (uint32_t) readVecElem(x, i, eSize)), i, eSize); + break; + case 0x1: // 16-bit + writeVecElem(&x, (XReg) gtobe( + (uint16_t) readVecElem(x, i, eSize)), i, eSize); + break; + default: // 8-bit + break; // Nothing to do here + } + } + } else { + for (unsigned i = 0; i < eCount; i++) { + switch (eSize) { + case 0x3: // 64-bit + writeVecElem(&x, (XReg) gtole( + (uint64_t) readVecElem(x, i, eSize)), i, eSize); + break; + case 0x2: // 32-bit + writeVecElem(&x, (XReg) gtole( + (uint32_t) readVecElem(x, i, eSize)), i, eSize); + break; + case 0x1: // 16-bit + writeVecElem(&x, (XReg) gtole( + (uint16_t) readVecElem(x, i, eSize)), i, eSize); + break; + default: // 8-bit + break; // Nothing to do here + } + } + } + + memUnion.floatRegBits[0] = (uint32_t) x.lo; + memUnion.floatRegBits[1] = (uint32_t) (x.lo >> 32); + memUnion.floatRegBits[2] = (uint32_t) x.hi; + memUnion.floatRegBits[3] = (uint32_t) (x.hi >> 32); + ''' + + # Offload everything into registers + regSetCode = '' + for reg in range(4): + regSetCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(memUnion.floatRegBits[%(reg)d]); + ''' % { 'reg' : reg } + + # Pull everything in from registers + regGetCode = '' + for reg in range(4): + regGetCode += ''' + memUnion.floatRegBits[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); + ''' % { 'reg' : reg } + + loadMemAccCode = convCode + regSetCode + storeMemAccCode = regGetCode + convCode + + loadIop = InstObjParams(name + 'ld', + 'MicroNeonLoad64', + 'MicroNeonMemOp', + { 'mem_decl' : memDecl, + 'memacc_code' : loadMemAccCode, + 'ea_code' : simd64EnabledCheckCode + eaCode, + }, + [ 'IsMicroop', 'IsMemRef', 'IsLoad' ]) + storeIop = InstObjParams(name + 'st', + 'MicroNeonStore64', + 'MicroNeonMemOp', + { 'mem_decl' : memDecl, + 'memacc_code' : storeMemAccCode, + 'ea_code' : simd64EnabledCheckCode + eaCode, + }, + [ 'IsMicroop', 'IsMemRef', 'IsStore' 
]) + + exec_output += NeonLoadExecute64.subst(loadIop) + \ + NeonLoadInitiateAcc64.subst(loadIop) + \ + NeonLoadCompleteAcc64.subst(loadIop) + \ + NeonStoreExecute64.subst(storeIop) + \ + NeonStoreInitiateAcc64.subst(storeIop) + \ + NeonStoreCompleteAcc64.subst(storeIop) + header_output += MicroNeonMemDeclare64.subst(loadIop) + \ + MicroNeonMemDeclare64.subst(storeIop) + + def mkMarshalMicroOp(name, Name): + global header_output, decoder_output, exec_output + + getInputCodeOp1L = '' + for v in range(4): + for p in range(4): + getInputCodeOp1L += ''' + writeVecElem(&input[%(v)d], (XReg) AA64FpOp1P%(p)dV%(v)d_uw, + %(p)d, 0x2); + ''' % { 'v' : v, 'p' : p } + + getInputCodeOp1S = '' + for v in range(4): + for p in range(4): + getInputCodeOp1S += ''' + writeVecElem(&input[%(v)d], (XReg) AA64FpOp1P%(p)dV%(v)dS_uw, + %(p)d, 0x2); + ''' % { 'v' : v, 'p' : p } + + if name == 'deint_neon_uop': + + eCode = ''' + VReg input[4]; // input data from scratch area + VReg output[2]; // output data to arch. SIMD regs + VReg temp; + temp.lo = 0; + temp.hi = 0; + ''' + for p in range(4): + eCode += ''' + writeVecElem(&temp, (XReg) AA64FpDestP%(p)dV1L_uw, %(p)d, 0x2); + ''' % { 'p' : p } + eCode += getInputCodeOp1L + + # Note that numRegs is not always the same as numStructElems; in + # particular, for LD1/ST1, numStructElems is 1 but numRegs can be + # 1, 2, 3 or 4 + + eCode += ''' + output[0].lo = 0; + output[0].hi = 0; + output[1].lo = 0; + output[1].hi = 0; + + int eCount = dataSize / (8 << eSize); + int eSizeBytes = 1 << eSize; // element size in bytes + int numBytes = step * dataSize / 4; + int totNumBytes = numRegs * dataSize / 8; + + int structElemNo, pos, a, b; + XReg data; + + for (int r = 0; r < 2; ++r) { + for (int i = 0; i < eCount; ++i) { + if (numBytes < totNumBytes) { + structElemNo = r + (step * 2); + if (numStructElems == 1) { + pos = (eSizeBytes * i) + + (eCount * structElemNo * eSizeBytes); + } else { + pos = (numStructElems * eSizeBytes * i) + + (structElemNo * eSizeBytes); + } + a = pos / 16; + b = (pos % 16) / eSizeBytes; + data = (XReg) readVecElem(input[a], (XReg) b, + eSize); + writeVecElem(&output[r], data, i, eSize); + numBytes += eSizeBytes; + } + } + } + ''' + for p in range(4): + eCode += ''' + AA64FpDestP%(p)dV0L_uw = (uint32_t) readVecElem(output[0], + %(p)d, 0x2); + ''' % { 'p' : p } + eCode += ''' + if ((numRegs % 2 == 0) || (numRegs == 3 && step == 0)) { + ''' + for p in range(4): + eCode += ''' + AA64FpDestP%(p)dV1L_uw = (uint32_t) readVecElem( + output[1], %(p)d, 0x2); + ''' % { 'p' : p } + eCode += ''' + } else { + ''' + for p in range(4): + eCode += ''' + AA64FpDestP%(p)dV1L_uw = (uint32_t) readVecElem(temp, + %(p)d, 0x2); + ''' % { 'p' : p } + eCode += ''' + } + ''' + + iop = InstObjParams(name, Name, 'MicroNeonMixOp64', + { 'code' : eCode }, ['IsMicroop']) + header_output += MicroNeonMixDeclare64.subst(iop) + exec_output += MicroNeonMixExecute64.subst(iop) + + elif name == 'int_neon_uop': + + eCode = ''' + VReg input[4]; // input data from arch. 
SIMD regs + VReg output[2]; // output data to scratch area + ''' + + eCode += getInputCodeOp1S + + # Note that numRegs is not always the same as numStructElems; in + # particular, for LD1/ST1, numStructElems is 1 but numRegs can be + # 1, 2, 3 or 4 + + eCode += ''' + int eCount = dataSize / (8 << eSize); + int eSizeBytes = 1 << eSize; + int totNumBytes = numRegs * dataSize / 8; + int numOutputElems = 128 / (8 << eSize); + int stepOffset = step * 32; + + for (int i = 0; i < 2; ++i) { + output[i].lo = 0; + output[i].hi = 0; + } + + int r = 0, k = 0, i, j; + XReg data; + + for (int pos = stepOffset; pos < 32 + stepOffset; + pos += eSizeBytes) { + if (pos < totNumBytes) { + if (numStructElems == 1) { + i = (pos / eSizeBytes) % eCount; + j = pos / (eCount * eSizeBytes); + } else { + i = pos / (numStructElems * eSizeBytes); + j = (pos % (numStructElems * eSizeBytes)) / + eSizeBytes; + } + data = (XReg) readVecElem(input[j], (XReg) i, eSize); + writeVecElem(&output[r], data, k, eSize); + k++; + if (k == numOutputElems){ + k = 0; + ++r; + } + } + } + ''' + for v in range(2): + for p in range(4): + eCode += ''' + AA64FpDestP%(p)dV%(v)d_uw = (uint32_t) readVecElem( + output[%(v)d], %(p)d, 0x2); + ''' % { 'v': v, 'p': p} + + iop = InstObjParams(name, Name, 'MicroNeonMixOp64', + { 'code' : eCode }, ['IsMicroop']) + header_output += MicroNeonMixDeclare64.subst(iop) + exec_output += MicroNeonMixExecute64.subst(iop) + + elif name == 'unpack_neon_uop': + + eCode = ''' + VReg input[4]; //input data from scratch area + VReg output[2]; //output data to arch. SIMD regs + ''' + + eCode += getInputCodeOp1L + + # Fill output regs with register data initially. Note that + # elements in output register outside indexed lanes are left + # untouched + for v in range(2): + for p in range(4): + eCode += ''' + writeVecElem(&output[%(v)d], (XReg) AA64FpDestP%(p)dV%(v)dL_uw, + %(p)d, 0x2); + ''' % { 'v': v, 'p': p} + eCode += ''' + int eCount = dataSize / (8 << eSize); + int eCount128 = 128 / (8 << eSize); + int eSizeBytes = 1 << eSize; + int totNumBytes = numStructElems * eSizeBytes; + int numInputElems = eCount128; + int stepOffset = step * 2 * eSizeBytes; + int stepLimit = 2 * eSizeBytes; + + int r = 0, i, j; + XReg data; + + for (int pos = stepOffset; pos < stepLimit + stepOffset; + pos += eSizeBytes) { + if (pos < totNumBytes) { + r = pos / eSizeBytes; + j = r / numInputElems; + i = r % numInputElems; + data = (XReg) readVecElem(input[j], (XReg) i, eSize); + + if (replicate) { + for (int i = 0; i < eCount128; ++i) { + if (i < eCount) { + writeVecElem(&output[r % 2], data, i, + eSize); + } else { // zero extend if necessary + writeVecElem(&output[r % 2], (XReg) 0, i, + eSize); + } + } + } else { + writeVecElem(&output[r % 2], data, lane, eSize); + } + } + } + ''' + for v in range(2): + for p in range(4): + eCode += ''' + AA64FpDestP%(p)dV%(v)dL_uw = (uint32_t) readVecElem( + output[%(v)d], %(p)d, 0x2); + ''' % { 'v' : v, 'p' : p } + + iop = InstObjParams(name, Name, 'MicroNeonMixLaneOp64', + { 'code' : eCode }, ['IsMicroop']) + header_output += MicroNeonMixLaneDeclare64.subst(iop) + exec_output += MicroNeonMixExecute64.subst(iop) + + elif name == 'pack_neon_uop': + + eCode = ''' + VReg input[4]; // input data from arch. 
SIMD regs + VReg output[2]; // output data to scratch area + ''' + + eCode += getInputCodeOp1S + + eCode += ''' + int eSizeBytes = 1 << eSize; + int numOutputElems = 128 / (8 << eSize); + int totNumBytes = numStructElems * eSizeBytes; + int stepOffset = step * 32; + int stepLimit = 32; + + int r = 0, i, j; + XReg data; + + for (int i = 0; i < 2; ++i) { + output[i].lo = 0; + output[i].hi = 0; + } + + for (int pos = stepOffset; pos < stepLimit + stepOffset; + pos += eSizeBytes) { + if (pos < totNumBytes) { + r = pos / 16; + j = pos / eSizeBytes; + i = (pos / eSizeBytes) % numOutputElems; + data = (XReg) readVecElem(input[j], lane, eSize); + writeVecElem(&output[r % 2], data, i, eSize); + } + } + ''' + + for v in range(2): + for p in range(4): + eCode += ''' + AA64FpDestP%(p)dV%(v)d_uw = (uint32_t) readVecElem( + output[%(v)d], %(p)d, 0x2); + ''' % { 'v' : v, 'p' : p } + + iop = InstObjParams(name, Name, 'MicroNeonMixLaneOp64', + { 'code' : eCode }, ['IsMicroop']) + header_output += MicroNeonMixLaneDeclare64.subst(iop) + exec_output += MicroNeonMixExecute64.subst(iop) + + # Generate instructions + mkMemAccMicroOp('mem_neon_uop') + mkMarshalMicroOp('deint_neon_uop', 'MicroDeintNeon64') + mkMarshalMicroOp('int_neon_uop', 'MicroIntNeon64') + mkMarshalMicroOp('unpack_neon_uop', 'MicroUnpackNeon64') + mkMarshalMicroOp('pack_neon_uop', 'MicroPackNeon64') + +}}; + +let {{ + + iop = InstObjParams('vldmult64', 'VldMult64', 'VldMultOp64', '', []) + header_output += VMemMultDeclare64.subst(iop) + decoder_output += VMemMultConstructor64.subst(iop) + + iop = InstObjParams('vstmult64', 'VstMult64', 'VstMultOp64', '', []) + header_output += VMemMultDeclare64.subst(iop) + decoder_output += VMemMultConstructor64.subst(iop) + + iop = InstObjParams('vldsingle64', 'VldSingle64', 'VldSingleOp64', '', []) + header_output += VMemSingleDeclare64.subst(iop) + decoder_output += VMemSingleConstructor64.subst(iop) + + iop = InstObjParams('vstsingle64', 'VstSingle64', 'VstSingleOp64', '', []) + header_output += VMemSingleDeclare64.subst(iop) + decoder_output += VMemSingleConstructor64.subst(iop) + +}}; diff --git a/src/arch/arm/isa/insts/str.isa b/src/arch/arm/isa/insts/str.isa index 80846053b..3f595692a 100644 --- a/src/arch/arm/isa/insts/str.isa +++ b/src/arch/arm/isa/insts/str.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2011 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -38,6 +38,7 @@ // Authors: Gabe Black let {{ + import math header_output = "" decoder_output = "" @@ -77,7 +78,9 @@ let {{ (newHeader, newDecoder, newExec) = self.fillTemplates(self.name, self.Name, codeBlobs, - self.memFlags, self.instFlags, base, wbDecl) + self.memFlags, self.instFlags, + base, wbDecl, None, False, + self.size, self.sign) header_output += newHeader decoder_output += newDecoder @@ -171,7 +174,7 @@ let {{ self.size, self.sign, self.user) # Add memory request flags where necessary - self.memFlags.append("%d" % (self.size - 1)) + self.memFlags.append("%d" % int(math.log(self.size, 2))) if self.user: self.memFlags.append("ArmISA::TLB::UserMode") diff --git a/src/arch/arm/isa/insts/str64.isa b/src/arch/arm/isa/insts/str64.isa new file mode 100644 index 000000000..c15dca16e --- /dev/null +++ b/src/arch/arm/isa/insts/str64.isa @@ -0,0 +1,372 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be 
construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + class StoreInst64(LoadStoreInst): + execBase = 'Store64' + micro = False + + def __init__(self, mnem, Name, size=4, user=False, flavor="normal", + top = False): + super(StoreInst64, self).__init__() + + self.name = mnem + self.Name = Name + self.size = size + self.user = user + self.flavor = flavor + self.top = top + + self.memFlags = ["ArmISA::TLB::MustBeOne"] + self.instFlags = [] + self.codeBlobs = { "postacc_code" : "" } + + # Add memory request flags where necessary + if self.user: + self.memFlags.append("ArmISA::TLB::UserMode") + + if self.flavor in ("relexp", "exp"): + # For exclusive pair ops alignment check is based on total size + self.memFlags.append("%d" % int(math.log(self.size, 2) + 1)) + elif not (self.size == 16 and self.top): + # Only the first microop should perform alignment checking. 
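+            # The flag encodes the required alignment as log2 of the access
+            # size in bytes (e.g. "3" for an 8-byte store), mirroring the
+            # change made to str.isa above.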
+ self.memFlags.append("%d" % int(math.log(self.size, 2))) + + if self.flavor not in ("release", "relex", "exclusive", + "relexp", "exp"): + self.memFlags.append("ArmISA::TLB::AllowUnaligned") + + if self.micro: + self.instFlags.append("IsMicroop") + + if self.flavor in ("release", "relex", "relexp"): + self.instFlags.extend(["IsMemBarrier", + "IsWriteBarrier", + "IsReadBarrier"]) + if self.flavor in ("relex", "exclusive", "exp", "relexp"): + self.instFlags.append("IsStoreConditional") + self.memFlags.append("Request::LLSC") + + def emitHelper(self, base = 'Memory64', wbDecl = None): + global header_output, decoder_output, exec_output + + # If this is a microop itself, don't allow anything that would + # require further microcoding. + if self.micro: + assert not wbDecl + + fa_code = None + if not self.micro and self.flavor in ("normal", "release"): + fa_code = ''' + fault->annotate(ArmFault::SAS, %s); + fault->annotate(ArmFault::SSE, false); + fault->annotate(ArmFault::SRT, dest); + fault->annotate(ArmFault::SF, %s); + fault->annotate(ArmFault::AR, %s); + ''' % ("0" if self.size == 1 else + "1" if self.size == 2 else + "2" if self.size == 4 else "3", + "true" if self.size == 8 else "false", + "true" if self.flavor == "release" else "false") + + (newHeader, newDecoder, newExec) = \ + self.fillTemplates(self.name, self.Name, self.codeBlobs, + self.memFlags, self.instFlags, + base, wbDecl, faCode=fa_code) + + header_output += newHeader + decoder_output += newDecoder + exec_output += newExec + + def buildEACode(self): + # Address computation + eaCode = "" + if self.flavor == "fp": + eaCode += vfp64EnabledCheckCode + + eaCode += SPAlignmentCheckCode + "EA = XBase" + if self.size == 16: + if self.top: + eaCode += " + (isBigEndian64(xc->tcBase()) ? 0 : 8)" + else: + eaCode += " + (isBigEndian64(xc->tcBase()) ? 
8 : 0)" + if not self.post: + eaCode += self.offset + eaCode += ";" + + self.codeBlobs["ea_code"] = eaCode + + + class StoreImmInst64(StoreInst64): + def __init__(self, *args, **kargs): + super(StoreImmInst64, self).__init__(*args, **kargs) + self.offset = "+ imm" + + self.wbDecl = "MicroAddXiUop(machInst, base, base, imm);" + + class StoreRegInst64(StoreInst64): + def __init__(self, *args, **kargs): + super(StoreRegInst64, self).__init__(*args, **kargs) + self.offset = "+ extendReg64(XOffset, type, shiftAmt, 64)" + + self.wbDecl = \ + "MicroAddXERegUop(machInst, base, base, " + \ + " offset, type, shiftAmt);" + + class StoreRawRegInst64(StoreInst64): + def __init__(self, *args, **kargs): + super(StoreRawRegInst64, self).__init__(*args, **kargs) + self.offset = "" + + class StoreSingle64(StoreInst64): + def emit(self): + self.buildEACode() + + # Code that actually handles the access + if self.flavor == "fp": + if self.size in (1, 2, 4): + accCode = ''' + Mem%(suffix)s = + cSwap(AA64FpDestP0%(suffix)s, isBigEndian64(xc->tcBase())); + ''' + elif self.size == 8 or (self.size == 16 and not self.top): + accCode = ''' + uint64_t data = AA64FpDestP1_uw; + data = (data << 32) | AA64FpDestP0_uw; + Mem%(suffix)s = cSwap(data, isBigEndian64(xc->tcBase())); + ''' + elif self.size == 16 and self.top: + accCode = ''' + uint64_t data = AA64FpDestP3_uw; + data = (data << 32) | AA64FpDestP2_uw; + Mem%(suffix)s = cSwap(data, isBigEndian64(xc->tcBase())); + ''' + else: + accCode = \ + 'Mem%(suffix)s = cSwap(XDest%(suffix)s, isBigEndian64(xc->tcBase()));' + if self.size == 16: + accCode = accCode % \ + { "suffix" : buildMemSuffix(False, 8) } + else: + accCode = accCode % \ + { "suffix" : buildMemSuffix(False, self.size) } + + self.codeBlobs["memacc_code"] = accCode + + if self.flavor in ("relex", "exclusive"): + self.instFlags.append("IsStoreConditional") + self.memFlags.append("Request::LLSC") + + # Push it out to the output files + wbDecl = None + if self.writeback and not self.micro: + wbDecl = self.wbDecl + self.emitHelper(self.base, wbDecl) + + class StoreDouble64(StoreInst64): + def emit(self): + self.buildEACode() + + # Code that actually handles the access + if self.flavor == "fp": + accCode = ''' + uint64_t data = AA64FpDest2P0_uw; + data = (data << 32) | AA64FpDestP0_uw; + Mem_ud = cSwap(data, isBigEndian64(xc->tcBase())); + ''' + else: + if self.size == 4: + accCode = ''' + uint64_t data = XDest2_uw; + data = (data << 32) | XDest_uw; + Mem_ud = cSwap(data, isBigEndian64(xc->tcBase())); + ''' + elif self.size == 8: + accCode = ''' + // This temporary needs to be here so that the parser + // will correctly identify this instruction as a store. 
+ Twin64_t temp; + temp.a = XDest_ud; + temp.b = XDest2_ud; + Mem_tud = temp; + ''' + self.codeBlobs["memacc_code"] = accCode + + # Push it out to the output files + wbDecl = None + if self.writeback and not self.micro: + wbDecl = self.wbDecl + self.emitHelper(self.base, wbDecl) + + class StoreImm64(StoreImmInst64, StoreSingle64): + decConstBase = 'LoadStoreImm64' + base = 'ArmISA::MemoryImm64' + writeback = False + post = False + + class StorePre64(StoreImmInst64, StoreSingle64): + decConstBase = 'LoadStoreImm64' + base = 'ArmISA::MemoryPreIndex64' + writeback = True + post = False + + class StorePost64(StoreImmInst64, StoreSingle64): + decConstBase = 'LoadStoreImm64' + base = 'ArmISA::MemoryPostIndex64' + writeback = True + post = True + + class StoreReg64(StoreRegInst64, StoreSingle64): + decConstBase = 'LoadStoreReg64' + base = 'ArmISA::MemoryReg64' + writeback = False + post = False + + class StoreRaw64(StoreRawRegInst64, StoreSingle64): + decConstBase = 'LoadStoreRaw64' + base = 'ArmISA::MemoryRaw64' + writeback = False + post = False + + class StoreEx64(StoreRawRegInst64, StoreSingle64): + decConstBase = 'LoadStoreEx64' + base = 'ArmISA::MemoryEx64' + writeback = False + post = False + execBase = 'StoreEx64' + def __init__(self, *args, **kargs): + super(StoreEx64, self).__init__(*args, **kargs) + self.codeBlobs["postacc_code"] = "XResult = !writeResult;" + + def buildStores64(mnem, NameBase, size, flavor="normal"): + StoreImm64(mnem, NameBase + "_IMM", size, flavor=flavor).emit() + StorePre64(mnem, NameBase + "_PRE", size, flavor=flavor).emit() + StorePost64(mnem, NameBase + "_POST", size, flavor=flavor).emit() + StoreReg64(mnem, NameBase + "_REG", size, flavor=flavor).emit() + + buildStores64("strb", "STRB64", 1) + buildStores64("strh", "STRH64", 2) + buildStores64("str", "STRW64", 4) + buildStores64("str", "STRX64", 8) + buildStores64("str", "STRBFP64", 1, flavor="fp") + buildStores64("str", "STRHFP64", 2, flavor="fp") + buildStores64("str", "STRSFP64", 4, flavor="fp") + buildStores64("str", "STRDFP64", 8, flavor="fp") + + StoreImm64("sturb", "STURB64_IMM", 1).emit() + StoreImm64("sturh", "STURH64_IMM", 2).emit() + StoreImm64("stur", "STURW64_IMM", 4).emit() + StoreImm64("stur", "STURX64_IMM", 8).emit() + StoreImm64("stur", "STURBFP64_IMM", 1, flavor="fp").emit() + StoreImm64("stur", "STURHFP64_IMM", 2, flavor="fp").emit() + StoreImm64("stur", "STURSFP64_IMM", 4, flavor="fp").emit() + StoreImm64("stur", "STURDFP64_IMM", 8, flavor="fp").emit() + + StoreImm64("sttrb", "STTRB64_IMM", 1, user=True).emit() + StoreImm64("sttrh", "STTRH64_IMM", 2, user=True).emit() + StoreImm64("sttr", "STTRW64_IMM", 4, user=True).emit() + StoreImm64("sttr", "STTRX64_IMM", 8, user=True).emit() + + StoreRaw64("stlr", "STLRX64", 8, flavor="release").emit() + StoreRaw64("stlr", "STLRW64", 4, flavor="release").emit() + StoreRaw64("stlrh", "STLRH64", 2, flavor="release").emit() + StoreRaw64("stlrb", "STLRB64", 1, flavor="release").emit() + + StoreEx64("stlxr", "STLXRX64", 8, flavor="relex").emit() + StoreEx64("stlxr", "STLXRW64", 4, flavor="relex").emit() + StoreEx64("stlxrh", "STLXRH64", 2, flavor="relex").emit() + StoreEx64("stlxrb", "STLXRB64", 1, flavor="relex").emit() + + StoreEx64("stxr", "STXRX64", 8, flavor="exclusive").emit() + StoreEx64("stxr", "STXRW64", 4, flavor="exclusive").emit() + StoreEx64("stxrh", "STXRH64", 2, flavor="exclusive").emit() + StoreEx64("stxrb", "STXRB64", 1, flavor="exclusive").emit() + + class StoreImmU64(StoreImm64): + decConstBase = 'LoadStoreImmU64' + micro = True + + class 
StoreImmDU64(StoreImmInst64, StoreDouble64): + decConstBase = 'LoadStoreImmDU64' + base = 'ArmISA::MemoryDImm64' + micro = True + post = False + writeback = False + + class StoreImmDEx64(StoreImmInst64, StoreDouble64): + execBase = 'StoreEx64' + decConstBase = 'StoreImmDEx64' + base = 'ArmISA::MemoryDImmEx64' + micro = False + post = False + writeback = False + def __init__(self, *args, **kargs): + super(StoreImmDEx64, self).__init__(*args, **kargs) + self.codeBlobs["postacc_code"] = "XResult = !writeResult;" + + class StoreRegU64(StoreReg64): + decConstBase = 'LoadStoreRegU64' + micro = True + + StoreImmDEx64("stlxp", "STLXPW64", 4, flavor="relexp").emit() + StoreImmDEx64("stlxp", "STLXPX64", 8, flavor="relexp").emit() + StoreImmDEx64("stxp", "STXPW64", 4, flavor="exp").emit() + StoreImmDEx64("stxp", "STXPX64", 8, flavor="exp").emit() + + StoreImmU64("strxi_uop", "MicroStrXImmUop", 8).emit() + StoreRegU64("strxr_uop", "MicroStrXRegUop", 8).emit() + StoreImmU64("strfpxi_uop", "MicroStrFpXImmUop", 8, flavor="fp").emit() + StoreRegU64("strfpxr_uop", "MicroStrFpXRegUop", 8, flavor="fp").emit() + StoreImmU64("strqbfpxi_uop", "MicroStrQBFpXImmUop", + 16, flavor="fp", top=False).emit() + StoreRegU64("strqbfpxr_uop", "MicroStrQBFpXRegUop", + 16, flavor="fp", top=False).emit() + StoreImmU64("strqtfpxi_uop", "MicroStrQTFpXImmUop", + 16, flavor="fp", top=True).emit() + StoreRegU64("strqtfpxr_uop", "MicroStrQTFpXRegUop", + 16, flavor="fp", top=True).emit() + StoreImmDU64("strdxi_uop", "MicroStrDXImmUop", 4).emit() + StoreImmDU64("strdfpxi_uop", "MicroStrDFpXImmUop", 4, flavor="fp").emit() + +}}; diff --git a/src/arch/arm/isa/insts/swap.isa b/src/arch/arm/isa/insts/swap.isa index b42a1c4b2..f2ceed28e 100644 --- a/src/arch/arm/isa/insts/swap.isa +++ b/src/arch/arm/isa/insts/swap.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2011 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -73,10 +73,7 @@ let {{ swpPreAccCode = ''' if (!((SCTLR)Sctlr).sw) { - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(false, mnemonic); + return new UndefinedInstruction(machInst, false, mnemonic); } ''' diff --git a/src/arch/arm/isa/operands.isa b/src/arch/arm/isa/operands.isa index 64deef044..7a1213377 100644 --- a/src/arch/arm/isa/operands.isa +++ b/src/arch/arm/isa/operands.isa @@ -1,5 +1,5 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -80,6 +80,31 @@ let {{ xc->%(func)s(this, %(op_idx)s, %(final_val)s); } ''' + aarch64Read = ''' + ((xc->%(func)s(this, %(op_idx)s)) & mask(intWidth)) + ''' + aarch64Write = ''' + xc->%(func)s(this, %(op_idx)s, (%(final_val)s) & mask(intWidth)) + ''' + aarchX64Read = ''' + ((xc->%(func)s(this, %(op_idx)s)) & mask(aarch64 ? 64 : 32)) + ''' + aarchX64Write = ''' + xc->%(func)s(this, %(op_idx)s, (%(final_val)s) & mask(aarch64 ? 
64 : 32)) + ''' + aarchW64Read = ''' + ((xc->%(func)s(this, %(op_idx)s)) & mask(32)) + ''' + aarchW64Write = ''' + xc->%(func)s(this, %(op_idx)s, (%(final_val)s) & mask(32)) + ''' + cntrlNsBankedWrite = ''' + xc->setMiscReg(flattenMiscRegNsBanked(dest, xc->tcBase()), %(final_val)s) + ''' + + cntrlNsBankedRead = ''' + xc->readMiscReg(flattenMiscRegNsBanked(op1, xc->tcBase())) + ''' #PCState operands need to have a sorting index (the number at the end) #less than all the integer registers which might update the PC. That way @@ -99,6 +124,18 @@ let {{ return ('IntReg', 'uw', idx, 'IsInteger', srtNormal, maybePCRead, maybePCWrite) + def intReg64(idx): + return ('IntReg', 'ud', idx, 'IsInteger', srtNormal, + aarch64Read, aarch64Write) + + def intRegX64(idx, id = srtNormal): + return ('IntReg', 'ud', idx, 'IsInteger', id, + aarchX64Read, aarchX64Write) + + def intRegW64(idx, id = srtNormal): + return ('IntReg', 'ud', idx, 'IsInteger', id, + aarchW64Read, aarchW64Write) + def intRegNPC(idx): return ('IntReg', 'uw', idx, 'IsInteger', srtNormal) @@ -120,26 +157,49 @@ let {{ def cntrlReg(idx, id = srtNormal, type = 'uw'): return ('ControlReg', type, idx, None, id) + def cntrlNsBankedReg(idx, id = srtNormal, type = 'uw'): + return ('ControlReg', type, idx, (None, None, 'IsControl'), id, cntrlNsBankedRead, cntrlNsBankedWrite) + + def cntrlNsBankedReg64(idx, id = srtNormal, type = 'ud'): + return ('ControlReg', type, idx, (None, None, 'IsControl'), id, cntrlNsBankedRead, cntrlNsBankedWrite) + def cntrlRegNC(idx, id = srtNormal, type = 'uw'): return ('ControlReg', type, idx, None, id) def pcStateReg(idx, id): - return ('PCState', 'uw', idx, (None, None, 'IsControl'), id) + return ('PCState', 'ud', idx, (None, None, 'IsControl'), id) }}; def operands {{ #Abstracted integer reg operands 'Dest': intReg('dest'), + 'Dest64': intReg64('dest'), + 'XDest': intRegX64('dest'), + 'WDest': intRegW64('dest'), 'IWDest': intRegIWPC('dest'), 'AIWDest': intRegAIWPC('dest'), 'Dest2': intReg('dest2'), + 'XDest2': intRegX64('dest2'), + 'FDest2': floatReg('dest2'), 'Result': intReg('result'), + 'XResult': intRegX64('result'), + 'XBase': intRegX64('base', id = srtBase), 'Base': intRegAPC('base', id = srtBase), + 'XOffset': intRegX64('offset'), 'Index': intReg('index'), 'Shift': intReg('shift'), 'Op1': intReg('op1'), 'Op2': intReg('op2'), 'Op3': intReg('op3'), + 'Op164': intReg64('op1'), + 'Op264': intReg64('op2'), + 'Op364': intReg64('op3'), + 'XOp1': intRegX64('op1'), + 'XOp2': intRegX64('op2'), + 'XOp3': intRegX64('op3'), + 'WOp1': intRegW64('op1'), + 'WOp2': intRegW64('op2'), + 'WOp3': intRegW64('op3'), 'Reg0': intReg('reg0'), 'Reg1': intReg('reg1'), 'Reg2': intReg('reg2'), @@ -147,13 +207,19 @@ def operands {{ #Fixed index integer reg operands 'SpMode': intRegNPC('intRegInMode((OperatingMode)regMode, INTREG_SP)'), + 'DecodedBankedIntReg': intRegNPC('decodeMrsMsrBankedIntRegIndex(byteMask, r)'), 'LR': intRegNPC('INTREG_LR'), + 'XLR': intRegX64('INTREG_X30'), 'R7': intRegNPC('7'), # First four arguments are passed in registers 'R0': intRegNPC('0'), 'R1': intRegNPC('1'), 'R2': intRegNPC('2'), 'R3': intRegNPC('3'), + 'X0': intRegX64('0'), + 'X1': intRegX64('1'), + 'X2': intRegX64('2'), + 'X3': intRegX64('3'), #Pseudo integer condition code registers 'CondCodesNZ': intRegCC('INTREG_CONDCODES_NZ'), @@ -230,9 +296,95 @@ def operands {{ 'FpOp2P2': floatReg('(op2 + 2)'), 'FpOp2P3': floatReg('(op2 + 3)'), + # Create AArch64 unpacked view of the FP registers + 'AA64FpOp1P0': floatReg('((op1 * 4) + 0)'), + 'AA64FpOp1P1': 
floatReg('((op1 * 4) + 1)'), + 'AA64FpOp1P2': floatReg('((op1 * 4) + 2)'), + 'AA64FpOp1P3': floatReg('((op1 * 4) + 3)'), + 'AA64FpOp2P0': floatReg('((op2 * 4) + 0)'), + 'AA64FpOp2P1': floatReg('((op2 * 4) + 1)'), + 'AA64FpOp2P2': floatReg('((op2 * 4) + 2)'), + 'AA64FpOp2P3': floatReg('((op2 * 4) + 3)'), + 'AA64FpOp3P0': floatReg('((op3 * 4) + 0)'), + 'AA64FpOp3P1': floatReg('((op3 * 4) + 1)'), + 'AA64FpOp3P2': floatReg('((op3 * 4) + 2)'), + 'AA64FpOp3P3': floatReg('((op3 * 4) + 3)'), + 'AA64FpDestP0': floatReg('((dest * 4) + 0)'), + 'AA64FpDestP1': floatReg('((dest * 4) + 1)'), + 'AA64FpDestP2': floatReg('((dest * 4) + 2)'), + 'AA64FpDestP3': floatReg('((dest * 4) + 3)'), + 'AA64FpDest2P0': floatReg('((dest2 * 4) + 0)'), + 'AA64FpDest2P1': floatReg('((dest2 * 4) + 1)'), + 'AA64FpDest2P2': floatReg('((dest2 * 4) + 2)'), + 'AA64FpDest2P3': floatReg('((dest2 * 4) + 3)'), + + 'AA64FpOp1P0V0': floatReg('((((op1+0)) * 4) + 0)'), + 'AA64FpOp1P1V0': floatReg('((((op1+0)) * 4) + 1)'), + 'AA64FpOp1P2V0': floatReg('((((op1+0)) * 4) + 2)'), + 'AA64FpOp1P3V0': floatReg('((((op1+0)) * 4) + 3)'), + + 'AA64FpOp1P0V1': floatReg('((((op1+1)) * 4) + 0)'), + 'AA64FpOp1P1V1': floatReg('((((op1+1)) * 4) + 1)'), + 'AA64FpOp1P2V1': floatReg('((((op1+1)) * 4) + 2)'), + 'AA64FpOp1P3V1': floatReg('((((op1+1)) * 4) + 3)'), + + 'AA64FpOp1P0V2': floatReg('((((op1+2)) * 4) + 0)'), + 'AA64FpOp1P1V2': floatReg('((((op1+2)) * 4) + 1)'), + 'AA64FpOp1P2V2': floatReg('((((op1+2)) * 4) + 2)'), + 'AA64FpOp1P3V2': floatReg('((((op1+2)) * 4) + 3)'), + + 'AA64FpOp1P0V3': floatReg('((((op1+3)) * 4) + 0)'), + 'AA64FpOp1P1V3': floatReg('((((op1+3)) * 4) + 1)'), + 'AA64FpOp1P2V3': floatReg('((((op1+3)) * 4) + 2)'), + 'AA64FpOp1P3V3': floatReg('((((op1+3)) * 4) + 3)'), + + 'AA64FpOp1P0V0S': floatReg('((((op1+0)%32) * 4) + 0)'), + 'AA64FpOp1P1V0S': floatReg('((((op1+0)%32) * 4) + 1)'), + 'AA64FpOp1P2V0S': floatReg('((((op1+0)%32) * 4) + 2)'), + 'AA64FpOp1P3V0S': floatReg('((((op1+0)%32) * 4) + 3)'), + + 'AA64FpOp1P0V1S': floatReg('((((op1+1)%32) * 4) + 0)'), + 'AA64FpOp1P1V1S': floatReg('((((op1+1)%32) * 4) + 1)'), + 'AA64FpOp1P2V1S': floatReg('((((op1+1)%32) * 4) + 2)'), + 'AA64FpOp1P3V1S': floatReg('((((op1+1)%32) * 4) + 3)'), + + 'AA64FpOp1P0V2S': floatReg('((((op1+2)%32) * 4) + 0)'), + 'AA64FpOp1P1V2S': floatReg('((((op1+2)%32) * 4) + 1)'), + 'AA64FpOp1P2V2S': floatReg('((((op1+2)%32) * 4) + 2)'), + 'AA64FpOp1P3V2S': floatReg('((((op1+2)%32) * 4) + 3)'), + + 'AA64FpOp1P0V3S': floatReg('((((op1+3)%32) * 4) + 0)'), + 'AA64FpOp1P1V3S': floatReg('((((op1+3)%32) * 4) + 1)'), + 'AA64FpOp1P2V3S': floatReg('((((op1+3)%32) * 4) + 2)'), + 'AA64FpOp1P3V3S': floatReg('((((op1+3)%32) * 4) + 3)'), + + 'AA64FpDestP0V0': floatReg('((((dest+0)) * 4) + 0)'), + 'AA64FpDestP1V0': floatReg('((((dest+0)) * 4) + 1)'), + 'AA64FpDestP2V0': floatReg('((((dest+0)) * 4) + 2)'), + 'AA64FpDestP3V0': floatReg('((((dest+0)) * 4) + 3)'), + + 'AA64FpDestP0V1': floatReg('((((dest+1)) * 4) + 0)'), + 'AA64FpDestP1V1': floatReg('((((dest+1)) * 4) + 1)'), + 'AA64FpDestP2V1': floatReg('((((dest+1)) * 4) + 2)'), + 'AA64FpDestP3V1': floatReg('((((dest+1)) * 4) + 3)'), + + 'AA64FpDestP0V0L': floatReg('((((dest+0)%32) * 4) + 0)'), + 'AA64FpDestP1V0L': floatReg('((((dest+0)%32) * 4) + 1)'), + 'AA64FpDestP2V0L': floatReg('((((dest+0)%32) * 4) + 2)'), + 'AA64FpDestP3V0L': floatReg('((((dest+0)%32) * 4) + 3)'), + + 'AA64FpDestP0V1L': floatReg('((((dest+1)%32) * 4) + 0)'), + 'AA64FpDestP1V1L': floatReg('((((dest+1)%32) * 4) + 1)'), + 'AA64FpDestP2V1L': floatReg('((((dest+1)%32) * 
4) + 2)'), + 'AA64FpDestP3V1L': floatReg('((((dest+1)%32) * 4) + 3)'), + #Abstracted control reg operands 'MiscDest': cntrlReg('dest'), 'MiscOp1': cntrlReg('op1'), + 'MiscNsBankedDest': cntrlNsBankedReg('dest'), + 'MiscNsBankedOp1': cntrlNsBankedReg('op1'), + 'MiscNsBankedDest64': cntrlNsBankedReg64('dest'), + 'MiscNsBankedOp164': cntrlNsBankedReg64('op1'), #Fixed index control regs 'Cpsr': cntrlReg('MISCREG_CPSR', srtCpsr), @@ -244,22 +396,41 @@ def operands {{ 'FpscrQc': cntrlRegNC('MISCREG_FPSCR_QC'), 'FpscrExc': cntrlRegNC('MISCREG_FPSCR_EXC'), 'Cpacr': cntrlReg('MISCREG_CPACR'), + 'Cpacr64': cntrlReg('MISCREG_CPACR_EL1'), 'Fpexc': cntrlRegNC('MISCREG_FPEXC'), + 'Nsacr': cntrlReg('MISCREG_NSACR'), + 'ElrHyp': cntrlRegNC('MISCREG_ELR_HYP'), + 'Hcr': cntrlReg('MISCREG_HCR'), + 'Hcr64': cntrlReg('MISCREG_HCR_EL2'), + 'Hdcr': cntrlReg('MISCREG_HDCR'), + 'Hcptr': cntrlReg('MISCREG_HCPTR'), + 'CptrEl264': cntrlReg('MISCREG_CPTR_EL2'), + 'CptrEl364': cntrlReg('MISCREG_CPTR_EL3'), + 'Hstr': cntrlReg('MISCREG_HSTR'), + 'Scr': cntrlReg('MISCREG_SCR'), + 'Scr64': cntrlReg('MISCREG_SCR_EL3'), 'Sctlr': cntrlRegNC('MISCREG_SCTLR'), 'SevMailbox': cntrlRegNC('MISCREG_SEV_MAILBOX'), 'LLSCLock': cntrlRegNC('MISCREG_LOCKFLAG'), + 'Dczid' : cntrlRegNC('MISCREG_DCZID_EL0'), #Register fields for microops 'URa' : intReg('ura'), + 'XURa' : intRegX64('ura'), + 'WURa' : intRegW64('ura'), 'IWRa' : intRegIWPC('ura'), 'Fa' : floatReg('ura'), + 'FaP1' : floatReg('ura + 1'), 'URb' : intReg('urb'), + 'XURb' : intRegX64('urb'), 'URc' : intReg('urc'), + 'XURc' : intRegX64('urc'), #Memory Operand 'Mem': ('Mem', 'uw', None, ('IsMemRef', 'IsLoad', 'IsStore'), srtNormal), #PCState fields + 'RawPC': pcStateReg('pc', srtPC), 'PC': pcStateReg('instPC', srtPC), 'NPC': pcStateReg('instNPC', srtPC), 'pNPC': pcStateReg('instNPC', srtEPC), diff --git a/src/arch/arm/isa/templates/basic.isa b/src/arch/arm/isa/templates/basic.isa index b3878b89a..de4506e05 100644 --- a/src/arch/arm/isa/templates/basic.isa +++ b/src/arch/arm/isa/templates/basic.isa @@ -1,5 +1,17 @@ // -*- mode:c++ -*- +// Copyright (c) 2011 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// // Copyright (c) 2007-2008 The Florida State University // All rights reserved. // @@ -60,6 +72,13 @@ def template BasicConstructor {{ } }}; +def template BasicConstructor64 {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst) : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s) + { + %(constructor)s; + } +}}; + // Basic instruction class execute method template. 
def template BasicExecute {{ diff --git a/src/arch/arm/isa/templates/branch64.isa b/src/arch/arm/isa/templates/branch64.isa new file mode 100644 index 000000000..84b3e6ae7 --- /dev/null +++ b/src/arch/arm/isa/templates/branch64.isa @@ -0,0 +1,141 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Authors: Gabe Black + +def template BranchImm64Declare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, int64_t _imm); + %(BasicExecDeclare)s +}; +}}; + +def template BranchImm64Constructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + int64_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _imm) + { + %(constructor)s; + } +}}; + +def template BranchImmCond64Declare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, int64_t _imm, + ConditionCode _condCode); + %(BasicExecDeclare)s +}; +}}; + +def template BranchImmCond64Constructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + int64_t _imm, + ConditionCode _condCode) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _imm, _condCode) + { + %(constructor)s; + } +}}; + +def template BranchReg64Declare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _op1); + %(BasicExecDeclare)s +}; +}}; + +def template BranchReg64Constructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _op1) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _op1) + { + %(constructor)s; + } +}}; + +def template BranchImmReg64Declare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + int64_t imm, IntRegIndex _op1); + %(BasicExecDeclare)s +}; +}}; + +def template BranchImmReg64Constructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + int64_t _imm, + IntRegIndex _op1) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _imm, _op1) + { + %(constructor)s; + } +}}; + +def template BranchImmImmReg64Declare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, int64_t _imm1, int64_t _imm2, + IntRegIndex _op1); + %(BasicExecDeclare)s +}; +}}; + +def template BranchImmImmReg64Constructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + int64_t _imm1, int64_t _imm2, + IntRegIndex _op1) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _imm1, _imm2, _op1) + { + %(constructor)s; + } +}}; diff --git a/src/arch/arm/isa/templates/data64.isa b/src/arch/arm/isa/templates/data64.isa new file mode 100644 index 000000000..b6f7ce8d0 --- /dev/null +++ b/src/arch/arm/isa/templates/data64.isa @@ -0,0 +1,279 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +def template DataXImmDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + IntRegIndex _op1, uint64_t _imm); + %(BasicExecDeclare)s +}; +}}; + +def template DataXImmConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1, + uint64_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _imm) + { + %(constructor)s; + } +}}; + +def template DataXSRegDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + IntRegIndex _op1, IntRegIndex _op2, + int32_t _shiftAmt, ArmShiftType _shiftType); + %(BasicExecDeclare)s +}; +}}; + +def template DataXSRegConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1, + IntRegIndex _op2, + int32_t _shiftAmt, + ArmShiftType _shiftType) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2, _shiftAmt, _shiftType) + { + %(constructor)s; + } +}}; + +def template DataXERegDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + IntRegIndex _op1, IntRegIndex _op2, + ArmExtendType _extendType, int32_t _shiftAmt); + %(BasicExecDeclare)s +}; +}}; + +def template DataXERegConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1, + IntRegIndex _op2, + ArmExtendType _extendType, + int32_t _shiftAmt) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2, _extendType, _shiftAmt) + { + %(constructor)s; + } +}}; + +def template DataX1RegDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + IntRegIndex _op1); + %(BasicExecDeclare)s +}; +}}; + +def template 
DataX1RegConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _dest, _op1) + { + %(constructor)s; + } +}}; + +def template DataX2RegDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + IntRegIndex _op1, IntRegIndex _op2); + %(BasicExecDeclare)s +}; +}}; + +def template DataX2RegConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1, + IntRegIndex _op2) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2) + { + %(constructor)s; + } +}}; + +def template DataX2RegImmDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + IntRegIndex _op1, IntRegIndex _op2, uint64_t _imm); + %(BasicExecDeclare)s +}; +}}; + +def template DataX2RegImmConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1, + IntRegIndex _op2, + uint64_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2, _imm) + { + %(constructor)s; + } +}}; + +def template DataX3RegDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + IntRegIndex _op1, IntRegIndex _op2, IntRegIndex _op3); + %(BasicExecDeclare)s +}; +}}; + +def template DataX3RegConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1, + IntRegIndex _op2, + IntRegIndex _op3) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2, _op3) + { + %(constructor)s; + } +}}; + +def template DataXCondCompImmDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _op1, + uint64_t _imm, ConditionCode _condCode, uint8_t _defCc); + %(BasicExecDeclare)s +}; +}}; + +def template DataXCondCompImmConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _op1, + uint64_t _imm, + ConditionCode _condCode, + uint8_t _defCc) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _op1, _imm, _condCode, _defCc) + { + %(constructor)s; + } +}}; + +def template DataXCondCompRegDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _op1, + IntRegIndex _op2, ConditionCode _condCode, + uint8_t _defCc); + %(BasicExecDeclare)s +}; +}}; + +def template DataXCondCompRegConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _op1, + IntRegIndex _op2, + ConditionCode _condCode, + uint8_t _defCc) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _op1, _op2, _condCode, _defCc) + { + %(constructor)s; + } +}}; + +def template DataXCondSelDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + IntRegIndex _op1, IntRegIndex _op2, + ConditionCode _condCode); + %(BasicExecDeclare)s +}; +}}; + +def template DataXCondSelConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1, + IntRegIndex _op2, + ConditionCode _condCode) + : %(base_class)s("%(mnemonic)s", machInst, 
%(op_class)s, + _dest, _op1, _op2, _condCode) + { + %(constructor)s; + } +}}; diff --git a/src/arch/arm/isa/templates/macromem.isa b/src/arch/arm/isa/templates/macromem.isa index 195204a95..465090660 100644 --- a/src/arch/arm/isa/templates/macromem.isa +++ b/src/arch/arm/isa/templates/macromem.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -338,6 +338,18 @@ def template MicroIntImmConstructor {{ } }}; +def template MicroIntImmXConstructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, + RegIndex _ura, + RegIndex _urb, + int32_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _ura, _urb, _imm) + { + %(constructor)s; + } +}}; + def template MicroIntRegDeclare {{ class %(class_name)s : public %(base_class)s { @@ -349,6 +361,28 @@ def template MicroIntRegDeclare {{ }; }}; +def template MicroIntXERegConstructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, + RegIndex _ura, RegIndex _urb, RegIndex _urc, + ArmExtendType _type, uint32_t _shiftAmt) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _ura, _urb, _urc, _type, _shiftAmt) + { + %(constructor)s; + } +}}; + +def template MicroIntXERegDeclare {{ + class %(class_name)s : public %(base_class)s + { + public: + %(class_name)s(ExtMachInst machInst, + RegIndex _ura, RegIndex _urb, RegIndex _urc, + ArmExtendType _type, uint32_t _shiftAmt); + %(BasicExecDeclare)s + }; +}}; + def template MicroIntRegConstructor {{ %(class_name)s::%(class_name)s(ExtMachInst machInst, RegIndex _ura, RegIndex _urb, RegIndex _urc, @@ -402,6 +436,96 @@ def template MacroMemConstructor {{ }}; +def template BigFpMemImmDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(const char *mnemonic, ExtMachInst machInst, + bool load, IntRegIndex dest, IntRegIndex base, int64_t imm); + %(BasicExecPanic)s +}; +}}; + +def template BigFpMemImmConstructor {{ +%(class_name)s::%(class_name)s(const char *mnemonic, ExtMachInst machInst, + bool load, IntRegIndex dest, IntRegIndex base, int64_t imm) + : %(base_class)s(mnemonic, machInst, %(op_class)s, load, dest, base, imm) +{ + %(constructor)s; +} +}}; + +def template BigFpMemRegDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(const char *mnemonic, ExtMachInst machInst, + bool load, IntRegIndex dest, IntRegIndex base, + IntRegIndex offset, ArmExtendType type, int64_t imm); + %(BasicExecPanic)s +}; +}}; + +def template BigFpMemRegConstructor {{ +%(class_name)s::%(class_name)s(const char *mnemonic, ExtMachInst machInst, + bool load, IntRegIndex dest, IntRegIndex base, + IntRegIndex offset, ArmExtendType type, int64_t imm) + : %(base_class)s(mnemonic, machInst, %(op_class)s, load, dest, base, + offset, type, imm) +{ + %(constructor)s; +} +}}; + +def template BigFpMemLitDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(const char *mnemonic, ExtMachInst machInst, + IntRegIndex dest, int64_t imm); + %(BasicExecPanic)s +}; +}}; + +def template BigFpMemLitConstructor {{ +%(class_name)s::%(class_name)s(const char *mnemonic, ExtMachInst machInst, + IntRegIndex dest, int64_t imm) + : %(base_class)s(mnemonic, machInst, %(op_class)s, dest, imm) +{ + %(constructor)s; +} +}}; + +def template PairMemDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // 
Constructor + %(class_name)s(const char *mnemonic, ExtMachInst machInst, + uint32_t size, bool fp, bool load, bool noAlloc, bool signExt, + bool exclusive, bool acrel, uint32_t imm, + AddrMode mode, IntRegIndex rn, IntRegIndex rt, + IntRegIndex rt2); + %(BasicExecPanic)s +}; +}}; + +def template PairMemConstructor {{ +%(class_name)s::%(class_name)s(const char *mnemonic, ExtMachInst machInst, + uint32_t size, bool fp, bool load, bool noAlloc, bool signExt, + bool exclusive, bool acrel, uint32_t imm, AddrMode mode, + IntRegIndex rn, IntRegIndex rt, IntRegIndex rt2) + : %(base_class)s(mnemonic, machInst, %(op_class)s, size, + fp, load, noAlloc, signExt, exclusive, acrel, + imm, mode, rn, rt, rt2) +{ + %(constructor)s; +} +}}; + def template VMemMultDeclare {{ class %(class_name)s : public %(base_class)s { diff --git a/src/arch/arm/isa/templates/mem.isa b/src/arch/arm/isa/templates/mem.isa index 871378f3f..7682c277d 100644 --- a/src/arch/arm/isa/templates/mem.isa +++ b/src/arch/arm/isa/templates/mem.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010, 2012 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -697,6 +697,11 @@ def template LoadStoreImmDeclare {{ %(InitiateAccDeclare)s %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } }; }}; @@ -763,6 +768,11 @@ def template StoreRegDeclare {{ %(InitiateAccDeclare)s %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } }; }}; @@ -808,6 +818,11 @@ def template LoadRegDeclare {{ %(InitiateAccDeclare)s %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } }; }}; @@ -828,6 +843,11 @@ def template LoadImmDeclare {{ %(InitiateAccDeclare)s %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } }; }}; diff --git a/src/arch/arm/isa/templates/mem64.isa b/src/arch/arm/isa/templates/mem64.isa new file mode 100644 index 000000000..87dcba988 --- /dev/null +++ b/src/arch/arm/isa/templates/mem64.isa @@ -0,0 +1,686 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +let {{ + SPAlignmentCheckCode = ''' + if (baseIsSP && bits(XBase, 3, 0) && + SPAlignmentCheckEnabled(xc->tcBase())) { + return new SPAlignmentFault(); + } + ''' +}}; + +def template Load64Execute {{ + Fault %(class_name)s::execute(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + if (fault == NoFault) { + fault = readMemAtomic(xc, traceData, EA, Mem, memAccessFlags); + %(memacc_code)s; + } + + if (fault == NoFault) { + %(op_wb)s; + } + + return fault; + } +}}; + +def template Store64Execute {{ + Fault %(class_name)s::execute(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + if (fault == NoFault) { + %(memacc_code)s; + } + + if (fault == NoFault) { + fault = writeMemAtomic(xc, traceData, Mem, EA, + memAccessFlags, NULL); + } + + if (fault == NoFault) { + %(op_wb)s; + } + + return fault; + } +}}; + +def template Store64InitiateAcc {{ + Fault %(class_name)s::initiateAcc(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + if (fault == NoFault) { + %(memacc_code)s; + } + + if (fault == NoFault) { + fault = writeMemTiming(xc, traceData, Mem, EA, memAccessFlags, + NULL); + } + + return fault; + } +}}; + +def template StoreEx64Execute {{ + Fault %(class_name)s::execute(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + if (fault == NoFault) { + %(memacc_code)s; + } + + uint64_t writeResult = 0; + if (fault == NoFault) { + fault = writeMemAtomic(xc, traceData, Mem, EA, memAccessFlags, + &writeResult); + } + + if (fault == NoFault) { + %(postacc_code)s; + } + + if (fault == NoFault) { + %(op_wb)s; + } + + return fault; + } +}}; + +def template StoreEx64InitiateAcc {{ + Fault %(class_name)s::initiateAcc(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + if (fault == NoFault) { + %(memacc_code)s; + } + + if (fault == NoFault) { + fault = writeMemTiming(xc, traceData, Mem, EA, memAccessFlags, + NULL); + } + + return fault; + } +}}; + +def template Load64InitiateAcc {{ + Fault %(class_name)s::initiateAcc(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_src_decl)s; + %(op_rd)s; + %(ea_code)s; + + if (fault == NoFault) { + fault = readMemTiming(xc, traceData, EA, Mem, memAccessFlags); + } + + return fault; + } +}}; + +def 
template Load64CompleteAcc {{ + Fault %(class_name)s::completeAcc(PacketPtr pkt, + %(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Fault fault = NoFault; + + %(op_decl)s; + %(op_rd)s; + + // ARM instructions will not have a pkt if the predicate is false + getMem(pkt, Mem, traceData); + + if (fault == NoFault) { + %(memacc_code)s; + } + + if (fault == NoFault) { + %(op_wb)s; + } + + return fault; + } +}}; + +def template Store64CompleteAcc {{ + Fault %(class_name)s::completeAcc(PacketPtr pkt, + %(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + return NoFault; + } +}}; + +def template StoreEx64CompleteAcc {{ + Fault %(class_name)s::completeAcc(PacketPtr pkt, + %(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Fault fault = NoFault; + + %(op_decl)s; + %(op_rd)s; + + uint64_t writeResult = pkt->req->getExtraData(); + %(postacc_code)s; + + if (fault == NoFault) { + %(op_wb)s; + } + + return fault; + } +}}; + +def template DCStore64Declare {{ + class %(class_name)s : public %(base_class)s + { + public: + + /// Constructor. + %(class_name)s(ExtMachInst machInst, IntRegIndex _base, IntRegIndex _dest, uint64_t _imm); + + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } + }; +}}; + +def template DCStore64Constructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, IntRegIndex _base, IntRegIndex _dest, uint64_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + (IntRegIndex)_base, _dest, _imm) + { + %(constructor)s; + assert(!%(use_uops)d); + } +}}; + +def template DCStore64Execute {{ + Fault %(class_name)s::execute(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + + if (fault == NoFault) { + %(memacc_code)s; + } + + if (fault == NoFault) { + fault = xc->writeMem(NULL, op_size, EA, memAccessFlags, NULL); + } + + if (fault == NoFault) { + %(op_wb)s; + } + + return fault; + } +}}; + +def template DCStore64InitiateAcc {{ + Fault %(class_name)s::initiateAcc(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + if (fault == NoFault) { + %(memacc_code)s; + } + + if (fault == NoFault) { + fault = xc->writeMem(NULL, op_size, EA, memAccessFlags, NULL); + } + + return fault; + } +}}; + + +def template LoadStoreImm64Declare {{ + class %(class_name)s : public %(base_class)s + { + public: + + /// Constructor. + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _base, int64_t _imm); + + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } + }; +}}; + +def template LoadStoreImmU64Declare {{ + class %(class_name)s : public %(base_class)s + { + public: + + /// Constructor. + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _base, int64_t _imm, + bool noAlloc = false, bool exclusive = false, + bool acrel = false); + + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } + }; +}}; + +def template LoadStoreImmDU64Declare {{ + class %(class_name)s : public %(base_class)s + { + public: + + /// Constructor. 
+ %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _dest2, IntRegIndex _base, + int64_t _imm = 0, bool noAlloc = false, bool exclusive = false, + bool acrel = false); + + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } + }; +}}; + +def template StoreImmDEx64Declare {{ + /** + * Static instruction class for "%(mnemonic)s". + */ + class %(class_name)s : public %(base_class)s + { + public: + + /// Constructor. + %(class_name)s(ExtMachInst machInst, + IntRegIndex _result, IntRegIndex _dest, IntRegIndex _dest2, + IntRegIndex _base, int64_t _imm = 0); + + %(BasicExecDeclare)s + + %(InitiateAccDeclare)s + + %(CompleteAccDeclare)s + }; +}}; + + +def template LoadStoreReg64Declare {{ + class %(class_name)s : public %(base_class)s + { + public: + + /// Constructor. + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _base, IntRegIndex _offset, + ArmExtendType _type, uint32_t _shiftAmt); + + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } + }; +}}; + +def template LoadStoreRegU64Declare {{ + class %(class_name)s : public %(base_class)s + { + public: + + /// Constructor. + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _base, IntRegIndex _offset, + ArmExtendType _type, uint32_t _shiftAmt, + bool noAlloc = false, bool exclusive = false, + bool acrel = false); + + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } + }; +}}; + +def template LoadStoreRaw64Declare {{ + class %(class_name)s : public %(base_class)s + { + public: + + /// Constructor. + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + IntRegIndex _base); + + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } + }; +}}; + +def template LoadStoreEx64Declare {{ + class %(class_name)s : public %(base_class)s + { + public: + + /// Constructor. + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + IntRegIndex _base, IntRegIndex _result); + + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } + }; +}}; + +def template LoadStoreLit64Declare {{ + class %(class_name)s : public %(base_class)s + { + public: + + /// Constructor. + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, int64_t _imm); + + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } + }; +}}; + +def template LoadStoreLitU64Declare {{ + class %(class_name)s : public %(base_class)s + { + public: + + /// Constructor. 
+ %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, int64_t _imm, + bool noAlloc = false, bool exclusive = false, + bool acrel = false); + + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } + }; +}}; + +def template LoadStoreImm64Constructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _base, int64_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + (IntRegIndex)_dest, (IntRegIndex)_base, _imm) + { + %(constructor)s; +#if %(use_uops)d + assert(numMicroops >= 2); + uops = new StaticInstPtr[numMicroops]; + uops[0] = new %(acc_name)s(machInst, _dest, _base, _imm); + uops[0]->setDelayedCommit(); + uops[1] = new %(wb_decl)s; + uops[1]->setLastMicroop(); +#endif + } +}}; + +def template LoadStoreImmU64Constructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _base, int64_t _imm, + bool noAlloc, bool exclusive, bool acrel) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _base, _imm) + { + %(constructor)s; + assert(!%(use_uops)d); + setExcAcRel(exclusive, acrel); + } +}}; + +def template LoadStoreImmDU64Constructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _dest2, IntRegIndex _base, + int64_t _imm, bool noAlloc, bool exclusive, bool acrel) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _dest2, _base, _imm) + { + %(constructor)s; + assert(!%(use_uops)d); + setExcAcRel(exclusive, acrel); + } +}}; + +def template StoreImmDEx64Constructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _result, IntRegIndex _dest, IntRegIndex _dest2, + IntRegIndex _base, int64_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _result, _dest, _dest2, _base, _imm) + { + %(constructor)s; + assert(!%(use_uops)d); + } +}}; + + +def template LoadStoreReg64Constructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _base, IntRegIndex _offset, + ArmExtendType _type, uint32_t _shiftAmt) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _base, _offset, _type, _shiftAmt) + { + %(constructor)s; +#if %(use_uops)d + assert(numMicroops >= 2); + uops = new StaticInstPtr[numMicroops]; + uops[0] = new %(acc_name)s(machInst, _dest, _base, _offset, + _type, _shiftAmt); + uops[0]->setDelayedCommit(); + uops[1] = new %(wb_decl)s; + uops[1]->setLastMicroop(); +#endif + } +}}; + +def template LoadStoreRegU64Constructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _base, IntRegIndex _offset, + ArmExtendType _type, uint32_t _shiftAmt, + bool noAlloc, bool exclusive, bool acrel) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _base, _offset, _type, _shiftAmt) + { + %(constructor)s; + assert(!%(use_uops)d); + setExcAcRel(exclusive, acrel); + } +}}; + +def template LoadStoreRaw64Constructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _base) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _dest, _base) + { + %(constructor)s; + } +}}; + +def template LoadStoreEx64Constructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _base, IntRegIndex _result) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _base, _result) + { + %(constructor)s; + } +}}; + +def template 
LoadStoreLit64Constructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, int64_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + (IntRegIndex)_dest, _imm) + { + %(constructor)s; +#if %(use_uops)d + assert(numMicroops >= 2); + uops = new StaticInstPtr[numMicroops]; + uops[0] = new %(acc_name)s(machInst, _dest, _imm); + uops[0]->setDelayedCommit(); + uops[1] = new %(wb_decl)s; + uops[1]->setLastMicroop(); +#endif + } +}}; + +def template LoadStoreLitU64Constructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, int64_t _imm, + bool noAlloc, bool exclusive, bool acrel) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + (IntRegIndex)_dest, _imm) + { + %(constructor)s; + assert(!%(use_uops)d); + setExcAcRel(exclusive, acrel); + } +}}; diff --git a/src/arch/arm/isa/templates/misc.isa b/src/arch/arm/isa/templates/misc.isa index 212897aa0..36db5b6c2 100644 --- a/src/arch/arm/isa/templates/misc.isa +++ b/src/arch/arm/isa/templates/misc.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -62,6 +62,69 @@ def template MrsConstructor {{ } }}; +def template MrsBankedRegDeclare {{ +class %(class_name)s : public %(base_class)s +{ + protected: + uint8_t byteMask; + bool r; + + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + uint8_t _sysM, bool _r); + %(BasicExecDeclare)s +}; +}}; + +def template MrsBankedRegConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + uint8_t _sysM, + bool _r) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _dest), + byteMask(_sysM), r(_r) + { + %(constructor)s; + if (!(condCode == COND_AL || condCode == COND_UC)) { + for (int x = 0; x < _numDestRegs; x++) { + _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; + } + } + } +}}; + +def template MsrBankedRegDeclare {{ +class %(class_name)s : public %(base_class)s +{ + protected: + bool r; + + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _op1, + uint8_t _sysM, bool _r); + %(BasicExecDeclare)s +}; +}}; + +def template MsrBankedRegConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _op1, + uint8_t _sysM, + bool _r) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _op1, _sysM), + r(_r) + { + %(constructor)s; + if (!(condCode == COND_AL || condCode == COND_UC)) { + for (int x = 0; x < _numDestRegs; x++) { + _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; + } + } + } +}}; + def template MsrRegDeclare {{ class %(class_name)s : public %(base_class)s { @@ -114,6 +177,66 @@ def template MsrImmConstructor {{ } }}; +def template MrrcOpDeclare {{ +class %(class_name)s : public %(base_class)s +{ + protected: + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _op1, + IntRegIndex _dest, IntRegIndex _dest2, uint32_t imm); + %(BasicExecDeclare)s +}; +}}; + +def template MrrcOpConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex op1, + IntRegIndex dest, + IntRegIndex dest2, + uint32_t imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, op1, dest, + dest2, imm) + { + %(constructor)s; + if (!(condCode == COND_AL || condCode == COND_UC)) { + for (int x = 0; x < _numDestRegs; x++) { + _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; + } + } + } +}}; + +def template 
McrrOpDeclare {{ +class %(class_name)s : public %(base_class)s +{ + protected: + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _op1, IntRegIndex _op2, + IntRegIndex _dest, uint32_t imm); + %(BasicExecDeclare)s +}; +}}; + +def template McrrOpConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex op1, + IntRegIndex op2, + IntRegIndex dest, + uint32_t imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, op1, op2, + dest, imm) + { + %(constructor)s; + if (!(condCode == COND_AL || condCode == COND_UC)) { + for (int x = 0; x < _numDestRegs; x++) { + _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; + } + } + } +}}; + def template ImmOpDeclare {{ class %(class_name)s : public %(base_class)s { @@ -310,6 +433,35 @@ def template RegRegImmOpConstructor {{ } }}; +def template RegImmImmOpDeclare {{ +class %(class_name)s : public %(base_class)s +{ + protected: + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, uint64_t _imm1, uint64_t _imm2); + %(BasicExecDeclare)s +}; +}}; + +def template RegImmImmOpConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + uint64_t _imm1, + uint64_t _imm2) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _imm1, _imm2) + { + %(constructor)s; + if (!(condCode == COND_AL || condCode == COND_UC)) { + for (int x = 0; x < _numDestRegs; x++) { + _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; + } + } + } +}}; + def template RegRegImmImmOpDeclare {{ class %(class_name)s : public %(base_class)s { diff --git a/src/arch/arm/isa/templates/misc64.isa b/src/arch/arm/isa/templates/misc64.isa new file mode 100644 index 000000000..09d3d4470 --- /dev/null +++ b/src/arch/arm/isa/templates/misc64.isa @@ -0,0 +1,91 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +def template RegRegImmImmOp64Declare {{ +class %(class_name)s : public %(base_class)s +{ + protected: + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, + uint64_t _imm1, uint64_t _imm2); + %(BasicExecDeclare)s +}; +}}; + +def template RegRegImmImmOp64Constructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1, + uint64_t _imm1, + uint64_t _imm2) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _imm1, _imm2) + { + %(constructor)s; + } +}}; + +def template RegRegRegImmOp64Declare {{ +class %(class_name)s : public %(base_class)s +{ + protected: + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, + IntRegIndex _op2, uint64_t _imm); + %(BasicExecDeclare)s +}; +}}; + +def template RegRegRegImmOp64Constructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1, + IntRegIndex _op2, + uint64_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2, _imm) + { + %(constructor)s; + } +}}; + diff --git a/src/arch/arm/isa/templates/neon.isa b/src/arch/arm/isa/templates/neon.isa index 573d245b8..ffa6b53d4 100644 --- a/src/arch/arm/isa/templates/neon.isa +++ b/src/arch/arm/isa/templates/neon.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2012 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -39,8 +39,26 @@ let {{ simdEnabledCheckCode = ''' - if (!neonEnabled(Cpacr, Cpsr, Fpexc)) - return disabledFault(); + { + uint32_t issEnCheck; + bool trapEnCheck; + uint32_t seq; + if (!vfpNeonEnabled(seq, Hcptr, Nsacr, Cpacr, Cpsr, issEnCheck, + trapEnCheck, xc->tcBase(), Fpexc, true)) + {return disabledFault();} + if (trapEnCheck) { + CPSR cpsrEnCheck = Cpsr; + if (cpsrEnCheck.mode == MODE_HYP) { + return new UndefinedInstruction(machInst, issEnCheck, + EC_TRAPPED_HCPTR); + } else { + if (!inSecureState(Scr, Cpsr)) { + return new HypervisorTrap(machInst, issEnCheck, + EC_TRAPPED_HCPTR); + } + } + } + } ''' }}; diff --git a/src/arch/arm/isa/templates/neon64.isa b/src/arch/arm/isa/templates/neon64.isa new file mode 100644 index 000000000..d20e4e653 --- /dev/null +++ b/src/arch/arm/isa/templates/neon64.isa @@ -0,0 +1,527 @@ +// -*- mode: c++ -*- + +// Copyright (c) 2012-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. 
You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Mbou Eyole +// Giacomo Gabrielli + +let {{ + simd64EnabledCheckCode = vfp64EnabledCheckCode +}}; + +def template NeonX2RegOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2) + { + %(constructor)s; + } + + %(BasicExecDeclare)s +}; +}}; + +def template NeonX2RegImmOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + uint64_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2, _imm) + { + %(constructor)s; + } + + %(BasicExecDeclare)s +}; +}}; + +def template NeonX1RegOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1) + { + %(constructor)s; + } + + %(BasicExecDeclare)s +}; +}}; + +def template NeonX1RegImmOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, uint64_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _imm) + { + %(constructor)s; + } + + %(BasicExecDeclare)s +}; +}}; 
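// Note on the templates above (descriptive only, hypothetical reading of the
// generator): the NeonX*OpDeclare templates are parameterized on an _Element
// type, so each one expands to a C++ class template, and NeonXExecDeclare
// further down emits explicit instantiations of execute() for whichever
// element widths an instruction actually uses (8-, 16-, 32- or 64-bit
// vector elements), letting one template body serve all of them.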
+ +def template NeonX1Reg2ImmOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, uint64_t _imm1, + uint64_t _imm2) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _imm1, _imm2) + { + %(constructor)s; + } + + %(BasicExecDeclare)s +}; +}}; + +def template NeonX1RegImmOnlyOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, uint64_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _imm) + { + %(constructor)s; + } + + %(BasicExecDeclare)s +}; +}}; + +def template NeonXExecDeclare {{ + template + Fault %(class_name)s<%(targs)s>::execute( + %(CPU_exec_context)s *, Trace::InstRecord *) const; +}}; + +def template NeonXEqualRegOpExecute {{ + template <class Element> + Fault %(class_name)s<Element>::execute(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Fault fault = NoFault; + %(op_decl)s; + %(op_rd)s; + + const unsigned rCount = %(r_count)d; + const unsigned eCount = rCount * sizeof(FloatRegBits) / sizeof(Element); + const unsigned eCountFull = 4 * sizeof(FloatRegBits) / sizeof(Element); + + union RegVect { + FloatRegBits regs[rCount]; + Element elements[eCount]; + }; + + union FullRegVect { + FloatRegBits regs[4]; + Element elements[eCountFull]; + }; + + %(code)s; + if (fault == NoFault) + { + %(op_wb)s; + } + + return fault; + } +}}; + +def template NeonXUnequalRegOpExecute {{ + template <class Element> + Fault %(class_name)s<Element>::execute(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + typedef typename bigger_type_t<Element>::type BigElement; + Fault fault = NoFault; + %(op_decl)s; + %(op_rd)s; + + const unsigned rCount = %(r_count)d; + const unsigned eCount = rCount * sizeof(FloatRegBits) / sizeof(Element); + const unsigned eCountFull = 4 * sizeof(FloatRegBits) / sizeof(Element); + + union RegVect { + FloatRegBits regs[rCount]; + Element elements[eCount]; + BigElement bigElements[eCount / 2]; + }; + + union BigRegVect { + FloatRegBits regs[2 * rCount]; + BigElement elements[eCount]; + }; + + union FullRegVect { + FloatRegBits regs[4]; + Element elements[eCountFull]; + }; + + %(code)s; + if (fault == NoFault) + { + %(op_wb)s; + } + + return fault; + } +}}; + +def template MicroNeonMemDeclare64 {{ + class %(class_name)s : public %(base_class)s + { + protected: + // True if the base register is SP (used for SP alignment checking) + bool baseIsSP; + // Access size in bytes + uint8_t accSize; + // Vector element size (0 -> 8-bit, 1 -> 16-bit, 2 -> 32-bit, + // 3 -> 64-bit) + uint8_t eSize; + + public: + %(class_name)s(ExtMachInst machInst, RegIndex _dest, RegIndex _ura, + uint32_t _imm, unsigned extraMemFlags, bool _baseIsSP, + uint8_t _accSize, uint8_t _eSize) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _dest, + _ura, _imm), + baseIsSP(_baseIsSP), accSize(_accSize), eSize(_eSize) + { + memAccessFlags |= extraMemFlags; + %(constructor)s; + } + + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + }; +}}; + +def template NeonLoadExecute64 {{ + Fault %(class_name)s::execute( + %(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_decl)s; + %(mem_decl)s; + %(op_rd)s; + 
%(ea_code)s; + + MemUnion memUnion; + uint8_t *dataPtr = memUnion.bytes; + + if (fault == NoFault) { + fault = xc->readMem(EA, dataPtr, accSize, memAccessFlags); + %(memacc_code)s; + } + + if (fault == NoFault) { + %(op_wb)s; + } + + return fault; + } +}}; + +def template NeonLoadInitiateAcc64 {{ + Fault %(class_name)s::initiateAcc( + %(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_decl)s; + %(mem_decl)s; + %(op_rd)s; + %(ea_code)s; + + MemUnion memUnion; + uint8_t *dataPtr = memUnion.bytes; + + if (fault == NoFault) { + fault = xc->readMem(EA, dataPtr, accSize, memAccessFlags); + } + + return fault; + } +}}; + +def template NeonLoadCompleteAcc64 {{ + Fault %(class_name)s::completeAcc( + PacketPtr pkt, %(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Fault fault = NoFault; + + %(mem_decl)s; + %(op_decl)s; + %(op_rd)s; + + MemUnion &memUnion = *(MemUnion *)pkt->getPtr<uint8_t>(); + + if (fault == NoFault) { + %(memacc_code)s; + } + + if (fault == NoFault) { + %(op_wb)s; + } + + return fault; + } +}}; + +def template NeonStoreExecute64 {{ + Fault %(class_name)s::execute( + %(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_decl)s; + %(mem_decl)s; + %(op_rd)s; + %(ea_code)s; + + MemUnion memUnion; + uint8_t *dataPtr = memUnion.bytes; + + if (fault == NoFault) { + %(memacc_code)s; + } + + if (fault == NoFault) { + fault = xc->writeMem(dataPtr, accSize, EA, memAccessFlags, + NULL); + } + + if (fault == NoFault) { + %(op_wb)s; + } + + return fault; + } +}}; + +def template NeonStoreInitiateAcc64 {{ + Fault %(class_name)s::initiateAcc( + %(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_decl)s; + %(mem_decl)s; + %(op_rd)s; + %(ea_code)s; + + MemUnion memUnion; + if (fault == NoFault) { + %(memacc_code)s; + } + + if (fault == NoFault) { + fault = xc->writeMem(memUnion.bytes, accSize, EA, memAccessFlags, + NULL); + } + + return fault; + } +}}; + +def template NeonStoreCompleteAcc64 {{ + Fault %(class_name)s::completeAcc( + PacketPtr pkt, %(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + return NoFault; + } +}}; + +def template VMemMultDeclare64 {{ + class %(class_name)s : public %(base_class)s + { + public: + // Constructor + %(class_name)s(ExtMachInst machInst, RegIndex rn, RegIndex vd, + RegIndex rm, uint8_t eSize, uint8_t dataSize, + uint8_t numStructElems, uint8_t numRegs, bool wb); + %(BasicExecPanic)s + }; +}}; + +def template VMemSingleDeclare64 {{ + class %(class_name)s : public %(base_class)s + { + public: + // Constructor + %(class_name)s(ExtMachInst machInst, RegIndex rn, RegIndex vd, + RegIndex rm, uint8_t eSize, uint8_t dataSize, + uint8_t numStructElems, uint8_t index, bool wb, + bool replicate = false); + %(BasicExecPanic)s + }; +}}; + +def template VMemMultConstructor64 {{ + %(class_name)s::%(class_name)s( + ExtMachInst machInst, RegIndex rn, RegIndex vd, RegIndex rm, + uint8_t _eSize, uint8_t _dataSize, uint8_t _numStructElems, + uint8_t _numRegs, bool _wb) : + %(base_class)s( + "%(mnemonic)s", machInst, %(op_class)s, rn, vd, rm, + _eSize, _dataSize, _numStructElems, _numRegs, _wb) + { + %(constructor)s; + } +}}; + +def template VMemSingleConstructor64 {{ + %(class_name)s::%(class_name)s( + ExtMachInst machInst, RegIndex rn, RegIndex vd, RegIndex rm, + uint8_t _eSize, uint8_t _dataSize, uint8_t _numStructElems, + uint8_t _index, bool _wb, bool _replicate) : + 
%(base_class)s( + "%(mnemonic)s", machInst, %(op_class)s, rn, vd, rm, + _eSize, _dataSize, _numStructElems, _index, _wb, + _replicate) + { + %(constructor)s; + } +}}; + +def template MicroNeonMixDeclare64 {{ + class %(class_name)s : public %(base_class)s + { + public: + %(class_name)s(ExtMachInst machInst, RegIndex _dest, RegIndex _op1, + uint8_t _eSize, uint8_t _dataSize, + uint8_t _numStructElems, uint8_t _numRegs, + uint8_t _step) : + %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _eSize, _dataSize, _numStructElems, + _numRegs, _step) + { + %(constructor)s; + } + + %(BasicExecDeclare)s + }; +}}; + +def template MicroNeonMixLaneDeclare64 {{ + class %(class_name)s : public %(base_class)s + { + public: + %(class_name)s(ExtMachInst machInst, RegIndex _dest, RegIndex _op1, + uint8_t _eSize, uint8_t _dataSize, + uint8_t _numStructElems, uint8_t _lane, uint8_t _step, + bool _replicate = false) : + %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _eSize, _dataSize, _numStructElems, + _lane, _step, _replicate) + { + %(constructor)s; + } + + %(BasicExecDeclare)s + }; +}}; + +def template MicroNeonMixExecute64 {{ + Fault %(class_name)s::execute(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Fault fault = NoFault; + uint64_t resTemp = 0; + resTemp = resTemp; + %(op_decl)s; + %(op_rd)s; + + %(code)s; + if (fault == NoFault) + { + %(op_wb)s; + } + + return fault; + } +}}; diff --git a/src/arch/arm/isa/templates/templates.isa b/src/arch/arm/isa/templates/templates.isa index 148139225..2263cdff4 100644 --- a/src/arch/arm/isa/templates/templates.isa +++ b/src/arch/arm/isa/templates/templates.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2011 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -40,26 +40,37 @@ //Basic instruction templates ##include "basic.isa" +//Templates for AArch64 bit data instructions. 
+##include "data64.isa" + //Templates for predicated instructions ##include "pred.isa" //Templates for memory instructions ##include "mem.isa" +//Templates for AArch64 memory instructions +##include "mem64.isa" + //Miscellaneous instructions that don't fit elsewhere ##include "misc.isa" +##include "misc64.isa" //Templates for microcoded memory instructions ##include "macromem.isa" //Templates for branches ##include "branch.isa" +##include "branch64.isa" //Templates for multiplies ##include "mult.isa" //Templates for VFP instructions ##include "vfp.isa" +##include "vfp64.isa" //Templates for Neon instructions ##include "neon.isa" + +##include "neon64.isa" diff --git a/src/arch/arm/isa/templates/vfp.isa b/src/arch/arm/isa/templates/vfp.isa index 90dd751ff..176b6604c 100644 --- a/src/arch/arm/isa/templates/vfp.isa +++ b/src/arch/arm/isa/templates/vfp.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -39,32 +39,117 @@ let {{ vfpEnabledCheckCode = ''' - if (!vfpEnabled(Cpacr, Cpsr, Fpexc)) - return disabledFault(); + uint32_t issEnCheck; + bool trapEnCheck; + uint32_t seq; + if (!vfpNeonEnabled(seq,Hcptr, Nsacr, Cpacr, Cpsr, issEnCheck, + trapEnCheck, xc->tcBase(), Fpexc)) + {return disabledFault();} + if (trapEnCheck) { + CPSR cpsrEnCheck = Cpsr; + if (cpsrEnCheck.mode == MODE_HYP) { + return new UndefinedInstruction(machInst, issEnCheck, + EC_TRAPPED_HCPTR); + } else { + if (!inSecureState(Scr, Cpsr)) { + return new HypervisorTrap(machInst, issEnCheck, + EC_TRAPPED_HCPTR); + } + } + } + ''' + + vfp64EnabledCheckCode = ''' + CPSR cpsrEnCheck = Cpsr; + ExceptionLevel el = (ExceptionLevel) (uint8_t) cpsrEnCheck.el; + if (!vfpNeon64Enabled(Cpacr64, el)) + return new SupervisorTrap(machInst, 0x1E00000, + EC_TRAPPED_SIMD_FP); + + if (ArmSystem::haveVirtualization(xc->tcBase()) && el <= EL2) { + HCPTR cptrEnCheck = xc->tcBase()->readMiscReg(MISCREG_CPTR_EL2); + if (cptrEnCheck.tfp) + return new HypervisorTrap(machInst, 0x1E00000, + EC_TRAPPED_SIMD_FP); + } + + if (ArmSystem::haveSecurity(xc->tcBase())) { + HCPTR cptrEnCheck = xc->tcBase()->readMiscReg(MISCREG_CPTR_EL3); + if (cptrEnCheck.tfp) + return new SecureMonitorTrap(machInst, 0x1E00000, + EC_TRAPPED_SIMD_FP); + } ''' vmsrEnabledCheckCode = ''' - if (!vfpEnabled(Cpacr, Cpsr)) + uint32_t issEnCheck; + bool trapEnCheck; + uint32_t seq; + if (!vfpNeonEnabled(seq,Hcptr, Nsacr, Cpacr, Cpsr, issEnCheck, + trapEnCheck, xc->tcBase())) if (dest != (int)MISCREG_FPEXC && dest != (int)MISCREG_FPSID) - return disabledFault(); + {return disabledFault();} if (!inPrivilegedMode(Cpsr)) if (dest != (int)MISCREG_FPSCR) return disabledFault(); - + if (trapEnCheck) { + CPSR cpsrEnCheck = Cpsr; + if (cpsrEnCheck.mode == MODE_HYP) { + return new UndefinedInstruction(machInst, issEnCheck, + EC_TRAPPED_HCPTR); + } else { + if (!inSecureState(Scr, Cpsr)) { + return new HypervisorTrap(machInst, issEnCheck, + EC_TRAPPED_HCPTR); + } + } + } ''' vmrsEnabledCheckCode = ''' - if (!vfpEnabled(Cpacr, Cpsr)) + uint32_t issEnCheck; + bool trapEnCheck; + uint32_t seq; + if (!vfpNeonEnabled(seq,Hcptr, Nsacr, Cpacr, Cpsr, issEnCheck, + trapEnCheck, xc->tcBase())) if (op1 != (int)MISCREG_FPEXC && op1 != (int)MISCREG_FPSID && op1 != (int)MISCREG_MVFR0 && op1 != (int)MISCREG_MVFR1) - return disabledFault(); + {return disabledFault();} if (!inPrivilegedMode(Cpsr)) if (op1 != (int)MISCREG_FPSCR) return disabledFault(); + 
if (trapEnCheck) { + CPSR cpsrEnCheck = Cpsr; + if (cpsrEnCheck.mode == MODE_HYP) { + return new UndefinedInstruction(machInst, issEnCheck, + EC_TRAPPED_HCPTR); + } else { + if (!inSecureState(Scr, Cpsr)) { + return new HypervisorTrap(machInst, issEnCheck, + EC_TRAPPED_HCPTR); + } + } + } ''' vmrsApsrEnabledCheckCode = ''' - if (!vfpEnabled(Cpacr, Cpsr)) - return disabledFault(); + uint32_t issEnCheck; + bool trapEnCheck; + uint32_t seq; + if (!vfpNeonEnabled(seq,Hcptr, Nsacr, Cpacr, Cpsr, issEnCheck, + trapEnCheck, xc->tcBase())) + {return disabledFault();} + if (trapEnCheck) { + CPSR cpsrEnCheck = Cpsr; + if (cpsrEnCheck.mode == MODE_HYP) { + return new UndefinedInstruction(machInst, issEnCheck, + EC_TRAPPED_HCPTR); + } else { + if (!inSecureState(Scr, Cpsr)) { + return new HypervisorTrap(machInst, issEnCheck, + EC_TRAPPED_HCPTR); + } + } + } ''' }}; diff --git a/src/arch/arm/isa/templates/vfp64.isa b/src/arch/arm/isa/templates/vfp64.isa new file mode 100644 index 000000000..518cedaae --- /dev/null +++ b/src/arch/arm/isa/templates/vfp64.isa @@ -0,0 +1,140 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2012 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Authors: Thomas Grocutt + +def template AA64FpRegRegOpConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, + VfpMicroMode mode) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, mode) + { + %(constructor)s; + for (int x = 0; x < _numDestRegs; x++) { + _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; + } + } +}}; + +def template AA64FpRegRegOpConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, + VfpMicroMode mode) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, mode) + { + %(constructor)s; + for (int x = 0; x < _numDestRegs; x++) { + _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; + } + } +}}; + +def template AA64FpRegImmOpConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, uint64_t _imm, VfpMicroMode mode) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _imm, mode) + { + %(constructor)s; + for (int x = 0; x < _numDestRegs; x++) { + _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; + } + } +}}; + +def template AA64FpRegRegImmOpConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1, + uint64_t _imm, + VfpMicroMode mode) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _imm, mode) + { + %(constructor)s; + for (int x = 0; x < _numDestRegs; x++) { + _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; + } + } +}}; + +def template AA64FpRegRegRegOpConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1, + IntRegIndex _op2, + VfpMicroMode mode) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2, mode) + { + %(constructor)s; + for (int x = 0; x < _numDestRegs; x++) { + _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; + } + } +}}; + +def template AA64FpRegRegRegRegOpDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + IntRegIndex _op3, VfpMicroMode mode = VfpNotAMicroop); + %(BasicExecDeclare)s +}; +}}; + +def template AA64FpRegRegRegRegOpConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1, + IntRegIndex _op2, + IntRegIndex _op3, + VfpMicroMode mode) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2, _op3, mode) + { + %(constructor)s; + for (int x = 0; x < _numDestRegs; x++) { + _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; + } + } +}}; |
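For readers following the templates in this patch: each `def template` block is a C++ fragment containing Python-style `%(...)s` placeholders, and the ISA parser substitutes those placeholders (class name, mnemonic, op class, operand-setup code, and so on) once per instruction definition when generating the decoder and instruction sources. The snippet below is a minimal, self-contained Python sketch of that substitution step only; it is not the actual gem5 parser, and the class name, base class, mnemonic and op class in the dictionary are made-up example values.

    # A minimal sketch of how a 'def template' body gets expanded.
    # This is NOT the real gem5 ISA parser; the substitution values below
    # are invented purely to show what a generated constructor could look
    # like for the AA64FpRegRegRegOpConstructor template in this patch.
    template = '''\
    inline %(class_name)s::%(class_name)s(ExtMachInst machInst,
                                           IntRegIndex _dest,
                                           IntRegIndex _op1,
                                           IntRegIndex _op2,
                                           VfpMicroMode mode)
        : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
                         _dest, _op1, _op2, mode)
    {
        %(constructor)s;
        for (int x = 0; x < _numDestRegs; x++) {
            _srcRegIdx[_numSrcRegs++] = _destRegIdx[x];
        }
    }
    '''

    subst = {
        'class_name':  'FAddS',            # hypothetical instruction class
        'base_class':  'FpRegRegRegOp',    # example base class name
        'mnemonic':    'fadds',            # example mnemonic
        'op_class':    'SimdFloatAddOp',   # example op class
        'constructor': '/* operand bookkeeping emitted by the parser */',
    }

    print(template % subst)

Running the sketch prints a plausible expansion of that constructor template for a hypothetical scalar FP add instruction; the real generated code additionally depends on the per-instruction strings the instruction formats pass in for %(constructor)s and friends.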