author    Gabe Black <gblack@eecs.umich.edu> 2010-08-25 19:10:42 -0500
committer Gabe Black <gblack@eecs.umich.edu> 2010-08-25 19:10:42 -0500
commit    6368edb281f162e4fbb0a91744992a25134135f4 (patch)
tree      e84dfa7d10903e6c7a56e01cc6ca23f4b0d41908 /src/arch/arm/isa
parent    f4f6b31df1a8787a12d71108eac24543bdf541e3 (diff)
download  gem5-6368edb281f162e4fbb0a91744992a25134135f4.tar.xz
ARM: Implement all ARM SIMD instructions.
Diffstat (limited to 'src/arch/arm/isa')
-rw-r--r--src/arch/arm/isa/decoder/thumb.isa2
-rw-r--r--src/arch/arm/isa/formats/fp.isa1643
-rw-r--r--src/arch/arm/isa/insts/fp.isa176
-rw-r--r--src/arch/arm/isa/insts/insts.isa5
-rw-r--r--src/arch/arm/isa/insts/macromem.isa499
-rw-r--r--src/arch/arm/isa/insts/neon.isa3343
-rw-r--r--src/arch/arm/isa/operands.isa26
-rw-r--r--src/arch/arm/isa/templates/macromem.isa192
-rw-r--r--src/arch/arm/isa/templates/mem.isa200
-rw-r--r--src/arch/arm/isa/templates/neon.isa227
-rw-r--r--src/arch/arm/isa/templates/templates.isa3
11 files changed, 5863 insertions, 453 deletions
diff --git a/src/arch/arm/isa/decoder/thumb.isa b/src/arch/arm/isa/decoder/thumb.isa
index 65ea7e30c..d0f5b8646 100644
--- a/src/arch/arm/isa/decoder/thumb.isa
+++ b/src/arch/arm/isa/decoder/thumb.isa
@@ -88,7 +88,7 @@ decode BIGTHUMB {
0xf: McrMrc15::mcrMrc15();
}
}
- 0x3: WarnUnimpl::Advanced_SIMD();
+ 0x3: ThumbNeonData::ThumbNeonData();
default: decode LTCOPROC {
0xa, 0xb: ExtensionRegLoadStore::extensionRegLoadStre();
0xf: decode HTOPCODE_9_4 {
diff --git a/src/arch/arm/isa/formats/fp.isa b/src/arch/arm/isa/formats/fp.isa
index 0a5f77e6e..1482c2119 100644
--- a/src/arch/arm/isa/formats/fp.isa
+++ b/src/arch/arm/isa/formats/fp.isa
@@ -45,6 +45,52 @@
// Floating Point operate instructions
//
+output header {{
+
+ template<template <typename T> class Base>
+ StaticInstPtr
+ newNeonMemInst(const unsigned size,
+ const ExtMachInst &machInst,
+ const RegIndex dest, const RegIndex ra,
+ const uint32_t imm, const unsigned extraMemFlags)
+ {
+ switch (size) {
+ case 0:
+ return new Base<uint8_t>(machInst, dest, ra, imm, extraMemFlags);
+ case 1:
+ return new Base<uint16_t>(machInst, dest, ra, imm, extraMemFlags);
+ case 2:
+ return new Base<uint32_t>(machInst, dest, ra, imm, extraMemFlags);
+ case 3:
+ return new Base<uint64_t>(machInst, dest, ra, imm, extraMemFlags);
+ default:
+ panic("Unrecognized width %d for Neon mem inst.\n", (1 << size));
+ }
+ }
+
+ template<template <typename T> class Base>
+ StaticInstPtr
+ newNeonMixInst(const unsigned size,
+ const ExtMachInst &machInst,
+ const RegIndex dest, const RegIndex op1,
+ const uint32_t step)
+ {
+ switch (size) {
+ case 0:
+ return new Base<uint8_t>(machInst, dest, op1, step);
+ case 1:
+ return new Base<uint16_t>(machInst, dest, op1, step);
+ case 2:
+ return new Base<uint32_t>(machInst, dest, op1, step);
+ case 3:
+ return new Base<uint64_t>(machInst, dest, op1, step);
+ default:
+ panic("Unrecognized width %d for Neon mem inst.\n", (1 << size));
+ }
+ }
+
+}};
+
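The two helpers above use a template-template parameter so that a single switch on the runtime size field can instantiate a micro-op for any element type. A minimal standalone sketch of that dispatch pattern (the types and names below are illustrative stand-ins, not gem5's real StaticInst machinery):

    #include <cstdint>
    #include <cstdio>
    #include <memory>

    // Illustrative stand-ins for gem5's StaticInst hierarchy.
    struct Inst { virtual ~Inst() = default; virtual int width() const = 0; };
    using InstPtr = std::unique_ptr<Inst>;

    template <typename Element>
    struct LoadUop : Inst {
        int width() const override { return sizeof(Element); }
    };

    // Same shape as newNeonMemInst/newNeonMixInst: map a runtime size
    // code (0..3) onto a compile-time element type.
    template <template <typename> class Base>
    InstPtr newSized(unsigned size) {
        switch (size) {
          case 0: return std::make_unique<Base<uint8_t>>();
          case 1: return std::make_unique<Base<uint16_t>>();
          case 2: return std::make_unique<Base<uint32_t>>();
          case 3: return std::make_unique<Base<uint64_t>>();
          default: return nullptr;
        }
    }

    int main() {
        InstPtr inst = newSized<LoadUop>(2);  // size field 2 -> 32-bit lanes
        std::printf("%d\n", inst->width());   // prints 4
    }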
let {{
header_output = '''
StaticInstPtr
@@ -59,116 +105,233 @@ let {{
decodeNeonMem(ExtMachInst machInst)
{
const uint32_t b = bits(machInst, 11, 8);
- const bool a = bits(machInst, 23);
- const bool l = bits(machInst, 21);
+ const bool single = bits(machInst, 23);
+ const bool singleAll = single && (bits(b, 3, 2) == 3);
+ const bool load = bits(machInst, 21);
- if (l) {
- // Load instructions.
- if (a) {
- if (bits(b, 3, 2) != 3) {
- switch (bits(b, 1, 0)) {
- case 0x0:
- return new WarnUnimplemented("vld1 single", machInst);
- case 0x1:
- return new WarnUnimplemented("vld2 single", machInst);
- case 0x2:
- return new WarnUnimplemented("vld3 single", machInst);
- case 0x3:
- return new WarnUnimplemented("vld4 single", machInst);
- }
- } else {
- switch (bits(b, 1, 0)) {
- case 0x0:
- return new WarnUnimplemented("vld1 single all",
- machInst);
- case 0x1:
- return new WarnUnimplemented("vld2 single all",
- machInst);
- case 0x2:
- return new WarnUnimplemented("vld3 single all",
- machInst);
- case 0x3:
- return new WarnUnimplemented("vld4 single all",
- machInst);
+ unsigned width = 0;
+
+ if (single) {
+ width = bits(b, 1, 0) + 1;
+ } else {
+ switch (bits(b, 3, 1)) {
+ case 0x0: width = 4;
+ break;
+ case 0x1: width = (b & 0x1) ? 2 : 1;
+ break;
+ case 0x2: width = 3;
+ break;
+ case 0x3: width = 1;
+ break;
+ case 0x4: width = 2;
+ break;
+ case 0x5:
+ if ((b & 0x1) == 0) {
+ width = 1;
+ break;
+ }
+ // Fall through on purpose.
+ default:
+ return new Unknown(machInst);
+ }
+ }
+ assert(width > 0 && width <= 4);
+
+ const RegIndex rm = (RegIndex)(uint32_t)bits(machInst, 3, 0);
+ const RegIndex rn = (RegIndex)(uint32_t)bits(machInst, 19, 16);
+ const RegIndex vd = (RegIndex)(uint32_t)(bits(machInst, 15, 12) |
+ bits(machInst, 22) << 4);
+ const uint32_t type = bits(machInst, 11, 8);
+ uint32_t size = 0;
+ uint32_t align = 0;
+ unsigned inc = 1;
+ unsigned regs = 1;
+ unsigned lane = 0;
+ if (single) {
+ if (singleAll) {
+ size = bits(machInst, 7, 6);
+ bool t = bits(machInst, 5);
+ unsigned eBytes = (1 << size);
+ align = (eBytes - 1) | TLB::AllowUnaligned;
+ if (width == 1) {
+ regs = t ? 2 : 1;
+ inc = 1;
+ } else {
+ regs = width;
+ inc = t ? 2 : 1;
+ }
+ switch (width) {
+ case 1:
+ case 2:
+ if (bits(machInst, 4))
+ align = width * eBytes - 1;
+ break;
+ case 3:
+ break;
+ case 4:
+ if (size == 3) {
+ if (bits(machInst, 4) == 0)
+ return new Unknown(machInst);
+ size = 2;
+ align = 0xf;
+ } else if (size == 2) {
+ if (bits(machInst, 4))
+ align = 7;
+ } else {
+ if (bits(machInst, 4))
+ align = 4 * eBytes - 1;
}
+ break;
}
} else {
- switch (bits(b, 3, 1)) {
- case 0x0:
- return new WarnUnimplemented("vld4 multiple", machInst);
- case 0x2:
- return new WarnUnimplemented("vld3 multiple", machInst);
- case 0x3:
- return new WarnUnimplemented("vld1 multiple", machInst);
- case 0x4:
- return new WarnUnimplemented("vld2 multiple", machInst);
- case 0x1:
- if (b & 0x1) {
- return new WarnUnimplemented("vld2 multiple", machInst);
- } else {
- return new WarnUnimplemented("vld1 multiple", machInst);
+ size = bits(machInst, 11, 10);
+ unsigned eBytes = (1 << size);
+ align = (eBytes - 1) | TLB::AllowUnaligned;
+ regs = width;
+ unsigned indexAlign = bits(machInst, 7, 4);
+ // If width is 1, inc is always 1. That's overridden later.
+ switch (size) {
+ case 0:
+ inc = 1;
+ lane = bits(indexAlign, 3, 1);
+ break;
+ case 1:
+ inc = bits(indexAlign, 1) ? 2 : 1;
+ lane = bits(indexAlign, 3, 2);
+ break;
+ case 2:
+ inc = bits(indexAlign, 2) ? 2 : 1;
+ lane = bits(indexAlign, 3);
+ break;
+ }
+ // Override inc for width of 1.
+ if (width == 1) {
+ inc = 1;
+ }
+ switch (width) {
+ case 1:
+ switch (size) {
+ case 0:
+ break;
+ case 1:
+ if (bits(indexAlign, 0))
+ align = 1;
+ break;
+ case 2:
+ if (bits(indexAlign, 1, 0))
+ align = 3;
+ break;
}
- case 0x5:
- if ((b & 0x1) == 0) {
- return new WarnUnimplemented("vld1 multiple", machInst);
- } else {
+ break;
+ case 2:
+ if (bits(indexAlign, 0))
+ align = (2 * eBytes) - 1;
+ break;
+ case 3:
+ break;
+ case 4:
+ switch (size) {
+ case 0:
+ case 1:
+ if (bits(indexAlign, 0))
+ align = (4 * eBytes) - 1;
+ break;
+ case 2:
+ if (bits(indexAlign, 0))
+ align = (4 << bits(indexAlign, 1, 0)) - 1;
break;
}
+ break;
}
}
+ if (size == 0x3) {
+ return new Unknown(machInst);
+ }
} else {
- // Store instructions.
- if (a) {
- if (bits(b, 3, 2) != 3) {
- switch (bits(b, 1, 0)) {
- case 0x0:
- return new WarnUnimplemented("vst1 single", machInst);
- case 0x1:
- return new WarnUnimplemented("vst2 single", machInst);
- case 0x2:
- return new WarnUnimplemented("vst3 single", machInst);
- case 0x3:
- return new WarnUnimplemented("vst4 single", machInst);
- }
- } else {
- switch (bits(b, 1, 0)) {
- case 0x0:
- return new WarnUnimplemented("vst1 single all",
- machInst);
- case 0x1:
- return new WarnUnimplemented("vst2 single all",
- machInst);
- case 0x2:
- return new WarnUnimplemented("vst3 single all",
- machInst);
- case 0x3:
- return new WarnUnimplemented("vst4 single all",
- machInst);
- }
+ size = bits(machInst, 7, 6);
+ align = bits(machInst, 5, 4);
+ if (align == 0) {
+ // @align wasn't specified, so alignment can be turned off.
+ align = ((1 << size) - 1) | TLB::AllowUnaligned;
+ } else {
+ align = ((4 << align) - 1);
+ }
+ switch (width) {
+ case 1:
+ switch (type) {
+ case 0x7: regs = 1;
+ break;
+ case 0xa: regs = 2;
+ break;
+ case 0x6: regs = 3;
+ break;
+ case 0x2: regs = 4;
+ break;
+ default:
+ return new Unknown(machInst);
}
+ break;
+ case 2:
+ // Regs doesn't behave exactly as in the manual: the manual's
+ // pseudocode loops over regs registers twice, and we break
+ // that down in the macroop.
+ switch (type) {
+ case 0x8: regs = 2; inc = 1;
+ break;
+ case 0x9: regs = 2; inc = 2;
+ break;
+ case 0x3: regs = 4; inc = 2;
+ break;
+ default:
+ return new Unknown(machInst);
+ }
+ break;
+ case 3:
+ regs = 3;
+ switch (type) {
+ case 0x4: inc = 1;
+ break;
+ case 0x5: inc = 2;
+ break;
+ default:
+ return new Unknown(machInst);
+ }
+ break;
+ case 4:
+ regs = 4;
+ switch (type) {
+ case 0: inc = 1;
+ break;
+ case 1: inc = 2;
+ break;
+ default:
+ return new Unknown(machInst);
+ }
+ break;
+ }
+ }
+
+ if (load) {
+ // Load instructions.
+ if (single) {
+ return new VldSingle(machInst, singleAll, width, rn, vd,
+ regs, inc, size, align, rm, lane);
} else {
- switch (bits(b, 3, 1)) {
- case 0x0:
- return new WarnUnimplemented("vst4 multiple", machInst);
- case 0x2:
- return new WarnUnimplemented("vst3 multiple", machInst);
- case 0x3:
- return new WarnUnimplemented("vst1 multiple", machInst);
- case 0x4:
- return new WarnUnimplemented("vst2 multiple", machInst);
- case 0x1:
- if (b & 0x1) {
- return new WarnUnimplemented("vst2 multiple", machInst);
- } else {
- return new WarnUnimplemented("vst1 multiple", machInst);
- }
- case 0x5:
- if ((b & 0x1) == 0) {
- return new WarnUnimplemented("vst1 multiple", machInst);
- } else {
- break;
- }
+ return new VldMult(machInst, width, rn, vd,
+ regs, inc, size, align, rm);
+ }
+ } else {
+ // Store instructions.
+ if (single) {
+ if (singleAll) {
+ return new Unknown(machInst);
+ } else {
+ return new VstSingle(machInst, false, width, rn, vd,
+ regs, inc, size, align, rm, lane);
}
+ } else {
+ return new VstMult(machInst, width, rn, vd,
+ regs, inc, size, align, rm);
}
}
return new Unknown(machInst);
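Throughout the load/store decode above, alignment is carried as a mask of low address bits that must be zero, optionally OR-ed with TLB::AllowUnaligned to disable the check. A rough standalone sketch of how such a spec would be consumed (the flag's bit position below is an assumption for illustration only; in gem5 it lives among the TLB request flags):

    #include <cassert>
    #include <cstdint>

    // Assumed stand-in for TLB::AllowUnaligned.
    constexpr uint32_t AllowUnaligned = 1u << 8;

    // True if addr satisfies an align spec of the form built in decodeNeonMem.
    bool alignOk(uint32_t addr, uint32_t align) {
        if (align & AllowUnaligned)
            return true;                    // checking disabled
        return (addr & align & 0xff) == 0;  // low mask bits must be clear
    }

    int main() {
        assert(alignOk(0x1000, 0x7));                   // 8-byte aligned
        assert(!alignOk(0x1004, 0x7));                  // misaligned
        assert(alignOk(0x1004, 0x7 | AllowUnaligned));  // permitted anyway
    }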
@@ -183,153 +346,243 @@ let {{
const uint32_t a = bits(machInst, 11, 8);
const bool b = bits(machInst, 4);
const uint32_t c = bits(machInst, 21, 20);
+ const IntRegIndex vd =
+ (IntRegIndex)(2 * (bits(machInst, 15, 12) |
+ (bits(machInst, 22) << 4)));
+ const IntRegIndex vn =
+ (IntRegIndex)(2 * (bits(machInst, 19, 16) |
+ (bits(machInst, 7) << 4)));
+ const IntRegIndex vm =
+ (IntRegIndex)(2 * (bits(machInst, 3, 0) |
+ (bits(machInst, 5) << 4)));
+ const unsigned size = bits(machInst, 21, 20);
+ const bool q = bits(machInst, 6);
+ if (q && ((vd & 0x1) || (vn & 0x1) || (vm & 0x1)))
+ return new Unknown(machInst);
switch (a) {
case 0x0:
if (b) {
- if (bits(machInst, 9) == 0) {
- return new WarnUnimplemented("vhadd", machInst);
+ if (u) {
+ return decodeNeonUThreeReg<VqaddUD, VqaddUQ>(
+ q, size, machInst, vd, vn, vm);
} else {
- return new WarnUnimplemented("vhsub", machInst);
+ return decodeNeonSThreeReg<VqaddSD, VqaddSQ>(
+ q, size, machInst, vd, vn, vm);
}
} else {
- return new WarnUnimplemented("vqadd", machInst);
+ if (size == 3)
+ return new Unknown(machInst);
+ return decodeNeonUSThreeReg<VhaddD, VhaddQ>(
+ q, u, size, machInst, vd, vn, vm);
}
case 0x1:
if (!b) {
- return new WarnUnimplemented("vrhadd", machInst);
+ return decodeNeonUSThreeReg<VrhaddD, VrhaddQ>(
+ q, u, size, machInst, vd, vn, vm);
} else {
if (u) {
switch (c) {
case 0:
- return new WarnUnimplemented("veor", machInst);
+ if (q) {
+ return new VeorQ<uint64_t>(machInst, vd, vn, vm);
+ } else {
+ return new VeorD<uint64_t>(machInst, vd, vn, vm);
+ }
case 1:
- return new WarnUnimplemented("vbsl", machInst);
+ if (q) {
+ return new VbslQ<uint64_t>(machInst, vd, vn, vm);
+ } else {
+ return new VbslD<uint64_t>(machInst, vd, vn, vm);
+ }
case 2:
- return new WarnUnimplemented("vbit", machInst);
+ if (q) {
+ return new VbitQ<uint64_t>(machInst, vd, vn, vm);
+ } else {
+ return new VbitD<uint64_t>(machInst, vd, vn, vm);
+ }
case 3:
- return new WarnUnimplemented("vbif", machInst);
+ if (q) {
+ return new VbifQ<uint64_t>(machInst, vd, vn, vm);
+ } else {
+ return new VbifD<uint64_t>(machInst, vd, vn, vm);
+ }
}
} else {
switch (c) {
case 0:
- return new WarnUnimplemented("vand (reg)", machInst);
+ if (q) {
+ return new VandQ<uint64_t>(machInst, vd, vn, vm);
+ } else {
+ return new VandD<uint64_t>(machInst, vd, vn, vm);
+ }
case 1:
- return new WarnUnimplemented("vbic (reg)", machInst);
+ if (q) {
+ return new VbicQ<uint64_t>(machInst, vd, vn, vm);
+ } else {
+ return new VbicD<uint64_t>(machInst, vd, vn, vm);
+ }
case 2:
- {
- const IntRegIndex n = (IntRegIndex)(
- (uint32_t)bits(machInst, 19, 16) |
- (uint32_t)(bits(machInst, 7) << 4));
- const IntRegIndex m = (IntRegIndex)(
- (uint32_t)bits(machInst, 3, 0) |
- (uint32_t)(bits(machInst, 5) << 4));
- if (n == m) {
- return new WarnUnimplemented("vmov (reg)",
- machInst);
+ if (vn == vm) {
+ if (q) {
+ return new VmovQ<uint64_t>(
+ machInst, vd, vn, vm);
+ } else {
+ return new VmovD<uint64_t>(
+ machInst, vd, vn, vm);
+ }
+ } else {
+ if (q) {
+ return new VorrQ<uint64_t>(
+ machInst, vd, vn, vm);
} else {
- return new WarnUnimplemented("vorr (reg)",
- machInst);
+ return new VorrD<uint64_t>(
+ machInst, vd, vn, vm);
}
}
case 3:
- return new WarnUnimplemented("vorn (reg)", machInst);
+ if (q) {
+ return new VornQ<uint64_t>(
+ machInst, vd, vn, vm);
+ } else {
+ return new VornD<uint64_t>(
+ machInst, vd, vn, vm);
+ }
}
}
}
case 0x2:
if (b) {
- return new WarnUnimplemented("vqsub", machInst);
- } else {
- if (bits(machInst, 9) == 0) {
- return new WarnUnimplemented("vhadd", machInst);
+ if (u) {
+ return decodeNeonUThreeReg<VqsubUD, VqsubUQ>(
+ q, size, machInst, vd, vn, vm);
} else {
- return new WarnUnimplemented("vhsub", machInst);
+ return decodeNeonSThreeReg<VqsubSD, VqsubSQ>(
+ q, size, machInst, vd, vn, vm);
}
+ } else {
+ if (size == 3)
+ return new Unknown(machInst);
+ return decodeNeonUSThreeReg<VhsubD, VhsubQ>(
+ q, u, size, machInst, vd, vn, vm);
}
case 0x3:
if (b) {
- return new WarnUnimplemented("vcge (reg)", machInst);
+ return decodeNeonUSThreeReg<VcgeD, VcgeQ>(
+ q, u, size, machInst, vd, vn, vm);
} else {
- return new WarnUnimplemented("vcgt (reg)", machInst);
+ return decodeNeonUSThreeReg<VcgtD, VcgtQ>(
+ q, u, size, machInst, vd, vn, vm);
}
case 0x4:
if (b) {
- return new WarnUnimplemented("vqshl (reg)", machInst);
+ if (u) {
+ return decodeNeonUThreeReg<VqshlUD, VqshlUQ>(
+ q, size, machInst, vd, vm, vn);
+ } else {
+ return decodeNeonSThreeReg<VqshlSD, VqshlSQ>(
+ q, size, machInst, vd, vm, vn);
+ }
} else {
- return new WarnUnimplemented("vshl (reg)", machInst);
+ return decodeNeonUSThreeReg<VshlD, VshlQ>(
+ q, u, size, machInst, vd, vm, vn);
}
case 0x5:
if (b) {
- return new WarnUnimplemented("vqrshl", machInst);
+ if (u) {
+ return decodeNeonUThreeReg<VqrshlUD, VqrshlUQ>(
+ q, size, machInst, vd, vm, vn);
+ } else {
+ return decodeNeonSThreeReg<VqrshlSD, VqrshlSQ>(
+ q, size, machInst, vd, vm, vn);
+ }
} else {
- return new WarnUnimplemented("vrshl", machInst);
+ return decodeNeonUSThreeReg<VrshlD, VrshlQ>(
+ q, u, size, machInst, vd, vm, vn);
}
case 0x6:
if (b) {
- return new WarnUnimplemented("vmin (int)", machInst);
+ return decodeNeonUSThreeReg<VminD, VminQ>(
+ q, u, size, machInst, vd, vn, vm);
} else {
- return new WarnUnimplemented("vmax (int)", machInst);
+ return decodeNeonUSThreeReg<VmaxD, VmaxQ>(
+ q, u, size, machInst, vd, vn, vm);
}
case 0x7:
if (b) {
- return new WarnUnimplemented("vaba", machInst);
+ return decodeNeonUSThreeReg<VabaD, VabaQ>(
+ q, u, size, machInst, vd, vn, vm);
} else {
if (bits(machInst, 23) == 1) {
- if (bits(machInst, 6) == 1) {
+ if (q) {
return new Unknown(machInst);
} else {
- return new WarnUnimplemented("vabdl (int)", machInst);
+ return decodeNeonUSThreeUSReg<Vabdl>(
+ u, size, machInst, vd, vn, vm);
}
} else {
- return new WarnUnimplemented("vabd (int)", machInst);
+ return decodeNeonUSThreeReg<VabdD, VabdQ>(
+ q, u, size, machInst, vd, vn, vm);
}
}
case 0x8:
if (b) {
if (u) {
- return new WarnUnimplemented("vceq (reg)", machInst);
+ return decodeNeonUThreeReg<VceqD, VceqQ>(
+ q, size, machInst, vd, vn, vm);
} else {
- return new WarnUnimplemented("vtst", machInst);
+ return decodeNeonUThreeReg<VtstD, VtstQ>(
+ q, size, machInst, vd, vn, vm);
}
} else {
if (u) {
- return new WarnUnimplemented("vsub (int)", machInst);
+ return decodeNeonUThreeReg<NVsubD, NVsubQ>(
+ q, size, machInst, vd, vn, vm);
} else {
- return new WarnUnimplemented("vadd (int)", machInst);
+ return decodeNeonUThreeReg<NVaddD, NVaddQ>(
+ q, size, machInst, vd, vn, vm);
}
}
case 0x9:
if (b) {
if (u) {
- return new WarnUnimplemented("vmul (poly)", machInst);
+ return decodeNeonUThreeReg<NVmulpD, NVmulpQ>(
+ q, size, machInst, vd, vn, vm);
} else {
- return new WarnUnimplemented("vmul (int)", machInst);
+ return decodeNeonSThreeReg<NVmulD, NVmulQ>(
+ q, size, machInst, vd, vn, vm);
}
} else {
if (u) {
- return new WarnUnimplemented("vmls (int)", machInst);
+ return decodeNeonUSThreeReg<NVmlsD, NVmlsQ>(
+ q, u, size, machInst, vd, vn, vm);
} else {
- return new WarnUnimplemented("vmla (int)", machInst);
+ return decodeNeonUSThreeReg<NVmlaD, NVmlaQ>(
+ q, u, size, machInst, vd, vn, vm);
}
}
case 0xa:
if (b) {
- return new WarnUnimplemented("vpmin (int)", machInst);
+ return decodeNeonUSThreeReg<VpminD, VpminQ>(
+ q, u, size, machInst, vd, vn, vm);
} else {
- return new WarnUnimplemented("vpmax (int)", machInst);
+ return decodeNeonUSThreeReg<VpmaxD, VpmaxQ>(
+ q, u, size, machInst, vd, vn, vm);
}
case 0xb:
if (b) {
if (u) {
return new Unknown(machInst);
} else {
- return new WarnUnimplemented("vpadd (int)", machInst);
+ return decodeNeonUThreeReg<NVpaddD, NVpaddQ>(
+ q, size, machInst, vd, vn, vm);
}
} else {
if (u) {
- return new WarnUnimplemented("vqrdmulh", machInst);
+ return decodeNeonSThreeSReg<VqrdmulhD, VqrdmulhQ>(
+ q, size, machInst, vd, vn, vm);
} else {
- return new WarnUnimplemented("vqdmulh", machInst);
+ return decodeNeonSThreeSReg<VqdmulhD, VqdmulhQ>(
+ q, size, machInst, vd, vn, vm);
}
}
case 0xc:
@@ -338,29 +591,57 @@ let {{
if (b) {
if (u) {
if (bits(c, 1) == 0) {
- return new WarnUnimplemented("vmul (fp)", machInst);
+ if (q) {
+ return new NVmulQFp<float>(machInst, vd, vn, vm);
+ } else {
+ return new NVmulDFp<float>(machInst, vd, vn, vm);
+ }
} else {
return new Unknown(machInst);
}
} else {
if (bits(c, 1) == 0) {
- return new WarnUnimplemented("vmla (fp)", machInst);
+ if (q) {
+ return new NVmlaQFp<float>(machInst, vd, vn, vm);
+ } else {
+ return new NVmlaDFp<float>(machInst, vd, vn, vm);
+ }
} else {
- return new WarnUnimplemented("vmls (fp)", machInst);
+ if (q) {
+ return new NVmlsQFp<float>(machInst, vd, vn, vm);
+ } else {
+ return new NVmlsDFp<float>(machInst, vd, vn, vm);
+ }
}
}
} else {
if (u) {
if (bits(c, 1) == 0) {
- return new WarnUnimplemented("vpadd (fp)", machInst);
+ if (q) {
+ return new VpaddQFp<float>(machInst, vd, vn, vm);
+ } else {
+ return new VpaddDFp<float>(machInst, vd, vn, vm);
+ }
} else {
- return new WarnUnimplemented("vabd (fp)", machInst);
+ if (q) {
+ return new VabdQFp<float>(machInst, vd, vn, vm);
+ } else {
+ return new VabdDFp<float>(machInst, vd, vn, vm);
+ }
}
} else {
if (bits(c, 1) == 0) {
- return new WarnUnimplemented("vadd (fp)", machInst);
+ if (q) {
+ return new VaddQFp<float>(machInst, vd, vn, vm);
+ } else {
+ return new VaddDFp<float>(machInst, vd, vn, vm);
+ }
} else {
- return new WarnUnimplemented("vsub (fp)", machInst);
+ if (q) {
+ return new VsubQFp<float>(machInst, vd, vn, vm);
+ } else {
+ return new VsubDFp<float>(machInst, vd, vn, vm);
+ }
}
}
}
@@ -368,9 +649,17 @@ let {{
if (b) {
if (u) {
if (bits(c, 1) == 0) {
- return new WarnUnimplemented("vacge", machInst);
+ if (q) {
+ return new VacgeQFp<float>(machInst, vd, vn, vm);
+ } else {
+ return new VacgeDFp<float>(machInst, vd, vn, vm);
+ }
} else {
- return new WarnUnimplemented("vacgt", machInst);
+ if (q) {
+ return new VacgtQFp<float>(machInst, vd, vn, vm);
+ } else {
+ return new VacgtDFp<float>(machInst, vd, vn, vm);
+ }
}
} else {
return new Unknown(machInst);
@@ -378,13 +667,25 @@ let {{
} else {
if (u) {
if (bits(c, 1) == 0) {
- return new WarnUnimplemented("vcge (reg)", machInst);
+ if (q) {
+ return new VcgeQFp<float>(machInst, vd, vn, vm);
+ } else {
+ return new VcgeDFp<float>(machInst, vd, vn, vm);
+ }
} else {
- return new WarnUnimplemented("vcgt (reg)", machInst);
+ if (q) {
+ return new VcgtQFp<float>(machInst, vd, vn, vm);
+ } else {
+ return new VcgtDFp<float>(machInst, vd, vn, vm);
+ }
}
} else {
if (bits(c, 1) == 0) {
- return new WarnUnimplemented("vceq (reg)", machInst);
+ if (q) {
+ return new VceqQFp<float>(machInst, vd, vn, vm);
+ } else {
+ return new VceqDFp<float>(machInst, vd, vn, vm);
+ }
} else {
return new Unknown(machInst);
}
@@ -396,23 +697,47 @@ let {{
return new Unknown(machInst);
} else {
if (bits(c, 1) == 0) {
- return new WarnUnimplemented("vrecps", machInst);
+ if (q) {
+ return new VrecpsQFp<float>(machInst, vd, vn, vm);
+ } else {
+ return new VrecpsDFp<float>(machInst, vd, vn, vm);
+ }
} else {
- return new WarnUnimplemented("vrsqrts", machInst);
+ if (q) {
+ return new VrsqrtsQFp<float>(machInst, vd, vn, vm);
+ } else {
+ return new VrsqrtsDFp<float>(machInst, vd, vn, vm);
+ }
}
}
} else {
if (u) {
if (bits(c, 1) == 0) {
- return new WarnUnimplemented("vpmax (fp)", machInst);
+ if (q) {
+ return new VpmaxQFp<float>(machInst, vd, vn, vm);
+ } else {
+ return new VpmaxDFp<float>(machInst, vd, vn, vm);
+ }
} else {
- return new WarnUnimplemented("vpmin (fp)", machInst);
+ if (q) {
+ return new VpminQFp<float>(machInst, vd, vn, vm);
+ } else {
+ return new VpminDFp<float>(machInst, vd, vn, vm);
+ }
}
} else {
if (bits(c, 1) == 0) {
- return new WarnUnimplemented("vmax (fp)", machInst);
+ if (q) {
+ return new VmaxQFp<float>(machInst, vd, vn, vm);
+ } else {
+ return new VmaxDFp<float>(machInst, vd, vn, vm);
+ }
} else {
- return new WarnUnimplemented("vmin (fp)", machInst);
+ if (q) {
+ return new VminQFp<float>(machInst, vd, vn, vm);
+ } else {
+ return new VminDFp<float>(machInst, vd, vn, vm);
+ }
}
}
}
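The register fields decoded above follow one pattern: a four-bit field plus a high bit from elsewhere in the word, doubled because gem5 indexes the NEON register file in single-precision halves. A self-contained sketch of the extraction, with bits() reimplemented for illustration:

    #include <cassert>
    #include <cstdint>

    // Free-standing analog of gem5's bits(val, hi, lo).
    static uint32_t bits(uint32_t val, int hi, int lo) {
        return (val >> lo) & ((1u << (hi - lo + 1)) - 1);
    }

    int main() {
        uint32_t machInst = 0xf3144856;  // arbitrary NEON-shaped encoding
        unsigned vd = 2 * (bits(machInst, 15, 12) | (bits(machInst, 22) << 4));
        unsigned vm = 2 * (bits(machInst, 3, 0) | (bits(machInst, 5) << 4));
        bool q = bits(machInst, 6, 6);
        // Destination field 0b0100 with bit 22 clear names D4, index 8 here.
        assert(vd == 2 * 4);
        (void)vm; (void)q;
    }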
@@ -423,50 +748,94 @@ let {{
static StaticInstPtr
decodeNeonOneRegModImm(ExtMachInst machInst)
{
+ const IntRegIndex vd =
+ (IntRegIndex)(2 * (bits(machInst, 15, 12) |
+ (bits(machInst, 22) << 4)));
+ const bool q = bits(machInst, 6);
const bool op = bits(machInst, 5);
- const uint32_t cmode = bits(machInst, 11, 8);
+ const uint8_t cmode = bits(machInst, 11, 8);
+ const uint8_t imm = ((THUMB ? bits(machInst, 28) :
+ bits(machInst, 24)) << 7) |
+ (bits(machInst, 18, 16) << 4) |
+ (bits(machInst, 3, 0) << 0);
+ const uint64_t bigImm = simd_modified_imm(op, cmode, imm);
if (op) {
if (bits(cmode, 3) == 0) {
if (bits(cmode, 0) == 0) {
- return new WarnUnimplemented("vmov (imm)", machInst);
+ if (q)
+ return new NVmvniQ<uint64_t>(machInst, vd, bigImm);
+ else
+ return new NVmvniD<uint64_t>(machInst, vd, bigImm);
} else {
- return new WarnUnimplemented("vorr (imm)", machInst);
+ if (q)
+ return new NVbiciQ<uint64_t>(machInst, vd, bigImm);
+ else
+ return new NVbiciD<uint64_t>(machInst, vd, bigImm);
}
} else {
if (bits(cmode, 2) == 1) {
- return new WarnUnimplemented("vmov (imm)", machInst);
+ switch (bits(cmode, 1, 0)) {
+ case 0:
+ case 1:
+ if (q)
+ return new NVmvniQ<uint64_t>(machInst, vd, bigImm);
+ else
+ return new NVmvniD<uint64_t>(machInst, vd, bigImm);
+ case 2:
+ if (q)
+ return new NVmoviQ<uint64_t>(machInst, vd, bigImm);
+ else
+ return new NVmoviD<uint64_t>(machInst, vd, bigImm);
+ case 3:
+ return new Unknown(machInst);
+ }
} else {
if (bits(cmode, 0) == 0) {
- return new WarnUnimplemented("vmov (imm)", machInst);
+ if (q)
+ return new NVmvniQ<uint64_t>(machInst, vd, bigImm);
+ else
+ return new NVmvniD<uint64_t>(machInst, vd, bigImm);
} else {
- return new WarnUnimplemented("vorr (imm)", machInst);
+ if (q)
+ return new NVbiciQ<uint64_t>(machInst, vd, bigImm);
+ else
+ return new NVbiciD<uint64_t>(machInst, vd, bigImm);
}
}
}
} else {
if (bits(cmode, 3) == 0) {
if (bits(cmode, 0) == 0) {
- return new WarnUnimplemented("vmvn (imm)", machInst);
+ if (q)
+ return new NVmoviQ<uint64_t>(machInst, vd, bigImm);
+ else
+ return new NVmoviD<uint64_t>(machInst, vd, bigImm);
} else {
- return new WarnUnimplemented("vbic (imm)", machInst);
+ if (q)
+ return new NVorriQ<uint64_t>(machInst, vd, bigImm);
+ else
+ return new NVorriD<uint64_t>(machInst, vd, bigImm);
}
} else {
if (bits(cmode, 2) == 1) {
- switch (bits(cmode, 1, 0)) {
- case 0:
- case 1:
- return new WarnUnimplemented("vmvn (imm)", machInst);
- case 2:
- return new WarnUnimplemented("vmov (imm)", machInst);
- case 3:
- return new Unknown(machInst);
- }
- return new WarnUnimplemented("vmov (imm)", machInst);
+ if (q)
+ return new NVmoviQ<uint64_t>(machInst, vd, bigImm);
+ else
+ return new NVmoviD<uint64_t>(machInst, vd, bigImm);
} else {
if (bits(cmode, 0) == 0) {
- return new WarnUnimplemented("vmvn (imm)", machInst);
+ if (q)
+ return new NVmoviQ<uint64_t>(machInst, vd, bigImm);
+ else
+ return new NVmoviD<uint64_t>(machInst, vd, bigImm);
} else {
- return new WarnUnimplemented("vbic (imm)", machInst);
+ if (q)
+ return new NVorriQ<uint64_t>(machInst, vd, bigImm);
+ else
+ return new NVorriD<uint64_t>(machInst, vd, bigImm);
}
}
}
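The abcdefgh immediate assembled above is expanded by simd_modified_imm() into a full 64-bit constant according to op and cmode. As a simplified illustration of a single case only (cmode 0b1110 with op 0 replicates the byte into all eight lanes; gem5's helper covers every combination):

    #include <cassert>
    #include <cstdint>

    // One case of NEON modified-immediate expansion, for illustration.
    uint64_t replicateByte(uint8_t imm8) {
        uint64_t v = imm8;
        v |= v << 8;
        v |= v << 16;
        v |= v << 32;
        return v;
    }

    int main() {
        assert(replicateByte(0x5a) == 0x5a5a5a5a5a5a5a5aULL);
    }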
@@ -481,70 +850,149 @@ let {{
const bool u = THUMB ? bits(machInst, 28) : bits(machInst, 24);
const bool b = bits(machInst, 6);
const bool l = bits(machInst, 7);
+ const IntRegIndex vd =
+ (IntRegIndex)(2 * (bits(machInst, 15, 12) |
+ (bits(machInst, 22) << 4)));
+ const IntRegIndex vm =
+ (IntRegIndex)(2 * (bits(machInst, 3, 0) |
+ (bits(machInst, 5) << 4)));
+ unsigned imm6 = bits(machInst, 21, 16);
+ unsigned imm = ((l ? 1 : 0) << 6) | imm6;
+ unsigned size = 3;
+ unsigned lShiftAmt = 0;
+ unsigned bitSel;
+ for (bitSel = 1 << 6; true; bitSel >>= 1) {
+ if (bitSel & imm)
+ break;
+ else if (!size)
+ return new Unknown(machInst);
+ size--;
+ }
+ lShiftAmt = imm6 & ~bitSel;
+ unsigned rShiftAmt = 0;
+ if (a != 0xe && a != 0xf) {
+ if (size > 2)
+ rShiftAmt = 64 - imm6;
+ else
+ rShiftAmt = 2 * (8 << size) - imm6;
+ }
switch (a) {
case 0x0:
- return new WarnUnimplemented("vshr", machInst);
+ return decodeNeonUSTwoShiftReg<NVshrD, NVshrQ>(
+ b, u, size, machInst, vd, vm, rShiftAmt);
case 0x1:
- return new WarnUnimplemented("vsra", machInst);
+ return decodeNeonUSTwoShiftReg<NVsraD, NVsraQ>(
+ b, u, size, machInst, vd, vm, rShiftAmt);
case 0x2:
- return new WarnUnimplemented("vrshr", machInst);
+ return decodeNeonUSTwoShiftReg<NVrshrD, NVrshrQ>(
+ b, u, size, machInst, vd, vm, rShiftAmt);
case 0x3:
- return new WarnUnimplemented("vrsra", machInst);
+ return decodeNeonUSTwoShiftReg<NVrsraD, NVrsraQ>(
+ b, u, size, machInst, vd, vm, rShiftAmt);
case 0x4:
if (u) {
- return new WarnUnimplemented("vsri", machInst);
+ return decodeNeonUTwoShiftReg<NVsriD, NVsriQ>(
+ b, size, machInst, vd, vm, rShiftAmt);
} else {
return new Unknown(machInst);
}
case 0x5:
if (u) {
- return new WarnUnimplemented("vsli", machInst);
+ return decodeNeonUTwoShiftReg<NVsliD, NVsliQ>(
+ b, size, machInst, vd, vm, lShiftAmt);
} else {
- return new WarnUnimplemented("vshl (imm)", machInst);
+ return decodeNeonUTwoShiftReg<NVshlD, NVshlQ>(
+ b, size, machInst, vd, vm, lShiftAmt);
}
case 0x6:
case 0x7:
- return new WarnUnimplemented("vqshl, vqshlu (imm)", machInst);
+ if (u) {
+ if (a == 0x6) {
+ return decodeNeonSTwoShiftReg<NVqshlusD, NVqshlusQ>(
+ b, size, machInst, vd, vm, lShiftAmt);
+ } else {
+ return decodeNeonUTwoShiftReg<NVqshluD, NVqshluQ>(
+ b, size, machInst, vd, vm, lShiftAmt);
+ }
+ } else {
+ return decodeNeonSTwoShiftReg<NVqshlD, NVqshlQ>(
+ b, size, machInst, vd, vm, lShiftAmt);
+ }
case 0x8:
if (l) {
return new Unknown(machInst);
} else if (u) {
- if (b) {
- return new WarnUnimplemented("vqrshrn, vqrshrun", machInst);
- } else {
- return new WarnUnimplemented("vqshrn, vqshrun", machInst);
- }
+ return decodeNeonSTwoShiftSReg<NVqshruns, NVqrshruns>(
+ b, size, machInst, vd, vm, rShiftAmt);
} else {
- if (b) {
- return new WarnUnimplemented("vrshrn", machInst);
- } else {
- return new WarnUnimplemented("vshrn", machInst);
- }
+ return decodeNeonUTwoShiftSReg<NVshrn, NVrshrn>(
+ b, size, machInst, vd, vm, rShiftAmt);
}
case 0x9:
if (l) {
return new Unknown(machInst);
- } else if (b) {
- return new WarnUnimplemented("vqrshrn, vqrshrun", machInst);
+ } else if (u) {
+ return decodeNeonUTwoShiftSReg<NVqshrun, NVqrshrun>(
+ b, size, machInst, vd, vm, rShiftAmt);
} else {
- return new WarnUnimplemented("vqshrn, vqshrun", machInst);
+ return decodeNeonSTwoShiftSReg<NVqshrn, NVqrshrn>(
+ b, size, machInst, vd, vm, rShiftAmt);
}
case 0xa:
if (l || b) {
return new Unknown(machInst);
} else {
- // If the shift amount is zero, it's vmovl.
- return new WarnUnimplemented("vshll, vmovl", machInst);
+ return decodeNeonUSTwoShiftSReg<NVmovl, NVshll>(
+ lShiftAmt, u, size, machInst, vd, vm, lShiftAmt);
}
case 0xe:
+ if (l) {
+ return new Unknown(machInst);
+ } else {
+ if (bits(imm6, 5) == 0)
+ return new Unknown(machInst);
+ if (u) {
+ if (b) {
+ return new NVcvtu2fpQ<float>(
+ machInst, vd, vm, 64 - imm6);
+ } else {
+ return new NVcvtu2fpD<float>(
+ machInst, vd, vm, 64 - imm6);
+ }
+ } else {
+ if (b) {
+ return new NVcvts2fpQ<float>(
+ machInst, vd, vm, 64 - imm6);
+ } else {
+ return new NVcvts2fpD<float>(
+ machInst, vd, vm, 64 - imm6);
+ }
+ }
+ }
case 0xf:
if (l) {
return new Unknown(machInst);
- } else if (a == 0xe) {
- return new WarnUnimplemented("vcvt (fixed to fp)", machInst);
- } else if (a == 0xf) {
- return new WarnUnimplemented("vcvt (fp to fixed)", machInst);
+ } else {
+ if (bits(imm6, 5) == 0)
+ return new Unknown(machInst);
+ if (u) {
+ if (b) {
+ return new NVcvt2ufxQ<float>(
+ machInst, vd, vm, 64 - imm6);
+ } else {
+ return new NVcvt2ufxD<float>(
+ machInst, vd, vm, 64 - imm6);
+ }
+ } else {
+ if (b) {
+ return new NVcvt2sfxQ<float>(
+ machInst, vd, vm, 64 - imm6);
+ } else {
+ return new NVcvt2sfxD<float>(
+ machInst, vd, vm, 64 - imm6);
+ }
+ }
}
}
return new Unknown(machInst);
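The bitSel loop above recovers the element size from the position of the most significant set bit of {L, imm6}; the bits below it form the left-shift amount. An equivalent standalone form of the same derivation:

    #include <cassert>

    // Mirrors the bitSel loop: the leading one of imm = L:imm6 selects the
    // element size; a fully clear field is an undefined encoding.
    bool decodeShift(unsigned imm, unsigned &size, unsigned &lShiftAmt) {
        unsigned bitSel, sz = 3;
        for (bitSel = 1u << 6; !(bitSel & imm); bitSel >>= 1) {
            if (sz == 0)
                return false;
            sz--;
        }
        size = sz;
        lShiftAmt = (imm & 0x3f) & ~bitSel;
        return true;
    }

    int main() {
        unsigned size, sh;
        assert(decodeShift(0x28, size, sh));  // imm6 = 0b101000
        assert(size == 2 && sh == 0x08);      // 32-bit lanes, left shift 8
    }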
@@ -555,74 +1003,89 @@ let {{
{
const bool u = THUMB ? bits(machInst, 28) : bits(machInst, 24);
const uint32_t a = bits(machInst, 11, 8);
-
+ const IntRegIndex vd =
+ (IntRegIndex)(2 * (bits(machInst, 15, 12) |
+ (bits(machInst, 22) << 4)));
+ const IntRegIndex vn =
+ (IntRegIndex)(2 * (bits(machInst, 19, 16) |
+ (bits(machInst, 7) << 4)));
+ const IntRegIndex vm =
+ (IntRegIndex)(2 * (bits(machInst, 3, 0) |
+ (bits(machInst, 5) << 4)));
+ const unsigned size = bits(machInst, 21, 20);
switch (a) {
case 0x0:
- return new WarnUnimplemented("vaddl", machInst);
+ return decodeNeonUSThreeUSReg<Vaddl>(
+ u, size, machInst, vd, vn, vm);
case 0x1:
- return new WarnUnimplemented("vaddw", machInst);
+ return decodeNeonUSThreeUSReg<Vaddw>(
+ u, size, machInst, vd, vn, vm);
case 0x2:
- return new WarnUnimplemented("vsubl", machInst);
+ return decodeNeonUSThreeUSReg<Vsubl>(
+ u, size, machInst, vd, vn, vm);
case 0x3:
- return new WarnUnimplemented("vsubw", machInst);
+ return decodeNeonUSThreeUSReg<Vsubw>(
+ u, size, machInst, vd, vn, vm);
case 0x4:
if (u) {
- return new WarnUnimplemented("vraddhn", machInst);
+ return decodeNeonUThreeUSReg<Vraddhn>(
+ size, machInst, vd, vn, vm);
} else {
- return new WarnUnimplemented("vaddhn", machInst);
+ return decodeNeonUThreeUSReg<Vaddhn>(
+ size, machInst, vd, vn, vm);
}
case 0x5:
- return new WarnUnimplemented("vabal", machInst);
+ return decodeNeonUSThreeUSReg<Vabal>(
+ u, size, machInst, vd, vn, vm);
case 0x6:
if (u) {
- return new WarnUnimplemented("vrsubhn", machInst);
+ return decodeNeonUThreeUSReg<Vrsubhn>(
+ size, machInst, vd, vn, vm);
} else {
- return new WarnUnimplemented("vsubhn", machInst);
+ return decodeNeonUThreeUSReg<Vsubhn>(
+ size, machInst, vd, vn, vm);
}
case 0x7:
if (bits(machInst, 23)) {
- return new WarnUnimplemented("vabdl (int)", machInst);
+ return decodeNeonUSThreeUSReg<Vabdl>(
+ u, size, machInst, vd, vn, vm);
} else {
- return new WarnUnimplemented("vabd (int)", machInst);
+ return decodeNeonUSThreeReg<VabdD, VabdQ>(
+ bits(machInst, 6), u, size, machInst, vd, vn, vm);
}
case 0x8:
- return new WarnUnimplemented("vmlal (int)", machInst);
+ return decodeNeonUSThreeUSReg<Vmlal>(
+ u, size, machInst, vd, vn, vm);
case 0xa:
- return new WarnUnimplemented("vmlsl (int)", machInst);
+ return decodeNeonUSThreeUSReg<Vmlsl>(
+ u, size, machInst, vd, vn, vm);
case 0x9:
- if (bits(machInst, 23) == 0) {
- if (bits(machInst, 4) == 0) {
- if (u) {
- return new WarnUnimplemented("vmls (int)", machInst);
- } else {
- return new WarnUnimplemented("vmla (int)", machInst);
- }
- } else {
- if (u) {
- return new WarnUnimplemented("vmul (poly)", machInst);
- } else {
- return new WarnUnimplemented("vmul (int)", machInst);
- }
- }
+ if (u) {
+ return new Unknown(machInst);
} else {
- return new WarnUnimplemented("vqdmlal", machInst);
+ return decodeNeonSThreeUSReg<Vqdmlal>(
+ size, machInst, vd, vn, vm);
}
case 0xb:
- if (!u) {
+ if (u) {
return new Unknown(machInst);
} else {
- return new WarnUnimplemented("vqdmlsl", machInst);
+ return decodeNeonSThreeUSReg<Vqdmlsl>(
+ size, machInst, vd, vn, vm);
}
case 0xc:
- return new WarnUnimplemented("vmull (int)", machInst);
+ return decodeNeonUSThreeUSReg<Vmull>(
+ u, size, machInst, vd, vn, vm);
case 0xd:
- if (!u) {
+ if (u) {
return new Unknown(machInst);
} else {
- return new WarnUnimplemented("vqdmull", machInst);
+ return decodeNeonSThreeUSReg<Vqdmull>(
+ size, machInst, vd, vn, vm);
}
case 0xe:
- return new WarnUnimplemented("vmull (poly)", machInst);
+ return decodeNeonUThreeUSReg<Vmullp>(
+ size, machInst, vd, vn, vm);
}
return new Unknown(machInst);
}
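The "different lengths" group above maps each encoding onto widening (vaddl), narrowing (vaddhn), or mixed-width (vaddw) templates. As a plain C++ illustration of what a widening add means per lane, taking the size == 1 signed case:

    #include <cassert>
    #include <cstdint>

    // Per-lane semantics of VADDL.S16: each 16-bit input lane widens to
    // 32 bits before the add, so the sum cannot wrap.
    int32_t vaddlLane(int16_t a, int16_t b) {
        return int32_t{a} + int32_t{b};
    }

    int main() {
        assert(vaddlLane(INT16_MAX, 1) == 32768);  // would wrap in 16 bits
    }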
@@ -632,48 +1095,256 @@ let {{
{
const bool u = THUMB ? bits(machInst, 28) : bits(machInst, 24);
const uint32_t a = bits(machInst, 11, 8);
-
+ const unsigned size = bits(machInst, 21, 20);
+ const IntRegIndex vd =
+ (IntRegIndex)(2 * (bits(machInst, 15, 12) |
+ (bits(machInst, 22) << 4)));
+ const IntRegIndex vn =
+ (IntRegIndex)(2 * (bits(machInst, 19, 16) |
+ (bits(machInst, 7) << 4)));
+ const IntRegIndex vm = (size == 2) ?
+ (IntRegIndex)(2 * bits(machInst, 3, 0)) :
+ (IntRegIndex)(2 * bits(machInst, 2, 0));
+ const unsigned index = (size == 2) ? (unsigned)bits(machInst, 5) :
+ (bits(machInst, 3) | (bits(machInst, 5) << 1));
switch (a) {
case 0x0:
- return new WarnUnimplemented("vmla (int scalar)", machInst);
+ if (u) {
+ switch (size) {
+ case 1:
+ return new VmlasQ<uint16_t>(machInst, vd, vn, vm, index);
+ case 2:
+ return new VmlasQ<uint32_t>(machInst, vd, vn, vm, index);
+ default:
+ return new Unknown(machInst);
+ }
+ } else {
+ switch (size) {
+ case 1:
+ return new VmlasD<uint16_t>(machInst, vd, vn, vm, index);
+ case 2:
+ return new VmlasD<uint32_t>(machInst, vd, vn, vm, index);
+ default:
+ return new Unknown(machInst);
+ }
+ }
case 0x1:
- return new WarnUnimplemented("vmla (fp scalar)", machInst);
+ if (u)
+ return new VmlasQFp<float>(machInst, vd, vn, vm, index);
+ else
+ return new VmlasDFp<float>(machInst, vd, vn, vm, index);
case 0x4:
- return new WarnUnimplemented("vmls (int scalar)", machInst);
+ if (u) {
+ switch (size) {
+ case 1:
+ return new VmlssQ<uint16_t>(machInst, vd, vn, vm, index);
+ case 2:
+ return new VmlssQ<uint32_t>(machInst, vd, vn, vm, index);
+ default:
+ return new Unknown(machInst);
+ }
+ } else {
+ switch (size) {
+ case 1:
+ return new VmlssD<uint16_t>(machInst, vd, vn, vm, index);
+ case 2:
+ return new VmlssD<uint32_t>(machInst, vd, vn, vm, index);
+ default:
+ return new Unknown(machInst);
+ }
+ }
case 0x5:
- return new WarnUnimplemented("vmls (fp scalar)", machInst);
+ if (u)
+ return new VmlssQFp<float>(machInst, vd, vn, vm, index);
+ else
+ return new VmlssDFp<float>(machInst, vd, vn, vm, index);
case 0x2:
- return new WarnUnimplemented("vmlal (scalar)", machInst);
+ if (u) {
+ switch (size) {
+ case 1:
+ return new Vmlals<uint16_t>(machInst, vd, vn, vm, index);
+ case 2:
+ return new Vmlals<uint32_t>(machInst, vd, vn, vm, index);
+ default:
+ return new Unknown(machInst);
+ }
+ } else {
+ switch (size) {
+ case 1:
+ return new Vmlals<int16_t>(machInst, vd, vn, vm, index);
+ case 2:
+ return new Vmlals<int32_t>(machInst, vd, vn, vm, index);
+ default:
+ return new Unknown(machInst);
+ }
+ }
case 0x6:
- return new WarnUnimplemented("vmlsl (scalar)", machInst);
+ if (u) {
+ switch (size) {
+ case 1:
+ return new Vmlsls<uint16_t>(machInst, vd, vn, vm, index);
+ case 2:
+ return new Vmlsls<uint32_t>(machInst, vd, vn, vm, index);
+ default:
+ return new Unknown(machInst);
+ }
+ } else {
+ switch (size) {
+ case 1:
+ return new Vmlsls<int16_t>(machInst, vd, vn, vm, index);
+ case 2:
+ return new Vmlsls<int32_t>(machInst, vd, vn, vm, index);
+ default:
+ return new Unknown(machInst);
+ }
+ }
case 0x3:
if (u) {
return new Unknown(machInst);
} else {
- return new WarnUnimplemented("vqdmlal", machInst);
+ switch (size) {
+ case 1:
+ return new Vqdmlals<int16_t>(machInst, vd, vn, vm, index);
+ case 2:
+ return new Vqdmlals<int32_t>(machInst, vd, vn, vm, index);
+ default:
+ return new Unknown(machInst);
+ }
}
case 0x7:
if (u) {
return new Unknown(machInst);
} else {
- return new WarnUnimplemented("vqdmlsl", machInst);
+ switch (size) {
+ case 1:
+ return new Vqdmlsls<int16_t>(machInst, vd, vn, vm, index);
+ case 2:
+ return new Vqdmlsls<int32_t>(machInst, vd, vn, vm, index);
+ default:
+ return new Unknown(machInst);
+ }
}
case 0x8:
- return new WarnUnimplemented("vmul (int scalar)", machInst);
+ if (u) {
+ switch (size) {
+ case 1:
+ return new VmulsQ<uint16_t>(machInst, vd, vn, vm, index);
+ case 2:
+ return new VmulsQ<uint32_t>(machInst, vd, vn, vm, index);
+ default:
+ return new Unknown(machInst);
+ }
+ } else {
+ switch (size) {
+ case 1:
+ return new VmulsD<uint16_t>(machInst, vd, vn, vm, index);
+ case 2:
+ return new VmulsD<uint32_t>(machInst, vd, vn, vm, index);
+ default:
+ return new Unknown(machInst);
+ }
+ }
case 0x9:
- return new WarnUnimplemented("vmul (fp scalar)", machInst);
+ if (u)
+ return new VmulsQFp<float>(machInst, vd, vn, vm, index);
+ else
+ return new VmulsDFp<float>(machInst, vd, vn, vm, index);
case 0xa:
- return new WarnUnimplemented("vmull (scalar)", machInst);
+ if (u) {
+ switch (size) {
+ case 1:
+ return new Vmulls<uint16_t>(machInst, vd, vn, vm, index);
+ case 2:
+ return new Vmulls<uint32_t>(machInst, vd, vn, vm, index);
+ default:
+ return new Unknown(machInst);
+ }
+ } else {
+ switch (size) {
+ case 1:
+ return new Vmulls<int16_t>(machInst, vd, vn, vm, index);
+ case 2:
+ return new Vmulls<int32_t>(machInst, vd, vn, vm, index);
+ default:
+ return new Unknown(machInst);
+ }
+ }
case 0xb:
if (u) {
return new Unknown(machInst);
} else {
- return new WarnUnimplemented("vqdmull", machInst);
+ switch (size) {
+ case 1:
+ return new Vqdmulls<int16_t>(
+ machInst, vd, vn, vm, index);
+ case 2:
+ return new Vqdmulls<int32_t>(
+ machInst, vd, vn, vm, index);
+ default:
+ return new Unknown(machInst);
+ }
}
case 0xc:
- return new WarnUnimplemented("vqdmulh", machInst);
+ if (u) {
+ switch (size) {
+ case 1:
+ return new VqdmulhsQ<int16_t>(
+ machInst, vd, vn, vm, index);
+ case 2:
+ return new VqdmulhsQ<int32_t>(
+ machInst, vd, vn, vm, index);
+ default:
+ return new Unknown(machInst);
+ }
+ } else {
+ switch (size) {
+ case 1:
+ return new VqdmulhsD<int16_t>(
+ machInst, vd, vn, vm, index);
+ case 2:
+ return new VqdmulhsD<int32_t>(
+ machInst, vd, vn, vm, index);
+ default:
+ return new Unknown(machInst);
+ }
+ }
case 0xd:
- return new WarnUnimplemented("vqrdmulh", machInst);
+ if (u) {
+ switch (size) {
+ case 1:
+ return new VqrdmulhsQ<int16_t>(
+ machInst, vd, vn, vm, index);
+ case 2:
+ return new VqrdmulhsQ<int32_t>(
+ machInst, vd, vn, vm, index);
+ default:
+ return new Unknown(machInst);
+ }
+ } else {
+ switch (size) {
+ case 1:
+ return new VqrdmulhsD<int16_t>(
+ machInst, vd, vn, vm, index);
+ case 2:
+ return new VqrdmulhsD<int32_t>(
+ machInst, vd, vn, vm, index);
+ default:
+ return new Unknown(machInst);
+ }
+ }
}
return new Unknown(machInst);
}
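Several of the scalar ops above are saturating fixed-point multiplies. A sketch of the per-lane arithmetic of VQDMULH.S16 under my reading of the instruction (doubling multiply, keep the high half, truncating; VQRDMULH differs only by rounding):

    #include <cassert>
    #include <cstdint>

    int16_t vqdmulhLane(int16_t a, int16_t b, bool &qFlag) {
        // The only case where 2*a*b overflows 32 bits is both inputs
        // being INT16_MIN; saturate it and set the cumulative Q flag.
        if (a == INT16_MIN && b == INT16_MIN) {
            qFlag = true;
            return INT16_MAX;
        }
        int32_t prod = 2 * int32_t{a} * int32_t{b};
        return int16_t(prod >> 16);
    }

    int main() {
        bool q = false;
        assert(vqdmulhLane(0x4000, 0x4000, q) == 0x2000);  // 0.5 * 0.5 = 0.25
        assert(!q);
    }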
@@ -683,85 +1354,234 @@ let {{
{
const uint32_t a = bits(machInst, 17, 16);
const uint32_t b = bits(machInst, 10, 6);
+ const bool q = bits(machInst, 6);
+ const IntRegIndex vd =
+ (IntRegIndex)(2 * (bits(machInst, 15, 12) |
+ (bits(machInst, 22) << 4)));
+ const IntRegIndex vm =
+ (IntRegIndex)(2 * (bits(machInst, 3, 0) |
+ (bits(machInst, 5) << 4)));
+ const unsigned size = bits(machInst, 19, 18);
switch (a) {
case 0x0:
switch (bits(b, 4, 1)) {
case 0x0:
- return new WarnUnimplemented("vrev64", machInst);
+ switch (size) {
+ case 0:
+ if (q) {
+ return new NVrev64Q<uint8_t>(machInst, vd, vm);
+ } else {
+ return new NVrev64D<uint8_t>(machInst, vd, vm);
+ }
+ case 1:
+ if (q) {
+ return new NVrev64Q<uint16_t>(machInst, vd, vm);
+ } else {
+ return new NVrev64D<uint16_t>(machInst, vd, vm);
+ }
+ case 2:
+ if (q) {
+ return new NVrev64Q<uint32_t>(machInst, vd, vm);
+ } else {
+ return new NVrev64D<uint32_t>(machInst, vd, vm);
+ }
+ default:
+ return new Unknown(machInst);
+ }
case 0x1:
- return new WarnUnimplemented("vrev32", machInst);
+ switch (size) {
+ case 0:
+ if (q) {
+ return new NVrev32Q<uint8_t>(machInst, vd, vm);
+ } else {
+ return new NVrev32D<uint8_t>(machInst, vd, vm);
+ }
+ case 1:
+ if (q) {
+ return new NVrev32Q<uint16_t>(machInst, vd, vm);
+ } else {
+ return new NVrev32D<uint16_t>(machInst, vd, vm);
+ }
+ default:
+ return new Unknown(machInst);
+ }
case 0x2:
- return new WarnUnimplemented("vrev16", machInst);
+ if (size != 0) {
+ return new Unknown(machInst);
+ } else if (q) {
+ return new NVrev16Q<uint8_t>(machInst, vd, vm);
+ } else {
+ return new NVrev16D<uint8_t>(machInst, vd, vm);
+ }
case 0x4:
+ return decodeNeonSTwoMiscSReg<NVpaddlD, NVpaddlQ>(
+ q, size, machInst, vd, vm);
case 0x5:
- return new WarnUnimplemented("vpaddl", machInst);
+ return decodeNeonUTwoMiscSReg<NVpaddlD, NVpaddlQ>(
+ q, size, machInst, vd, vm);
case 0x8:
- return new WarnUnimplemented("vcls", machInst);
+ return decodeNeonSTwoMiscReg<NVclsD, NVclsQ>(
+ q, size, machInst, vd, vm);
case 0x9:
- return new WarnUnimplemented("vclz", machInst);
+ return decodeNeonSTwoMiscReg<NVclzD, NVclzQ>(
+ q, size, machInst, vd, vm);
case 0xa:
- return new WarnUnimplemented("vcnt", machInst);
+ return decodeNeonUTwoMiscReg<NVcntD, NVcntQ>(
+ q, size, machInst, vd, vm);
case 0xb:
- return new WarnUnimplemented("vmvn (reg)", machInst);
+ if (q)
+ return new NVmvnQ<uint64_t>(machInst, vd, vm);
+ else
+ return new NVmvnD<uint64_t>(machInst, vd, vm);
case 0xc:
+ return decodeNeonSTwoMiscSReg<NVpadalD, NVpadalQ>(
+ q, size, machInst, vd, vm);
case 0xd:
- return new WarnUnimplemented("vpadal", machInst);
+ return decodeNeonUTwoMiscSReg<NVpadalD, NVpadalQ>(
+ q, size, machInst, vd, vm);
case 0xe:
- return new WarnUnimplemented("vqabs", machInst);
+ return decodeNeonSTwoMiscReg<NVqabsD, NVqabsQ>(
+ q, size, machInst, vd, vm);
case 0xf:
- return new WarnUnimplemented("vqneg", machInst);
+ return decodeNeonSTwoMiscReg<NVqnegD, NVqnegQ>(
+ q, size, machInst, vd, vm);
default:
return new Unknown(machInst);
}
case 0x1:
switch (bits(b, 3, 1)) {
case 0x0:
- return new WarnUnimplemented("vcgt (imm #0)", machInst);
+ if (bits(b, 4)) {
+ if (q) {
+ return new NVcgtQFp<float>(machInst, vd, vm);
+ } else {
+ return new NVcgtDFp<float>(machInst, vd, vm);
+ }
+ } else {
+ return decodeNeonSTwoMiscReg<NVcgtD, NVcgtQ>(
+ q, size, machInst, vd, vm);
+ }
case 0x1:
- return new WarnUnimplemented("vcge (imm #0)", machInst);
+ if (bits(b, 4)) {
+ if (q) {
+ return new NVcgeQFp<float>(machInst, vd, vm);
+ } else {
+ return new NVcgeDFp<float>(machInst, vd, vm);
+ }
+ } else {
+ return decodeNeonSTwoMiscReg<NVcgeD, NVcgeQ>(
+ q, size, machInst, vd, vm);
+ }
case 0x2:
- return new WarnUnimplemented("vceq (imm #0)", machInst);
+ if (bits(b, 4)) {
+ if (q) {
+ return new NVceqQFp<float>(machInst, vd, vm);
+ } else {
+ return new NVceqDFp<float>(machInst, vd, vm);
+ }
+ } else {
+ return decodeNeonSTwoMiscReg<NVceqD, NVceqQ>(
+ q, size, machInst, vd, vm);
+ }
case 0x3:
- return new WarnUnimplemented("vcle (imm #0)", machInst);
+ if (bits(b, 4)) {
+ if (q) {
+ return new NVcleQFp<float>(machInst, vd, vm);
+ } else {
+ return new NVcleDFp<float>(machInst, vd, vm);
+ }
+ } else {
+ return decodeNeonSTwoMiscReg<NVcleD, NVcleQ>(
+ q, size, machInst, vd, vm);
+ }
case 0x4:
- return new WarnUnimplemented("vclt (imm #0)", machInst);
+ if (bits(b, 4)) {
+ if (q) {
+ return new NVcltQFp<float>(machInst, vd, vm);
+ } else {
+ return new NVcltDFp<float>(machInst, vd, vm);
+ }
+ } else {
+ return decodeNeonSTwoMiscReg<NVcltD, NVcltQ>(
+ q, size, machInst, vd, vm);
+ }
case 0x6:
- return new WarnUnimplemented("vabs (imm #0)", machInst);
+ if (bits(machInst, 10)) {
+ if (q)
+ return new NVabsQFp<float>(machInst, vd, vm);
+ else
+ return new NVabsDFp<float>(machInst, vd, vm);
+ } else {
+ return decodeNeonSTwoMiscReg<NVabsD, NVabsQ>(
+ q, size, machInst, vd, vm);
+ }
case 0x7:
- return new WarnUnimplemented("vneg (imm #0)", machInst);
+ if (bits(machInst, 10)) {
+ if (q)
+ return new NVnegQFp<float>(machInst, vd, vm);
+ else
+ return new NVnegDFp<float>(machInst, vd, vm);
+ } else {
+ return decodeNeonSTwoMiscReg<NVnegD, NVnegQ>(
+ q, size, machInst, vd, vm);
+ }
}
case 0x2:
switch (bits(b, 4, 1)) {
case 0x0:
- return new WarnUnimplemented("vswp", machInst);
+ if (q)
+ return new NVswpQ<uint64_t>(machInst, vd, vm);
+ else
+ return new NVswpD<uint64_t>(machInst, vd, vm);
case 0x1:
- return new WarnUnimplemented("vtrn", machInst);
+ return decodeNeonUTwoMiscReg<NVtrnD, NVtrnQ>(
+ q, size, machInst, vd, vm);
case 0x2:
- return new WarnUnimplemented("vuzp", machInst);
+ return decodeNeonUTwoMiscReg<NVuzpD, NVuzpQ>(
+ q, size, machInst, vd, vm);
case 0x3:
- return new WarnUnimplemented("vzip", machInst);
+ return decodeNeonUTwoMiscReg<NVzipD, NVzipQ>(
+ q, size, machInst, vd, vm);
case 0x4:
if (b == 0x8) {
- return new WarnUnimplemented("vmovn", machInst);
+ return decodeNeonUTwoMiscUSReg<NVmovn>(
+ size, machInst, vd, vm);
} else {
- return new WarnUnimplemented("vqmovun", machInst);
+ return decodeNeonSTwoMiscUSReg<NVqmovuns>(
+ size, machInst, vd, vm);
}
case 0x5:
- return new WarnUnimplemented("vqmovn", machInst);
+ if (q) {
+ return decodeNeonUTwoMiscUSReg<NVqmovun>(
+ size, machInst, vd, vm);
+ } else {
+ return decodeNeonSTwoMiscUSReg<NVqmovn>(
+ size, machInst, vd, vm);
+ }
case 0x6:
if (b == 0xc) {
- return new WarnUnimplemented("vshll", machInst);
+ const IntRegIndex vd =
+ (IntRegIndex)(2 * (bits(machInst, 15, 12) |
+ (bits(machInst, 22) << 4)));
+ const IntRegIndex vm =
+ (IntRegIndex)(2 * (bits(machInst, 3, 0) |
+ (bits(machInst, 5) << 4)));
+ unsigned size = bits(machInst, 19, 18);
+ return decodeNeonSTwoShiftUSReg<NVshll>(
+ size, machInst, vd, vm, 8 << size);
} else {
return new Unknown(machInst);
}
case 0xc:
case 0xe:
if (b == 0x18) {
- return new WarnUnimplemented("vcvt (single to half)",
- machInst);
+ if (size != 1 || (vm % 2))
+ return new Unknown(machInst);
+ return new NVcvts2h<uint16_t>(machInst, vd, vm);
} else if (b == 0x1c) {
- return new WarnUnimplemented("vcvt (half to single)",
- machInst);
+ if (size != 1 || (vd % 2))
+ return new Unknown(machInst);
+ return new NVcvth2s<uint16_t>(machInst, vd, vm);
} else {
return new Unknown(machInst);
}
@@ -770,11 +1590,75 @@ let {{
}
case 0x3:
if (bits(b, 4, 3) == 0x3) {
- return new WarnUnimplemented("vcvt (fp and int)", machInst);
+ if ((q && (vd % 2 || vm % 2)) || size != 2) {
+ return new Unknown(machInst);
+ } else {
+ if (bits(b, 2)) {
+ if (bits(b, 1)) {
+ if (q) {
+ return new NVcvt2ufxQ<float>(
+ machInst, vd, vm, 0);
+ } else {
+ return new NVcvt2ufxD<float>(
+ machInst, vd, vm, 0);
+ }
+ } else {
+ if (q) {
+ return new NVcvt2sfxQ<float>(
+ machInst, vd, vm, 0);
+ } else {
+ return new NVcvt2sfxD<float>(
+ machInst, vd, vm, 0);
+ }
+ }
+ } else {
+ if (bits(b, 1)) {
+ if (q) {
+ return new NVcvtu2fpQ<float>(
+ machInst, vd, vm, 0);
+ } else {
+ return new NVcvtu2fpD<float>(
+ machInst, vd, vm, 0);
+ }
+ } else {
+ if (q) {
+ return new NVcvts2fpQ<float>(
+ machInst, vd, vm, 0);
+ } else {
+ return new NVcvts2fpD<float>(
+ machInst, vd, vm, 0);
+ }
+ }
+ }
+ }
} else if ((b & 0x1a) == 0x10) {
- return new WarnUnimplemented("vrecpe", machInst);
+ if (bits(b, 2)) {
+ if (q) {
+ return new NVrecpeQFp<float>(machInst, vd, vm);
+ } else {
+ return new NVrecpeDFp<float>(machInst, vd, vm);
+ }
+ } else {
+ if (q) {
+ return new NVrecpeQ<uint32_t>(machInst, vd, vm);
+ } else {
+ return new NVrecpeD<uint32_t>(machInst, vd, vm);
+ }
+ }
} else if ((b & 0x1a) == 0x12) {
- return new WarnUnimplemented("vrsqrte", machInst);
+ if (bits(b, 2)) {
+ if (q) {
+ return new NVrsqrteQFp<float>(machInst, vd, vm);
+ } else {
+ return new NVrsqrteDFp<float>(machInst, vd, vm);
+ }
+ } else {
+ if (q) {
+ return new NVrsqrteQ<uint32_t>(machInst, vd, vm);
+ } else {
+ return new NVrsqrteD<uint32_t>(machInst, vd, vm);
+ }
+ }
} else {
return new Unknown(machInst);
}
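Among the two-register misc ops above, the VREV family reverses lane order within a fixed-size chunk. For instance, VREV64.8 applied to one 64-bit chunk behaves like this sketch:

    #include <cassert>
    #include <cstdint>

    // Reverse the eight byte lanes within a 64-bit chunk (VREV64.8).
    uint64_t vrev64_8(uint64_t x) {
        uint64_t r = 0;
        for (int i = 0; i < 8; i++) {
            r <<= 8;
            r |= x & 0xff;
            x >>= 8;
        }
        return r;
    }

    int main() {
        assert(vrev64_8(0x0102030405060708ULL) == 0x0807060504030201ULL);
    }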
@@ -799,29 +1683,76 @@ let {{
}
} else if ((c & 0x9) == 9) {
return decodeNeonTwoRegAndShift(machInst);
- } else if ((c & 0x5) == 0) {
- if (bits(a, 3, 2) != 0x3) {
+ } else if (bits(a, 2, 1) != 0x3) {
+ if ((c & 0x5) == 0) {
return decodeNeonThreeRegDiffLengths(machInst);
- }
- } else if ((c & 0x5) == 4) {
- if (bits(a, 3, 2) != 0x3) {
+ } else if ((c & 0x5) == 4) {
return decodeNeonTwoRegScalar(machInst);
}
} else if ((a & 0x16) == 0x16) {
+ const IntRegIndex vd =
+ (IntRegIndex)(2 * (bits(machInst, 15, 12) |
+ (bits(machInst, 22) << 4)));
+ const IntRegIndex vn =
+ (IntRegIndex)(2 * (bits(machInst, 19, 16) |
+ (bits(machInst, 7) << 4)));
+ const IntRegIndex vm =
+ (IntRegIndex)(2 * (bits(machInst, 3, 0) |
+ (bits(machInst, 5) << 4)));
if (!u) {
if (bits(c, 0) == 0) {
- return new WarnUnimplemented("vext", machInst);
+ unsigned imm4 = bits(machInst, 11, 8);
+ bool q = bits(machInst, 6);
+ if (imm4 >= 16 && !q)
+ return new Unknown(machInst);
+ if (q) {
+ return new NVextQ<uint8_t>(machInst, vd, vn, vm, imm4);
+ } else {
+ return new NVextD<uint8_t>(machInst, vd, vn, vm, imm4);
+ }
}
} else if (bits(b, 3) == 0 && bits(c, 0) == 0) {
return decodeNeonTwoRegMisc(machInst);
} else if (bits(b, 3, 2) == 0x2 && bits(c, 0) == 0) {
+ unsigned length = bits(machInst, 9, 8) + 1;
+ if ((uint32_t)vn / 2 + length > 32)
+ return new Unknown(machInst);
if (bits(machInst, 6) == 0) {
- return new WarnUnimplemented("vtbl", machInst);
+ switch (length) {
+ case 1:
+ return new NVtbl1(machInst, vd, vn, vm);
+ case 2:
+ return new NVtbl2(machInst, vd, vn, vm);
+ case 3:
+ return new NVtbl3(machInst, vd, vn, vm);
+ case 4:
+ return new NVtbl4(machInst, vd, vn, vm);
+ }
} else {
- return new WarnUnimplemented("vtbx", machInst);
+ switch (length) {
+ case 1:
+ return new NVtbx1(machInst, vd, vn, vm);
+ case 2:
+ return new NVtbx2(machInst, vd, vn, vm);
+ case 3:
+ return new NVtbx3(machInst, vd, vn, vm);
+ case 4:
+ return new NVtbx4(machInst, vd, vn, vm);
+ }
}
} else if (b == 0xc && (c & 0x9) == 0) {
- return new WarnUnimplemented("vdup (scalar)", machInst);
+ unsigned imm4 = bits(machInst, 19, 16);
+ if (bits(imm4, 2, 0) == 0)
+ return new Unknown(machInst);
+ unsigned size = 0;
+ while ((imm4 & 0x1) == 0) {
+ size++;
+ imm4 >>= 1;
+ }
+ unsigned index = imm4 >> 1;
+ const bool q = bits(machInst, 6);
+ return decodeNeonUTwoShiftSReg<NVdupD, NVdupQ>(
+ q, size, machInst, vd, vm, index);
}
}
return new Unknown(machInst);
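For VDUP (scalar), the trailing zeros of imm4 give the element size and the remaining high bits select the source lane, exactly as the loop above computes. The same derivation standalone:

    #include <cassert>

    // VDUP (scalar): least significant one of imm4 fixes the element size;
    // the bits above it are the lane index.
    bool dupSizeIndex(unsigned imm4, unsigned &size, unsigned &index) {
        if ((imm4 & 0x7) == 0)
            return false;  // undefined encoding
        size = 0;
        while ((imm4 & 1) == 0) {
            size++;
            imm4 >>= 1;
        }
        index = imm4 >> 1;
        return true;
    }

    int main() {
        unsigned size, index;
        assert(dupSizeIndex(0b0110, size, index));
        assert(size == 1 && index == 1);  // 16-bit element, lane 1
    }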
@@ -837,7 +1768,7 @@ def format ThumbNeonMem() {{
def format ThumbNeonData() {{
decode_block = '''
- return decodeNeonMem(machInst);
+ return decodeNeonData(machInst);
'''
}};
@@ -893,7 +1824,7 @@ let {{
break;
case 0x1:
{
- if (offset == 0 || vd + offset > NumFloatArchRegs) {
+ if (offset == 0 || vd + offset/2 > NumFloatArchRegs) {
break;
}
switch (bits(opcode, 1, 0)) {
@@ -1044,40 +1975,51 @@ let {{
if (bits(a, 2) == 0) {
uint32_t vd = (bits(machInst, 7) << 5) |
(bits(machInst, 19, 16) << 1);
- uint32_t index, size;
+ // Handle accessing each single precision half of the vector.
+ vd += bits(machInst, 21);
const IntRegIndex rt =
(IntRegIndex)(uint32_t)bits(machInst, 15, 12);
if (bits(machInst, 22) == 1) {
- size = 8;
- index = (bits(machInst, 21) << 2) |
- bits(machInst, 6, 5);
+ return new VmovCoreRegB(machInst, (IntRegIndex)vd,
+ rt, bits(machInst, 6, 5));
} else if (bits(machInst, 5) == 1) {
- size = 16;
- index = (bits(machInst, 21) << 1) |
- bits(machInst, 6);
+ return new VmovCoreRegH(machInst, (IntRegIndex)vd,
+ rt, bits(machInst, 6));
} else if (bits(machInst, 6) == 0) {
- size = 32;
- index = bits(machInst, 21);
+ return new VmovCoreRegW(machInst, (IntRegIndex)vd, rt);
} else {
return new Unknown(machInst);
}
- if (index >= (32 / size)) {
- index -= (32 / size);
- vd++;
- }
- switch (size) {
- case 8:
- return new VmovCoreRegB(machInst, (IntRegIndex)vd,
- rt, index);
- case 16:
- return new VmovCoreRegH(machInst, (IntRegIndex)vd,
- rt, index);
- case 32:
- return new VmovCoreRegW(machInst, (IntRegIndex)vd, rt);
- }
} else if (bits(b, 1) == 0) {
- // A8-594
- return new WarnUnimplemented("vdup", machInst);
+ bool q = bits(machInst, 21);
+ unsigned be = (bits(machInst, 22) << 1) | (bits(machInst, 5));
+ IntRegIndex vd = (IntRegIndex)(2 * (uint32_t)
+ (bits(machInst, 19, 16) | (bits(machInst, 7) << 4)));
+ IntRegIndex rt = (IntRegIndex)(uint32_t)
+ bits(machInst, 15, 12);
+ if (q) {
+ switch (be) {
+ case 0:
+ return new NVdupQGpr<uint32_t>(machInst, vd, rt);
+ case 1:
+ return new NVdupQGpr<uint16_t>(machInst, vd, rt);
+ case 2:
+ return new NVdupQGpr<uint8_t>(machInst, vd, rt);
+ case 3:
+ return new Unknown(machInst);
+ }
+ } else {
+ switch (be) {
+ case 0:
+ return new NVdupDGpr<uint32_t>(machInst, vd, rt);
+ case 1:
+ return new NVdupDGpr<uint16_t>(machInst, vd, rt);
+ case 2:
+ return new NVdupDGpr<uint8_t>(machInst, vd, rt);
+ case 3:
+ return new Unknown(machInst);
+ }
+ }
}
} else if (l == 1 && c == 0) {
if (a == 0) {
@@ -1128,30 +2070,14 @@ let {{
} else {
uint32_t vd = (bits(machInst, 7) << 5) |
(bits(machInst, 19, 16) << 1);
- uint32_t index, size;
+ // Handle indexing into each single precision half of the vector.
+ vd += bits(machInst, 21);
+ uint32_t index;
const IntRegIndex rt =
(IntRegIndex)(uint32_t)bits(machInst, 15, 12);
const bool u = (bits(machInst, 23) == 1);
if (bits(machInst, 22) == 1) {
- size = 8;
- index = (bits(machInst, 21) << 2) |
- bits(machInst, 6, 5);
- } else if (bits(machInst, 5) == 1) {
- size = 16;
- index = (bits(machInst, 21) << 1) |
- bits(machInst, 6);
- } else if (bits(machInst, 6) == 0 && !u) {
- size = 32;
- index = bits(machInst, 21);
- } else {
- return new Unknown(machInst);
- }
- if (index >= (32 / size)) {
- index -= (32 / size);
- vd++;
- }
- switch (size) {
- case 8:
+ index = bits(machInst, 6, 5);
if (u) {
return new VmovRegCoreUB(machInst, rt,
(IntRegIndex)vd, index);
@@ -1159,7 +2085,8 @@ let {{
return new VmovRegCoreSB(machInst, rt,
(IntRegIndex)vd, index);
}
- case 16:
+ } else if (bits(machInst, 5) == 1) {
+ index = bits(machInst, 6);
if (u) {
return new VmovRegCoreUH(machInst, rt,
(IntRegIndex)vd, index);
@@ -1167,8 +2094,10 @@ let {{
return new VmovRegCoreSH(machInst, rt,
(IntRegIndex)vd, index);
}
- case 32:
+ } else if (bits(machInst, 6) == 0 && !u) {
return new VmovRegCoreW(machInst, rt, (IntRegIndex)vd);
+ } else {
+ return new Unknown(machInst);
}
}
return new Unknown(machInst);
diff --git a/src/arch/arm/isa/insts/fp.isa b/src/arch/arm/isa/insts/fp.isa
index c4682b66c..9748c8a49 100644
--- a/src/arch/arm/isa/insts/fp.isa
+++ b/src/arch/arm/isa/insts/fp.isa
@@ -282,7 +282,7 @@ let {{
exec_output += PredOpExecute.subst(vmovRegQIop);
vmovCoreRegBCode = '''
- FpDest.uw = insertBits(FpDest.uw, imm * 8, imm * 8 + 7, Op1.ub);
+ FpDest.uw = insertBits(FpDest.uw, imm * 8 + 7, imm * 8, Op1.ub);
'''
vmovCoreRegBIop = InstObjParams("vmov", "VmovCoreRegB", "FpRegRegImmOp",
{ "code": vmovCoreRegBCode,
@@ -292,7 +292,7 @@ let {{
exec_output += PredOpExecute.subst(vmovCoreRegBIop);
vmovCoreRegHCode = '''
- FpDest.uw = insertBits(FpDest.uw, imm * 16, imm * 16 + 15, Op1.uh);
+ FpDest.uw = insertBits(FpDest.uw, imm * 16 + 15, imm * 16, Op1.uh);
'''
vmovCoreRegHIop = InstObjParams("vmov", "VmovCoreRegH", "FpRegRegImmOp",
{ "code": vmovCoreRegHCode,
@@ -312,7 +312,8 @@ let {{
exec_output += PredOpExecute.subst(vmovCoreRegWIop);
vmovRegCoreUBCode = '''
- Dest = bits(FpOp1.uw, imm * 8, imm * 8 + 7);
+ assert(imm < 4);
+ Dest = bits(FpOp1.uw, imm * 8 + 7, imm * 8);
'''
vmovRegCoreUBIop = InstObjParams("vmov", "VmovRegCoreUB", "FpRegRegImmOp",
{ "code": vmovRegCoreUBCode,
@@ -322,7 +323,8 @@ let {{
exec_output += PredOpExecute.subst(vmovRegCoreUBIop);
vmovRegCoreUHCode = '''
- Dest = bits(FpOp1.uw, imm * 16, imm * 16 + 15);
+ assert(imm < 2);
+ Dest = bits(FpOp1.uw, imm * 16 + 15, imm * 16);
'''
vmovRegCoreUHIop = InstObjParams("vmov", "VmovRegCoreUH", "FpRegRegImmOp",
{ "code": vmovRegCoreUHCode,
@@ -332,7 +334,8 @@ let {{
exec_output += PredOpExecute.subst(vmovRegCoreUHIop);
vmovRegCoreSBCode = '''
- Dest = sext<8>(bits(FpOp1.uw, imm * 8, imm * 8 + 7));
+ assert(imm < 4);
+ Dest = sext<8>(bits(FpOp1.uw, imm * 8 + 7, imm * 8));
'''
vmovRegCoreSBIop = InstObjParams("vmov", "VmovRegCoreSB", "FpRegRegImmOp",
{ "code": vmovRegCoreSBCode,
@@ -342,7 +345,8 @@ let {{
exec_output += PredOpExecute.subst(vmovRegCoreSBIop);
vmovRegCoreSHCode = '''
- Dest = sext<16>(bits(FpOp1.uw, imm * 16, imm * 16 + 15));
+ assert(imm < 2);
+ Dest = sext<16>(bits(FpOp1.uw, imm * 16 + 15, imm * 16));
'''
vmovRegCoreSHIop = InstObjParams("vmov", "VmovRegCoreSH", "FpRegRegImmOp",
{ "code": vmovRegCoreSHCode,
@@ -396,7 +400,7 @@ let {{
Fpscr = fpscr;
'''
singleBinOp = "binaryOp(fpscr, FpOp1, FpOp2," + \
- "%(func)s, fpscr.fz, fpscr.rMode)"
+ "%(func)s, fpscr.fz, fpscr.dn, fpscr.rMode)"
singleUnaryOp = "unaryOp(fpscr, FpOp1, %(func)s, fpscr.fz, fpscr.rMode)"
doubleCode = '''
FPSCR fpscr = Fpscr;
@@ -408,7 +412,7 @@ let {{
doubleBinOp = '''
binaryOp(fpscr, dbl(FpOp1P0.uw, FpOp1P1.uw),
dbl(FpOp2P0.uw, FpOp2P1.uw),
- %(func)s, fpscr.fz, fpscr.rMode);
+ %(func)s, fpscr.fz, fpscr.dn, fpscr.rMode);
'''
doubleUnaryOp = '''
unaryOp(fpscr, dbl(FpOp1P0.uw, FpOp1P1.uw), %(func)s,
@@ -499,8 +503,9 @@ let {{
vmlaSCode = '''
FPSCR fpscr = Fpscr;
float mid = binaryOp(fpscr, FpOp1, FpOp2,
- fpMulS, fpscr.fz, fpscr.rMode);
- FpDest = binaryOp(fpscr, FpDest, mid, fpAddS, fpscr.fz, fpscr.rMode);
+ fpMulS, fpscr.fz, fpscr.dn, fpscr.rMode);
+ FpDest = binaryOp(fpscr, FpDest, mid, fpAddS,
+ fpscr.fz, fpscr.dn, fpscr.rMode);
Fpscr = fpscr;
'''
vmlaSIop = InstObjParams("vmlas", "VmlaS", "FpRegRegRegOp",
@@ -514,9 +519,10 @@ let {{
FPSCR fpscr = Fpscr;
double mid = binaryOp(fpscr, dbl(FpOp1P0.uw, FpOp1P1.uw),
dbl(FpOp2P0.uw, FpOp2P1.uw),
- fpMulD, fpscr.fz, fpscr.rMode);
+ fpMulD, fpscr.fz, fpscr.dn, fpscr.rMode);
double dest = binaryOp(fpscr, dbl(FpDestP0.uw, FpDestP1.uw),
- mid, fpAddD, fpscr.fz, fpscr.rMode);
+ mid, fpAddD, fpscr.fz,
+ fpscr.dn, fpscr.rMode);
Fpscr = fpscr;
FpDestP0.uw = dblLow(dest);
FpDestP1.uw = dblHi(dest);
@@ -531,8 +537,9 @@ let {{
vmlsSCode = '''
FPSCR fpscr = Fpscr;
float mid = binaryOp(fpscr, FpOp1, FpOp2,
- fpMulS, fpscr.fz, fpscr.rMode);
- FpDest = binaryOp(fpscr, FpDest, -mid, fpAddS, fpscr.fz, fpscr.rMode);
+ fpMulS, fpscr.fz, fpscr.dn, fpscr.rMode);
+ FpDest = binaryOp(fpscr, FpDest, -mid, fpAddS,
+ fpscr.fz, fpscr.dn, fpscr.rMode);
Fpscr = fpscr;
'''
vmlsSIop = InstObjParams("vmlss", "VmlsS", "FpRegRegRegOp",
@@ -546,9 +553,10 @@ let {{
FPSCR fpscr = Fpscr;
double mid = binaryOp(fpscr, dbl(FpOp1P0.uw, FpOp1P1.uw),
dbl(FpOp2P0.uw, FpOp2P1.uw),
- fpMulD, fpscr.fz, fpscr.rMode);
+ fpMulD, fpscr.fz, fpscr.dn, fpscr.rMode);
double dest = binaryOp(fpscr, dbl(FpDestP0.uw, FpDestP1.uw),
- -mid, fpAddD, fpscr.fz, fpscr.rMode);
+ -mid, fpAddD, fpscr.fz,
+ fpscr.dn, fpscr.rMode);
Fpscr = fpscr;
FpDestP0.uw = dblLow(dest);
FpDestP1.uw = dblHi(dest);
@@ -563,8 +571,9 @@ let {{
vnmlaSCode = '''
FPSCR fpscr = Fpscr;
float mid = binaryOp(fpscr, FpOp1, FpOp2,
- fpMulS, fpscr.fz, fpscr.rMode);
- FpDest = binaryOp(fpscr, -FpDest, -mid, fpAddS, fpscr.fz, fpscr.rMode);
+ fpMulS, fpscr.fz, fpscr.dn, fpscr.rMode);
+ FpDest = binaryOp(fpscr, -FpDest, -mid, fpAddS,
+ fpscr.fz, fpscr.dn, fpscr.rMode);
Fpscr = fpscr;
'''
vnmlaSIop = InstObjParams("vnmlas", "VnmlaS", "FpRegRegRegOp",
@@ -578,9 +587,10 @@ let {{
FPSCR fpscr = Fpscr;
double mid = binaryOp(fpscr, dbl(FpOp1P0.uw, FpOp1P1.uw),
dbl(FpOp2P0.uw, FpOp2P1.uw),
- fpMulD, fpscr.fz, fpscr.rMode);
+ fpMulD, fpscr.fz, fpscr.dn, fpscr.rMode);
double dest = binaryOp(fpscr, -dbl(FpDestP0.uw, FpDestP1.uw),
- -mid, fpAddD, fpscr.fz, fpscr.rMode);
+ -mid, fpAddD, fpscr.fz,
+ fpscr.dn, fpscr.rMode);
Fpscr = fpscr;
FpDestP0.uw = dblLow(dest);
FpDestP1.uw = dblHi(dest);
@@ -595,8 +605,9 @@ let {{
vnmlsSCode = '''
FPSCR fpscr = Fpscr;
float mid = binaryOp(fpscr, FpOp1, FpOp2,
- fpMulS, fpscr.fz, fpscr.rMode);
- FpDest = binaryOp(fpscr, -FpDest, mid, fpAddS, fpscr.fz, fpscr.rMode);
+ fpMulS, fpscr.fz, fpscr.dn, fpscr.rMode);
+ FpDest = binaryOp(fpscr, -FpDest, mid, fpAddS,
+ fpscr.fz, fpscr.dn, fpscr.rMode);
Fpscr = fpscr;
'''
vnmlsSIop = InstObjParams("vnmlss", "VnmlsS", "FpRegRegRegOp",
@@ -610,9 +621,10 @@ let {{
FPSCR fpscr = Fpscr;
double mid = binaryOp(fpscr, dbl(FpOp1P0.uw, FpOp1P1.uw),
dbl(FpOp2P0.uw, FpOp2P1.uw),
- fpMulD, fpscr.fz, fpscr.rMode);
+ fpMulD, fpscr.fz, fpscr.dn, fpscr.rMode);
double dest = binaryOp(fpscr, -dbl(FpDestP0.uw, FpDestP1.uw),
- mid, fpAddD, fpscr.fz, fpscr.rMode);
+ mid, fpAddD, fpscr.fz,
+ fpscr.dn, fpscr.rMode);
Fpscr = fpscr;
FpDestP0.uw = dblLow(dest);
FpDestP1.uw = dblHi(dest);
@@ -626,7 +638,8 @@ let {{
vnmulSCode = '''
FPSCR fpscr = Fpscr;
- FpDest = -binaryOp(fpscr, FpOp1, FpOp2, fpMulS, fpscr.fz, fpscr.rMode);
+ FpDest = -binaryOp(fpscr, FpOp1, FpOp2, fpMulS,
+ fpscr.fz, fpscr.dn, fpscr.rMode);
Fpscr = fpscr;
'''
vnmulSIop = InstObjParams("vnmuls", "VnmulS", "FpRegRegRegOp",
@@ -640,7 +653,8 @@ let {{
FPSCR fpscr = Fpscr;
double dest = -binaryOp(fpscr, dbl(FpOp1P0.uw, FpOp1P1.uw),
dbl(FpOp2P0.uw, FpOp2P1.uw),
- fpMulD, fpscr.fz, fpscr.rMode);
+ fpMulD, fpscr.fz, fpscr.dn,
+ fpscr.rMode);
Fpscr = fpscr;
FpDestP0.uw = dblLow(dest);
FpDestP1.uw = dblHi(dest);
@@ -665,7 +679,7 @@ let {{
__asm__ __volatile__("" : "=m" (FpOp1.uw) : "m" (FpOp1.uw));
FpDest = FpOp1.uw;
__asm__ __volatile__("" :: "m" (FpDest));
- finishVfp(fpscr, state);
+ finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtUIntFpSIop = InstObjParams("vcvt", "VcvtUIntFpS", "FpRegRegOp",
@@ -681,7 +695,7 @@ let {{
__asm__ __volatile__("" : "=m" (FpOp1P0.uw) : "m" (FpOp1P0.uw));
double cDest = (uint64_t)FpOp1P0.uw;
__asm__ __volatile__("" :: "m" (cDest));
- finishVfp(fpscr, state);
+ finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
FpDestP0.uw = dblLow(cDest);
FpDestP1.uw = dblHi(cDest);
@@ -699,7 +713,7 @@ let {{
__asm__ __volatile__("" : "=m" (FpOp1.sw) : "m" (FpOp1.sw));
FpDest = FpOp1.sw;
__asm__ __volatile__("" :: "m" (FpDest));
- finishVfp(fpscr, state);
+ finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtSIntFpSIop = InstObjParams("vcvt", "VcvtSIntFpS", "FpRegRegOp",
@@ -715,7 +729,7 @@ let {{
__asm__ __volatile__("" : "=m" (FpOp1P0.sw) : "m" (FpOp1P0.sw));
double cDest = FpOp1P0.sw;
__asm__ __volatile__("" :: "m" (cDest));
- finishVfp(fpscr, state);
+ finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
FpDestP0.uw = dblLow(cDest);
FpDestP1.uw = dblHi(cDest);
@@ -734,7 +748,7 @@ let {{
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
FpDest.uw = vfpFpSToFixed(FpOp1, false, false, 0, false);
__asm__ __volatile__("" :: "m" (FpDest.uw));
- finishVfp(fpscr, state);
+ finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtFpUIntSRIop = InstObjParams("vcvt", "VcvtFpUIntSR", "FpRegRegOp",
@@ -752,7 +766,7 @@ let {{
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
uint64_t result = vfpFpDToFixed(cOp1, false, false, 0, false);
__asm__ __volatile__("" :: "m" (result));
- finishVfp(fpscr, state);
+ finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
FpDestP0.uw = result;
'''
@@ -770,7 +784,7 @@ let {{
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
FpDest.sw = vfpFpSToFixed(FpOp1, true, false, 0, false);
__asm__ __volatile__("" :: "m" (FpDest.sw));
- finishVfp(fpscr, state);
+ finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtFpSIntSRIop = InstObjParams("vcvtr", "VcvtFpSIntSR", "FpRegRegOp",
@@ -788,7 +802,7 @@ let {{
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
int64_t result = vfpFpDToFixed(cOp1, true, false, 0, false);
__asm__ __volatile__("" :: "m" (result));
- finishVfp(fpscr, state);
+ finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
FpDestP0.uw = result;
'''
@@ -807,7 +821,7 @@ let {{
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
FpDest.uw = vfpFpSToFixed(FpOp1, false, false, 0);
__asm__ __volatile__("" :: "m" (FpDest.uw));
- finishVfp(fpscr, state);
+ finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtFpUIntSIop = InstObjParams("vcvt", "VcvtFpUIntS", "FpRegRegOp",
@@ -826,7 +840,7 @@ let {{
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
uint64_t result = vfpFpDToFixed(cOp1, false, false, 0);
__asm__ __volatile__("" :: "m" (result));
- finishVfp(fpscr, state);
+ finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
FpDestP0.uw = result;
'''
@@ -845,7 +859,7 @@ let {{
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
FpDest.sw = vfpFpSToFixed(FpOp1, true, false, 0);
__asm__ __volatile__("" :: "m" (FpDest.sw));
- finishVfp(fpscr, state);
+ finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtFpSIntSIop = InstObjParams("vcvt", "VcvtFpSIntS", "FpRegRegOp",
@@ -864,7 +878,7 @@ let {{
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
int64_t result = vfpFpDToFixed(cOp1, true, false, 0);
__asm__ __volatile__("" :: "m" (result));
- finishVfp(fpscr, state);
+ finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
FpDestP0.uw = result;
'''
@@ -882,7 +896,7 @@ let {{
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
double cDest = fixFpSFpDDest(Fpscr, FpOp1);
__asm__ __volatile__("" :: "m" (cDest));
- finishVfp(fpscr, state);
+ finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
FpDestP0.uw = dblLow(cDest);
FpDestP1.uw = dblHi(cDest);
@@ -902,7 +916,7 @@ let {{
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
FpDest = fixFpDFpSDest(Fpscr, cOp1);
__asm__ __volatile__("" :: "m" (FpDest));
- finishVfp(fpscr, state);
+ finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtFpDFpSIop = InstObjParams("vcvt", "VcvtFpDFpS", "FpRegRegOp",
@@ -917,9 +931,10 @@ let {{
vfpFlushToZero(fpscr, FpOp1);
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
- FpDest = vcvtFpHFpS(fpscr, FpOp1, true);
+ FpDest = vcvtFpHFpS(fpscr, fpscr.dn, fpscr.ahp,
+ bits(fpToBits(FpOp1), 31, 16));
__asm__ __volatile__("" :: "m" (FpDest));
- finishVfp(fpscr, state);
+ finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtFpHTFpSIop = InstObjParams("vcvtt", "VcvtFpHTFpS", "FpRegRegOp",
@@ -933,9 +948,10 @@ let {{
FPSCR fpscr = Fpscr;
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
- FpDest = vcvtFpHFpS(fpscr, FpOp1, false);
+ FpDest = vcvtFpHFpS(fpscr, fpscr.dn, fpscr.ahp,
+ bits(fpToBits(FpOp1), 15, 0));
__asm__ __volatile__("" :: "m" (FpDest));
- finishVfp(fpscr, state);
+ finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtFpHBFpSIop = InstObjParams("vcvtb", "VcvtFpHBFpS", "FpRegRegOp",
@@ -949,11 +965,13 @@ let {{
FPSCR fpscr = Fpscr;
vfpFlushToZero(fpscr, FpOp1);
VfpSavedState state = prepFpState(fpscr.rMode);
- __asm__ __volatile__("" : "=m" (FpOp1), "=m" (FpDest)
- : "m" (FpOp1), "m" (FpDest));
- FpDest = vcvtFpSFpH(fpscr, FpOp1, FpDest, true);
- __asm__ __volatile__("" :: "m" (FpDest));
- finishVfp(fpscr, state);
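+        // VCVTT writes the top halfword of the destination; the VCVTB
+        // variant below writes the bottom halfword.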
+ __asm__ __volatile__("" : "=m" (FpOp1), "=m" (FpDest.uw)
+ : "m" (FpOp1), "m" (FpDest.uw));
+        FpDest.uw = insertBits(FpDest.uw, 31, 16,
+ vcvtFpSFpH(fpscr, fpscr.fz, fpscr.dn,
+ fpscr.rMode, fpscr.ahp, FpOp1));
+ __asm__ __volatile__("" :: "m" (FpDest.uw));
+ finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtFpSFpHTIop = InstObjParams("vcvtt", "VcvtFpSFpHT", "FpRegRegOp",
@@ -967,11 +985,13 @@ let {{
FPSCR fpscr = Fpscr;
vfpFlushToZero(fpscr, FpOp1);
VfpSavedState state = prepFpState(fpscr.rMode);
- __asm__ __volatile__("" : "=m" (FpOp1), "=m" (FpDest)
- : "m" (FpOp1), "m" (FpDest));
- FpDest = vcvtFpSFpH(fpscr, FpOp1, FpDest, false);
- __asm__ __volatile__("" :: "m" (FpDest));
- finishVfp(fpscr, state);
+ __asm__ __volatile__("" : "=m" (FpOp1), "=m" (FpDest.uw)
+ : "m" (FpOp1), "m" (FpDest.uw));
+ FpDest.uw = insertBits(FpDest.uw, 15, 0,
+ vcvtFpSFpH(fpscr, fpscr.fz, fpscr.dn,
+ fpscr.rMode, fpscr.ahp, FpOp1));
+ __asm__ __volatile__("" :: "m" (FpDest.uw));
+ finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtFpSFpHBIop = InstObjParams("vcvtb", "VcvtFpSFpHB", "FpRegRegOp",
@@ -1201,7 +1221,7 @@ let {{
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
FpDest.sw = vfpFpSToFixed(FpOp1, true, false, imm);
__asm__ __volatile__("" :: "m" (FpDest.sw));
- finishVfp(fpscr, state);
+ finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtFpSFixedSIop = InstObjParams("vcvt", "VcvtFpSFixedS", "FpRegRegImmOp",
@@ -1219,7 +1239,7 @@ let {{
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
uint64_t mid = vfpFpDToFixed(cOp1, true, false, imm);
__asm__ __volatile__("" :: "m" (mid));
- finishVfp(fpscr, state);
+ finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
FpDestP0.uw = mid;
FpDestP1.uw = mid >> 32;
@@ -1238,7 +1258,7 @@ let {{
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
FpDest.uw = vfpFpSToFixed(FpOp1, false, false, imm);
__asm__ __volatile__("" :: "m" (FpDest.uw));
- finishVfp(fpscr, state);
+ finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtFpUFixedSIop = InstObjParams("vcvt", "VcvtFpUFixedS", "FpRegRegImmOp",
@@ -1256,7 +1276,7 @@ let {{
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
uint64_t mid = vfpFpDToFixed(cOp1, false, false, imm);
__asm__ __volatile__("" :: "m" (mid));
- finishVfp(fpscr, state);
+ finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
FpDestP0.uw = mid;
FpDestP1.uw = mid >> 32;
@@ -1272,9 +1292,9 @@ let {{
FPSCR fpscr = Fpscr;
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (FpOp1.sw) : "m" (FpOp1.sw));
- FpDest = vfpSFixedToFpS(Fpscr, FpOp1.sw, false, imm);
+ FpDest = vfpSFixedToFpS(fpscr.fz, fpscr.dn, FpOp1.sw, false, imm);
__asm__ __volatile__("" :: "m" (FpDest));
- finishVfp(fpscr, state);
+ finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtSFixedFpSIop = InstObjParams("vcvt", "VcvtSFixedFpS", "FpRegRegImmOp",
@@ -1289,9 +1309,9 @@ let {{
uint64_t mid = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (mid) : "m" (mid));
- double cDest = vfpSFixedToFpD(Fpscr, mid, false, imm);
+ double cDest = vfpSFixedToFpD(fpscr.fz, fpscr.dn, mid, false, imm);
__asm__ __volatile__("" :: "m" (cDest));
- finishVfp(fpscr, state);
+ finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
FpDestP0.uw = dblLow(cDest);
FpDestP1.uw = dblHi(cDest);
@@ -1307,9 +1327,9 @@ let {{
FPSCR fpscr = Fpscr;
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (FpOp1.uw) : "m" (FpOp1.uw));
- FpDest = vfpUFixedToFpS(Fpscr, FpOp1.uw, false, imm);
+ FpDest = vfpUFixedToFpS(fpscr.fz, fpscr.dn, FpOp1.uw, false, imm);
__asm__ __volatile__("" :: "m" (FpDest));
- finishVfp(fpscr, state);
+ finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtUFixedFpSIop = InstObjParams("vcvt", "VcvtUFixedFpS", "FpRegRegImmOp",
@@ -1324,9 +1344,9 @@ let {{
uint64_t mid = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (mid) : "m" (mid));
- double cDest = vfpUFixedToFpD(Fpscr, mid, false, imm);
+ double cDest = vfpUFixedToFpD(fpscr.fz, fpscr.dn, mid, false, imm);
__asm__ __volatile__("" :: "m" (cDest));
- finishVfp(fpscr, state);
+ finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
FpDestP0.uw = dblLow(cDest);
FpDestP1.uw = dblHi(cDest);
@@ -1345,7 +1365,7 @@ let {{
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
FpDest.sh = vfpFpSToFixed(FpOp1, true, true, imm);
__asm__ __volatile__("" :: "m" (FpDest.sh));
- finishVfp(fpscr, state);
+ finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtFpSHFixedSIop = InstObjParams("vcvt", "VcvtFpSHFixedS",
@@ -1364,7 +1384,7 @@ let {{
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
uint64_t result = vfpFpDToFixed(cOp1, true, true, imm);
__asm__ __volatile__("" :: "m" (result));
- finishVfp(fpscr, state);
+ finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
FpDestP0.uw = result;
FpDestP1.uw = result >> 32;
@@ -1384,7 +1404,7 @@ let {{
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
FpDest.uh = vfpFpSToFixed(FpOp1, false, true, imm);
__asm__ __volatile__("" :: "m" (FpDest.uh));
- finishVfp(fpscr, state);
+ finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtFpUHFixedSIop = InstObjParams("vcvt", "VcvtFpUHFixedS",
@@ -1403,7 +1423,7 @@ let {{
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
uint64_t mid = vfpFpDToFixed(cOp1, false, true, imm);
__asm__ __volatile__("" :: "m" (mid));
- finishVfp(fpscr, state);
+ finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
FpDestP0.uw = mid;
FpDestP1.uw = mid >> 32;
@@ -1420,9 +1440,9 @@ let {{
FPSCR fpscr = Fpscr;
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (FpOp1.sh) : "m" (FpOp1.sh));
- FpDest = vfpSFixedToFpS(Fpscr, FpOp1.sh, true, imm);
+ FpDest = vfpSFixedToFpS(fpscr.fz, fpscr.dn, FpOp1.sh, true, imm);
__asm__ __volatile__("" :: "m" (FpDest));
- finishVfp(fpscr, state);
+ finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtSHFixedFpSIop = InstObjParams("vcvt", "VcvtSHFixedFpS",
@@ -1438,9 +1458,9 @@ let {{
uint64_t mid = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (mid) : "m" (mid));
- double cDest = vfpSFixedToFpD(Fpscr, mid, true, imm);
+ double cDest = vfpSFixedToFpD(fpscr.fz, fpscr.dn, mid, true, imm);
__asm__ __volatile__("" :: "m" (cDest));
- finishVfp(fpscr, state);
+ finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
FpDestP0.uw = dblLow(cDest);
FpDestP1.uw = dblHi(cDest);
@@ -1457,9 +1477,9 @@ let {{
FPSCR fpscr = Fpscr;
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (FpOp1.uh) : "m" (FpOp1.uh));
- FpDest = vfpUFixedToFpS(Fpscr, FpOp1.uh, true, imm);
+ FpDest = vfpUFixedToFpS(fpscr.fz, fpscr.dn, FpOp1.uh, true, imm);
__asm__ __volatile__("" :: "m" (FpDest));
- finishVfp(fpscr, state);
+ finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtUHFixedFpSIop = InstObjParams("vcvt", "VcvtUHFixedFpS",
@@ -1475,9 +1495,9 @@ let {{
uint64_t mid = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (mid) : "m" (mid));
- double cDest = vfpUFixedToFpD(Fpscr, mid, true, imm);
+ double cDest = vfpUFixedToFpD(fpscr.fz, fpscr.dn, mid, true, imm);
__asm__ __volatile__("" :: "m" (cDest));
- finishVfp(fpscr, state);
+ finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
FpDestP0.uw = dblLow(cDest);
FpDestP1.uw = dblHi(cDest);
diff --git a/src/arch/arm/isa/insts/insts.isa b/src/arch/arm/isa/insts/insts.isa
index a79557f3d..9c51f3cf0 100644
--- a/src/arch/arm/isa/insts/insts.isa
+++ b/src/arch/arm/isa/insts/insts.isa
@@ -70,5 +70,8 @@
//Divide
##include "div.isa"
-//FP (VFP and Neon)
+//VFP
##include "fp.isa"
+
+//Neon
+##include "neon.isa"
diff --git a/src/arch/arm/isa/insts/macromem.isa b/src/arch/arm/isa/insts/macromem.isa
index ca2c7c6ab..652a929f1 100644
--- a/src/arch/arm/isa/insts/macromem.isa
+++ b/src/arch/arm/isa/insts/macromem.isa
@@ -57,11 +57,34 @@ let {{
microLdrFpUopCode = "Fa.uw = cSwap(Mem.uw, ((CPSR)Cpsr).e);"
microLdrFpUopIop = InstObjParams('ldrfp_uop', 'MicroLdrFpUop',
- 'MicroMemOp',
- {'memacc_code': microLdrFpUopCode,
- 'ea_code': 'EA = Rb + (up ? imm : -imm);',
- 'predicate_test': predicateTest},
- ['IsMicroop'])
+ 'MicroMemOp',
+ {'memacc_code': microLdrFpUopCode,
+ 'ea_code':
+ 'EA = Rb + (up ? imm : -imm);',
+ 'predicate_test': predicateTest},
+ ['IsMicroop'])
+
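+    # The DB/DT variants (our reading: "doubleword bottom"/"doubleword top")
+    # bias the address by +/-4 when CPSR.E is set so each word of a
+    # doubleword lands in the correct half under big-endian layout.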
+ microLdrDBFpUopCode = "Fa.uw = cSwap(Mem.uw, ((CPSR)Cpsr).e);"
+ microLdrDBFpUopIop = InstObjParams('ldrfp_uop', 'MicroLdrDBFpUop',
+ 'MicroMemOp',
+                                       {'memacc_code': microLdrDBFpUopCode,
+ 'ea_code': '''
+ EA = Rb + (up ? imm : -imm) +
+ (((CPSR)Cpsr).e ? 4 : 0);
+ ''',
+ 'predicate_test': predicateTest},
+ ['IsMicroop'])
+
+ microLdrDTFpUopCode = "Fa.uw = cSwap(Mem.uw, ((CPSR)Cpsr).e);"
+ microLdrDTFpUopIop = InstObjParams('ldrfp_uop', 'MicroLdrDTFpUop',
+ 'MicroMemOp',
+                                       {'memacc_code': microLdrDTFpUopCode,
+ 'ea_code': '''
+ EA = Rb + (up ? imm : -imm) -
+ (((CPSR)Cpsr).e ? 4 : 0);
+ ''',
+ 'predicate_test': predicateTest},
+ ['IsMicroop'])
microLdrRetUopCode = '''
CPSR cpsr = Cpsr;
@@ -98,10 +121,36 @@ let {{
'predicate_test': predicateTest},
['IsMicroop'])
+ microStrDBFpUopCode = "Mem = cSwap(Fa.uw, ((CPSR)Cpsr).e);"
+ microStrDBFpUopIop = InstObjParams('strfp_uop', 'MicroStrDBFpUop',
+ 'MicroMemOp',
+                                       {'memacc_code': microStrDBFpUopCode,
+ 'postacc_code': "",
+ 'ea_code': '''
+ EA = Rb + (up ? imm : -imm) +
+ (((CPSR)Cpsr).e ? 4 : 0);
+ ''',
+ 'predicate_test': predicateTest},
+ ['IsMicroop'])
+
+ microStrDTFpUopCode = "Mem = cSwap(Fa.uw, ((CPSR)Cpsr).e);"
+ microStrDTFpUopIop = InstObjParams('strfp_uop', 'MicroStrDTFpUop',
+ 'MicroMemOp',
+                                       {'memacc_code': microStrDTFpUopCode,
+ 'postacc_code': "",
+ 'ea_code': '''
+ EA = Rb + (up ? imm : -imm) -
+ (((CPSR)Cpsr).e ? 4 : 0);
+ ''',
+ 'predicate_test': predicateTest},
+ ['IsMicroop'])
+
header_output = decoder_output = exec_output = ''
- loadIops = (microLdrUopIop, microLdrFpUopIop, microLdrRetUopIop)
- storeIops = (microStrUopIop, microStrFpUopIop)
+ loadIops = (microLdrUopIop, microLdrRetUopIop,
+ microLdrFpUopIop, microLdrDBFpUopIop, microLdrDTFpUopIop)
+ storeIops = (microStrUopIop, microStrFpUopIop,
+ microStrDBFpUopIop, microStrDTFpUopIop)
for iop in loadIops + storeIops:
header_output += MicroMemDeclare.subst(iop)
decoder_output += MicroMemConstructor.subst(iop)
@@ -115,6 +164,403 @@ let {{
StoreCompleteAcc.subst(iop)
}};
+let {{
+ exec_output = header_output = ''
+
+ eaCode = 'EA = Ra + imm;'
+
+ for size in (1, 2, 3, 4, 6, 8, 12, 16):
+ # Set up the memory access.
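+        # regs is the number of 32-bit FP registers needed for "size" bytes.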
+ regs = (size + 3) // 4
+ subst = { "size" : size, "regs" : regs }
+ memDecl = '''
+ union MemUnion {
+ uint8_t bytes[%(size)d];
+ Element elements[%(size)d / sizeof(Element)];
+ uint32_t floatRegBits[%(regs)d];
+ };
+ ''' % subst
+
+ # Do endian conversion for all the elements.
+ convCode = '''
+ const unsigned eCount = sizeof(memUnion.elements) /
+ sizeof(memUnion.elements[0]);
+ if (((CPSR)Cpsr).e) {
+ for (unsigned i = 0; i < eCount; i++) {
+ memUnion.elements[i] = gtobe(memUnion.elements[i]);
+ }
+ } else {
+ for (unsigned i = 0; i < eCount; i++) {
+ memUnion.elements[i] = gtole(memUnion.elements[i]);
+ }
+ }
+ '''
+
+ # Offload everything into registers
+ regSetCode = ''
+ for reg in range(regs):
+ mask = ''
+ if reg == regs - 1:
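+            # When size is not a multiple of 4, only the low bytes of the
+            # last register are valid; mask off the rest.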
+ mask = ' & mask(%d)' % (32 - 8 * (regs * 4 - size))
+ regSetCode += '''
+ FpDestP%(reg)d.uw = gtoh(memUnion.floatRegBits[%(reg)d])%(mask)s;
+ ''' % { "reg" : reg, "mask" : mask }
+
+ # Pull everything in from registers
+ regGetCode = ''
+ for reg in range(regs):
+ regGetCode += '''
+ memUnion.floatRegBits[%(reg)d] = htog(FpDestP%(reg)d.uw);
+ ''' % { "reg" : reg }
+
+ loadMemAccCode = convCode + regSetCode
+ storeMemAccCode = regGetCode + convCode
+
+ loadIop = InstObjParams('ldrneon%(size)d_uop' % subst,
+ 'MicroLdrNeon%(size)dUop' % subst,
+ 'MicroNeonMemOp',
+ { 'mem_decl' : memDecl,
+ 'size' : size,
+ 'memacc_code' : loadMemAccCode,
+ 'ea_code' : eaCode,
+ 'predicate_test' : predicateTest },
+ [ 'IsMicroop', 'IsMemRef', 'IsLoad' ])
+ storeIop = InstObjParams('strneon%(size)d_uop' % subst,
+ 'MicroStrNeon%(size)dUop' % subst,
+ 'MicroNeonMemOp',
+ { 'mem_decl' : memDecl,
+ 'size' : size,
+ 'memacc_code' : storeMemAccCode,
+ 'ea_code' : eaCode,
+ 'predicate_test' : predicateTest },
+ [ 'IsMicroop', 'IsMemRef', 'IsStore' ])
+
+ exec_output += NeonLoadExecute.subst(loadIop) + \
+ NeonLoadInitiateAcc.subst(loadIop) + \
+ NeonLoadCompleteAcc.subst(loadIop) + \
+ NeonStoreExecute.subst(storeIop) + \
+ NeonStoreInitiateAcc.subst(storeIop) + \
+ NeonStoreCompleteAcc.subst(storeIop)
+ header_output += MicroNeonMemDeclare.subst(loadIop) + \
+ MicroNeonMemDeclare.subst(storeIop)
+}};
+
+let {{
+ exec_output = ''
+ for eSize, type in (1, 'uint8_t'), \
+ (2, 'uint16_t'), \
+ (4, 'uint32_t'), \
+ (8, 'uint64_t'):
+ # An instruction handles no more than 16 bytes and no more than
+ # 4 elements, or the number of elements needed to fill 8 or 16 bytes.
+ sizes = set((16, 8))
+ for count in 1, 2, 3, 4:
+ size = count * eSize
+ if size <= 16:
+ sizes.add(size)
+ for size in sizes:
+ substDict = {
+ "class_name" : "MicroLdrNeon%dUop" % size,
+ "targs" : type
+ }
+ exec_output += MicroNeonMemExecDeclare.subst(substDict)
+ substDict["class_name"] = "MicroStrNeon%dUop" % size
+ exec_output += MicroNeonMemExecDeclare.subst(substDict)
+}};
+
+////////////////////////////////////////////////////////////////////
+//
+// Neon (de)interleaving microops
+//
+
+let {{
+ header_output = exec_output = ''
+ for dRegs in (2, 3, 4):
+ loadConv = ''
+ unloadConv = ''
+ for dReg in range(dRegs):
+ loadConv += '''
+ conv1.cRegs[%(sReg0)d] = htog(FpOp1P%(sReg0)d.uw);
+ conv1.cRegs[%(sReg1)d] = htog(FpOp1P%(sReg1)d.uw);
+ ''' % { "sReg0" : (dReg * 2), "sReg1" : (dReg * 2 + 1) }
+ unloadConv += '''
+ FpDestS%(dReg)dP0.uw = gtoh(conv2.cRegs[2 * %(dReg)d + 0]);
+ FpDestS%(dReg)dP1.uw = gtoh(conv2.cRegs[2 * %(dReg)d + 1]);
+ ''' % { "dReg" : dReg }
+ microDeintNeonCode = '''
+ const unsigned dRegs = %(dRegs)d;
+ const unsigned regs = 2 * dRegs;
+ const unsigned perDReg = (2 * sizeof(FloatRegBits)) /
+ sizeof(Element);
+ union convStruct {
+ FloatRegBits cRegs[regs];
+ Element elements[dRegs * perDReg];
+ } conv1, conv2;
+
+ %(loadConv)s
+
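+            // De-interleave: destination register d collects source
+            // elements d, d + dRegs, d + 2 * dRegs, ...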
+ unsigned srcElem = 0;
+ for (unsigned destOffset = 0;
+ destOffset < perDReg; destOffset++) {
+ for (unsigned dReg = 0; dReg < dRegs; dReg++) {
+ conv2.elements[dReg * perDReg + destOffset] =
+ conv1.elements[srcElem++];
+ }
+ }
+
+ %(unloadConv)s
+ ''' % { "dRegs" : dRegs,
+ "loadConv" : loadConv,
+ "unloadConv" : unloadConv }
+ microDeintNeonIop = \
+ InstObjParams('deintneon%duop' % (dRegs * 2),
+ 'MicroDeintNeon%dUop' % (dRegs * 2),
+ 'MicroNeonMixOp',
+ { 'predicate_test': predicateTest,
+ 'code' : microDeintNeonCode },
+ ['IsMicroop'])
+ header_output += MicroNeonMixDeclare.subst(microDeintNeonIop)
+ exec_output += MicroNeonMixExecute.subst(microDeintNeonIop)
+
+ loadConv = ''
+ unloadConv = ''
+ for dReg in range(dRegs):
+ loadConv += '''
+ conv1.cRegs[2 * %(dReg)d + 0] = htog(FpOp1S%(dReg)dP0.uw);
+ conv1.cRegs[2 * %(dReg)d + 1] = htog(FpOp1S%(dReg)dP1.uw);
+ ''' % { "dReg" : dReg }
+ unloadConv += '''
+ FpDestP%(sReg0)d.uw = gtoh(conv2.cRegs[%(sReg0)d]);
+ FpDestP%(sReg1)d.uw = gtoh(conv2.cRegs[%(sReg1)d]);
+ ''' % { "sReg0" : (dReg * 2), "sReg1" : (dReg * 2 + 1) }
+ microInterNeonCode = '''
+ const unsigned dRegs = %(dRegs)d;
+ const unsigned regs = 2 * dRegs;
+ const unsigned perDReg = (2 * sizeof(FloatRegBits)) /
+ sizeof(Element);
+ union convStruct {
+ FloatRegBits cRegs[regs];
+ Element elements[dRegs * perDReg];
+ } conv1, conv2;
+
+ %(loadConv)s
+
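+            // Interleave: consecutive destination elements are drawn
+            // round-robin from the d registers.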
+ unsigned destElem = 0;
+ for (unsigned srcOffset = 0;
+ srcOffset < perDReg; srcOffset++) {
+ for (unsigned dReg = 0; dReg < dRegs; dReg++) {
+ conv2.elements[destElem++] =
+ conv1.elements[dReg * perDReg + srcOffset];
+ }
+ }
+
+ %(unloadConv)s
+ ''' % { "dRegs" : dRegs,
+ "loadConv" : loadConv,
+ "unloadConv" : unloadConv }
+ microInterNeonIop = \
+ InstObjParams('interneon%duop' % (dRegs * 2),
+ 'MicroInterNeon%dUop' % (dRegs * 2),
+ 'MicroNeonMixOp',
+ { 'predicate_test': predicateTest,
+ 'code' : microInterNeonCode },
+ ['IsMicroop'])
+ header_output += MicroNeonMixDeclare.subst(microInterNeonIop)
+ exec_output += MicroNeonMixExecute.subst(microInterNeonIop)
+}};
+
+let {{
+ exec_output = ''
+ for type in ('uint8_t', 'uint16_t', 'uint32_t', 'uint64_t'):
+ for dRegs in (2, 3, 4):
+ Name = "MicroDeintNeon%dUop" % (dRegs * 2)
+ substDict = { "class_name" : Name, "targs" : type }
+ exec_output += MicroNeonExecDeclare.subst(substDict)
+ Name = "MicroInterNeon%dUop" % (dRegs * 2)
+ substDict = { "class_name" : Name, "targs" : type }
+ exec_output += MicroNeonExecDeclare.subst(substDict)
+}};
+
+////////////////////////////////////////////////////////////////////
+//
+// Neon microops to pack/unpack a single lane
+//
+
+let {{
+ header_output = exec_output = ''
+ for sRegs in 1, 2:
+ baseLoadRegs = ''
+ for reg in range(sRegs):
+ baseLoadRegs += '''
+ sourceRegs.fRegs[%(reg0)d] = htog(FpOp1P%(reg0)d.uw);
+ sourceRegs.fRegs[%(reg1)d] = htog(FpOp1P%(reg1)d.uw);
+ ''' % { "reg0" : (2 * reg + 0),
+ "reg1" : (2 * reg + 1) }
+ for dRegs in range(sRegs, 5):
+ unloadRegs = ''
+ loadRegs = baseLoadRegs
+ for reg in range(dRegs):
+ loadRegs += '''
+ destRegs[%(reg)d].fRegs[0] = htog(FpDestS%(reg)dP0.uw);
+ destRegs[%(reg)d].fRegs[1] = htog(FpDestS%(reg)dP1.uw);
+ ''' % { "reg" : reg }
+ unloadRegs += '''
+ FpDestS%(reg)dP0.uw = gtoh(destRegs[%(reg)d].fRegs[0]);
+ FpDestS%(reg)dP1.uw = gtoh(destRegs[%(reg)d].fRegs[1]);
+ ''' % { "reg" : reg }
+ microUnpackNeonCode = '''
+ const unsigned perDReg = (2 * sizeof(FloatRegBits)) /
+ sizeof(Element);
+
+ union SourceRegs {
+ FloatRegBits fRegs[2 * %(sRegs)d];
+ Element elements[%(sRegs)d * perDReg];
+ } sourceRegs;
+
+ union DestReg {
+ FloatRegBits fRegs[2];
+ Element elements[perDReg];
+ } destRegs[%(dRegs)d];
+
+ %(loadRegs)s
+
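+            // Write source element i into lane "lane" of destination
+            // register i.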
+ for (unsigned i = 0; i < %(dRegs)d; i++) {
+ destRegs[i].elements[lane] = sourceRegs.elements[i];
+ }
+
+ %(unloadRegs)s
+ ''' % { "sRegs" : sRegs, "dRegs" : dRegs,
+ "loadRegs" : loadRegs, "unloadRegs" : unloadRegs }
+
+ microUnpackNeonIop = \
+ InstObjParams('unpackneon%dto%duop' % (sRegs * 2, dRegs * 2),
+ 'MicroUnpackNeon%dto%dUop' %
+ (sRegs * 2, dRegs * 2),
+ 'MicroNeonMixLaneOp',
+ { 'predicate_test': predicateTest,
+ 'code' : microUnpackNeonCode },
+ ['IsMicroop'])
+ header_output += MicroNeonMixLaneDeclare.subst(microUnpackNeonIop)
+ exec_output += MicroNeonMixExecute.subst(microUnpackNeonIop)
+
+ for sRegs in 1, 2:
+ loadRegs = ''
+ for reg in range(sRegs):
+ loadRegs += '''
+ sourceRegs.fRegs[%(reg0)d] = htog(FpOp1P%(reg0)d.uw);
+ sourceRegs.fRegs[%(reg1)d] = htog(FpOp1P%(reg1)d.uw);
+ ''' % { "reg0" : (2 * reg + 0),
+ "reg1" : (2 * reg + 1) }
+ for dRegs in range(sRegs, 5):
+ unloadRegs = ''
+ for reg in range(dRegs):
+ unloadRegs += '''
+ FpDestS%(reg)dP0.uw = gtoh(destRegs[%(reg)d].fRegs[0]);
+ FpDestS%(reg)dP1.uw = gtoh(destRegs[%(reg)d].fRegs[1]);
+ ''' % { "reg" : reg }
+ microUnpackAllNeonCode = '''
+ const unsigned perDReg = (2 * sizeof(FloatRegBits)) /
+ sizeof(Element);
+
+ union SourceRegs {
+ FloatRegBits fRegs[2 * %(sRegs)d];
+ Element elements[%(sRegs)d * perDReg];
+ } sourceRegs;
+
+ union DestReg {
+ FloatRegBits fRegs[2];
+ Element elements[perDReg];
+ } destRegs[%(dRegs)d];
+
+ %(loadRegs)s
+
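+            // Broadcast source element i across every lane of destination
+            // register i.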
+ for (unsigned i = 0; i < %(dRegs)d; i++) {
+ for (unsigned j = 0; j < perDReg; j++)
+ destRegs[i].elements[j] = sourceRegs.elements[i];
+ }
+
+ %(unloadRegs)s
+ ''' % { "sRegs" : sRegs, "dRegs" : dRegs,
+ "loadRegs" : loadRegs, "unloadRegs" : unloadRegs }
+
+ microUnpackAllNeonIop = \
+ InstObjParams('unpackallneon%dto%duop' % (sRegs * 2, dRegs * 2),
+ 'MicroUnpackAllNeon%dto%dUop' %
+ (sRegs * 2, dRegs * 2),
+ 'MicroNeonMixOp',
+ { 'predicate_test': predicateTest,
+ 'code' : microUnpackAllNeonCode },
+ ['IsMicroop'])
+ header_output += MicroNeonMixDeclare.subst(microUnpackAllNeonIop)
+ exec_output += MicroNeonMixExecute.subst(microUnpackAllNeonIop)
+
+ for dRegs in 1, 2:
+ unloadRegs = ''
+ for reg in range(dRegs):
+ unloadRegs += '''
+ FpDestP%(reg0)d.uw = gtoh(destRegs.fRegs[%(reg0)d]);
+ FpDestP%(reg1)d.uw = gtoh(destRegs.fRegs[%(reg1)d]);
+ ''' % { "reg0" : (2 * reg + 0),
+ "reg1" : (2 * reg + 1) }
+ for sRegs in range(dRegs, 5):
+ loadRegs = ''
+ for reg in range(sRegs):
+ loadRegs += '''
+ sourceRegs[%(reg)d].fRegs[0] = htog(FpOp1S%(reg)dP0.uw);
+ sourceRegs[%(reg)d].fRegs[1] = htog(FpOp1S%(reg)dP1.uw);
+ ''' % { "reg" : reg }
+ microPackNeonCode = '''
+ const unsigned perDReg = (2 * sizeof(FloatRegBits)) /
+ sizeof(Element);
+
+ union SourceReg {
+ FloatRegBits fRegs[2];
+ Element elements[perDReg];
+ } sourceRegs[%(sRegs)d];
+
+ union DestRegs {
+ FloatRegBits fRegs[2 * %(dRegs)d];
+ Element elements[%(dRegs)d * perDReg];
+ } destRegs;
+
+ %(loadRegs)s
+
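+            // Gather lane "lane" of each source register into consecutive
+            // destination elements.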
+ for (unsigned i = 0; i < %(sRegs)d; i++) {
+ destRegs.elements[i] = sourceRegs[i].elements[lane];
+ }
+
+ %(unloadRegs)s
+ ''' % { "sRegs" : sRegs, "dRegs" : dRegs,
+ "loadRegs" : loadRegs, "unloadRegs" : unloadRegs }
+
+ microPackNeonIop = \
+ InstObjParams('packneon%dto%duop' % (sRegs * 2, dRegs * 2),
+ 'MicroPackNeon%dto%dUop' %
+ (sRegs * 2, dRegs * 2),
+ 'MicroNeonMixLaneOp',
+ { 'predicate_test': predicateTest,
+ 'code' : microPackNeonCode },
+ ['IsMicroop'])
+ header_output += MicroNeonMixLaneDeclare.subst(microPackNeonIop)
+ exec_output += MicroNeonMixExecute.subst(microPackNeonIop)
+}};
+
+let {{
+ exec_output = ''
+ for type in ('uint8_t', 'uint16_t', 'uint32_t'):
+ for sRegs in 1, 2:
+ for dRegs in range(sRegs, 5):
+ for format in ("MicroUnpackNeon%(sRegs)dto%(dRegs)dUop",
+ "MicroUnpackAllNeon%(sRegs)dto%(dRegs)dUop",
+ "MicroPackNeon%(dRegs)dto%(sRegs)dUop"):
+ Name = format % { "sRegs" : sRegs * 2,
+ "dRegs" : dRegs * 2 }
+ substDict = { "class_name" : Name, "targs" : type }
+ exec_output += MicroNeonExecDeclare.subst(substDict)
+}};
+
////////////////////////////////////////////////////////////////////
//
// Integer = Integer op Immediate microops
@@ -122,23 +568,32 @@ let {{
let {{
microAddiUopIop = InstObjParams('addi_uop', 'MicroAddiUop',
- 'MicroIntOp',
+ 'MicroIntImmOp',
{'code': 'Ra = Rb + imm;',
'predicate_test': predicateTest},
['IsMicroop'])
+ microAddUopIop = InstObjParams('add_uop', 'MicroAddUop',
+ 'MicroIntOp',
+ {'code': 'Ra = Rb + Rc;',
+ 'predicate_test': predicateTest},
+ ['IsMicroop'])
+
microSubiUopIop = InstObjParams('subi_uop', 'MicroSubiUop',
- 'MicroIntOp',
+ 'MicroIntImmOp',
{'code': 'Ra = Rb - imm;',
'predicate_test': predicateTest},
['IsMicroop'])
- header_output = MicroIntDeclare.subst(microAddiUopIop) + \
- MicroIntDeclare.subst(microSubiUopIop)
- decoder_output = MicroIntConstructor.subst(microAddiUopIop) + \
- MicroIntConstructor.subst(microSubiUopIop)
+ header_output = MicroIntImmDeclare.subst(microAddiUopIop) + \
+ MicroIntImmDeclare.subst(microSubiUopIop) + \
+ MicroIntDeclare.subst(microAddUopIop)
+ decoder_output = MicroIntImmConstructor.subst(microAddiUopIop) + \
+ MicroIntImmConstructor.subst(microSubiUopIop) + \
+ MicroIntConstructor.subst(microAddUopIop)
exec_output = PredOpExecute.subst(microAddiUopIop) + \
- PredOpExecute.subst(microSubiUopIop)
+ PredOpExecute.subst(microSubiUopIop) + \
+ PredOpExecute.subst(microAddUopIop)
}};
let {{
@@ -146,6 +601,22 @@ let {{
header_output = MacroMemDeclare.subst(iop)
decoder_output = MacroMemConstructor.subst(iop)
+ iop = InstObjParams("vldmult", "VldMult", 'VldMultOp', "", [])
+ header_output += VMemMultDeclare.subst(iop)
+ decoder_output += VMemMultConstructor.subst(iop)
+
+ iop = InstObjParams("vldsingle", "VldSingle", 'VldSingleOp', "", [])
+ header_output += VMemSingleDeclare.subst(iop)
+ decoder_output += VMemSingleConstructor.subst(iop)
+
+ iop = InstObjParams("vstmult", "VstMult", 'VstMultOp', "", [])
+ header_output += VMemMultDeclare.subst(iop)
+ decoder_output += VMemMultConstructor.subst(iop)
+
+ iop = InstObjParams("vstsingle", "VstSingle", 'VstSingleOp', "", [])
+ header_output += VMemSingleDeclare.subst(iop)
+ decoder_output += VMemSingleConstructor.subst(iop)
+
vfpIop = InstObjParams("vldmstm", "VLdmStm", 'MacroVFPMemOp', "", [])
header_output += MacroVFPMemDeclare.subst(vfpIop)
decoder_output += MacroVFPMemConstructor.subst(vfpIop)
diff --git a/src/arch/arm/isa/insts/neon.isa b/src/arch/arm/isa/insts/neon.isa
new file mode 100644
index 000000000..b629c6fe8
--- /dev/null
+++ b/src/arch/arm/isa/insts/neon.isa
@@ -0,0 +1,3343 @@
+// -*- mode:c++ -*-
+
+// Copyright (c) 2010 ARM Limited
+// All rights reserved
+//
+// The license below extends only to copyright in the software and shall
+// not be construed as granting a license to any other intellectual
+// property including but not limited to intellectual property relating
+// to a hardware implementation of the functionality of the software
+// licensed hereunder. You may use the software subject to the license
+// terms below provided that you ensure that this notice is replicated
+// unmodified and in its entirety in all distributions of the software,
+// modified or unmodified, in source code or in binary form.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Gabe Black
+
+output header {{
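+    // Decoder helper naming: a leading U/S picks unsigned or signed element
+    // types (US variants take a notSigned flag), Three/Two counts register
+    // operands, and the suffix gives the legal shapes: "UReg" includes the
+    // 64-bit case, "USReg" stops at 32 bits, and "SReg"/"Reg" additionally
+    // choose between the D (BaseD) and Q (BaseQ) forms with the q flag.
+    // A hypothetical use, with made-up instruction classes and operands:
+    //     decodeNeonUThreeReg<VaddD, VaddQ>(q, size, machInst, vd, vn, vm);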
+ template <template <typename T> class Base>
+ StaticInstPtr
+ decodeNeonUThreeUReg(unsigned size,
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1, IntRegIndex op2)
+ {
+ switch (size) {
+ case 0:
+ return new Base<uint8_t>(machInst, dest, op1, op2);
+ case 1:
+ return new Base<uint16_t>(machInst, dest, op1, op2);
+ case 2:
+ return new Base<uint32_t>(machInst, dest, op1, op2);
+ case 3:
+ return new Base<uint64_t>(machInst, dest, op1, op2);
+ default:
+ return new Unknown(machInst);
+ }
+ }
+
+ template <template <typename T> class Base>
+ StaticInstPtr
+ decodeNeonSThreeUReg(unsigned size,
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1, IntRegIndex op2)
+ {
+ switch (size) {
+ case 0:
+ return new Base<int8_t>(machInst, dest, op1, op2);
+ case 1:
+ return new Base<int16_t>(machInst, dest, op1, op2);
+ case 2:
+ return new Base<int32_t>(machInst, dest, op1, op2);
+ case 3:
+ return new Base<int64_t>(machInst, dest, op1, op2);
+ default:
+ return new Unknown(machInst);
+ }
+ }
+
+ template <template <typename T> class Base>
+ StaticInstPtr
+ decodeNeonUSThreeUReg(bool notSigned, unsigned size,
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1, IntRegIndex op2)
+ {
+ if (notSigned) {
+ return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
+ } else {
+ return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
+ }
+ }
+
+ template <template <typename T> class Base>
+ StaticInstPtr
+ decodeNeonUThreeUSReg(unsigned size,
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1, IntRegIndex op2)
+ {
+ switch (size) {
+ case 0:
+ return new Base<uint8_t>(machInst, dest, op1, op2);
+ case 1:
+ return new Base<uint16_t>(machInst, dest, op1, op2);
+ case 2:
+ return new Base<uint32_t>(machInst, dest, op1, op2);
+ default:
+ return new Unknown(machInst);
+ }
+ }
+
+ template <template <typename T> class Base>
+ StaticInstPtr
+ decodeNeonSThreeUSReg(unsigned size,
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1, IntRegIndex op2)
+ {
+ switch (size) {
+ case 0:
+ return new Base<int8_t>(machInst, dest, op1, op2);
+ case 1:
+ return new Base<int16_t>(machInst, dest, op1, op2);
+ case 2:
+ return new Base<int32_t>(machInst, dest, op1, op2);
+ default:
+ return new Unknown(machInst);
+ }
+ }
+
+ template <template <typename T> class Base>
+ StaticInstPtr
+ decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1, IntRegIndex op2)
+ {
+ if (notSigned) {
+ return decodeNeonUThreeUSReg<Base>(
+ size, machInst, dest, op1, op2);
+ } else {
+ return decodeNeonSThreeUSReg<Base>(
+ size, machInst, dest, op1, op2);
+ }
+ }
+
+ template <template <typename T> class BaseD,
+ template <typename T> class BaseQ>
+ StaticInstPtr
+ decodeNeonUThreeSReg(bool q, unsigned size,
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1, IntRegIndex op2)
+ {
+ if (q) {
+ return decodeNeonUThreeUSReg<BaseQ>(
+ size, machInst, dest, op1, op2);
+ } else {
+ return decodeNeonUThreeUSReg<BaseD>(
+ size, machInst, dest, op1, op2);
+ }
+ }
+
+ template <template <typename T> class BaseD,
+ template <typename T> class BaseQ>
+ StaticInstPtr
+ decodeNeonSThreeSReg(bool q, unsigned size,
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1, IntRegIndex op2)
+ {
+ if (q) {
+ return decodeNeonSThreeUSReg<BaseQ>(
+ size, machInst, dest, op1, op2);
+ } else {
+ return decodeNeonSThreeUSReg<BaseD>(
+ size, machInst, dest, op1, op2);
+ }
+ }
+
+ template <template <typename T> class BaseD,
+ template <typename T> class BaseQ>
+ StaticInstPtr
+ decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1, IntRegIndex op2)
+ {
+ if (notSigned) {
+ return decodeNeonUThreeSReg<BaseD, BaseQ>(
+ q, size, machInst, dest, op1, op2);
+ } else {
+ return decodeNeonSThreeSReg<BaseD, BaseQ>(
+ q, size, machInst, dest, op1, op2);
+ }
+ }
+
+ template <template <typename T> class BaseD,
+ template <typename T> class BaseQ>
+ StaticInstPtr
+ decodeNeonUThreeReg(bool q, unsigned size,
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1, IntRegIndex op2)
+ {
+ if (q) {
+ return decodeNeonUThreeUReg<BaseQ>(
+ size, machInst, dest, op1, op2);
+ } else {
+ return decodeNeonUThreeUReg<BaseD>(
+ size, machInst, dest, op1, op2);
+ }
+ }
+
+ template <template <typename T> class BaseD,
+ template <typename T> class BaseQ>
+ StaticInstPtr
+ decodeNeonSThreeReg(bool q, unsigned size,
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1, IntRegIndex op2)
+ {
+ if (q) {
+ return decodeNeonSThreeUReg<BaseQ>(
+ size, machInst, dest, op1, op2);
+ } else {
+ return decodeNeonSThreeUReg<BaseD>(
+ size, machInst, dest, op1, op2);
+ }
+ }
+
+ template <template <typename T> class BaseD,
+ template <typename T> class BaseQ>
+ StaticInstPtr
+ decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1, IntRegIndex op2)
+ {
+ if (notSigned) {
+ return decodeNeonUThreeReg<BaseD, BaseQ>(
+ q, size, machInst, dest, op1, op2);
+ } else {
+ return decodeNeonSThreeReg<BaseD, BaseQ>(
+ q, size, machInst, dest, op1, op2);
+ }
+ }
+
+ template <template <typename T> class BaseD,
+ template <typename T> class BaseQ>
+ StaticInstPtr
+ decodeNeonUTwoShiftReg(bool q, unsigned size,
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1, uint64_t imm)
+ {
+ if (q) {
+ switch (size) {
+ case 0:
+ return new BaseQ<uint8_t>(machInst, dest, op1, imm);
+ case 1:
+ return new BaseQ<uint16_t>(machInst, dest, op1, imm);
+ case 2:
+ return new BaseQ<uint32_t>(machInst, dest, op1, imm);
+ case 3:
+ return new BaseQ<uint64_t>(machInst, dest, op1, imm);
+ default:
+ return new Unknown(machInst);
+ }
+ } else {
+ switch (size) {
+ case 0:
+ return new BaseD<uint8_t>(machInst, dest, op1, imm);
+ case 1:
+ return new BaseD<uint16_t>(machInst, dest, op1, imm);
+ case 2:
+ return new BaseD<uint32_t>(machInst, dest, op1, imm);
+ case 3:
+ return new BaseD<uint64_t>(machInst, dest, op1, imm);
+ default:
+ return new Unknown(machInst);
+ }
+ }
+ }
+
+ template <template <typename T> class BaseD,
+ template <typename T> class BaseQ>
+ StaticInstPtr
+ decodeNeonSTwoShiftReg(bool q, unsigned size,
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1, uint64_t imm)
+ {
+ if (q) {
+ switch (size) {
+ case 0:
+ return new BaseQ<int8_t>(machInst, dest, op1, imm);
+ case 1:
+ return new BaseQ<int16_t>(machInst, dest, op1, imm);
+ case 2:
+ return new BaseQ<int32_t>(machInst, dest, op1, imm);
+ case 3:
+ return new BaseQ<int64_t>(machInst, dest, op1, imm);
+ default:
+ return new Unknown(machInst);
+ }
+ } else {
+ switch (size) {
+ case 0:
+ return new BaseD<int8_t>(machInst, dest, op1, imm);
+ case 1:
+ return new BaseD<int16_t>(machInst, dest, op1, imm);
+ case 2:
+ return new BaseD<int32_t>(machInst, dest, op1, imm);
+ case 3:
+ return new BaseD<int64_t>(machInst, dest, op1, imm);
+ default:
+ return new Unknown(machInst);
+ }
+ }
+ }
+
+
+ template <template <typename T> class BaseD,
+ template <typename T> class BaseQ>
+ StaticInstPtr
+ decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1, uint64_t imm)
+ {
+ if (notSigned) {
+ return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
+ q, size, machInst, dest, op1, imm);
+ } else {
+ return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
+ q, size, machInst, dest, op1, imm);
+ }
+ }
+
+ template <template <typename T> class Base>
+ StaticInstPtr
+ decodeNeonUTwoShiftUSReg(unsigned size,
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1, uint64_t imm)
+ {
+ switch (size) {
+ case 0:
+ return new Base<uint8_t>(machInst, dest, op1, imm);
+ case 1:
+ return new Base<uint16_t>(machInst, dest, op1, imm);
+ case 2:
+ return new Base<uint32_t>(machInst, dest, op1, imm);
+ default:
+ return new Unknown(machInst);
+ }
+ }
+
+ template <template <typename T> class BaseD,
+ template <typename T> class BaseQ>
+ StaticInstPtr
+ decodeNeonUTwoShiftSReg(bool q, unsigned size,
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1, uint64_t imm)
+ {
+ if (q) {
+ return decodeNeonUTwoShiftUSReg<BaseQ>(
+ size, machInst, dest, op1, imm);
+ } else {
+ return decodeNeonUTwoShiftUSReg<BaseD>(
+ size, machInst, dest, op1, imm);
+ }
+ }
+
+ template <template <typename T> class Base>
+ StaticInstPtr
+ decodeNeonSTwoShiftUSReg(unsigned size,
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1, uint64_t imm)
+ {
+ switch (size) {
+ case 0:
+ return new Base<int8_t>(machInst, dest, op1, imm);
+ case 1:
+ return new Base<int16_t>(machInst, dest, op1, imm);
+ case 2:
+ return new Base<int32_t>(machInst, dest, op1, imm);
+ default:
+ return new Unknown(machInst);
+ }
+ }
+
+ template <template <typename T> class BaseD,
+ template <typename T> class BaseQ>
+ StaticInstPtr
+ decodeNeonSTwoShiftSReg(bool q, unsigned size,
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1, uint64_t imm)
+ {
+ if (q) {
+ return decodeNeonSTwoShiftUSReg<BaseQ>(
+ size, machInst, dest, op1, imm);
+ } else {
+ return decodeNeonSTwoShiftUSReg<BaseD>(
+ size, machInst, dest, op1, imm);
+ }
+ }
+
+ template <template <typename T> class BaseD,
+ template <typename T> class BaseQ>
+ StaticInstPtr
+ decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1, uint64_t imm)
+ {
+ if (notSigned) {
+ return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
+ q, size, machInst, dest, op1, imm);
+ } else {
+ return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
+ q, size, machInst, dest, op1, imm);
+ }
+ }
+
+ template <template <typename T> class Base>
+ StaticInstPtr
+ decodeNeonUTwoMiscUSReg(unsigned size,
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1)
+ {
+ switch (size) {
+ case 0:
+ return new Base<uint8_t>(machInst, dest, op1);
+ case 1:
+ return new Base<uint16_t>(machInst, dest, op1);
+ case 2:
+ return new Base<uint32_t>(machInst, dest, op1);
+ default:
+ return new Unknown(machInst);
+ }
+ }
+
+ template <template <typename T> class Base>
+ StaticInstPtr
+ decodeNeonSTwoMiscUSReg(unsigned size,
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1)
+ {
+ switch (size) {
+ case 0:
+ return new Base<int8_t>(machInst, dest, op1);
+ case 1:
+ return new Base<int16_t>(machInst, dest, op1);
+ case 2:
+ return new Base<int32_t>(machInst, dest, op1);
+ default:
+ return new Unknown(machInst);
+ }
+ }
+
+ template <template <typename T> class BaseD,
+ template <typename T> class BaseQ>
+ StaticInstPtr
+ decodeNeonUTwoMiscSReg(bool q, unsigned size,
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1)
+ {
+ if (q) {
+ return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
+ } else {
+ return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
+ }
+ }
+
+ template <template <typename T> class BaseD,
+ template <typename T> class BaseQ>
+ StaticInstPtr
+ decodeNeonSTwoMiscSReg(bool q, unsigned size,
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1)
+ {
+ if (q) {
+ return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
+ } else {
+ return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
+ }
+ }
+
+ template <template <typename T> class Base>
+ StaticInstPtr
+ decodeNeonUTwoMiscUReg(unsigned size,
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1)
+ {
+ switch (size) {
+ case 0:
+ return new Base<uint8_t>(machInst, dest, op1);
+ case 1:
+ return new Base<uint16_t>(machInst, dest, op1);
+ case 2:
+ return new Base<uint32_t>(machInst, dest, op1);
+ case 3:
+ return new Base<uint64_t>(machInst, dest, op1);
+ default:
+ return new Unknown(machInst);
+ }
+ }
+
+ template <template <typename T> class Base>
+ StaticInstPtr
+ decodeNeonSTwoMiscUReg(unsigned size,
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1)
+ {
+ switch (size) {
+ case 0:
+ return new Base<int8_t>(machInst, dest, op1);
+ case 1:
+ return new Base<int16_t>(machInst, dest, op1);
+ case 2:
+ return new Base<int32_t>(machInst, dest, op1);
+ case 3:
+ return new Base<int64_t>(machInst, dest, op1);
+ default:
+ return new Unknown(machInst);
+ }
+ }
+
+ template <template <typename T> class BaseD,
+ template <typename T> class BaseQ>
+ StaticInstPtr
+ decodeNeonSTwoMiscReg(bool q, unsigned size,
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1)
+ {
+ if (q) {
+ return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
+ } else {
+ return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
+ }
+ }
+
+ template <template <typename T> class BaseD,
+ template <typename T> class BaseQ>
+ StaticInstPtr
+ decodeNeonUTwoMiscReg(bool q, unsigned size,
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1)
+ {
+ if (q) {
+ return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
+ } else {
+ return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
+ }
+ }
+
+ template <template <typename T> class BaseD,
+ template <typename T> class BaseQ>
+ StaticInstPtr
+ decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
+ ExtMachInst machInst, IntRegIndex dest,
+ IntRegIndex op1)
+ {
+ if (notSigned) {
+            return decodeNeonUTwoMiscSReg<BaseD, BaseQ>(
+ q, size, machInst, dest, op1);
+ } else {
+            return decodeNeonSTwoMiscSReg<BaseD, BaseQ>(
+ q, size, machInst, dest, op1);
+ }
+ }
+
+}};
+
+output exec {{
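+    // Helpers for the Neon FP compares. Each returns 0.0 when the
+    // comparison holds, 1.0 when it fails, and 2.0 when an operand is a
+    // signaling NaN; the generated code is assumed to turn these results
+    // into all-ones/all-zeros elements and FPSCR flag updates.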
+ static float
+ vcgtFunc(float op1, float op2)
+ {
+ if (isSnan(op1) || isSnan(op2))
+ return 2.0;
+ return (op1 > op2) ? 0.0 : 1.0;
+ }
+
+ static float
+ vcgeFunc(float op1, float op2)
+ {
+ if (isSnan(op1) || isSnan(op2))
+ return 2.0;
+ return (op1 >= op2) ? 0.0 : 1.0;
+ }
+
+ static float
+ vceqFunc(float op1, float op2)
+ {
+ if (isSnan(op1) || isSnan(op2))
+ return 2.0;
+ return (op1 == op2) ? 0.0 : 1.0;
+ }
+
+ static float
+ vcleFunc(float op1, float op2)
+ {
+ if (isSnan(op1) || isSnan(op2))
+ return 2.0;
+ return (op1 <= op2) ? 0.0 : 1.0;
+ }
+
+ static float
+ vcltFunc(float op1, float op2)
+ {
+ if (isSnan(op1) || isSnan(op2))
+ return 2.0;
+ return (op1 < op2) ? 0.0 : 1.0;
+ }
+
+ static float
+ vacgtFunc(float op1, float op2)
+ {
+ if (isSnan(op1) || isSnan(op2))
+ return 2.0;
+ return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
+ }
+
+ static float
+ vacgeFunc(float op1, float op2)
+ {
+ if (isSnan(op1) || isSnan(op2))
+ return 2.0;
+ return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
+ }
+}};
+
+let {{
+
+ header_output = ""
+ exec_output = ""
+
+ smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
+ unsignedTypes = smallUnsignedTypes + ("uint64_t",)
+ smallSignedTypes = ("int8_t", "int16_t", "int32_t")
+ signedTypes = smallSignedTypes + ("int64_t",)
+ smallTypes = smallUnsignedTypes + smallSignedTypes
+ allTypes = unsignedTypes + signedTypes
+
+ def threeEqualRegInst(name, Name, types, rCount, op,
+ readDest=False, pairwise=False):
+ global header_output, exec_output
+ eWalkCode = '''
+ RegVect srcReg1, srcReg2, destReg;
+ '''
+ for reg in range(rCount):
+ eWalkCode += '''
+ srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
+ srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
+ ''' % { "reg" : reg }
+ if readDest:
+ eWalkCode += '''
+ destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
+ ''' % { "reg" : reg }
+ readDestCode = ''
+ if readDest:
+ readDestCode = 'destElem = gtoh(destReg.elements[i]);'
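+        # Pairwise ops treat srcReg1:srcReg2 as one long vector and combine
+        # adjacent element pairs.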
+ if pairwise:
+ eWalkCode += '''
+ for (unsigned i = 0; i < eCount; i++) {
+ Element srcElem1 = gtoh(2 * i < eCount ?
+ srcReg1.elements[2 * i] :
+ srcReg2.elements[2 * i - eCount]);
+ Element srcElem2 = gtoh(2 * i < eCount ?
+ srcReg1.elements[2 * i + 1] :
+ srcReg2.elements[2 * i + 1 - eCount]);
+ Element destElem;
+ %(readDest)s
+ %(op)s
+ destReg.elements[i] = htog(destElem);
+ }
+ ''' % { "op" : op, "readDest" : readDestCode }
+ else:
+ eWalkCode += '''
+ for (unsigned i = 0; i < eCount; i++) {
+ Element srcElem1 = gtoh(srcReg1.elements[i]);
+ Element srcElem2 = gtoh(srcReg2.elements[i]);
+ Element destElem;
+ %(readDest)s
+ %(op)s
+ destReg.elements[i] = htog(destElem);
+ }
+ ''' % { "op" : op, "readDest" : readDestCode }
+ for reg in range(rCount):
+ eWalkCode += '''
+ FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
+ ''' % { "reg" : reg }
+ iop = InstObjParams(name, Name,
+ "RegRegRegOp",
+ { "code": eWalkCode,
+ "r_count": rCount,
+ "predicate_test": predicateTest }, [])
+ header_output += NeonRegRegRegOpDeclare.subst(iop)
+ exec_output += NeonEqualRegExecute.subst(iop)
+ for type in types:
+ substDict = { "targs" : type,
+ "class_name" : Name }
+ exec_output += NeonExecDeclare.subst(substDict)
+
+ def threeEqualRegInstFp(name, Name, types, rCount, op,
+ readDest=False, pairwise=False, toInt=False):
+ global header_output, exec_output
+ eWalkCode = '''
+ typedef FloatReg FloatVect[rCount];
+ FloatVect srcRegs1, srcRegs2;
+ '''
+ if toInt:
+ eWalkCode += 'RegVect destRegs;\n'
+ else:
+ eWalkCode += 'FloatVect destRegs;\n'
+ for reg in range(rCount):
+ eWalkCode += '''
+ srcRegs1[%(reg)d] = FpOp1P%(reg)d;
+ srcRegs2[%(reg)d] = FpOp2P%(reg)d;
+ ''' % { "reg" : reg }
+ if readDest:
+ if toInt:
+ eWalkCode += '''
+ destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
+ ''' % { "reg" : reg }
+ else:
+ eWalkCode += '''
+ destRegs[%(reg)d] = FpDestP%(reg)d;
+ ''' % { "reg" : reg }
+ readDestCode = ''
+ if readDest:
+ readDestCode = 'destReg = destRegs[r];'
+ destType = 'FloatReg'
+ writeDest = 'destRegs[r] = destReg;'
+ if toInt:
+ destType = 'FloatRegBits'
+ writeDest = 'destRegs.regs[r] = destReg;'
+ if pairwise:
+ eWalkCode += '''
+ for (unsigned r = 0; r < rCount; r++) {
+ FloatReg srcReg1 = (2 * r < rCount) ?
+ srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
+ FloatReg srcReg2 = (2 * r < rCount) ?
+ srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
+ %(destType)s destReg;
+ %(readDest)s
+ %(op)s
+ %(writeDest)s
+ }
+ ''' % { "op" : op,
+ "readDest" : readDestCode,
+ "destType" : destType,
+ "writeDest" : writeDest }
+ else:
+ eWalkCode += '''
+ for (unsigned r = 0; r < rCount; r++) {
+ FloatReg srcReg1 = srcRegs1[r];
+ FloatReg srcReg2 = srcRegs2[r];
+ %(destType)s destReg;
+ %(readDest)s
+ %(op)s
+ %(writeDest)s
+ }
+ ''' % { "op" : op,
+ "readDest" : readDestCode,
+ "destType" : destType,
+ "writeDest" : writeDest }
+ for reg in range(rCount):
+ if toInt:
+ eWalkCode += '''
+ FpDestP%(reg)d.uw = destRegs.regs[%(reg)d];
+ ''' % { "reg" : reg }
+ else:
+ eWalkCode += '''
+ FpDestP%(reg)d = destRegs[%(reg)d];
+ ''' % { "reg" : reg }
+ iop = InstObjParams(name, Name,
+ "FpRegRegRegOp",
+ { "code": eWalkCode,
+ "r_count": rCount,
+ "predicate_test": predicateTest }, [])
+ header_output += NeonRegRegRegOpDeclare.subst(iop)
+ exec_output += NeonEqualRegExecute.subst(iop)
+ for type in types:
+ substDict = { "targs" : type,
+ "class_name" : Name }
+ exec_output += NeonExecDeclare.subst(substDict)
+
+ def threeUnequalRegInst(name, Name, types, op,
+ bigSrc1, bigSrc2, bigDest, readDest):
+ global header_output, exec_output
+ src1Cnt = src2Cnt = destCnt = 2
+ src1Prefix = src2Prefix = destPrefix = ''
+ if bigSrc1:
+ src1Cnt = 4
+ src1Prefix = 'Big'
+ if bigSrc2:
+ src2Cnt = 4
+ src2Prefix = 'Big'
+ if bigDest:
+ destCnt = 4
+ destPrefix = 'Big'
+ eWalkCode = '''
+ %sRegVect srcReg1;
+ %sRegVect srcReg2;
+ %sRegVect destReg;
+ ''' % (src1Prefix, src2Prefix, destPrefix)
+ for reg in range(src1Cnt):
+ eWalkCode += '''
+ srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
+ ''' % { "reg" : reg }
+ for reg in range(src2Cnt):
+ eWalkCode += '''
+ srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
+ ''' % { "reg" : reg }
+ if readDest:
+ for reg in range(destCnt):
+ eWalkCode += '''
+ destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
+ ''' % { "reg" : reg }
+ readDestCode = ''
+ if readDest:
+ readDestCode = 'destElem = gtoh(destReg.elements[i]);'
+ eWalkCode += '''
+ for (unsigned i = 0; i < eCount; i++) {
+ %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
+ %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
+ %(destPrefix)sElement destElem;
+ %(readDest)s
+ %(op)s
+ destReg.elements[i] = htog(destElem);
+ }
+ ''' % { "op" : op, "readDest" : readDestCode,
+ "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
+ "destPrefix" : destPrefix }
+ for reg in range(destCnt):
+ eWalkCode += '''
+ FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
+ ''' % { "reg" : reg }
+ iop = InstObjParams(name, Name,
+ "RegRegRegOp",
+ { "code": eWalkCode,
+ "r_count": 2,
+ "predicate_test": predicateTest }, [])
+ header_output += NeonRegRegRegOpDeclare.subst(iop)
+ exec_output += NeonUnequalRegExecute.subst(iop)
+ for type in types:
+ substDict = { "targs" : type,
+ "class_name" : Name }
+ exec_output += NeonExecDeclare.subst(substDict)
+
+ def threeRegNarrowInst(name, Name, types, op, readDest=False):
+ threeUnequalRegInst(name, Name, types, op,
+ True, True, False, readDest)
+
+ def threeRegLongInst(name, Name, types, op, readDest=False):
+ threeUnequalRegInst(name, Name, types, op,
+ False, False, True, readDest)
+
+ def threeRegWideInst(name, Name, types, op, readDest=False):
+ threeUnequalRegInst(name, Name, types, op,
+ True, False, True, readDest)
+
+ def twoEqualRegInst(name, Name, types, rCount, op, readDest=False):
+ global header_output, exec_output
+ eWalkCode = '''
+ RegVect srcReg1, srcReg2, destReg;
+ '''
+ for reg in range(rCount):
+ eWalkCode += '''
+ srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
+ srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
+ ''' % { "reg" : reg }
+ if readDest:
+ eWalkCode += '''
+ destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
+ ''' % { "reg" : reg }
+ readDestCode = ''
+ if readDest:
+ readDestCode = 'destElem = gtoh(destReg.elements[i]);'
+ eWalkCode += '''
+ assert(imm >= 0 && imm < eCount);
+ for (unsigned i = 0; i < eCount; i++) {
+ Element srcElem1 = gtoh(srcReg1.elements[i]);
+ Element srcElem2 = gtoh(srcReg2.elements[imm]);
+ Element destElem;
+ %(readDest)s
+ %(op)s
+ destReg.elements[i] = htog(destElem);
+ }
+ ''' % { "op" : op, "readDest" : readDestCode }
+ for reg in range(rCount):
+ eWalkCode += '''
+ FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
+ ''' % { "reg" : reg }
+ iop = InstObjParams(name, Name,
+ "RegRegRegImmOp",
+ { "code": eWalkCode,
+ "r_count": rCount,
+ "predicate_test": predicateTest }, [])
+ header_output += NeonRegRegRegImmOpDeclare.subst(iop)
+ exec_output += NeonEqualRegExecute.subst(iop)
+ for type in types:
+ substDict = { "targs" : type,
+ "class_name" : Name }
+ exec_output += NeonExecDeclare.subst(substDict)
+
+ def twoRegLongInst(name, Name, types, op, readDest=False):
+ global header_output, exec_output
+ rCount = 2
+ eWalkCode = '''
+ RegVect srcReg1, srcReg2;
+ BigRegVect destReg;
+ '''
+ for reg in range(rCount):
+ eWalkCode += '''
+ srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
+ srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
+ ''' % { "reg" : reg }
+ if readDest:
+ for reg in range(2 * rCount):
+ eWalkCode += '''
+ destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
+ ''' % { "reg" : reg }
+ readDestCode = ''
+ if readDest:
+ readDestCode = 'destElem = gtoh(destReg.elements[i]);'
+ eWalkCode += '''
+ assert(imm >= 0 && imm < eCount);
+ for (unsigned i = 0; i < eCount; i++) {
+ Element srcElem1 = gtoh(srcReg1.elements[i]);
+ Element srcElem2 = gtoh(srcReg2.elements[imm]);
+ BigElement destElem;
+ %(readDest)s
+ %(op)s
+ destReg.elements[i] = htog(destElem);
+ }
+ ''' % { "op" : op, "readDest" : readDestCode }
+ for reg in range(2 * rCount):
+ eWalkCode += '''
+ FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
+ ''' % { "reg" : reg }
+ iop = InstObjParams(name, Name,
+ "RegRegRegImmOp",
+ { "code": eWalkCode,
+ "r_count": rCount,
+ "predicate_test": predicateTest }, [])
+ header_output += NeonRegRegRegImmOpDeclare.subst(iop)
+ exec_output += NeonUnequalRegExecute.subst(iop)
+ for type in types:
+ substDict = { "targs" : type,
+ "class_name" : Name }
+ exec_output += NeonExecDeclare.subst(substDict)
+
+ def twoEqualRegInstFp(name, Name, types, rCount, op, readDest=False):
+ global header_output, exec_output
+ eWalkCode = '''
+ typedef FloatReg FloatVect[rCount];
+ FloatVect srcRegs1, srcRegs2, destRegs;
+ '''
+ for reg in range(rCount):
+ eWalkCode += '''
+ srcRegs1[%(reg)d] = FpOp1P%(reg)d;
+ srcRegs2[%(reg)d] = FpOp2P%(reg)d;
+ ''' % { "reg" : reg }
+ if readDest:
+ eWalkCode += '''
+ destRegs[%(reg)d] = FpDestP%(reg)d;
+ ''' % { "reg" : reg }
+ readDestCode = ''
+ if readDest:
+ readDestCode = 'destReg = destRegs[i];'
+ eWalkCode += '''
+ assert(imm >= 0 && imm < rCount);
+ for (unsigned i = 0; i < rCount; i++) {
+ FloatReg srcReg1 = srcRegs1[i];
+ FloatReg srcReg2 = srcRegs2[imm];
+ FloatReg destReg;
+ %(readDest)s
+ %(op)s
+ destRegs[i] = destReg;
+ }
+ ''' % { "op" : op, "readDest" : readDestCode }
+ for reg in range(rCount):
+ eWalkCode += '''
+ FpDestP%(reg)d = destRegs[%(reg)d];
+ ''' % { "reg" : reg }
+ iop = InstObjParams(name, Name,
+ "FpRegRegRegImmOp",
+ { "code": eWalkCode,
+ "r_count": rCount,
+ "predicate_test": predicateTest }, [])
+ header_output += NeonRegRegRegImmOpDeclare.subst(iop)
+ exec_output += NeonEqualRegExecute.subst(iop)
+ for type in types:
+ substDict = { "targs" : type,
+ "class_name" : Name }
+ exec_output += NeonExecDeclare.subst(substDict)
+
+ def twoRegShiftInst(name, Name, types, rCount, op,
+ readDest=False, toInt=False, fromInt=False):
+ global header_output, exec_output
+ eWalkCode = '''
+ RegVect srcRegs1, destRegs;
+ '''
+ for reg in range(rCount):
+ eWalkCode += '''
+ srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
+ ''' % { "reg" : reg }
+ if readDest:
+ eWalkCode += '''
+ destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
+ ''' % { "reg" : reg }
+ readDestCode = ''
+ if readDest:
+ readDestCode = 'destElem = gtoh(destRegs.elements[i]);'
+ if toInt:
+ readDestCode = 'destReg = gtoh(destRegs.regs[i]);'
+ readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'
+ if fromInt:
+ readOpCode = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);'
+ declDest = 'Element destElem;'
+ writeDestCode = 'destRegs.elements[i] = htog(destElem);'
+ if toInt:
+ declDest = 'FloatRegBits destReg;'
+ writeDestCode = 'destRegs.regs[i] = htog(destReg);'
+ eWalkCode += '''
+ for (unsigned i = 0; i < eCount; i++) {
+ %(readOp)s
+ %(declDest)s
+ %(readDest)s
+ %(op)s
+ %(writeDest)s
+ }
+ ''' % { "readOp" : readOpCode,
+ "declDest" : declDest,
+ "readDest" : readDestCode,
+ "op" : op,
+ "writeDest" : writeDestCode }
+ for reg in range(rCount):
+ eWalkCode += '''
+ FpDestP%(reg)d.uw = gtoh(destRegs.regs[%(reg)d]);
+ ''' % { "reg" : reg }
+ iop = InstObjParams(name, Name,
+ "RegRegImmOp",
+ { "code": eWalkCode,
+ "r_count": rCount,
+ "predicate_test": predicateTest }, [])
+ header_output += NeonRegRegImmOpDeclare.subst(iop)
+ exec_output += NeonEqualRegExecute.subst(iop)
+ for type in types:
+ substDict = { "targs" : type,
+ "class_name" : Name }
+ exec_output += NeonExecDeclare.subst(substDict)
+
+ def twoRegNarrowShiftInst(name, Name, types, op, readDest=False):
+ global header_output, exec_output
+ eWalkCode = '''
+ BigRegVect srcReg1;
+ RegVect destReg;
+ '''
+ for reg in range(4):
+ eWalkCode += '''
+ srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
+ ''' % { "reg" : reg }
+ if readDest:
+ for reg in range(2):
+ eWalkCode += '''
+ destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
+ ''' % { "reg" : reg }
+ readDestCode = ''
+ if readDest:
+ readDestCode = 'destElem = gtoh(destReg.elements[i]);'
+ eWalkCode += '''
+ for (unsigned i = 0; i < eCount; i++) {
+ BigElement srcElem1 = gtoh(srcReg1.elements[i]);
+ Element destElem;
+ %(readDest)s
+ %(op)s
+ destReg.elements[i] = htog(destElem);
+ }
+ ''' % { "op" : op, "readDest" : readDestCode }
+ for reg in range(2):
+ eWalkCode += '''
+ FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
+ ''' % { "reg" : reg }
+ iop = InstObjParams(name, Name,
+ "RegRegImmOp",
+ { "code": eWalkCode,
+ "r_count": 2,
+ "predicate_test": predicateTest }, [])
+ header_output += NeonRegRegImmOpDeclare.subst(iop)
+ exec_output += NeonUnequalRegExecute.subst(iop)
+ for type in types:
+ substDict = { "targs" : type,
+ "class_name" : Name }
+ exec_output += NeonExecDeclare.subst(substDict)
+
+ def twoRegLongShiftInst(name, Name, types, op, readDest=False):
+ global header_output, exec_output
+ eWalkCode = '''
+ RegVect srcReg1;
+ BigRegVect destReg;
+ '''
+ for reg in range(2):
+ eWalkCode += '''
+ srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
+ ''' % { "reg" : reg }
+ if readDest:
+ for reg in range(4):
+ eWalkCode += '''
+ destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
+ ''' % { "reg" : reg }
+ readDestCode = ''
+ if readDest:
+ readDestCode = 'destElem = gtoh(destReg.elements[i]);'
+ eWalkCode += '''
+ for (unsigned i = 0; i < eCount; i++) {
+ Element srcElem1 = gtoh(srcReg1.elements[i]);
+ BigElement destElem;
+ %(readDest)s
+ %(op)s
+ destReg.elements[i] = htog(destElem);
+ }
+ ''' % { "op" : op, "readDest" : readDestCode }
+ for reg in range(4):
+ eWalkCode += '''
+ FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
+ ''' % { "reg" : reg }
+ iop = InstObjParams(name, Name,
+ "RegRegImmOp",
+ { "code": eWalkCode,
+ "r_count": 2,
+ "predicate_test": predicateTest }, [])
+ header_output += NeonRegRegImmOpDeclare.subst(iop)
+ exec_output += NeonUnequalRegExecute.subst(iop)
+ for type in types:
+ substDict = { "targs" : type,
+ "class_name" : Name }
+ exec_output += NeonExecDeclare.subst(substDict)
+
+ def twoRegMiscInst(name, Name, types, rCount, op, readDest=False):
+ global header_output, exec_output
+ eWalkCode = '''
+ RegVect srcReg1, destReg;
+ '''
+ for reg in range(rCount):
+ eWalkCode += '''
+ srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
+ ''' % { "reg" : reg }
+ if readDest:
+ eWalkCode += '''
+ destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
+ ''' % { "reg" : reg }
+ readDestCode = ''
+ if readDest:
+ readDestCode = 'destElem = gtoh(destReg.elements[i]);'
+ eWalkCode += '''
+ for (unsigned i = 0; i < eCount; i++) {
+ unsigned j = i;
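+ // The op may reassign j to permute where the result is
+ // written (the vrev-style ops rely on this).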
+ Element srcElem1 = gtoh(srcReg1.elements[i]);
+ Element destElem;
+ %(readDest)s
+ %(op)s
+ destReg.elements[j] = htog(destElem);
+ }
+ ''' % { "op" : op, "readDest" : readDestCode }
+ for reg in range(rCount):
+ eWalkCode += '''
+ FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
+ ''' % { "reg" : reg }
+ iop = InstObjParams(name, Name,
+ "RegRegOp",
+ { "code": eWalkCode,
+ "r_count": rCount,
+ "predicate_test": predicateTest }, [])
+ header_output += NeonRegRegOpDeclare.subst(iop)
+ exec_output += NeonEqualRegExecute.subst(iop)
+ for type in types:
+ substDict = { "targs" : type,
+ "class_name" : Name }
+ exec_output += NeonExecDeclare.subst(substDict)
+
+ def twoRegMiscScInst(name, Name, types, rCount, op, readDest=False):
+ global header_output, exec_output
+ eWalkCode = '''
+ RegVect srcReg1, destReg;
+ '''
+ for reg in range(rCount):
+ eWalkCode += '''
+ srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
+ ''' % { "reg" : reg }
+ if readDest:
+ eWalkCode += '''
+ destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
+ ''' % { "reg" : reg }
+ readDestCode = ''
+ if readDest:
+ readDestCode = 'destElem = gtoh(destReg.elements[i]);'
+ eWalkCode += '''
+ for (unsigned i = 0; i < eCount; i++) {
+ Element srcElem1 = gtoh(srcReg1.elements[imm]);
+ Element destElem;
+ %(readDest)s
+ %(op)s
+ destReg.elements[i] = htog(destElem);
+ }
+ ''' % { "op" : op, "readDest" : readDestCode }
+ for reg in range(rCount):
+ eWalkCode += '''
+ FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
+ ''' % { "reg" : reg }
+ iop = InstObjParams(name, Name,
+ "RegRegImmOp",
+ { "code": eWalkCode,
+ "r_count": rCount,
+ "predicate_test": predicateTest }, [])
+ header_output += NeonRegRegImmOpDeclare.subst(iop)
+ exec_output += NeonEqualRegExecute.subst(iop)
+ for type in types:
+ substDict = { "targs" : type,
+ "class_name" : Name }
+ exec_output += NeonExecDeclare.subst(substDict)
+
+ def twoRegMiscScramble(name, Name, types, rCount, op, readDest=False):
+ global header_output, exec_output
+ eWalkCode = '''
+ RegVect srcReg1, destReg;
+ '''
+ for reg in range(rCount):
+ eWalkCode += '''
+ srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
+ destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
+ ''' % { "reg" : reg }
+ eWalkCode += op
+ for reg in range(rCount):
+ eWalkCode += '''
+ FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
+ FpOp1P%(reg)d.uw = gtoh(srcReg1.regs[%(reg)d]);
+ ''' % { "reg" : reg }
+ iop = InstObjParams(name, Name,
+ "RegRegOp",
+ { "code": eWalkCode,
+ "r_count": rCount,
+ "predicate_test": predicateTest }, [])
+ header_output += NeonRegRegOpDeclare.subst(iop)
+ exec_output += NeonEqualRegExecute.subst(iop)
+ for type in types:
+ substDict = { "targs" : type,
+ "class_name" : Name }
+ exec_output += NeonExecDeclare.subst(substDict)
+
+ def twoRegMiscInstFp(name, Name, types, rCount, op,
+ readDest=False, toInt=False):
+ global header_output, exec_output
+ eWalkCode = '''
+ typedef FloatReg FloatVect[rCount];
+ FloatVect srcRegs1;
+ '''
+ if toInt:
+ eWalkCode += 'RegVect destRegs;\n'
+ else:
+ eWalkCode += 'FloatVect destRegs;\n'
+ for reg in range(rCount):
+ eWalkCode += '''
+ srcRegs1[%(reg)d] = FpOp1P%(reg)d;
+ ''' % { "reg" : reg }
+ if readDest:
+ if toInt:
+ eWalkCode += '''
+ destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
+ ''' % { "reg" : reg }
+ else:
+ eWalkCode += '''
+ destRegs[%(reg)d] = FpDestP%(reg)d;
+ ''' % { "reg" : reg }
+ readDestCode = ''
+ if readDest:
+ readDestCode = 'destReg = destRegs[r];'
+ destType = 'FloatReg'
+ writeDest = 'destRegs[r] = destReg;'
+ if toInt:
+ destType = 'FloatRegBits'
+ writeDest = 'destRegs.regs[r] = destReg;'
+ eWalkCode += '''
+ for (unsigned r = 0; r < rCount; r++) {
+ FloatReg srcReg1 = srcRegs1[r];
+ %(destType)s destReg;
+ %(readDest)s
+ %(op)s
+ %(writeDest)s
+ }
+ ''' % { "op" : op,
+ "readDest" : readDestCode,
+ "destType" : destType,
+ "writeDest" : writeDest }
+ for reg in range(rCount):
+ if toInt:
+ eWalkCode += '''
+ FpDestP%(reg)d.uw = destRegs.regs[%(reg)d];
+ ''' % { "reg" : reg }
+ else:
+ eWalkCode += '''
+ FpDestP%(reg)d = destRegs[%(reg)d];
+ ''' % { "reg" : reg }
+ iop = InstObjParams(name, Name,
+ "FpRegRegOp",
+ { "code": eWalkCode,
+ "r_count": rCount,
+ "predicate_test": predicateTest }, [])
+ header_output += NeonRegRegOpDeclare.subst(iop)
+ exec_output += NeonEqualRegExecute.subst(iop)
+ for type in types:
+ substDict = { "targs" : type,
+ "class_name" : Name }
+ exec_output += NeonExecDeclare.subst(substDict)
+
+ def twoRegCondenseInst(name, Name, types, rCount, op, readDest=False):
+ global header_output, exec_output
+ eWalkCode = '''
+ RegVect srcRegs;
+ BigRegVect destReg;
+ '''
+ for reg in range(rCount):
+ eWalkCode += '''
+ srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
+ ''' % { "reg" : reg }
+ if readDest:
+ eWalkCode += '''
+ destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
+ ''' % { "reg" : reg }
+ readDestCode = ''
+ if readDest:
+ readDestCode = 'destElem = gtoh(destReg.elements[i]);'
+ eWalkCode += '''
+ for (unsigned i = 0; i < eCount / 2; i++) {
+ Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
+ Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
+ BigElement destElem;
+ %(readDest)s
+ %(op)s
+ destReg.elements[i] = htog(destElem);
+ }
+ ''' % { "op" : op, "readDest" : readDestCode }
+ for reg in range(rCount):
+ eWalkCode += '''
+ FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
+ ''' % { "reg" : reg }
+ iop = InstObjParams(name, Name,
+ "RegRegOp",
+ { "code": eWalkCode,
+ "r_count": rCount,
+ "predicate_test": predicateTest }, [])
+ header_output += NeonRegRegOpDeclare.subst(iop)
+ exec_output += NeonUnequalRegExecute.subst(iop)
+ for type in types:
+ substDict = { "targs" : type,
+ "class_name" : Name }
+ exec_output += NeonExecDeclare.subst(substDict)
+
+ def twoRegNarrowMiscInst(name, Name, types, op, readDest=False):
+ global header_output, exec_output
+ eWalkCode = '''
+ BigRegVect srcReg1;
+ RegVect destReg;
+ '''
+ for reg in range(4):
+ eWalkCode += '''
+ srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
+ ''' % { "reg" : reg }
+ if readDest:
+ for reg in range(2):
+ eWalkCode += '''
+ destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
+ ''' % { "reg" : reg }
+ readDestCode = ''
+ if readDest:
+ readDestCode = 'destElem = gtoh(destReg.elements[i]);'
+ eWalkCode += '''
+ for (unsigned i = 0; i < eCount; i++) {
+ BigElement srcElem1 = gtoh(srcReg1.elements[i]);
+ Element destElem;
+ %(readDest)s
+ %(op)s
+ destReg.elements[i] = htog(destElem);
+ }
+ ''' % { "op" : op, "readDest" : readDestCode }
+ for reg in range(2):
+ eWalkCode += '''
+ FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
+ ''' % { "reg" : reg }
+ iop = InstObjParams(name, Name,
+ "RegRegOp",
+ { "code": eWalkCode,
+ "r_count": 2,
+ "predicate_test": predicateTest }, [])
+ header_output += NeonRegRegOpDeclare.subst(iop)
+ exec_output += NeonUnequalRegExecute.subst(iop)
+ for type in types:
+ substDict = { "targs" : type,
+ "class_name" : Name }
+ exec_output += NeonExecDeclare.subst(substDict)
+
+ def oneRegImmInst(name, Name, types, rCount, op, readDest=False):
+ global header_output, exec_output
+ eWalkCode = '''
+ RegVect destReg;
+ '''
+ if readDest:
+ for reg in range(rCount):
+ eWalkCode += '''
+ destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
+ ''' % { "reg" : reg }
+ readDestCode = ''
+ if readDest:
+ readDestCode = 'destElem = gtoh(destReg.elements[i]);'
+ eWalkCode += '''
+ for (unsigned i = 0; i < eCount; i++) {
+ Element destElem;
+ %(readDest)s
+ %(op)s
+ destReg.elements[i] = htog(destElem);
+ }
+ ''' % { "op" : op, "readDest" : readDestCode }
+ for reg in range(rCount):
+ eWalkCode += '''
+ FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
+ ''' % { "reg" : reg }
+ iop = InstObjParams(name, Name,
+ "RegImmOp",
+ { "code": eWalkCode,
+ "r_count": rCount,
+ "predicate_test": predicateTest }, [])
+ header_output += NeonRegImmOpDeclare.subst(iop)
+ exec_output += NeonEqualRegExecute.subst(iop)
+ for type in types:
+ substDict = { "targs" : type,
+ "class_name" : Name }
+ exec_output += NeonExecDeclare.subst(substDict)
+
+ def twoRegLongMiscInst(name, Name, types, op, readDest=False):
+ global header_output, exec_output
+ eWalkCode = '''
+ RegVect srcReg1;
+ BigRegVect destReg;
+ '''
+ for reg in range(2):
+ eWalkCode += '''
+ srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
+ ''' % { "reg" : reg }
+ if readDest:
+ for reg in range(4):
+ eWalkCode += '''
+ destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
+ ''' % { "reg" : reg }
+ readDestCode = ''
+ if readDest:
+ readDestCode = 'destElem = gtoh(destReg.elements[i]);'
+ eWalkCode += '''
+ for (unsigned i = 0; i < eCount; i++) {
+ Element srcElem1 = gtoh(srcReg1.elements[i]);
+ BigElement destElem;
+ %(readDest)s
+ %(op)s
+ destReg.elements[i] = htog(destElem);
+ }
+ ''' % { "op" : op, "readDest" : readDestCode }
+ for reg in range(4):
+ eWalkCode += '''
+ FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
+ ''' % { "reg" : reg }
+ iop = InstObjParams(name, Name,
+ "RegRegOp",
+ { "code": eWalkCode,
+ "r_count": 2,
+ "predicate_test": predicateTest }, [])
+ header_output += NeonRegRegOpDeclare.subst(iop)
+ exec_output += NeonUnequalRegExecute.subst(iop)
+ for type in types:
+ substDict = { "targs" : type,
+ "class_name" : Name }
+ exec_output += NeonExecDeclare.subst(substDict)
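+ # Each helper above stitches the per-element "op" snippet into a C++
+ # walk over the register parts, then emits one explicit template
+ # instantiation per element type through NeonExecDeclare, so a single
+ # call such as threeEqualRegInst("vadd", ...) yields, e.g.,
+ # NVaddD<uint8_t> through NVaddD<uint64_t>.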
+
+ vhaddCode = '''
+ Element carryBit =
+ (((unsigned)srcElem1 & 0x1) +
+ ((unsigned)srcElem2 & 0x1)) >> 1;
+ // Use division instead of a shift to ensure the sign extension works
+ // right. The compiler will figure out if it can be a shift. Mask the
+ // inputs so they get truncated correctly.
+ destElem = (((srcElem1 & ~(Element)1) / 2) +
+ ((srcElem2 & ~(Element)1) / 2)) + carryBit;
+ '''
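+ # Worked example (illustrative only): for int8_t, vhadd(-1, 1) gives
+ # carryBit = (1 + 1) >> 1 = 1 and ((-2 / 2) + (0 / 2)) + 1 = 0,
+ # matching the ideal (-1 + 1) >> 1 = 0 without needing a 9-bit
+ # intermediate sum.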
+ threeEqualRegInst("vhadd", "VhaddD", allTypes, 2, vhaddCode)
+ threeEqualRegInst("vhadd", "VhaddQ", allTypes, 4, vhaddCode)
+
+ vrhaddCode = '''
+ Element carryBit =
+ (((unsigned)srcElem1 & 0x1) +
+ ((unsigned)srcElem2 & 0x1) + 1) >> 1;
+ // Use division instead of a shift to ensure the sign extension works
+ // right. The compiler will figure out if it can be a shift. Mask the
+ // inputs so they get truncated correctly.
+ destElem = (((srcElem1 & ~(Element)1) / 2) +
+ ((srcElem2 & ~(Element)1) / 2)) + carryBit;
+ '''
+ threeEqualRegInst("vrhadd", "VrhaddD", allTypes, 2, vrhaddCode)
+ threeEqualRegInst("vrhadd", "VrhaddQ", allTypes, 4, vrhaddCode)
+
+ vhsubCode = '''
+ Element borrowBit =
+ (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
+ // Use division instead of a shift to ensure the sign extension works
+ // right. The compiler will figure out if it can be a shift. Mask the
+ // inputs so they get truncated correctly.
+ destElem = (((srcElem1 & ~(Element)1) / 2) -
+ ((srcElem2 & ~(Element)1) / 2)) - borrowBit;
+ '''
+ threeEqualRegInst("vhsub", "VhsubD", allTypes, 2, vhsubCode)
+ threeEqualRegInst("vhsub", "VhsubQ", allTypes, 4, vhsubCode)
+
+ vandCode = '''
+ destElem = srcElem1 & srcElem2;
+ '''
+ threeEqualRegInst("vand", "VandD", unsignedTypes, 2, vandCode)
+ threeEqualRegInst("vand", "VandQ", unsignedTypes, 4, vandCode)
+
+ vbicCode = '''
+ destElem = srcElem1 & ~srcElem2;
+ '''
+ threeEqualRegInst("vbic", "VbicD", unsignedTypes, 2, vbicCode)
+ threeEqualRegInst("vbic", "VbicQ", unsignedTypes, 4, vbicCode)
+
+ vorrCode = '''
+ destElem = srcElem1 | srcElem2;
+ '''
+ threeEqualRegInst("vorr", "VorrD", unsignedTypes, 2, vorrCode)
+ threeEqualRegInst("vorr", "VorrQ", unsignedTypes, 4, vorrCode)
+
+ threeEqualRegInst("vmov", "VmovD", unsignedTypes, 2, vorrCode)
+ threeEqualRegInst("vmov", "VmovQ", unsignedTypes, 4, vorrCode)
+
+ vornCode = '''
+ destElem = srcElem1 | ~srcElem2;
+ '''
+ threeEqualRegInst("vorn", "VornD", unsignedTypes, 2, vornCode)
+ threeEqualRegInst("vorn", "VornQ", unsignedTypes, 4, vornCode)
+
+ veorCode = '''
+ destElem = srcElem1 ^ srcElem2;
+ '''
+ threeEqualRegInst("veor", "VeorD", unsignedTypes, 2, veorCode)
+ threeEqualRegInst("veor", "VeorQ", unsignedTypes, 4, veorCode)
+
+ vbifCode = '''
+ destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
+ '''
+ threeEqualRegInst("vbif", "VbifD", unsignedTypes, 2, vbifCode, True)
+ threeEqualRegInst("vbif", "VbifQ", unsignedTypes, 4, vbifCode, True)
+ vbitCode = '''
+ destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
+ '''
+ threeEqualRegInst("vbit", "VbitD", unsignedTypes, 2, vbitCode, True)
+ threeEqualRegInst("vbit", "VbitQ", unsignedTypes, 4, vbitCode, True)
+ vbslCode = '''
+ destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
+ '''
+ threeEqualRegInst("vbsl", "VbslD", unsignedTypes, 2, vbslCode, True)
+ threeEqualRegInst("vbsl", "VbslQ", unsignedTypes, 4, vbslCode, True)
+
+ vmaxCode = '''
+ destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
+ '''
+ threeEqualRegInst("vmax", "VmaxD", allTypes, 2, vmaxCode)
+ threeEqualRegInst("vmax", "VmaxQ", allTypes, 4, vmaxCode)
+
+ vminCode = '''
+ destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
+ '''
+ threeEqualRegInst("vmin", "VminD", allTypes, 2, vminCode)
+ threeEqualRegInst("vmin", "VminQ", allTypes, 4, vminCode)
+
+ vaddCode = '''
+ destElem = srcElem1 + srcElem2;
+ '''
+ threeEqualRegInst("vadd", "NVaddD", unsignedTypes, 2, vaddCode)
+ threeEqualRegInst("vadd", "NVaddQ", unsignedTypes, 4, vaddCode)
+
+ threeEqualRegInst("vpadd", "NVpaddD", unsignedTypes,
+ 2, vaddCode, pairwise=True)
+ threeEqualRegInst("vpadd", "NVpaddQ", unsignedTypes,
+ 4, vaddCode, pairwise=True)
+ vaddlwCode = '''
+ destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
+ '''
+ threeRegLongInst("vaddl", "Vaddl", smallTypes, vaddlwCode)
+ threeRegWideInst("vaddw", "Vaddw", smallTypes, vaddlwCode)
+ vaddhnCode = '''
+ destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
+ (sizeof(Element) * 8);
+ '''
+ threeRegNarrowInst("vaddhn", "Vaddhn", smallTypes, vaddhnCode)
+ vraddhnCode = '''
+ destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
+ ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
+ (sizeof(Element) * 8);
+ '''
+ threeRegNarrowInst("vraddhn", "Vraddhn", smallTypes, vraddhnCode)
+
+ vsubCode = '''
+ destElem = srcElem1 - srcElem2;
+ '''
+ threeEqualRegInst("vsub", "NVsubD", unsignedTypes, 2, vsubCode)
+ threeEqualRegInst("vsub", "NVsubQ", unsignedTypes, 4, vsubCode)
+ vsublwCode = '''
+ destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
+ '''
+ threeRegLongInst("vsubl", "Vsubl", smallTypes, vsublwCode)
+ threeRegWideInst("vsubw", "Vsubw", smallTypes, vsublwCode)
+
+ vqaddUCode = '''
+ destElem = srcElem1 + srcElem2;
+ FPSCR fpscr = (FPSCR)Fpscr;
+ if (destElem < srcElem1 || destElem < srcElem2) {
+ destElem = (Element)(-1);
+ fpscr.qc = 1;
+ }
+ Fpscr = fpscr;
+ '''
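+ # Example: with uint8_t, 0xf0 + 0x20 wraps to 0x10, which is smaller
+ # than either source, so the result saturates to 0xff and QC is set.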
+ threeEqualRegInst("vqadd", "VqaddUD", unsignedTypes, 2, vqaddUCode)
+ threeEqualRegInst("vqadd", "VqaddUQ", unsignedTypes, 4, vqaddUCode)
+ vsubhnCode = '''
+ destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
+ (sizeof(Element) * 8);
+ '''
+ threeRegNarrowInst("vsubhn", "Vsubhn", smallTypes, vsubhnCode)
+ vrsubhnCode = '''
+ destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
+ ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
+ (sizeof(Element) * 8);
+ '''
+ threeRegNarrowInst("vrsubhn", "Vrsubhn", smallTypes, vrsubhnCode)
+
+ vqaddSCode = '''
+ destElem = srcElem1 + srcElem2;
+ FPSCR fpscr = (FPSCR)Fpscr;
+ bool negDest = (destElem < 0);
+ bool negSrc1 = (srcElem1 < 0);
+ bool negSrc2 = (srcElem2 < 0);
+ if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
+ destElem = (Element)1 << (sizeof(Element) * 8 - 1);
+ if (negDest)
+ destElem -= 1;
+ fpscr.qc = 1;
+ }
+ Fpscr = fpscr;
+ '''
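+ # Example: with int8_t, 0x70 + 0x20 wraps to a negative 0x90 while
+ # both sources are positive, so the result saturates to 0x7f;
+ # overflow in the negative direction saturates to 0x80 instead.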
+ threeEqualRegInst("vqadd", "VqaddSD", signedTypes, 2, vqaddSCode)
+ threeEqualRegInst("vqadd", "VqaddSQ", signedTypes, 4, vqaddSCode)
+
+ vqsubUCode = '''
+ destElem = srcElem1 - srcElem2;
+ FPSCR fpscr = (FPSCR)Fpscr;
+ if (destElem > srcElem1) {
+ destElem = 0;
+ fpscr.qc = 1;
+ }
+ Fpscr = fpscr;
+ '''
+ threeEqualRegInst("vqsub", "VqsubUD", unsignedTypes, 2, vqsubUCode)
+ threeEqualRegInst("vqsub", "VqsubUQ", unsignedTypes, 4, vqsubUCode)
+
+ vqsubSCode = '''
+ destElem = srcElem1 - srcElem2;
+ FPSCR fpscr = (FPSCR)Fpscr;
+ bool negDest = (destElem < 0);
+ bool negSrc1 = (srcElem1 < 0);
+ bool posSrc2 = (srcElem2 >= 0);
+ if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
+ destElem = (Element)1 << (sizeof(Element) * 8 - 1);
+ if (negDest)
+ destElem -= 1;
+ fpscr.qc = 1;
+ }
+ Fpscr = fpscr;
+ '''
+ threeEqualRegInst("vqsub", "VqsubSD", signedTypes, 2, vqsubSCode)
+ threeEqualRegInst("vqsub", "VqsubSQ", signedTypes, 4, vqsubSCode)
+
+ vcgtCode = '''
+ destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;
+ '''
+ threeEqualRegInst("vcgt", "VcgtD", allTypes, 2, vcgtCode)
+ threeEqualRegInst("vcgt", "VcgtQ", allTypes, 4, vcgtCode)
+
+ vcgeCode = '''
+ destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
+ '''
+ threeEqualRegInst("vcge", "VcgeD", allTypes, 2, vcgeCode)
+ threeEqualRegInst("vcge", "VcgeQ", allTypes, 4, vcgeCode)
+
+ vceqCode = '''
+ destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0;
+ '''
+ threeEqualRegInst("vceq", "VceqD", unsignedTypes, 2, vceqCode)
+ threeEqualRegInst("vceq", "VceqQ", unsignedTypes, 4, vceqCode)
+
+ vshlCode = '''
+ int16_t shiftAmt = (int8_t)srcElem2;
+ if (shiftAmt < 0) {
+ shiftAmt = -shiftAmt;
+ if (shiftAmt >= sizeof(Element) * 8) {
+ shiftAmt = sizeof(Element) * 8 - 1;
+ destElem = 0;
+ } else {
+ destElem = (srcElem1 >> shiftAmt);
+ }
+ // Make sure the right shift sign-extends when it should.
+ if (srcElem1 < 0 && destElem >= 0) {
+ destElem |= -((Element)1 << (sizeof(Element) * 8 -
+ 1 - shiftAmt));
+ }
+ } else {
+ if (shiftAmt >= sizeof(Element) * 8) {
+ destElem = 0;
+ } else {
+ destElem = srcElem1 << shiftAmt;
+ }
+ }
+ '''
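+ # Example: srcElem2 = 0xf8 encodes shiftAmt = -8; for int8_t that is
+ # a full-width right shift, so the clamp to 7 plus the sign fixup
+ # yield 0 for non-negative inputs and -1 for negative ones.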
+ threeEqualRegInst("vshl", "VshlD", allTypes, 2, vshlCode)
+ threeEqualRegInst("vshl", "VshlQ", allTypes, 4, vshlCode)
+
+ vrshlCode = '''
+ int16_t shiftAmt = (int8_t)srcElem2;
+ if (shiftAmt < 0) {
+ shiftAmt = -shiftAmt;
+ Element rBit = 0;
+ if (shiftAmt <= sizeof(Element) * 8)
+ rBit = bits(srcElem1, shiftAmt - 1);
+ if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
+ rBit = 1;
+ if (shiftAmt >= sizeof(Element) * 8) {
+ shiftAmt = sizeof(Element) * 8 - 1;
+ destElem = 0;
+ } else {
+ destElem = (srcElem1 >> shiftAmt);
+ }
+ // Make sure the right shift sign-extends when it should.
+ if (srcElem1 < 0 && destElem >= 0) {
+ destElem |= -((Element)1 << (sizeof(Element) * 8 -
+ 1 - shiftAmt));
+ }
+ destElem += rBit;
+ } else if (shiftAmt > 0) {
+ if (shiftAmt >= sizeof(Element) * 8) {
+ destElem = 0;
+ } else {
+ destElem = srcElem1 << shiftAmt;
+ }
+ } else {
+ destElem = srcElem1;
+ }
+ '''
+ threeEqualRegInst("vrshl", "VrshlD", allTypes, 2, vrshlCode)
+ threeEqualRegInst("vrshl", "VrshlQ", allTypes, 4, vrshlCode)
+
+ vqshlUCode = '''
+ int16_t shiftAmt = (int8_t)srcElem2;
+ FPSCR fpscr = (FPSCR)Fpscr;
+ if (shiftAmt < 0) {
+ shiftAmt = -shiftAmt;
+ if (shiftAmt >= sizeof(Element) * 8) {
+ shiftAmt = sizeof(Element) * 8 - 1;
+ destElem = 0;
+ } else {
+ destElem = (srcElem1 >> shiftAmt);
+ }
+ // Make sure the right shift sign-extends when it should.
+ if (srcElem1 < 0 && destElem >= 0) {
+ destElem |= -((Element)1 << (sizeof(Element) * 8 -
+ 1 - shiftAmt));
+ }
+ } else if (shiftAmt > 0) {
+ if (shiftAmt >= sizeof(Element) * 8) {
+ if (srcElem1 != 0) {
+ destElem = mask(sizeof(Element) * 8);
+ fpscr.qc = 1;
+ } else {
+ destElem = 0;
+ }
+ } else {
+ if (bits(srcElem1, sizeof(Element) * 8 - 1,
+ sizeof(Element) * 8 - shiftAmt)) {
+ destElem = mask(sizeof(Element) * 8);
+ fpscr.qc = 1;
+ } else {
+ destElem = srcElem1 << shiftAmt;
+ }
+ }
+ } else {
+ destElem = srcElem1;
+ }
+ Fpscr = fpscr;
+ '''
+ threeEqualRegInst("vqshl", "VqshlUD", unsignedTypes, 2, vqshlUCode)
+ threeEqualRegInst("vqshl", "VqshlUQ", unsignedTypes, 4, vqshlUCode)
+
+ vqshlSCode = '''
+ int16_t shiftAmt = (int8_t)srcElem2;
+ FPSCR fpscr = (FPSCR)Fpscr;
+ if (shiftAmt < 0) {
+ shiftAmt = -shiftAmt;
+ if (shiftAmt >= sizeof(Element) * 8) {
+ shiftAmt = sizeof(Element) * 8 - 1;
+ destElem = 0;
+ } else {
+ destElem = (srcElem1 >> shiftAmt);
+ }
+ // Make sure the right shift sign-extends when it should.
+ if (srcElem1 < 0 && destElem >= 0) {
+ destElem |= -((Element)1 << (sizeof(Element) * 8 -
+ 1 - shiftAmt));
+ }
+ } else if (shiftAmt > 0) {
+ bool sat = false;
+ if (shiftAmt >= sizeof(Element) * 8) {
+ if (srcElem1 != 0)
+ sat = true;
+ else
+ destElem = 0;
+ } else {
+ if (bits(srcElem1, sizeof(Element) * 8 - 1,
+ sizeof(Element) * 8 - 1 - shiftAmt) !=
+ ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
+ sat = true;
+ } else {
+ destElem = srcElem1 << shiftAmt;
+ }
+ }
+ if (sat) {
+ fpscr.qc = 1;
+ destElem = mask(sizeof(Element) * 8 - 1);
+ if (srcElem1 < 0)
+ destElem = ~destElem;
+ }
+ } else {
+ destElem = srcElem1;
+ }
+ Fpscr = fpscr;
+ '''
+ threeEqualRegInst("vqshl", "VqshlSD", signedTypes, 2, vqshlSCode)
+ threeEqualRegInst("vqshl", "VqshlSQ", signedTypes, 4, vqshlSCode)
+
+ vqrshlUCode = '''
+ int16_t shiftAmt = (int8_t)srcElem2;
+ FPSCR fpscr = (FPSCR)Fpscr;
+ if (shiftAmt < 0) {
+ shiftAmt = -shiftAmt;
+ Element rBit = 0;
+ if (shiftAmt <= sizeof(Element) * 8)
+ rBit = bits(srcElem1, shiftAmt - 1);
+ if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
+ rBit = 1;
+ if (shiftAmt >= sizeof(Element) * 8) {
+ shiftAmt = sizeof(Element) * 8 - 1;
+ destElem = 0;
+ } else {
+ destElem = (srcElem1 >> shiftAmt);
+ }
+ // Make sure the right shift sign-extends when it should.
+ if (srcElem1 < 0 && destElem >= 0) {
+ destElem |= -((Element)1 << (sizeof(Element) * 8 -
+ 1 - shiftAmt));
+ }
+ destElem += rBit;
+ } else {
+ if (shiftAmt >= sizeof(Element) * 8) {
+ if (srcElem1 != 0) {
+ destElem = mask(sizeof(Element) * 8);
+ fpscr.qc = 1;
+ } else {
+ destElem = 0;
+ }
+ } else {
+ if (bits(srcElem1, sizeof(Element) * 8 - 1,
+ sizeof(Element) * 8 - shiftAmt)) {
+ destElem = mask(sizeof(Element) * 8);
+ fpscr.qc = 1;
+ } else {
+ destElem = srcElem1 << shiftAmt;
+ }
+ }
+ }
+ Fpscr = fpscr;
+ '''
+ threeEqualRegInst("vqrshl", "VqrshlUD", unsignedTypes, 2, vqrshlUCode)
+ threeEqualRegInst("vqrshl", "VqrshlUQ", unsignedTypes, 4, vqrshlUCode)
+
+ vqrshlSCode = '''
+ int16_t shiftAmt = (int8_t)srcElem2;
+ FPSCR fpscr = (FPSCR)Fpscr;
+ if (shiftAmt < 0) {
+ shiftAmt = -shiftAmt;
+ Element rBit = 0;
+ if (shiftAmt <= sizeof(Element) * 8)
+ rBit = bits(srcElem1, shiftAmt - 1);
+ if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
+ rBit = 1;
+ if (shiftAmt >= sizeof(Element) * 8) {
+ shiftAmt = sizeof(Element) * 8 - 1;
+ destElem = 0;
+ } else {
+ destElem = (srcElem1 >> shiftAmt);
+ }
+ // Make sure the right shift sign-extends when it should.
+ if (srcElem1 < 0 && destElem >= 0) {
+ destElem |= -((Element)1 << (sizeof(Element) * 8 -
+ 1 - shiftAmt));
+ }
+ destElem += rBit;
+ } else if (shiftAmt > 0) {
+ bool sat = false;
+ if (shiftAmt >= sizeof(Element) * 8) {
+ if (srcElem1 != 0)
+ sat = true;
+ else
+ destElem = 0;
+ } else {
+ if (bits(srcElem1, sizeof(Element) * 8 - 1,
+ sizeof(Element) * 8 - 1 - shiftAmt) !=
+ ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
+ sat = true;
+ } else {
+ destElem = srcElem1 << shiftAmt;
+ }
+ }
+ if (sat) {
+ fpscr.qc = 1;
+ destElem = mask(sizeof(Element) * 8 - 1);
+ if (srcElem1 < 0)
+ destElem = ~destElem;
+ }
+ } else {
+ destElem = srcElem1;
+ }
+ Fpscr = fpscr;
+ '''
+ threeEqualRegInst("vqrshl", "VqrshlSD", signedTypes, 2, vqrshlSCode)
+ threeEqualRegInst("vqrshl", "VqrshlSQ", signedTypes, 4, vqrshlSCode)
+
+ vabaCode = '''
+ destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
+ (srcElem2 - srcElem1);
+ '''
+ threeEqualRegInst("vaba", "VabaD", allTypes, 2, vabaCode, True)
+ threeEqualRegInst("vaba", "VabaQ", allTypes, 4, vabaCode, True)
+ vabalCode = '''
+ destElem += (srcElem1 > srcElem2) ?
+ ((BigElement)srcElem1 - (BigElement)srcElem2) :
+ ((BigElement)srcElem2 - (BigElement)srcElem1);
+ '''
+ threeRegLongInst("vabal", "Vabal", smallTypes, vabalCode, True)
+
+ vabdCode = '''
+ destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
+ (srcElem2 - srcElem1);
+ '''
+ threeEqualRegInst("vabd", "VabdD", allTypes, 2, vabdCode)
+ threeEqualRegInst("vabd", "VabdQ", allTypes, 4, vabdCode)
+ vabdlCode = '''
+ destElem = (srcElem1 > srcElem2) ?
+ ((BigElement)srcElem1 - (BigElement)srcElem2) :
+ ((BigElement)srcElem2 - (BigElement)srcElem1);
+ '''
+ threeRegLongInst("vabdl", "Vabdl", smallTypes, vabdlCode)
+
+ vtstCode = '''
+ destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
+ '''
+ threeEqualRegInst("vtst", "VtstD", unsignedTypes, 2, vtstCode)
+ threeEqualRegInst("vtst", "VtstQ", unsignedTypes, 4, vtstCode)
+
+ vmulCode = '''
+ destElem = srcElem1 * srcElem2;
+ '''
+ threeEqualRegInst("vmul", "NVmulD", allTypes, 2, vmulCode)
+ threeEqualRegInst("vmul", "NVmulQ", allTypes, 4, vmulCode)
+ vmullCode = '''
+ destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
+ '''
+ threeRegLongInst("vmull", "Vmull", smallTypes, vmullCode)
+
+ vmlaCode = '''
+ destElem = destElem + srcElem1 * srcElem2;
+ '''
+ threeEqualRegInst("vmla", "NVmlaD", allTypes, 2, vmlaCode, True)
+ threeEqualRegInst("vmla", "NVmlaQ", allTypes, 4, vmlaCode, True)
+ vmlalCode = '''
+ destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
+ '''
+ threeRegLongInst("vmlal", "Vmlal", smallTypes, vmlalCode, True)
+
+ vqdmlalCode = '''
+ FPSCR fpscr = (FPSCR)Fpscr;
+ BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
+ Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
+ Element halfNeg = maxNeg / 2;
+ if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
+ (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
+ (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
+ midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
+ fpscr.qc = 1;
+ }
+ bool negPreDest = (destElem < 0);
+ destElem += midElem;
+ bool negDest = (destElem < 0);
+ bool negMid = (midElem < 0);
+ if (negPreDest == negMid && negMid != negDest) {
+ destElem = mask(sizeof(BigElement) * 8 - 1);
+ if (negPreDest)
+ destElem = ~destElem;
+ fpscr.qc = 1;
+ }
+ Fpscr = fpscr;
+ '''
+ threeRegLongInst("vqdmlal", "Vqdmlal", smallTypes, vqdmlalCode, True)
+
+ vqdmlslCode = '''
+ FPSCR fpscr = (FPSCR)Fpscr;
+ BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
+ Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
+ Element halfNeg = maxNeg / 2;
+ if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
+ (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
+ (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
+ midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
+ fpscr.qc = 1;
+ }
+ bool negPreDest = (destElem < 0);
+ destElem -= midElem;
+ bool negDest = (destElem < 0);
+ bool posMid = (midElem > 0);
+ if (negPreDest == posMid && posMid != negDest) {
+ destElem = mask(sizeof(BigElement) * 8 - 1);
+ if (negPreDest)
+ destElem = ~destElem;
+ fpscr.qc = 1;
+ }
+ Fpscr = fpscr;
+ '''
+ threeRegLongInst("vqdmlsl", "Vqdmlsl", smallTypes, vqdmlslCode, True)
+
+ vqdmullCode = '''
+ FPSCR fpscr = (FPSCR)Fpscr;
+ destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
+ if (srcElem1 == srcElem2 &&
+ srcElem1 == (Element)((Element)1 <<
+ (Element)(sizeof(Element) * 8 - 1))) {
+ destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
+ fpscr.qc = 1;
+ }
+ Fpscr = fpscr;
+ '''
+ threeRegLongInst("vqdmull", "Vqdmull", smallTypes, vqdmullCode)
+
+ vmlsCode = '''
+ destElem = destElem - srcElem1 * srcElem2;
+ '''
+ threeEqualRegInst("vmls", "NVmlsD", allTypes, 2, vmlsCode, True)
+ threeEqualRegInst("vmls", "NVmlsQ", allTypes, 4, vmlsCode, True)
+ vmlslCode = '''
+ destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
+ '''
+ threeRegLongInst("vmlsl", "Vmlsl", smallTypes, vmlslCode, True)
+
+ vmulpCode = '''
+ destElem = 0;
+ for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
+ if (bits(srcElem2, j))
+ destElem ^= srcElem1 << j;
+ }
+ '''
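+ # The polynomial variants are carry-less (GF(2)) multiplies: e.g.
+ # 0x03 * 0x03 = (x + 1)^2 = x^2 + 1 = 0x05, since the cross terms
+ # cancel under XOR.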
+ threeEqualRegInst("vmul", "NVmulpD", unsignedTypes, 2, vmulpCode)
+ threeEqualRegInst("vmul", "NVmulpQ", unsignedTypes, 4, vmulpCode)
+ vmullpCode = '''
+ destElem = 0;
+ for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
+ if (bits(srcElem2, j))
+ destElem ^= (BigElement)srcElem1 << j;
+ }
+ '''
+ threeRegLongInst("vmull", "Vmullp", smallUnsignedTypes, vmullpCode)
+
+ threeEqualRegInst("vpmax", "VpmaxD", allTypes, 2, vmaxCode, pairwise=True)
+ threeEqualRegInst("vpmax", "VpmaxQ", allTypes, 4, vmaxCode, pairwise=True)
+
+ threeEqualRegInst("vpmin", "VpminD", allTypes, 2, vminCode, pairwise=True)
+ threeEqualRegInst("vpmin", "VpminQ", allTypes, 4, vminCode, pairwise=True)
+
+ vqdmulhCode = '''
+ FPSCR fpscr = (FPSCR)Fpscr;
+ destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
+ (sizeof(Element) * 8);
+ if (srcElem1 == srcElem2 &&
+ srcElem1 == (Element)((Element)1 <<
+ (sizeof(Element) * 8 - 1))) {
+ destElem = ~srcElem1;
+ fpscr.qc = 1;
+ }
+ Fpscr = fpscr;
+ '''
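+ # Example: for int16_t (Q15 fixed point), 0x4000 * 0x4000 gives
+ # (2 * 0x10000000) >> 16 = 0x2000, i.e. 0.5 * 0.5 = 0.25; the only
+ # overflow case is 0x8000 * 0x8000, which saturates to 0x7fff and
+ # sets QC.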
+ threeEqualRegInst("vqdmulh", "VqdmulhD", smallSignedTypes, 2, vqdmulhCode)
+ threeEqualRegInst("vqdmulh", "VqdmulhQ", smallSignedTypes, 4, vqdmulhCode)
+
+ vqrdmulhCode = '''
+ FPSCR fpscr = (FPSCR)Fpscr;
+ destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
+ ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
+ (sizeof(Element) * 8);
+ Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
+ Element halfNeg = maxNeg / 2;
+ if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
+ (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
+ (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
+ if (destElem < 0) {
+ destElem = mask(sizeof(Element) * 8 - 1);
+ } else {
+ destElem = (Element)1 << (sizeof(Element) * 8 - 1);
+ }
+ fpscr.qc = 1;
+ }
+ Fpscr = fpscr;
+ '''
+ threeEqualRegInst("vqrdmulh", "VqrdmulhD",
+ smallSignedTypes, 2, vqrdmulhCode)
+ threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
+ smallSignedTypes, 4, vqrdmulhCode)
+
+ vmaxfpCode = '''
+ FPSCR fpscr = (FPSCR)Fpscr;
+ bool done;
+ destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
+ if (!done) {
+ destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS,
+ true, true, VfpRoundNearest);
+ } else if (flushToZero(srcReg1, srcReg2)) {
+ fpscr.idc = 1;
+ }
+ Fpscr = fpscr;
+ '''
+ threeEqualRegInstFp("vmax", "VmaxDFp", ("float",), 2, vmaxfpCode)
+ threeEqualRegInstFp("vmax", "VmaxQFp", ("float",), 4, vmaxfpCode)
+
+ vminfpCode = '''
+ FPSCR fpscr = (FPSCR)Fpscr;
+ bool done;
+ destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
+ if (!done) {
+ destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS,
+ true, true, VfpRoundNearest);
+ } else if (flushToZero(srcReg1, srcReg2)) {
+ fpscr.idc = 1;
+ }
+ Fpscr = fpscr;
+ '''
+ threeEqualRegInstFp("vmin", "VminDFp", ("float",), 2, vminfpCode)
+ threeEqualRegInstFp("vmin", "VminQFp", ("float",), 4, vminfpCode)
+
+ threeEqualRegInstFp("vpmax", "VpmaxDFp", ("float",),
+ 2, vmaxfpCode, pairwise=True)
+ threeEqualRegInstFp("vpmax", "VpmaxQFp", ("float",),
+ 4, vmaxfpCode, pairwise=True)
+
+ threeEqualRegInstFp("vpmin", "VpminDFp", ("float",),
+ 2, vminfpCode, pairwise=True)
+ threeEqualRegInstFp("vpmin", "VpminQFp", ("float",),
+ 4, vminfpCode, pairwise=True)
+
+ vaddfpCode = '''
+ FPSCR fpscr = Fpscr;
+ destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
+ true, true, VfpRoundNearest);
+ Fpscr = fpscr;
+ '''
+ threeEqualRegInstFp("vadd", "VaddDFp", ("float",), 2, vaddfpCode)
+ threeEqualRegInstFp("vadd", "VaddQFp", ("float",), 4, vaddfpCode)
+
+ threeEqualRegInstFp("vpadd", "VpaddDFp", ("float",),
+ 2, vaddfpCode, pairwise=True)
+ threeEqualRegInstFp("vpadd", "VpaddQFp", ("float",),
+ 4, vaddfpCode, pairwise=True)
+
+ vsubfpCode = '''
+ FPSCR fpscr = Fpscr;
+ destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
+ true, true, VfpRoundNearest);
+ Fpscr = fpscr;
+ '''
+ threeEqualRegInstFp("vsub", "VsubDFp", ("float",), 2, vsubfpCode)
+ threeEqualRegInstFp("vsub", "VsubQFp", ("float",), 4, vsubfpCode)
+
+ vmulfpCode = '''
+ FPSCR fpscr = Fpscr;
+ destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
+ true, true, VfpRoundNearest);
+ Fpscr = fpscr;
+ '''
+ threeEqualRegInstFp("vmul", "NVmulDFp", ("float",), 2, vmulfpCode)
+ threeEqualRegInstFp("vmul", "NVmulQFp", ("float",), 4, vmulfpCode)
+
+ vmlafpCode = '''
+ FPSCR fpscr = Fpscr;
+ float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
+ true, true, VfpRoundNearest);
+ destReg = binaryOp(fpscr, mid, destReg, fpAddS,
+ true, true, VfpRoundNearest);
+ Fpscr = fpscr;
+ '''
+ threeEqualRegInstFp("vmla", "NVmlaDFp", ("float",), 2, vmlafpCode, True)
+ threeEqualRegInstFp("vmla", "NVmlaQFp", ("float",), 4, vmlafpCode, True)
+
+ vmlsfpCode = '''
+ FPSCR fpscr = Fpscr;
+ float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
+ true, true, VfpRoundNearest);
+ destReg = binaryOp(fpscr, destReg, mid, fpSubS,
+ true, true, VfpRoundNearest);
+ Fpscr = fpscr;
+ '''
+ threeEqualRegInstFp("vmls", "NVmlsDFp", ("float",), 2, vmlsfpCode, True)
+ threeEqualRegInstFp("vmls", "NVmlsQFp", ("float",), 4, vmlsfpCode, True)
+
+ vcgtfpCode = '''
+ FPSCR fpscr = (FPSCR)Fpscr;
+ float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
+ true, true, VfpRoundNearest);
+ destReg = (res == 0) ? -1 : 0;
+ if (res == 2.0)
+ fpscr.ioc = 1;
+ Fpscr = fpscr;
+ '''
+ threeEqualRegInstFp("vcgt", "VcgtDFp", ("float",),
+ 2, vcgtfpCode, toInt = True)
+ threeEqualRegInstFp("vcgt", "VcgtQFp", ("float",),
+ 4, vcgtfpCode, toInt = True)
+
+ vcgefpCode = '''
+ FPSCR fpscr = (FPSCR)Fpscr;
+ float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
+ true, true, VfpRoundNearest);
+ destReg = (res == 0) ? -1 : 0;
+ if (res == 2.0)
+ fpscr.ioc = 1;
+ Fpscr = fpscr;
+ '''
+ threeEqualRegInstFp("vcge", "VcgeDFp", ("float",),
+ 2, vcgefpCode, toInt = True)
+ threeEqualRegInstFp("vcge", "VcgeQFp", ("float",),
+ 4, vcgefpCode, toInt = True)
+
+ vacgtfpCode = '''
+ FPSCR fpscr = (FPSCR)Fpscr;
+ float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
+ true, true, VfpRoundNearest);
+ destReg = (res == 0) ? -1 : 0;
+ if (res == 2.0)
+ fpscr.ioc = 1;
+ Fpscr = fpscr;
+ '''
+ threeEqualRegInstFp("vacgt", "VacgtDFp", ("float",),
+ 2, vacgtfpCode, toInt = True)
+ threeEqualRegInstFp("vacgt", "VacgtQFp", ("float",),
+ 4, vacgtfpCode, toInt = True)
+
+ vacgefpCode = '''
+ FPSCR fpscr = (FPSCR)Fpscr;
+ float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
+ true, true, VfpRoundNearest);
+ destReg = (res == 0) ? -1 : 0;
+ if (res == 2.0)
+ fpscr.ioc = 1;
+ Fpscr = fpscr;
+ '''
+ threeEqualRegInstFp("vacge", "VacgeDFp", ("float",),
+ 2, vacgefpCode, toInt = True)
+ threeEqualRegInstFp("vacge", "VacgeQFp", ("float",),
+ 4, vacgefpCode, toInt = True)
+
+ vceqfpCode = '''
+ FPSCR fpscr = (FPSCR)Fpscr;
+ float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
+ true, true, VfpRoundNearest);
+ destReg = (res == 0) ? -1 : 0;
+ if (res == 2.0)
+ fpscr.ioc = 1;
+ Fpscr = fpscr;
+ '''
+ threeEqualRegInstFp("vceq", "VceqDFp", ("float",),
+ 2, vceqfpCode, toInt = True)
+ threeEqualRegInstFp("vceq", "VceqQFp", ("float",),
+ 4, vceqfpCode, toInt = True)
+
+ vrecpsCode = '''
+ FPSCR fpscr = Fpscr;
+ destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
+ true, true, VfpRoundNearest);
+ Fpscr = fpscr;
+ '''
+ threeEqualRegInstFp("vrecps", "VrecpsDFp", ("float",), 2, vrecpsCode)
+ threeEqualRegInstFp("vrecps", "VrecpsQFp", ("float",), 4, vrecpsCode)
+
+ vrsqrtsCode = '''
+ FPSCR fpscr = Fpscr;
+ destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
+ true, true, VfpRoundNearest);
+ Fpscr = fpscr;
+ '''
+ threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", ("float",), 2, vrsqrtsCode)
+ threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", ("float",), 4, vrsqrtsCode)
+
+ vabdfpCode = '''
+ FPSCR fpscr = Fpscr;
+ float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
+ true, true, VfpRoundNearest);
+ destReg = fabs(mid);
+ Fpscr = fpscr;
+ '''
+ threeEqualRegInstFp("vabd", "VabdDFp", ("float",), 2, vabdfpCode)
+ threeEqualRegInstFp("vabd", "VabdQFp", ("float",), 4, vabdfpCode)
+
+ twoEqualRegInst("vmla", "VmlasD", unsignedTypes, 2, vmlaCode, True)
+ twoEqualRegInst("vmla", "VmlasQ", unsignedTypes, 4, vmlaCode, True)
+ twoEqualRegInstFp("vmla", "VmlasDFp", ("float",), 2, vmlafpCode, True)
+ twoEqualRegInstFp("vmla", "VmlasQFp", ("float",), 4, vmlafpCode, True)
+ twoRegLongInst("vmlal", "Vmlals", smallTypes, vmlalCode, True)
+
+ twoEqualRegInst("vmls", "VmlssD", allTypes, 2, vmlsCode, True)
+ twoEqualRegInst("vmls", "VmlssQ", allTypes, 4, vmlsCode, True)
+ twoEqualRegInstFp("vmls", "VmlssDFp", ("float",), 2, vmlsfpCode, True)
+ twoEqualRegInstFp("vmls", "VmlssQFp", ("float",), 4, vmlsfpCode, True)
+ twoRegLongInst("vmlsl", "Vmlsls", smallTypes, vmlslCode, True)
+
+ twoEqualRegInst("vmul", "VmulsD", allTypes, 2, vmulCode)
+ twoEqualRegInst("vmul", "VmulsQ", allTypes, 4, vmulCode)
+ twoEqualRegInstFp("vmul", "VmulsDFp", ("float",), 2, vmulfpCode)
+ twoEqualRegInstFp("vmul", "VmulsQFp", ("float",), 4, vmulfpCode)
+ twoRegLongInst("vmull", "Vmulls", smallTypes, vmullCode)
+
+ twoRegLongInst("vqdmull", "Vqdmulls", smallTypes, vqdmullCode)
+ twoRegLongInst("vqdmlal", "Vqdmlals", smallTypes, vqdmlalCode, True)
+ twoRegLongInst("vqdmlsl", "Vqdmlsls", smallTypes, vqdmlslCode, True)
+ twoEqualRegInst("vqdmulh", "VqdmulhsD", smallSignedTypes, 2, vqdmulhCode)
+ twoEqualRegInst("vqdmulh", "VqdmulhsQ", smallSignedTypes, 4, vqdmulhCode)
+ twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
+ smallSignedTypes, 2, vqrdmulhCode)
+ twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
+ smallSignedTypes, 4, vqrdmulhCode)
+
+ vshrCode = '''
+ if (imm >= sizeof(srcElem1) * 8) {
+ if (srcElem1 < 0)
+ destElem = -1;
+ else
+ destElem = 0;
+ } else {
+ destElem = srcElem1 >> imm;
+ }
+ '''
+ twoRegShiftInst("vshr", "NVshrD", allTypes, 2, vshrCode)
+ twoRegShiftInst("vshr", "NVshrQ", allTypes, 4, vshrCode)
+
+ vsraCode = '''
+ Element mid;
+ if (imm >= sizeof(srcElem1) * 8) {
+ mid = (srcElem1 < 0) ? -1 : 0;
+ } else {
+ mid = srcElem1 >> imm;
+ if (srcElem1 < 0 && mid >= 0) {
+ mid |= -(mid & ((Element)1 <<
+ (sizeof(Element) * 8 - 1 - imm)));
+ }
+ }
+ destElem += mid;
+ '''
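+ # The explicit sign fixup covers hosts where >> on a negative value
+ # is a logical shift: e.g. int8_t 0xc0 >> 2 must read back as 0xf0
+ # (-16); if mid comes out non-negative the missing sign bits are
+ # OR'd back in.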
+ twoRegShiftInst("vsra", "NVsraD", allTypes, 2, vsraCode, True)
+ twoRegShiftInst("vsra", "NVsraQ", allTypes, 4, vsraCode, True)
+
+ vrshrCode = '''
+ if (imm > sizeof(srcElem1) * 8) {
+ destElem = 0;
+ } else if (imm) {
+ Element rBit = bits(srcElem1, imm - 1);
+ destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
+ } else {
+ destElem = srcElem1;
+ }
+ '''
+ twoRegShiftInst("vrshr", "NVrshrD", allTypes, 2, vrshrCode)
+ twoRegShiftInst("vrshr", "NVrshrQ", allTypes, 4, vrshrCode)
+
+ vrsraCode = '''
+ if (imm > sizeof(srcElem1) * 8) {
+ destElem += 0;
+ } else if (imm) {
+ Element rBit = bits(srcElem1, imm - 1);
+ destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
+ } else {
+ destElem += srcElem1;
+ }
+ '''
+ twoRegShiftInst("vrsra", "NVrsraD", allTypes, 2, vrsraCode, True)
+ twoRegShiftInst("vrsra", "NVrsraQ", allTypes, 4, vrsraCode, True)
+
+ vsriCode = '''
+ // A shift of the full element width or more inserts nothing;
+ // the destination keeps its old value.
+ if (imm < sizeof(Element) * 8)
+ destElem = (srcElem1 >> imm) |
+ (destElem & ~mask(sizeof(Element) * 8 - imm));
+ '''
+ twoRegShiftInst("vsri", "NVsriD", unsignedTypes, 2, vsriCode, True)
+ twoRegShiftInst("vsri", "NVsriQ", unsignedTypes, 4, vsriCode, True)
+
+ vshlCode = '''
+ if (imm >= sizeof(Element) * 8)
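+ // Splitting the shift avoids an undefined full-width shift;
+ // the result is 0.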
+ destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
+ else
+ destElem = srcElem1 << imm;
+ '''
+ twoRegShiftInst("vshl", "NVshlD", unsignedTypes, 2, vshlCode)
+ twoRegShiftInst("vshl", "NVshlQ", unsignedTypes, 4, vshlCode)
+
+ vsliCode = '''
+ // A full-width shift inserts nothing; the destination is unchanged.
+ if (imm < sizeof(Element) * 8)
+ destElem = (srcElem1 << imm) | (destElem & mask(imm));
+ '''
+ twoRegShiftInst("vsli", "NVsliD", unsignedTypes, 2, vsliCode, True)
+ twoRegShiftInst("vsli", "NVsliQ", unsignedTypes, 4, vsliCode, True)
+
+ vqshlCode = '''
+ FPSCR fpscr = (FPSCR)Fpscr;
+ if (imm >= sizeof(Element) * 8) {
+ if (srcElem1 != 0) {
+ destElem = (Element)1 << (sizeof(Element) * 8 - 1);
+ if (srcElem1 > 0)
+ destElem = ~destElem;
+ fpscr.qc = 1;
+ } else {
+ destElem = 0;
+ }
+ } else if (imm) {
+ destElem = (srcElem1 << imm);
+ uint64_t topBits = bits((uint64_t)srcElem1,
+ sizeof(Element) * 8 - 1,
+ sizeof(Element) * 8 - 1 - imm);
+ if (topBits != 0 && topBits != mask(imm + 1)) {
+ destElem = (Element)1 << (sizeof(Element) * 8 - 1);
+ if (srcElem1 > 0)
+ destElem = ~destElem;
+ fpscr.qc = 1;
+ }
+ } else {
+ destElem = srcElem1;
+ }
+ Fpscr = fpscr;
+ '''
+ twoRegShiftInst("vqshl", "NVqshlD", signedTypes, 2, vqshlCode)
+ twoRegShiftInst("vqshl", "NVqshlQ", signedTypes, 4, vqshlCode)
+
+ vqshluCode = '''
+ FPSCR fpscr = (FPSCR)Fpscr;
+ if (imm >= sizeof(Element) * 8) {
+ if (srcElem1 != 0) {
+ destElem = mask(sizeof(Element) * 8);
+ fpscr.qc = 1;
+ } else {
+ destElem = 0;
+ }
+ } else if (imm) {
+ destElem = (srcElem1 << imm);
+ uint64_t topBits = bits((uint64_t)srcElem1,
+ sizeof(Element) * 8 - 1,
+ sizeof(Element) * 8 - imm);
+ if (topBits != 0) {
+ destElem = mask(sizeof(Element) * 8);
+ fpscr.qc = 1;
+ }
+ } else {
+ destElem = srcElem1;
+ }
+ Fpscr = fpscr;
+ '''
+ twoRegShiftInst("vqshlu", "NVqshluD", unsignedTypes, 2, vqshluCode)
+ twoRegShiftInst("vqshlu", "NVqshluQ", unsignedTypes, 4, vqshluCode)
+
+ vqshlusCode = '''
+ FPSCR fpscr = (FPSCR)Fpscr;
+ if (imm >= sizeof(Element) * 8) {
+ if (srcElem1 < 0) {
+ destElem = 0;
+ fpscr.qc = 1;
+ } else if (srcElem1 > 0) {
+ destElem = mask(sizeof(Element) * 8);
+ fpscr.qc = 1;
+ } else {
+ destElem = 0;
+ }
+ } else if (imm) {
+ destElem = (srcElem1 << imm);
+ uint64_t topBits = bits((uint64_t)srcElem1,
+ sizeof(Element) * 8 - 1,
+ sizeof(Element) * 8 - imm);
+ if (srcElem1 < 0) {
+ destElem = 0;
+ fpscr.qc = 1;
+ } else if (topBits != 0) {
+ destElem = mask(sizeof(Element) * 8);
+ fpscr.qc = 1;
+ }
+ } else {
+ if (srcElem1 < 0) {
+ fpscr.qc = 1;
+ destElem = 0;
+ } else {
+ destElem = srcElem1;
+ }
+ }
+ Fpscr = fpscr;
+ '''
+ twoRegShiftInst("vqshlus", "NVqshlusD", signedTypes, 2, vqshlusCode)
+ twoRegShiftInst("vqshlus", "NVqshlusQ", signedTypes, 4, vqshlusCode)
+
+ vshrnCode = '''
+ if (imm >= sizeof(srcElem1) * 8) {
+ destElem = 0;
+ } else {
+ destElem = srcElem1 >> imm;
+ }
+ '''
+ twoRegNarrowShiftInst("vshrn", "NVshrn", smallUnsignedTypes, vshrnCode)
+
+ vrshrnCode = '''
+ if (imm > sizeof(srcElem1) * 8) {
+ destElem = 0;
+ } else if (imm) {
+ Element rBit = bits(srcElem1, imm - 1);
+ destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
+ } else {
+ destElem = srcElem1;
+ }
+ '''
+ twoRegNarrowShiftInst("vrshrn", "NVrshrn", smallUnsignedTypes, vrshrnCode)
+
+ vqshrnCode = '''
+ FPSCR fpscr = (FPSCR)Fpscr;
+ if (imm > sizeof(srcElem1) * 8) {
+ if (srcElem1 != 0 && srcElem1 != -1)
+ fpscr.qc = 1;
+ destElem = 0;
+ } else if (imm) {
+ BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
+ mid |= -(mid & ((BigElement)1 <<
+ (sizeof(BigElement) * 8 - 1 - imm)));
+ if (mid != (Element)mid) {
+ destElem = mask(sizeof(Element) * 8 - 1);
+ if (srcElem1 < 0)
+ destElem = ~destElem;
+ fpscr.qc = 1;
+ } else {
+ destElem = mid;
+ }
+ } else {
+ destElem = srcElem1;
+ }
+ Fpscr = fpscr;
+ '''
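+ # Example: narrowing int16_t to int8_t with imm = 4, srcElem1 = 0x1234
+ # gives mid = 0x123, which does not fit in int8_t, so the result
+ # saturates to 0x7f and QC is set.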
+ twoRegNarrowShiftInst("vqshrn", "NVqshrn", smallSignedTypes, vqshrnCode)
+
+ vqshrunCode = '''
+ FPSCR fpscr = (FPSCR)Fpscr;
+ if (imm > sizeof(srcElem1) * 8) {
+ if (srcElem1 != 0)
+ fpscr.qc = 1;
+ destElem = 0;
+ } else if (imm) {
+ BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
+ if (mid != (Element)mid) {
+ destElem = mask(sizeof(Element) * 8);
+ fpscr.qc = 1;
+ } else {
+ destElem = mid;
+ }
+ } else {
+ destElem = srcElem1;
+ }
+ Fpscr = fpscr;
+ '''
+ twoRegNarrowShiftInst("vqshrun", "NVqshrun",
+ smallUnsignedTypes, vqshrunCode)
+
+ vqshrunsCode = '''
+ FPSCR fpscr = (FPSCR)Fpscr;
+ if (imm > sizeof(srcElem1) * 8) {
+ if (srcElem1 != 0)
+ fpscr.qc = 1;
+ destElem = 0;
+ } else if (imm) {
+ BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
+ if (bits(mid, sizeof(BigElement) * 8 - 1,
+ sizeof(Element) * 8) != 0) {
+ if (srcElem1 < 0) {
+ destElem = 0;
+ } else {
+ destElem = mask(sizeof(Element) * 8);
+ }
+ fpscr.qc = 1;
+ } else {
+ destElem = mid;
+ }
+ } else {
+ destElem = srcElem1;
+ }
+ Fpscr = fpscr;
+ '''
+ twoRegNarrowShiftInst("vqshrun", "NVqshruns",
+ smallSignedTypes, vqshrunsCode)
+
+ vqrshrnCode = '''
+ FPSCR fpscr = (FPSCR)Fpscr;
+ if (imm > sizeof(srcElem1) * 8) {
+ if (srcElem1 != 0 && srcElem1 != -1)
+ fpscr.qc = 1;
+ destElem = 0;
+ } else if (imm) {
+ BigElement mid = (srcElem1 >> (imm - 1));
+ uint64_t rBit = mid & 0x1;
+ mid >>= 1;
+ mid |= -(mid & ((BigElement)1 <<
+ (sizeof(BigElement) * 8 - 1 - imm)));
+ mid += rBit;
+ if (mid != (Element)mid) {
+ destElem = mask(sizeof(Element) * 8 - 1);
+ if (srcElem1 < 0)
+ destElem = ~destElem;
+ fpscr.qc = 1;
+ } else {
+ destElem = mid;
+ }
+ } else {
+ if (srcElem1 != (Element)srcElem1) {
+ destElem = mask(sizeof(Element) * 8 - 1);
+ if (srcElem1 < 0)
+ destElem = ~destElem;
+ fpscr.qc = 1;
+ } else {
+ destElem = srcElem1;
+ }
+ }
+ Fpscr = fpscr;
+ '''
+ twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
+ smallSignedTypes, vqrshrnCode)
+
+ vqrshrunCode = '''
+ FPSCR fpscr = (FPSCR)Fpscr;
+ if (imm > sizeof(srcElem1) * 8) {
+ if (srcElem1 != 0)
+ fpscr.qc = 1;
+ destElem = 0;
+ } else if (imm) {
+ BigElement mid = (srcElem1 >> (imm - 1));
+ uint64_t rBit = mid & 0x1;
+ mid >>= 1;
+ mid += rBit;
+ if (mid != (Element)mid) {
+ destElem = mask(sizeof(Element) * 8);
+ fpscr.qc = 1;
+ } else {
+ destElem = mid;
+ }
+ } else {
+ if (srcElem1 != (Element)srcElem1) {
+ destElem = mask(sizeof(Element) * 8);
+ fpscr.qc = 1;
+ } else {
+ destElem = srcElem1;
+ }
+ }
+ Fpscr = fpscr;
+ '''
+ twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
+ smallUnsignedTypes, vqrshrunCode)
+
+ vqrshrunsCode = '''
+ FPSCR fpscr = (FPSCR)Fpscr;
+ if (imm > sizeof(srcElem1) * 8) {
+ if (srcElem1 != 0)
+ fpscr.qc = 1;
+ destElem = 0;
+ } else if (imm) {
+ BigElement mid = (srcElem1 >> (imm - 1));
+ uint64_t rBit = mid & 0x1;
+ mid >>= 1;
+ mid |= -(mid & ((BigElement)1 <<
+ (sizeof(BigElement) * 8 - 1 - imm)));
+ mid += rBit;
+ if (bits(mid, sizeof(BigElement) * 8 - 1,
+ sizeof(Element) * 8) != 0) {
+ if (srcElem1 < 0) {
+ destElem = 0;
+ } else {
+ destElem = mask(sizeof(Element) * 8);
+ }
+ fpscr.qc = 1;
+ } else {
+ destElem = mid;
+ }
+ } else {
+ if (srcElem1 < 0) {
+ fpscr.qc = 1;
+ destElem = 0;
+ } else {
+ destElem = srcElem1;
+ }
+ }
+ Fpscr = fpscr;
+ '''
+ twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
+ smallSignedTypes, vqrshrunsCode)
+
+ vshllCode = '''
+ if (imm >= sizeof(destElem) * 8) {
+ destElem = 0;
+ } else {
+ destElem = (BigElement)srcElem1 << imm;
+ }
+ '''
+ twoRegLongShiftInst("vshll", "NVshll", smallTypes, vshllCode)
+
+ vmovlCode = '''
+ destElem = srcElem1;
+ '''
+ twoRegLongShiftInst("vmovl", "NVmovl", smallTypes, vmovlCode)
+
+ vcvt2ufxCode = '''
+ FPSCR fpscr = Fpscr;
+ if (flushToZero(srcElem1))
+ fpscr.idc = 1;
+ VfpSavedState state = prepFpState(VfpRoundNearest);
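+ // The empty asm statements act as compiler barriers: they keep the
+ // conversion from being reordered around prepFpState/finishVfp,
+ // which save and restore the host FP rounding and exception state.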
+ __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
+ destReg = vfpFpSToFixed(srcElem1, false, false, imm);
+ __asm__ __volatile__("" :: "m" (destReg));
+ finishVfp(fpscr, state, true);
+ Fpscr = fpscr;
+ '''
+ twoRegShiftInst("vcvt", "NVcvt2ufxD", ("float",),
+ 2, vcvt2ufxCode, toInt = True)
+ twoRegShiftInst("vcvt", "NVcvt2ufxQ", ("float",),
+ 4, vcvt2ufxCode, toInt = True)
+
+ vcvt2sfxCode = '''
+ FPSCR fpscr = Fpscr;
+ if (flushToZero(srcElem1))
+ fpscr.idc = 1;
+ VfpSavedState state = prepFpState(VfpRoundNearest);
+ __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
+ destReg = vfpFpSToFixed(srcElem1, true, false, imm);
+ __asm__ __volatile__("" :: "m" (destReg));
+ finishVfp(fpscr, state, true);
+ Fpscr = fpscr;
+ '''
+ twoRegShiftInst("vcvt", "NVcvt2sfxD", ("float",),
+ 2, vcvt2sfxCode, toInt = True)
+ twoRegShiftInst("vcvt", "NVcvt2sfxQ", ("float",),
+ 4, vcvt2sfxCode, toInt = True)
+
+ vcvtu2fpCode = '''
+ FPSCR fpscr = Fpscr;
+ VfpSavedState state = prepFpState(VfpRoundNearest);
+ __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
+ destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm);
+ __asm__ __volatile__("" :: "m" (destElem));
+ finishVfp(fpscr, state, true);
+ Fpscr = fpscr;
+ '''
+ twoRegShiftInst("vcvt", "NVcvtu2fpD", ("float",),
+ 2, vcvtu2fpCode, fromInt = True)
+ twoRegShiftInst("vcvt", "NVcvtu2fpQ", ("float",),
+ 4, vcvtu2fpCode, fromInt = True)
+
+ vcvts2fpCode = '''
+ FPSCR fpscr = Fpscr;
+ VfpSavedState state = prepFpState(VfpRoundNearest);
+ __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
+ destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm);
+ __asm__ __volatile__("" :: "m" (destElem));
+ finishVfp(fpscr, state, true);
+ Fpscr = fpscr;
+ '''
+ twoRegShiftInst("vcvt", "NVcvts2fpD", ("float",),
+ 2, vcvts2fpCode, fromInt = True)
+ twoRegShiftInst("vcvt", "NVcvts2fpQ", ("float",),
+ 4, vcvts2fpCode, fromInt = True)
+
+ vcvts2hCode = '''
+ FPSCR fpscr = Fpscr;
+ float srcFp1 = bitsToFp(srcElem1, (float)0.0);
+ if (flushToZero(srcFp1))
+ fpscr.idc = 1;
+ VfpSavedState state = prepFpState(VfpRoundNearest);
+ __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
+ : "m" (srcFp1), "m" (destElem));
+ destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
+ fpscr.ahp, srcFp1);
+ __asm__ __volatile__("" :: "m" (destElem));
+ finishVfp(fpscr, state, true);
+ Fpscr = fpscr;
+ '''
+ twoRegNarrowMiscInst("vcvt", "NVcvts2h", ("uint16_t",), vcvts2hCode)
+
+ vcvth2sCode = '''
+ FPSCR fpscr = Fpscr;
+ VfpSavedState state = prepFpState(VfpRoundNearest);
+ __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
+ : "m" (srcElem1), "m" (destElem));
+ destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
+ __asm__ __volatile__("" :: "m" (destElem));
+ finishVfp(fpscr, state, true);
+ Fpscr = fpscr;
+ '''
+ twoRegLongMiscInst("vcvt", "NVcvth2s", ("uint16_t",), vcvth2sCode)
+
+ vrsqrteCode = '''
+ destElem = unsignedRSqrtEstimate(srcElem1);
+ '''
+ twoRegMiscInst("vrsqrte", "NVrsqrteD", ("uint32_t",), 2, vrsqrteCode)
+ twoRegMiscInst("vrsqrte", "NVrsqrteQ", ("uint32_t",), 4, vrsqrteCode)
+
+ vrsqrtefpCode = '''
+ FPSCR fpscr = Fpscr;
+ if (flushToZero(srcReg1))
+ fpscr.idc = 1;
+ destReg = fprSqrtEstimate(fpscr, srcReg1);
+ Fpscr = fpscr;
+ '''
+ twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", ("float",), 2, vrsqrtefpCode)
+ twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", ("float",), 4, vrsqrtefpCode)
+
+ vrecpeCode = '''
+ destElem = unsignedRecipEstimate(srcElem1);
+ '''
+ twoRegMiscInst("vrecpe", "NVrecpeD", ("uint32_t",), 2, vrecpeCode)
+ twoRegMiscInst("vrecpe", "NVrecpeQ", ("uint32_t",), 4, vrecpeCode)
+
+ vrecpefpCode = '''
+ FPSCR fpscr = Fpscr;
+ if (flushToZero(srcReg1))
+ fpscr.idc = 1;
+ destReg = fpRecipEstimate(fpscr, srcReg1);
+ Fpscr = fpscr;
+ '''
+ twoRegMiscInstFp("vrecpe", "NVrecpeDFp", ("float",), 2, vrecpefpCode)
+ twoRegMiscInstFp("vrecpe", "NVrecpeQFp", ("float",), 4, vrecpefpCode)
+
+ vrev16Code = '''
+ destElem = srcElem1;
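+ // groupSize is the number of elements in each 16-bit group; XORing
+ // the element index with (groupSize - 1) reverses order in a group.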
+ unsigned groupSize = ((1 << 1) / sizeof(Element));
+ unsigned reverseMask = (groupSize - 1);
+ j = i ^ reverseMask;
+ '''
+ twoRegMiscInst("vrev16", "NVrev16D", ("uint8_t",), 2, vrev16Code)
+ twoRegMiscInst("vrev16", "NVrev16Q", ("uint8_t",), 4, vrev16Code)
+ vrev32Code = '''
+ destElem = srcElem1;
+ unsigned groupSize = ((1 << 2) / sizeof(Element));
+ unsigned reverseMask = (groupSize - 1);
+ j = i ^ reverseMask;
+ '''
+ twoRegMiscInst("vrev32", "NVrev32D",
+ ("uint8_t", "uint16_t"), 2, vrev32Code)
+ twoRegMiscInst("vrev32", "NVrev32Q",
+ ("uint8_t", "uint16_t"), 4, vrev32Code)
+ vrev64Code = '''
+ destElem = srcElem1;
+ unsigned groupSize = ((1 << 3) / sizeof(Element));
+ unsigned reverseMask = (groupSize - 1);
+ j = i ^ reverseMask;
+ '''
+ twoRegMiscInst("vrev64", "NVrev64D", smallUnsignedTypes, 2, vrev64Code)
+ twoRegMiscInst("vrev64", "NVrev64Q", smallUnsignedTypes, 4, vrev64Code)
+
+ vpaddlCode = '''
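+ // Pairwise add-long: srcElem1 and srcElem2 are adjacent source
+ // elements (paired up by the condense template), widened before
+ // the add so the sum cannot overflow.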
+ destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
+ '''
+ twoRegCondenseInst("vpaddl", "NVpaddlD", smallTypes, 2, vpaddlCode)
+ twoRegCondenseInst("vpaddl", "NVpaddlQ", smallTypes, 4, vpaddlCode)
+
+ vpadalCode = '''
+ destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
+ '''
+ twoRegCondenseInst("vpadal", "NVpadalD", smallTypes, 2, vpadalCode, True)
+ twoRegCondenseInst("vpadal", "NVpadalQ", smallTypes, 4, vpadalCode, True)
+
+ vclsCode = '''
+ unsigned count = 0;
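+ // Count leading sign bits: discard the sign bit itself, then count
+ // how many of the following bits still match it.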
+ if (srcElem1 < 0) {
+ srcElem1 <<= 1;
+ while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
+ count++;
+ srcElem1 <<= 1;
+ }
+ } else {
+ srcElem1 <<= 1;
+ while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
+ count++;
+ srcElem1 <<= 1;
+ }
+ }
+ destElem = count;
+ '''
+ twoRegMiscInst("vcls", "NVclsD", signedTypes, 2, vclsCode)
+ twoRegMiscInst("vcls", "NVclsQ", signedTypes, 4, vclsCode)
+
+ vclzCode = '''
+ unsigned count = 0;
+ while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
+ count++;
+ srcElem1 <<= 1;
+ }
+ destElem = count;
+ '''
+ twoRegMiscInst("vclz", "NVclzD", signedTypes, 2, vclzCode)
+ twoRegMiscInst("vclz", "NVclzQ", signedTypes, 4, vclzCode)
+
+ vcntCode = '''
+ unsigned count = 0;
+ while (srcElem1 && count < sizeof(Element) * 8) {
+ count += srcElem1 & 0x1;
+ srcElem1 >>= 1;
+ }
+ destElem = count;
+ '''
+ twoRegMiscInst("vcnt", "NVcntD", unsignedTypes, 2, vcntCode)
+ twoRegMiscInst("vcnt", "NVcntQ", unsignedTypes, 4, vcntCode)
+
+ vmvnCode = '''
+ destElem = ~srcElem1;
+ '''
+ twoRegMiscInst("vmvn", "NVmvnD", ("uint64_t",), 2, vmvnCode)
+ twoRegMiscInst("vmvn", "NVmvnQ", ("uint64_t",), 4, vmvnCode)
+
+ vqabsCode = '''
+ FPSCR fpscr = (FPSCR)Fpscr;
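+ // The most negative value has no positive counterpart: saturate to
+ // the largest positive value and set the sticky QC flag.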
+ if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
+ fpscr.qc = 1;
+ destElem = ~srcElem1;
+ } else if (srcElem1 < 0) {
+ destElem = -srcElem1;
+ } else {
+ destElem = srcElem1;
+ }
+ Fpscr = fpscr;
+ '''
+ twoRegMiscInst("vqabs", "NVqabsD", signedTypes, 2, vqabsCode)
+ twoRegMiscInst("vqabs", "NVqabsQ", signedTypes, 4, vqabsCode)
+
+ vqnegCode = '''
+ FPSCR fpscr = (FPSCR)Fpscr;
+ if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
+ fpscr.qc = 1;
+ destElem = ~srcElem1;
+ } else {
+ destElem = -srcElem1;
+ }
+ Fpscr = fpscr;
+ '''
+ twoRegMiscInst("vqneg", "NVqnegD", signedTypes, 2, vqnegCode)
+ twoRegMiscInst("vqneg", "NVqnegQ", signedTypes, 4, vqnegCode)
+
+ vabsCode = '''
+ if (srcElem1 < 0) {
+ destElem = -srcElem1;
+ } else {
+ destElem = srcElem1;
+ }
+ '''
+ twoRegMiscInst("vabs", "NVabsD", signedTypes, 2, vabsCode)
+ twoRegMiscInst("vabs", "NVabsQ", signedTypes, 4, vabsCode)
+ vabsfpCode = '''
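+ // Clear the sign bit through an integer view of the float, which
+ // avoids an FP operation and so cannot raise exceptions.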
+ union
+ {
+ uint32_t i;
+ float f;
+ } cStruct;
+ cStruct.f = srcReg1;
+ cStruct.i &= mask(sizeof(Element) * 8 - 1);
+ destReg = cStruct.f;
+ '''
+ twoRegMiscInstFp("vabs", "NVabsDFp", ("float",), 2, vabsfpCode)
+ twoRegMiscInstFp("vabs", "NVabsQFp", ("float",), 4, vabsfpCode)
+
+ vnegCode = '''
+ destElem = -srcElem1;
+ '''
+ twoRegMiscInst("vneg", "NVnegD", signedTypes, 2, vnegCode)
+ twoRegMiscInst("vneg", "NVnegQ", signedTypes, 4, vnegCode)
+ vnegfpCode = '''
+ destReg = -srcReg1;
+ '''
+ twoRegMiscInstFp("vneg", "NVnegDFp", ("float",), 2, vnegfpCode)
+ twoRegMiscInstFp("vneg", "NVnegQFp", ("float",), 4, vnegfpCode)
+
+ vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
+ twoRegMiscInst("vcgt", "NVcgtD", signedTypes, 2, vcgtCode)
+ twoRegMiscInst("vcgt", "NVcgtQ", signedTypes, 4, vcgtCode)
+ vcgtfpCode = '''
+ FPSCR fpscr = (FPSCR)Fpscr;
+ float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
+ true, true, VfpRoundNearest);
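+ // res encodes the helper's answer: 0 means the comparison held
+ // (dest becomes all ones), and 2.0 flags an unordered (NaN)
+ // comparison, which sets the invalid-operation flag below.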
+ destReg = (res == 0) ? -1 : 0;
+ if (res == 2.0)
+ fpscr.ioc = 1;
+ Fpscr = fpscr;
+ '''
+ twoRegMiscInstFp("vcgt", "NVcgtDFp", ("float",),
+ 2, vcgtfpCode, toInt = True)
+ twoRegMiscInstFp("vcgt", "NVcgtQFp", ("float",),
+ 4, vcgtfpCode, toInt = True)
+
+ vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
+ twoRegMiscInst("vcge", "NVcgeD", signedTypes, 2, vcgeCode)
+ twoRegMiscInst("vcge", "NVcgeQ", signedTypes, 4, vcgeCode)
+ vcgefpCode = '''
+ FPSCR fpscr = (FPSCR)Fpscr;
+ float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
+ true, true, VfpRoundNearest);
+ destReg = (res == 0) ? -1 : 0;
+ if (res == 2.0)
+ fpscr.ioc = 1;
+ Fpscr = fpscr;
+ '''
+ twoRegMiscInstFp("vcge", "NVcgeDFp", ("float",),
+ 2, vcgefpCode, toInt = True)
+ twoRegMiscInstFp("vcge", "NVcgeQFp", ("float",),
+ 4, vcgefpCode, toInt = True)
+
+ vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
+ twoRegMiscInst("vceq", "NVceqD", signedTypes, 2, vceqCode)
+ twoRegMiscInst("vceq", "NVceqQ", signedTypes, 4, vceqCode)
+ vceqfpCode = '''
+ FPSCR fpscr = (FPSCR)Fpscr;
+ float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
+ true, true, VfpRoundNearest);
+ destReg = (res == 0) ? -1 : 0;
+ if (res == 2.0)
+ fpscr.ioc = 1;
+ Fpscr = fpscr;
+ '''
+ twoRegMiscInstFp("vceq", "NVceqDFp", ("float",),
+ 2, vceqfpCode, toInt = True)
+ twoRegMiscInstFp("vceq", "NVceqQFp", ("float",),
+ 4, vceqfpCode, toInt = True)
+
+ vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
+ twoRegMiscInst("vcle", "NVcleD", signedTypes, 2, vcleCode)
+ twoRegMiscInst("vcle", "NVcleQ", signedTypes, 4, vcleCode)
+ vclefpCode = '''
+ FPSCR fpscr = (FPSCR)Fpscr;
+ float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
+ true, true, VfpRoundNearest);
+ destReg = (res == 0) ? -1 : 0;
+ if (res == 2.0)
+ fpscr.ioc = 1;
+ Fpscr = fpscr;
+ '''
+ twoRegMiscInstFp("vcle", "NVcleDFp", ("float",),
+ 2, vclefpCode, toInt = True)
+ twoRegMiscInstFp("vcle", "NVcleQFp", ("float",),
+ 4, vclefpCode, toInt = True)
+
+ vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
+ twoRegMiscInst("vclt", "NVcltD", signedTypes, 2, vcltCode)
+ twoRegMiscInst("vclt", "NVcltQ", signedTypes, 4, vcltCode)
+ vcltfpCode = '''
+ FPSCR fpscr = (FPSCR)Fpscr;
+ float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
+ true, true, VfpRoundNearest);
+ destReg = (res == 0) ? -1 : 0;
+ if (res == 2.0)
+ fpscr.ioc = 1;
+ Fpscr = fpscr;
+ '''
+ twoRegMiscInstFp("vclt", "NVcltDFp", ("float",),
+ 2, vcltfpCode, toInt = True)
+ twoRegMiscInstFp("vclt", "NVcltQFp", ("float",),
+ 4, vcltfpCode, toInt = True)
+
+ vswpCode = '''
+ FloatRegBits mid;
+ for (unsigned r = 0; r < rCount; r++) {
+ mid = srcReg1.regs[r];
+ srcReg1.regs[r] = destReg.regs[r];
+ destReg.regs[r] = mid;
+ }
+ '''
+ twoRegMiscScramble("vswp", "NVswpD", ("uint64_t",), 2, vswpCode)
+ twoRegMiscScramble("vswp", "NVswpQ", ("uint64_t",), 4, vswpCode)
+
+ vtrnCode = '''
+ Element mid;
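+ // Transpose 2x2 blocks: each even element of srcReg1 swaps with
+ // the odd element that follows it in destReg.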
+ for (unsigned i = 0; i < eCount; i += 2) {
+ mid = srcReg1.elements[i];
+ srcReg1.elements[i] = destReg.elements[i + 1];
+ destReg.elements[i + 1] = mid;
+ }
+ '''
+ twoRegMiscScramble("vtrn", "NVtrnD", unsignedTypes, 2, vtrnCode)
+ twoRegMiscScramble("vtrn", "NVtrnQ", unsignedTypes, 4, vtrnCode)
+
+ vuzpCode = '''
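+ // Unzip: even-indexed elements of the destReg:srcReg1 pair end up
+ // in destReg, odd-indexed ones in srcReg1.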
+ Element mid[eCount];
+ memcpy(&mid, &srcReg1, sizeof(srcReg1));
+ for (unsigned i = 0; i < eCount / 2; i++) {
+ srcReg1.elements[i] = destReg.elements[2 * i + 1];
+ srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
+ destReg.elements[i] = destReg.elements[2 * i];
+ }
+ for (unsigned i = 0; i < eCount / 2; i++) {
+ destReg.elements[eCount / 2 + i] = mid[2 * i];
+ }
+ '''
+ twoRegMiscScramble("vuzp", "NVuzpD", unsignedTypes, 2, vuzpCode)
+ twoRegMiscScramble("vuzp", "NVuzpQ", unsignedTypes, 4, vuzpCode)
+
+ vzipCode = '''
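+ // Zip: interleave destReg (even slots) with srcReg1 (odd slots),
+ // low half into destReg and high half into srcReg1.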
+ Element mid[eCount];
+ memcpy(&mid, &destReg, sizeof(destReg));
+ for (unsigned i = 0; i < eCount / 2; i++) {
+ destReg.elements[2 * i] = mid[i];
+ destReg.elements[2 * i + 1] = srcReg1.elements[i];
+ }
+ for (unsigned i = 0; i < eCount / 2; i++) {
+ srcReg1.elements[2 * i] = mid[eCount / 2 + i];
+ srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
+ }
+ '''
+ twoRegMiscScramble("vzip", "NVzipD", unsignedTypes, 2, vzipCode)
+ twoRegMiscScramble("vzip", "NVzipQ", unsignedTypes, 4, vzipCode)
+
+ vmovnCode = 'destElem = srcElem1;'
+ twoRegNarrowMiscInst("vmovn", "NVmovn", smallUnsignedTypes, vmovnCode)
+
+ vdupCode = 'destElem = srcElem1;'
+ twoRegMiscScInst("vdup", "NVdupD", smallUnsignedTypes, 2, vdupCode)
+ twoRegMiscScInst("vdup", "NVdupQ", smallUnsignedTypes, 4, vdupCode)
+
+ def vdupGprInst(name, Name, types, rCount):
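+ # Broadcast the low element-sized bits of the general purpose
+ # register operand into every element of the destination vector.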
+ global header_output, exec_output
+ eWalkCode = '''
+ RegVect destReg;
+ for (unsigned i = 0; i < eCount; i++) {
+ destReg.elements[i] = htog((Element)Op1);
+ }
+ '''
+ for reg in range(rCount):
+ eWalkCode += '''
+ FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
+ ''' % { "reg" : reg }
+ iop = InstObjParams(name, Name,
+ "RegRegOp",
+ { "code": eWalkCode,
+ "r_count": rCount,
+ "predicate_test": predicateTest }, [])
+ header_output += NeonRegRegOpDeclare.subst(iop)
+ exec_output += NeonEqualRegExecute.subst(iop)
+ for type in types:
+ substDict = { "targs" : type,
+ "class_name" : Name }
+ exec_output += NeonExecDeclare.subst(substDict)
+ vdupGprInst("vdup", "NVdupDGpr", smallUnsignedTypes, 2)
+ vdupGprInst("vdup", "NVdupQGpr", smallUnsignedTypes, 4)
+
+ vmovCode = 'destElem = imm;'
+ oneRegImmInst("vmov", "NVmoviD", ("uint64_t",), 2, vmovCode)
+ oneRegImmInst("vmov", "NVmoviQ", ("uint64_t",), 4, vmovCode)
+
+ vorrCode = 'destElem |= imm;'
+ oneRegImmInst("vorr", "NVorriD", ("uint64_t",), 2, vorrCode, True)
+ oneRegImmInst("vorr", "NVorriQ", ("uint64_t",), 4, vorrCode, True)
+
+ vmvnCode = 'destElem = ~imm;'
+ oneRegImmInst("vmvn", "NVmvniD", ("uint64_t",), 2, vmvnCode)
+ oneRegImmInst("vmvn", "NVmvniQ", ("uint64_t",), 4, vmvnCode)
+
+ vbicCode = 'destElem &= ~imm;'
+ oneRegImmInst("vbic", "NVbiciD", ("uint64_t",), 2, vbicCode, True)
+ oneRegImmInst("vbic", "NVbiciQ", ("uint64_t",), 4, vbicCode, True)
+
+ vqmovnCode = '''
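+ // Saturating narrow: if the value does not survive the round trip
+ // through the narrow type, clamp toward the sign extreme and set
+ // the sticky QC flag.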
+ FPSCR fpscr = (FPSCR)Fpscr;
+ destElem = srcElem1;
+ if ((BigElement)destElem != srcElem1) {
+ fpscr.qc = 1;
+ destElem = mask(sizeof(Element) * 8 - 1);
+ if (srcElem1 < 0)
+ destElem = ~destElem;
+ }
+ Fpscr = fpscr;
+ '''
+ twoRegNarrowMiscInst("vqmovn", "NVqmovn", smallSignedTypes, vqmovnCode)
+
+ vqmovunCode = '''
+ FPSCR fpscr = (FPSCR)Fpscr;
+ destElem = srcElem1;
+ if ((BigElement)destElem != srcElem1) {
+ fpscr.qc = 1;
+ destElem = mask(sizeof(Element) * 8);
+ }
+ Fpscr = fpscr;
+ '''
+ twoRegNarrowMiscInst("vqmovun", "NVqmovun",
+ smallUnsignedTypes, vqmovunCode)
+
+ vqmovunsCode = '''
+ FPSCR fpscr = (FPSCR)Fpscr;
+ destElem = srcElem1;
+ if (srcElem1 < 0 ||
+ ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
+ fpscr.qc = 1;
+ destElem = mask(sizeof(Element) * 8);
+ if (srcElem1 < 0)
+ destElem = ~destElem;
+ }
+ Fpscr = fpscr;
+ '''
+ twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
+ smallSignedTypes, vqmovunsCode)
+
+ def buildVext(name, Name, types, rCount, op):
+ global header_output, exec_output
+ eWalkCode = '''
+ RegVect srcReg1, srcReg2, destReg;
+ '''
+ for reg in range(rCount):
+ eWalkCode += '''
+ srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
+ srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
+ ''' % { "reg" : reg }
+ eWalkCode += op
+ for reg in range(rCount):
+ eWalkCode += '''
+ FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
+ ''' % { "reg" : reg }
+ iop = InstObjParams(name, Name,
+ "RegRegRegImmOp",
+ { "code": eWalkCode,
+ "r_count": rCount,
+ "predicate_test": predicateTest }, [])
+ header_output += NeonRegRegRegImmOpDeclare.subst(iop)
+ exec_output += NeonEqualRegExecute.subst(iop)
+ for type in types:
+ substDict = { "targs" : type,
+ "class_name" : Name }
+ exec_output += NeonExecDeclare.subst(substDict)
+
+ vextCode = '''
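+ // Treat srcReg1:srcReg2 as one long vector and copy eCount
+ // consecutive elements starting at element index imm.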
+ for (unsigned i = 0; i < eCount; i++) {
+ unsigned index = i + imm;
+ if (index < eCount) {
+ destReg.elements[i] = srcReg1.elements[index];
+ } else {
+ index -= eCount;
+ assert(index < eCount);
+ destReg.elements[i] = srcReg2.elements[index];
+ }
+ }
+ '''
+ buildVext("vext", "NVextD", ("uint8_t",), 2, vextCode)
+ buildVext("vext", "NVextQ", ("uint8_t",), 4, vextCode)
+
+ def buildVtbxl(name, Name, length, isVtbl):
+ global header_output, decoder_output, exec_output
+ code = '''
+ union
+ {
+ uint8_t bytes[32];
+ FloatRegBits regs[8];
+ } table;
+
+ union
+ {
+ uint8_t bytes[8];
+ FloatRegBits regs[2];
+ } destReg, srcReg2;
+
+ const unsigned length = %(length)d;
+ const bool isVtbl = %(isVtbl)s;
+
+ srcReg2.regs[0] = htog(FpOp2P0.uw);
+ srcReg2.regs[1] = htog(FpOp2P1.uw);
+
+ destReg.regs[0] = htog(FpDestP0.uw);
+ destReg.regs[1] = htog(FpDestP1.uw);
+ ''' % { "length" : length, "isVtbl" : isVtbl }
+ for reg in range(8):
+ if reg < length * 2:
+ code += 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);\n' % \
+ { "reg" : reg }
+ else:
+ code += 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg }
+ code += '''
+ for (unsigned i = 0; i < sizeof(destReg); i++) {
+ uint8_t index = srcReg2.bytes[i];
+ if (index < 8 * length) {
+ destReg.bytes[i] = table.bytes[index];
+ } else {
+ if (isVtbl)
+ destReg.bytes[i] = 0;
+ // else destReg.bytes[i] unchanged
+ }
+ }
+
+ FpDestP0.uw = gtoh(destReg.regs[0]);
+ FpDestP1.uw = gtoh(destReg.regs[1]);
+ '''
+ iop = InstObjParams(name, Name,
+ "RegRegRegOp",
+ { "code": code,
+ "predicate_test": predicateTest }, [])
+ header_output += RegRegRegOpDeclare.subst(iop)
+ decoder_output += RegRegRegOpConstructor.subst(iop)
+ exec_output += PredOpExecute.subst(iop)
+
+ buildVtbxl("vtbl", "NVtbl1", 1, "true")
+ buildVtbxl("vtbl", "NVtbl2", 2, "true")
+ buildVtbxl("vtbl", "NVtbl3", 3, "true")
+ buildVtbxl("vtbl", "NVtbl4", 4, "true")
+
+ buildVtbxl("vtbx", "NVtbx1", 1, "false")
+ buildVtbxl("vtbx", "NVtbx2", 2, "false")
+ buildVtbxl("vtbx", "NVtbx3", 3, "false")
+ buildVtbxl("vtbx", "NVtbx4", 4, "false")
+}};
diff --git a/src/arch/arm/isa/operands.isa b/src/arch/arm/isa/operands.isa
index a086bb03c..5490a28e0 100644
--- a/src/arch/arm/isa/operands.isa
+++ b/src/arch/arm/isa/operands.isa
@@ -47,6 +47,7 @@ def operand_types {{
'sw' : ('signed int', 32),
'uw' : ('unsigned int', 32),
'ud' : ('unsigned int', 64),
+ 'tud' : ('twin64 int', 64),
'sf' : ('float', 32),
'df' : ('float', 64)
}};
@@ -96,6 +97,18 @@ def operands {{
'FpDestP1': ('FloatReg', 'sf', '(dest + 1)', 'IsFloating', 2),
'FpDestP2': ('FloatReg', 'sf', '(dest + 2)', 'IsFloating', 2),
'FpDestP3': ('FloatReg', 'sf', '(dest + 3)', 'IsFloating', 2),
+ 'FpDestP4': ('FloatReg', 'sf', '(dest + 4)', 'IsFloating', 2),
+ 'FpDestP5': ('FloatReg', 'sf', '(dest + 5)', 'IsFloating', 2),
+ 'FpDestP6': ('FloatReg', 'sf', '(dest + 6)', 'IsFloating', 2),
+ 'FpDestP7': ('FloatReg', 'sf', '(dest + 7)', 'IsFloating', 2),
+ 'FpDestS0P0': ('FloatReg', 'sf', '(dest + step * 0 + 0)', 'IsFloating', 2),
+ 'FpDestS0P1': ('FloatReg', 'sf', '(dest + step * 0 + 1)', 'IsFloating', 2),
+ 'FpDestS1P0': ('FloatReg', 'sf', '(dest + step * 1 + 0)', 'IsFloating', 2),
+ 'FpDestS1P1': ('FloatReg', 'sf', '(dest + step * 1 + 1)', 'IsFloating', 2),
+ 'FpDestS2P0': ('FloatReg', 'sf', '(dest + step * 2 + 0)', 'IsFloating', 2),
+ 'FpDestS2P1': ('FloatReg', 'sf', '(dest + step * 2 + 1)', 'IsFloating', 2),
+ 'FpDestS3P0': ('FloatReg', 'sf', '(dest + step * 3 + 0)', 'IsFloating', 2),
+ 'FpDestS3P1': ('FloatReg', 'sf', '(dest + step * 3 + 1)', 'IsFloating', 2),
'Result': ('IntReg', 'uw', 'result', 'IsInteger', 2,
maybePCRead, maybePCWrite),
'Dest2': ('IntReg', 'uw', 'dest2', 'IsInteger', 2,
@@ -124,6 +137,18 @@ def operands {{
'FpOp1P1': ('FloatReg', 'sf', '(op1 + 1)', 'IsFloating', 2),
'FpOp1P2': ('FloatReg', 'sf', '(op1 + 2)', 'IsFloating', 2),
'FpOp1P3': ('FloatReg', 'sf', '(op1 + 3)', 'IsFloating', 2),
+ 'FpOp1P4': ('FloatReg', 'sf', '(op1 + 4)', 'IsFloating', 2),
+ 'FpOp1P5': ('FloatReg', 'sf', '(op1 + 5)', 'IsFloating', 2),
+ 'FpOp1P6': ('FloatReg', 'sf', '(op1 + 6)', 'IsFloating', 2),
+ 'FpOp1P7': ('FloatReg', 'sf', '(op1 + 7)', 'IsFloating', 2),
+ 'FpOp1S0P0': ('FloatReg', 'sf', '(op1 + step * 0 + 0)', 'IsFloating', 2),
+ 'FpOp1S0P1': ('FloatReg', 'sf', '(op1 + step * 0 + 1)', 'IsFloating', 2),
+ 'FpOp1S1P0': ('FloatReg', 'sf', '(op1 + step * 1 + 0)', 'IsFloating', 2),
+ 'FpOp1S1P1': ('FloatReg', 'sf', '(op1 + step * 1 + 1)', 'IsFloating', 2),
+ 'FpOp1S2P0': ('FloatReg', 'sf', '(op1 + step * 2 + 0)', 'IsFloating', 2),
+ 'FpOp1S2P1': ('FloatReg', 'sf', '(op1 + step * 2 + 1)', 'IsFloating', 2),
+ 'FpOp1S3P0': ('FloatReg', 'sf', '(op1 + step * 3 + 0)', 'IsFloating', 2),
+ 'FpOp1S3P1': ('FloatReg', 'sf', '(op1 + step * 3 + 1)', 'IsFloating', 2),
'MiscOp1': ('ControlReg', 'uw', 'op1', (None, None, 'IsControl'), 2),
'Op2': ('IntReg', 'uw', 'op2', 'IsInteger', 2,
maybePCRead, maybePCWrite),
@@ -164,6 +189,7 @@ def operands {{
maybePCRead, maybeIWPCWrite),
'Fa' : ('FloatReg', 'sf', 'ura', 'IsFloating', 2),
'Rb' : ('IntReg', 'uw', 'urb', 'IsInteger', 2, maybePCRead, maybePCWrite),
+ 'Rc' : ('IntReg', 'uw', 'urc', 'IsInteger', 2, maybePCRead, maybePCWrite),
#General Purpose Floating Point Reg Operands
'Fd': ('FloatReg', 'df', 'FD', 'IsFloating', 2),
diff --git a/src/arch/arm/isa/templates/macromem.isa b/src/arch/arm/isa/templates/macromem.isa
index 400342a29..5397a2637 100644
--- a/src/arch/arm/isa/templates/macromem.isa
+++ b/src/arch/arm/isa/templates/macromem.isa
@@ -74,7 +74,32 @@ def template MicroMemConstructor {{
////////////////////////////////////////////////////////////////////
//
-// Integer = Integer op Immediate microops
+// Neon load/store microops
+//
+
+def template MicroNeonMemDeclare {{
+ template <class Element>
+ class %(class_name)s : public %(base_class)s
+ {
+ public:
+ %(class_name)s(ExtMachInst machInst, RegIndex _dest,
+ RegIndex _ura, uint32_t _imm, unsigned extraMemFlags)
+ : %(base_class)s("%(mnemonic)s", machInst,
+ %(op_class)s, _dest, _ura, _imm)
+ {
+ memAccessFlags |= extraMemFlags;
+ %(constructor)s;
+ }
+
+ %(BasicExecDeclare)s
+ %(InitiateAccDeclare)s
+ %(CompleteAccDeclare)s
+ };
+}};
+
+////////////////////////////////////////////////////////////////////
+//
+// Integer = Integer op Integer microops
//
def template MicroIntDeclare {{
@@ -82,13 +107,130 @@ def template MicroIntDeclare {{
{
public:
%(class_name)s(ExtMachInst machInst,
+ RegIndex _ura, RegIndex _urb, RegIndex _urc);
+ %(BasicExecDeclare)s
+ };
+}};
+
+def template MicroIntConstructor {{
+ %(class_name)s::%(class_name)s(ExtMachInst machInst,
+ RegIndex _ura,
+ RegIndex _urb,
+ RegIndex _urc)
+ : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
+ _ura, _urb, _urc)
+ {
+ %(constructor)s;
+ }
+}};
+
+def template MicroNeonMemExecDeclare {{
+ template
+ Fault %(class_name)s<%(targs)s>::execute(
+ %(CPU_exec_context)s *, Trace::InstRecord *) const;
+ template
+ Fault %(class_name)s<%(targs)s>::initiateAcc(
+ %(CPU_exec_context)s *, Trace::InstRecord *) const;
+ template
+ Fault %(class_name)s<%(targs)s>::completeAcc(PacketPtr,
+ %(CPU_exec_context)s *, Trace::InstRecord *) const;
+}};
+
+def template MicroNeonExecDeclare {{
+ template
+ Fault %(class_name)s<%(targs)s>::execute(
+ %(CPU_exec_context)s *, Trace::InstRecord *) const;
+}};
+
+////////////////////////////////////////////////////////////////////
+//
+// Neon (de)interleaving microops
+//
+
+def template MicroNeonMixDeclare {{
+ template <class Element>
+ class %(class_name)s : public %(base_class)s
+ {
+ public:
+ %(class_name)s(ExtMachInst machInst, RegIndex _dest, RegIndex _op1,
+ uint8_t _step) :
+ %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
+ _dest, _op1, _step)
+ {
+ %(constructor)s;
+ }
+
+ %(BasicExecDeclare)s
+ };
+}};
+
+def template MicroNeonMixExecute {{
+ template <class Element>
+ Fault %(class_name)s<Element>::execute(%(CPU_exec_context)s *xc,
+ Trace::InstRecord *traceData) const
+ {
+ Fault fault = NoFault;
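+ // The self-assignment below keeps the compiler from warning when
+ // the microop code never reads resTemp.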
+ uint64_t resTemp = 0;
+ resTemp = resTemp;
+ %(op_decl)s;
+ %(op_rd)s;
+
+ if (%(predicate_test)s)
+ {
+ %(code)s;
+ if (fault == NoFault)
+ {
+ %(op_wb)s;
+ }
+ }
+
+ if (fault == NoFault && machInst.itstateMask != 0) {
+ xc->setMiscReg(MISCREG_ITSTATE, machInst.newItstate);
+ }
+
+ return fault;
+ }
+}};
+
+////////////////////////////////////////////////////////////////////
+//
+// Neon (un)packing microops using a particular lane
+//
+
+def template MicroNeonMixLaneDeclare {{
+ template <class Element>
+ class %(class_name)s : public %(base_class)s
+ {
+ public:
+ %(class_name)s(ExtMachInst machInst, RegIndex _dest, RegIndex _op1,
+ uint8_t _step, unsigned _lane) :
+ %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
+ _dest, _op1, _step, _lane)
+ {
+ %(constructor)s;
+ }
+
+ %(BasicExecDeclare)s
+ };
+}};
+
+////////////////////////////////////////////////////////////////////
+//
+// Integer = Integer op Immediate microops
+//
+
+def template MicroIntImmDeclare {{
+ class %(class_name)s : public %(base_class)s
+ {
+ public:
+ %(class_name)s(ExtMachInst machInst,
RegIndex _ura, RegIndex _urb,
uint8_t _imm);
%(BasicExecDeclare)s
};
}};
-def template MicroIntConstructor {{
+def template MicroIntImmConstructor {{
%(class_name)s::%(class_name)s(ExtMachInst machInst,
RegIndex _ura,
RegIndex _urb,
@@ -132,6 +274,52 @@ def template MacroMemConstructor {{
}};
+def template VMemMultDeclare {{
+class %(class_name)s : public %(base_class)s
+{
+ public:
+ // Constructor
+ %(class_name)s(ExtMachInst machInst, unsigned width,
+ RegIndex rn, RegIndex vd, unsigned regs, unsigned inc,
+ uint32_t size, uint32_t align, RegIndex rm);
+ %(BasicExecPanic)s
+};
+}};
+
+def template VMemMultConstructor {{
+%(class_name)s::%(class_name)s(ExtMachInst machInst, unsigned width,
+ RegIndex rn, RegIndex vd, unsigned regs, unsigned inc,
+ uint32_t size, uint32_t align, RegIndex rm)
+ : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, width,
+ rn, vd, regs, inc, size, align, rm)
+{
+ %(constructor)s;
+}
+}};
+
+def template VMemSingleDeclare {{
+class %(class_name)s : public %(base_class)s
+{
+ public:
+ // Constructor
+ %(class_name)s(ExtMachInst machInst, bool all, unsigned width,
+ RegIndex rn, RegIndex vd, unsigned regs, unsigned inc,
+ uint32_t size, uint32_t align, RegIndex rm, unsigned lane = 0);
+ %(BasicExecPanic)s
+};
+}};
+
+def template VMemSingleConstructor {{
+%(class_name)s::%(class_name)s(ExtMachInst machInst, bool all, unsigned width,
+ RegIndex rn, RegIndex vd, unsigned regs, unsigned inc,
+ uint32_t size, uint32_t align, RegIndex rm, unsigned lane)
+ : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, all, width,
+ rn, vd, regs, inc, size, align, rm, lane)
+{
+ %(constructor)s;
+}
+}};
+
def template MacroVFPMemDeclare {{
/**
* Static instructions class for a store multiple instruction
diff --git a/src/arch/arm/isa/templates/mem.isa b/src/arch/arm/isa/templates/mem.isa
index 84cd1dd8f..686a8b0aa 100644
--- a/src/arch/arm/isa/templates/mem.isa
+++ b/src/arch/arm/isa/templates/mem.isa
@@ -180,6 +180,42 @@ def template LoadExecute {{
}
}};
+def template NeonLoadExecute {{
+ template <class Element>
+ Fault %(class_name)s<Element>::execute(
+ %(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const
+ {
+ Addr EA;
+ Fault fault = NoFault;
+
+ %(op_decl)s;
+ %(mem_decl)s;
+ %(op_rd)s;
+ %(ea_code)s;
+
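+ // MemUnion (declared by mem_decl) overlays the raw bytes read from
+ // memory with the element view that memacc_code unpacks below.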
+ MemUnion memUnion;
+ uint8_t *dataPtr = memUnion.bytes;
+
+ if (%(predicate_test)s)
+ {
+ if (fault == NoFault) {
+ fault = xc->readBytes(EA, dataPtr, %(size)d, memAccessFlags);
+ %(memacc_code)s;
+ }
+
+ if (fault == NoFault) {
+ %(op_wb)s;
+ }
+ }
+
+ if (fault == NoFault && machInst.itstateMask != 0) {
+ xc->setMiscReg(MISCREG_ITSTATE, machInst.newItstate);
+ }
+
+ return fault;
+ }
+}};
+
def template StoreExecute {{
Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
Trace::InstRecord *traceData) const
@@ -217,6 +253,46 @@ def template StoreExecute {{
}
}};
+def template NeonStoreExecute {{
+ template <class Element>
+ Fault %(class_name)s<Element>::execute(
+ %(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const
+ {
+ Addr EA;
+ Fault fault = NoFault;
+
+ %(op_decl)s;
+ %(mem_decl)s;
+ %(op_rd)s;
+ %(ea_code)s;
+
+ MemUnion memUnion;
+ uint8_t *dataPtr = memUnion.bytes;
+
+ if (%(predicate_test)s)
+ {
+ if (fault == NoFault) {
+ %(memacc_code)s;
+ }
+
+ if (fault == NoFault) {
+ fault = xc->writeBytes(dataPtr, %(size)d, EA,
+ memAccessFlags, NULL);
+ }
+
+ if (fault == NoFault) {
+ %(op_wb)s;
+ }
+ }
+
+ if (fault == NoFault && machInst.itstateMask != 0) {
+ xc->setMiscReg(MISCREG_ITSTATE, machInst.newItstate);
+ }
+
+ return fault;
+ }
+}};
+
def template StoreExExecute {{
Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
Trace::InstRecord *traceData) const
@@ -336,6 +412,45 @@ def template StoreInitiateAcc {{
}
}};
+def template NeonStoreInitiateAcc {{
+ template <class Element>
+ Fault %(class_name)s<Element>::initiateAcc(
+ %(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const
+ {
+ Addr EA;
+ Fault fault = NoFault;
+
+ %(op_decl)s;
+ %(mem_decl)s;
+ %(op_rd)s;
+ %(ea_code)s;
+
+ if (%(predicate_test)s)
+ {
+ MemUnion memUnion;
+ if (fault == NoFault) {
+ %(memacc_code)s;
+ }
+
+ if (fault == NoFault) {
+ fault = xc->writeBytes(memUnion.bytes, %(size)d, EA,
+ memAccessFlags, NULL);
+ }
+
+ // Need to write back any potential address register update
+ if (fault == NoFault) {
+ %(op_wb)s;
+ }
+ }
+
+ if (fault == NoFault && machInst.itstateMask != 0) {
+ xc->setMiscReg(MISCREG_ITSTATE, machInst.newItstate);
+ }
+
+ return fault;
+ }
+}};
+
def template LoadInitiateAcc {{
Fault %(class_name)s::initiateAcc(%(CPU_exec_context)s *xc,
Trace::InstRecord *traceData) const
@@ -363,6 +478,31 @@ def template LoadInitiateAcc {{
}
}};
+def template NeonLoadInitiateAcc {{
+ template <class Element>
+ Fault %(class_name)s<Element>::initiateAcc(
+ %(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const
+ {
+ Addr EA;
+ Fault fault = NoFault;
+
+ %(op_src_decl)s;
+ %(op_rd)s;
+ %(ea_code)s;
+
+ if (%(predicate_test)s)
+ {
+ if (fault == NoFault) {
+ fault = xc->readBytes(EA, NULL, %(size)d, memAccessFlags);
+ }
+ } else if (fault == NoFault && machInst.itstateMask != 0) {
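+ // A predicated-false load never issues its memory request, so
+ // completeAcc will not run; advance the IT state here instead.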
+ xc->setMiscReg(MISCREG_ITSTATE, machInst.newItstate);
+ }
+
+ return fault;
+ }
+}};
+
def template LoadCompleteAcc {{
Fault %(class_name)s::completeAcc(PacketPtr pkt,
%(CPU_exec_context)s *xc,
@@ -395,6 +535,40 @@ def template LoadCompleteAcc {{
}
}};
+def template NeonLoadCompleteAcc {{
+ template <class Element>
+ Fault %(class_name)s<Element>::completeAcc(
+ PacketPtr pkt, %(CPU_exec_context)s *xc,
+ Trace::InstRecord *traceData) const
+ {
+ Fault fault = NoFault;
+
+ %(mem_decl)s;
+ %(op_decl)s;
+ %(op_rd)s;
+
+ if (%(predicate_test)s)
+ {
+ // ARM instructions will not have a pkt if the predicate is false
+ MemUnion &memUnion = *(MemUnion *)pkt->getPtr<uint8_t>();
+
+ if (fault == NoFault) {
+ %(memacc_code)s;
+ }
+
+ if (fault == NoFault) {
+ %(op_wb)s;
+ }
+ }
+
+ if (fault == NoFault && machInst.itstateMask != 0) {
+ xc->setMiscReg(MISCREG_ITSTATE, machInst.newItstate);
+ }
+
+ return fault;
+ }
+}};
+
def template StoreCompleteAcc {{
Fault %(class_name)s::completeAcc(PacketPtr pkt,
%(CPU_exec_context)s *xc,
@@ -420,6 +594,32 @@ def template StoreCompleteAcc {{
}
}};
+def template NeonStoreCompleteAcc {{
+ template <class Element>
+ Fault %(class_name)s<Element>::completeAcc(
+ PacketPtr pkt, %(CPU_exec_context)s *xc,
+ Trace::InstRecord *traceData) const
+ {
+ Fault fault = NoFault;
+
+ %(op_decl)s;
+ %(op_rd)s;
+
+ if (%(predicate_test)s)
+ {
+ if (fault == NoFault) {
+ %(op_wb)s;
+ }
+ }
+
+ if (fault == NoFault && machInst.itstateMask != 0) {
+ xc->setMiscReg(MISCREG_ITSTATE, machInst.newItstate);
+ }
+
+ return fault;
+ }
+}};
+
def template StoreExCompleteAcc {{
Fault %(class_name)s::completeAcc(PacketPtr pkt,
%(CPU_exec_context)s *xc,
diff --git a/src/arch/arm/isa/templates/neon.isa b/src/arch/arm/isa/templates/neon.isa
new file mode 100644
index 000000000..e402979dc
--- /dev/null
+++ b/src/arch/arm/isa/templates/neon.isa
@@ -0,0 +1,227 @@
+// -*- mode:c++ -*-
+
+// Copyright (c) 2010 ARM Limited
+// All rights reserved
+//
+// The license below extends only to copyright in the software and shall
+// not be construed as granting a license to any other intellectual
+// property including but not limited to intellectual property relating
+// to a hardware implementation of the functionality of the software
+// licensed hereunder. You may use the software subject to the license
+// terms below provided that you ensure that this notice is replicated
+// unmodified and in its entirety in all distributions of the software,
+// modified or unmodified, in source code or in binary form.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Gabe Black
+
+def template NeonRegRegRegOpDeclare {{
+template <class _Element>
+class %(class_name)s : public %(base_class)s
+{
+ protected:
+ typedef _Element Element;
+ public:
+ // Constructor
+ %(class_name)s(ExtMachInst machInst,
+ IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2)
+ : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
+ _dest, _op1, _op2)
+ {
+ %(constructor)s;
+ }
+
+ %(BasicExecDeclare)s
+};
+}};
+
+def template NeonRegRegRegImmOpDeclare {{
+template <class _Element>
+class %(class_name)s : public %(base_class)s
+{
+ protected:
+ typedef _Element Element;
+ public:
+ // Constructor
+ %(class_name)s(ExtMachInst machInst,
+ IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
+ uint64_t _imm)
+ : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
+ _dest, _op1, _op2, _imm)
+ {
+ %(constructor)s;
+ }
+
+ %(BasicExecDeclare)s
+};
+}};
+
+def template NeonRegRegImmOpDeclare {{
+template <class _Element>
+class %(class_name)s : public %(base_class)s
+{
+ protected:
+ typedef _Element Element;
+ public:
+ // Constructor
+ %(class_name)s(ExtMachInst machInst,
+ IntRegIndex _dest, IntRegIndex _op1, uint64_t _imm)
+ : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
+ _dest, _op1, _imm)
+ {
+ %(constructor)s;
+ }
+
+ %(BasicExecDeclare)s
+};
+}};
+
+def template NeonRegImmOpDeclare {{
+template <class _Element>
+class %(class_name)s : public %(base_class)s
+{
+ protected:
+ typedef _Element Element;
+ public:
+ // Constructor
+ %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, uint64_t _imm)
+ : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _dest, _imm)
+ {
+ %(constructor)s;
+ }
+
+ %(BasicExecDeclare)s
+};
+}};
+
+def template NeonRegRegOpDeclare {{
+template <class _Element>
+class %(class_name)s : public %(base_class)s
+{
+ protected:
+ typedef _Element Element;
+ public:
+ // Constructor
+ %(class_name)s(ExtMachInst machInst,
+ IntRegIndex _dest, IntRegIndex _op1)
+ : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
+ _dest, _op1)
+ {
+ %(constructor)s;
+ }
+
+ %(BasicExecDeclare)s
+};
+}};
+
+def template NeonExecDeclare {{
+ template
+ Fault %(class_name)s<%(targs)s>::execute(
+ %(CPU_exec_context)s *, Trace::InstRecord *) const;
+}};
+
+def template NeonEqualRegExecute {{
+ template <class Element>
+ Fault %(class_name)s<Element>::execute(%(CPU_exec_context)s *xc,
+ Trace::InstRecord *traceData) const
+ {
+ Fault fault = NoFault;
+ %(op_decl)s;
+ %(op_rd)s;
+
+ const unsigned rCount = %(r_count)d;
+ const unsigned eCount = rCount * sizeof(FloatRegBits) / sizeof(Element);
+
+ union RegVect {
+ FloatRegBits regs[rCount];
+ Element elements[eCount];
+ };
+
+ if (%(predicate_test)s)
+ {
+ %(code)s;
+ if (fault == NoFault)
+ {
+ %(op_wb)s;
+ }
+ }
+
+ if (fault == NoFault && machInst.itstateMask != 0) {
+ xc->setMiscReg(MISCREG_ITSTATE, machInst.newItstate);
+ }
+
+ return fault;
+ }
+}};
+
+output header {{
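+ // These overloads are only declared, never defined or called: they
+ // let typeof(nextBiggerType(x)) name the double-width version of
+ // x's type at compile time.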
+ uint16_t nextBiggerType(uint8_t);
+ uint32_t nextBiggerType(uint16_t);
+ uint64_t nextBiggerType(uint32_t);
+ int16_t nextBiggerType(int8_t);
+ int32_t nextBiggerType(int16_t);
+ int64_t nextBiggerType(int32_t);
+}};
+
+def template NeonUnequalRegExecute {{
+ template <class Element>
+ Fault %(class_name)s<Element>::execute(%(CPU_exec_context)s *xc,
+ Trace::InstRecord *traceData) const
+ {
+ typedef typeof(nextBiggerType((Element)0)) BigElement;
+ Fault fault = NoFault;
+ %(op_decl)s;
+ %(op_rd)s;
+
+ const unsigned rCount = %(r_count)d;
+ const unsigned eCount = rCount * sizeof(FloatRegBits) / sizeof(Element);
+
+ union RegVect {
+ FloatRegBits regs[rCount];
+ Element elements[eCount];
+ BigElement bigElements[eCount / 2];
+ };
+
+ union BigRegVect {
+ FloatRegBits regs[2 * rCount];
+ BigElement elements[eCount];
+ };
+
+ if (%(predicate_test)s)
+ {
+ %(code)s;
+ if (fault == NoFault)
+ {
+ %(op_wb)s;
+ }
+ }
+
+ if (fault == NoFault && machInst.itstateMask != 0) {
+ xc->setMiscReg(MISCREG_ITSTATE, machInst.newItstate);
+ }
+
+ return fault;
+ }
+}};
diff --git a/src/arch/arm/isa/templates/templates.isa b/src/arch/arm/isa/templates/templates.isa
index 2584ec1f2..148139225 100644
--- a/src/arch/arm/isa/templates/templates.isa
+++ b/src/arch/arm/isa/templates/templates.isa
@@ -60,3 +60,6 @@
//Templates for VFP instructions
##include "vfp.isa"
+
+//Templates for Neon instructions
+##include "neon.isa"