author     Gabe Black <gblack@eecs.umich.edu>   2010-08-25 19:10:42 -0500
committer  Gabe Black <gblack@eecs.umich.edu>   2010-08-25 19:10:42 -0500
commit     6368edb281f162e4fbb0a91744992a25134135f4
tree       e84dfa7d10903e6c7a56e01cc6ca23f4b0d41908
parent     f4f6b31df1a8787a12d71108eac24543bdf541e3
ARM: Implement all ARM SIMD instructions.
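The NEON element/structure loads and stores are implemented as predicated
macro-ops (VldMultOp, VldSingleOp, VstMultOp, VstSingleOp). Each expands into
memory micro-ops, interleave/deinterleave or pack/unpack micro-ops staged
through temporary FP registers above NumFloatArchRegs, and an optional
base-register writeback micro-op. Double-precision VFP multiple transfers are
also split into per-word micro-ops.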
Diffstat (limited to 'src/arch/arm/insts/macromem.cc')
 src/arch/arm/insts/macromem.cc | 684
 1 file changed, 673 insertions(+), 11 deletions(-)
diff --git a/src/arch/arm/insts/macromem.cc b/src/arch/arm/insts/macromem.cc
index 2a2412912..5602231f9 100644
--- a/src/arch/arm/insts/macromem.cc
+++ b/src/arch/arm/insts/macromem.cc
@@ -137,6 +137,647 @@ MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst,
}
}
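+// Multi-structure NEON load (VLDn, multiple structures): load 'regs' double
+// registers starting at [rn], optionally deinterleave 'elems' element streams
+// into the destination registers, and optionally write back the base.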
+VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
+ unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
+ unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
+ PredMacroOp(mnem, machInst, __opClass)
+{
+ assert(regs > 0 && regs <= 4);
+ assert(regs % elems == 0);
+
+ numMicroops = (regs > 2) ? 2 : 1;
+ bool wb = (rm != 15);
+ bool deinterleave = (elems > 1);
+
+ if (wb) numMicroops++;
+ if (deinterleave) numMicroops += (regs / elems);
+ microOps = new StaticInstPtr[numMicroops];
+
+ RegIndex rMid = deinterleave ? NumFloatArchRegs : vd * 2;
+
+ uint32_t noAlign = TLB::MustBeOne;
+
+ unsigned uopIdx = 0;
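+    // Split the load into chunks of at most 16 bytes (two D registers);
+    // only the first access carries the architectural alignment check,
+    // the rest use noAlign.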
+ switch (regs) {
+ case 4:
+ microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
+ size, machInst, rMid, rn, 0, align);
+ microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
+ size, machInst, rMid + 4, rn, 16, noAlign);
+ break;
+ case 3:
+ microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
+ size, machInst, rMid, rn, 0, align);
+ microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
+ size, machInst, rMid + 4, rn, 16, noAlign);
+ break;
+ case 2:
+ microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
+ size, machInst, rMid, rn, 0, align);
+ break;
+ case 1:
+ microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
+ size, machInst, rMid, rn, 0, align);
+ break;
+ default:
+ panic("Unrecognized number of registers %d.\n", regs);
+ }
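+    // Base writeback: rm == 13 (SP) post-increments by the transfer size;
+    // any other rm (PC already excluded by wb) adds the register.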
+ if (wb) {
+ if (rm != 15 && rm != 13) {
+ microOps[uopIdx++] =
+ new MicroAddUop(machInst, rn, rn, rm);
+ } else {
+ microOps[uopIdx++] =
+ new MicroAddiUop(machInst, rn, rn, regs * 8);
+ }
+ }
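+    // The loads above targeted the temporaries at rMid; deinterleave them
+    // into the architectural destinations with register stride 'inc'.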
+ if (deinterleave) {
+ switch (elems) {
+ case 4:
+ assert(regs == 4);
+ microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon8Uop>(
+ size, machInst, vd * 2, rMid, inc * 2);
+ break;
+ case 3:
+ assert(regs == 3);
+ microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon6Uop>(
+ size, machInst, vd * 2, rMid, inc * 2);
+ break;
+ case 2:
+ assert(regs == 4 || regs == 2);
+ if (regs == 4) {
+ microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
+ size, machInst, vd * 2, rMid, inc * 2);
+ microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
+ size, machInst, vd * 2 + 2, rMid + 4, inc * 2);
+ } else {
+ microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
+ size, machInst, vd * 2, rMid, inc * 2);
+ }
+ break;
+ default:
+ panic("Bad number of elements to deinterleave %d.\n", elems);
+ }
+ }
+ assert(uopIdx == numMicroops);
+
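+    // All but the last micro-op are delayed-commit, so the macro-op
+    // commits as a unit at its final micro-op.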
+ for (unsigned i = 0; i < numMicroops - 1; i++) {
+ MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
+ assert(uopPtr);
+ uopPtr->setDelayedCommit();
+ }
+ microOps[numMicroops - 1]->setLastMicroop();
+}
+
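+// Single-structure NEON load (VLDn, single or all lanes): load one element
+// per structure into a temporary register, then unpack into the selected
+// lane of each destination register, or replicate to all lanes.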
+VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst,
+ OpClass __opClass, bool all, unsigned elems,
+ RegIndex rn, RegIndex vd, unsigned regs,
+ unsigned inc, uint32_t size, uint32_t align,
+ RegIndex rm, unsigned lane) :
+ PredMacroOp(mnem, machInst, __opClass)
+{
+ assert(regs > 0 && regs <= 4);
+ assert(regs % elems == 0);
+
+ unsigned eBytes = (1 << size);
+ unsigned loadSize = eBytes * elems;
+ unsigned loadRegs M5_VAR_USED = (loadSize + sizeof(FloatRegBits) - 1) /
+ sizeof(FloatRegBits);
+
+ assert(loadRegs > 0 && loadRegs <= 4);
+
+ numMicroops = 1;
+ bool wb = (rm != 15);
+
+ if (wb) numMicroops++;
+ numMicroops += (regs / elems);
+ microOps = new StaticInstPtr[numMicroops];
+
+ RegIndex ufp0 = NumFloatArchRegs;
+
+ unsigned uopIdx = 0;
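+    // Choose a load micro-op covering the whole transfer
+    // (loadSize = elems * eBytes) into the temporary register ufp0.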
+ switch (loadSize) {
+ case 1:
+ microOps[uopIdx++] = new MicroLdrNeon1Uop<uint8_t>(
+ machInst, ufp0, rn, 0, align);
+ break;
+ case 2:
+ if (eBytes == 2) {
+ microOps[uopIdx++] = new MicroLdrNeon2Uop<uint16_t>(
+ machInst, ufp0, rn, 0, align);
+ } else {
+ microOps[uopIdx++] = new MicroLdrNeon2Uop<uint8_t>(
+ machInst, ufp0, rn, 0, align);
+ }
+ break;
+ case 3:
+ microOps[uopIdx++] = new MicroLdrNeon3Uop<uint8_t>(
+ machInst, ufp0, rn, 0, align);
+ break;
+ case 4:
+ switch (eBytes) {
+ case 1:
+ microOps[uopIdx++] = new MicroLdrNeon4Uop<uint8_t>(
+ machInst, ufp0, rn, 0, align);
+ break;
+ case 2:
+ microOps[uopIdx++] = new MicroLdrNeon4Uop<uint16_t>(
+ machInst, ufp0, rn, 0, align);
+ break;
+ case 4:
+ microOps[uopIdx++] = new MicroLdrNeon4Uop<uint32_t>(
+ machInst, ufp0, rn, 0, align);
+ break;
+ }
+ break;
+ case 6:
+ microOps[uopIdx++] = new MicroLdrNeon6Uop<uint16_t>(
+ machInst, ufp0, rn, 0, align);
+ break;
+ case 8:
+ switch (eBytes) {
+ case 2:
+ microOps[uopIdx++] = new MicroLdrNeon8Uop<uint16_t>(
+ machInst, ufp0, rn, 0, align);
+ break;
+ case 4:
+ microOps[uopIdx++] = new MicroLdrNeon8Uop<uint32_t>(
+ machInst, ufp0, rn, 0, align);
+ break;
+ }
+ break;
+ case 12:
+ microOps[uopIdx++] = new MicroLdrNeon12Uop<uint32_t>(
+ machInst, ufp0, rn, 0, align);
+ break;
+ case 16:
+ microOps[uopIdx++] = new MicroLdrNeon16Uop<uint32_t>(
+ machInst, ufp0, rn, 0, align);
+ break;
+ default:
+        panic("Unrecognized load size %d.\n", loadSize);
+ }
+ if (wb) {
+ if (rm != 15 && rm != 13) {
+ microOps[uopIdx++] =
+ new MicroAddUop(machInst, rn, rn, rm);
+ } else {
+ microOps[uopIdx++] =
+ new MicroAddiUop(machInst, rn, rn, loadSize);
+ }
+ }
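+    // Unpack from ufp0 into the destination lane(s); the 'all' forms
+    // replicate the loaded element to every lane.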
+ switch (elems) {
+ case 4:
+ assert(regs == 4);
+ switch (size) {
+ case 0:
+ if (all) {
+ microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint8_t>(
+ machInst, vd * 2, ufp0, inc * 2);
+ } else {
+ microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint8_t>(
+ machInst, vd * 2, ufp0, inc * 2, lane);
+ }
+ break;
+ case 1:
+ if (all) {
+ microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint16_t>(
+ machInst, vd * 2, ufp0, inc * 2);
+ } else {
+ microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint16_t>(
+ machInst, vd * 2, ufp0, inc * 2, lane);
+ }
+ break;
+ case 2:
+ if (all) {
+ microOps[uopIdx++] = new MicroUnpackAllNeon4to8Uop<uint32_t>(
+ machInst, vd * 2, ufp0, inc * 2);
+ } else {
+ microOps[uopIdx++] = new MicroUnpackNeon4to8Uop<uint32_t>(
+ machInst, vd * 2, ufp0, inc * 2, lane);
+ }
+ break;
+ default:
+ panic("Bad size %d.\n", size);
+ break;
+ }
+ break;
+ case 3:
+ assert(regs == 3);
+ switch (size) {
+ case 0:
+ if (all) {
+ microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint8_t>(
+ machInst, vd * 2, ufp0, inc * 2);
+ } else {
+ microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint8_t>(
+ machInst, vd * 2, ufp0, inc * 2, lane);
+ }
+ break;
+ case 1:
+ if (all) {
+ microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint16_t>(
+ machInst, vd * 2, ufp0, inc * 2);
+ } else {
+ microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint16_t>(
+ machInst, vd * 2, ufp0, inc * 2, lane);
+ }
+ break;
+ case 2:
+ if (all) {
+ microOps[uopIdx++] = new MicroUnpackAllNeon4to6Uop<uint32_t>(
+ machInst, vd * 2, ufp0, inc * 2);
+ } else {
+ microOps[uopIdx++] = new MicroUnpackNeon4to6Uop<uint32_t>(
+ machInst, vd * 2, ufp0, inc * 2, lane);
+ }
+ break;
+ default:
+ panic("Bad size %d.\n", size);
+ break;
+ }
+ break;
+ case 2:
+ assert(regs == 2);
+ assert(loadRegs <= 2);
+ switch (size) {
+ case 0:
+ if (all) {
+ microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint8_t>(
+ machInst, vd * 2, ufp0, inc * 2);
+ } else {
+ microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint8_t>(
+ machInst, vd * 2, ufp0, inc * 2, lane);
+ }
+ break;
+ case 1:
+ if (all) {
+ microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint16_t>(
+ machInst, vd * 2, ufp0, inc * 2);
+ } else {
+ microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint16_t>(
+ machInst, vd * 2, ufp0, inc * 2, lane);
+ }
+ break;
+ case 2:
+ if (all) {
+ microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint32_t>(
+ machInst, vd * 2, ufp0, inc * 2);
+ } else {
+ microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint32_t>(
+ machInst, vd * 2, ufp0, inc * 2, lane);
+ }
+ break;
+ default:
+ panic("Bad size %d.\n", size);
+ break;
+ }
+ break;
+ case 1:
+ assert(regs == 1 || (all && regs == 2));
+ assert(loadRegs <= 2);
+ for (unsigned offset = 0; offset < regs; offset++) {
+ switch (size) {
+ case 0:
+ if (all) {
+ microOps[uopIdx++] =
+ new MicroUnpackAllNeon2to2Uop<uint8_t>(
+ machInst, (vd + offset) * 2, ufp0, inc * 2);
+ } else {
+ microOps[uopIdx++] =
+ new MicroUnpackNeon2to2Uop<uint8_t>(
+ machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
+ }
+ break;
+ case 1:
+ if (all) {
+ microOps[uopIdx++] =
+ new MicroUnpackAllNeon2to2Uop<uint16_t>(
+ machInst, (vd + offset) * 2, ufp0, inc * 2);
+ } else {
+ microOps[uopIdx++] =
+ new MicroUnpackNeon2to2Uop<uint16_t>(
+ machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
+ }
+ break;
+ case 2:
+ if (all) {
+ microOps[uopIdx++] =
+ new MicroUnpackAllNeon2to2Uop<uint32_t>(
+ machInst, (vd + offset) * 2, ufp0, inc * 2);
+ } else {
+ microOps[uopIdx++] =
+ new MicroUnpackNeon2to2Uop<uint32_t>(
+ machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
+ }
+ break;
+ default:
+ panic("Bad size %d.\n", size);
+ break;
+ }
+ }
+ break;
+ default:
+ panic("Bad number of elements to unpack %d.\n", elems);
+ }
+ assert(uopIdx == numMicroops);
+
+ for (unsigned i = 0; i < numMicroops - 1; i++) {
+ MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
+ assert(uopPtr);
+ uopPtr->setDelayedCommit();
+ }
+ microOps[numMicroops - 1]->setLastMicroop();
+}
+
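+// Multi-structure NEON store (VSTn, multiple structures): optionally
+// interleave 'elems' element streams through temporary registers, then store
+// 'regs' double registers to [rn] and optionally write back the base.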
+VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
+ unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
+ unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
+ PredMacroOp(mnem, machInst, __opClass)
+{
+ assert(regs > 0 && regs <= 4);
+ assert(regs % elems == 0);
+
+ numMicroops = (regs > 2) ? 2 : 1;
+ bool wb = (rm != 15);
+ bool interleave = (elems > 1);
+
+ if (wb) numMicroops++;
+ if (interleave) numMicroops += (regs / elems);
+ microOps = new StaticInstPtr[numMicroops];
+
+ uint32_t noAlign = TLB::MustBeOne;
+
+ RegIndex rMid = interleave ? NumFloatArchRegs : vd * 2;
+
+ unsigned uopIdx = 0;
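+    // Interleave the source registers through the temporaries at rMid
+    // first, so the stores below see the elements in structure order.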
+ if (interleave) {
+ switch (elems) {
+ case 4:
+ assert(regs == 4);
+ microOps[uopIdx++] = newNeonMixInst<MicroInterNeon8Uop>(
+ size, machInst, rMid, vd * 2, inc * 2);
+ break;
+ case 3:
+ assert(regs == 3);
+ microOps[uopIdx++] = newNeonMixInst<MicroInterNeon6Uop>(
+ size, machInst, rMid, vd * 2, inc * 2);
+ break;
+ case 2:
+ assert(regs == 4 || regs == 2);
+ if (regs == 4) {
+ microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
+ size, machInst, rMid, vd * 2, inc * 2);
+ microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
+ size, machInst, rMid + 4, vd * 2 + 2, inc * 2);
+ } else {
+ microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
+ size, machInst, rMid, vd * 2, inc * 2);
+ }
+ break;
+ default:
+ panic("Bad number of elements to interleave %d.\n", elems);
+ }
+ }
+ switch (regs) {
+ case 4:
+ microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
+ size, machInst, rMid, rn, 0, align);
+ microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
+ size, machInst, rMid + 4, rn, 16, noAlign);
+ break;
+ case 3:
+ microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
+ size, machInst, rMid, rn, 0, align);
+ microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
+ size, machInst, rMid + 4, rn, 16, noAlign);
+ break;
+ case 2:
+ microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
+ size, machInst, rMid, rn, 0, align);
+ break;
+ case 1:
+ microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
+ size, machInst, rMid, rn, 0, align);
+ break;
+ default:
+ panic("Unrecognized number of registers %d.\n", regs);
+ }
+ if (wb) {
+ if (rm != 15 && rm != 13) {
+ microOps[uopIdx++] =
+ new MicroAddUop(machInst, rn, rn, rm);
+ } else {
+ microOps[uopIdx++] =
+ new MicroAddiUop(machInst, rn, rn, regs * 8);
+ }
+ }
+ assert(uopIdx == numMicroops);
+
+ for (unsigned i = 0; i < numMicroops - 1; i++) {
+ MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
+ assert(uopPtr);
+ uopPtr->setDelayedCommit();
+ }
+ microOps[numMicroops - 1]->setLastMicroop();
+}
+
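+// Single-structure NEON store (VSTn, single lane): pack the selected lane of
+// each source register into a temporary register, store it, and optionally
+// write back the base. There is no all-lanes store form, hence assert(!all).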
+VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst,
+ OpClass __opClass, bool all, unsigned elems,
+ RegIndex rn, RegIndex vd, unsigned regs,
+ unsigned inc, uint32_t size, uint32_t align,
+ RegIndex rm, unsigned lane) :
+ PredMacroOp(mnem, machInst, __opClass)
+{
+ assert(!all);
+ assert(regs > 0 && regs <= 4);
+ assert(regs % elems == 0);
+
+ unsigned eBytes = (1 << size);
+ unsigned storeSize = eBytes * elems;
+ unsigned storeRegs M5_VAR_USED = (storeSize + sizeof(FloatRegBits) - 1) /
+ sizeof(FloatRegBits);
+
+ assert(storeRegs > 0 && storeRegs <= 4);
+
+ numMicroops = 1;
+ bool wb = (rm != 15);
+
+ if (wb) numMicroops++;
+ numMicroops += (regs / elems);
+ microOps = new StaticInstPtr[numMicroops];
+
+ RegIndex ufp0 = NumFloatArchRegs;
+
+ unsigned uopIdx = 0;
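+    // Pack the selected lane of each source register into the temporary
+    // ufp0 registers before issuing the store.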
+ switch (elems) {
+ case 4:
+ assert(regs == 4);
+ switch (size) {
+ case 0:
+ microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint8_t>(
+ machInst, ufp0, vd * 2, inc * 2, lane);
+ break;
+ case 1:
+ microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint16_t>(
+ machInst, ufp0, vd * 2, inc * 2, lane);
+ break;
+ case 2:
+ microOps[uopIdx++] = new MicroPackNeon8to4Uop<uint32_t>(
+ machInst, ufp0, vd * 2, inc * 2, lane);
+ break;
+ default:
+ panic("Bad size %d.\n", size);
+ break;
+ }
+ break;
+ case 3:
+ assert(regs == 3);
+ switch (size) {
+ case 0:
+ microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint8_t>(
+ machInst, ufp0, vd * 2, inc * 2, lane);
+ break;
+ case 1:
+ microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint16_t>(
+ machInst, ufp0, vd * 2, inc * 2, lane);
+ break;
+ case 2:
+ microOps[uopIdx++] = new MicroPackNeon6to4Uop<uint32_t>(
+ machInst, ufp0, vd * 2, inc * 2, lane);
+ break;
+ default:
+ panic("Bad size %d.\n", size);
+ break;
+ }
+ break;
+ case 2:
+ assert(regs == 2);
+ assert(storeRegs <= 2);
+ switch (size) {
+ case 0:
+ microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint8_t>(
+ machInst, ufp0, vd * 2, inc * 2, lane);
+ break;
+ case 1:
+ microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint16_t>(
+ machInst, ufp0, vd * 2, inc * 2, lane);
+ break;
+ case 2:
+ microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint32_t>(
+ machInst, ufp0, vd * 2, inc * 2, lane);
+ break;
+ default:
+ panic("Bad size %d.\n", size);
+ break;
+ }
+ break;
+ case 1:
+ assert(regs == 1 || (all && regs == 2));
+ assert(storeRegs <= 2);
+ for (unsigned offset = 0; offset < regs; offset++) {
+ switch (size) {
+ case 0:
+ microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint8_t>(
+ machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
+ break;
+ case 1:
+ microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint16_t>(
+ machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
+ break;
+ case 2:
+ microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint32_t>(
+ machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
+ break;
+ default:
+ panic("Bad size %d.\n", size);
+ break;
+ }
+ }
+ break;
+ default:
+ panic("Bad number of elements to pack %d.\n", elems);
+ }
+ switch (storeSize) {
+ case 1:
+ microOps[uopIdx++] = new MicroStrNeon1Uop<uint8_t>(
+ machInst, ufp0, rn, 0, align);
+ break;
+ case 2:
+ if (eBytes == 2) {
+ microOps[uopIdx++] = new MicroStrNeon2Uop<uint16_t>(
+ machInst, ufp0, rn, 0, align);
+ } else {
+ microOps[uopIdx++] = new MicroStrNeon2Uop<uint8_t>(
+ machInst, ufp0, rn, 0, align);
+ }
+ break;
+ case 3:
+ microOps[uopIdx++] = new MicroStrNeon3Uop<uint8_t>(
+ machInst, ufp0, rn, 0, align);
+ break;
+ case 4:
+ switch (eBytes) {
+ case 1:
+ microOps[uopIdx++] = new MicroStrNeon4Uop<uint8_t>(
+ machInst, ufp0, rn, 0, align);
+ break;
+ case 2:
+ microOps[uopIdx++] = new MicroStrNeon4Uop<uint16_t>(
+ machInst, ufp0, rn, 0, align);
+ break;
+ case 4:
+ microOps[uopIdx++] = new MicroStrNeon4Uop<uint32_t>(
+ machInst, ufp0, rn, 0, align);
+ break;
+ }
+ break;
+ case 6:
+ microOps[uopIdx++] = new MicroStrNeon6Uop<uint16_t>(
+ machInst, ufp0, rn, 0, align);
+ break;
+ case 8:
+ switch (eBytes) {
+ case 2:
+ microOps[uopIdx++] = new MicroStrNeon8Uop<uint16_t>(
+ machInst, ufp0, rn, 0, align);
+ break;
+ case 4:
+ microOps[uopIdx++] = new MicroStrNeon8Uop<uint32_t>(
+ machInst, ufp0, rn, 0, align);
+ break;
+ }
+ break;
+ case 12:
+ microOps[uopIdx++] = new MicroStrNeon12Uop<uint32_t>(
+ machInst, ufp0, rn, 0, align);
+ break;
+ case 16:
+ microOps[uopIdx++] = new MicroStrNeon16Uop<uint32_t>(
+ machInst, ufp0, rn, 0, align);
+ break;
+ default:
+        panic("Unrecognized store size %d.\n", storeSize);
+ }
+ if (wb) {
+ if (rm != 15 && rm != 13) {
+ microOps[uopIdx++] =
+ new MicroAddUop(machInst, rn, rn, rm);
+ } else {
+ microOps[uopIdx++] =
+ new MicroAddiUop(machInst, rn, rn, storeSize);
+ }
+ }
+ assert(uopIdx == numMicroops);
+
+ for (unsigned i = 0; i < numMicroops - 1; i++) {
+ MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
+ assert(uopPtr);
+ uopPtr->setDelayedCommit();
+ }
+ microOps[numMicroops - 1]->setLastMicroop();
+}
+
MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
OpClass __opClass, IntRegIndex rn,
RegIndex vd, bool single, bool up,
@@ -169,17 +810,25 @@ MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
bool tempUp = up;
for (int j = 0; j < count; j++) {
if (load) {
- microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn,
- tempUp, addr);
- if (!single)
- microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn, tempUp,
- addr + (up ? 4 : -4));
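+            // Double-precision loads now use two distinct word micro-ops,
+            // one per half of the value, instead of two identical FP loads.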
+ if (single) {
+ microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn,
+ tempUp, addr);
+ } else {
+ microOps[i++] = new MicroLdrDBFpUop(machInst, vd++, rn,
+ tempUp, addr);
+ microOps[i++] = new MicroLdrDTFpUop(machInst, vd++, rn, tempUp,
+ addr + (up ? 4 : -4));
+ }
} else {
- microOps[i++] = new MicroStrFpUop(machInst, vd++, rn,
- tempUp, addr);
- if (!single)
- microOps[i++] = new MicroStrFpUop(machInst, vd++, rn, tempUp,
- addr + (up ? 4 : -4));
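+            // Double-precision stores are split the same way, one word
+            // micro-op per half of the value.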
+ if (single) {
+ microOps[i++] = new MicroStrFpUop(machInst, vd++, rn,
+ tempUp, addr);
+ } else {
+ microOps[i++] = new MicroStrDBFpUop(machInst, vd++, rn,
+ tempUp, addr);
+ microOps[i++] = new MicroStrDTFpUop(machInst, vd++, rn, tempUp,
+ addr + (up ? 4 : -4));
+ }
}
if (!tempUp) {
addr -= (single ? 4 : 8);
@@ -216,7 +865,7 @@ MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
}
std::string
-MicroIntOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
std::stringstream ss;
printMnemonic(ss);
@@ -229,6 +878,19 @@ MicroIntOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
}
std::string
+MicroIntOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printMnemonic(ss);
+ printReg(ss, ura);
+ ss << ", ";
+ printReg(ss, urb);
+ ss << ", ";
+ printReg(ss, urc);
+ return ss.str();
+}
+
+std::string
MicroMemOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
std::stringstream ss;