summaryrefslogtreecommitdiff
path: root/src/arch/arm/insts/macromem.cc
diff options
context:
space:
mode:
authorMitch Hayenga <mitch.hayenga@arm.com>2014-09-03 07:42:44 -0400
committerMitch Hayenga <mitch.hayenga@arm.com>2014-09-03 07:42:44 -0400
commitbb1e6cf7c4d64a56b80d3d69ba25e8ff7d455bbd (patch)
tree95f328eb1a3c25bb8e67136e5b1ad1173a136143 /src/arch/arm/insts/macromem.cc
parent4a3f11149d791284a012af71067f6b2199aa165c (diff)
downloadgem5-bb1e6cf7c4d64a56b80d3d69ba25e8ff7d455bbd.tar.xz
arm: Fix v8 neon latency issue for loads/stores
Neon memory ops that operate on multiple registers currently have very poor performance because of interleave/deinterleave micro-ops. This patch marks the deinterleave/interleave micro-ops as "No_OpClass" such that they take minimum cycles to execute and are never resource constrained. Additionally, the micro-ops over-read registers. Although one form may need to read up to 20 sources, not all do. This adds in new forms so false dependencies are not modeled. Instructions read their minimum number of sources.
Diffstat (limited to 'src/arch/arm/insts/macromem.cc')
-rw-r--r--src/arch/arm/insts/macromem.cc47
1 files changed, 40 insertions, 7 deletions
diff --git a/src/arch/arm/insts/macromem.cc b/src/arch/arm/insts/macromem.cc
index 2ada29539..65cd2c3b7 100644
--- a/src/arch/arm/insts/macromem.cc
+++ b/src/arch/arm/insts/macromem.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010-2013 ARM Limited
+ * Copyright (c) 2010-2014 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -1107,9 +1107,26 @@ VldMultOp64::VldMultOp64(const char *mnem, ExtMachInst machInst,
}
for (int i = 0; i < numMarshalMicroops; ++i) {
- microOps[uopIdx++] = new MicroDeintNeon64(
- machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
- numStructElems, numRegs, i /* step */);
+ switch(numRegs) {
+ case 1: microOps[uopIdx++] = new MicroDeintNeon64_1Reg(
+ machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
+ numStructElems, 1, i /* step */);
+ break;
+ case 2: microOps[uopIdx++] = new MicroDeintNeon64_2Reg(
+ machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
+ numStructElems, 2, i /* step */);
+ break;
+ case 3: microOps[uopIdx++] = new MicroDeintNeon64_3Reg(
+ machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
+ numStructElems, 3, i /* step */);
+ break;
+ case 4: microOps[uopIdx++] = new MicroDeintNeon64_4Reg(
+ machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
+ numStructElems, 4, i /* step */);
+ break;
+ default: panic("Invalid number of registers");
+ }
+
}
assert(uopIdx == numMicroops);
@@ -1150,9 +1167,25 @@ VstMultOp64::VstMultOp64(const char *mnem, ExtMachInst machInst,
unsigned uopIdx = 0;
for(int i = 0; i < numMarshalMicroops; ++i) {
- microOps[uopIdx++] = new MicroIntNeon64(
- machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
- numStructElems, numRegs, i /* step */);
+ switch (numRegs) {
+ case 1: microOps[uopIdx++] = new MicroIntNeon64_1Reg(
+ machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
+ numStructElems, 1, i /* step */);
+ break;
+ case 2: microOps[uopIdx++] = new MicroIntNeon64_2Reg(
+ machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
+ numStructElems, 2, i /* step */);
+ break;
+ case 3: microOps[uopIdx++] = new MicroIntNeon64_3Reg(
+ machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
+ numStructElems, 3, i /* step */);
+ break;
+ case 4: microOps[uopIdx++] = new MicroIntNeon64_4Reg(
+ machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
+ numStructElems, 4, i /* step */);
+ break;
+ default: panic("Invalid number of registers");
+ }
}
uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |