summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMitch Hayenga <mitch.hayenga@arm.com>2014-09-03 07:42:33 -0400
committerMitch Hayenga <mitch.hayenga@arm.com>2014-09-03 07:42:33 -0400
commit976f27487b57e968a326752fcf74747427733df6 (patch)
tree16c9e61f702f21d82948b1f5b555ef1b7c543b15
parentfd722946dd723bda5bd4aea5eedbda108141a550 (diff)
downloadgem5-976f27487b57e968a326752fcf74747427733df6.tar.xz
cpu: Change writeback modeling for outstanding instructions
As highlighted on the mailing list, gem5's writeback modeling can impact performance. This patch removes the limitation on the maximum number of outstanding issued instructions; however, the number that can write back in a single cycle is still respected in instToCommit().
-rw-r--r--configs/common/O3_ARM_v7a.py1
-rw-r--r--src/cpu/o3/O3CPU.py1
-rw-r--r--src/cpu/o3/iew.hh53
-rw-r--r--src/cpu/o3/iew_impl.hh10
-rw-r--r--src/cpu/o3/inst_queue_impl.hh2
-rw-r--r--src/cpu/o3/lsq_unit.hh7
-rw-r--r--src/cpu/o3/lsq_unit_impl.hh5
7 files changed, 1 insertions, 78 deletions
diff --git a/configs/common/O3_ARM_v7a.py b/configs/common/O3_ARM_v7a.py
index 5a94438d7..ae4822a0c 100644
--- a/configs/common/O3_ARM_v7a.py
+++ b/configs/common/O3_ARM_v7a.py
@@ -126,7 +126,6 @@ class O3_ARM_v7a_3(DerivO3CPU):
dispatchWidth = 6
issueWidth = 8
wbWidth = 8
- wbDepth = 1
fuPool = O3_ARM_v7a_FUP()
iewToCommitDelay = 1
renameToROBDelay = 1
diff --git a/src/cpu/o3/O3CPU.py b/src/cpu/o3/O3CPU.py
index a6094e47c..fb5b5de2b 100644
--- a/src/cpu/o3/O3CPU.py
+++ b/src/cpu/o3/O3CPU.py
@@ -84,7 +84,6 @@ class DerivO3CPU(BaseCPU):
dispatchWidth = Param.Unsigned(8, "Dispatch width")
issueWidth = Param.Unsigned(8, "Issue width")
wbWidth = Param.Unsigned(8, "Writeback width")
- wbDepth = Param.Unsigned(1, "Writeback depth")
fuPool = Param.FUPool(DefaultFUPool(), "Functional Unit pool")
iewToCommitDelay = Param.Cycles(1, "Issue/Execute/Writeback to commit "
diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh
index 24412e11f..3b752ac99 100644
--- a/src/cpu/o3/iew.hh
+++ b/src/cpu/o3/iew.hh
@@ -219,49 +219,6 @@ class DefaultIEW
/** Returns if the LSQ has any stores to writeback. */
bool hasStoresToWB(ThreadID tid) { return ldstQueue.hasStoresToWB(tid); }
- void incrWb(InstSeqNum &sn)
- {
- ++wbOutstanding;
- if (wbOutstanding == wbMax)
- ableToIssue = false;
- DPRINTF(IEW, "wbOutstanding: %i [sn:%lli]\n", wbOutstanding, sn);
- assert(wbOutstanding <= wbMax);
-#ifdef DEBUG
- wbList.insert(sn);
-#endif
- }
-
- void decrWb(InstSeqNum &sn)
- {
- if (wbOutstanding == wbMax)
- ableToIssue = true;
- wbOutstanding--;
- DPRINTF(IEW, "wbOutstanding: %i [sn:%lli]\n", wbOutstanding, sn);
- assert(wbOutstanding >= 0);
-#ifdef DEBUG
- assert(wbList.find(sn) != wbList.end());
- wbList.erase(sn);
-#endif
- }
-
-#ifdef DEBUG
- std::set<InstSeqNum> wbList;
-
- void dumpWb()
- {
- std::set<InstSeqNum>::iterator wb_it = wbList.begin();
- while (wb_it != wbList.end()) {
- cprintf("[sn:%lli]\n",
- (*wb_it));
- wb_it++;
- }
- }
-#endif
-
- bool canIssue() { return ableToIssue; }
-
- bool ableToIssue;
-
/** Check misprediction */
void checkMisprediction(DynInstPtr &inst);
@@ -452,19 +409,9 @@ class DefaultIEW
*/
unsigned wbCycle;
- /** Number of instructions in flight that will writeback. */
-
- /** Number of instructions in flight that will writeback. */
- int wbOutstanding;
-
/** Writeback width. */
unsigned wbWidth;
- /** Writeback width * writeback depth, where writeback depth is
- * the number of cycles of writing back instructions that can be
- * buffered. */
- unsigned wbMax;
-
/** Number of active threads. */
ThreadID numThreads;
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index 9c6a44bf2..cf2d5be5e 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -76,7 +76,6 @@ DefaultIEW<Impl>::DefaultIEW(O3CPU *_cpu, DerivO3CPUParams *params)
issueToExecuteDelay(params->issueToExecuteDelay),
dispatchWidth(params->dispatchWidth),
issueWidth(params->issueWidth),
- wbOutstanding(0),
wbWidth(params->wbWidth),
numThreads(params->numThreads)
{
@@ -109,12 +108,8 @@ DefaultIEW<Impl>::DefaultIEW(O3CPU *_cpu, DerivO3CPUParams *params)
fetchRedirect[tid] = false;
}
- wbMax = wbWidth * params->wbDepth;
-
updateLSQNextCycle = false;
- ableToIssue = true;
-
skidBufferMax = (3 * (renameToIEWDelay * params->renameWidth)) + issueWidth;
}
@@ -635,8 +630,6 @@ DefaultIEW<Impl>::instToCommit(DynInstPtr &inst)
++wbCycle;
wbNumInst = 0;
}
-
- assert((wbCycle * wbWidth + wbNumInst) <= wbMax);
}
DPRINTF(IEW, "Current wb cycle: %i, width: %i, numInst: %i\nwbActual:%i\n",
@@ -1263,7 +1256,6 @@ DefaultIEW<Impl>::executeInsts()
++iewExecSquashedInsts;
- decrWb(inst->seqNum);
continue;
}
@@ -1502,8 +1494,6 @@ DefaultIEW<Impl>::writebackInsts()
}
writebackCount[tid]++;
}
-
- decrWb(inst->seqNum);
}
}
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh
index ab3861add..22f384cf5 100644
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -756,7 +756,6 @@ InstructionQueue<Impl>::scheduleReadyInsts()
int total_issued = 0;
while (total_issued < (totalWidth - total_deferred_mem_issued) &&
- iewStage->canIssue() &&
order_it != order_end_it) {
OpClass op_class = (*order_it).queueType;
@@ -861,7 +860,6 @@ InstructionQueue<Impl>::scheduleReadyInsts()
listOrder.erase(order_it++);
statIssuedInstType[tid][op_class]++;
- iewStage->incrWb(issuing_inst->seqNum);
} else {
statFuBusy[op_class]++;
fuBusy[tid]++;
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index 00469197d..fcefa42fd 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -762,7 +762,6 @@ LSQUnit<Impl>::read(Request *req, Request *sreqLow, Request *sreqHigh,
// Tell IQ/mem dep unit that this instruction will need to be
// rescheduled eventually
iewStage->rescheduleMemInst(load_inst);
- iewStage->decrWb(load_inst->seqNum);
load_inst->clearIssued();
++lsqRescheduledLoads;
@@ -889,12 +888,6 @@ LSQUnit<Impl>::read(Request *req, Request *sreqLow, Request *sreqHigh,
++lsqCacheBlocked;
- // If the first part of a split access succeeds, then let the LSQ
- // handle the decrWb when completeDataAccess is called upon return
- // of the requested first part of data
- if (!completedFirst)
- iewStage->decrWb(load_inst->seqNum);
-
// There's an older load that's already going to squash.
if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum)
return NoFault;
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index e6bb560af..b805ed4be 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -109,9 +109,7 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
}
assert(!cpu->switchedOut());
- if (inst->isSquashed()) {
- iewStage->decrWb(inst->seqNum);
- } else {
+ if (!inst->isSquashed()) {
if (!state->noWB) {
if (!TheISA::HasUnalignedMemAcc || !state->isSplit ||
!state->isLoad) {
@@ -1130,7 +1128,6 @@ LSQUnit<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt)
// Squashed instructions do not need to complete their access.
if (inst->isSquashed()) {
- iewStage->decrWb(inst->seqNum);
assert(!inst->isStore());
++lsqIgnoredResponses;
return;