summaryrefslogtreecommitdiff
path: root/src/cpu/o3/lsq_unit.hh
diff options
context:
space:
mode:
authorMitch Hayenga <mitch.hayenga@arm.com>2014-09-03 07:42:39 -0400
committerMitch Hayenga <mitch.hayenga@arm.com>2014-09-03 07:42:39 -0400
commit4f13f676aa71efaaae2fcd2587cf032a1d70f774 (patch)
tree1a8dec232d4bd77df2e773e824510959c643d091 /src/cpu/o3/lsq_unit.hh
parent283935a6f0a17afe4574cc3c50c043515c866dfa (diff)
downloadgem5-4f13f676aa71efaaae2fcd2587cf032a1d70f774.tar.xz
cpu: Fix cache blocked load behavior in o3 cpu
This patch fixes the load blocked/replay mechanism in the o3 cpu. Rather than flushing the entire pipeline, this patch replays loads once the cache becomes unblocked. Additionally, deferred memory instructions (loads which had conflicting stores), when replayed would not respect the number of functional units (only respected issue width). This patch also corrects that. Improvements over 20% have been observed on a microbenchmark designed to exercise this behavior.
Diffstat (limited to 'src/cpu/o3/lsq_unit.hh')
-rw-r--r--src/cpu/o3/lsq_unit.hh202
1 files changed, 85 insertions, 117 deletions
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index fcefa42fd..cb892a4fd 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012-2013 ARM Limited
+ * Copyright (c) 2012-2014 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -183,22 +183,6 @@ class LSQUnit {
/** Returns the memory ordering violator. */
DynInstPtr getMemDepViolator();
- /** Returns if a load became blocked due to the memory system. */
- bool loadBlocked()
- { return isLoadBlocked; }
-
- /** Clears the signal that a load became blocked. */
- void clearLoadBlocked()
- { isLoadBlocked = false; }
-
- /** Returns if the blocked load was handled. */
- bool isLoadBlockedHandled()
- { return loadBlockedHandled; }
-
- /** Records the blocked load as being handled. */
- void setLoadBlockedHandled()
- { loadBlockedHandled = true; }
-
/** Returns the number of free LQ entries. */
unsigned numFreeLoadEntries();
@@ -298,7 +282,7 @@ class LSQUnit {
/** Default constructor. */
LSQSenderState()
: mainPkt(NULL), pendingPacket(NULL), outstanding(1),
- noWB(false), isSplit(false), pktToSend(false)
+ noWB(false), isSplit(false), pktToSend(false), cacheBlocked(false)
{ }
/** Instruction who initiated the access to memory. */
@@ -319,6 +303,8 @@ class LSQUnit {
bool isSplit;
/** Whether or not there is a packet that needs sending. */
bool pktToSend;
+ /** Whether or not the second packet of this split load was blocked */
+ bool cacheBlocked;
/** Completes a packet and returns whether the access is finished. */
inline bool complete() { return --outstanding == 0; }
@@ -473,18 +459,9 @@ class LSQUnit {
/** Whehter or not a store is blocked due to the memory system. */
bool isStoreBlocked;
- /** Whether or not a load is blocked due to the memory system. */
- bool isLoadBlocked;
-
- /** Has the blocked load been handled. */
- bool loadBlockedHandled;
-
/** Whether or not a store is in flight. */
bool storeInFlight;
- /** The sequence number of the blocked load. */
- InstSeqNum blockedLoadSeqNum;
-
/** The oldest load that caused a memory ordering violation. */
DynInstPtr memDepViolator;
@@ -706,8 +683,10 @@ LSQUnit<Impl>::read(Request *req, Request *sreqLow, Request *sreqHigh,
memcpy(data, storeQueue[store_idx].data + shift_amt,
req->getSize());
- assert(!load_inst->memData);
- load_inst->memData = new uint8_t[req->getSize()];
+ // Allocate memory if this is the first time a load is issued.
+ if (!load_inst->memData) {
+ load_inst->memData = new uint8_t[req->getSize()];
+ }
if (storeQueue[store_idx].isAllZeros)
memset(load_inst->memData, 0, req->getSize());
else
@@ -788,116 +767,105 @@ LSQUnit<Impl>::read(Request *req, Request *sreqLow, Request *sreqHigh,
DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %s\n",
load_inst->seqNum, load_inst->pcState());
- assert(!load_inst->memData);
- load_inst->memData = new uint8_t[req->getSize()];
+ // Allocate memory if this is the first time a load is issued.
+ if (!load_inst->memData) {
+ load_inst->memData = new uint8_t[req->getSize()];
+ }
++usedPorts;
// if we the cache is not blocked, do cache access
bool completedFirst = false;
- if (!lsq->cacheBlocked()) {
- MemCmd command =
- req->isLLSC() ? MemCmd::LoadLockedReq : MemCmd::ReadReq;
- PacketPtr data_pkt = new Packet(req, command);
- PacketPtr fst_data_pkt = NULL;
- PacketPtr snd_data_pkt = NULL;
-
- data_pkt->dataStatic(load_inst->memData);
-
- LSQSenderState *state = new LSQSenderState;
- state->isLoad = true;
- state->idx = load_idx;
- state->inst = load_inst;
- data_pkt->senderState = state;
-
- if (!TheISA::HasUnalignedMemAcc || !sreqLow) {
-
- // Point the first packet at the main data packet.
- fst_data_pkt = data_pkt;
- } else {
-
- // Create the split packets.
- fst_data_pkt = new Packet(sreqLow, command);
- snd_data_pkt = new Packet(sreqHigh, command);
-
- fst_data_pkt->dataStatic(load_inst->memData);
- snd_data_pkt->dataStatic(load_inst->memData + sreqLow->getSize());
-
- fst_data_pkt->senderState = state;
- snd_data_pkt->senderState = state;
+ MemCmd command = req->isLLSC() ? MemCmd::LoadLockedReq : MemCmd::ReadReq;
+ PacketPtr data_pkt = new Packet(req, command);
+ PacketPtr fst_data_pkt = NULL;
+ PacketPtr snd_data_pkt = NULL;
+
+ data_pkt->dataStatic(load_inst->memData);
+
+ LSQSenderState *state = new LSQSenderState;
+ state->isLoad = true;
+ state->idx = load_idx;
+ state->inst = load_inst;
+ data_pkt->senderState = state;
+
+ if (!TheISA::HasUnalignedMemAcc || !sreqLow) {
+ // Point the first packet at the main data packet.
+ fst_data_pkt = data_pkt;
+ } else {
+ // Create the split packets.
+ fst_data_pkt = new Packet(sreqLow, command);
+ snd_data_pkt = new Packet(sreqHigh, command);
+
+ fst_data_pkt->dataStatic(load_inst->memData);
+ snd_data_pkt->dataStatic(load_inst->memData + sreqLow->getSize());
+
+ fst_data_pkt->senderState = state;
+ snd_data_pkt->senderState = state;
+
+ state->isSplit = true;
+ state->outstanding = 2;
+ state->mainPkt = data_pkt;
+ }
- state->isSplit = true;
- state->outstanding = 2;
- state->mainPkt = data_pkt;
+ bool successful_load = true;
+ if (!dcachePort->sendTimingReq(fst_data_pkt)) {
+ successful_load = false;
+ } else if (TheISA::HasUnalignedMemAcc && sreqLow) {
+ completedFirst = true;
+
+ // The first packet was sent without problems, so send this one
+ // too. If there is a problem with this packet then the whole
+ // load will be squashed, so indicate this to the state object.
+ // The first packet will return in completeDataAccess and be
+ // handled there.
+ ++usedPorts;
+ if (!dcachePort->sendTimingReq(snd_data_pkt)) {
+ // The main packet will be deleted in completeDataAccess.
+ state->complete();
+ // Signify to 1st half that the 2nd half was blocked via state
+ state->cacheBlocked = true;
+ successful_load = false;
}
+ }
- if (!dcachePort->sendTimingReq(fst_data_pkt)) {
- // Delete state and data packet because a load retry
- // initiates a pipeline restart; it does not retry.
+ // If the cache was blocked, or has become blocked due to the access,
+ // handle it.
+ if (!successful_load) {
+ if (!sreqLow) {
+ // Packet wasn't split, just delete main packet info
delete state;
- delete data_pkt->req;
+ delete req;
delete data_pkt;
- if (TheISA::HasUnalignedMemAcc && sreqLow) {
- delete fst_data_pkt->req;
+ }
+
+ if (TheISA::HasUnalignedMemAcc && sreqLow) {
+ if (!completedFirst) {
+ // Split packet, but first failed. Delete all state.
+ delete state;
+ delete req;
+ delete data_pkt;
delete fst_data_pkt;
- delete snd_data_pkt->req;
delete snd_data_pkt;
+ delete sreqLow;
+ delete sreqHigh;
sreqLow = NULL;
sreqHigh = NULL;
- }
-
- req = NULL;
-
- // If the access didn't succeed, tell the LSQ by setting
- // the retry thread id.
- lsq->setRetryTid(lsqID);
- } else if (TheISA::HasUnalignedMemAcc && sreqLow) {
- completedFirst = true;
-
- // The first packet was sent without problems, so send this one
- // too. If there is a problem with this packet then the whole
- // load will be squashed, so indicate this to the state object.
- // The first packet will return in completeDataAccess and be
- // handled there.
- ++usedPorts;
- if (!dcachePort->sendTimingReq(snd_data_pkt)) {
-
- // The main packet will be deleted in completeDataAccess.
- delete snd_data_pkt->req;
+ } else {
+ // Can't delete main packet data or state because first packet
+ // was sent to the memory system
+ delete data_pkt;
+ delete req;
+ delete sreqHigh;
delete snd_data_pkt;
-
- state->complete();
-
- req = NULL;
sreqHigh = NULL;
-
- lsq->setRetryTid(lsqID);
}
}
- }
-
- // If the cache was blocked, or has become blocked due to the access,
- // handle it.
- if (lsq->cacheBlocked()) {
- if (req)
- delete req;
- if (TheISA::HasUnalignedMemAcc && sreqLow && !completedFirst) {
- delete sreqLow;
- delete sreqHigh;
- }
++lsqCacheBlocked;
- // There's an older load that's already going to squash.
- if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum)
- return NoFault;
+ iewStage->blockMemInst(load_inst);
- // Record that the load was blocked due to memory. This
- // load will squash all instructions after it, be
- // refetched, and re-executed.
- isLoadBlocked = true;
- loadBlockedHandled = false;
- blockedLoadSeqNum = load_inst->seqNum;
// No fault occurred, even though the interface is blocked.
return NoFault;
}