-rw-r--r--  src/arch/alpha/isa_traits.hh     3
-rw-r--r--  src/arch/arm/isa_traits.hh       3
-rw-r--r--  src/arch/mips/isa_traits.hh      3
-rw-r--r--  src/arch/power/isa_traits.hh     3
-rw-r--r--  src/arch/sparc/isa_traits.hh     3
-rw-r--r--  src/arch/x86/isa_traits.hh       3
-rw-r--r--  src/cpu/base_dyn_inst.hh        75
-rw-r--r--  src/cpu/o3/cpu.hh               15
-rw-r--r--  src/cpu/o3/lsq.hh               22
-rw-r--r--  src/cpu/o3/lsq_unit.hh         138
-rw-r--r--  src/cpu/o3/lsq_unit_impl.hh    163
11 files changed, 378 insertions, 53 deletions
diff --git a/src/arch/alpha/isa_traits.hh b/src/arch/alpha/isa_traits.hh
index 66c240ef3..a5a8bf5a0 100644
--- a/src/arch/alpha/isa_traits.hh
+++ b/src/arch/alpha/isa_traits.hh
@@ -131,6 +131,9 @@ enum {
// Alpha UNOP (ldq_u r31,0(r0))
const ExtMachInst NoopMachInst = 0x2ffe0000;
+// Memory accesses cannot be unaligned
+const bool HasUnalignedMemAcc = false;
+
} // namespace AlphaISA
#endif // __ARCH_ALPHA_ISA_TRAITS_HH__
diff --git a/src/arch/arm/isa_traits.hh b/src/arch/arm/isa_traits.hh
index 91c51c46b..59eaeaa5c 100644
--- a/src/arch/arm/isa_traits.hh
+++ b/src/arch/arm/isa_traits.hh
@@ -106,6 +106,9 @@ namespace ArmISA
const int ByteBytes = 1;
const uint32_t HighVecs = 0xFFFF0000;
+
+ // Memory accesses cannot be unaligned
+ const bool HasUnalignedMemAcc = false;
};
using namespace ArmISA;
diff --git a/src/arch/mips/isa_traits.hh b/src/arch/mips/isa_traits.hh
index 38b43af9d..aa64be71d 100644
--- a/src/arch/mips/isa_traits.hh
+++ b/src/arch/mips/isa_traits.hh
@@ -164,6 +164,9 @@ const int ByteBytes = 1;
const int ANNOTE_NONE = 0;
const uint32_t ITOUCH_ANNOTE = 0xffffffff;
+// Memory accesses cannot be unaligned
+const bool HasUnalignedMemAcc = false;
+
};
#endif // __ARCH_MIPS_ISA_TRAITS_HH__
diff --git a/src/arch/power/isa_traits.hh b/src/arch/power/isa_traits.hh
index 886c2cb0b..ab6a56760 100644
--- a/src/arch/power/isa_traits.hh
+++ b/src/arch/power/isa_traits.hh
@@ -70,6 +70,9 @@ const int MachineBytes = 4;
// This is ori 0, 0, 0
const ExtMachInst NoopMachInst = 0x60000000;
+// Memory accesses can be unaligned
+const bool HasUnalignedMemAcc = true;
+
} // PowerISA namespace
#endif // __ARCH_POWER_ISA_TRAITS_HH__
diff --git a/src/arch/sparc/isa_traits.hh b/src/arch/sparc/isa_traits.hh
index 2af624d39..a4dc7322d 100644
--- a/src/arch/sparc/isa_traits.hh
+++ b/src/arch/sparc/isa_traits.hh
@@ -98,6 +98,9 @@ namespace SparcISA
};
#endif
+
+// Memory accesses cannot be unaligned
+const bool HasUnalignedMemAcc = false;
}
#endif // __ARCH_SPARC_ISA_TRAITS_HH__
diff --git a/src/arch/x86/isa_traits.hh b/src/arch/x86/isa_traits.hh
index 9f1b7b7c4..80af12c91 100644
--- a/src/arch/x86/isa_traits.hh
+++ b/src/arch/x86/isa_traits.hh
@@ -91,6 +91,9 @@ namespace X86ISA
StaticInstPtr decodeInst(ExtMachInst);
const Addr LoadAddrMask = ULL(-1);
+
+ // Memory accesses can be unaligned
+ const bool HasUnalignedMemAcc = true;
};
#endif // __ARCH_X86_ISATRAITS_HH__
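A note on the trait added in the six files above: because HasUnalignedMemAcc is a namespace-level compile-time constant, every if (TheISA::HasUnalignedMemAcc) guard introduced in the CPU changes below folds away on the aligned-only ISAs (Alpha, ARM, MIPS, SPARC), so the split-access machinery adds no overhead there. A minimal stand-alone sketch of the pattern; the namespace body here is an illustrative stand-in, not gem5 code:

    #include <iostream>

    namespace TheISA {
        // Stand-in: gem5's build aliases TheISA to the configured ISA's
        // namespace, where this trait is defined (true on x86 and Power).
        const bool HasUnalignedMemAcc = true;
    }

    int main()
    {
        // The trait is a compile-time constant, so the untaken branch
        // is dead code and the compiler removes it entirely.
        if (TheISA::HasUnalignedMemAcc)
            std::cout << "may need to split block-crossing accesses\n";
        else
            std::cout << "accesses are always aligned; never split\n";
        return 0;
    }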
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index 7732b71f8..65578379b 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -131,8 +131,13 @@ class BaseDynInst : public FastAlloc, public RefCounted
template <class T>
Fault write(T data, Addr addr, unsigned flags, uint64_t *res);
+ /** Splits a request in two if it crosses a dcache block. */
+ void splitRequest(RequestPtr req, RequestPtr &sreqLow,
+ RequestPtr &sreqHigh);
+
/** Initiate a DTB address translation. */
- void initiateTranslation(RequestPtr req, uint64_t *res,
+ void initiateTranslation(RequestPtr req, RequestPtr sreqLow,
+ RequestPtr sreqHigh, uint64_t *res,
BaseTLB::Mode mode);
/** Finish a DTB address translation. */
@@ -870,12 +875,19 @@ BaseDynInst<Impl>::read(Addr addr, T &data, unsigned flags)
Request *req = new Request(asid, addr, sizeof(T), flags, this->PC,
thread->contextId(), threadNumber);
- initiateTranslation(req, NULL, BaseTLB::Read);
+ Request *sreqLow = NULL;
+ Request *sreqHigh = NULL;
+
+ // Only split the request if the ISA supports unaligned accesses.
+ if (TheISA::HasUnalignedMemAcc) {
+ splitRequest(req, sreqLow, sreqHigh);
+ }
+ initiateTranslation(req, sreqLow, sreqHigh, NULL, BaseTLB::Read);
if (fault == NoFault) {
effAddr = req->getVaddr();
effAddrValid = true;
- cpu->read(req, data, lqIdx);
+ cpu->read(req, sreqLow, sreqHigh, data, lqIdx);
} else {
// Return a fixed value to keep simulation deterministic even
@@ -909,12 +921,19 @@ BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
Request *req = new Request(asid, addr, sizeof(T), flags, this->PC,
thread->contextId(), threadNumber);
- initiateTranslation(req, res, BaseTLB::Write);
+ Request *sreqLow = NULL;
+ Request *sreqHigh = NULL;
+
+ // Only split the request if the ISA supports unaligned accesses.
+ if (TheISA::HasUnalignedMemAcc) {
+ splitRequest(req, sreqLow, sreqHigh);
+ }
+ initiateTranslation(req, sreqLow, sreqHigh, res, BaseTLB::Write);
if (fault == NoFault) {
effAddr = req->getVaddr();
effAddrValid = true;
- cpu->write(req, data, sqIdx);
+ cpu->write(req, sreqLow, sreqHigh, data, sqIdx);
}
return fault;
@@ -922,14 +941,48 @@ BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
template<class Impl>
inline void
-BaseDynInst<Impl>::initiateTranslation(RequestPtr req, uint64_t *res,
+BaseDynInst<Impl>::splitRequest(RequestPtr req, RequestPtr &sreqLow,
+ RequestPtr &sreqHigh)
+{
+ // Check to see if the request crosses the next level block boundary.
+ unsigned block_size = cpu->getDcachePort()->peerBlockSize();
+ Addr addr = req->getVaddr();
+ Addr split_addr = roundDown(addr + req->getSize() - 1, block_size);
+ assert(split_addr <= addr || split_addr - addr < block_size);
+
+ // Spans two blocks.
+ if (split_addr > addr) {
+ req->splitOnVaddr(split_addr, sreqLow, sreqHigh);
+ }
+}
+
+template<class Impl>
+inline void
+BaseDynInst<Impl>::initiateTranslation(RequestPtr req, RequestPtr sreqLow,
+ RequestPtr sreqHigh, uint64_t *res,
BaseTLB::Mode mode)
{
- WholeTranslationState *state =
- new WholeTranslationState(req, NULL, res, mode);
- DataTranslation<BaseDynInst<Impl> > *trans =
- new DataTranslation<BaseDynInst<Impl> >(this, state);
- cpu->dtb->translateTiming(req, thread->getTC(), trans, mode);
+ if (!TheISA::HasUnalignedMemAcc || sreqLow == NULL) {
+ WholeTranslationState *state =
+ new WholeTranslationState(req, NULL, res, mode);
+
+ // One translation if the request isn't split.
+ DataTranslation<BaseDynInst<Impl> > *trans =
+ new DataTranslation<BaseDynInst<Impl> >(this, state);
+ cpu->dtb->translateTiming(req, thread->getTC(), trans, mode);
+ } else {
+ WholeTranslationState *state =
+ new WholeTranslationState(req, sreqLow, sreqHigh, NULL, res, mode);
+
+ // Two translations when the request is split.
+ DataTranslation<BaseDynInst<Impl> > *stransLow =
+ new DataTranslation<BaseDynInst<Impl> >(this, state, 0);
+ DataTranslation<BaseDynInst<Impl> > *stransHigh =
+ new DataTranslation<BaseDynInst<Impl> >(this, state, 1);
+
+ cpu->dtb->translateTiming(sreqLow, thread->getTC(), stransLow, mode);
+ cpu->dtb->translateTiming(sreqHigh, thread->getTC(), stransHigh, mode);
+ }
}
template<class Impl>
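The block-crossing test in splitRequest() above deserves a worked example: roundDown(addr + size - 1, block_size) gives the start address of the cache block holding the access's last byte, and if that lies above addr the access spans two blocks and is split at exactly that boundary. A self-contained sketch with an assumed 64-byte block; roundDown is reproduced from gem5's base/intmath.hh for illustration:

    #include <cassert>
    #include <cstdint>
    #include <iostream>

    // Power-of-two round-down, as in gem5's base/intmath.hh.
    template <typename T>
    inline T roundDown(T val, T align) { return val & ~(align - 1); }

    int main()
    {
        const uint64_t block_size = 64;   // assumed dcache block size
        const uint64_t addr = 0x103c;     // 8-byte access at block offset 60
        const uint64_t size = 8;

        // Start of the block containing the access's last byte.
        uint64_t split_addr = roundDown(addr + size - 1, block_size);
        assert(split_addr <= addr || split_addr - addr < block_size);

        if (split_addr > addr) {
            // Spans two blocks: the low request covers [addr, split_addr),
            // the high request covers [split_addr, addr + size).
            std::cout << "low " << split_addr - addr << " bytes, high "
                      << addr + size - split_addr << " bytes\n";
        }
        return 0;
    }

Here the access splits into two 4-byte requests; initiateTranslation() then walks the TLB once per sub-request, with the DataTranslation indices 0 and 1 letting the shared WholeTranslationState tell the two halves apart.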
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index 2ea918983..82d4ca25b 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -703,18 +703,25 @@ class FullO3CPU : public BaseO3CPU
/** CPU read function, forwards read to LSQ. */
template <class T>
- Fault read(RequestPtr &req, T &data, int load_idx)
+ Fault read(RequestPtr &req, RequestPtr &sreqLow, RequestPtr &sreqHigh,
+ T &data, int load_idx)
{
- return this->iew.ldstQueue.read(req, data, load_idx);
+ return this->iew.ldstQueue.read(req, sreqLow, sreqHigh,
+ data, load_idx);
}
/** CPU write function, forwards write to LSQ. */
template <class T>
- Fault write(RequestPtr &req, T &data, int store_idx)
+ Fault write(RequestPtr &req, RequestPtr &sreqLow, RequestPtr &sreqHigh,
+ T &data, int store_idx)
{
- return this->iew.ldstQueue.write(req, data, store_idx);
+ return this->iew.ldstQueue.write(req, sreqLow, sreqHigh,
+ data, store_idx);
}
+ /** Get the dcache port (used to find block size for translations). */
+ Port *getDcachePort() { return this->iew.ldstQueue.getDcachePort(); }
+
Addr lockAddr;
/** Temporary fix for the lock flag, works in the UP case. */
diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh
index a0bae058c..7a7ea917f 100644
--- a/src/cpu/o3/lsq.hh
+++ b/src/cpu/o3/lsq.hh
@@ -270,15 +270,19 @@ class LSQ {
void dumpInsts(ThreadID tid)
{ thread[tid].dumpInsts(); }
- /** Executes a read operation, using the load specified at the load index. */
+ /** Executes a read operation, using the load specified at the load
+ * index.
+ */
template <class T>
- Fault read(RequestPtr req, T &data, int load_idx);
+ Fault read(RequestPtr req, RequestPtr sreqLow, RequestPtr sreqHigh,
+ T &data, int load_idx);
/** Executes a store operation, using the store specified at the store
- * index.
+ * index.
*/
template <class T>
- Fault write(RequestPtr req, T &data, int store_idx);
+ Fault write(RequestPtr req, RequestPtr sreqLow, RequestPtr sreqHigh,
+ T &data, int store_idx);
/** The CPU pointer. */
O3CPU *cpu;
@@ -369,21 +373,23 @@ class LSQ {
template <class Impl>
template <class T>
Fault
-LSQ<Impl>::read(RequestPtr req, T &data, int load_idx)
+LSQ<Impl>::read(RequestPtr req, RequestPtr sreqLow, RequestPtr sreqHigh,
+ T &data, int load_idx)
{
ThreadID tid = req->threadId();
- return thread[tid].read(req, data, load_idx);
+ return thread[tid].read(req, sreqLow, sreqHigh, data, load_idx);
}
template <class Impl>
template <class T>
Fault
-LSQ<Impl>::write(RequestPtr req, T &data, int store_idx)
+LSQ<Impl>::write(RequestPtr req, RequestPtr sreqLow, RequestPtr sreqHigh,
+ T &data, int store_idx)
{
ThreadID tid = req->threadId();
- return thread[tid].write(req, data, store_idx);
+ return thread[tid].write(req, sreqLow, sreqHigh, data, store_idx);
}
#endif // __CPU_O3_LSQ_HH__
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index 6ff36d929..cf51f8eab 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -216,12 +216,18 @@ class LSQUnit {
/** Writes back the instruction, sending it to IEW. */
void writeback(DynInstPtr &inst, PacketPtr pkt);
+ /** Writes back a store that couldn't be completed the previous cycle. */
+ void writebackPendingStore();
+
/** Handles completing the send of a store to memory. */
void storePostSend(PacketPtr pkt);
/** Completes the store at the specified index. */
void completeStore(int store_idx);
+ /** Attempts to send a store to the cache. */
+ bool sendStore(PacketPtr data_pkt);
+
/** Increments the given store index (circular queue). */
inline void incrStIdx(int &store_idx);
/** Decrements the given store index (circular queue). */
@@ -254,7 +260,8 @@ class LSQUnit {
public:
/** Default constructor. */
LSQSenderState()
- : noWB(false)
+ : noWB(false), isSplit(false), pktToSend(false), outstanding(1),
+ mainPkt(NULL), pendingPacket(NULL)
{ }
/** Instruction who initiated the access to memory. */
@@ -265,6 +272,19 @@ class LSQUnit {
int idx;
/** Whether or not the instruction will need to writeback. */
bool noWB;
+ /** Whether or not this access is split in two. */
+ bool isSplit;
+ /** Whether or not there is a packet that needs sending. */
+ bool pktToSend;
+ /** Number of outstanding packets to complete. */
+ int outstanding;
+ /** The main packet from a split load, used during writeback. */
+ PacketPtr mainPkt;
+ /** A second packet from a split store that needs sending. */
+ PacketPtr pendingPacket;
+
+ /** Completes a packet and returns whether the access is finished. */
+ inline bool complete() { return --outstanding == 0; }
};
/** Writeback event, specifically for when stores forward data to loads. */
@@ -302,8 +322,8 @@ class LSQUnit {
/** Constructs a store queue entry for a given instruction. */
SQEntry(DynInstPtr &_inst)
- : inst(_inst), req(NULL), size(0),
- canWB(0), committed(0), completed(0)
+ : inst(_inst), req(NULL), sreqLow(NULL), sreqHigh(NULL), size(0),
+ isSplit(0), canWB(0), committed(0), completed(0)
{
std::memset(data, 0, sizeof(data));
}
@@ -312,10 +332,15 @@ class LSQUnit {
DynInstPtr inst;
/** The request for the store. */
RequestPtr req;
+ /** The split requests for the store. */
+ RequestPtr sreqLow;
+ RequestPtr sreqHigh;
/** The size of the store. */
int size;
/** The store data. */
char data[sizeof(IntReg)];
+ /** Whether or not the store is split into two requests. */
+ bool isSplit;
/** Whether or not the store can writeback. */
bool canWB;
/** Whether or not the store is committed. */
@@ -406,6 +431,13 @@ class LSQUnit {
/** The oldest load that caused a memory ordering violation. */
DynInstPtr memDepViolator;
+ /** Whether or not there is a packet that couldn't be sent because of
+ * a lack of cache ports. */
+ bool hasPendingPkt;
+
+ /** The packet that is pending free cache ports. */
+ PacketPtr pendingPkt;
+
// Will also need how many read/write ports the Dcache has. Or keep track
// of that in the stage that is one level up, and only call executeLoad/Store
// the appropriate number of times.
@@ -443,11 +475,13 @@ class LSQUnit {
public:
/** Executes the load at the given index. */
template <class T>
- Fault read(Request *req, T &data, int load_idx);
+ Fault read(Request *req, Request *sreqLow, Request *sreqHigh, T &data,
+ int load_idx);
/** Executes the store at the given index. */
template <class T>
- Fault write(Request *req, T &data, int store_idx);
+ Fault write(Request *req, Request *sreqLow, Request *sreqHigh, T &data,
+ int store_idx);
/** Returns the index of the head load instruction. */
int getLoadHead() { return loadHead; }
@@ -482,7 +516,8 @@ class LSQUnit {
template <class Impl>
template <class T>
Fault
-LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
+LSQUnit<Impl>::read(Request *req, Request *sreqLow, Request *sreqHigh,
+ T &data, int load_idx)
{
DynInstPtr load_inst = loadQueue[load_idx];
@@ -503,6 +538,10 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
// memory. This is quite ugly. @todo: Figure out the proper
// place to really handle request deletes.
delete req;
+ if (TheISA::HasUnalignedMemAcc && sreqLow) {
+ delete sreqLow;
+ delete sreqHigh;
+ }
return TheISA::genMachineCheckFault();
}
@@ -512,10 +551,12 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
int store_size = 0;
DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, "
- "storeHead: %i addr: %#x\n",
- load_idx, store_idx, storeHead, req->getPaddr());
+ "storeHead: %i addr: %#x%s\n",
+ load_idx, store_idx, storeHead, req->getPaddr(),
+ sreqLow ? " split" : "");
if (req->isLLSC()) {
+ assert(!sreqLow);
// Disable recording the result temporarily. Writing to misc
// regs normally updates the result, but this is not the
// desired behavior when handling store conditionals.
@@ -587,6 +628,12 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
// @todo: Need to make this a parameter.
cpu->schedule(wb, curTick);
+ // Nothing special is needed for split loads here; just delete the
+ // split requests, since forwarding satisfied the whole access.
+ if (TheISA::HasUnalignedMemAcc && sreqLow) {
+ delete sreqLow;
+ delete sreqHigh;
+ }
+
++lsqForwLoads;
return NoFault;
} else if ((store_has_lower_limit && lower_load_has_store_part) ||
@@ -630,6 +677,10 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
// memory. This is quite ugly. @todo: Figure out the
// proper place to really handle request deletes.
delete req;
+ if (TheISA::HasUnalignedMemAcc && sreqLow) {
+ delete sreqLow;
+ delete sreqHigh;
+ }
return NoFault;
}
@@ -645,12 +696,14 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
++usedPorts;
// if the cache is not blocked, do cache access
+ bool completedFirst = false;
if (!lsq->cacheBlocked()) {
- PacketPtr data_pkt =
- new Packet(req,
- (req->isLLSC() ?
- MemCmd::LoadLockedReq : MemCmd::ReadReq),
- Packet::Broadcast);
+ MemCmd command =
+ req->isLLSC() ? MemCmd::LoadLockedReq : MemCmd::ReadReq;
+ PacketPtr data_pkt = new Packet(req, command, Packet::Broadcast);
+ PacketPtr fst_data_pkt = NULL;
+ PacketPtr snd_data_pkt = NULL;
+
data_pkt->dataStatic(load_inst->memData);
LSQSenderState *state = new LSQSenderState;
@@ -659,18 +712,66 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
state->inst = load_inst;
data_pkt->senderState = state;
- if (!dcachePort->sendTiming(data_pkt)) {
+ if (!TheISA::HasUnalignedMemAcc || !sreqLow) {
+ // Point the first packet at the main data packet.
+ fst_data_pkt = data_pkt;
+ } else {
+ // Create the split packets.
+ fst_data_pkt = new Packet(sreqLow, command, Packet::Broadcast);
+ snd_data_pkt = new Packet(sreqHigh, command, Packet::Broadcast);
+
+ fst_data_pkt->dataStatic(load_inst->memData);
+ snd_data_pkt->dataStatic(load_inst->memData + sreqLow->getSize());
+
+ fst_data_pkt->senderState = state;
+ snd_data_pkt->senderState = state;
+
+ state->isSplit = true;
+ state->outstanding = 2;
+ state->mainPkt = data_pkt;
+ }
+
+ if (!dcachePort->sendTiming(fst_data_pkt)) {
// Delete state and data packet because a load retry
// initiates a pipeline restart; it does not retry.
delete state;
delete data_pkt->req;
delete data_pkt;
+ if (TheISA::HasUnalignedMemAcc && sreqLow) {
+ delete fst_data_pkt->req;
+ delete fst_data_pkt;
+ delete snd_data_pkt->req;
+ delete snd_data_pkt;
+ }
req = NULL;
// If the access didn't succeed, tell the LSQ by setting
// the retry thread id.
lsq->setRetryTid(lsqID);
+ } else if (TheISA::HasUnalignedMemAcc && sreqLow) {
+ completedFirst = true;
+
+ // The first packet was sent without problems, so send this one
+ // too. If there is a problem with this packet then the whole
+ // load will be squashed, so indicate this to the state object.
+ // The first packet will return in completeDataAccess and be
+ // handled there.
+ ++usedPorts;
+ if (!dcachePort->sendTiming(snd_data_pkt)) {
+ // The main packet will be deleted in completeDataAccess.
+ delete snd_data_pkt->req;
+ delete snd_data_pkt;
+
+ state->complete();
+
+ req = NULL;
+
+ lsq->setRetryTid(lsqID);
+ }
}
}
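Worth noting in the hunk above: the two packets of a split load share the instruction's single memData buffer. fst_data_pkt points at its start and snd_data_pkt at memData + sreqLow->getSize(), so each cache response lands directly in its own half and no reassembly pass is needed at writeback. A stand-alone sketch of the slicing (the sizes are made up):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main()
    {
        uint8_t memData[8] = {};     // the instruction's backing buffer

        // Assume the low request covers 3 bytes and the high one 5.
        const unsigned lowSize = 3;
        const uint8_t lowResp[3]  = {0x11, 0x22, 0x33};
        const uint8_t highResp[5] = {0x44, 0x55, 0x66, 0x77, 0x88};

        // Each packet's static data pointer targets its own slice, so a
        // response fills its part of the buffer directly.
        std::memcpy(memData, lowResp, lowSize);        // fst_data_pkt
        std::memcpy(memData + lowSize, highResp, 5);   // snd_data_pkt

        assert(memData[2] == 0x33 && memData[3] == 0x44);
        return 0;
    }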
@@ -679,6 +780,10 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
if (lsq->cacheBlocked()) {
if (req)
delete req;
+ if (TheISA::HasUnalignedMemAcc && sreqLow && !completedFirst) {
+ delete sreqLow;
+ delete sreqHigh;
+ }
++lsqCacheBlocked;
@@ -703,7 +808,8 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
template <class Impl>
template <class T>
Fault
-LSQUnit<Impl>::write(Request *req, T &data, int store_idx)
+LSQUnit<Impl>::write(Request *req, Request *sreqLow, Request *sreqHigh,
+ T &data, int store_idx)
{
assert(storeQueue[store_idx].inst);
@@ -713,6 +819,8 @@ LSQUnit<Impl>::write(Request *req, T &data, int store_idx)
storeQueue[store_idx].inst->seqNum);
storeQueue[store_idx].req = req;
+ storeQueue[store_idx].sreqLow = sreqLow;
+ storeQueue[store_idx].sreqHigh = sreqHigh;
storeQueue[store_idx].size = sizeof(T);
assert(sizeof(T) <= sizeof(storeQueue[store_idx].data));
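The small counter in LSQSenderState is the heart of the split handling: each cache response calls complete(), and only the caller that drains outstanding to zero finishes the access, so the two halves may return in either order. (WholeTranslationState in base_dyn_inst.hh applies the same counting idea to the two TLB translations.) A minimal model of the handshake:

    #include <cassert>

    struct SenderState {
        bool isSplit = false;
        int outstanding = 1;          // becomes 2 once the access is split

        // Returns true only when the last response has arrived.
        bool complete() { return --outstanding == 0; }
    };

    int main()
    {
        SenderState s;
        s.isSplit = true;
        s.outstanding = 2;

        // First half returns: not finished, its packet is simply freed.
        assert(!s.complete());
        // Second half returns: the access can now write back.
        assert(s.complete());
        return 0;
    }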
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 9ee1de45a..fcc57ab09 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -85,11 +85,23 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
assert(!pkt->wasNacked());
+ // If this is a split access, wait until all packets are received.
+ if (TheISA::HasUnalignedMemAcc && !state->complete()) {
+ delete pkt->req;
+ delete pkt;
+ return;
+ }
+
if (isSwitchedOut() || inst->isSquashed()) {
iewStage->decrWb(inst->seqNum);
} else {
if (!state->noWB) {
- writeback(inst, pkt);
+ if (!TheISA::HasUnalignedMemAcc || !state->isSplit ||
+ !state->isLoad) {
+ writeback(inst, pkt);
+ } else {
+ writeback(inst, state->mainPkt);
+ }
}
if (inst->isStore()) {
@@ -97,6 +109,10 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
}
}
+ if (TheISA::HasUnalignedMemAcc && state->isSplit && state->isLoad) {
+ delete state->mainPkt->req;
+ delete state->mainPkt;
+ }
delete state;
delete pkt->req;
delete pkt;
@@ -106,7 +122,7 @@ template <class Impl>
LSQUnit<Impl>::LSQUnit()
: loads(0), stores(0), storesToWB(0), stalled(false),
isStoreBlocked(false), isLoadBlocked(false),
- loadBlockedHandled(false)
+ loadBlockedHandled(false), hasPendingPkt(false)
{
}
@@ -605,8 +621,30 @@ LSQUnit<Impl>::commitStores(InstSeqNum &youngest_inst)
template <class Impl>
void
+LSQUnit<Impl>::writebackPendingStore()
+{
+ if (hasPendingPkt) {
+ assert(pendingPkt != NULL);
+
+ // If the cache is blocked, this will store the packet for retry.
+ if (sendStore(pendingPkt)) {
+ storePostSend(pendingPkt);
+ }
+ pendingPkt = NULL;
+ hasPendingPkt = false;
+ }
+}
+
+template <class Impl>
+void
LSQUnit<Impl>::writebackStores()
{
+ // First, write back the second packet from any split store that didn't
+ // complete last cycle because there weren't enough cache ports available.
+ if (TheISA::HasUnalignedMemAcc) {
+ writebackPendingStore();
+ }
+
while (storesToWB > 0 &&
storeWBIdx != storeTail &&
storeQueue[storeWBIdx].inst &&
@@ -640,6 +678,11 @@ LSQUnit<Impl>::writebackStores()
assert(storeQueue[storeWBIdx].req);
assert(!storeQueue[storeWBIdx].committed);
+ if (TheISA::HasUnalignedMemAcc && storeQueue[storeWBIdx].isSplit) {
+ assert(storeQueue[storeWBIdx].sreqLow);
+ assert(storeQueue[storeWBIdx].sreqHigh);
+ }
+
DynInstPtr inst = storeQueue[storeWBIdx].inst;
Request *req = storeQueue[storeWBIdx].req;
@@ -653,15 +696,41 @@ LSQUnit<Impl>::writebackStores()
MemCmd command =
req->isSwap() ? MemCmd::SwapReq :
(req->isLLSC() ? MemCmd::StoreCondReq : MemCmd::WriteReq);
- PacketPtr data_pkt = new Packet(req, command,
- Packet::Broadcast);
- data_pkt->dataStatic(inst->memData);
+ PacketPtr data_pkt;
+ PacketPtr snd_data_pkt = NULL;
LSQSenderState *state = new LSQSenderState;
state->isLoad = false;
state->idx = storeWBIdx;
state->inst = inst;
- data_pkt->senderState = state;
+
+ if (!TheISA::HasUnalignedMemAcc || !storeQueue[storeWBIdx].isSplit) {
+ // Build a single data packet if the store isn't split.
+ data_pkt = new Packet(req, command, Packet::Broadcast);
+ data_pkt->dataStatic(inst->memData);
+ data_pkt->senderState = state;
+ } else {
+ RequestPtr sreqLow = storeQueue[storeWBIdx].sreqLow;
+ RequestPtr sreqHigh = storeQueue[storeWBIdx].sreqHigh;
+
+ // Create two packets if the store is split in two.
+ data_pkt = new Packet(sreqLow, command, Packet::Broadcast);
+ snd_data_pkt = new Packet(sreqHigh, command, Packet::Broadcast);
+
+ data_pkt->dataStatic(inst->memData);
+ snd_data_pkt->dataStatic(inst->memData + sreqLow->getSize());
+
+ data_pkt->senderState = state;
+ snd_data_pkt->senderState = state;
+
+ state->isSplit = true;
+ state->outstanding = 2;
+
+ // Can delete the main request now.
+ delete req;
+ req = sreqLow;
+ }
DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%#x "
"to Addr:%#x, data:%#x [sn:%lli]\n",
@@ -671,6 +740,7 @@ LSQUnit<Impl>::writebackStores()
// @todo: Remove this SC hack once the memory system handles it.
if (inst->isStoreConditional()) {
+ assert(!storeQueue[storeWBIdx].isSplit);
// Disable recording the result temporarily. Writing to
// misc regs normally updates the result, but this is not
// the desired behavior when handling store conditionals.
@@ -694,18 +764,44 @@ LSQUnit<Impl>::writebackStores()
state->noWB = true;
}
- if (!dcachePort->sendTiming(data_pkt)) {
- // Need to handle becoming blocked on a store.
+ if (!sendStore(data_pkt)) {
DPRINTF(IEW, "D-Cache became blocked when writing [sn:%lli], will "
"retry later\n",
inst->seqNum);
- isStoreBlocked = true;
- ++lsqCacheBlocked;
- assert(retryPkt == NULL);
- retryPkt = data_pkt;
- lsq->setRetryTid(lsqID);
+
+ // Need to store the second packet, if split.
+ if (TheISA::HasUnalignedMemAcc && storeQueue[storeWBIdx].isSplit) {
+ state->pktToSend = true;
+ state->pendingPacket = snd_data_pkt;
+ }
} else {
- storePostSend(data_pkt);
+ // If split, try to send the second packet, too.
+ if (TheISA::HasUnalignedMemAcc && storeQueue[storeWBIdx].isSplit) {
+ assert(snd_data_pkt);
+
+ // Ensure there are enough ports to use.
+ if (usedPorts < cachePorts) {
+ ++usedPorts;
+ if (sendStore(snd_data_pkt)) {
+ storePostSend(snd_data_pkt);
+ } else {
+ DPRINTF(IEW, "D-Cache became blocked when writing"
+ " [sn:%lli] second packet, will retry later\n",
+ inst->seqNum);
+ }
+ } else {
+ // Store the packet for when there are free ports.
+ assert(pendingPkt == NULL);
+ pendingPkt = snd_data_pkt;
+ hasPendingPkt = true;
+ }
+ } else {
+ // Not a split store.
+ storePostSend(data_pkt);
+ }
}
}
@@ -808,6 +904,13 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
// memory. This is quite ugly. @todo: Figure out the proper
// place to really handle request deletes.
delete storeQueue[store_idx].req;
+ if (TheISA::HasUnalignedMemAcc && storeQueue[store_idx].isSplit) {
+ delete storeQueue[store_idx].sreqLow;
+ delete storeQueue[store_idx].sreqHigh;
+
+ storeQueue[store_idx].sreqLow = NULL;
+ storeQueue[store_idx].sreqHigh = NULL;
+ }
storeQueue[store_idx].req = NULL;
--stores;
@@ -927,6 +1030,22 @@ LSQUnit<Impl>::completeStore(int store_idx)
}
template <class Impl>
+bool
+LSQUnit<Impl>::sendStore(PacketPtr data_pkt)
+{
+ if (!dcachePort->sendTiming(data_pkt)) {
+ // Need to handle becoming blocked on a store.
+ isStoreBlocked = true;
+ ++lsqCacheBlocked;
+ assert(retryPkt == NULL);
+ retryPkt = data_pkt;
+ lsq->setRetryTid(lsqID);
+ return false;
+ }
+ return true;
+}
+
+template <class Impl>
void
LSQUnit<Impl>::recvRetry()
{
@@ -935,10 +1054,24 @@ LSQUnit<Impl>::recvRetry()
assert(retryPkt != NULL);
if (dcachePort->sendTiming(retryPkt)) {
- storePostSend(retryPkt);
+ LSQSenderState *state =
+ dynamic_cast<LSQSenderState *>(retryPkt->senderState);
+
+ // Don't finish the store unless this is the last packet.
+ if (!TheISA::HasUnalignedMemAcc || !state->pktToSend) {
+ storePostSend(retryPkt);
+ }
retryPkt = NULL;
isStoreBlocked = false;
lsq->setRetryTid(InvalidThreadID);
+
+ // Send any outstanding packet.
+ if (TheISA::HasUnalignedMemAcc && state->pktToSend) {
+ assert(state->pendingPacket);
+ if (sendStore(state->pendingPacket)) {
+ storePostSend(state->pendingPacket);
+ }
+ }
} else {
// Still blocked!
++lsqCacheBlocked;
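A closing sketch of the store-side port accounting introduced above: a split store wants two cache ports in one cycle, and when only one is free the high half is parked in pendingPkt and issued at the top of the next writebackStores() call via writebackPendingStore(). This rough stand-alone model assumes two ports per cycle and a simplified cycle structure; it is not the simulator's actual timing:

    #include <cassert>
    #include <iostream>

    struct LSQModel {
        int cachePorts = 2;           // assumed dcache ports per cycle
        int usedPorts = 0;
        bool hasPendingPkt = false;

        void newCycle() { usedPorts = 0; }

        void writebackSplitStore()
        {
            ++usedPorts;              // the low half always goes first
            std::cout << "sent low half\n";
            if (usedPorts < cachePorts) {
                ++usedPorts;
                std::cout << "sent high half\n";
            } else {
                // No port left: park the high half for next cycle.
                hasPendingPkt = true;
                std::cout << "high half pending\n";
            }
        }

        void writebackPendingStore()
        {
            if (hasPendingPkt) {
                ++usedPorts;
                hasPendingPkt = false;
                std::cout << "sent pending high half\n";
            }
        }
    };

    int main()
    {
        LSQModel lsq;
        lsq.usedPorts = 1;            // a load already claimed one port
        lsq.writebackSplitStore();    // only the low half fits this cycle
        assert(lsq.hasPendingPkt);

        lsq.newCycle();
        lsq.writebackPendingStore();  // high half issues first next cycle
        assert(!lsq.hasPendingPkt);
        return 0;
    }

When even the low half is rejected, sendStore() instead stashes it in retryPkt, and recvRetry() replays it before sending the parked second packet, as the last hunk above shows.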