diff options
Diffstat (limited to 'src/cpu/o3')
-rw-r--r-- | src/cpu/o3/cpu.hh | 15 | ||||
-rw-r--r-- | src/cpu/o3/lsq.hh | 22 | ||||
-rw-r--r-- | src/cpu/o3/lsq_unit.hh | 138 | ||||
-rw-r--r-- | src/cpu/o3/lsq_unit_impl.hh | 163 |
4 files changed, 296 insertions, 42 deletions
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index 2ea918983..82d4ca25b 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -703,18 +703,25 @@ class FullO3CPU : public BaseO3CPU /** CPU read function, forwards read to LSQ. */ template <class T> - Fault read(RequestPtr &req, T &data, int load_idx) + Fault read(RequestPtr &req, RequestPtr &sreqLow, RequestPtr &sreqHigh, + T &data, int load_idx) { - return this->iew.ldstQueue.read(req, data, load_idx); + return this->iew.ldstQueue.read(req, sreqLow, sreqHigh, + data, load_idx); } /** CPU write function, forwards write to LSQ. */ template <class T> - Fault write(RequestPtr &req, T &data, int store_idx) + Fault write(RequestPtr &req, RequestPtr &sreqLow, RequestPtr &sreqHigh, + T &data, int store_idx) { - return this->iew.ldstQueue.write(req, data, store_idx); + return this->iew.ldstQueue.write(req, sreqLow, sreqHigh, + data, store_idx); } + /** Get the dcache port (used to find block size for translations). */ + Port *getDcachePort() { return this->iew.ldstQueue.getDcachePort(); } + Addr lockAddr; /** Temporary fix for the lock flag, works in the UP case. */ diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh index a0bae058c..7a7ea917f 100644 --- a/src/cpu/o3/lsq.hh +++ b/src/cpu/o3/lsq.hh @@ -270,15 +270,19 @@ class LSQ { void dumpInsts(ThreadID tid) { thread[tid].dumpInsts(); } - /** Executes a read operation, using the load specified at the load index. */ + /** Executes a read operation, using the load specified at the load + * index. + */ template <class T> - Fault read(RequestPtr req, T &data, int load_idx); + Fault read(RequestPtr req, RequestPtr sreqLow, RequestPtr sreqHigh, + T &data, int load_idx); /** Executes a store operation, using the store specified at the store - * index. + * index. */ template <class T> - Fault write(RequestPtr req, T &data, int store_idx); + Fault write(RequestPtr req, RequestPtr sreqLow, RequestPtr sreqHigh, + T &data, int store_idx); /** The CPU pointer. */ O3CPU *cpu; @@ -369,21 +373,23 @@ class LSQ { template <class Impl> template <class T> Fault -LSQ<Impl>::read(RequestPtr req, T &data, int load_idx) +LSQ<Impl>::read(RequestPtr req, RequestPtr sreqLow, RequestPtr sreqHigh, + T &data, int load_idx) { ThreadID tid = req->threadId(); - return thread[tid].read(req, data, load_idx); + return thread[tid].read(req, sreqLow, sreqHigh, data, load_idx); } template <class Impl> template <class T> Fault -LSQ<Impl>::write(RequestPtr req, T &data, int store_idx) +LSQ<Impl>::write(RequestPtr req, RequestPtr sreqLow, RequestPtr sreqHigh, + T &data, int store_idx) { ThreadID tid = req->threadId(); - return thread[tid].write(req, data, store_idx); + return thread[tid].write(req, sreqLow, sreqHigh, data, store_idx); } #endif // __CPU_O3_LSQ_HH__ diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index 6ff36d929..cf51f8eab 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -216,12 +216,18 @@ class LSQUnit { /** Writes back the instruction, sending it to IEW. */ void writeback(DynInstPtr &inst, PacketPtr pkt); + /** Writes back a store that couldn't be completed the previous cycle. */ + void writebackPendingStore(); + /** Handles completing the send of a store to memory. */ void storePostSend(PacketPtr pkt); /** Completes the store at the specified index. */ void completeStore(int store_idx); + /** Attempts to send a store to the cache. */ + bool sendStore(PacketPtr data_pkt); + /** Increments the given store index (circular queue). */ inline void incrStIdx(int &store_idx); /** Decrements the given store index (circular queue). */ @@ -254,7 +260,8 @@ class LSQUnit { public: /** Default constructor. */ LSQSenderState() - : noWB(false) + : noWB(false), isSplit(false), pktToSend(false), outstanding(1), + mainPkt(NULL), pendingPacket(NULL) { } /** Instruction who initiated the access to memory. */ @@ -265,6 +272,19 @@ class LSQUnit { int idx; /** Whether or not the instruction will need to writeback. */ bool noWB; + /** Whether or not this access is split in two. */ + bool isSplit; + /** Whether or not there is a packet that needs sending. */ + bool pktToSend; + /** Number of outstanding packets to complete. */ + int outstanding; + /** The main packet from a split load, used during writeback. */ + PacketPtr mainPkt; + /** A second packet from a split store that needs sending. */ + PacketPtr pendingPacket; + + /** Completes a packet and returns whether the access is finished. */ + inline bool complete() { return --outstanding == 0; } }; /** Writeback event, specifically for when stores forward data to loads. */ @@ -302,8 +322,8 @@ class LSQUnit { /** Constructs a store queue entry for a given instruction. */ SQEntry(DynInstPtr &_inst) - : inst(_inst), req(NULL), size(0), - canWB(0), committed(0), completed(0) + : inst(_inst), req(NULL), sreqLow(NULL), sreqHigh(NULL), size(0), + isSplit(0), canWB(0), committed(0), completed(0) { std::memset(data, 0, sizeof(data)); } @@ -312,10 +332,15 @@ class LSQUnit { DynInstPtr inst; /** The request for the store. */ RequestPtr req; + /** The split requests for the store. */ + RequestPtr sreqLow; + RequestPtr sreqHigh; /** The size of the store. */ int size; /** The store data. */ char data[sizeof(IntReg)]; + /** Whether or not the store is split into two requests. */ + bool isSplit; /** Whether or not the store can writeback. */ bool canWB; /** Whether or not the store is committed. */ @@ -406,6 +431,13 @@ class LSQUnit { /** The oldest load that caused a memory ordering violation. */ DynInstPtr memDepViolator; + /** Whether or not there is a packet that couldn't be sent because of + * a lack of cache ports. */ + bool hasPendingPkt; + + /** The packet that is pending free cache ports. */ + PacketPtr pendingPkt; + // Will also need how many read/write ports the Dcache has. Or keep track // of that in stage that is one level up, and only call executeLoad/Store // the appropriate number of times. @@ -443,11 +475,13 @@ class LSQUnit { public: /** Executes the load at the given index. */ template <class T> - Fault read(Request *req, T &data, int load_idx); + Fault read(Request *req, Request *sreqLow, Request *sreqHigh, T &data, + int load_idx); /** Executes the store at the given index. */ template <class T> - Fault write(Request *req, T &data, int store_idx); + Fault write(Request *req, Request *sreqLow, Request *sreqHigh, T &data, + int store_idx); /** Returns the index of the head load instruction. */ int getLoadHead() { return loadHead; } @@ -482,7 +516,8 @@ class LSQUnit { template <class Impl> template <class T> Fault -LSQUnit<Impl>::read(Request *req, T &data, int load_idx) +LSQUnit<Impl>::read(Request *req, Request *sreqLow, Request *sreqHigh, + T &data, int load_idx) { DynInstPtr load_inst = loadQueue[load_idx]; @@ -503,6 +538,10 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) // memory. This is quite ugly. @todo: Figure out the proper // place to really handle request deletes. delete req; + if (TheISA::HasUnalignedMemAcc && sreqLow) { + delete sreqLow; + delete sreqHigh; + } return TheISA::genMachineCheckFault(); } @@ -512,10 +551,12 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) int store_size = 0; DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, " - "storeHead: %i addr: %#x\n", - load_idx, store_idx, storeHead, req->getPaddr()); + "storeHead: %i addr: %#x%s\n", + load_idx, store_idx, storeHead, req->getPaddr(), + sreqLow ? " split" : ""); if (req->isLLSC()) { + assert(!sreqLow); // Disable recording the result temporarily. Writing to misc // regs normally updates the result, but this is not the // desired behavior when handling store conditionals. @@ -587,6 +628,12 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) // @todo: Need to make this a parameter. cpu->schedule(wb, curTick); + // Don't need to do anything special for split loads. + if (TheISA::HasUnalignedMemAcc && sreqLow) { + delete sreqLow; + delete sreqHigh; + } + ++lsqForwLoads; return NoFault; } else if ((store_has_lower_limit && lower_load_has_store_part) || @@ -630,6 +677,10 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) // memory. This is quite ugly. @todo: Figure out the // proper place to really handle request deletes. delete req; + if (TheISA::HasUnalignedMemAcc && sreqLow) { + delete sreqLow; + delete sreqHigh; + } return NoFault; } @@ -645,12 +696,14 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) ++usedPorts; // if we the cache is not blocked, do cache access + bool completedFirst = false; if (!lsq->cacheBlocked()) { - PacketPtr data_pkt = - new Packet(req, - (req->isLLSC() ? - MemCmd::LoadLockedReq : MemCmd::ReadReq), - Packet::Broadcast); + MemCmd command = + req->isLLSC() ? MemCmd::LoadLockedReq : MemCmd::ReadReq; + PacketPtr data_pkt = new Packet(req, command, Packet::Broadcast); + PacketPtr fst_data_pkt = NULL; + PacketPtr snd_data_pkt = NULL; + data_pkt->dataStatic(load_inst->memData); LSQSenderState *state = new LSQSenderState; @@ -659,18 +712,66 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) state->inst = load_inst; data_pkt->senderState = state; - if (!dcachePort->sendTiming(data_pkt)) { + if (!TheISA::HasUnalignedMemAcc || !sreqLow) { + + // Point the first packet at the main data packet. + fst_data_pkt = data_pkt; + } else { + + // Create the split packets. + fst_data_pkt = new Packet(sreqLow, command, Packet::Broadcast); + snd_data_pkt = new Packet(sreqHigh, command, Packet::Broadcast); + + fst_data_pkt->dataStatic(load_inst->memData); + snd_data_pkt->dataStatic(load_inst->memData + sreqLow->getSize()); + + fst_data_pkt->senderState = state; + snd_data_pkt->senderState = state; + + state->isSplit = true; + state->outstanding = 2; + state->mainPkt = data_pkt; + } + + if (!dcachePort->sendTiming(fst_data_pkt)) { // Delete state and data packet because a load retry // initiates a pipeline restart; it does not retry. delete state; delete data_pkt->req; delete data_pkt; + if (TheISA::HasUnalignedMemAcc && sreqLow) { + delete fst_data_pkt->req; + delete fst_data_pkt; + delete snd_data_pkt->req; + delete snd_data_pkt; + } req = NULL; // If the access didn't succeed, tell the LSQ by setting // the retry thread id. lsq->setRetryTid(lsqID); + } else if (TheISA::HasUnalignedMemAcc && sreqLow) { + completedFirst = true; + + // The first packet was sent without problems, so send this one + // too. If there is a problem with this packet then the whole + // load will be squashed, so indicate this to the state object. + // The first packet will return in completeDataAccess and be + // handled there. + ++usedPorts; + if (!dcachePort->sendTiming(snd_data_pkt)) { + + // The main packet will be deleted in completeDataAccess. + delete snd_data_pkt->req; + delete snd_data_pkt; + + state->complete(); + + req = NULL; + + lsq->setRetryTid(lsqID); + } } } @@ -679,6 +780,10 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) if (lsq->cacheBlocked()) { if (req) delete req; + if (TheISA::HasUnalignedMemAcc && sreqLow && !completedFirst) { + delete sreqLow; + delete sreqHigh; + } ++lsqCacheBlocked; @@ -703,7 +808,8 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) template <class Impl> template <class T> Fault -LSQUnit<Impl>::write(Request *req, T &data, int store_idx) +LSQUnit<Impl>::write(Request *req, Request *sreqLow, Request *sreqHigh, + T &data, int store_idx) { assert(storeQueue[store_idx].inst); @@ -713,6 +819,8 @@ LSQUnit<Impl>::write(Request *req, T &data, int store_idx) storeQueue[store_idx].inst->seqNum); storeQueue[store_idx].req = req; + storeQueue[store_idx].sreqLow = sreqLow; + storeQueue[store_idx].sreqHigh = sreqHigh; storeQueue[store_idx].size = sizeof(T); assert(sizeof(T) <= sizeof(storeQueue[store_idx].data)); diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index 9ee1de45a..fcc57ab09 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -85,11 +85,23 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt) assert(!pkt->wasNacked()); + // If this is a split access, wait until all packets are received. + if (TheISA::HasUnalignedMemAcc && !state->complete()) { + delete pkt->req; + delete pkt; + return; + } + if (isSwitchedOut() || inst->isSquashed()) { iewStage->decrWb(inst->seqNum); } else { if (!state->noWB) { - writeback(inst, pkt); + if (!TheISA::HasUnalignedMemAcc || !state->isSplit || + !state->isLoad) { + writeback(inst, pkt); + } else { + writeback(inst, state->mainPkt); + } } if (inst->isStore()) { @@ -97,6 +109,10 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt) } } + if (TheISA::HasUnalignedMemAcc && state->isSplit && state->isLoad) { + delete state->mainPkt->req; + delete state->mainPkt; + } delete state; delete pkt->req; delete pkt; @@ -106,7 +122,7 @@ template <class Impl> LSQUnit<Impl>::LSQUnit() : loads(0), stores(0), storesToWB(0), stalled(false), isStoreBlocked(false), isLoadBlocked(false), - loadBlockedHandled(false) + loadBlockedHandled(false), hasPendingPkt(false) { } @@ -605,8 +621,30 @@ LSQUnit<Impl>::commitStores(InstSeqNum &youngest_inst) template <class Impl> void +LSQUnit<Impl>::writebackPendingStore() +{ + if (hasPendingPkt) { + assert(pendingPkt != NULL); + + // If the cache is blocked, this will store the packet for retry. + if (sendStore(pendingPkt)) { + storePostSend(pendingPkt); + } + pendingPkt = NULL; + hasPendingPkt = false; + } +} + +template <class Impl> +void LSQUnit<Impl>::writebackStores() { + // First writeback the second packet from any split store that didn't + // complete last cycle because there weren't enough cache ports available. + if (TheISA::HasUnalignedMemAcc) { + writebackPendingStore(); + } + while (storesToWB > 0 && storeWBIdx != storeTail && storeQueue[storeWBIdx].inst && @@ -640,6 +678,11 @@ LSQUnit<Impl>::writebackStores() assert(storeQueue[storeWBIdx].req); assert(!storeQueue[storeWBIdx].committed); + if (TheISA::HasUnalignedMemAcc && storeQueue[storeWBIdx].isSplit) { + assert(storeQueue[storeWBIdx].sreqLow); + assert(storeQueue[storeWBIdx].sreqHigh); + } + DynInstPtr inst = storeQueue[storeWBIdx].inst; Request *req = storeQueue[storeWBIdx].req; @@ -653,15 +696,41 @@ LSQUnit<Impl>::writebackStores() MemCmd command = req->isSwap() ? MemCmd::SwapReq : (req->isLLSC() ? MemCmd::StoreCondReq : MemCmd::WriteReq); - PacketPtr data_pkt = new Packet(req, command, - Packet::Broadcast); - data_pkt->dataStatic(inst->memData); + PacketPtr data_pkt; + PacketPtr snd_data_pkt = NULL; LSQSenderState *state = new LSQSenderState; state->isLoad = false; state->idx = storeWBIdx; state->inst = inst; - data_pkt->senderState = state; + + if (!TheISA::HasUnalignedMemAcc || !storeQueue[storeWBIdx].isSplit) { + + // Build a single data packet if the store isn't split. + data_pkt = new Packet(req, command, Packet::Broadcast); + data_pkt->dataStatic(inst->memData); + data_pkt->senderState = state; + } else { + RequestPtr sreqLow = storeQueue[storeWBIdx].sreqLow; + RequestPtr sreqHigh = storeQueue[storeWBIdx].sreqHigh; + + // Create two packets if the store is split in two. + data_pkt = new Packet(sreqLow, command, Packet::Broadcast); + snd_data_pkt = new Packet(sreqHigh, command, Packet::Broadcast); + + data_pkt->dataStatic(inst->memData); + snd_data_pkt->dataStatic(inst->memData + sreqLow->getSize()); + + data_pkt->senderState = state; + snd_data_pkt->senderState = state; + + state->isSplit = true; + state->outstanding = 2; + + // Can delete the main request now. + delete req; + req = sreqLow; + } DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%#x " "to Addr:%#x, data:%#x [sn:%lli]\n", @@ -671,6 +740,7 @@ LSQUnit<Impl>::writebackStores() // @todo: Remove this SC hack once the memory system handles it. if (inst->isStoreConditional()) { + assert(!storeQueue[storeWBIdx].isSplit); // Disable recording the result temporarily. Writing to // misc regs normally updates the result, but this is not // the desired behavior when handling store conditionals. @@ -694,18 +764,44 @@ LSQUnit<Impl>::writebackStores() state->noWB = true; } - if (!dcachePort->sendTiming(data_pkt)) { - // Need to handle becoming blocked on a store. + if (!sendStore(data_pkt)) { DPRINTF(IEW, "D-Cache became blocked when writing [sn:%lli], will" "retry later\n", inst->seqNum); - isStoreBlocked = true; - ++lsqCacheBlocked; - assert(retryPkt == NULL); - retryPkt = data_pkt; - lsq->setRetryTid(lsqID); + + // Need to store the second packet, if split. + if (TheISA::HasUnalignedMemAcc && storeQueue[storeWBIdx].isSplit) { + state->pktToSend = true; + state->pendingPacket = snd_data_pkt; + } } else { - storePostSend(data_pkt); + + // If split, try to send the second packet too + if (TheISA::HasUnalignedMemAcc && storeQueue[storeWBIdx].isSplit) { + assert(snd_data_pkt); + + // Ensure there are enough ports to use. + if (usedPorts < cachePorts) { + ++usedPorts; + if (sendStore(snd_data_pkt)) { + storePostSend(snd_data_pkt); + } else { + DPRINTF(IEW, "D-Cache became blocked when writing" + " [sn:%lli] second packet, will retry later\n", + inst->seqNum); + } + } else { + + // Store the packet for when there's free ports. + assert(pendingPkt == NULL); + pendingPkt = snd_data_pkt; + hasPendingPkt = true; + } + } else { + + // Not a split store. + storePostSend(data_pkt); + } } } @@ -808,6 +904,13 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num) // memory. This is quite ugly. @todo: Figure out the proper // place to really handle request deletes. delete storeQueue[store_idx].req; + if (TheISA::HasUnalignedMemAcc && storeQueue[store_idx].isSplit) { + delete storeQueue[store_idx].sreqLow; + delete storeQueue[store_idx].sreqHigh; + + storeQueue[store_idx].sreqLow = NULL; + storeQueue[store_idx].sreqHigh = NULL; + } storeQueue[store_idx].req = NULL; --stores; @@ -927,6 +1030,22 @@ LSQUnit<Impl>::completeStore(int store_idx) } template <class Impl> +bool +LSQUnit<Impl>::sendStore(PacketPtr data_pkt) +{ + if (!dcachePort->sendTiming(data_pkt)) { + // Need to handle becoming blocked on a store. + isStoreBlocked = true; + ++lsqCacheBlocked; + assert(retryPkt == NULL); + retryPkt = data_pkt; + lsq->setRetryTid(lsqID); + return false; + } + return true; +} + +template <class Impl> void LSQUnit<Impl>::recvRetry() { @@ -935,10 +1054,24 @@ LSQUnit<Impl>::recvRetry() assert(retryPkt != NULL); if (dcachePort->sendTiming(retryPkt)) { - storePostSend(retryPkt); + LSQSenderState *state = + dynamic_cast<LSQSenderState *>(retryPkt->senderState); + + // Don't finish the store unless this is the last packet. + if (!TheISA::HasUnalignedMemAcc || !state->pktToSend) { + storePostSend(retryPkt); + } retryPkt = NULL; isStoreBlocked = false; lsq->setRetryTid(InvalidThreadID); + + // Send any outstanding packet. + if (TheISA::HasUnalignedMemAcc && state->pktToSend) { + assert(state->pendingPacket); + if (sendStore(state->pendingPacket)) { + storePostSend(state->pendingPacket); + } + } } else { // Still blocked! ++lsqCacheBlocked; |