Diffstat (limited to 'src/cpu/o3')
-rw-r--r--  src/cpu/o3/cpu.cc                    1
-rw-r--r--  src/cpu/o3/cpu.hh                   25
-rw-r--r--  src/cpu/o3/iew_impl.hh              16
-rw-r--r--  src/cpu/o3/inst_queue_impl.hh        7
-rw-r--r--  src/cpu/o3/lsq.hh                  773
-rw-r--r--  src/cpu/o3/lsq_impl.hh             537
-rw-r--r--  src/cpu/o3/lsq_unit.hh             852
-rw-r--r--  src/cpu/o3/lsq_unit_impl.hh        762
-rw-r--r--  src/cpu/o3/probe/elastic_trace.cc    2
9 files changed, 1903 insertions, 1072 deletions
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 600c89aa5..7261f0c9e 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -850,7 +850,6 @@ FullO3CPU<Impl>::insertThread(ThreadID tid)
//Reset ROB/IQ/LSQ Entries
commit.rob->resetEntries();
- iew.resetEntries();
}
template <class Impl>
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index 90024bc84..1159850f8 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011-2013, 2016 ARM Limited
+ * Copyright (c) 2011-2013, 2016-2018 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -125,6 +125,7 @@ class FullO3CPU : public BaseO3CPU
BaseTLB *itb;
BaseTLB *dtb;
+ using LSQRequest = typename LSQ<Impl>::LSQRequest;
/** Overall CPU status. */
Status _status;
@@ -733,21 +734,25 @@ class FullO3CPU : public BaseO3CPU
/** Available thread ids in the cpu*/
std::vector<ThreadID> tids;
+ /** CPU pushRequest function, forwards request to LSQ. */
+ Fault pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
+ unsigned int size, Addr addr, Request::Flags flags,
+ uint64_t *res)
+ {
+ return iew.ldstQueue.pushRequest(inst, isLoad, data, size, addr,
+ flags, res);
+ }
+
/** CPU read function, forwards read to LSQ. */
- Fault read(const RequestPtr &req,
- RequestPtr &sreqLow, RequestPtr &sreqHigh,
- int load_idx)
+ Fault read(LSQRequest* req, int load_idx)
{
- return this->iew.ldstQueue.read(req, sreqLow, sreqHigh, load_idx);
+ return this->iew.ldstQueue.read(req, load_idx);
}
/** CPU write function, forwards write to LSQ. */
- Fault write(const RequestPtr &req,
- const RequestPtr &sreqLow, const RequestPtr &sreqHigh,
- uint8_t *data, int store_idx)
+ Fault write(LSQRequest* req, uint8_t *data, int store_idx)
{
- return this->iew.ldstQueue.write(req, sreqLow, sreqHigh,
- data, store_idx);
+ return this->iew.ldstQueue.write(req, data, store_idx);
}
/** Used by the fetch unit to get a hold of the instruction port. */
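The two read/write forwarders above shrink because the CPU no longer builds, splits, and tracks Request objects itself: the new pushRequest() is the single entry point, and the LSQ owns request creation and translation from there on. A compilable toy model of the forwarding shape (ToyCPU/ToyLSQ are illustrative stand-ins, not the gem5 templates):

    #include <cstdio>

    // Stand-in for LSQ<Impl>: owns request building and translation.
    struct ToyLSQ {
        int pushRequest(bool is_load, unsigned size, unsigned long addr) {
            // The real LSQ builds a Single- or SplitDataRequest here and
            // initiates its translation; the toy just reports the access.
            std::printf("%s, %u bytes @ %#lx\n",
                        is_load ? "load" : "store", size, addr);
            return 0; // NoFault
        }
    };

    // Stand-in for FullO3CPU: pushRequest() is a thin forwarder.
    struct ToyCPU {
        ToyLSQ ldstQueue;
        int pushRequest(bool is_load, unsigned size, unsigned long addr) {
            return ldstQueue.pushRequest(is_load, size, addr);
        }
    };

    int main() {
        ToyCPU cpu;
        return cpu.pushRequest(true, 8, 0x1000); // a load, as initiateAcc would issue
    }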
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index e706b09a1..3d5d84886 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010-2013 ARM Limited
+ * Copyright (c) 2010-2013, 2018 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved.
*
@@ -744,14 +744,6 @@ DefaultIEW<Impl>::updateStatus()
}
template <class Impl>
-void
-DefaultIEW<Impl>::resetEntries()
-{
- instQueue.resetEntries();
- ldstQueue.resetEntries();
-}
-
-template <class Impl>
bool
DefaultIEW<Impl>::checkStall(ThreadID tid)
{
@@ -1353,7 +1345,7 @@ DefaultIEW<Impl>::executeInsts()
DPRINTF(IEW, "LDSTQ detected a violation. Violator PC: %s "
"[sn:%lli], inst PC: %s [sn:%lli]. Addr is: %#x.\n",
violator->pcState(), violator->seqNum,
- inst->pcState(), inst->seqNum, inst->physEffAddrLow);
+ inst->pcState(), inst->seqNum, inst->physEffAddr);
fetchRedirect[tid] = true;
@@ -1376,7 +1368,7 @@ DefaultIEW<Impl>::executeInsts()
DPRINTF(IEW, "LDSTQ detected a violation. Violator PC: "
"%s, inst PC: %s. Addr is: %#x.\n",
violator->pcState(), inst->pcState(),
- inst->physEffAddrLow);
+ inst->physEffAddr);
DPRINTF(IEW, "Violation will not be handled because "
"already squashing\n");
@@ -1460,6 +1452,8 @@ DefaultIEW<Impl>::tick()
wroteToTimeBuffer = false;
updatedQueues = false;
+ ldstQueue.tick();
+
sortInsts();
// Free function units marked as being freed this cycle.
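The new ldstQueue.tick() call pairs with the LSQ changes later in this patch: the per-cycle store-port budget moves from each LSQUnit (whose tick() is removed) up to the shared LSQ and is reset once per CPU cycle. A compilable sketch of that accounting, with illustrative names:

    #include <cassert>

    // Per-cycle store-port budget, now shared by all LSQ units.
    struct StorePorts {
        int cacheStorePorts;      // configured ports usable by stores
        int usedStorePorts = 0;   // ports consumed so far this cycle

        explicit StorePorts(int n) : cacheStorePorts(n) {}
        void tick() { usedStorePorts = 0; }   // once per CPU cycle
        bool available() const { return usedStorePorts < cacheStorePorts; }
        void busy() {
            ++usedStorePorts;
            assert(usedStorePorts <= cacheStorePorts);
        }
    };

    int main() {
        StorePorts ports(1);
        if (ports.available())
            ports.busy();             // first store this cycle takes the port
        assert(!ports.available());   // a second store must wait
        ports.tick();                 // next cycle: budget restored
        assert(ports.available());
    }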
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh
index a8895f8ff..4a55a91ea 100644
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011-2014 ARM Limited
+ * Copyright (c) 2011-2014, 2017-2018 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved.
*
@@ -1140,9 +1140,6 @@ template <class Impl>
void
InstructionQueue<Impl>::blockMemInst(const DynInstPtr &blocked_inst)
{
- blocked_inst->translationStarted(false);
- blocked_inst->translationCompleted(false);
-
blocked_inst->clearIssued();
blocked_inst->clearCanIssue();
blockedMemInsts.push_back(blocked_inst);
@@ -1285,9 +1282,9 @@ InstructionQueue<Impl>::doSquash(ThreadID tid)
squashed_inst);
}
-
++iqSquashedOperandsExamined;
}
+
} else if (!squashed_inst->isStoreConditional() ||
!squashed_inst->isCompleted()) {
NonSpecMapIt ns_inst_it =
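blockMemInst() stops rolling back the translation flags because a blocked memory instruction now keeps its LSQRequest: when it replays, LSQ::pushRequest() (in lsq_impl.hh below) finds inst->savedReq and reuses the completed translation instead of re-walking the TLB. A toy model of that replay path (illustrative names, not the gem5 classes):

    struct ToyReq { bool translated; };

    struct ToyInst {
        bool translationStarted = false;
        ToyReq *savedReq = nullptr;
    };

    // Mirrors the shape of LSQ::pushRequest()'s translation reuse.
    ToyReq *getRequest(ToyInst &inst)
    {
        if (inst.translationStarted)
            return inst.savedReq;          // replay: reuse prior translation
        inst.translationStarted = true;    // first attempt: translate now
        inst.savedReq = new ToyReq{true};
        return inst.savedReq;
    }

    int main()
    {
        ToyInst inst;
        ToyReq *first = getRequest(inst);   // initial issue
        ToyReq *replay = getRequest(inst);  // after being blocked and replayed
        bool same = (first == replay);      // same request both times
        delete first;
        return same ? 0 : 1;
    }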
diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh
index 2b2d39bf7..003726c7c 100644
--- a/src/cpu/o3/lsq.hh
+++ b/src/cpu/o3/lsq.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011-2012, 2014 ARM Limited
+ * Copyright (c) 2011-2012, 2014, 2018 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -47,8 +47,9 @@
#include <map>
#include <queue>
-#include "cpu/o3/lsq_unit.hh"
+#include "arch/generic/tlb.hh"
#include "cpu/inst_seq.hh"
+#include "cpu/o3/lsq_unit.hh"
#include "enums/SMTQueuePolicy.hh"
#include "mem/port.hh"
#include "sim/sim_object.hh"
@@ -56,13 +57,659 @@
struct DerivO3CPUParams;
template <class Impl>
-class LSQ {
+class LSQ
+{
public:
typedef typename Impl::O3CPU O3CPU;
typedef typename Impl::DynInstPtr DynInstPtr;
typedef typename Impl::CPUPol::IEW IEW;
typedef typename Impl::CPUPol::LSQUnit LSQUnit;
+ class LSQRequest;
+ /** Derived class to hold any sender state the LSQ needs. */
+ class LSQSenderState : public Packet::SenderState
+ {
+ protected:
+ /** The senderState needs to know the LSQRequest who owns it. */
+ LSQRequest* _request;
+
+ /** Default constructor. */
+ LSQSenderState(LSQRequest* request, bool isLoad_)
+ : _request(request), mainPkt(nullptr), pendingPacket(nullptr),
+ outstanding(0), isLoad(isLoad_), needWB(isLoad_), isSplit(false),
+ pktToSend(false), deleted(false)
+ { }
+ public:
+
+ /** Instruction which initiated the access to memory. */
+ DynInstPtr inst;
+ /** The main packet from a split load, used during writeback. */
+ PacketPtr mainPkt;
+ /** A second packet from a split store that needs sending. */
+ PacketPtr pendingPacket;
+ /** Number of outstanding packets to complete. */
+ uint8_t outstanding;
+ /** Whether or not it is a load. */
+ bool isLoad;
+ /** Whether or not the instruction will need to writeback. */
+ bool needWB;
+ /** Whether or not this access is split in two. */
+ bool isSplit;
+ /** Whether or not there is a packet that needs sending. */
+ bool pktToSend;
+ /** Has the request been deleted?
+         * LSQ entries can be squashed before the response comes back. In
+         * that case the SenderState knows.
+ */
+ bool deleted;
+ ContextID contextId() { return inst->contextId(); }
+
+ /** Completes a packet and returns whether the access is finished. */
+ inline bool isComplete() { return outstanding == 0; }
+ inline void deleteRequest() { deleted = true; }
+ inline bool alive() { return !deleted; }
+ LSQRequest* request() { return _request; }
+ virtual void complete() = 0;
+ void writebackDone() { _request->writebackDone(); }
+ };
+
+ /** Memory operation metadata.
+ * This class holds the information about a memory operation. It lives
+ * from initiateAcc to resource deallocation at commit or squash.
+ * LSQRequest objects are owned by the LQ/SQ Entry in the LSQUnit that
+ * holds the operation. It is also used by the LSQSenderState. In addition,
+     * the LSQRequest is a TranslationState; therefore, upon squash, there
+     * must be a well-defined transfer of ownership in case the LSQ
+     * resources are deallocated before the TLB is done using the
+     * TranslationState. If that happens, the LSQRequest becomes self-owned
+     * and is responsible for detecting that its services are no longer
+     * required and self-destructing.
+ *
+ * Lifetime of a LSQRequest:
+ * +--------------------+
+ * |LSQ creates and owns|
+ * +--------------------+
+ * |
+ * +--------------------+
+     * |Initiate translation|
+ * +--------------------+
+ * |
+ * ___^___
+ * ___/ \___
+ * ______/ Squashed? \
+ * | \___ ___/
+ * | \___ ___/
+ * | v
+ * | |
+ * | +--------------------+
+ * | | Translation done |
+ * | +--------------------+
+ * | |
+ * | +--------------------+
+ * | | Send packet |<------+
+ * | +--------------------+ |
+ * | | |
+ * | ___^___ |
+ * | ___/ \___ |
+ * | ____/ Squashed? \ |
+ * | | \___ ___/ |
+ * | | \___ ___/ |
+ * | | v |
+ * | | | |
+ * | | ___^___ |
+ * | | ___/ \___ |
+ * | | / Done? \__________|
+ * | | \___ ___/
+ * | | \___ ___/
+ * | | v
+ * | | |
+ * | | +--------------------+
+ * | | | Manage stuff |
+ * | | | Free resources |
+ * | | +--------------------+
+ * | |
+ * | | +--------------------+
+ * | | | senderState owns |
+ * | +->| onRecvTimingResp |
+ * | | free resources |
+ * | +--------------------+
+ * |
+ * | +----------------------+
+ * | | self owned (Trans) |
+ * +-->| on TranslationFinish |
+ * | free resources |
+ * +----------------------+
+ *
+ *
+ */
+ class LSQRequest : public BaseTLB::Translation
+ {
+ protected:
+ typedef uint32_t FlagsStorage;
+ typedef ::Flags<FlagsStorage> FlagsType;
+
+ enum Flag : FlagsStorage
+ {
+ IsLoad = 0x00000001,
+ /** True if this is a store that writes registers (SC). */
+ WbStore = 0x00000002,
+ Delayed = 0x00000004,
+ IsSplit = 0x00000008,
+ /** True if any translation has been sent to TLB. */
+ TranslationStarted = 0x00000010,
+            /** True if all outbound translations have been replied to. */
+ TranslationFinished = 0x00000020,
+ Sent = 0x00000040,
+ Retry = 0x00000080,
+ Complete = 0x00000100,
+ /** Ownership tracking flags. */
+ /** Translation squashed. */
+ TranslationSquashed = 0x00000200,
+ /** Request discarded */
+ Discarded = 0x00000400,
+ /** LSQ resources freed. */
+ LSQEntryFreed = 0x00000800,
+ /** Store written back. */
+ WritebackScheduled = 0x00001000,
+ WritebackDone = 0x00002000
+ };
+ FlagsType flags;
+
+ enum class State
+ {
+ NotIssued,
+ Translation,
+ Request,
+ Complete,
+ Squashed,
+ Fault,
+ };
+ State _state;
+ LSQSenderState* _senderState;
+ void setState(const State& newState) { _state = newState; }
+
+ uint32_t numTranslatedFragments;
+ uint32_t numInTranslationFragments;
+
+ /** LQ/SQ entry idx. */
+ uint32_t _entryIdx;
+
+ void markDelayed() { flags.set(Flag::Delayed); }
+ bool isDelayed() { return flags.isSet(Flag::Delayed); }
+
+ public:
+ LSQUnit& _port;
+ const DynInstPtr _inst;
+ uint32_t _taskId;
+ PacketDataPtr _data;
+ std::vector<PacketPtr> _packets;
+ std::vector<RequestPtr> _requests;
+ std::vector<Fault> _fault;
+ uint64_t* _res;
+ const Addr _addr;
+ const uint32_t _size;
+ const Request::Flags _flags;
+ uint32_t _numOutstandingPackets;
+ protected:
+ LSQUnit* lsqUnit() { return &_port; }
+ LSQRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad) :
+ _state(State::NotIssued), _senderState(nullptr),
+ _port(*port), _inst(inst), _data(nullptr),
+ _res(nullptr), _addr(0), _size(0), _flags(0),
+ _numOutstandingPackets(0)
+ {
+ flags.set(Flag::IsLoad, isLoad);
+ flags.set(Flag::WbStore, _inst->isStoreConditional());
+ install();
+ }
+ LSQRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad,
+ const Addr& addr, const uint32_t& size,
+ const Request::Flags& flags_,
+ PacketDataPtr data = nullptr, uint64_t* res = nullptr)
+ : _state(State::NotIssued), _senderState(nullptr),
+ numTranslatedFragments(0),
+ numInTranslationFragments(0),
+ _port(*port), _inst(inst), _data(data),
+ _res(res), _addr(addr), _size(size),
+ _flags(flags_),
+ _numOutstandingPackets(0)
+ {
+ flags.set(Flag::IsLoad, isLoad);
+ flags.set(Flag::WbStore, _inst->isStoreConditional());
+ install();
+ }
+
+ bool
+ isLoad() const
+ {
+ return flags.isSet(Flag::IsLoad);
+ }
+
+ /** Install the request in the LQ/SQ. */
+ void install()
+ {
+ if (isLoad()) {
+ _port.loadQueue[_inst->lqIdx].setRequest(this);
+ } else {
+ _port.storeQueue[_inst->sqIdx].setRequest(this);
+ }
+ }
+ virtual bool
+ squashed() const override
+ {
+ return _inst->isSquashed();
+ }
+
+ /**
+ * Test if the LSQRequest has been released, i.e. self-owned.
+ * An LSQRequest manages itself when the resources on the LSQ are freed
+ * but the translation is still going on and the LSQEntry was freed.
+ */
+ bool
+ isReleased()
+ {
+ return flags.isSet(Flag::LSQEntryFreed) ||
+ flags.isSet(Flag::Discarded);
+ }
+
+ /** Release the LSQRequest.
+         * Notify the sender state that the request it points to is no
+         * longer valid. If the request is an orphan (self-managed), mark
+         * it as freed; otherwise destroy it, as this is the end of its
+         * life cycle.
+         * An LSQRequest is an orphan when its LSQ resources have been
+         * released but it still has an in-flight translation request to
+         * the TLB or an access request to memory.
+ */
+ void release(Flag reason)
+ {
+ assert(reason == Flag::LSQEntryFreed || reason == Flag::Discarded);
+ if (!isAnyOutstandingRequest()) {
+ delete this;
+ } else {
+ if (_senderState) {
+ _senderState->deleteRequest();
+ }
+ flags.set(reason);
+ }
+ }
+
+ /** Destructor.
+ * The LSQRequest owns the request. If the packet has already been
+ * sent, the sender state will be deleted upon receiving the reply.
+ */
+ virtual ~LSQRequest()
+ {
+ assert(!isAnyOutstandingRequest());
+ _inst->savedReq = nullptr;
+ if (_senderState)
+ delete _senderState;
+
+ for (auto r: _packets)
+ delete r;
+ };
+
+
+ public:
+ /** Convenience getters/setters. */
+ /** @{ */
+ /** Set up Context numbers. */
+ void
+ setContext(const ContextID& context_id)
+ {
+ request()->setContext(context_id);
+ }
+
+ const DynInstPtr&
+ instruction()
+ {
+ return _inst;
+ }
+
+ /** Set up virtual request.
+         * For previously allocated Request objects.
+ */
+ void
+ setVirt(int asid, Addr vaddr, unsigned size, Request::Flags flags_,
+ MasterID mid, Addr pc)
+ {
+ request()->setVirt(asid, vaddr, size, flags_, mid, pc);
+ }
+
+ void
+ taskId(const uint32_t& v)
+ {
+ _taskId = v;
+ for (auto& r: _requests)
+ r->taskId(v);
+ }
+
+ uint32_t taskId() const { return _taskId; }
+ RequestPtr request(int idx = 0) { return _requests.at(idx); }
+
+ const RequestPtr
+ request(int idx = 0) const
+ {
+ return _requests.at(idx);
+ }
+
+ Addr getVaddr(int idx = 0) const { return request(idx)->getVaddr(); }
+ virtual void initiateTranslation() = 0;
+
+ PacketPtr packet(int idx = 0) { return _packets.at(idx); }
+
+ virtual PacketPtr
+ mainPacket()
+ {
+ assert (_packets.size() == 1);
+ return packet();
+ }
+
+ virtual RequestPtr
+ mainRequest()
+ {
+ assert (_requests.size() == 1);
+ return request();
+ }
+
+ void
+ senderState(LSQSenderState* st)
+ {
+ _senderState = st;
+ for (auto& pkt: _packets) {
+ if (pkt)
+ pkt->senderState = st;
+ }
+ }
+
+ const LSQSenderState*
+ senderState() const
+ {
+ return _senderState;
+ }
+
+ /**
+         * Mark senderState as discarded. This will cause response
+         * packets from the cache to be discarded.
+ */
+ void
+ discardSenderState()
+ {
+ assert(_senderState);
+ _senderState->deleteRequest();
+ }
+
+ /**
+ * Test if there is any in-flight translation or mem access request
+ */
+ bool
+ isAnyOutstandingRequest()
+ {
+ return numInTranslationFragments > 0 ||
+ _numOutstandingPackets > 0 ||
+ (flags.isSet(Flag::WritebackScheduled) &&
+ !flags.isSet(Flag::WritebackDone));
+ }
+
+ bool
+ isSplit() const
+ {
+ return flags.isSet(Flag::IsSplit);
+ }
+ /** @} */
+ virtual bool recvTimingResp(PacketPtr pkt) = 0;
+ virtual void sendPacketToCache() = 0;
+ virtual void buildPackets() = 0;
+
+ /**
+ * Memory mapped IPR accesses
+ */
+ virtual void handleIprWrite(ThreadContext *thread, PacketPtr pkt) = 0;
+ virtual Cycles handleIprRead(ThreadContext *thread, PacketPtr pkt) = 0;
+
+ /**
+ * Test if the request accesses a particular cache line.
+ */
+ virtual bool isCacheBlockHit(Addr blockAddr, Addr cacheBlockMask) = 0;
+
+ /** Update the status to reflect that a packet was sent. */
+ void
+ packetSent()
+ {
+ flags.set(Flag::Sent);
+ }
+ /** Update the status to reflect that a packet was not sent.
+ * When a packet fails to be sent, we mark the request as needing a
+         * retry. Note that the Retry flag is sticky.
+ */
+ void
+ packetNotSent()
+ {
+ flags.set(Flag::Retry);
+ flags.clear(Flag::Sent);
+ }
+
+ void sendFragmentToTranslation(int i);
+ bool
+ isComplete()
+ {
+ return flags.isSet(Flag::Complete);
+ }
+
+ bool
+ isInTranslation()
+ {
+ return _state == State::Translation;
+ }
+
+ bool
+ isTranslationComplete()
+ {
+ return flags.isSet(Flag::TranslationStarted) &&
+ !isInTranslation();
+ }
+
+ bool
+ isTranslationBlocked()
+ {
+ return _state == State::Translation &&
+ flags.isSet(Flag::TranslationStarted) &&
+ !flags.isSet(Flag::TranslationFinished);
+ }
+
+ bool
+ isSent()
+ {
+ return flags.isSet(Flag::Sent);
+ }
+
+ /**
+ * The LSQ entry is cleared
+ */
+ void
+ freeLSQEntry()
+ {
+ release(Flag::LSQEntryFreed);
+ }
+
+ /**
+ * The request is discarded (e.g. partial store-load forwarding)
+ */
+ void
+ discard()
+ {
+ release(Flag::Discarded);
+ }
+
+ void
+ packetReplied()
+ {
+ assert(_numOutstandingPackets > 0);
+ _numOutstandingPackets--;
+ if (_numOutstandingPackets == 0 && isReleased())
+ delete this;
+ }
+
+ void
+ writebackScheduled()
+ {
+ assert(!flags.isSet(Flag::WritebackScheduled));
+ flags.set(Flag::WritebackScheduled);
+ }
+
+ void
+ writebackDone()
+ {
+ flags.set(Flag::WritebackDone);
+ /* If the lsq resources are already free */
+ if (isReleased()) {
+ delete this;
+ }
+ }
+
+ void
+ squashTranslation()
+ {
+ assert(numInTranslationFragments == 0);
+ flags.set(Flag::TranslationSquashed);
+ /* If we are on our own, self-destruct. */
+ if (isReleased()) {
+ delete this;
+ }
+ }
+
+ void
+ complete()
+ {
+ flags.set(Flag::Complete);
+ }
+ };
+
+ class SingleDataRequest : public LSQRequest
+ {
+ protected:
+        /* Because these classes are templates, derived classes must
+         * explicitly bring dependent names from the base class into
+         * scope. */
+ using Flag = typename LSQRequest::Flag;
+ using State = typename LSQRequest::State;
+ using LSQRequest::_fault;
+ using LSQRequest::_inst;
+ using LSQRequest::_packets;
+ using LSQRequest::_port;
+ using LSQRequest::_res;
+ using LSQRequest::_senderState;
+ using LSQRequest::_state;
+ using LSQRequest::flags;
+ using LSQRequest::isLoad;
+ using LSQRequest::isTranslationComplete;
+ using LSQRequest::lsqUnit;
+ using LSQRequest::request;
+ using LSQRequest::sendFragmentToTranslation;
+ using LSQRequest::setState;
+ using LSQRequest::numInTranslationFragments;
+ using LSQRequest::numTranslatedFragments;
+ using LSQRequest::_numOutstandingPackets;
+ public:
+ SingleDataRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad,
+ const Addr& addr, const uint32_t& size,
+ const Request::Flags& flags_,
+ PacketDataPtr data = nullptr,
+ uint64_t* res = nullptr) :
+ LSQRequest(port, inst, isLoad, addr, size, flags_, data, res)
+ {
+ LSQRequest::_requests.push_back(
+ std::make_shared<Request>(inst->getASID(), addr, size, flags_,
+ inst->masterId(), inst->instAddr(), inst->contextId()));
+ LSQRequest::_requests.back()->setReqInstSeqNum(inst->seqNum);
+ }
+ inline virtual ~SingleDataRequest() {}
+ virtual void initiateTranslation();
+ virtual void finish(const Fault &fault, const RequestPtr &req,
+ ThreadContext* tc, BaseTLB::Mode mode);
+ virtual bool recvTimingResp(PacketPtr pkt);
+ virtual void sendPacketToCache();
+ virtual void buildPackets();
+ virtual void handleIprWrite(ThreadContext *thread, PacketPtr pkt);
+ virtual Cycles handleIprRead(ThreadContext *thread, PacketPtr pkt);
+ virtual bool isCacheBlockHit(Addr blockAddr, Addr cacheBlockMask);
+ };
+
+ class SplitDataRequest : public LSQRequest
+ {
+ protected:
+        /* Because these classes are templates, derived classes must
+         * explicitly bring dependent names from the base class into
+         * scope. */
+ using Flag = typename LSQRequest::Flag;
+ using State = typename LSQRequest::State;
+ using LSQRequest::_addr;
+ using LSQRequest::_data;
+ using LSQRequest::_fault;
+ using LSQRequest::_flags;
+ using LSQRequest::_inst;
+ using LSQRequest::_packets;
+ using LSQRequest::_port;
+ using LSQRequest::_requests;
+ using LSQRequest::_res;
+ using LSQRequest::_senderState;
+ using LSQRequest::_size;
+ using LSQRequest::_state;
+ using LSQRequest::_taskId;
+ using LSQRequest::flags;
+ using LSQRequest::isLoad;
+ using LSQRequest::isTranslationComplete;
+ using LSQRequest::lsqUnit;
+ using LSQRequest::numInTranslationFragments;
+ using LSQRequest::numTranslatedFragments;
+ using LSQRequest::request;
+ using LSQRequest::sendFragmentToTranslation;
+ using LSQRequest::setState;
+ using LSQRequest::_numOutstandingPackets;
+
+ uint32_t numFragments;
+ uint32_t numReceivedPackets;
+ RequestPtr mainReq;
+ PacketPtr _mainPacket;
+
+
+ public:
+ SplitDataRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad,
+ const Addr& addr, const uint32_t& size,
+ const Request::Flags & flags_,
+ PacketDataPtr data = nullptr,
+ uint64_t* res = nullptr) :
+ LSQRequest(port, inst, isLoad, addr, size, flags_, data, res),
+ numFragments(0),
+ numReceivedPackets(0),
+ mainReq(nullptr),
+ _mainPacket(nullptr)
+ {
+ flags.set(Flag::IsSplit);
+ }
+ virtual ~SplitDataRequest()
+ {
+ if (mainReq) {
+ mainReq = nullptr;
+ }
+ if (_mainPacket) {
+ delete _mainPacket;
+ _mainPacket = nullptr;
+ }
+ }
+ virtual void finish(const Fault &fault, const RequestPtr &req,
+ ThreadContext* tc, BaseTLB::Mode mode);
+ virtual bool recvTimingResp(PacketPtr pkt);
+ virtual void initiateTranslation();
+ virtual void sendPacketToCache();
+ virtual void buildPackets();
+
+ virtual void handleIprWrite(ThreadContext *thread, PacketPtr pkt);
+ virtual Cycles handleIprRead(ThreadContext *thread, PacketPtr pkt);
+ virtual bool isCacheBlockHit(Addr blockAddr, Addr cacheBlockMask);
+
+ virtual RequestPtr mainRequest();
+ virtual PacketPtr mainPacket();
+ };
+
/** Constructs an LSQ with the given parameters. */
LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params);
~LSQ() { }
@@ -85,17 +732,9 @@ class LSQ {
/** Number of entries needed for the given amount of threads.*/
int entryAmount(ThreadID num_threads);
- void removeEntries(ThreadID tid);
- /** Reset the max entries for each thread. */
- void resetEntries();
- /** Resize the max entries for a thread. */
- void resizeEntries(unsigned size, ThreadID tid);
/** Ticks the LSQ. */
- void tick();
- /** Ticks a specific LSQ Unit. */
- void tick(ThreadID tid)
- { thread[tid].tick(); }
+ void tick() { usedStorePorts = 0; }
/** Inserts a load into the LSQ. */
void insertLoad(const DynInstPtr &load_inst);
@@ -112,13 +751,13 @@ class LSQ {
* Commits loads up until the given sequence number for a specific thread.
*/
void commitLoads(InstSeqNum &youngest_inst, ThreadID tid)
- { thread[tid].commitLoads(youngest_inst); }
+ { thread.at(tid).commitLoads(youngest_inst); }
/**
* Commits stores up until the given sequence number for a specific thread.
*/
void commitStores(InstSeqNum &youngest_inst, ThreadID tid)
- { thread[tid].commitStores(youngest_inst); }
+ { thread.at(tid).commitStores(youngest_inst); }
/**
* Attempts to write back stores until all cache ports are used or the
@@ -131,8 +770,11 @@ class LSQ {
/**
* Squash instructions from a thread until the specified sequence number.
*/
- void squash(const InstSeqNum &squashed_num, ThreadID tid)
- { thread[tid].squash(squashed_num); }
+ void
+ squash(const InstSeqNum &squashed_num, ThreadID tid)
+ {
+ thread.at(tid).squash(squashed_num);
+ }
/** Returns whether or not there was a memory ordering violation. */
bool violation();
@@ -140,50 +782,49 @@ class LSQ {
* Returns whether or not there was a memory ordering violation for a
* specific thread.
*/
- bool violation(ThreadID tid)
- { return thread[tid].violation(); }
+ bool violation(ThreadID tid) { return thread.at(tid).violation(); }
/** Gets the instruction that caused the memory ordering violation. */
- DynInstPtr getMemDepViolator(ThreadID tid)
- { return thread[tid].getMemDepViolator(); }
+ DynInstPtr
+ getMemDepViolator(ThreadID tid)
+ {
+ return thread.at(tid).getMemDepViolator();
+ }
/** Returns the head index of the load queue for a specific thread. */
- int getLoadHead(ThreadID tid)
- { return thread[tid].getLoadHead(); }
+ int getLoadHead(ThreadID tid) { return thread.at(tid).getLoadHead(); }
/** Returns the sequence number of the head of the load queue. */
- InstSeqNum getLoadHeadSeqNum(ThreadID tid)
+ InstSeqNum
+ getLoadHeadSeqNum(ThreadID tid)
{
- return thread[tid].getLoadHeadSeqNum();
+ return thread.at(tid).getLoadHeadSeqNum();
}
/** Returns the head index of the store queue. */
- int getStoreHead(ThreadID tid)
- { return thread[tid].getStoreHead(); }
+ int getStoreHead(ThreadID tid) { return thread.at(tid).getStoreHead(); }
/** Returns the sequence number of the head of the store queue. */
- InstSeqNum getStoreHeadSeqNum(ThreadID tid)
+ InstSeqNum
+ getStoreHeadSeqNum(ThreadID tid)
{
- return thread[tid].getStoreHeadSeqNum();
+ return thread.at(tid).getStoreHeadSeqNum();
}
/** Returns the number of instructions in all of the queues. */
int getCount();
/** Returns the number of instructions in the queues of one thread. */
- int getCount(ThreadID tid)
- { return thread[tid].getCount(); }
+ int getCount(ThreadID tid) { return thread.at(tid).getCount(); }
/** Returns the total number of loads in the load queue. */
int numLoads();
/** Returns the total number of loads for a single thread. */
- int numLoads(ThreadID tid)
- { return thread[tid].numLoads(); }
+ int numLoads(ThreadID tid) { return thread.at(tid).numLoads(); }
/** Returns the total number of stores in the store queue. */
int numStores();
/** Returns the total number of stores for a single thread. */
- int numStores(ThreadID tid)
- { return thread[tid].numStores(); }
+ int numStores(ThreadID tid) { return thread.at(tid).numStores(); }
/** Returns the number of free load entries. */
unsigned numFreeLoadEntries();
@@ -242,46 +883,39 @@ class LSQ {
/** Returns whether or not a specific thread has any stores to write back
* to memory.
*/
- bool hasStoresToWB(ThreadID tid)
- { return thread[tid].hasStoresToWB(); }
+ bool hasStoresToWB(ThreadID tid) { return thread.at(tid).hasStoresToWB(); }
/** Returns the number of stores a specific thread has to write back. */
- int numStoresToWB(ThreadID tid)
- { return thread[tid].numStoresToWB(); }
+ int numStoresToWB(ThreadID tid) { return thread.at(tid).numStoresToWB(); }
/** Returns if the LSQ will write back to memory this cycle. */
bool willWB();
/** Returns if the LSQ of a specific thread will write back to memory this
* cycle.
*/
- bool willWB(ThreadID tid)
- { return thread[tid].willWB(); }
+ bool willWB(ThreadID tid) { return thread.at(tid).willWB(); }
/** Debugging function to print out all instructions. */
void dumpInsts() const;
/** Debugging function to print out instructions from a specific thread. */
- void dumpInsts(ThreadID tid) const
- { thread[tid].dumpInsts(); }
+ void dumpInsts(ThreadID tid) const { thread.at(tid).dumpInsts(); }
/** Executes a read operation, using the load specified at the load
* index.
*/
- Fault read(const RequestPtr &req,
- RequestPtr &sreqLow, RequestPtr &sreqHigh,
- int load_idx);
+ Fault read(LSQRequest* req, int load_idx);
/** Executes a store operation, using the store specified at the store
* index.
*/
- Fault write(const RequestPtr &req,
- const RequestPtr &sreqLow, const RequestPtr &sreqHigh,
- uint8_t *data, int store_idx);
+ Fault write(LSQRequest* req, uint8_t *data, int store_idx);
/**
* Retry the previous send that failed.
*/
void recvReqRetry();
+ void completeDataAccess(PacketPtr pkt);
/**
* Handles writing back and completing the load or store that has
* returned from memory.
@@ -292,13 +926,34 @@ class LSQ {
void recvTimingSnoopReq(PacketPtr pkt);
+ Fault pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
+ unsigned int size, Addr addr, Request::Flags flags,
+ uint64_t *res);
+
/** The CPU pointer. */
O3CPU *cpu;
/** The IEW stage pointer. */
IEW *iewStage;
+ /** Is D-cache blocked? */
+ bool cacheBlocked() const;
+ /** Set D-cache blocked status */
+ void cacheBlocked(bool v);
+ /** Is any store port available to use? */
+ bool storePortAvailable() const;
+ /** Another store port is in use */
+ void storePortBusy();
+
protected:
+ /** D-cache is blocked */
+ bool _cacheBlocked;
+ /** The number of cache ports available each cycle (stores only). */
+ int cacheStorePorts;
+ /** The number of used cache ports in this cycle by stores. */
+ int usedStorePorts;
+
+
/** The LSQ policy for SMT mode. */
SMTQueuePolicy lsqPolicy;
@@ -307,8 +962,10 @@ class LSQ {
* and threshold, this function calculates how many resources each thread
* can occupy at most.
*/
- static uint32_t maxLSQAllocation(SMTQueuePolicy pol, uint32_t entries,
- uint32_t numThreads, uint32_t SMTThreshold) {
+ static uint32_t
+ maxLSQAllocation(SMTQueuePolicy pol, uint32_t entries,
+ uint32_t numThreads, uint32_t SMTThreshold)
+ {
if (pol == SMTQueuePolicy::Dynamic) {
return entries;
} else if (pol == SMTQueuePolicy::Partitioned) {
@@ -346,24 +1003,20 @@ class LSQ {
template <class Impl>
Fault
-LSQ<Impl>::read(const RequestPtr &req,
- RequestPtr &sreqLow, RequestPtr &sreqHigh,
- int load_idx)
+LSQ<Impl>::read(LSQRequest* req, int load_idx)
{
- ThreadID tid = cpu->contextToThread(req->contextId());
+ ThreadID tid = cpu->contextToThread(req->request()->contextId());
- return thread[tid].read(req, sreqLow, sreqHigh, load_idx);
+ return thread.at(tid).read(req, load_idx);
}
template <class Impl>
Fault
-LSQ<Impl>::write(const RequestPtr &req,
- const RequestPtr &sreqLow, const RequestPtr &sreqHigh,
- uint8_t *data, int store_idx)
+LSQ<Impl>::write(LSQRequest* req, uint8_t *data, int store_idx)
{
- ThreadID tid = cpu->contextToThread(req->contextId());
+ ThreadID tid = cpu->contextToThread(req->request()->contextId());
- return thread[tid].write(req, sreqLow, sreqHigh, data, store_idx);
+ return thread.at(tid).write(req, data, store_idx);
}
#endif // __CPU_O3_LSQ_HH__
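The heart of the class above is the ownership protocol from the lifetime diagram: if an entry is squashed or freed while a translation or memory packet is still in flight, release() does not destroy the request; it marks it as orphaned, and the last in-flight event to drain deletes it. A minimal self-contained model of that protocol (illustrative only, not the gem5 class):

    // Toy model of LSQRequest self-ownership.
    struct ToyRequest {
        int outstanding = 0;    // in-flight TLB fragments + memory packets
        bool released = false;  // LSQEntryFreed or Discarded was set

        // Counterpart of LSQRequest::release(): the LSQ gives up ownership.
        void release() {
            if (outstanding == 0)
                delete this;        // nothing in flight: die immediately
            else
                released = true;    // orphaned: wait for in-flight events
        }

        // Counterpart of packetReplied()/squashTranslation(): an in-flight
        // event completes after the LSQ has already let go.
        void eventDone() {
            if (--outstanding == 0 && released)
                delete this;        // last event drained: self-destruct
        }
    };

    int main()
    {
        auto *req = new ToyRequest;
        req->outstanding = 1;   // a packet is still in the memory system
        req->release();         // squash frees the LSQ entry; req survives
        req->eventDone();       // the response drains; req deletes itself
    }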
diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh
index edc3f469b..8a221a8d5 100644
--- a/src/cpu/o3/lsq_impl.hh
+++ b/src/cpu/o3/lsq_impl.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011-2012, 2014 ARM Limited
+ * Copyright (c) 2011-2012, 2014, 2017-2018 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -61,6 +61,8 @@ using namespace std;
template <class Impl>
LSQ<Impl>::LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params)
: cpu(cpu_ptr), iewStage(iew_ptr),
+ _cacheBlocked(false),
+ cacheStorePorts(params->cacheStorePorts), usedStorePorts(0),
lsqPolicy(params->smtLSQPolicy),
LQEntries(params->LQEntries),
SQEntries(params->SQEntries),
@@ -76,8 +78,8 @@ LSQ<Impl>::LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params)
//************ Handle SMT Parameters ***********/
//**********************************************/
- //Figure out fetch policy
- if (lsqPolicy == SMTQueuePolicy::Dynamic) {
+    /* Run SMT policy checks. */
+ if (lsqPolicy == SMTQueuePolicy::Dynamic) {
DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n");
} else if (lsqPolicy == SMTQueuePolicy::Partitioned) {
DPRINTF(Fetch, "LSQ sharing policy set to Partitioned: "
@@ -85,8 +87,8 @@ LSQ<Impl>::LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params)
maxLQEntries,maxSQEntries);
} else if (lsqPolicy == SMTQueuePolicy::Threshold) {
- assert(params->smtLSQThreshold > LQEntries);
- assert(params->smtLSQThreshold > SQEntries);
+ assert(params->smtLSQThreshold > params->LQEntries);
+ assert(params->smtLSQThreshold > params->SQEntries);
DPRINTF(LSQ, "LSQ sharing policy set to Threshold: "
"%i entries per LQ | %i entries per SQ\n",
@@ -163,79 +165,41 @@ template <class Impl>
void
LSQ<Impl>::takeOverFrom()
{
+ usedStorePorts = 0;
+ _cacheBlocked = false;
+
for (ThreadID tid = 0; tid < numThreads; tid++) {
thread[tid].takeOverFrom();
}
}
-template <class Impl>
-int
-LSQ<Impl>::entryAmount(ThreadID num_threads)
-{
- if (lsqPolicy == SMTQueuePolicy::Partitioned) {
- return LQEntries / num_threads;
- } else {
- return 0;
- }
-}
-
-template <class Impl>
-void
-LSQ<Impl>::resetEntries()
+template<class Impl>
+bool
+LSQ<Impl>::cacheBlocked() const
{
- if (lsqPolicy != SMTQueuePolicy::Dynamic || numThreads > 1) {
- int active_threads = activeThreads->size();
-
- int maxEntries;
-
- if (lsqPolicy == SMTQueuePolicy::Partitioned) {
- maxEntries = LQEntries / active_threads;
- } else if (lsqPolicy == SMTQueuePolicy::Threshold &&
- active_threads == 1) {
- maxEntries = LQEntries;
- } else {
- maxEntries = LQEntries;
- }
-
- list<ThreadID>::iterator threads = activeThreads->begin();
- list<ThreadID>::iterator end = activeThreads->end();
-
- while (threads != end) {
- ThreadID tid = *threads++;
-
- resizeEntries(maxEntries, tid);
- }
- }
+ return _cacheBlocked;
}
template<class Impl>
void
-LSQ<Impl>::removeEntries(ThreadID tid)
+LSQ<Impl>::cacheBlocked(bool v)
{
- thread[tid].clearLQ();
- thread[tid].clearSQ();
+ _cacheBlocked = v;
}
template<class Impl>
-void
-LSQ<Impl>::resizeEntries(unsigned size, ThreadID tid)
+bool
+LSQ<Impl>::storePortAvailable() const
{
- thread[tid].resizeLQ(size);
- thread[tid].resizeSQ(size);
+ return usedStorePorts < cacheStorePorts;
}
template<class Impl>
void
-LSQ<Impl>::tick()
+LSQ<Impl>::storePortBusy()
{
- list<ThreadID>::iterator threads = activeThreads->begin();
- list<ThreadID>::iterator end = activeThreads->end();
-
- while (threads != end) {
- ThreadID tid = *threads++;
-
- thread[tid].tick();
- }
+ usedStorePorts++;
+ assert(usedStorePorts <= cacheStorePorts);
}
template<class Impl>
@@ -316,6 +280,7 @@ void
LSQ<Impl>::recvReqRetry()
{
iewStage->cacheUnblocked();
+ cacheBlocked(false);
for (ThreadID tid : *activeThreads) {
thread[tid].recvRetry();
@@ -323,6 +288,15 @@ LSQ<Impl>::recvReqRetry()
}
template <class Impl>
+void
+LSQ<Impl>::completeDataAccess(PacketPtr pkt)
+{
+ auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
+ thread[cpu->contextToThread(senderState->contextId())]
+ .completeDataAccess(pkt);
+}
+
+template <class Impl>
bool
LSQ<Impl>::recvTimingResp(PacketPtr pkt)
{
@@ -330,8 +304,10 @@ LSQ<Impl>::recvTimingResp(PacketPtr pkt)
DPRINTF(LSQ, "Got error packet back for address: %#X\n",
pkt->getAddr());
- thread[cpu->contextToThread(pkt->req->contextId())]
- .completeDataAccess(pkt);
+ auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
+ panic_if(!senderState, "Got packet back with unknown sender state\n");
+
+ thread[cpu->contextToThread(senderState->contextId())].recvTimingResp(pkt);
if (pkt->isInvalidate()) {
// This response also contains an invalidate; e.g. this can be the case
@@ -352,8 +328,9 @@ LSQ<Impl>::recvTimingResp(PacketPtr pkt)
thread[tid].checkSnoop(pkt);
}
}
+ // Update the LSQRequest state (this may delete the request)
+ senderState->request()->packetReplied();
- delete pkt;
return true;
}
@@ -681,4 +658,442 @@ LSQ<Impl>::dumpInsts() const
}
}
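+/** Offset of addr within its block; block_size must be a power of two. */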
+static Addr
+addrBlockOffset(Addr addr, unsigned int block_size)
+{
+ return addr & (block_size - 1);
+}
+
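+/** addr rounded down to the start of its block_size-byte block. */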
+static Addr
+addrBlockAlign(Addr addr, uint64_t block_size)
+{
+ return addr & ~(block_size - 1);
+}
+
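+/** True if [addr, addr + size) crosses a block boundary and must be split. */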
+static bool
+transferNeedsBurst(Addr addr, uint64_t size, uint64_t block_size)
+{
+ return (addrBlockOffset(addr, block_size) + size) > block_size;
+}
+
+template<class Impl>
+Fault
+LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
+ unsigned int size, Addr addr, Request::Flags flags,
+ uint64_t *res)
+{
+ ThreadID tid = cpu->contextToThread(inst->contextId());
+ auto cacheLineSize = cpu->cacheLineSize();
+ bool needs_burst = transferNeedsBurst(addr, size, cacheLineSize);
+ LSQRequest* req = nullptr;
+
+ if (inst->translationStarted()) {
+ req = inst->savedReq;
+ assert(req);
+ } else {
+ if (needs_burst) {
+ req = new SplitDataRequest(&thread[tid], inst, isLoad, addr,
+ size, flags, data, res);
+ } else {
+ req = new SingleDataRequest(&thread[tid], inst, isLoad, addr,
+ size, flags, data, res);
+ }
+ assert(req);
+ inst->setRequest();
+ req->taskId(cpu->taskId());
+
+ req->initiateTranslation();
+ }
+
+    /* This is the place where instructions get the effAddr. */
+ if (req->isTranslationComplete()) {
+ if (inst->getFault() == NoFault) {
+ inst->effAddr = req->getVaddr();
+ inst->effSize = size;
+ inst->effAddrValid(true);
+
+ if (cpu->checker) {
+ inst->reqToVerify = std::make_shared<Request>(*req->request());
+ }
+ if (isLoad)
+ inst->getFault() = cpu->read(req, inst->lqIdx);
+ else
+ inst->getFault() = cpu->write(req, data, inst->sqIdx);
+ } else if (isLoad) {
+ // Commit will have to clean up whatever happened. Set this
+ // instruction as executed.
+ inst->setExecuted();
+ }
+ }
+
+ if (inst->traceData)
+ inst->traceData->setMem(addr, size, flags);
+
+ return inst->getFault();
+}
+
+template<class Impl>
+void
+LSQ<Impl>::SingleDataRequest::finish(const Fault &fault, const RequestPtr &req,
+ ThreadContext* tc, BaseTLB::Mode mode)
+{
+ _fault.push_back(fault);
+ numInTranslationFragments = 0;
+ numTranslatedFragments = 1;
+    /* If the instruction has been squashed, let the request know
+ * as it may have to self-destruct. */
+ if (_inst->isSquashed()) {
+ this->squashTranslation();
+ } else {
+ _inst->strictlyOrdered(req->isStrictlyOrdered());
+
+ flags.set(Flag::TranslationFinished);
+ if (fault == NoFault) {
+ _inst->physEffAddr = req->getPaddr();
+ _inst->memReqFlags = req->getFlags();
+ if (req->isCondSwap()) {
+ assert(_res);
+ req->setExtraData(*_res);
+ }
+ setState(State::Request);
+ } else {
+ setState(State::Fault);
+ }
+
+ LSQRequest::_inst->fault = fault;
+ LSQRequest::_inst->translationCompleted(true);
+ }
+}
+
+template<class Impl>
+void
+LSQ<Impl>::SplitDataRequest::finish(const Fault &fault, const RequestPtr &req,
+ ThreadContext* tc, BaseTLB::Mode mode)
+{
+ _fault.push_back(fault);
+ assert(req == _requests[numTranslatedFragments] || this->isDelayed());
+
+ numInTranslationFragments--;
+ numTranslatedFragments++;
+
+ mainReq->setFlags(req->getFlags());
+
+ if (numTranslatedFragments == _requests.size()) {
+ if (_inst->isSquashed()) {
+ this->squashTranslation();
+ } else {
+ _inst->strictlyOrdered(mainReq->isStrictlyOrdered());
+ flags.set(Flag::TranslationFinished);
+ auto fault_it = _fault.begin();
+            /* Fast-forward past NoFault entries to the first fault, if any. */
+ while (fault_it != _fault.end() && *fault_it == NoFault)
+ fault_it++;
+ /* If none of the fragments faulted: */
+ if (fault_it == _fault.end()) {
+ _inst->physEffAddr = request(0)->getPaddr();
+
+ _inst->memReqFlags = mainReq->getFlags();
+ if (mainReq->isCondSwap()) {
+ assert(_res);
+ mainReq->setExtraData(*_res);
+ }
+ setState(State::Request);
+ _inst->fault = NoFault;
+ } else {
+ setState(State::Fault);
+ _inst->fault = *fault_it;
+ }
+ _inst->translationCompleted(true);
+ }
+ }
+}
+
+template<class Impl>
+void
+LSQ<Impl>::SingleDataRequest::initiateTranslation()
+{
+ _inst->translationStarted(true);
+ setState(State::Translation);
+ flags.set(Flag::TranslationStarted);
+
+ _inst->savedReq = this;
+ sendFragmentToTranslation(0);
+}
+
+template<class Impl>
+PacketPtr
+LSQ<Impl>::SplitDataRequest::mainPacket()
+{
+ return _mainPacket;
+}
+
+template<class Impl>
+RequestPtr
+LSQ<Impl>::SplitDataRequest::mainRequest()
+{
+ return mainReq;
+}
+
+template<class Impl>
+void
+LSQ<Impl>::SplitDataRequest::initiateTranslation()
+{
+ _inst->translationStarted(true);
+ setState(State::Translation);
+ flags.set(Flag::TranslationStarted);
+
+ unsigned int cacheLineSize = _port.cacheLineSize();
+ Addr base_addr = _addr;
+ Addr next_addr = addrBlockAlign(_addr + cacheLineSize, cacheLineSize);
+ Addr final_addr = addrBlockAlign(_addr + _size, cacheLineSize);
+ uint32_t size_so_far = 0;
+
+ mainReq = std::make_shared<Request>(_inst->getASID(), base_addr,
+ _size, _flags, _inst->masterId(),
+ _inst->instAddr(), _inst->contextId());
+
+ // Paddr is not used in mainReq. However, we will accumulate the flags
+ // from the sub requests into mainReq by calling setFlags() in finish().
+ // setFlags() assumes that paddr is set so flip the paddr valid bit here to
+ // avoid a potential assert in setFlags() when we call it from finish().
+ mainReq->setPaddr(0);
+
+ /* Get the pre-fix, possibly unaligned. */
+ _requests.push_back(std::make_shared<Request>(_inst->getASID(), base_addr,
+ next_addr - base_addr, _flags, _inst->masterId(),
+ _inst->instAddr(), _inst->contextId()));
+ size_so_far = next_addr - base_addr;
+
+ /* We are block aligned now, reading whole blocks. */
+ base_addr = next_addr;
+ while (base_addr != final_addr) {
+ _requests.push_back(std::make_shared<Request>(_inst->getASID(),
+ base_addr, cacheLineSize, _flags, _inst->masterId(),
+ _inst->instAddr(), _inst->contextId()));
+ size_so_far += cacheLineSize;
+ base_addr += cacheLineSize;
+ }
+
+ /* Deal with the tail. */
+ if (size_so_far < _size) {
+ _requests.push_back(std::make_shared<Request>(_inst->getASID(),
+ base_addr, _size - size_so_far, _flags, _inst->masterId(),
+ _inst->instAddr(), _inst->contextId()));
+ }
+
+ /* Setup the requests and send them to translation. */
+ for (auto& r: _requests) {
+ r->setReqInstSeqNum(_inst->seqNum);
+ r->taskId(_taskId);
+ }
+ this->_inst->savedReq = this;
+ numInTranslationFragments = 0;
+ numTranslatedFragments = 0;
+
+ for (uint32_t i = 0; i < _requests.size(); i++) {
+ sendFragmentToTranslation(i);
+ }
+}
+
+template<class Impl>
+void
+LSQ<Impl>::LSQRequest::sendFragmentToTranslation(int i)
+{
+ numInTranslationFragments++;
+ _port.dTLB()->translateTiming(
+ this->request(i),
+ this->_inst->thread->getTC(), this,
+ this->isLoad() ? BaseTLB::Read : BaseTLB::Write);
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::SingleDataRequest::recvTimingResp(PacketPtr pkt)
+{
+ assert(_numOutstandingPackets == 1);
+ auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
+ setState(State::Complete);
+ flags.set(Flag::Complete);
+ state->outstanding--;
+ assert(pkt == _packets.front());
+ _port.completeDataAccess(pkt);
+ return true;
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::SplitDataRequest::recvTimingResp(PacketPtr pkt)
+{
+ auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
+ uint32_t pktIdx = 0;
+ while (pktIdx < _packets.size() && pkt != _packets[pktIdx])
+ pktIdx++;
+ assert(pktIdx < _packets.size());
+ assert(pkt->req == _requests[pktIdx]);
+ assert(pkt == _packets[pktIdx]);
+ numReceivedPackets++;
+ state->outstanding--;
+ if (numReceivedPackets == _packets.size()) {
+ setState(State::Complete);
+ flags.set(Flag::Complete);
+ /* Assemble packets. */
+ PacketPtr resp = isLoad()
+ ? Packet::createRead(mainReq)
+ : Packet::createWrite(mainReq);
+ if (isLoad())
+ resp->dataStatic(_inst->memData);
+ else
+ resp->dataStatic(_data);
+ resp->senderState = _senderState;
+ _port.completeDataAccess(resp);
+ delete resp;
+ }
+ return true;
+}
+
+template<class Impl>
+void
+LSQ<Impl>::SingleDataRequest::buildPackets()
+{
+ assert(_senderState);
+ /* Retries do not create new packets. */
+ if (_packets.size() == 0) {
+ _packets.push_back(
+ isLoad()
+ ? Packet::createRead(request())
+ : Packet::createWrite(request()));
+ _packets.back()->dataStatic(_inst->memData);
+ _packets.back()->senderState = _senderState;
+ }
+ assert(_packets.size() == 1);
+}
+
+template<class Impl>
+void
+LSQ<Impl>::SplitDataRequest::buildPackets()
+{
+    /* 'offset' tracks each fragment's position in the instruction's data. */
+ ptrdiff_t offset = 0;
+ if (_packets.size() == 0) {
+        /* First attempt: build the packets. Retries reuse them. */
+ if (isLoad()) {
+ _mainPacket = Packet::createRead(mainReq);
+ _mainPacket->dataStatic(_inst->memData);
+ }
+ for (auto& r: _requests) {
+ PacketPtr pkt = isLoad() ? Packet::createRead(r)
+ : Packet::createWrite(r);
+ if (isLoad()) {
+ pkt->dataStatic(_inst->memData + offset);
+ } else {
+ uint8_t* req_data = new uint8_t[r->getSize()];
+ std::memcpy(req_data,
+ _inst->memData + offset,
+ r->getSize());
+ pkt->dataDynamic(req_data);
+ }
+ offset += r->getSize();
+ pkt->senderState = _senderState;
+ _packets.push_back(pkt);
+ }
+ }
+ assert(_packets.size() == _requests.size());
+}
+
+template<class Impl>
+void
+LSQ<Impl>::SingleDataRequest::sendPacketToCache()
+{
+ assert(_numOutstandingPackets == 0);
+ if (lsqUnit()->trySendPacket(isLoad(), _packets.at(0)))
+ _numOutstandingPackets = 1;
+}
+
+template<class Impl>
+void
+LSQ<Impl>::SplitDataRequest::sendPacketToCache()
+{
+ /* Try to send the packets. */
+ while (numReceivedPackets + _numOutstandingPackets < _packets.size() &&
+ lsqUnit()->trySendPacket(isLoad(),
+ _packets.at(numReceivedPackets + _numOutstandingPackets))) {
+ _numOutstandingPackets++;
+ }
+}
+
+template<class Impl>
+void
+LSQ<Impl>::SingleDataRequest::handleIprWrite(ThreadContext *thread,
+ PacketPtr pkt)
+{
+ TheISA::handleIprWrite(thread, pkt);
+}
+
+template<class Impl>
+void
+LSQ<Impl>::SplitDataRequest::handleIprWrite(ThreadContext *thread,
+ PacketPtr mainPkt)
+{
+ unsigned offset = 0;
+ for (auto r: _requests) {
+ PacketPtr pkt = new Packet(r, MemCmd::WriteReq);
+ pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
+ TheISA::handleIprWrite(thread, pkt);
+ offset += r->getSize();
+ delete pkt;
+ }
+}
+
+template<class Impl>
+Cycles
+LSQ<Impl>::SingleDataRequest::handleIprRead(ThreadContext *thread,
+ PacketPtr pkt)
+{
+ return TheISA::handleIprRead(thread, pkt);
+}
+
+template<class Impl>
+Cycles
+LSQ<Impl>::SplitDataRequest::handleIprRead(ThreadContext *thread,
+ PacketPtr mainPkt)
+{
+ Cycles delay(0);
+ unsigned offset = 0;
+
+ for (auto r: _requests) {
+ PacketPtr pkt = new Packet(r, MemCmd::ReadReq);
+ pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
+ Cycles d = TheISA::handleIprRead(thread, pkt);
+ if (d > delay)
+ delay = d;
+ offset += r->getSize();
+ delete pkt;
+ }
+ return delay;
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::SingleDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
+{
+ return ( (LSQRequest::_requests[0]->getPaddr() & blockMask) == blockAddr);
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::SplitDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
+{
+ bool is_hit = false;
+ for (auto &r: _requests) {
+ if ((r->getPaddr() & blockMask) == blockAddr) {
+ is_hit = true;
+ break;
+ }
+ }
+ return is_hit;
+}
+
#endif//__CPU_O3_LSQ_IMPL_HH__
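SplitDataRequest::initiateTranslation() above carves an access that crosses a cache-line boundary into an unaligned prefix, zero or more whole lines, and an optional tail. The fragment arithmetic can be checked in isolation; a compilable sketch using the same block helpers (standalone, outside gem5):

    #include <cstdio>
    #include <utility>
    #include <vector>

    typedef unsigned long long Addr64;

    static Addr64 blockAlign(Addr64 a, Addr64 bs) { return a & ~(bs - 1); }
    static bool needsBurst(Addr64 a, Addr64 sz, Addr64 bs)
    { return (a & (bs - 1)) + sz > bs; }

    int main()
    {
        const Addr64 line = 64, addr = 0x3f8, size = 16;  // crosses 0x400
        std::vector<std::pair<Addr64, Addr64>> frags;     // (addr, size)

        if (!needsBurst(addr, size, line)) {
            frags.emplace_back(addr, size);               // single request
        } else {
            Addr64 next = blockAlign(addr + line, line);  // end of first line
            Addr64 last = blockAlign(addr + size, line);  // start of last fragment
            frags.emplace_back(addr, next - addr);        // unaligned prefix
            for (Addr64 b = next; b != last; b += line)   // whole lines
                frags.emplace_back(b, line);
            if (last - addr < size)                       // unaligned tail
                frags.emplace_back(last, size - (last - addr));
        }
        for (auto &f : frags)                             // prints two 8-byte fragments
            std::printf("fragment @%#llx, %llu bytes\n", f.first, f.second);
    }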
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index 48a06b386..5b90da4f5 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012-2014,2017 ARM Limited
+ * Copyright (c) 2012-2014,2017-2018 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -62,6 +62,7 @@
#include "mem/port.hh"
+#include "base/circular_queue.hh"
struct DerivO3CPUParams;
/**
* Class that implements the actual LQ and SQ for each specific
@@ -76,7 +77,8 @@ struct DerivO3CPUParams;
* replayed.
*/
template <class Impl>
-class LSQUnit {
+class LSQUnit
+{
public:
typedef typename Impl::O3CPU O3CPU;
typedef typename Impl::DynInstPtr DynInstPtr;
@@ -84,6 +86,130 @@ class LSQUnit {
typedef typename Impl::CPUPol::LSQ LSQ;
typedef typename Impl::CPUPol::IssueStruct IssueStruct;
+ using LSQSenderState = typename LSQ::LSQSenderState;
+ using LSQRequest = typename Impl::CPUPol::LSQ::LSQRequest;
+ private:
+ class LSQEntry
+ {
+ private:
+ /** The instruction. */
+ DynInstPtr inst;
+ /** The request. */
+ LSQRequest* req;
+ /** The size of the operation. */
+ uint8_t _size;
+ /** Valid entry. */
+ bool _valid;
+ public:
+ /** Constructs an empty store queue entry. */
+ LSQEntry()
+ : inst(nullptr), req(nullptr), _size(0), _valid(false)
+ {
+ }
+
+ ~LSQEntry()
+ {
+ inst = nullptr;
+ if (req != nullptr) {
+ req->freeLSQEntry();
+ req = nullptr;
+ }
+ }
+
+ void
+ clear()
+ {
+ inst = nullptr;
+ if (req != nullptr) {
+ req->freeLSQEntry();
+ }
+ req = nullptr;
+ _valid = false;
+ _size = 0;
+ }
+
+ void
+ set(const DynInstPtr& inst)
+ {
+ assert(!_valid);
+ this->inst = inst;
+ _valid = true;
+ _size = 0;
+ }
+ LSQRequest* request() { return req; }
+ void setRequest(LSQRequest* r) { req = r; }
+ bool hasRequest() { return req != nullptr; }
+ /** Member accessors. */
+ /** @{ */
+ bool valid() const { return _valid; }
+ uint8_t& size() { return _size; }
+ const uint8_t& size() const { return _size; }
+ const DynInstPtr& instruction() const { return inst; }
+ /** @} */
+ };
+
+ class SQEntry : public LSQEntry
+ {
+ private:
+ /** The store data. */
+ char _data[64]; // TODO: 64 should become a parameter
+ /** Whether or not the store can writeback. */
+ bool _canWB;
+ /** Whether or not the store is committed. */
+ bool _committed;
+ /** Whether or not the store is completed. */
+ bool _completed;
+        /** Does this request write all zeros and thus carry no data?
+         * Used for cache-block-zero style instructions
+         * (ARM DC ZVA; ALPHA WH64).
+ */
+ bool _isAllZeros;
+ public:
+ static constexpr size_t DataSize = sizeof(_data);
+ /** Constructs an empty store queue entry. */
+ SQEntry()
+ : _canWB(false), _committed(false), _completed(false),
+ _isAllZeros(false)
+ {
+ std::memset(_data, 0, DataSize);
+ }
+
+ ~SQEntry()
+ {
+ }
+
+ void
+ set(const DynInstPtr& inst)
+ {
+ LSQEntry::set(inst);
+ }
+
+ void
+ clear()
+ {
+ LSQEntry::clear();
+ _canWB = _completed = _committed = _isAllZeros = false;
+ }
+ /** Member accessors. */
+ /** @{ */
+ bool& canWB() { return _canWB; }
+ const bool& canWB() const { return _canWB; }
+ bool& completed() { return _completed; }
+ const bool& completed() const { return _completed; }
+ bool& committed() { return _committed; }
+ const bool& committed() const { return _committed; }
+ bool& isAllZeros() { return _isAllZeros; }
+ const bool& isAllZeros() const { return _isAllZeros; }
+ char* data() { return _data; }
+ const char* data() const { return _data; }
+ /** @} */
+ };
+ using LQEntry = LSQEntry;
+
+ public:
+ using LoadQueue = CircularQueue<LQEntry>;
+ using StoreQueue = CircularQueue<SQEntry>;
+
public:
/** Constructs an LSQ unit. init() must be called prior to use. */
LSQUnit(uint32_t lqEntries, uint32_t sqEntries);
@@ -113,13 +239,6 @@ class LSQUnit {
/** Takes over from another CPU's thread. */
void takeOverFrom();
- /** Ticks the LSQ unit, which in this case only resets the number of
- * used cache ports.
- * @todo: Move the number of used ports up to the LSQ level so it can
- * be shared by all LSQ units.
- */
- void tick() { usedStorePorts = 0; }
-
/** Inserts an instruction. */
void insert(const DynInstPtr &inst);
/** Inserts a load instruction. */
@@ -133,7 +252,8 @@ class LSQUnit {
* @param load_idx index to start checking at
* @param inst the instruction to check
*/
- Fault checkViolations(int load_idx, const DynInstPtr &inst);
+ Fault checkViolations(typename LoadQueue::iterator& loadIt,
+ const DynInstPtr& inst);
/** Check if an incoming invalidate hits in the lsq on a load
     * that might have issued out of order wrt another load because
@@ -163,18 +283,6 @@ class LSQUnit {
* memory system. */
void completeDataAccess(PacketPtr pkt);
- /** Clears all the entries in the LQ. */
- void clearLQ();
-
- /** Clears all the entries in the SQ. */
- void clearSQ();
-
- /** Resizes the LQ to a given size. */
- void resizeLQ(unsigned size);
-
- /** Resizes the SQ to a given size. */
- void resizeSQ(unsigned size);
-
/** Squashes all instructions younger than a specific sequence number. */
void squash(const InstSeqNum &squashed_num);
@@ -205,10 +313,10 @@ class LSQUnit {
bool isEmpty() const { return lqEmpty() && sqEmpty(); }
/** Returns if the LQ is full. */
- bool lqFull() { return loads >= (LQEntries - 1); }
+ bool lqFull() { return loadQueue.full(); }
/** Returns if the SQ is full. */
- bool sqFull() { return stores >= (SQEntries - 1); }
+ bool sqFull() { return storeQueue.full(); }
/** Returns if the LQ is empty. */
bool lqEmpty() const { return loads == 0; }
@@ -226,13 +334,20 @@ class LSQUnit {
int numStoresToWB() { return storesToWB; }
/** Returns if the LSQ unit will writeback on this cycle. */
- bool willWB() { return storeQueue[storeWBIdx].canWB &&
- !storeQueue[storeWBIdx].completed &&
- !isStoreBlocked; }
+ bool
+ willWB()
+ {
+ return storeWBIt.dereferenceable() &&
+ storeWBIt->valid() &&
+ storeWBIt->canWB() &&
+ !storeWBIt->completed() &&
+ !isStoreBlocked;
+ }
/** Handles doing the retry. */
void recvRetry();
+ unsigned int cacheLineSize();
private:
/** Reset the LSQ state */
void resetState();
@@ -240,31 +355,31 @@ class LSQUnit {
/** Writes back the instruction, sending it to IEW. */
void writeback(const DynInstPtr &inst, PacketPtr pkt);
- /** Writes back a store that couldn't be completed the previous cycle. */
- void writebackPendingStore();
-
- /** Handles completing the send of a store to memory. */
- void storePostSend(PacketPtr pkt);
+ /** Try to finish a previously blocked write back attempt */
+ void writebackBlockedStore();
/** Completes the store at the specified index. */
- void completeStore(int store_idx);
-
- /** Attempts to send a store to the cache. */
- bool sendStore(PacketPtr data_pkt);
+ void completeStore(typename StoreQueue::iterator store_idx);
- /** Increments the given store index (circular queue). */
- inline void incrStIdx(int &store_idx) const;
- /** Decrements the given store index (circular queue). */
- inline void decrStIdx(int &store_idx) const;
- /** Increments the given load index (circular queue). */
- inline void incrLdIdx(int &load_idx) const;
- /** Decrements the given load index (circular queue). */
- inline void decrLdIdx(int &load_idx) const;
+ /** Handles completing the send of a store to memory. */
+ void storePostSend();
public:
+ /** Attempts to send a packet to the cache.
+ * Check if there are ports available. Return true if
+ * there are, false if there are not.
+ */
+ bool trySendPacket(bool isLoad, PacketPtr data_pkt);
+
+
/** Debugging function to dump instructions in the LSQ. */
void dumpInsts() const;
+ /** Schedule event for the cpu. */
+ void schedule(Event& ev, Tick when) { cpu->schedule(ev, when); }
+
+ BaseTLB* dTLB() { return cpu->dtb; }
+
private:
/** Pointer to the CPU. */
O3CPU *cpu;
@@ -278,44 +393,46 @@ class LSQUnit {
/** Pointer to the dcache port. Used only for sending. */
MasterPort *dcachePort;
- /** Derived class to hold any sender state the LSQ needs. */
- class LSQSenderState : public Packet::SenderState
+ /** Particularisation of the LSQSenderState to the LQ. */
+ class LQSenderState : public LSQSenderState
{
+ using LSQSenderState::alive;
public:
- /** Default constructor. */
- LSQSenderState()
- : mainPkt(NULL), pendingPacket(NULL), idx(0), outstanding(1),
- isLoad(false), noWB(false), isSplit(false),
- pktToSend(false), cacheBlocked(false)
- { }
-
- /** Instruction who initiated the access to memory. */
- DynInstPtr inst;
- /** The main packet from a split load, used during writeback. */
- PacketPtr mainPkt;
- /** A second packet from a split store that needs sending. */
- PacketPtr pendingPacket;
- /** The LQ/SQ index of the instruction. */
- uint8_t idx;
- /** Number of outstanding packets to complete. */
- uint8_t outstanding;
- /** Whether or not it is a load. */
- bool isLoad;
- /** Whether or not the instruction will need to writeback. */
- bool noWB;
- /** Whether or not this access is split in two. */
- bool isSplit;
- /** Whether or not there is a packet that needs sending. */
- bool pktToSend;
- /** Whether or not the second packet of this split load was blocked */
- bool cacheBlocked;
-
- /** Completes a packet and returns whether the access is finished. */
- inline bool complete() { return --outstanding == 0; }
+ LQSenderState(typename LoadQueue::iterator idx_)
+ : LSQSenderState(idx_->request(), true), idx(idx_) { }
+
+ /** The LQ index of the instruction. */
+ typename LoadQueue::iterator idx;
+ //virtual LSQRequest* request() { return idx->request(); }
+ virtual void
+ complete()
+ {
+ //if (alive())
+ // idx->request()->senderState(nullptr);
+ }
+ };
+
+ /** Particularisation of the LSQSenderState to the SQ. */
+ class SQSenderState : public LSQSenderState
+ {
+ using LSQSenderState::alive;
+ public:
+ SQSenderState(typename StoreQueue::iterator idx_)
+ : LSQSenderState(idx_->request(), false), idx(idx_) { }
+ /** The SQ index of the instruction. */
+ typename StoreQueue::iterator idx;
+ //virtual LSQRequest* request() { return idx->request(); }
+ virtual void
+ complete()
+ {
+ //if (alive())
+ // idx->request()->senderState(nullptr);
+ }
};
/** Writeback event, specifically for when stores forward data to loads. */
- class WritebackEvent : public Event {
+ class WritebackEvent : public Event
+ {
public:
/** Constructs a writeback event. */
WritebackEvent(const DynInstPtr &_inst, PacketPtr pkt,
@@ -339,72 +456,25 @@ class LSQUnit {
};
public:
- struct SQEntry {
- /** Constructs an empty store queue entry. */
- SQEntry()
- : inst(NULL), req(NULL), size(0),
- canWB(0), committed(0), completed(0)
- {
- std::memset(data, 0, sizeof(data));
- }
-
- ~SQEntry()
- {
- inst = NULL;
- }
-
- /** Constructs a store queue entry for a given instruction. */
- SQEntry(const DynInstPtr &_inst)
- : inst(_inst), req(NULL), sreqLow(NULL), sreqHigh(NULL), size(0),
- isSplit(0), canWB(0), committed(0), completed(0), isAllZeros(0)
- {
- std::memset(data, 0, sizeof(data));
- }
- /** The store data. */
- char data[16];
- /** The store instruction. */
- DynInstPtr inst;
- /** The request for the store. */
- RequestPtr req;
- /** The split requests for the store. */
- RequestPtr sreqLow;
- RequestPtr sreqHigh;
- /** The size of the store. */
- uint8_t size;
- /** Whether or not the store is split into two requests. */
- bool isSplit;
- /** Whether or not the store can writeback. */
- bool canWB;
- /** Whether or not the store is committed. */
- bool committed;
- /** Whether or not the store is completed. */
- bool completed;
- /** Does this request write all zeros and thus doesn't
- * have any data attached to it. Used for cache block zero
- * style instructs (ARM DC ZVA; ALPHA WH64)
- */
- bool isAllZeros;
- };
+ /**
+ * Handles writing back and completing the load or store that has
+ * returned from memory.
+ *
+ * @param pkt Response packet from the memory sub-system
+ */
+ bool recvTimingResp(PacketPtr pkt);
private:
/** The LSQUnit thread id. */
ThreadID lsqID;
-
+ public:
/** The store queue. */
- std::vector<SQEntry> storeQueue;
+ CircularQueue<SQEntry> storeQueue;
/** The load queue. */
- std::vector<DynInstPtr> loadQueue;
-
- /** The number of LQ entries, plus a sentinel entry (circular queue).
- * @todo: Consider having var that records the true number of LQ entries.
- */
- unsigned LQEntries;
- /** The number of SQ entries, plus a sentinel entry (circular queue).
- * @todo: Consider having var that records the true number of SQ entries.
- */
- unsigned SQEntries;
+ LoadQueue loadQueue;
+ private:
/** The number of places to shift addresses in the LSQ before checking
* for dependency violations
*/
@@ -420,28 +490,10 @@ class LSQUnit {
/** The number of store instructions in the SQ waiting to writeback. */
int storesToWB;
- /** The index of the head instruction in the LQ. */
- int loadHead;
- /** The index of the tail instruction in the LQ. */
- int loadTail;
-
- /** The index of the head instruction in the SQ. */
- int storeHead;
/** The index of the first instruction that may be ready to be
* written back, and has not yet been written back.
*/
- int storeWBIdx;
- /** The index of the tail instruction in the SQ. */
- int storeTail;
-
- /// @todo Consider moving to a more advanced model with write vs read ports
- /** The number of cache ports available each cycle (stores only). */
- int cacheStorePorts;
-
- /** The number of used cache ports in this cycle by stores. */
- int usedStorePorts;
-
- //list<InstSeqNum> mshrSeqNums;
+ typename StoreQueue::iterator storeWBIt;
/** Address Mask for a cache block (e.g. ~(cache_block_size-1)) */
Addr cacheBlockMask;
@@ -472,10 +524,10 @@ class LSQUnit {
/** Whether or not there is a packet that couldn't be sent because of
* a lack of cache ports. */
- bool hasPendingPkt;
+ bool hasPendingRequest;
/** The packet that is pending free cache ports. */
- PacketPtr pendingPkt;
+ LSQRequest* pendingRequest;
/** Flag for memory model. */
bool needsTSO;
@@ -516,53 +568,51 @@ class LSQUnit {
public:
/** Executes the load at the given index. */
- Fault read(const RequestPtr &req,
- RequestPtr &sreqLow, RequestPtr &sreqHigh,
- int load_idx);
+ Fault read(LSQRequest *req, int load_idx);
/** Executes the store at the given index. */
- Fault write(const RequestPtr &req,
- const RequestPtr &sreqLow, const RequestPtr &sreqHigh,
- uint8_t *data, int store_idx);
+ Fault write(LSQRequest *req, uint8_t *data, int store_idx);
/** Returns the index of the head load instruction. */
- int getLoadHead() { return loadHead; }
+ int getLoadHead() { return loadQueue.head(); }
+
/** Returns the sequence number of the head load instruction. */
- InstSeqNum getLoadHeadSeqNum()
+ InstSeqNum
+ getLoadHeadSeqNum()
{
- if (loadQueue[loadHead]) {
- return loadQueue[loadHead]->seqNum;
- } else {
- return 0;
- }
-
+ return loadQueue.front().valid()
+ ? loadQueue.front().instruction()->seqNum
+ : 0;
}
/** Returns the index of the head store instruction. */
- int getStoreHead() { return storeHead; }
+ int getStoreHead() { return storeQueue.head(); }
/** Returns the sequence number of the head store instruction. */
- InstSeqNum getStoreHeadSeqNum()
+ InstSeqNum
+ getStoreHeadSeqNum()
{
- if (storeQueue[storeHead].inst) {
- return storeQueue[storeHead].inst->seqNum;
- } else {
- return 0;
- }
-
+ return storeQueue.front().valid()
+ ? storeQueue.front().instruction()->seqNum
+ : 0;
}
/** Returns whether or not the LSQ unit is stalled. */
bool isStalled() { return stalled; }
+ public:
+ typedef typename CircularQueue<LQEntry>::iterator LQIterator;
+ typedef typename CircularQueue<SQEntry>::iterator SQIterator;
+ typedef CircularQueue<LQEntry> LQueue;
+ typedef CircularQueue<SQEntry> SQueue;
};
template <class Impl>
Fault
-LSQUnit<Impl>::read(const RequestPtr &req,
- RequestPtr &sreqLow, RequestPtr &sreqHigh,
- int load_idx)
+LSQUnit<Impl>::read(LSQRequest *req, int load_idx)
{
- DynInstPtr load_inst = loadQueue[load_idx];
+ LQEntry& load_req = loadQueue[load_idx];
+ const DynInstPtr& load_inst = load_req.instruction();
+ load_req.setRequest(req);
assert(load_inst);
assert(!load_inst->isExecuted());
@@ -571,184 +621,188 @@ LSQUnit<Impl>::read(const RequestPtr &req,
// A bit of a hackish way to get strictly ordered accesses to work
// only if they're at the head of the LSQ and are ready to commit
// (at the head of the ROB too).
- if (req->isStrictlyOrdered() &&
- (load_idx != loadHead || !load_inst->isAtCommit())) {
+
+ if (req->mainRequest()->isStrictlyOrdered() &&
+ (load_idx != loadQueue.head() || !load_inst->isAtCommit())) {
+ // Tell IQ/mem dep unit that this instruction will need to be
+ // rescheduled eventually
iewStage->rescheduleMemInst(load_inst);
+ load_inst->clearIssued();
+ load_inst->effAddrValid(false);
++lsqRescheduledLoads;
DPRINTF(LSQUnit, "Strictly ordered load [sn:%lli] PC %s\n",
load_inst->seqNum, load_inst->pcState());
+ // Must delete request now that it wasn't handed off to
+ // memory. This is quite ugly. @todo: Figure out the proper
+ // place to really handle request deletes.
+ load_req.setRequest(nullptr);
+ req->discard();
return std::make_shared<GenericISA::M5PanicFault>(
"Strictly ordered load [sn:%llx] PC %s\n",
load_inst->seqNum, load_inst->pcState());
}
- // Check the SQ for any previous stores that might lead to forwarding
- int store_idx = load_inst->sqIdx;
-
- int store_size = 0;
-
DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, "
"storeHead: %i addr: %#x%s\n",
- load_idx, store_idx, storeHead, req->getPaddr(),
- sreqLow ? " split" : "");
+ load_idx - 1, load_inst->sqIt._idx, storeQueue.head() - 1,
+ req->mainRequest()->getPaddr(), req->isSplit() ? " split" : "");
- if (req->isLLSC()) {
- assert(!sreqLow);
+ if (req->mainRequest()->isLLSC()) {
// Disable recording the result temporarily. Writing to misc
// regs normally updates the result, but this is not the
// desired behavior when handling store conditionals.
load_inst->recordResult(false);
- TheISA::handleLockedRead(load_inst.get(), req);
+ TheISA::handleLockedRead(load_inst.get(), req->mainRequest());
load_inst->recordResult(true);
}
- if (req->isMmappedIpr()) {
+ if (req->mainRequest()->isMmappedIpr()) {
assert(!load_inst->memData);
load_inst->memData = new uint8_t[64];
ThreadContext *thread = cpu->tcBase(lsqID);
- Cycles delay(0);
- PacketPtr data_pkt = new Packet(req, MemCmd::ReadReq);
-
- data_pkt->dataStatic(load_inst->memData);
- if (!TheISA::HasUnalignedMemAcc || !sreqLow) {
- delay = TheISA::handleIprRead(thread, data_pkt);
- } else {
- assert(sreqLow->isMmappedIpr() && sreqHigh->isMmappedIpr());
- PacketPtr fst_data_pkt = new Packet(sreqLow, MemCmd::ReadReq);
- PacketPtr snd_data_pkt = new Packet(sreqHigh, MemCmd::ReadReq);
-
- fst_data_pkt->dataStatic(load_inst->memData);
- snd_data_pkt->dataStatic(load_inst->memData + sreqLow->getSize());
-
- delay = TheISA::handleIprRead(thread, fst_data_pkt);
- Cycles delay2 = TheISA::handleIprRead(thread, snd_data_pkt);
- if (delay2 > delay)
- delay = delay2;
-
- delete fst_data_pkt;
- delete snd_data_pkt;
- }
- WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this);
+ PacketPtr main_pkt = new Packet(req->mainRequest(), MemCmd::ReadReq);
+
+ Cycles delay = req->handleIprRead(thread, main_pkt);
+
+ WritebackEvent *wb = new WritebackEvent(load_inst, main_pkt, this);
cpu->schedule(wb, cpu->clockEdge(delay));
return NoFault;
}
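The removed block above open-coded the two-packet IPR case; the new code delegates that bookkeeping to the request object. A hedged sketch of what a split-aware handleIprRead could look like; SplitDataRequest and _packets are assumed names, not taken from this diff:

    template <class Impl>
    Cycles
    SplitDataRequest<Impl>::handleIprRead(ThreadContext *thread,
                                          PacketPtr mainPkt)
    {
        // mainPkt holds the merged view; merging of fragment data into it
        // is elided in this sketch.
        Cycles delay(0);
        // One IPR access per fragment; the access completes when the
        // slowest fragment does, matching the removed max-of-delays code.
        for (PacketPtr pkt : _packets) {
            Cycles d = TheISA::handleIprRead(thread, pkt);
            if (d > delay)
                delay = d;
        }
        return delay;
    }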
- while (store_idx != -1) {
- // End once we've reached the top of the LSQ
- if (store_idx == storeWBIdx) {
- break;
- }
-
+ // Check the SQ for any previous stores that might lead to forwarding
+ auto store_it = load_inst->sqIt;
+ assert(store_it >= storeWBIt);
+ // End once we've reached the top of the LSQ
+ while (store_it != storeWBIt) {
// Move the index to one younger
- if (--store_idx < 0)
- store_idx += SQEntries;
-
- assert(storeQueue[store_idx].inst);
-
- store_size = storeQueue[store_idx].size;
-
- if (!store_size || storeQueue[store_idx].inst->strictlyOrdered() ||
- (storeQueue[store_idx].req &&
- storeQueue[store_idx].req->isCacheMaintenance())) {
- // Cache maintenance instructions go down via the store
- // path but they carry no data and they shouldn't be
- // considered for forwarding
- continue;
- }
-
- assert(storeQueue[store_idx].inst->effAddrValid());
-
- // Check if the store data is within the lower and upper bounds of
- // addresses that the request needs.
- bool store_has_lower_limit =
- req->getVaddr() >= storeQueue[store_idx].inst->effAddr;
- bool store_has_upper_limit =
- (req->getVaddr() + req->getSize()) <=
- (storeQueue[store_idx].inst->effAddr + store_size);
- bool lower_load_has_store_part =
- req->getVaddr() < (storeQueue[store_idx].inst->effAddr +
- store_size);
- bool upper_load_has_store_part =
- (req->getVaddr() + req->getSize()) >
- storeQueue[store_idx].inst->effAddr;
-
- // If the store's data has all of the data needed and the load isn't
- // LLSC, we can forward.
- if (store_has_lower_limit && store_has_upper_limit && !req->isLLSC()) {
- // Get shift amount for offset into the store's data.
- int shift_amt = req->getVaddr() - storeQueue[store_idx].inst->effAddr;
-
- // Allocate memory if this is the first time a load is issued.
- if (!load_inst->memData) {
- load_inst->memData = new uint8_t[req->getSize()];
- }
- if (storeQueue[store_idx].isAllZeros)
- memset(load_inst->memData, 0, req->getSize());
- else
- memcpy(load_inst->memData,
- storeQueue[store_idx].data + shift_amt, req->getSize());
-
- DPRINTF(LSQUnit, "Forwarding from store idx %i to load to "
- "addr %#x\n", store_idx, req->getVaddr());
-
- PacketPtr data_pkt = new Packet(req, MemCmd::ReadReq);
- data_pkt->dataStatic(load_inst->memData);
-
- WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this);
-
- // We'll say this has a 1 cycle load-store forwarding latency
- // for now.
- // @todo: Need to make this a parameter.
- cpu->schedule(wb, curTick());
-
- ++lsqForwLoads;
- return NoFault;
- } else if (
- (!req->isLLSC() &&
+ store_it--;
+ assert(store_it->valid());
+ assert(store_it->instruction()->seqNum < load_inst->seqNum);
+ int store_size = store_it->size();
+
+ // Cache maintenance instructions go down via the store
+ // path but they carry no data and they shouldn't be
+ // considered for forwarding
+ if (store_size != 0 && !store_it->instruction()->strictlyOrdered() &&
+ !(store_it->request()->mainRequest() &&
+ store_it->request()->mainRequest()->isCacheMaintenance())) {
+ assert(store_it->instruction()->effAddrValid());
+
+ // Check if the store data is within the lower and upper bounds of
+ // addresses that the request needs.
+ auto req_s = req->mainRequest()->getVaddr();
+ auto req_e = req_s + req->mainRequest()->getSize();
+ auto st_s = store_it->instruction()->effAddr;
+ auto st_e = st_s + store_size;
+
+ bool store_has_lower_limit = req_s >= st_s;
+ bool store_has_upper_limit = req_e <= st_e;
+ bool lower_load_has_store_part = req_s < st_e;
+ bool upper_load_has_store_part = req_e > st_s;
+
+ // If the store's data has all of the data needed and the load
+ // isn't LLSC, we can forward.
+ if (store_has_lower_limit && store_has_upper_limit &&
+ !req->mainRequest()->isLLSC()) {
+
+ // Get shift amount for offset into the store's data.
+ int shift_amt = req->mainRequest()->getVaddr() -
+ store_it->instruction()->effAddr;
+
+ // Allocate memory if this is the first time a load is issued.
+ if (!load_inst->memData) {
+ load_inst->memData =
+ new uint8_t[req->mainRequest()->getSize()];
+ }
+ if (store_it->isAllZeros())
+ memset(load_inst->memData, 0,
+ req->mainRequest()->getSize());
+ else
+ memcpy(load_inst->memData,
+ store_it->data() + shift_amt,
+ req->mainRequest()->getSize());
+
+ DPRINTF(LSQUnit, "Forwarding from store idx %i to load to "
+ "addr %#x\n", store_it._idx,
+ req->mainRequest()->getVaddr());
+
+ PacketPtr data_pkt = new Packet(req->mainRequest(),
+ MemCmd::ReadReq);
+ data_pkt->dataStatic(load_inst->memData);
+
+ if (req->isAnyOutstandingRequest()) {
+ assert(req->_numOutstandingPackets > 0);
+ // There are memory request packets in flight already.
+ // This may happen if the store was not completed the
+ // first time this load got executed. Signal the
+ // senderState that response packets should be discarded.
+ req->discardSenderState();
+ }
+
+ WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt,
+ this);
+
+ // We'll say this has a 1 cycle load-store forwarding latency
+ // for now.
+ // @todo: Need to make this a parameter.
+ cpu->schedule(wb, curTick());
+
+ // Don't need to do anything special for split loads.
+ ++lsqForwLoads;
+
+ return NoFault;
+ } else if (
+ (!req->mainRequest()->isLLSC() &&
((store_has_lower_limit && lower_load_has_store_part) ||
(store_has_upper_limit && upper_load_has_store_part) ||
(lower_load_has_store_part && upper_load_has_store_part))) ||
- (req->isLLSC() &&
+ (req->mainRequest()->isLLSC() &&
((store_has_lower_limit || upper_load_has_store_part) &&
(store_has_upper_limit || lower_load_has_store_part)))) {
- // This is the partial store-load forwarding case where a store
- // has only part of the load's data and the load isn't LLSC or
- // the load is LLSC and the store has all or part of the load's
- // data
-
- // If it's already been written back, then don't worry about
- // stalling on it.
- if (storeQueue[store_idx].completed) {
- panic("Should not check one of these");
- continue;
+ // This is the partial store-load forwarding case, where
+ // either the store has only part of the load's data and
+ // the load isn't LLSC, or the load is LLSC and the store
+ // has all or part of the load's data.
+
+ // If it's already been written back, then don't worry about
+ // stalling on it.
+ if (store_it->completed()) {
+ panic("Should not check one of these");
+ continue;
+ }
+
+ // Must stall load and force it to retry, so long as it's the
+ // oldest load that needs to do so.
+ if (!stalled ||
+ (stalled &&
+ load_inst->seqNum <
+ loadQueue[stallingLoadIdx].instruction()->seqNum)) {
+ stalled = true;
+ stallingStoreIsn = store_it->instruction()->seqNum;
+ stallingLoadIdx = load_idx;
+ }
+
+ // Tell IQ/mem dep unit that this instruction will need to be
+ // rescheduled eventually
+ iewStage->rescheduleMemInst(load_inst);
+ load_inst->clearIssued();
+ load_inst->effAddrValid(false);
+ ++lsqRescheduledLoads;
+
+ // Do not generate a writeback event as this instruction is not
+ // complete.
+ DPRINTF(LSQUnit, "Load-store forwarding mis-match. "
+ "Store idx %i to load addr %#x\n",
+ store_it._idx, req->mainRequest()->getVaddr());
+
+ // Must discard the request.
+ req->discard();
+ load_req.setRequest(nullptr);
+ return NoFault;
}
-
- // Must stall load and force it to retry, so long as it's the oldest
- // load that needs to do so.
- if (!stalled ||
- (stalled &&
- load_inst->seqNum <
- loadQueue[stallingLoadIdx]->seqNum)) {
- stalled = true;
- stallingStoreIsn = storeQueue[store_idx].inst->seqNum;
- stallingLoadIdx = load_idx;
- }
-
- // Tell IQ/mem dep unit that this instruction will need to be
- // rescheduled eventually
- iewStage->rescheduleMemInst(load_inst);
- load_inst->clearIssued();
- ++lsqRescheduledLoads;
-
- // Do not generate a writeback event as this instruction is not
- // complete.
- DPRINTF(LSQUnit, "Load-store forwarding mis-match. "
- "Store idx %i to load addr %#x\n",
- store_idx, req->getVaddr());
-
- return NoFault;
}
}
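For non-LLSC loads the four bounds booleans above reduce to two questions about byte intervals: does the store fully cover the load (forward the data), or does it merely overlap it (stall and replay the load)? A standalone illustration of that predicate logic, assuming half-open intervals [start, end):

    #include <cassert>
    #include <cstdint>

    struct Interval { uint64_t s, e; };   // half-open [s, e)

    // Store fully covers the load: full forwarding is possible.
    bool covers(Interval st, Interval ld)
    { return ld.s >= st.s && ld.e <= st.e; }

    // Store overlaps the load without covering it: must stall and replay.
    bool partialOverlap(Interval st, Interval ld)
    { return !covers(st, ld) && ld.s < st.e && ld.e > st.s; }

    int main()
    {
        Interval store{0x100, 0x108};                           // 8-byte store
        assert(covers(store, Interval{0x104, 0x108}));          // inside: forward
        assert(partialOverlap(store, Interval{0x106, 0x10c})); // straddles: stall
        assert(!partialOverlap(store, Interval{0x110, 0x114})); // disjoint: neither
        return 0;
    }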
@@ -758,40 +812,7 @@ LSQUnit<Impl>::read(const RequestPtr &req,
// Allocate memory if this is the first time a load is issued.
if (!load_inst->memData) {
- load_inst->memData = new uint8_t[req->getSize()];
- }
-
- // if we the cache is not blocked, do cache access
- bool completedFirst = false;
- PacketPtr data_pkt = Packet::createRead(req);
- PacketPtr fst_data_pkt = NULL;
- PacketPtr snd_data_pkt = NULL;
-
- data_pkt->dataStatic(load_inst->memData);
-
- LSQSenderState *state = new LSQSenderState;
- state->isLoad = true;
- state->idx = load_idx;
- state->inst = load_inst;
- data_pkt->senderState = state;
-
- if (!TheISA::HasUnalignedMemAcc || !sreqLow) {
- // Point the first packet at the main data packet.
- fst_data_pkt = data_pkt;
- } else {
- // Create the split packets.
- fst_data_pkt = Packet::createRead(sreqLow);
- snd_data_pkt = Packet::createRead(sreqHigh);
-
- fst_data_pkt->dataStatic(load_inst->memData);
- snd_data_pkt->dataStatic(load_inst->memData + sreqLow->getSize());
-
- fst_data_pkt->senderState = state;
- snd_data_pkt->senderState = state;
-
- state->isSplit = true;
- state->outstanding = 2;
- state->mainPkt = data_pkt;
+ load_inst->memData = new uint8_t[req->mainRequest()->getSize()];
}
// For now, load throughput is constrained by the number of
@@ -799,97 +820,46 @@ LSQUnit<Impl>::read(const RequestPtr &req,
// stores do).
// @todo We should account for cache port contention
// and arbitrate between loads and stores.
- bool successful_load = true;
- if (!dcachePort->sendTimingReq(fst_data_pkt)) {
- successful_load = false;
- } else if (TheISA::HasUnalignedMemAcc && sreqLow) {
- completedFirst = true;
-
- // The first packet was sent without problems, so send this one
- // too. If there is a problem with this packet then the whole
- // load will be squashed, so indicate this to the state object.
- // The first packet will return in completeDataAccess and be
- // handled there.
- // @todo We should also account for cache port contention
- // here.
- if (!dcachePort->sendTimingReq(snd_data_pkt)) {
- // The main packet will be deleted in completeDataAccess.
- state->complete();
- // Signify to 1st half that the 2nd half was blocked via state
- state->cacheBlocked = true;
- successful_load = false;
- }
- }
-
- // If the cache was blocked, or has become blocked due to the access,
- // handle it.
- if (!successful_load) {
- if (!sreqLow) {
- // Packet wasn't split, just delete main packet info
- delete state;
- delete data_pkt;
- }
-
- if (TheISA::HasUnalignedMemAcc && sreqLow) {
- if (!completedFirst) {
- // Split packet, but first failed. Delete all state.
- delete state;
- delete data_pkt;
- delete fst_data_pkt;
- delete snd_data_pkt;
- sreqLow.reset();
- sreqHigh.reset();
- } else {
- // Can't delete main packet data or state because first packet
- // was sent to the memory system
- delete data_pkt;
- delete snd_data_pkt;
- sreqHigh.reset();
- }
- }
-
- ++lsqCacheBlocked;
-
- iewStage->blockMemInst(load_inst);
- // No fault occurred, even though the interface is blocked.
- return NoFault;
+ // If the cache is not blocked, do the cache access.
+ if (req->senderState() == nullptr) {
+ LQSenderState *state = new LQSenderState(
+ loadQueue.getIterator(load_idx));
+ state->isLoad = true;
+ state->inst = load_inst;
+ state->isSplit = req->isSplit();
+ req->senderState(state);
}
+ req->buildPackets();
+ req->sendPacketToCache();
+ if (!req->isSent())
+ iewStage->blockMemInst(load_inst);
return NoFault;
}
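buildPackets() is where a split access fans out into its two packets, absorbing the removed fst/snd packet construction above. A hedged sketch of its split-case shape; _requests, _packets, _senderState and _inst are assumed member names:

    template <class Impl>
    void
    SplitDataRequest<Impl>::buildPackets()
    {
        if (_packets.empty()) {
            Addr base = _requests.front()->getVaddr();
            for (const RequestPtr &r : _requests) {
                PacketPtr pkt = isLoad() ? Packet::createRead(r)
                                         : Packet::createWrite(r);
                // Each fragment views its slice of the instruction's buffer,
                // as the removed memData / memData + size code did.
                pkt->dataStatic(_inst->memData + (r->getVaddr() - base));
                pkt->senderState = _senderState;
                _packets.push_back(pkt);
            }
        }
    }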
template <class Impl>
Fault
-LSQUnit<Impl>::write(const RequestPtr &req,
- const RequestPtr &sreqLow, const RequestPtr &sreqHigh,
- uint8_t *data, int store_idx)
+LSQUnit<Impl>::write(LSQRequest *req, uint8_t *data, int store_idx)
{
- assert(storeQueue[store_idx].inst);
-
- DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x"
- " | storeHead:%i [sn:%i]\n",
- store_idx, req->getPaddr(), storeHead,
- storeQueue[store_idx].inst->seqNum);
-
- storeQueue[store_idx].req = req;
- storeQueue[store_idx].sreqLow = sreqLow;
- storeQueue[store_idx].sreqHigh = sreqHigh;
- unsigned size = req->getSize();
- storeQueue[store_idx].size = size;
- bool store_no_data = req->getFlags() & Request::STORE_NO_DATA;
- storeQueue[store_idx].isAllZeros = store_no_data;
- assert(size <= sizeof(storeQueue[store_idx].data) || store_no_data);
-
- // Split stores can only occur in ISAs with unaligned memory accesses. If
- // a store request has been split, sreqLow and sreqHigh will be non-null.
- if (TheISA::HasUnalignedMemAcc && sreqLow) {
- storeQueue[store_idx].isSplit = true;
- }
-
- if (!(req->getFlags() & Request::CACHE_BLOCK_ZERO) && \
- !req->isCacheMaintenance())
- memcpy(storeQueue[store_idx].data, data, size);
+ assert(storeQueue[store_idx].valid());
+
+ DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x | storeHead:%i "
+ "[sn:%i]\n",
+ store_idx - 1, req->request()->getPaddr(), storeQueue.head() - 1,
+ storeQueue[store_idx].instruction()->seqNum);
+
+ storeQueue[store_idx].setRequest(req);
+ unsigned size = req->_size;
+ storeQueue[store_idx].size() = size;
+ bool store_no_data =
+ req->mainRequest()->getFlags() & Request::STORE_NO_DATA;
+ storeQueue[store_idx].isAllZeros() = store_no_data;
+ assert(size <= SQEntry::DataSize || store_no_data);
+
+ if (!(req->request()->getFlags() & Request::CACHE_BLOCK_ZERO) &&
+ !req->request()->isCacheMaintenance())
+ memcpy(storeQueue[store_idx].data(), data, size);
// This function only writes the data to the store queue, so no fault
// can happen here.
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 13b148768..9756a9ef1 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2010-2014, 2017 ARM Limited
+ * Copyright (c) 2010-2014, 2017-2018 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -66,6 +66,8 @@ LSQUnit<Impl>::WritebackEvent::WritebackEvent(const DynInstPtr &_inst,
: Event(Default_Pri, AutoDelete),
inst(_inst), pkt(_pkt), lsqPtr(lsq_ptr)
{
+ assert(_inst->savedReq);
+ _inst->savedReq->writebackScheduled();
}
template<class Impl>
@@ -76,9 +78,8 @@ LSQUnit<Impl>::WritebackEvent::process()
lsqPtr->writeback(inst, pkt);
- if (pkt->senderState)
- delete pkt->senderState;
-
+ assert(inst->savedReq);
+ inst->savedReq->writebackDone();
delete pkt;
}
@@ -89,65 +90,61 @@ LSQUnit<Impl>::WritebackEvent::description() const
return "Store writeback";
}
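The constructor/process() pairing above (writebackScheduled() on construction, writebackDone() after writeback()) lets a request stay alive while a scheduled event still holds one of its packets. A hedged sketch of that pairing; the flag and release() are assumed names:

    void LSQRequest::writebackScheduled()
    {
        assert(!_writebackScheduled);
        _writebackScheduled = true;   // pin the request while the event is live
    }

    void LSQRequest::writebackDone()
    {
        _writebackScheduled = false;
        release();                    // the request may free itself now
    }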
+template <class Impl>
+bool
+LSQUnit<Impl>::recvTimingResp(PacketPtr pkt)
+{
+ auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
+ LSQRequest* req = senderState->request();
+ assert(req != nullptr);
+ bool ret = true;
+ /* Check that the request is still alive before any further action. */
+ if (senderState->alive()) {
+ ret = req->recvTimingResp(pkt);
+ } else {
+ senderState->outstanding--;
+ }
+ return ret;
+}
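Responses reach this function from the LSQ's dcache port (wired up in lsq_impl.hh, outside this hunk). The alive() check is what makes discarded requests safe: a response for a squashed-and-discarded access only decrements the outstanding count and is dropped, never touching freed queue state. The port-side plumbing is roughly (assumed sketch):

    template <class Impl>
    bool
    LSQ<Impl>::recvTimingResp(PacketPtr pkt)
    {
        auto *state = dynamic_cast<LSQSenderState*>(pkt->senderState);
        // Route the response to the unit of the issuing thread.
        return thread[state->inst->threadNumber].recvTimingResp(pkt);
    }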
+
template<class Impl>
void
LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
{
LSQSenderState *state = dynamic_cast<LSQSenderState *>(pkt->senderState);
DynInstPtr inst = state->inst;
- DPRINTF(IEW, "Writeback event [sn:%lli].\n", inst->seqNum);
- DPRINTF(Activity, "Activity: Writeback event [sn:%lli].\n", inst->seqNum);
-
- if (state->cacheBlocked) {
- // This is the first half of a previous split load,
- // where the 2nd half blocked, ignore this response
- DPRINTF(IEW, "[sn:%lli]: Response from first half of earlier "
- "blocked split load recieved. Ignoring.\n", inst->seqNum);
- delete state;
- return;
- }
- // If this is a split access, wait until all packets are received.
- if (TheISA::HasUnalignedMemAcc && !state->complete()) {
- return;
- }
+ cpu->ppDataAccessComplete->notify(std::make_pair(inst, pkt));
+
+ /* Notify the sender state that the access is complete (for ownership
+ * tracking). */
+ state->complete();
assert(!cpu->switchedOut());
if (!inst->isSquashed()) {
- if (!state->noWB) {
+ if (state->needWB) {
// Only loads and store conditionals perform the writeback
// after receiving the response from the memory
assert(inst->isLoad() || inst->isStoreConditional());
- if (!TheISA::HasUnalignedMemAcc || !state->isSplit ||
- !state->isLoad) {
- writeback(inst, pkt);
- } else {
- writeback(inst, state->mainPkt);
+ writeback(inst, state->request()->mainPacket());
+ if (inst->isStore()) {
+ auto ss = dynamic_cast<SQSenderState*>(state);
+ ss->writebackDone();
+ completeStore(ss->idx);
}
- }
-
- if (inst->isStore()) {
- completeStore(state->idx);
+ } else if (inst->isStore()) {
+ completeStore(dynamic_cast<SQSenderState*>(state)->idx);
}
}
-
- if (TheISA::HasUnalignedMemAcc && state->isSplit && state->isLoad) {
- delete state->mainPkt;
- }
-
- pkt->req->setAccessLatency();
- cpu->ppDataAccessComplete->notify(std::make_pair(inst, pkt));
-
- delete state;
}
template <class Impl>
LSQUnit<Impl>::LSQUnit(uint32_t lqEntries, uint32_t sqEntries)
: lsqID(-1), storeQueue(sqEntries+1), loadQueue(lqEntries+1),
- LQEntries(lqEntries+1), SQEntries(sqEntries+1),
loads(0), stores(0), storesToWB(0), cacheBlockMask(0), stalled(false),
- isStoreBlocked(false), storeInFlight(false), hasPendingPkt(false),
- pendingPkt(nullptr)
+ isStoreBlocked(false), storeInFlight(false), hasPendingRequest(false),
+ pendingRequest(nullptr)
{
}
@@ -167,7 +164,6 @@ LSQUnit<Impl>::init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params,
depCheckShift = params->LSQDepCheckShift;
checkLoads = params->LSQCheckLoads;
- cacheStorePorts = params->cacheStorePorts;
needsTSO = params->needsTSO;
resetState();
@@ -180,11 +176,8 @@ LSQUnit<Impl>::resetState()
{
loads = stores = storesToWB = 0;
- loadHead = loadTail = 0;
-
- storeHead = storeWBIdx = storeTail = 0;
- usedStorePorts = 0;
+ storeWBIt = storeQueue.begin();
retryPkt = NULL;
memDepViolator = NULL;
@@ -259,24 +252,10 @@ LSQUnit<Impl>::setDcachePort(MasterPort *dcache_port)
template<class Impl>
void
-LSQUnit<Impl>::clearLQ()
-{
- loadQueue.clear();
-}
-
-template<class Impl>
-void
-LSQUnit<Impl>::clearSQ()
-{
- storeQueue.clear();
-}
-
-template<class Impl>
-void
LSQUnit<Impl>::drainSanityCheck() const
{
- for (int i = 0; i < loadQueue.size(); ++i)
- assert(!loadQueue[i]);
+ for (int i = 0; i < loadQueue.capacity(); ++i)
+ assert(!loadQueue[i].valid());
assert(storesToWB == 0);
assert(!retryPkt);
@@ -289,44 +268,6 @@ LSQUnit<Impl>::takeOverFrom()
resetState();
}
-template<class Impl>
-void
-LSQUnit<Impl>::resizeLQ(unsigned size)
-{
- unsigned size_plus_sentinel = size + 1;
- assert(size_plus_sentinel >= LQEntries);
-
- if (size_plus_sentinel > LQEntries) {
- while (size_plus_sentinel > loadQueue.size()) {
- DynInstPtr dummy;
- loadQueue.push_back(dummy);
- LQEntries++;
- }
- } else {
- LQEntries = size_plus_sentinel;
- }
-
- assert(LQEntries <= 256);
-}
-
-template<class Impl>
-void
-LSQUnit<Impl>::resizeSQ(unsigned size)
-{
- unsigned size_plus_sentinel = size + 1;
- if (size_plus_sentinel > SQEntries) {
- while (size_plus_sentinel > storeQueue.size()) {
- SQEntry dummy;
- storeQueue.push_back(dummy);
- SQEntries++;
- }
- } else {
- SQEntries = size_plus_sentinel;
- }
-
- assert(SQEntries <= 256);
-}
-
template <class Impl>
void
LSQUnit<Impl>::insert(const DynInstPtr &inst)
@@ -348,44 +289,42 @@ template <class Impl>
void
LSQUnit<Impl>::insertLoad(const DynInstPtr &load_inst)
{
- assert((loadTail + 1) % LQEntries != loadHead);
- assert(loads < LQEntries);
+ assert(!loadQueue.full());
+ assert(loads < loadQueue.capacity());
DPRINTF(LSQUnit, "Inserting load PC %s, idx:%i [sn:%lli]\n",
- load_inst->pcState(), loadTail, load_inst->seqNum);
+ load_inst->pcState(), loadQueue.tail(), load_inst->seqNum);
- load_inst->lqIdx = loadTail;
+ /* Grow the queue. */
+ loadQueue.advance_tail();
- if (stores == 0) {
- load_inst->sqIdx = -1;
- } else {
- load_inst->sqIdx = storeTail;
- }
+ load_inst->sqIt = storeQueue.end();
- loadQueue[loadTail] = load_inst;
-
- incrLdIdx(loadTail);
+ assert(!loadQueue.back().valid());
+ loadQueue.back().set(load_inst);
+ load_inst->lqIdx = loadQueue.tail();
+ load_inst->lqIt = loadQueue.getIterator(load_inst->lqIdx);
++loads;
}
template <class Impl>
void
-LSQUnit<Impl>::insertStore(const DynInstPtr &store_inst)
+LSQUnit<Impl>::insertStore(const DynInstPtr& store_inst)
{
// Make sure it is not full before inserting an instruction.
- assert((storeTail + 1) % SQEntries != storeHead);
- assert(stores < SQEntries);
+ assert(!storeQueue.full());
+ assert(stores < storeQueue.capacity());
DPRINTF(LSQUnit, "Inserting store PC %s, idx:%i [sn:%lli]\n",
- store_inst->pcState(), storeTail, store_inst->seqNum);
-
- store_inst->sqIdx = storeTail;
- store_inst->lqIdx = loadTail;
+ store_inst->pcState(), storeQueue.tail(), store_inst->seqNum);
+ storeQueue.advance_tail();
- storeQueue[storeTail] = SQEntry(store_inst);
+ store_inst->sqIdx = storeQueue.tail();
+ store_inst->lqIdx = loadQueue.moduloAdd(loadQueue.tail(), 1);
+ store_inst->lqIt = loadQueue.end();
- incrStIdx(storeTail);
+ storeQueue.back().set(store_inst);
++stores;
}
@@ -407,8 +346,9 @@ LSQUnit<Impl>::numFreeLoadEntries()
{
//LQ has an extra dummy entry to differentiate
//empty/full conditions. Subtract 1 from the free entries.
- DPRINTF(LSQUnit, "LQ size: %d, #loads occupied: %d\n", LQEntries, loads);
- return LQEntries - loads - 1;
+ DPRINTF(LSQUnit, "LQ size: %d, #loads occupied: %d\n",
+ 1 + loadQueue.capacity(), loads);
+ return loadQueue.capacity() - loads;
}
template <class Impl>
@@ -417,8 +357,9 @@ LSQUnit<Impl>::numFreeStoreEntries()
{
//SQ has an extra dummy entry to differentiate
//empty/full conditions. Subtract 1 from the free entries.
- DPRINTF(LSQUnit, "SQ size: %d, #stores occupied: %d\n", SQEntries, stores);
- return SQEntries - stores - 1;
+ DPRINTF(LSQUnit, "SQ size: %d, #stores occupied: %d\n",
+ 1 + storeQueue.capacity(), stores);
+ return storeQueue.capacity() - stores;
}
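Both queues keep the classic N+1 sentinel convention that the comments above describe, now hidden inside CircularQueue (note the lqEntries+1 / sqEntries+1 construction in the LSQUnit constructor earlier in this file). A standalone illustration of the convention:

    #include <cstddef>
    #include <vector>

    template <typename T>
    class Ring
    {
        std::vector<T> buf;          // capacity() + 1 slots; one is a sentinel
        size_t head = 0, tail = 0;
      public:
        explicit Ring(size_t n) : buf(n + 1) { }
        size_t capacity() const { return buf.size() - 1; }
        bool empty() const { return head == tail; }
        // Full when advancing the tail would collide with the head.
        bool full() const { return (tail + 1) % buf.size() == head; }
        // Callers check full()/empty() first, as the LSQ asserts do.
        void push(const T &v) { buf[tail] = v; tail = (tail + 1) % buf.size(); }
        void pop() { head = (head + 1) % buf.size(); }
    };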
@@ -429,11 +370,8 @@ LSQUnit<Impl>::checkSnoop(PacketPtr pkt)
// Should only ever get invalidations in here
assert(pkt->isInvalidate());
- int load_idx = loadHead;
DPRINTF(LSQUnit, "Got snoop for address %#x\n", pkt->getAddr());
- // Only Invalidate packet calls checkSnoop
- assert(pkt->isInvalidate());
for (int x = 0; x < cpu->numContexts(); x++) {
ThreadContext *tc = cpu->getContext(x);
bool no_squash = cpu->thread[x]->noSquashFromTC;
@@ -442,44 +380,37 @@ LSQUnit<Impl>::checkSnoop(PacketPtr pkt)
cpu->thread[x]->noSquashFromTC = no_squash;
}
- Addr invalidate_addr = pkt->getAddr() & cacheBlockMask;
+ if (loadQueue.empty())
+ return;
- DynInstPtr ld_inst = loadQueue[load_idx];
- if (ld_inst) {
- Addr load_addr_low = ld_inst->physEffAddrLow & cacheBlockMask;
- Addr load_addr_high = ld_inst->physEffAddrHigh & cacheBlockMask;
+ auto iter = loadQueue.begin();
- // Check that this snoop didn't just invalidate our lock flag
- if (ld_inst->effAddrValid() && (load_addr_low == invalidate_addr
- || load_addr_high == invalidate_addr)
- && ld_inst->memReqFlags & Request::LLSC)
- TheISA::handleLockedSnoopHit(ld_inst.get());
- }
+ Addr invalidate_addr = pkt->getAddr() & cacheBlockMask;
- // If this is the only load in the LSQ we don't care
- if (load_idx == loadTail)
- return;
+ DynInstPtr ld_inst = iter->instruction();
+ assert(ld_inst);
+ LSQRequest *req = iter->request();
- incrLdIdx(load_idx);
+ // Check that this snoop didn't just invalidate our lock flag
+ if (ld_inst->effAddrValid() &&
+ req->isCacheBlockHit(invalidate_addr, cacheBlockMask)
+ && ld_inst->memReqFlags & Request::LLSC)
+ TheISA::handleLockedSnoopHit(ld_inst.get());
bool force_squash = false;
- while (load_idx != loadTail) {
- DynInstPtr ld_inst = loadQueue[load_idx];
-
- if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered()) {
- incrLdIdx(load_idx);
+ while (++iter != loadQueue.end()) {
+ ld_inst = iter->instruction();
+ assert(ld_inst);
+ req = iter->request();
+ if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered())
continue;
- }
-
- Addr load_addr_low = ld_inst->physEffAddrLow & cacheBlockMask;
- Addr load_addr_high = ld_inst->physEffAddrHigh & cacheBlockMask;
- DPRINTF(LSQUnit, "-- inst [sn:%lli] load_addr: %#x to pktAddr:%#x\n",
- ld_inst->seqNum, load_addr_low, invalidate_addr);
+ DPRINTF(LSQUnit, "-- inst [sn:%lli] to pktAddr:%#x\n",
+ ld_inst->seqNum, invalidate_addr);
- if ((load_addr_low == invalidate_addr
- || load_addr_high == invalidate_addr) || force_squash) {
+ if (force_squash ||
+ req->isCacheBlockHit(invalidate_addr, cacheBlockMask)) {
if (needsTSO) {
// If we have a TSO system, as all loads must be ordered with
// all other loads, this load as well as *all* subsequent loads
@@ -508,14 +439,14 @@ LSQUnit<Impl>::checkSnoop(PacketPtr pkt)
ld_inst->hitExternalSnoop(true);
}
}
- incrLdIdx(load_idx);
}
return;
}
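isCacheBlockHit() replaces the explicit physEffAddrLow/physEffAddrHigh masking that the removed lines performed, letting the request check all of its (possibly split) physical addresses. A hedged sketch of its likely shape; _requests is an assumed member name:

    bool
    LSQRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
    {
        // One request, or two when the access is split across cache lines.
        for (const auto &r : _requests)
            if ((r->getPaddr() & blockMask) == blockAddr)
                return true;
        return false;
    }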
template <class Impl>
Fault
-LSQUnit<Impl>::checkViolations(int load_idx, const DynInstPtr &inst)
+LSQUnit<Impl>::checkViolations(typename LoadQueue::iterator& loadIt,
+ const DynInstPtr& inst)
{
Addr inst_eff_addr1 = inst->effAddr >> depCheckShift;
Addr inst_eff_addr2 = (inst->effAddr + inst->effSize - 1) >> depCheckShift;
@@ -525,10 +456,10 @@ LSQUnit<Impl>::checkViolations(int load_idx, const DynInstPtr &inst)
* all instructions that will execute before the store writes back. Thus,
* like the implementation that came before it, we're overly conservative.
*/
- while (load_idx != loadTail) {
- DynInstPtr ld_inst = loadQueue[load_idx];
+ while (loadIt != loadQueue.end()) {
+ DynInstPtr ld_inst = loadIt->instruction();
if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered()) {
- incrLdIdx(load_idx);
+ ++loadIt;
continue;
}
@@ -585,7 +516,7 @@ LSQUnit<Impl>::checkViolations(int load_idx, const DynInstPtr &inst)
}
}
- incrLdIdx(load_idx);
+ ++loadIt;
}
return NoFault;
}
@@ -608,8 +539,7 @@ LSQUnit<Impl>::executeLoad(const DynInstPtr &inst)
load_fault = inst->initiateAcc();
- if (inst->isTranslationDelayed() &&
- load_fault == NoFault)
+ if (inst->isTranslationDelayed() && load_fault == NoFault)
return load_fault;
// If the instruction faulted or predicated false, then we need to send it
@@ -631,12 +561,13 @@ LSQUnit<Impl>::executeLoad(const DynInstPtr &inst)
iewStage->instToCommit(inst);
iewStage->activityThisCycle();
} else {
- assert(inst->effAddrValid());
- int load_idx = inst->lqIdx;
- incrLdIdx(load_idx);
+ if (inst->effAddrValid()) {
+ auto it = inst->lqIt;
+ ++it;
- if (checkLoads)
- return checkViolations(load_idx, inst);
+ if (checkLoads)
+ return checkViolations(it, inst);
+ }
}
return load_fault;
@@ -659,7 +590,7 @@ LSQUnit<Impl>::executeStore(const DynInstPtr &store_inst)
// Check the recently completed loads to see if any match this store's
// address. If so, then we have a memory ordering violation.
- int load_idx = store_inst->lqIdx;
+ typename LoadQueue::iterator loadIt = store_inst->lqIt;
Fault store_fault = store_inst->initiateAcc();
@@ -674,7 +605,7 @@ LSQUnit<Impl>::executeStore(const DynInstPtr &store_inst)
return store_fault;
}
- if (storeQueue[store_idx].size == 0) {
+ if (storeQueue[store_idx].size() == 0) {
DPRINTF(LSQUnit,"Fault on Store PC %s, [sn:%lli], Size = 0\n",
store_inst->pcState(), store_inst->seqNum);
@@ -686,12 +617,12 @@ LSQUnit<Impl>::executeStore(const DynInstPtr &store_inst)
if (store_inst->isStoreConditional()) {
// Store conditionals need to set themselves as able to
// writeback if we haven't had a fault by here.
- storeQueue[store_idx].canWB = true;
+ storeQueue[store_idx].canWB() = true;
++storesToWB;
}
- return checkViolations(load_idx, store_inst);
+ return checkViolations(loadIt, store_inst);
}
@@ -699,14 +630,13 @@ template <class Impl>
void
LSQUnit<Impl>::commitLoad()
{
- assert(loadQueue[loadHead]);
+ assert(loadQueue.front().valid());
DPRINTF(LSQUnit, "Committing head load instruction, PC %s\n",
- loadQueue[loadHead]->pcState());
-
- loadQueue[loadHead] = NULL;
+ loadQueue.front().instruction()->pcState());
- incrLdIdx(loadHead);
+ loadQueue.front().clear();
+ loadQueue.pop_front();
--loads;
}
@@ -715,9 +645,10 @@ template <class Impl>
void
LSQUnit<Impl>::commitLoads(InstSeqNum &youngest_inst)
{
- assert(loads == 0 || loadQueue[loadHead]);
+ assert(loads == 0 || loadQueue.front().valid());
- while (loads != 0 && loadQueue[loadHead]->seqNum <= youngest_inst) {
+ while (loads != 0 && loadQueue.front().instruction()->seqNum
+ <= youngest_inst) {
commitLoad();
}
}
@@ -726,45 +657,37 @@ template <class Impl>
void
LSQUnit<Impl>::commitStores(InstSeqNum &youngest_inst)
{
- assert(stores == 0 || storeQueue[storeHead].inst);
-
- int store_idx = storeHead;
+ assert(stores == 0 || storeQueue.front().valid());
- while (store_idx != storeTail) {
- assert(storeQueue[store_idx].inst);
+ /* Forward iterate the store queue (age order). */
+ for (auto& x : storeQueue) {
+ assert(x.valid());
// Mark any stores that are now committed and have not yet
// been marked as able to write back.
- if (!storeQueue[store_idx].canWB) {
- if (storeQueue[store_idx].inst->seqNum > youngest_inst) {
+ if (!x.canWB()) {
+ if (x.instruction()->seqNum > youngest_inst) {
break;
}
DPRINTF(LSQUnit, "Marking store as able to write back, PC "
"%s [sn:%lli]\n",
- storeQueue[store_idx].inst->pcState(),
- storeQueue[store_idx].inst->seqNum);
+ x.instruction()->pcState(),
+ x.instruction()->seqNum);
- storeQueue[store_idx].canWB = true;
+ x.canWB() = true;
++storesToWB;
}
-
- incrStIdx(store_idx);
}
}
template <class Impl>
void
-LSQUnit<Impl>::writebackPendingStore()
+LSQUnit<Impl>::writebackBlockedStore()
{
- if (hasPendingPkt) {
- assert(pendingPkt != NULL);
-
- // If the cache is blocked, this will store the packet for retry.
- if (sendStore(pendingPkt)) {
- storePostSend(pendingPkt);
- }
- pendingPkt = NULL;
- hasPendingPkt = false;
+ assert(isStoreBlocked);
+ storeWBIt->request()->sendPacketToCache();
+ if (storeWBIt->request()->isSent()) {
+ storePostSend();
}
}
@@ -772,18 +695,17 @@ template <class Impl>
void
LSQUnit<Impl>::writebackStores()
{
- // First writeback the second packet from any split store that didn't
- // complete last cycle because there weren't enough cache ports available.
- if (TheISA::HasUnalignedMemAcc) {
- writebackPendingStore();
+ if (isStoreBlocked) {
+ DPRINTF(LSQUnit, "Writing back blocked store\n");
+ writebackBlockedStore();
}
while (storesToWB > 0 &&
- storeWBIdx != storeTail &&
- storeQueue[storeWBIdx].inst &&
- storeQueue[storeWBIdx].canWB &&
+ storeWBIt.dereferenceable() &&
+ storeWBIt->valid() &&
+ storeWBIt->canWB() &&
((!needsTSO) || (!storeInFlight)) &&
- usedStorePorts < cacheStorePorts) {
+ lsq->storePortAvailable()) {
if (isStoreBlocked) {
DPRINTF(LSQUnit, "Unable to write back any more stores, cache"
@@ -793,188 +715,112 @@ LSQUnit<Impl>::writebackStores()
// Store didn't write any data so no need to write it back to
// memory.
- if (storeQueue[storeWBIdx].size == 0) {
- completeStore(storeWBIdx);
-
- incrStIdx(storeWBIdx);
-
+ if (storeWBIt->size() == 0) {
+ /* It is important that the increment happens at (or before)
+ * the call, as the code of completeStore checks storeWBIt. */
+ completeStore(storeWBIt++);
continue;
}
- ++usedStorePorts;
-
- if (storeQueue[storeWBIdx].inst->isDataPrefetch()) {
- incrStIdx(storeWBIdx);
-
+ if (storeWBIt->instruction()->isDataPrefetch()) {
+ storeWBIt++;
continue;
}
- assert(storeQueue[storeWBIdx].req);
- assert(!storeQueue[storeWBIdx].committed);
-
- if (TheISA::HasUnalignedMemAcc && storeQueue[storeWBIdx].isSplit) {
- assert(storeQueue[storeWBIdx].sreqLow);
- assert(storeQueue[storeWBIdx].sreqHigh);
- }
-
- DynInstPtr inst = storeQueue[storeWBIdx].inst;
-
- RequestPtr &req = storeQueue[storeWBIdx].req;
- const RequestPtr &sreqLow = storeQueue[storeWBIdx].sreqLow;
- const RequestPtr &sreqHigh = storeQueue[storeWBIdx].sreqHigh;
+ assert(storeWBIt->hasRequest());
+ assert(!storeWBIt->committed());
- storeQueue[storeWBIdx].committed = true;
+ DynInstPtr inst = storeWBIt->instruction();
+ LSQRequest* req = storeWBIt->request();
+ storeWBIt->committed() = true;
assert(!inst->memData);
- inst->memData = new uint8_t[req->getSize()];
+ inst->memData = new uint8_t[req->_size];
- if (storeQueue[storeWBIdx].isAllZeros)
- memset(inst->memData, 0, req->getSize());
+ if (storeWBIt->isAllZeros())
+ memset(inst->memData, 0, req->_size);
else
- memcpy(inst->memData, storeQueue[storeWBIdx].data, req->getSize());
+ memcpy(inst->memData, storeWBIt->data(), req->_size);
- PacketPtr data_pkt;
- PacketPtr snd_data_pkt = NULL;
- LSQSenderState *state = new LSQSenderState;
- state->isLoad = false;
- state->idx = storeWBIdx;
- state->inst = inst;
+ if (req->senderState() == nullptr) {
+ SQSenderState *state = new SQSenderState(storeWBIt);
+ state->isLoad = false;
+ state->needWB = false;
+ state->inst = inst;
- if (!TheISA::HasUnalignedMemAcc || !storeQueue[storeWBIdx].isSplit) {
-
- // Build a single data packet if the store isn't split.
- data_pkt = Packet::createWrite(req);
- data_pkt->dataStatic(inst->memData);
- data_pkt->senderState = state;
- } else {
- // Create two packets if the store is split in two.
- data_pkt = Packet::createWrite(sreqLow);
- snd_data_pkt = Packet::createWrite(sreqHigh);
-
- data_pkt->dataStatic(inst->memData);
- snd_data_pkt->dataStatic(inst->memData + sreqLow->getSize());
-
- data_pkt->senderState = state;
- snd_data_pkt->senderState = state;
-
- state->isSplit = true;
- state->outstanding = 2;
-
- // Can delete the main request now.
- req = sreqLow;
+ req->senderState(state);
+ if (inst->isStoreConditional()) {
+ /* Only store conditionals need a writeback. */
+ state->needWB = true;
+ }
}
+ req->buildPackets();
DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%s "
"to Addr:%#x, data:%#x [sn:%lli]\n",
- storeWBIdx, inst->pcState(),
- req->getPaddr(), (int)*(inst->memData),
+ storeWBIt.idx(), inst->pcState(),
+ req->request()->getPaddr(), (int)*(inst->memData),
inst->seqNum);
// @todo: Remove this SC hack once the memory system handles it.
if (inst->isStoreConditional()) {
- assert(!storeQueue[storeWBIdx].isSplit);
// Disable recording the result temporarily. Writing to
// misc regs normally updates the result, but this is not
// the desired behavior when handling store conditionals.
inst->recordResult(false);
- bool success = TheISA::handleLockedWrite(inst.get(), req, cacheBlockMask);
+ bool success = TheISA::handleLockedWrite(inst.get(),
+ req->request(), cacheBlockMask);
inst->recordResult(true);
+ req->packetSent();
if (!success) {
+ req->complete();
// Instantly complete this store.
DPRINTF(LSQUnit, "Store conditional [sn:%lli] failed. "
"Instantly completing it.\n",
inst->seqNum);
- WritebackEvent *wb = new WritebackEvent(inst, data_pkt, this);
+ PacketPtr new_pkt = new Packet(*req->packet());
+ WritebackEvent *wb = new WritebackEvent(inst,
+ new_pkt, this);
cpu->schedule(wb, curTick() + 1);
- completeStore(storeWBIdx);
- incrStIdx(storeWBIdx);
+ completeStore(storeWBIt);
+ if (!storeQueue.empty())
+ storeWBIt++;
+ else
+ storeWBIt = storeQueue.end();
continue;
}
- } else {
- // Non-store conditionals do not need a writeback.
- state->noWB = true;
}
- bool split =
- TheISA::HasUnalignedMemAcc && storeQueue[storeWBIdx].isSplit;
-
- ThreadContext *thread = cpu->tcBase(lsqID);
-
- if (req->isMmappedIpr()) {
+ if (req->request()->isMmappedIpr()) {
assert(!inst->isStoreConditional());
- TheISA::handleIprWrite(thread, data_pkt);
- delete data_pkt;
- if (split) {
- assert(snd_data_pkt->req->isMmappedIpr());
- TheISA::handleIprWrite(thread, snd_data_pkt);
- delete snd_data_pkt;
- }
- delete state;
- completeStore(storeWBIdx);
- incrStIdx(storeWBIdx);
- } else if (!sendStore(data_pkt)) {
- DPRINTF(IEW, "D-Cache became blocked when writing [sn:%lli], will"
- "retry later\n",
- inst->seqNum);
+ ThreadContext *thread = cpu->tcBase(lsqID);
+ PacketPtr main_pkt = new Packet(req->mainRequest(),
+ MemCmd::WriteReq);
+ main_pkt->dataStatic(inst->memData);
+ req->handleIprWrite(thread, main_pkt);
+ delete main_pkt;
+ completeStore(storeWBIt);
+ storeWBIt++;
+ continue;
+ }
+ /* Send to cache */
+ req->sendPacketToCache();
- // Need to store the second packet, if split.
- if (split) {
- state->pktToSend = true;
- state->pendingPacket = snd_data_pkt;
- }
+ /* If successful, do the post send */
+ if (req->isSent()) {
+ storePostSend();
} else {
-
- // If split, try to send the second packet too
- if (split) {
- assert(snd_data_pkt);
-
- // Ensure there are enough ports to use.
- if (usedStorePorts < cacheStorePorts) {
- ++usedStorePorts;
- if (sendStore(snd_data_pkt)) {
- storePostSend(snd_data_pkt);
- } else {
- DPRINTF(IEW, "D-Cache became blocked when writing"
- " [sn:%lli] second packet, will retry later\n",
- inst->seqNum);
- }
- } else {
-
- // Store the packet for when there's free ports.
- assert(pendingPkt == NULL);
- pendingPkt = snd_data_pkt;
- hasPendingPkt = true;
- }
- } else {
-
- // Not a split store.
- storePostSend(data_pkt);
- }
+ DPRINTF(LSQUnit, "D-Cache became blocked when writing [sn:%lli], "
+ "will retry later\n",
+ inst->seqNum);
}
}
-
- // Not sure this should set it to 0.
- usedStorePorts = 0;
-
assert(stores >= 0 && storesToWB >= 0);
}
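Store-port accounting moves out of the unit: the per-unit cacheStorePorts/usedStorePorts counters (and the suspect per-call "usedStorePorts = 0" reset) removed above give way to storePortAvailable()/storePortBusy() on the shared LSQ, so all threads' units draw from one pool. A hedged sketch, with assumed member names:

    template <class Impl>
    bool
    LSQ<Impl>::storePortAvailable() const
    {
        return usedStorePorts < cacheStorePorts;
    }

    template <class Impl>
    void
    LSQ<Impl>::storePortBusy()
    {
        assert(usedStorePorts < cacheStorePorts);
        usedStorePorts++;   // presumably reset once per CPU tick, not per unit
    }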
-/*template <class Impl>
-void
-LSQUnit<Impl>::removeMSHR(InstSeqNum seqNum)
-{
- list<InstSeqNum>::iterator mshr_it = find(mshrSeqNums.begin(),
- mshrSeqNums.end(),
- seqNum);
-
- if (mshr_it != mshrSeqNums.end()) {
- mshrSeqNums.erase(mshr_it);
- DPRINTF(LSQUnit, "Removing MSHR. count = %i\n",mshrSeqNums.size());
- }
-}*/
-
template <class Impl>
void
LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
@@ -982,30 +828,26 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
DPRINTF(LSQUnit, "Squashing until [sn:%lli]!"
"(Loads:%i Stores:%i)\n", squashed_num, loads, stores);
- int load_idx = loadTail;
- decrLdIdx(load_idx);
-
- while (loads != 0 && loadQueue[load_idx]->seqNum > squashed_num) {
+ while (loads != 0 &&
+ loadQueue.back().instruction()->seqNum > squashed_num) {
DPRINTF(LSQUnit,"Load Instruction PC %s squashed, "
"[sn:%lli]\n",
- loadQueue[load_idx]->pcState(),
- loadQueue[load_idx]->seqNum);
+ loadQueue.back().instruction()->pcState(),
+ loadQueue.back().instruction()->seqNum);
- if (isStalled() && load_idx == stallingLoadIdx) {
+ if (isStalled() && loadQueue.tail() == stallingLoadIdx) {
stalled = false;
stallingStoreIsn = 0;
stallingLoadIdx = 0;
}
// Clear the smart pointer to make sure it is decremented.
- loadQueue[load_idx]->setSquashed();
- loadQueue[load_idx] = NULL;
- --loads;
+ loadQueue.back().instruction()->setSquashed();
+ loadQueue.back().clear();
- // Inefficient!
- loadTail = load_idx;
+ --loads;
- decrLdIdx(load_idx);
+ loadQueue.pop_back();
++lsqSquashedLoads;
}
@@ -1013,76 +855,63 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
memDepViolator = NULL;
}
- int store_idx = storeTail;
- decrStIdx(store_idx);
-
while (stores != 0 &&
- storeQueue[store_idx].inst->seqNum > squashed_num) {
+ storeQueue.back().instruction()->seqNum > squashed_num) {
// Instructions marked as can WB are already committed.
- if (storeQueue[store_idx].canWB) {
+ if (storeQueue.back().canWB()) {
break;
}
DPRINTF(LSQUnit,"Store Instruction PC %s squashed, "
"idx:%i [sn:%lli]\n",
- storeQueue[store_idx].inst->pcState(),
- store_idx, storeQueue[store_idx].inst->seqNum);
+ storeQueue.back().instruction()->pcState(),
+ storeQueue.tail(), storeQueue.back().instruction()->seqNum);
// I don't think this can happen. It should have been cleared
// by the stalling load.
if (isStalled() &&
- storeQueue[store_idx].inst->seqNum == stallingStoreIsn) {
+ storeQueue.back().instruction()->seqNum == stallingStoreIsn) {
panic("Is stalled should have been cleared by stalling load!\n");
stalled = false;
stallingStoreIsn = 0;
}
// Clear the smart pointer to make sure it is decremented.
- storeQueue[store_idx].inst->setSquashed();
- storeQueue[store_idx].inst = NULL;
- storeQueue[store_idx].canWB = 0;
+ storeQueue.back().instruction()->setSquashed();
// Must delete request now that it wasn't handed off to
// memory. This is quite ugly. @todo: Figure out the proper
// place to really handle request deletes.
- storeQueue[store_idx].req.reset();
- if (TheISA::HasUnalignedMemAcc && storeQueue[store_idx].isSplit) {
- storeQueue[store_idx].sreqLow.reset();
- storeQueue[store_idx].sreqHigh.reset();
- }
-
+ storeQueue.back().clear();
--stores;
- // Inefficient!
- storeTail = store_idx;
-
- decrStIdx(store_idx);
+ storeQueue.pop_back();
++lsqSquashedStores;
}
}
template <class Impl>
void
-LSQUnit<Impl>::storePostSend(PacketPtr pkt)
+LSQUnit<Impl>::storePostSend()
{
if (isStalled() &&
- storeQueue[storeWBIdx].inst->seqNum == stallingStoreIsn) {
+ storeWBIt->instruction()->seqNum == stallingStoreIsn) {
DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
"load idx:%i\n",
stallingStoreIsn, stallingLoadIdx);
stalled = false;
stallingStoreIsn = 0;
- iewStage->replayMemInst(loadQueue[stallingLoadIdx]);
+ iewStage->replayMemInst(loadQueue[stallingLoadIdx].instruction());
}
- if (!storeQueue[storeWBIdx].inst->isStoreConditional()) {
+ if (!storeWBIt->instruction()->isStoreConditional()) {
// The store is basically completed at this time. This
// only works so long as the checker doesn't try to
// verify the value in memory for stores.
- storeQueue[storeWBIdx].inst->setCompleted();
+ storeWBIt->instruction()->setCompleted();
if (cpu->checker) {
- cpu->checker->verify(storeQueue[storeWBIdx].inst);
+ cpu->checker->verify(storeWBIt->instruction());
}
}
@@ -1090,7 +919,7 @@ LSQUnit<Impl>::storePostSend(PacketPtr pkt)
storeInFlight = true;
}
- incrStIdx(storeWBIdx);
+ storeWBIt++;
}
template <class Impl>
@@ -1136,10 +965,10 @@ LSQUnit<Impl>::writeback(const DynInstPtr &inst, PacketPtr pkt)
template <class Impl>
void
-LSQUnit<Impl>::completeStore(int store_idx)
+LSQUnit<Impl>::completeStore(typename StoreQueue::iterator store_idx)
{
- assert(storeQueue[store_idx].inst);
- storeQueue[store_idx].completed = true;
+ assert(store_idx->valid());
+ store_idx->completed() = true;
--storesToWB;
// A bit conservative because a store completion may not free up entries,
// but hopefully avoids two store completions in one cycle from making
@@ -1147,39 +976,42 @@ LSQUnit<Impl>::completeStore(int store_idx)
cpu->wakeCPU();
cpu->activityThisCycle();
- if (store_idx == storeHead) {
+ /* We 'need' a copy here because we may clear the entry from the
+ * store queue. */
+ DynInstPtr store_inst = store_idx->instruction();
+ if (store_idx == storeQueue.begin()) {
do {
- incrStIdx(storeHead);
-
+ storeQueue.front().clear();
+ storeQueue.pop_front();
--stores;
- } while (storeQueue[storeHead].completed &&
- storeHead != storeTail);
+ } while (!storeQueue.empty() &&
+ storeQueue.front().completed());
iewStage->updateLSQNextCycle = true;
}
DPRINTF(LSQUnit, "Completing store [sn:%lli], idx:%i, store head "
"idx:%i\n",
- storeQueue[store_idx].inst->seqNum, store_idx, storeHead);
+ store_inst->seqNum, store_idx.idx() - 1, storeQueue.head() - 1);
#if TRACING_ON
if (DTRACE(O3PipeView)) {
- storeQueue[store_idx].inst->storeTick =
- curTick() - storeQueue[store_idx].inst->fetchTick;
+ store_idx->instruction()->storeTick =
+ curTick() - store_idx->instruction()->fetchTick;
}
#endif
if (isStalled() &&
- storeQueue[store_idx].inst->seqNum == stallingStoreIsn) {
+ store_inst->seqNum == stallingStoreIsn) {
DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
"load idx:%i\n",
stallingStoreIsn, stallingLoadIdx);
stalled = false;
stallingStoreIsn = 0;
- iewStage->replayMemInst(loadQueue[stallingLoadIdx]);
+ iewStage->replayMemInst(loadQueue[stallingLoadIdx].instruction());
}
- storeQueue[store_idx].inst->setCompleted();
+ store_inst->setCompleted();
if (needsTSO) {
storeInFlight = false;
@@ -1188,95 +1020,62 @@ LSQUnit<Impl>::completeStore(int store_idx)
// Tell the checker we've completed this instruction. Some stores
// may get reported twice to the checker, but the checker can
// handle that case.
-
// Store conditionals cannot be sent to the checker yet, they have
// to update the misc registers first which should take place
// when they commit
- if (cpu->checker && !storeQueue[store_idx].inst->isStoreConditional()) {
- cpu->checker->verify(storeQueue[store_idx].inst);
+ if (cpu->checker && !store_inst->isStoreConditional()) {
+ cpu->checker->verify(store_inst);
}
}
template <class Impl>
bool
-LSQUnit<Impl>::sendStore(PacketPtr data_pkt)
+LSQUnit<Impl>::trySendPacket(bool isLoad, PacketPtr data_pkt)
{
- if (!dcachePort->sendTimingReq(data_pkt)) {
- // Need to handle becoming blocked on a store.
- isStoreBlocked = true;
- ++lsqCacheBlocked;
- assert(retryPkt == NULL);
- retryPkt = data_pkt;
- return false;
+ bool ret = true;
+ bool cache_got_blocked = false;
+
+ auto state = dynamic_cast<LSQSenderState*>(data_pkt->senderState);
+
+ if (!lsq->cacheBlocked() && (isLoad || lsq->storePortAvailable())) {
+ if (!dcachePort->sendTimingReq(data_pkt)) {
+ ret = false;
+ cache_got_blocked = true;
+ }
+ } else {
+ ret = false;
}
- return true;
-}
-template <class Impl>
-void
-LSQUnit<Impl>::recvRetry()
-{
- if (isStoreBlocked) {
- DPRINTF(LSQUnit, "Receiving retry: store blocked\n");
- assert(retryPkt != NULL);
-
- LSQSenderState *state =
- dynamic_cast<LSQSenderState *>(retryPkt->senderState);
-
- if (dcachePort->sendTimingReq(retryPkt)) {
- // Don't finish the store unless this is the last packet.
- if (!TheISA::HasUnalignedMemAcc || !state->pktToSend ||
- state->pendingPacket == retryPkt) {
- state->pktToSend = false;
- storePostSend(retryPkt);
- }
- retryPkt = NULL;
+ if (ret) {
+ if (!isLoad) {
+ lsq->storePortBusy();
isStoreBlocked = false;
-
- // Send any outstanding packet.
- if (TheISA::HasUnalignedMemAcc && state->pktToSend) {
- assert(state->pendingPacket);
- if (sendStore(state->pendingPacket)) {
- storePostSend(state->pendingPacket);
- }
- }
- } else {
- // Still blocked!
+ }
+ state->outstanding++;
+ state->request()->packetSent();
+ } else {
+ if (cache_got_blocked) {
+ lsq->cacheBlocked(true);
++lsqCacheBlocked;
}
+ if (!isLoad) {
+ assert(state->request() == storeWBIt->request());
+ isStoreBlocked = true;
+ }
+ state->request()->packetNotSent();
}
-}
-
-template <class Impl>
-inline void
-LSQUnit<Impl>::incrStIdx(int &store_idx) const
-{
- if (++store_idx >= SQEntries)
- store_idx = 0;
-}
-
-template <class Impl>
-inline void
-LSQUnit<Impl>::decrStIdx(int &store_idx) const
-{
- if (--store_idx < 0)
- store_idx += SQEntries;
-}
-template <class Impl>
-inline void
-LSQUnit<Impl>::incrLdIdx(int &load_idx) const
-{
- if (++load_idx >= LQEntries)
- load_idx = 0;
+ return ret;
}
template <class Impl>
-inline void
-LSQUnit<Impl>::decrLdIdx(int &load_idx) const
+void
+LSQUnit<Impl>::recvRetry()
{
- if (--load_idx < 0)
- load_idx += LQEntries;
+ if (isStoreBlocked) {
+ DPRINTF(LSQUnit, "Receiving retry: blocked store\n");
+ writebackBlockedStore();
+ }
}
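The retry handshake is now symmetric with the send path: a failed store send latches isStoreBlocked and calls req->packetNotSent(); when the cache signals retry, recvRetry() simply replays the writeback through writebackBlockedStore(), which re-runs sendPacketToCache(). The port plumbing that invokes it is roughly (assumed sketch):

    template <class Impl>
    void
    LSQ<Impl>::recvReqRetry()
    {
        cacheBlocked(false);            // ports may be tried again
        for (ThreadID tid : *activeThreads)
            thread[tid].recvRetry();    // replays any blocked store
    }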
template <class Impl>
@@ -1287,29 +1086,28 @@ LSQUnit<Impl>::dumpInsts() const
cprintf("Load queue size: %i\n", loads);
cprintf("Load queue: ");
- int load_idx = loadHead;
-
- while (load_idx != loadTail && loadQueue[load_idx]) {
- const DynInstPtr &inst(loadQueue[load_idx]);
+ for (const auto& e: loadQueue) {
+ const DynInstPtr &inst(e.instruction());
cprintf("%s.[sn:%i] ", inst->pcState(), inst->seqNum);
-
- incrLdIdx(load_idx);
}
cprintf("\n");
cprintf("Store queue size: %i\n", stores);
cprintf("Store queue: ");
- int store_idx = storeHead;
-
- while (store_idx != storeTail && storeQueue[store_idx].inst) {
- const DynInstPtr &inst(storeQueue[store_idx].inst);
+ for (const auto& e: storeQueue) {
+ const DynInstPtr &inst(e.instruction());
cprintf("%s.[sn:%i] ", inst->pcState(), inst->seqNum);
-
- incrStIdx(store_idx);
}
cprintf("\n");
}
+template <class Impl>
+unsigned int
+LSQUnit<Impl>::cacheLineSize()
+{
+ return cpu->cacheLineSize();
+}
+
#endif//__CPU_O3_LSQ_UNIT_IMPL_HH__
diff --git a/src/cpu/o3/probe/elastic_trace.cc b/src/cpu/o3/probe/elastic_trace.cc
index a4a201398..36d8297d1 100644
--- a/src/cpu/o3/probe/elastic_trace.cc
+++ b/src/cpu/o3/probe/elastic_trace.cc
@@ -409,7 +409,7 @@ ElasticTrace::addDepTraceRecord(const DynInstConstPtr& head_inst,
new_record->reqFlags = head_inst->memReqFlags;
new_record->virtAddr = head_inst->effAddr;
new_record->asid = head_inst->asid;
- new_record->physAddr = head_inst->physEffAddrLow;
+ new_record->physAddr = head_inst->physEffAddr;
// Currently the tracing does not support split requests.
new_record->size = head_inst->effSize;
new_record->pc = head_inst->instAddr();