diff options
Diffstat (limited to 'src/cpu/o3')
-rw-r--r-- | src/cpu/o3/cpu.hh | 7 | ||||
-rw-r--r-- | src/cpu/o3/lsq.hh | 48 | ||||
-rw-r--r-- | src/cpu/o3/lsq_impl.hh | 129 | ||||
-rw-r--r-- | src/cpu/o3/lsq_unit_impl.hh | 3 |
4 files changed, 116 insertions, 71 deletions
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index c754fe8cf..db8fca20a 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -787,10 +787,13 @@ class FullO3CPU : public BaseO3CPU /** CPU pushRequest function, forwards request to LSQ. */ Fault pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data, unsigned int size, Addr addr, Request::Flags flags, - uint64_t *res, AtomicOpFunctor *amo_op = nullptr) + uint64_t *res, AtomicOpFunctor *amo_op = nullptr, + const std::vector<bool>& byteEnable = + std::vector<bool>()) + { return iew.ldstQueue.pushRequest(inst, isLoad, data, size, addr, - flags, res, amo_op); + flags, res, amo_op, byteEnable); } /** CPU read function, forwards read to LSQ. */ diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh index a6037b7f4..84f1411a5 100644 --- a/src/cpu/o3/lsq.hh +++ b/src/cpu/o3/lsq.hh @@ -50,6 +50,7 @@ #include "arch/generic/tlb.hh" #include "cpu/inst_seq.hh" #include "cpu/o3/lsq_unit.hh" +#include "cpu/utils.hh" #include "enums/SMTQueuePolicy.hh" #include "mem/port.hh" #include "sim/sim_object.hh" @@ -251,6 +252,7 @@ class LSQ const Addr _addr; const uint32_t _size; const Request::Flags _flags; + std::vector<bool> _byteEnable; uint32_t _numOutstandingPackets; AtomicOpFunctor *_amo_op; protected: @@ -351,6 +353,28 @@ class LSQ } } + /** Helper function used to add a (sub)request, given its address + * `addr`, size `size` and byte-enable mask `byteEnable`. + * + * The request is only added if the mask is empty or if there is at + * least an active element in it. + */ + void + addRequest(Addr addr, unsigned size, + const std::vector<bool>& byteEnable) + { + if (byteEnable.empty() || + isAnyActiveElement(byteEnable.begin(), byteEnable.end())) { + auto request = std::make_shared<Request>(_inst->getASID(), + addr, size, _flags, _inst->masterId(), + _inst->instAddr(), _inst->contextId()); + if (!byteEnable.empty()) { + request->setByteEnable(byteEnable); + } + _requests.push_back(request); + } + } + /** Destructor. * The LSQRequest owns the request. If the packet has already been * sent, the sender state will be deleted upon receiving the reply. @@ -609,11 +633,17 @@ class LSQ * declaration of the names in the parent class. */ using Flag = typename LSQRequest::Flag; using State = typename LSQRequest::State; + using LSQRequest::_addr; using LSQRequest::_fault; + using LSQRequest::_flags; + using LSQRequest::_size; + using LSQRequest::_byteEnable; + using LSQRequest::_requests; using LSQRequest::_inst; using LSQRequest::_packets; using LSQRequest::_port; using LSQRequest::_res; + using LSQRequest::_taskId; using LSQRequest::_senderState; using LSQRequest::_state; using LSQRequest::flags; @@ -635,14 +665,8 @@ class LSQ uint64_t* res = nullptr, AtomicOpFunctor* amo_op = nullptr) : LSQRequest(port, inst, isLoad, addr, size, flags_, data, res, - amo_op) - { - LSQRequest::_requests.push_back( - std::make_shared<Request>(inst->getASID(), addr, size, - flags_, inst->masterId(), inst->instAddr(), - inst->contextId(), amo_op)); - LSQRequest::_requests.back()->setReqInstSeqNum(inst->seqNum); - } + amo_op) {} + inline virtual ~SingleDataRequest() {} virtual void initiateTranslation(); virtual void finish(const Fault &fault, const RequestPtr &req, @@ -671,6 +695,7 @@ class LSQ using LSQRequest::_port; using LSQRequest::_requests; using LSQRequest::_res; + using LSQRequest::_byteEnable; using LSQRequest::_senderState; using LSQRequest::_size; using LSQRequest::_state; @@ -691,14 +716,14 @@ class LSQ RequestPtr mainReq; PacketPtr _mainPacket; - public: SplitDataRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad, const Addr& addr, const uint32_t& size, const Request::Flags & flags_, PacketDataPtr data = nullptr, uint64_t* res = nullptr) : - LSQRequest(port, inst, isLoad, addr, size, flags_, data, res), + LSQRequest(port, inst, isLoad, addr, size, flags_, data, res, + nullptr), numFragments(0), numReceivedPackets(0), mainReq(nullptr), @@ -949,7 +974,8 @@ class LSQ Fault pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data, unsigned int size, Addr addr, Request::Flags flags, - uint64_t *res, AtomicOpFunctor *amo_op); + uint64_t *res, AtomicOpFunctor *amo_op, + const std::vector<bool>& byteEnable); /** The CPU pointer. */ O3CPU *cpu; diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh index 732712029..70621a523 100644 --- a/src/cpu/o3/lsq_impl.hh +++ b/src/cpu/o3/lsq_impl.hh @@ -681,29 +681,12 @@ LSQ<Impl>::dumpInsts() const } } -static Addr -addrBlockOffset(Addr addr, unsigned int block_size) -{ - return addr & (block_size - 1); -} - -static Addr -addrBlockAlign(Addr addr, uint64_t block_size) -{ - return addr & ~(block_size - 1); -} - -static bool -transferNeedsBurst(Addr addr, uint64_t size, uint64_t block_size) -{ - return (addrBlockOffset(addr, block_size) + size) > block_size; -} - template<class Impl> Fault LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data, unsigned int size, Addr addr, Request::Flags flags, - uint64_t *res, AtomicOpFunctor *amo_op) + uint64_t *res, AtomicOpFunctor *amo_op, + const std::vector<bool>& byteEnable) { // This comming request can be either load, store or atomic. // Atomic request has a corresponding pointer to its atomic memory @@ -735,6 +718,9 @@ LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data, size, flags, data, res, amo_op); } assert(req); + if (!byteEnable.empty()) { + req->_byteEnable = byteEnable; + } inst->setRequest(); req->taskId(cpu->taskId()); @@ -756,6 +742,7 @@ LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data, else inst->getFault() = cpu->write(req, data, inst->sqIdx); } else if (isLoad) { + inst->setMemAccPredicate(false); // Commit will have to clean up whatever happened. Set this // instruction as executed. inst->setExecuted(); @@ -848,14 +835,21 @@ template<class Impl> void LSQ<Impl>::SingleDataRequest::initiateTranslation() { - _inst->translationStarted(true); - setState(State::Translation); - flags.set(Flag::TranslationStarted); + assert(_requests.size() == 0); - _inst->savedReq = this; - sendFragmentToTranslation(0); + this->addRequest(_addr, _size, _byteEnable); - if (isTranslationComplete()) { + if (_requests.size() > 0) { + _requests.back()->setReqInstSeqNum(_inst->seqNum); + _requests.back()->taskId(_taskId); + _inst->translationStarted(true); + setState(State::Translation); + flags.set(Flag::TranslationStarted); + + _inst->savedReq = this; + sendFragmentToTranslation(0); + } else { + _inst->setMemAccPredicate(false); } } @@ -877,11 +871,7 @@ template<class Impl> void LSQ<Impl>::SplitDataRequest::initiateTranslation() { - _inst->translationStarted(true); - setState(State::Translation); - flags.set(Flag::TranslationStarted); - - unsigned int cacheLineSize = _port.cacheLineSize(); + auto cacheLineSize = _port.cacheLineSize(); Addr base_addr = _addr; Addr next_addr = addrBlockAlign(_addr + cacheLineSize, cacheLineSize); Addr final_addr = addrBlockAlign(_addr + _size, cacheLineSize); @@ -890,6 +880,9 @@ LSQ<Impl>::SplitDataRequest::initiateTranslation() mainReq = std::make_shared<Request>(_inst->getASID(), base_addr, _size, _flags, _inst->masterId(), _inst->instAddr(), _inst->contextId()); + if (!_byteEnable.empty()) { + mainReq->setByteEnable(_byteEnable); + } // Paddr is not used in mainReq. However, we will accumulate the flags // from the sub requests into mainReq by calling setFlags() in finish(). @@ -898,39 +891,63 @@ LSQ<Impl>::SplitDataRequest::initiateTranslation() mainReq->setPaddr(0); /* Get the pre-fix, possibly unaligned. */ - _requests.push_back(std::make_shared<Request>(_inst->getASID(), base_addr, - next_addr - base_addr, _flags, _inst->masterId(), - _inst->instAddr(), _inst->contextId())); + if (_byteEnable.empty()) { + this->addRequest(base_addr, next_addr - base_addr, _byteEnable); + } else { + auto it_start = _byteEnable.begin(); + auto it_end = _byteEnable.begin() + (next_addr - base_addr); + this->addRequest(base_addr, next_addr - base_addr, + std::vector<bool>(it_start, it_end)); + } size_so_far = next_addr - base_addr; /* We are block aligned now, reading whole blocks. */ base_addr = next_addr; while (base_addr != final_addr) { - _requests.push_back(std::make_shared<Request>(_inst->getASID(), - base_addr, cacheLineSize, _flags, _inst->masterId(), - _inst->instAddr(), _inst->contextId())); + if (_byteEnable.empty()) { + this->addRequest(base_addr, cacheLineSize, _byteEnable); + } else { + auto it_start = _byteEnable.begin() + size_so_far; + auto it_end = _byteEnable.begin() + size_so_far + cacheLineSize; + this->addRequest(base_addr, cacheLineSize, + std::vector<bool>(it_start, it_end)); + } size_so_far += cacheLineSize; base_addr += cacheLineSize; } /* Deal with the tail. */ if (size_so_far < _size) { - _requests.push_back(std::make_shared<Request>(_inst->getASID(), - base_addr, _size - size_so_far, _flags, _inst->masterId(), - _inst->instAddr(), _inst->contextId())); + if (_byteEnable.empty()) { + this->addRequest(base_addr, _size - size_so_far, _byteEnable); + } else { + auto it_start = _byteEnable.begin() + size_so_far; + auto it_end = _byteEnable.end(); + this->addRequest(base_addr, _size - size_so_far, + std::vector<bool>(it_start, it_end)); + } } - /* Setup the requests and send them to translation. */ - for (auto& r: _requests) { - r->setReqInstSeqNum(_inst->seqNum); - r->taskId(_taskId); - } - this->_inst->savedReq = this; - numInTranslationFragments = 0; - numTranslatedFragments = 0; + if (_requests.size() > 0) { + /* Setup the requests and send them to translation. */ + for (auto& r: _requests) { + r->setReqInstSeqNum(_inst->seqNum); + r->taskId(_taskId); + } - for (uint32_t i = 0; i < _requests.size(); i++) { - sendFragmentToTranslation(i); + _inst->translationStarted(true); + setState(State::Translation); + flags.set(Flag::TranslationStarted); + this->_inst->savedReq = this; + numInTranslationFragments = 0; + numTranslatedFragments = 0; + _fault.resize(_requests.size()); + + for (uint32_t i = 0; i < _requests.size(); i++) { + sendFragmentToTranslation(i); + } + } else { + _inst->setMemAccPredicate(false); } } @@ -968,8 +985,6 @@ LSQ<Impl>::SplitDataRequest::recvTimingResp(PacketPtr pkt) while (pktIdx < _packets.size() && pkt != _packets[pktIdx]) pktIdx++; assert(pktIdx < _packets.size()); - assert(pkt->req == _requests[pktIdx]); - assert(pkt == _packets[pktIdx]); numReceivedPackets++; state->outstanding--; if (numReceivedPackets == _packets.size()) { @@ -1012,16 +1027,19 @@ void LSQ<Impl>::SplitDataRequest::buildPackets() { /* Extra data?? */ - ptrdiff_t offset = 0; + Addr base_address = _addr; + if (_packets.size() == 0) { /* New stuff */ if (isLoad()) { _mainPacket = Packet::createRead(mainReq); _mainPacket->dataStatic(_inst->memData); } - for (auto& r: _requests) { + for (int i = 0; i < _requests.size() && _fault[i] == NoFault; i++) { + RequestPtr r = _requests[i]; PacketPtr pkt = isLoad() ? Packet::createRead(r) - : Packet::createWrite(r); + : Packet::createWrite(r); + ptrdiff_t offset = r->getVaddr() - base_address; if (isLoad()) { pkt->dataStatic(_inst->memData + offset); } else { @@ -1031,12 +1049,11 @@ LSQ<Impl>::SplitDataRequest::buildPackets() r->getSize()); pkt->dataDynamic(req_data); } - offset += r->getSize(); pkt->senderState = _senderState; _packets.push_back(pkt); } } - assert(_packets.size() == _requests.size()); + assert(_packets.size() > 0); } template<class Impl> diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index 9323e8634..21bed99fa 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -542,8 +542,7 @@ LSQUnit<Impl>::executeLoad(const DynInstPtr &inst) load_fault = inst->initiateAcc(); - if (!inst->readMemAccPredicate()) { - assert(load_fault == NoFault); + if (load_fault == NoFault && !inst->readMemAccPredicate()) { assert(inst->readPredicate()); inst->setExecuted(); inst->completeAcc(nullptr); |