summary | refs | log | tree | commit | diff
path: root/src/cpu/o3
diff options
context:
space:
mode:
Diffstat (limited to 'src/cpu/o3')
-rw-r--r-- src/cpu/o3/cpu.hh | 7
-rw-r--r-- src/cpu/o3/lsq.hh | 48
-rw-r--r-- src/cpu/o3/lsq_impl.hh | 129
-rw-r--r-- src/cpu/o3/lsq_unit_impl.hh | 3
4 files changed, 116 insertions, 71 deletions
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index c754fe8cf..db8fca20a 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -787,10 +787,13 @@ class FullO3CPU : public BaseO3CPU
/** CPU pushRequest function, forwards request to LSQ. */
Fault pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
unsigned int size, Addr addr, Request::Flags flags,
- uint64_t *res, AtomicOpFunctor *amo_op = nullptr)
+ uint64_t *res, AtomicOpFunctor *amo_op = nullptr,
+ const std::vector<bool>& byteEnable =
+ std::vector<bool>())
+
{
return iew.ldstQueue.pushRequest(inst, isLoad, data, size, addr,
- flags, res, amo_op);
+ flags, res, amo_op, byteEnable);
}
/** CPU read function, forwards read to LSQ. */
diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh
index a6037b7f4..84f1411a5 100644
--- a/src/cpu/o3/lsq.hh
+++ b/src/cpu/o3/lsq.hh
@@ -50,6 +50,7 @@
#include "arch/generic/tlb.hh"
#include "cpu/inst_seq.hh"
#include "cpu/o3/lsq_unit.hh"
+#include "cpu/utils.hh"
#include "enums/SMTQueuePolicy.hh"
#include "mem/port.hh"
#include "sim/sim_object.hh"
@@ -251,6 +252,7 @@ class LSQ
const Addr _addr;
const uint32_t _size;
const Request::Flags _flags;
+ std::vector<bool> _byteEnable;
uint32_t _numOutstandingPackets;
AtomicOpFunctor *_amo_op;
protected:
@@ -351,6 +353,28 @@ class LSQ
}
}
+ /** Helper function used to add a (sub)request, given its address
+ * `addr`, size `size` and byte-enable mask `byteEnable`.
+ *
+ * The request is only added if the mask is empty or if there is at
+ * least an active element in it.
+ */
+ void
+ addRequest(Addr addr, unsigned size,
+ const std::vector<bool>& byteEnable)
+ {
+ if (byteEnable.empty() ||
+ isAnyActiveElement(byteEnable.begin(), byteEnable.end())) {
+ auto request = std::make_shared<Request>(_inst->getASID(),
+ addr, size, _flags, _inst->masterId(),
+ _inst->instAddr(), _inst->contextId());
+ if (!byteEnable.empty()) {
+ request->setByteEnable(byteEnable);
+ }
+ _requests.push_back(request);
+ }
+ }
+
/** Destructor.
* The LSQRequest owns the request. If the packet has already been
* sent, the sender state will be deleted upon receiving the reply.
@@ -609,11 +633,17 @@ class LSQ
* declaration of the names in the parent class. */
using Flag = typename LSQRequest::Flag;
using State = typename LSQRequest::State;
+ using LSQRequest::_addr;
using LSQRequest::_fault;
+ using LSQRequest::_flags;
+ using LSQRequest::_size;
+ using LSQRequest::_byteEnable;
+ using LSQRequest::_requests;
using LSQRequest::_inst;
using LSQRequest::_packets;
using LSQRequest::_port;
using LSQRequest::_res;
+ using LSQRequest::_taskId;
using LSQRequest::_senderState;
using LSQRequest::_state;
using LSQRequest::flags;
@@ -635,14 +665,8 @@ class LSQ
uint64_t* res = nullptr,
AtomicOpFunctor* amo_op = nullptr) :
LSQRequest(port, inst, isLoad, addr, size, flags_, data, res,
- amo_op)
- {
- LSQRequest::_requests.push_back(
- std::make_shared<Request>(inst->getASID(), addr, size,
- flags_, inst->masterId(), inst->instAddr(),
- inst->contextId(), amo_op));
- LSQRequest::_requests.back()->setReqInstSeqNum(inst->seqNum);
- }
+ amo_op) {}
+
inline virtual ~SingleDataRequest() {}
virtual void initiateTranslation();
virtual void finish(const Fault &fault, const RequestPtr &req,
@@ -671,6 +695,7 @@ class LSQ
using LSQRequest::_port;
using LSQRequest::_requests;
using LSQRequest::_res;
+ using LSQRequest::_byteEnable;
using LSQRequest::_senderState;
using LSQRequest::_size;
using LSQRequest::_state;
@@ -691,14 +716,14 @@ class LSQ
RequestPtr mainReq;
PacketPtr _mainPacket;
-
public:
SplitDataRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad,
const Addr& addr, const uint32_t& size,
const Request::Flags & flags_,
PacketDataPtr data = nullptr,
uint64_t* res = nullptr) :
- LSQRequest(port, inst, isLoad, addr, size, flags_, data, res),
+ LSQRequest(port, inst, isLoad, addr, size, flags_, data, res,
+ nullptr),
numFragments(0),
numReceivedPackets(0),
mainReq(nullptr),
@@ -949,7 +974,8 @@ class LSQ
Fault pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
unsigned int size, Addr addr, Request::Flags flags,
- uint64_t *res, AtomicOpFunctor *amo_op);
+ uint64_t *res, AtomicOpFunctor *amo_op,
+ const std::vector<bool>& byteEnable);
/** The CPU pointer. */
O3CPU *cpu;
diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh
index 732712029..70621a523 100644
--- a/src/cpu/o3/lsq_impl.hh
+++ b/src/cpu/o3/lsq_impl.hh
@@ -681,29 +681,12 @@ LSQ<Impl>::dumpInsts() const
}
}
-static Addr
-addrBlockOffset(Addr addr, unsigned int block_size)
-{
- return addr & (block_size - 1);
-}
-
-static Addr
-addrBlockAlign(Addr addr, uint64_t block_size)
-{
- return addr & ~(block_size - 1);
-}
-
-static bool
-transferNeedsBurst(Addr addr, uint64_t size, uint64_t block_size)
-{
- return (addrBlockOffset(addr, block_size) + size) > block_size;
-}
-
template<class Impl>
Fault
LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
unsigned int size, Addr addr, Request::Flags flags,
- uint64_t *res, AtomicOpFunctor *amo_op)
+ uint64_t *res, AtomicOpFunctor *amo_op,
+ const std::vector<bool>& byteEnable)
{
// This comming request can be either load, store or atomic.
// Atomic request has a corresponding pointer to its atomic memory
@@ -735,6 +718,9 @@ LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
size, flags, data, res, amo_op);
}
assert(req);
+ if (!byteEnable.empty()) {
+ req->_byteEnable = byteEnable;
+ }
inst->setRequest();
req->taskId(cpu->taskId());
@@ -756,6 +742,7 @@ LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
else
inst->getFault() = cpu->write(req, data, inst->sqIdx);
} else if (isLoad) {
+ inst->setMemAccPredicate(false);
// Commit will have to clean up whatever happened. Set this
// instruction as executed.
inst->setExecuted();
@@ -848,14 +835,21 @@ template<class Impl>
void
LSQ<Impl>::SingleDataRequest::initiateTranslation()
{
- _inst->translationStarted(true);
- setState(State::Translation);
- flags.set(Flag::TranslationStarted);
+ assert(_requests.size() == 0);
- _inst->savedReq = this;
- sendFragmentToTranslation(0);
+ this->addRequest(_addr, _size, _byteEnable);
- if (isTranslationComplete()) {
+ if (_requests.size() > 0) {
+ _requests.back()->setReqInstSeqNum(_inst->seqNum);
+ _requests.back()->taskId(_taskId);
+ _inst->translationStarted(true);
+ setState(State::Translation);
+ flags.set(Flag::TranslationStarted);
+
+ _inst->savedReq = this;
+ sendFragmentToTranslation(0);
+ } else {
+ _inst->setMemAccPredicate(false);
}
}
@@ -877,11 +871,7 @@ template<class Impl>
void
LSQ<Impl>::SplitDataRequest::initiateTranslation()
{
- _inst->translationStarted(true);
- setState(State::Translation);
- flags.set(Flag::TranslationStarted);
-
- unsigned int cacheLineSize = _port.cacheLineSize();
+ auto cacheLineSize = _port.cacheLineSize();
Addr base_addr = _addr;
Addr next_addr = addrBlockAlign(_addr + cacheLineSize, cacheLineSize);
Addr final_addr = addrBlockAlign(_addr + _size, cacheLineSize);
@@ -890,6 +880,9 @@ LSQ<Impl>::SplitDataRequest::initiateTranslation()
mainReq = std::make_shared<Request>(_inst->getASID(), base_addr,
_size, _flags, _inst->masterId(),
_inst->instAddr(), _inst->contextId());
+ if (!_byteEnable.empty()) {
+ mainReq->setByteEnable(_byteEnable);
+ }
// Paddr is not used in mainReq. However, we will accumulate the flags
// from the sub requests into mainReq by calling setFlags() in finish().
@@ -898,39 +891,63 @@ LSQ<Impl>::SplitDataRequest::initiateTranslation()
mainReq->setPaddr(0);
/* Get the pre-fix, possibly unaligned. */
- _requests.push_back(std::make_shared<Request>(_inst->getASID(), base_addr,
- next_addr - base_addr, _flags, _inst->masterId(),
- _inst->instAddr(), _inst->contextId()));
+ if (_byteEnable.empty()) {
+ this->addRequest(base_addr, next_addr - base_addr, _byteEnable);
+ } else {
+ auto it_start = _byteEnable.begin();
+ auto it_end = _byteEnable.begin() + (next_addr - base_addr);
+ this->addRequest(base_addr, next_addr - base_addr,
+ std::vector<bool>(it_start, it_end));
+ }
size_so_far = next_addr - base_addr;
/* We are block aligned now, reading whole blocks. */
base_addr = next_addr;
while (base_addr != final_addr) {
- _requests.push_back(std::make_shared<Request>(_inst->getASID(),
- base_addr, cacheLineSize, _flags, _inst->masterId(),
- _inst->instAddr(), _inst->contextId()));
+ if (_byteEnable.empty()) {
+ this->addRequest(base_addr, cacheLineSize, _byteEnable);
+ } else {
+ auto it_start = _byteEnable.begin() + size_so_far;
+ auto it_end = _byteEnable.begin() + size_so_far + cacheLineSize;
+ this->addRequest(base_addr, cacheLineSize,
+ std::vector<bool>(it_start, it_end));
+ }
size_so_far += cacheLineSize;
base_addr += cacheLineSize;
}
/* Deal with the tail. */
if (size_so_far < _size) {
- _requests.push_back(std::make_shared<Request>(_inst->getASID(),
- base_addr, _size - size_so_far, _flags, _inst->masterId(),
- _inst->instAddr(), _inst->contextId()));
+ if (_byteEnable.empty()) {
+ this->addRequest(base_addr, _size - size_so_far, _byteEnable);
+ } else {
+ auto it_start = _byteEnable.begin() + size_so_far;
+ auto it_end = _byteEnable.end();
+ this->addRequest(base_addr, _size - size_so_far,
+ std::vector<bool>(it_start, it_end));
+ }
}
- /* Setup the requests and send them to translation. */
- for (auto& r: _requests) {
- r->setReqInstSeqNum(_inst->seqNum);
- r->taskId(_taskId);
- }
- this->_inst->savedReq = this;
- numInTranslationFragments = 0;
- numTranslatedFragments = 0;
+ if (_requests.size() > 0) {
+ /* Setup the requests and send them to translation. */
+ for (auto& r: _requests) {
+ r->setReqInstSeqNum(_inst->seqNum);
+ r->taskId(_taskId);
+ }
- for (uint32_t i = 0; i < _requests.size(); i++) {
- sendFragmentToTranslation(i);
+ _inst->translationStarted(true);
+ setState(State::Translation);
+ flags.set(Flag::TranslationStarted);
+ this->_inst->savedReq = this;
+ numInTranslationFragments = 0;
+ numTranslatedFragments = 0;
+ _fault.resize(_requests.size());
+
+ for (uint32_t i = 0; i < _requests.size(); i++) {
+ sendFragmentToTranslation(i);
+ }
+ } else {
+ _inst->setMemAccPredicate(false);
}
}
@@ -968,8 +985,6 @@ LSQ<Impl>::SplitDataRequest::recvTimingResp(PacketPtr pkt)
while (pktIdx < _packets.size() && pkt != _packets[pktIdx])
pktIdx++;
assert(pktIdx < _packets.size());
- assert(pkt->req == _requests[pktIdx]);
- assert(pkt == _packets[pktIdx]);
numReceivedPackets++;
state->outstanding--;
if (numReceivedPackets == _packets.size()) {
@@ -1012,16 +1027,19 @@ void
LSQ<Impl>::SplitDataRequest::buildPackets()
{
/* Extra data?? */
- ptrdiff_t offset = 0;
+ Addr base_address = _addr;
+
if (_packets.size() == 0) {
/* New stuff */
if (isLoad()) {
_mainPacket = Packet::createRead(mainReq);
_mainPacket->dataStatic(_inst->memData);
}
- for (auto& r: _requests) {
+ for (int i = 0; i < _requests.size() && _fault[i] == NoFault; i++) {
+ RequestPtr r = _requests[i];
PacketPtr pkt = isLoad() ? Packet::createRead(r)
- : Packet::createWrite(r);
+ : Packet::createWrite(r);
+ ptrdiff_t offset = r->getVaddr() - base_address;
if (isLoad()) {
pkt->dataStatic(_inst->memData + offset);
} else {
@@ -1031,12 +1049,11 @@ LSQ<Impl>::SplitDataRequest::buildPackets()
r->getSize());
pkt->dataDynamic(req_data);
}
- offset += r->getSize();
pkt->senderState = _senderState;
_packets.push_back(pkt);
}
}
- assert(_packets.size() == _requests.size());
+ assert(_packets.size() > 0);
}
template<class Impl>
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 9323e8634..21bed99fa 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -542,8 +542,7 @@ LSQUnit<Impl>::executeLoad(const DynInstPtr &inst)
load_fault = inst->initiateAcc();
- if (!inst->readMemAccPredicate()) {
- assert(load_fault == NoFault);
+ if (load_fault == NoFault && !inst->readMemAccPredicate()) {
assert(inst->readPredicate());
inst->setExecuted();
inst->completeAcc(nullptr);