-rw-r--r--  src/cpu/base.hh                 |   4
-rw-r--r--  src/cpu/base_dyn_inst.hh        |  17
-rw-r--r--  src/cpu/checker/cpu.cc          | 120
-rw-r--r--  src/cpu/checker/cpu.hh          |  29
-rw-r--r--  src/cpu/exec_context.hh         |  10
-rw-r--r--  src/cpu/minor/dyn_inst.hh       |  17
-rw-r--r--  src/cpu/minor/exec_context.hh   |  22
-rw-r--r--  src/cpu/minor/execute.cc        |  10
-rw-r--r--  src/cpu/minor/lsq.cc            | 148
-rw-r--r--  src/cpu/minor/lsq.hh            |  11
-rw-r--r--  src/cpu/o3/cpu.hh               |   7
-rw-r--r--  src/cpu/o3/lsq.hh               |  48
-rw-r--r--  src/cpu/o3/lsq_impl.hh          | 129
-rw-r--r--  src/cpu/o3/lsq_unit_impl.hh     |   3
-rw-r--r--  src/cpu/simple/atomic.cc        | 138
-rw-r--r--  src/cpu/simple/atomic.hh        |  29
-rw-r--r--  src/cpu/simple/base.cc          |   6
-rw-r--r--  src/cpu/simple/base.hh          |  14
-rw-r--r--  src/cpu/simple/exec_context.hh  |  20
-rw-r--r--  src/cpu/simple/timing.cc        |  18
-rw-r--r--  src/cpu/simple/timing.hh        |  10
-rw-r--r--  src/cpu/simple_thread.cc        |   7
-rw-r--r--  src/cpu/utils.hh                |  96
-rw-r--r--  src/mem/abstract_mem.cc         |   2
-rw-r--r--  src/mem/cache/cache.cc          |   3
-rw-r--r--  src/mem/packet.hh               |  35
-rw-r--r--  src/mem/request.hh              |  27
27 files changed, 711 insertions(+), 269 deletions(-)
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index f013a3e02..3d679f172 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -175,9 +175,9 @@ class BaseCPU : public ClockedObject
uint32_t socketId() const { return _socketId; }
/** Reads this CPU's unique data requestor ID */
- MasterID dataMasterId() { return _dataMasterId; }
+ MasterID dataMasterId() const { return _dataMasterId; }
/** Reads this CPU's unique instruction requestor ID */
- MasterID instMasterId() { return _instMasterId; }
+ MasterID instMasterId() const { return _instMasterId; }
/**
* Get a port on this CPU. All CPUs have a data and
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index 4084241bd..22a32ec10 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -298,10 +298,12 @@ class BaseDynInst : public ExecContext, public RefCounted
cpu->demapPage(vaddr, asn);
}
- Fault initiateMemRead(Addr addr, unsigned size, Request::Flags flags);
+ Fault initiateMemRead(Addr addr, unsigned size, Request::Flags flags,
+ const std::vector<bool>& byteEnable = std::vector<bool>());
Fault writeMem(uint8_t *data, unsigned size, Addr addr,
- Request::Flags flags, uint64_t *res);
+ Request::Flags flags, uint64_t *res,
+ const std::vector<bool>& byteEnable = std::vector<bool>());
Fault initiateMemAMO(Addr addr, unsigned size, Request::Flags flags,
AtomicOpFunctor *amo_op);
@@ -918,21 +920,24 @@ class BaseDynInst : public ExecContext, public RefCounted
template<class Impl>
Fault
BaseDynInst<Impl>::initiateMemRead(Addr addr, unsigned size,
- Request::Flags flags)
+ Request::Flags flags,
+ const std::vector<bool>& byteEnable)
{
return cpu->pushRequest(
dynamic_cast<typename DynInstPtr::PtrType>(this),
- /* ld */ true, nullptr, size, addr, flags, nullptr);
+ /* ld */ true, nullptr, size, addr, flags, nullptr, nullptr,
+ byteEnable);
}
template<class Impl>
Fault
BaseDynInst<Impl>::writeMem(uint8_t *data, unsigned size, Addr addr,
- Request::Flags flags, uint64_t *res)
+ Request::Flags flags, uint64_t *res,
+ const std::vector<bool>& byteEnable)
{
return cpu->pushRequest(
dynamic_cast<typename DynInstPtr::PtrType>(this),
- /* st */ false, data, size, addr, flags, res);
+ /* st */ false, data, size, addr, flags, res, nullptr, byteEnable);
}
template<class Impl>
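Throughout this patch, an empty byte-enable vector means the access is fully enabled, while a non-empty vector must match the access size and marks individual bytes as active. A minimal sketch of the two call forms against the interface above, as they might appear inside an instruction's initiateAcc() with addr and flags already in scope (illustrative only, not taken from the patch):

    // Full-width 8-byte load: the default, empty byte-enable mask.
    Fault fault = initiateMemRead(addr, 8, flags);

    // Partially enabled 8-byte load: only the low four bytes are active.
    std::vector<bool> byte_enable{true, true, true, true,
                                  false, false, false, false};
    fault = initiateMemRead(addr, 8, flags, byte_enable);
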
diff --git a/src/cpu/checker/cpu.cc b/src/cpu/checker/cpu.cc
index 7f8eada4c..cca6d6b12 100644
--- a/src/cpu/checker/cpu.cc
+++ b/src/cpu/checker/cpu.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011,2013,2017 ARM Limited
+ * Copyright (c) 2011,2013,2017-2018 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -52,6 +52,7 @@
#include "cpu/simple_thread.hh"
#include "cpu/static_inst.hh"
#include "cpu/thread_context.hh"
+#include "cpu/utils.hh"
#include "params/CheckerCPU.hh"
#include "sim/full_system.hh"
@@ -139,31 +140,68 @@ CheckerCPU::unserialize(CheckpointIn &cp)
{
}
+RequestPtr
+CheckerCPU::genMemFragmentRequest(Addr frag_addr, int size,
+ Request::Flags flags,
+ const std::vector<bool>& byte_enable,
+ int& frag_size, int& size_left) const
+{
+ frag_size = std::min(
+ cacheLineSize() - addrBlockOffset(frag_addr, cacheLineSize()),
+ (Addr) size_left);
+ size_left -= frag_size;
+
+ RequestPtr mem_req;
+
+ if (!byte_enable.empty()) {
+ // Set up byte-enable mask for the current fragment
+ auto it_start = byte_enable.cbegin() + (size - (frag_size +
+ size_left));
+ auto it_end = byte_enable.cbegin() + (size - size_left);
+ if (isAnyActiveElement(it_start, it_end)) {
+ mem_req = std::make_shared<Request>(0, frag_addr, frag_size,
+ flags, masterId, thread->pcState().instAddr(),
+ tc->contextId());
+ mem_req->setByteEnable(std::vector<bool>(it_start, it_end));
+ }
+ } else {
+ mem_req = std::make_shared<Request>(0, frag_addr, frag_size,
+ flags, masterId, thread->pcState().instAddr(),
+ tc->contextId());
+ }
+
+ return mem_req;
+}
+
Fault
CheckerCPU::readMem(Addr addr, uint8_t *data, unsigned size,
- Request::Flags flags)
+ Request::Flags flags,
+ const std::vector<bool>& byteEnable)
{
Fault fault = NoFault;
- int fullSize = size;
- Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());
bool checked_flags = false;
bool flags_match = true;
Addr pAddr = 0x0;
-
- if (secondAddr > addr)
- size = secondAddr - addr;
+ Addr frag_addr = addr;
+ int frag_size = 0;
+ int size_left = size;
+ bool predicate;
// Need to account for multiple accesses like the Atomic and TimingSimple
while (1) {
- auto mem_req = std::make_shared<Request>(
- 0, addr, size, flags, masterId,
- thread->pcState().instAddr(), tc->contextId());
+ RequestPtr mem_req = genMemFragmentRequest(frag_addr, size, flags,
+ byteEnable, frag_size,
+ size_left);
+
+ predicate = (mem_req != nullptr);
// translate to physical address
- fault = dtb->translateFunctional(mem_req, tc, BaseTLB::Read);
+ if (predicate) {
+ fault = dtb->translateFunctional(mem_req, tc, BaseTLB::Read);
+ }
- if (!checked_flags && fault == NoFault && unverifiedReq) {
+ if (predicate && !checked_flags && fault == NoFault && unverifiedReq) {
flags_match = checkFlags(unverifiedReq, mem_req->getVaddr(),
mem_req->getPaddr(), mem_req->getFlags());
pAddr = mem_req->getPaddr();
@@ -171,7 +209,7 @@ CheckerCPU::readMem(Addr addr, uint8_t *data, unsigned size,
}
// Now do the access
- if (fault == NoFault &&
+ if (predicate && fault == NoFault &&
!mem_req->getFlags().isSet(Request::NO_ACCESS)) {
PacketPtr pkt = Packet::createRead(mem_req);
@@ -182,7 +220,7 @@ CheckerCPU::readMem(Addr addr, uint8_t *data, unsigned size,
dcachePort->sendFunctional(pkt);
} else {
// Assume the data is correct if it's an uncached access
- memcpy(data, unverifiedMemData, size);
+ memcpy(data, unverifiedMemData, frag_size);
}
delete pkt;
@@ -196,22 +234,21 @@ CheckerCPU::readMem(Addr addr, uint8_t *data, unsigned size,
}
//If we don't need to access a second cache line, stop now.
- if (secondAddr <= addr)
+ if (size_left == 0)
{
break;
}
// Setup for accessing next cache line
- data += size;
- unverifiedMemData += size;
- size = addr + fullSize - secondAddr;
- addr = secondAddr;
+ frag_addr += frag_size;
+ data += frag_size;
+ unverifiedMemData += frag_size;
}
if (!flags_match) {
warn("%lli: Flags do not match CPU:%#x %#x %#x Checker:%#x %#x %#x\n",
curTick(), unverifiedReq->getVaddr(), unverifiedReq->getPaddr(),
- unverifiedReq->getFlags(), addr, pAddr, flags);
+ unverifiedReq->getFlags(), frag_addr, pAddr, flags);
handleError();
}
@@ -220,31 +257,35 @@ CheckerCPU::readMem(Addr addr, uint8_t *data, unsigned size,
Fault
CheckerCPU::writeMem(uint8_t *data, unsigned size,
- Addr addr, Request::Flags flags, uint64_t *res)
+ Addr addr, Request::Flags flags, uint64_t *res,
+ const std::vector<bool>& byteEnable)
{
+ assert(byteEnable.empty() || byteEnable.size() == size);
+
Fault fault = NoFault;
bool checked_flags = false;
bool flags_match = true;
Addr pAddr = 0x0;
static uint8_t zero_data[64] = {};
- int fullSize = size;
-
- Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());
-
- if (secondAddr > addr)
- size = secondAddr - addr;
+ Addr frag_addr = addr;
+ int frag_size = 0;
+ int size_left = size;
+ bool predicate;
// Need to account for a multiple access like Atomic and Timing CPUs
while (1) {
- auto mem_req = std::make_shared<Request>(
- 0, addr, size, flags, masterId,
- thread->pcState().instAddr(), tc->contextId());
+ RequestPtr mem_req = genMemFragmentRequest(frag_addr, size, flags,
+ byteEnable, frag_size,
+ size_left);
- // translate to physical address
- fault = dtb->translateFunctional(mem_req, tc, BaseTLB::Write);
+ predicate = (mem_req != nullptr);
+
+ if (predicate) {
+ fault = dtb->translateFunctional(mem_req, tc, BaseTLB::Write);
+ }
- if (!checked_flags && fault == NoFault && unverifiedReq) {
+ if (predicate && !checked_flags && fault == NoFault && unverifiedReq) {
flags_match = checkFlags(unverifiedReq, mem_req->getVaddr(),
mem_req->getPaddr(), mem_req->getFlags());
pAddr = mem_req->getPaddr();
@@ -261,7 +302,7 @@ CheckerCPU::writeMem(uint8_t *data, unsigned size,
bool was_prefetch = mem_req->isPrefetch();
//If we don't need to access a second cache line, stop now.
- if (fault != NoFault || secondAddr <= addr)
+ if (fault != NoFault || size_left == 0)
{
if (fault != NoFault && was_prefetch) {
fault = NoFault;
@@ -269,16 +310,13 @@ CheckerCPU::writeMem(uint8_t *data, unsigned size,
break;
}
- //Update size and access address
- size = addr + fullSize - secondAddr;
- //And access the right address.
- addr = secondAddr;
+ frag_addr += frag_size;
}
if (!flags_match) {
warn("%lli: Flags do not match CPU:%#x %#x Checker:%#x %#x %#x\n",
curTick(), unverifiedReq->getVaddr(), unverifiedReq->getPaddr(),
- unverifiedReq->getFlags(), addr, pAddr, flags);
+ unverifiedReq->getFlags(), frag_addr, pAddr, flags);
handleError();
}
@@ -304,12 +342,12 @@ CheckerCPU::writeMem(uint8_t *data, unsigned size,
// const set of zeros.
if (flags & Request::STORE_NO_DATA) {
assert(!data);
- assert(sizeof(zero_data) <= fullSize);
+ assert(sizeof(zero_data) <= size);
data = zero_data;
}
if (unverifiedReq && unverifiedMemData &&
- memcmp(data, unverifiedMemData, fullSize) && extraData) {
+ memcmp(data, unverifiedMemData, size) && extraData) {
warn("%lli: Store value does not match value sent to memory! "
"data: %#x inst_data: %#x", curTick(), data,
unverifiedMemData);
diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh
index 8c3000005..66632b720 100644
--- a/src/cpu/checker/cpu.hh
+++ b/src/cpu/checker/cpu.hh
@@ -531,11 +531,36 @@ class CheckerCPU : public BaseCPU, public ExecContext
this->dtb->demapPage(vaddr, asn);
}
+ /**
+ * Helper function used to generate the request for a single fragment of a
+ * memory access.
+ *
+ * Takes care of setting up the appropriate byte-enable mask for the
+ * fragment, given the mask for the entire memory access.
+ *
+ * @param frag_addr Start address of the fragment.
+ * @param size Total size of the memory access in bytes.
+ * @param flags Request flags.
+ * @param byte_enable Byte-enable mask for the entire memory access.
+ * @param[out] frag_size Fragment size.
+ * @param[in,out] size_left Size left to be processed in the memory access.
+ * @return Pointer to the allocated Request, nullptr if the byte-enable
+ * mask is all-false for the fragment.
+ */
+ RequestPtr genMemFragmentRequest(Addr frag_addr, int size,
+ Request::Flags flags,
+ const std::vector<bool>& byte_enable,
+ int& frag_size, int& size_left) const;
+
Fault readMem(Addr addr, uint8_t *data, unsigned size,
- Request::Flags flags) override;
+ Request::Flags flags,
+ const std::vector<bool>& byteEnable = std::vector<bool>())
+ override;
Fault writeMem(uint8_t *data, unsigned size, Addr addr,
- Request::Flags flags, uint64_t *res) override;
+ Request::Flags flags, uint64_t *res,
+ const std::vector<bool>& byteEnable = std::vector<bool>())
+ override;
Fault amoMem(Addr addr, uint8_t* data, unsigned size,
Request::Flags flags, AtomicOpFunctor *amo_op) override
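genMemFragmentRequest() is meant to be driven in a per-fragment loop, exactly as the new readMem()/writeMem() bodies in cpu.cc do. A minimal sketch of that calling pattern, assuming addr, size, flags and byteEnable are in scope (illustrative only, not part of the patch):

    Addr frag_addr = addr;
    int frag_size = 0;
    int size_left = size;
    while (true) {
        RequestPtr mem_req = genMemFragmentRequest(frag_addr, size, flags,
                                                   byteEnable, frag_size,
                                                   size_left);
        // nullptr means the byte-enable mask is all-false for this
        // fragment: translation and the memory access are skipped.
        if (mem_req) {
            // ... translate mem_req and perform the access ...
        }
        if (size_left == 0)
            break;
        frag_addr += frag_size;
    }
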
diff --git a/src/cpu/exec_context.hh b/src/cpu/exec_context.hh
index 4cad9e3e1..b294387e2 100644
--- a/src/cpu/exec_context.hh
+++ b/src/cpu/exec_context.hh
@@ -235,7 +235,8 @@ class ExecContext {
* should never be called).
*/
virtual Fault readMem(Addr addr, uint8_t *data, unsigned int size,
- Request::Flags flags)
+ Request::Flags flags,
+ const std::vector<bool>& byteEnable = std::vector<bool>())
{
panic("ExecContext::readMem() should be overridden\n");
}
@@ -248,7 +249,8 @@ class ExecContext {
* should never be called).
*/
virtual Fault initiateMemRead(Addr addr, unsigned int size,
- Request::Flags flags)
+ Request::Flags flags,
+ const std::vector<bool>& byteEnable = std::vector<bool>())
{
panic("ExecContext::initiateMemRead() should be overridden\n");
}
@@ -258,7 +260,9 @@ class ExecContext {
* For timing-mode contexts, initiate a timing memory write operation.
*/
virtual Fault writeMem(uint8_t *data, unsigned int size, Addr addr,
- Request::Flags flags, uint64_t *res) = 0;
+ Request::Flags flags, uint64_t *res,
+ const std::vector<bool>& byteEnable =
+ std::vector<bool>()) = 0;
/**
* For atomic-mode contexts, perform an atomic AMO (a.k.a., Atomic
diff --git a/src/cpu/minor/dyn_inst.hh b/src/cpu/minor/dyn_inst.hh
index b2decb39b..0a8ff8acf 100644
--- a/src/cpu/minor/dyn_inst.hh
+++ b/src/cpu/minor/dyn_inst.hh
@@ -202,6 +202,13 @@ class MinorDynInst : public RefCounted
* to allow other instructions to fill the fetch delay */
bool canEarlyIssue;
+ /** Flag controlling conditional execution of the instruction */
+ bool predicate;
+
+ /** Flag controlling conditional execution of the memory access associated
+ * with the instruction (only meaningful for loads/stores) */
+ bool memAccPredicate;
+
/** execSeqNum of the latest inst on which this inst depends.
* This can be used as a sanity check for dependency ordering
* where slightly out of order execution is required (notably
@@ -227,7 +234,7 @@ class MinorDynInst : public RefCounted
pc(TheISA::PCState(0)), fault(fault_),
triedToPredict(false), predictedTaken(false),
fuIndex(0), inLSQ(false), inStoreBuffer(false),
- canEarlyIssue(false),
+ canEarlyIssue(false), predicate(true), memAccPredicate(true),
instToWaitFor(0), extraCommitDelay(Cycles(0)),
extraCommitDelayExpr(NULL), minimumCommitCycle(Cycles(0))
{ }
@@ -266,6 +273,14 @@ class MinorDynInst : public RefCounted
/** ReportIF interface */
void reportData(std::ostream &os) const;
+ bool readPredicate() const { return predicate; }
+
+ void setPredicate(bool val) { predicate = val; }
+
+ bool readMemAccPredicate() const { return memAccPredicate; }
+
+ void setMemAccPredicate(bool val) { memAccPredicate = val; }
+
~MinorDynInst();
};
diff --git a/src/cpu/minor/exec_context.hh b/src/cpu/minor/exec_context.hh
index b39bbac3f..9f6fce4cd 100644
--- a/src/cpu/minor/exec_context.hh
+++ b/src/cpu/minor/exec_context.hh
@@ -96,28 +96,40 @@ class ExecContext : public ::ExecContext
{
DPRINTF(MinorExecute, "ExecContext setting PC: %s\n", inst->pc);
pcState(inst->pc);
- setPredicate(true);
+ setPredicate(inst->readPredicate());
+ setMemAccPredicate(inst->readMemAccPredicate());
thread.setIntReg(TheISA::ZeroReg, 0);
#if THE_ISA == ALPHA_ISA
thread.setFloatReg(TheISA::ZeroReg, 0);
#endif
}
+ ~ExecContext()
+ {
+ inst->setPredicate(readPredicate());
+ inst->setMemAccPredicate(readMemAccPredicate());
+ }
+
Fault
initiateMemRead(Addr addr, unsigned int size,
- Request::Flags flags) override
+ Request::Flags flags,
+ const std::vector<bool>& byteEnable = std::vector<bool>())
+ override
{
execute.getLSQ().pushRequest(inst, true /* load */, nullptr,
- size, addr, flags, NULL, nullptr);
+ size, addr, flags, nullptr, nullptr, byteEnable);
return NoFault;
}
Fault
writeMem(uint8_t *data, unsigned int size, Addr addr,
- Request::Flags flags, uint64_t *res) override
+ Request::Flags flags, uint64_t *res,
+ const std::vector<bool>& byteEnable = std::vector<bool>())
+ override
{
+ assert(byteEnable.empty() || byteEnable.size() == size);
execute.getLSQ().pushRequest(inst, false /* store */, data,
- size, addr, flags, res, nullptr);
+ size, addr, flags, res, nullptr, byteEnable);
return NoFault;
}
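With the new default arguments, existing callers of initiateMemRead()/writeMem() are unchanged; a predicated store only needs to pass a mask alongside its data. A hedged sketch of such a call from an instruction's initiateAcc(), where xc is the ExecContext, data/addr/flags are assumed to be in scope, and predicate_bit() is a hypothetical per-byte predicate (not part of the patch):

    // Store 16 bytes, but only where the per-byte predicate is set.
    std::vector<bool> byte_enable(16, false);
    for (int i = 0; i < 16; i++)
        byte_enable[i] = predicate_bit(i);  // hypothetical predicate source
    Fault fault = xc->writeMem(data, 16, addr, flags, nullptr, byte_enable);
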
diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc
index 47f3cbc68..527eb2bc0 100644
--- a/src/cpu/minor/execute.cc
+++ b/src/cpu/minor/execute.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013-2014 ARM Limited
+ * Copyright (c) 2013-2014,2018 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -364,6 +364,8 @@ Execute::handleMemResponse(MinorDynInstPtr inst,
DPRINTF(MinorMem, "Completing failed request inst: %s\n",
*inst);
use_context_predicate = false;
+ if (!context.readMemAccPredicate())
+ inst->staticInst->completeAcc(nullptr, &context, inst->traceData);
} else if (packet->isError()) {
DPRINTF(MinorMem, "Trying to commit error response: %s\n",
*inst);
@@ -481,6 +483,10 @@ Execute::executeMemRefInst(MinorDynInstPtr inst, BranchData &branch,
} else {
/* Only set this if the instruction passed its
* predicate */
+ if (!context.readMemAccPredicate()) {
+ DPRINTF(MinorMem, "No memory access for inst: %s\n", *inst);
+ assert(context.readPredicate());
+ }
passed_predicate = context.readPredicate();
/* Set predicate in tracing */
@@ -928,7 +934,7 @@ Execute::commitInst(MinorDynInstPtr inst, bool early_memory_issue,
* until it gets to the head of inFlightInsts */
inst->canEarlyIssue = false;
/* Not completed as we'll come here again to pick up
- * the fault when we get to the end of the FU */
+ * the fault when we get to the end of the FU */
completed_inst = false;
} else {
DPRINTF(MinorExecute, "Fault in execute: %s\n",
diff --git a/src/cpu/minor/lsq.cc b/src/cpu/minor/lsq.cc
index 6fe6c3738..1d9f17e8d 100644
--- a/src/cpu/minor/lsq.cc
+++ b/src/cpu/minor/lsq.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013-2014,2017 ARM Limited
+ * Copyright (c) 2013-2014,2017-2018 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -49,27 +49,13 @@
#include "cpu/minor/exec_context.hh"
#include "cpu/minor/execute.hh"
#include "cpu/minor/pipeline.hh"
+#include "cpu/utils.hh"
#include "debug/Activity.hh"
#include "debug/MinorMem.hh"
namespace Minor
{
-/** Returns the offset of addr into an aligned a block of size block_size */
-static Addr
-addrBlockOffset(Addr addr, unsigned int block_size)
-{
- return addr & (block_size - 1);
-}
-
-/** Returns true if the given [addr .. addr+size-1] transfer needs to be
- * fragmented across a block size of block_size */
-static bool
-transferNeedsBurst(Addr addr, unsigned int size, unsigned int block_size)
-{
- return (addrBlockOffset(addr, block_size) + size) > block_size;
-}
-
LSQ::LSQRequest::LSQRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_,
PacketDataPtr data_, uint64_t *res_) :
SenderState(),
@@ -88,6 +74,13 @@ LSQ::LSQRequest::LSQRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_,
request = std::make_shared<Request>();
}
+void
+LSQ::LSQRequest::disableMemAccess()
+{
+ port.cpu.threads[inst->id.threadId]->setMemAccPredicate(false);
+ DPRINTFS(MinorMem, (&port), "Disable mem access for inst:%s\n", *inst);
+}
+
LSQ::AddrRangeCoverage
LSQ::LSQRequest::containsAddrRangeOf(
Addr req1_addr, unsigned int req1_size,
@@ -256,16 +249,23 @@ LSQ::SingleDataRequest::startAddrTranslation()
ThreadContext *thread = port.cpu.getContext(
inst->id.threadId);
- port.numAccessesInDTLB++;
+ const auto &byteEnable = request->getByteEnable();
+ if (byteEnable.size() == 0 ||
+ isAnyActiveElement(byteEnable.cbegin(), byteEnable.cend())) {
+ port.numAccessesInDTLB++;
- setState(LSQ::LSQRequest::InTranslation);
+ setState(LSQ::LSQRequest::InTranslation);
- DPRINTFS(MinorMem, (&port), "Submitting DTLB request\n");
- /* Submit the translation request. The response will come through
- * finish/markDelayed on the LSQRequest as it bears the Translation
- * interface */
- thread->getDTBPtr()->translateTiming(
- request, thread, this, (isLoad ? BaseTLB::Read : BaseTLB::Write));
+ DPRINTFS(MinorMem, (&port), "Submitting DTLB request\n");
+ /* Submit the translation request. The response will come through
+ * finish/markDelayed on the LSQRequest as it bears the Translation
+ * interface */
+ thread->getDTBPtr()->translateTiming(
+ request, thread, this, (isLoad ? BaseTLB::Read : BaseTLB::Write));
+ } else {
+ disableMemAccess();
+ setState(LSQ::LSQRequest::Complete);
+ }
}
void
@@ -357,6 +357,8 @@ LSQ::SplitDataRequest::makeFragmentRequests()
unsigned int fragment_size;
Addr fragment_addr;
+ std::vector<bool> fragment_write_byte_en;
+
/* Assume that this transfer is across potentially many block snap
* boundaries:
*
@@ -401,6 +403,9 @@ LSQ::SplitDataRequest::makeFragmentRequests()
/* Just past the last address in the request */
Addr end_addr = base_addr + whole_size;
+ auto& byte_enable = request->getByteEnable();
+ unsigned int num_disabled_fragments = 0;
+
for (unsigned int fragment_index = 0; fragment_index < numFragments;
fragment_index++)
{
@@ -421,32 +426,58 @@ LSQ::SplitDataRequest::makeFragmentRequests()
}
RequestPtr fragment = std::make_shared<Request>();
+ bool disabled_fragment = false;
fragment->setContext(request->contextId());
- fragment->setVirt(0 /* asid */,
- fragment_addr, fragment_size, request->getFlags(),
- request->masterId(),
- request->getPC());
+ if (byte_enable.empty()) {
+ fragment->setVirt(0 /* asid */,
+ fragment_addr, fragment_size, request->getFlags(),
+ request->masterId(),
+ request->getPC());
+ } else {
+ // Set up byte-enable mask for the current fragment
+ auto it_start = byte_enable.begin() +
+ (fragment_addr - base_addr);
+ auto it_end = byte_enable.begin() +
+ (fragment_addr - base_addr) + fragment_size;
+ if (isAnyActiveElement(it_start, it_end)) {
+ fragment->setVirt(0 /* asid */,
+ fragment_addr, fragment_size, request->getFlags(),
+ request->masterId(),
+ request->getPC());
+ fragment->setByteEnable(std::vector<bool>(it_start, it_end));
+ } else {
+ disabled_fragment = true;
+ }
+ }
- DPRINTFS(MinorMem, (&port), "Generating fragment addr: 0x%x size: %d"
- " (whole request addr: 0x%x size: %d) %s\n",
- fragment_addr, fragment_size, base_addr, whole_size,
- (is_last_fragment ? "last fragment" : ""));
+ if (!disabled_fragment) {
+ DPRINTFS(MinorMem, (&port), "Generating fragment addr: 0x%x"
+ " size: %d (whole request addr: 0x%x size: %d) %s\n",
+ fragment_addr, fragment_size, base_addr, whole_size,
+ (is_last_fragment ? "last fragment" : ""));
- fragment_addr += fragment_size;
+ fragmentRequests.push_back(fragment);
+ } else {
+ num_disabled_fragments++;
+ }
- fragmentRequests.push_back(fragment);
+ fragment_addr += fragment_size;
}
+ assert(numFragments >= num_disabled_fragments);
+ numFragments -= num_disabled_fragments;
}
void
LSQ::SplitDataRequest::makeFragmentPackets()
{
+ assert(numTranslatedFragments > 0);
Addr base_addr = request->getVaddr();
DPRINTFS(MinorMem, (&port), "Making packets for request: %s\n", *inst);
- for (unsigned int fragment_index = 0; fragment_index < numFragments;
+ for (unsigned int fragment_index = 0;
+ fragment_index < numTranslatedFragments;
fragment_index++)
{
RequestPtr fragment = fragmentRequests[fragment_index];
@@ -490,28 +521,32 @@ LSQ::SplitDataRequest::makeFragmentPackets()
void
LSQ::SplitDataRequest::startAddrTranslation()
{
- setState(LSQ::LSQRequest::InTranslation);
-
makeFragmentRequests();
- numInTranslationFragments = 0;
- numTranslatedFragments = 0;
+ if (numFragments > 0) {
+ setState(LSQ::LSQRequest::InTranslation);
+ numInTranslationFragments = 0;
+ numTranslatedFragments = 0;
- /* @todo, just do these in sequence for now with
- * a loop of:
- * do {
- * sendNextFragmentToTranslation ; translateTiming ; finish
- * } while (numTranslatedFragments != numFragments);
- */
+ /* @todo, just do these in sequence for now with
+ * a loop of:
+ * do {
+ * sendNextFragmentToTranslation ; translateTiming ; finish
+ * } while (numTranslatedFragments != numFragments);
+ */
- /* Do first translation */
- sendNextFragmentToTranslation();
+ /* Do first translation */
+ sendNextFragmentToTranslation();
+ } else {
+ disableMemAccess();
+ setState(LSQ::LSQRequest::Complete);
+ }
}
PacketPtr
LSQ::SplitDataRequest::getHeadPacket()
{
- assert(numIssuedFragments < numFragments);
+ assert(numIssuedFragments < numTranslatedFragments);
return fragmentPackets[numIssuedFragments];
}
@@ -519,7 +554,7 @@ LSQ::SplitDataRequest::getHeadPacket()
void
LSQ::SplitDataRequest::stepToNextPacket()
{
- assert(numIssuedFragments < numFragments);
+ assert(numIssuedFragments < numTranslatedFragments);
numIssuedFragments++;
}
@@ -527,14 +562,13 @@ LSQ::SplitDataRequest::stepToNextPacket()
void
LSQ::SplitDataRequest::retireResponse(PacketPtr response)
{
- assert(numRetiredFragments < numFragments);
+ assert(numRetiredFragments < numTranslatedFragments);
DPRINTFS(MinorMem, (&port), "Retiring fragment addr: 0x%x size: %d"
- " offset: 0x%x (retired fragment num: %d) %s\n",
+ " offset: 0x%x (retired fragment num: %d)\n",
response->req->getVaddr(), response->req->getSize(),
request->getVaddr() - response->req->getVaddr(),
- numRetiredFragments,
- (fault == NoFault ? "" : fault->name()));
+ numRetiredFragments);
numRetiredFragments++;
@@ -573,7 +607,7 @@ LSQ::SplitDataRequest::retireResponse(PacketPtr response)
packet->makeResponse();
}
- if (numRetiredFragments == numFragments)
+ if (numRetiredFragments == numTranslatedFragments)
setState(Complete);
if (!skipped && isComplete()) {
@@ -1477,7 +1511,8 @@ LSQ::needsToTick()
void
LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
unsigned int size, Addr addr, Request::Flags flags,
- uint64_t *res, AtomicOpFunctor *amo_op)
+ uint64_t *res, AtomicOpFunctor *amo_op,
+ const std::vector<bool>& byteEnable)
{
bool needs_burst = transferNeedsBurst(addr, size, lineWidth);
@@ -1533,6 +1568,9 @@ LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
addr, size, flags, cpu.dataMasterId(),
/* I've no idea why we need the PC, but give it */
inst->pc.instAddr(), amo_op);
+ if (!byteEnable.empty()) {
+ request->request->setByteEnable(byteEnable);
+ }
requests.push(request);
request->startAddrTranslation();
diff --git a/src/cpu/minor/lsq.hh b/src/cpu/minor/lsq.hh
index 11fa8774f..23b47c53c 100644
--- a/src/cpu/minor/lsq.hh
+++ b/src/cpu/minor/lsq.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013-2014 ARM Limited
+ * Copyright (c) 2013-2014, 2018 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -188,6 +188,8 @@ class LSQ : public Named
/** BaseTLB::Translation interface */
void markDelayed() { }
+ void disableMemAccess();
+
public:
LSQRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_,
PacketDataPtr data_ = NULL, uint64_t *res_ = NULL);
@@ -441,7 +443,8 @@ class LSQ : public Named
{ return numIssuedFragments != numRetiredFragments; }
/** Have we stepped past the end of fragmentPackets? */
- bool sentAllPackets() { return numIssuedFragments == numFragments; }
+ bool sentAllPackets()
+ { return numIssuedFragments == numTranslatedFragments; }
/** For loads, paste the response data into the main
* response packet */
@@ -700,7 +703,9 @@ class LSQ : public Named
* the LSQ */
void pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
unsigned int size, Addr addr, Request::Flags flags,
- uint64_t *res, AtomicOpFunctor *amo_op);
+ uint64_t *res, AtomicOpFunctor *amo_op,
+ const std::vector<bool>& byteEnable =
+ std::vector<bool>());
/** Push a predicate failed-representing request into the queues just
* to maintain commit order */
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index c754fe8cf..db8fca20a 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -787,10 +787,13 @@ class FullO3CPU : public BaseO3CPU
/** CPU pushRequest function, forwards request to LSQ. */
Fault pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
unsigned int size, Addr addr, Request::Flags flags,
- uint64_t *res, AtomicOpFunctor *amo_op = nullptr)
+ uint64_t *res, AtomicOpFunctor *amo_op = nullptr,
+ const std::vector<bool>& byteEnable =
+ std::vector<bool>())
+
{
return iew.ldstQueue.pushRequest(inst, isLoad, data, size, addr,
- flags, res, amo_op);
+ flags, res, amo_op, byteEnable);
}
/** CPU read function, forwards read to LSQ. */
diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh
index a6037b7f4..84f1411a5 100644
--- a/src/cpu/o3/lsq.hh
+++ b/src/cpu/o3/lsq.hh
@@ -50,6 +50,7 @@
#include "arch/generic/tlb.hh"
#include "cpu/inst_seq.hh"
#include "cpu/o3/lsq_unit.hh"
+#include "cpu/utils.hh"
#include "enums/SMTQueuePolicy.hh"
#include "mem/port.hh"
#include "sim/sim_object.hh"
@@ -251,6 +252,7 @@ class LSQ
const Addr _addr;
const uint32_t _size;
const Request::Flags _flags;
+ std::vector<bool> _byteEnable;
uint32_t _numOutstandingPackets;
AtomicOpFunctor *_amo_op;
protected:
@@ -351,6 +353,28 @@ class LSQ
}
}
+ /** Helper function used to add a (sub)request, given its address
+ * `addr`, size `size` and byte-enable mask `byteEnable`.
+ *
+ * The request is only added if the mask is empty or if there is at
+ * least an active element in it.
+ */
+ void
+ addRequest(Addr addr, unsigned size,
+ const std::vector<bool>& byteEnable)
+ {
+ if (byteEnable.empty() ||
+ isAnyActiveElement(byteEnable.begin(), byteEnable.end())) {
+ auto request = std::make_shared<Request>(_inst->getASID(),
+ addr, size, _flags, _inst->masterId(),
+ _inst->instAddr(), _inst->contextId());
+ if (!byteEnable.empty()) {
+ request->setByteEnable(byteEnable);
+ }
+ _requests.push_back(request);
+ }
+ }
+
/** Destructor.
* The LSQRequest owns the request. If the packet has already been
* sent, the sender state will be deleted upon receiving the reply.
@@ -609,11 +633,17 @@ class LSQ
* declaration of the names in the parent class. */
using Flag = typename LSQRequest::Flag;
using State = typename LSQRequest::State;
+ using LSQRequest::_addr;
using LSQRequest::_fault;
+ using LSQRequest::_flags;
+ using LSQRequest::_size;
+ using LSQRequest::_byteEnable;
+ using LSQRequest::_requests;
using LSQRequest::_inst;
using LSQRequest::_packets;
using LSQRequest::_port;
using LSQRequest::_res;
+ using LSQRequest::_taskId;
using LSQRequest::_senderState;
using LSQRequest::_state;
using LSQRequest::flags;
@@ -635,14 +665,8 @@ class LSQ
uint64_t* res = nullptr,
AtomicOpFunctor* amo_op = nullptr) :
LSQRequest(port, inst, isLoad, addr, size, flags_, data, res,
- amo_op)
- {
- LSQRequest::_requests.push_back(
- std::make_shared<Request>(inst->getASID(), addr, size,
- flags_, inst->masterId(), inst->instAddr(),
- inst->contextId(), amo_op));
- LSQRequest::_requests.back()->setReqInstSeqNum(inst->seqNum);
- }
+ amo_op) {}
+
inline virtual ~SingleDataRequest() {}
virtual void initiateTranslation();
virtual void finish(const Fault &fault, const RequestPtr &req,
@@ -671,6 +695,7 @@ class LSQ
using LSQRequest::_port;
using LSQRequest::_requests;
using LSQRequest::_res;
+ using LSQRequest::_byteEnable;
using LSQRequest::_senderState;
using LSQRequest::_size;
using LSQRequest::_state;
@@ -691,14 +716,14 @@ class LSQ
RequestPtr mainReq;
PacketPtr _mainPacket;
-
public:
SplitDataRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad,
const Addr& addr, const uint32_t& size,
const Request::Flags & flags_,
PacketDataPtr data = nullptr,
uint64_t* res = nullptr) :
- LSQRequest(port, inst, isLoad, addr, size, flags_, data, res),
+ LSQRequest(port, inst, isLoad, addr, size, flags_, data, res,
+ nullptr),
numFragments(0),
numReceivedPackets(0),
mainReq(nullptr),
@@ -949,7 +974,8 @@ class LSQ
Fault pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
unsigned int size, Addr addr, Request::Flags flags,
- uint64_t *res, AtomicOpFunctor *amo_op);
+ uint64_t *res, AtomicOpFunctor *amo_op,
+ const std::vector<bool>& byteEnable);
/** The CPU pointer. */
O3CPU *cpu;
diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh
index 732712029..70621a523 100644
--- a/src/cpu/o3/lsq_impl.hh
+++ b/src/cpu/o3/lsq_impl.hh
@@ -681,29 +681,12 @@ LSQ<Impl>::dumpInsts() const
}
}
-static Addr
-addrBlockOffset(Addr addr, unsigned int block_size)
-{
- return addr & (block_size - 1);
-}
-
-static Addr
-addrBlockAlign(Addr addr, uint64_t block_size)
-{
- return addr & ~(block_size - 1);
-}
-
-static bool
-transferNeedsBurst(Addr addr, uint64_t size, uint64_t block_size)
-{
- return (addrBlockOffset(addr, block_size) + size) > block_size;
-}
-
template<class Impl>
Fault
LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
unsigned int size, Addr addr, Request::Flags flags,
- uint64_t *res, AtomicOpFunctor *amo_op)
+ uint64_t *res, AtomicOpFunctor *amo_op,
+ const std::vector<bool>& byteEnable)
{
// This comming request can be either load, store or atomic.
// Atomic request has a corresponding pointer to its atomic memory
@@ -735,6 +718,9 @@ LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
size, flags, data, res, amo_op);
}
assert(req);
+ if (!byteEnable.empty()) {
+ req->_byteEnable = byteEnable;
+ }
inst->setRequest();
req->taskId(cpu->taskId());
@@ -756,6 +742,7 @@ LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
else
inst->getFault() = cpu->write(req, data, inst->sqIdx);
} else if (isLoad) {
+ inst->setMemAccPredicate(false);
// Commit will have to clean up whatever happened. Set this
// instruction as executed.
inst->setExecuted();
@@ -848,14 +835,21 @@ template<class Impl>
void
LSQ<Impl>::SingleDataRequest::initiateTranslation()
{
- _inst->translationStarted(true);
- setState(State::Translation);
- flags.set(Flag::TranslationStarted);
+ assert(_requests.size() == 0);
- _inst->savedReq = this;
- sendFragmentToTranslation(0);
+ this->addRequest(_addr, _size, _byteEnable);
- if (isTranslationComplete()) {
+ if (_requests.size() > 0) {
+ _requests.back()->setReqInstSeqNum(_inst->seqNum);
+ _requests.back()->taskId(_taskId);
+ _inst->translationStarted(true);
+ setState(State::Translation);
+ flags.set(Flag::TranslationStarted);
+
+ _inst->savedReq = this;
+ sendFragmentToTranslation(0);
+ } else {
+ _inst->setMemAccPredicate(false);
}
}
@@ -877,11 +871,7 @@ template<class Impl>
void
LSQ<Impl>::SplitDataRequest::initiateTranslation()
{
- _inst->translationStarted(true);
- setState(State::Translation);
- flags.set(Flag::TranslationStarted);
-
- unsigned int cacheLineSize = _port.cacheLineSize();
+ auto cacheLineSize = _port.cacheLineSize();
Addr base_addr = _addr;
Addr next_addr = addrBlockAlign(_addr + cacheLineSize, cacheLineSize);
Addr final_addr = addrBlockAlign(_addr + _size, cacheLineSize);
@@ -890,6 +880,9 @@ LSQ<Impl>::SplitDataRequest::initiateTranslation()
mainReq = std::make_shared<Request>(_inst->getASID(), base_addr,
_size, _flags, _inst->masterId(),
_inst->instAddr(), _inst->contextId());
+ if (!_byteEnable.empty()) {
+ mainReq->setByteEnable(_byteEnable);
+ }
// Paddr is not used in mainReq. However, we will accumulate the flags
// from the sub requests into mainReq by calling setFlags() in finish().
@@ -898,39 +891,63 @@ LSQ<Impl>::SplitDataRequest::initiateTranslation()
mainReq->setPaddr(0);
/* Get the pre-fix, possibly unaligned. */
- _requests.push_back(std::make_shared<Request>(_inst->getASID(), base_addr,
- next_addr - base_addr, _flags, _inst->masterId(),
- _inst->instAddr(), _inst->contextId()));
+ if (_byteEnable.empty()) {
+ this->addRequest(base_addr, next_addr - base_addr, _byteEnable);
+ } else {
+ auto it_start = _byteEnable.begin();
+ auto it_end = _byteEnable.begin() + (next_addr - base_addr);
+ this->addRequest(base_addr, next_addr - base_addr,
+ std::vector<bool>(it_start, it_end));
+ }
size_so_far = next_addr - base_addr;
/* We are block aligned now, reading whole blocks. */
base_addr = next_addr;
while (base_addr != final_addr) {
- _requests.push_back(std::make_shared<Request>(_inst->getASID(),
- base_addr, cacheLineSize, _flags, _inst->masterId(),
- _inst->instAddr(), _inst->contextId()));
+ if (_byteEnable.empty()) {
+ this->addRequest(base_addr, cacheLineSize, _byteEnable);
+ } else {
+ auto it_start = _byteEnable.begin() + size_so_far;
+ auto it_end = _byteEnable.begin() + size_so_far + cacheLineSize;
+ this->addRequest(base_addr, cacheLineSize,
+ std::vector<bool>(it_start, it_end));
+ }
size_so_far += cacheLineSize;
base_addr += cacheLineSize;
}
/* Deal with the tail. */
if (size_so_far < _size) {
- _requests.push_back(std::make_shared<Request>(_inst->getASID(),
- base_addr, _size - size_so_far, _flags, _inst->masterId(),
- _inst->instAddr(), _inst->contextId()));
+ if (_byteEnable.empty()) {
+ this->addRequest(base_addr, _size - size_so_far, _byteEnable);
+ } else {
+ auto it_start = _byteEnable.begin() + size_so_far;
+ auto it_end = _byteEnable.end();
+ this->addRequest(base_addr, _size - size_so_far,
+ std::vector<bool>(it_start, it_end));
+ }
}
- /* Setup the requests and send them to translation. */
- for (auto& r: _requests) {
- r->setReqInstSeqNum(_inst->seqNum);
- r->taskId(_taskId);
- }
- this->_inst->savedReq = this;
- numInTranslationFragments = 0;
- numTranslatedFragments = 0;
+ if (_requests.size() > 0) {
+ /* Setup the requests and send them to translation. */
+ for (auto& r: _requests) {
+ r->setReqInstSeqNum(_inst->seqNum);
+ r->taskId(_taskId);
+ }
- for (uint32_t i = 0; i < _requests.size(); i++) {
- sendFragmentToTranslation(i);
+ _inst->translationStarted(true);
+ setState(State::Translation);
+ flags.set(Flag::TranslationStarted);
+ this->_inst->savedReq = this;
+ numInTranslationFragments = 0;
+ numTranslatedFragments = 0;
+ _fault.resize(_requests.size());
+
+ for (uint32_t i = 0; i < _requests.size(); i++) {
+ sendFragmentToTranslation(i);
+ }
+ } else {
+ _inst->setMemAccPredicate(false);
}
}
@@ -968,8 +985,6 @@ LSQ<Impl>::SplitDataRequest::recvTimingResp(PacketPtr pkt)
while (pktIdx < _packets.size() && pkt != _packets[pktIdx])
pktIdx++;
assert(pktIdx < _packets.size());
- assert(pkt->req == _requests[pktIdx]);
- assert(pkt == _packets[pktIdx]);
numReceivedPackets++;
state->outstanding--;
if (numReceivedPackets == _packets.size()) {
@@ -1012,16 +1027,19 @@ void
LSQ<Impl>::SplitDataRequest::buildPackets()
{
/* Extra data?? */
- ptrdiff_t offset = 0;
+ Addr base_address = _addr;
+
if (_packets.size() == 0) {
/* New stuff */
if (isLoad()) {
_mainPacket = Packet::createRead(mainReq);
_mainPacket->dataStatic(_inst->memData);
}
- for (auto& r: _requests) {
+ for (int i = 0; i < _requests.size() && _fault[i] == NoFault; i++) {
+ RequestPtr r = _requests[i];
PacketPtr pkt = isLoad() ? Packet::createRead(r)
- : Packet::createWrite(r);
+ : Packet::createWrite(r);
+ ptrdiff_t offset = r->getVaddr() - base_address;
if (isLoad()) {
pkt->dataStatic(_inst->memData + offset);
} else {
@@ -1031,12 +1049,11 @@ LSQ<Impl>::SplitDataRequest::buildPackets()
r->getSize());
pkt->dataDynamic(req_data);
}
- offset += r->getSize();
pkt->senderState = _senderState;
_packets.push_back(pkt);
}
}
- assert(_packets.size() == _requests.size());
+ assert(_packets.size() > 0);
}
template<class Impl>
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 9323e8634..21bed99fa 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -542,8 +542,7 @@ LSQUnit<Impl>::executeLoad(const DynInstPtr &inst)
load_fault = inst->initiateAcc();
- if (!inst->readMemAccPredicate()) {
- assert(load_fault == NoFault);
+ if (load_fault == NoFault && !inst->readMemAccPredicate()) {
assert(inst->readPredicate());
inst->setExecuted();
inst->completeAcc(nullptr);
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index caf2427ef..c5b024532 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -49,6 +49,7 @@
#include "base/output.hh"
#include "config/the_isa.hh"
#include "cpu/exetrace.hh"
+#include "cpu/utils.hh"
#include "debug/Drain.hh"
#include "debug/ExecFaulting.hh"
#include "debug/SimpleCPU.hh"
@@ -333,9 +334,43 @@ AtomicSimpleCPU::AtomicCPUDPort::recvFunctionalSnoop(PacketPtr pkt)
}
}
+bool
+AtomicSimpleCPU::genMemFragmentRequest(const RequestPtr& req, Addr frag_addr,
+ int size, Request::Flags flags,
+ const std::vector<bool>& byte_enable,
+ int& frag_size, int& size_left) const
+{
+ bool predicate = true;
+ Addr inst_addr = threadInfo[curThread]->thread->pcState().instAddr();
+
+ frag_size = std::min(
+ cacheLineSize() - addrBlockOffset(frag_addr, cacheLineSize()),
+ (Addr) size_left);
+ size_left -= frag_size;
+
+ if (!byte_enable.empty()) {
+ // Set up byte-enable mask for the current fragment
+ auto it_start = byte_enable.begin() + (size - (frag_size + size_left));
+ auto it_end = byte_enable.begin() + (size - size_left);
+ if (isAnyActiveElement(it_start, it_end)) {
+ req->setVirt(0, frag_addr, frag_size, flags, dataMasterId(),
+ inst_addr);
+ req->setByteEnable(std::vector<bool>(it_start, it_end));
+ } else {
+ predicate = false;
+ }
+ } else {
+ req->setVirt(0, frag_addr, frag_size, flags, dataMasterId(),
+ inst_addr);
+ }
+
+ return predicate;
+}
+
Fault
AtomicSimpleCPU::readMem(Addr addr, uint8_t * data, unsigned size,
- Request::Flags flags)
+ Request::Flags flags,
+ const std::vector<bool>& byteEnable)
{
SimpleExecContext& t_info = *threadInfo[curThread];
SimpleThread* thread = t_info.thread;
@@ -346,28 +381,29 @@ AtomicSimpleCPU::readMem(Addr addr, uint8_t * data, unsigned size,
if (traceData)
traceData->setMem(addr, size, flags);
- //The size of the data we're trying to read.
- int fullSize = size;
-
- //The address of the second part of this access if it needs to be split
- //across a cache line boundary.
- Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());
-
- if (secondAddr > addr)
- size = secondAddr - addr;
-
dcache_latency = 0;
req->taskId(taskId());
+
+ Addr frag_addr = addr;
+ int frag_size = 0;
+ int size_left = size;
+ bool predicate;
+ Fault fault = NoFault;
+
while (1) {
- req->setVirt(0, addr, size, flags, dataMasterId(), thread->pcState().instAddr());
+ predicate = genMemFragmentRequest(req, frag_addr, size, flags,
+ byteEnable, frag_size, size_left);
// translate to physical address
- Fault fault = thread->dtb->translateAtomic(req, thread->getTC(),
- BaseTLB::Read);
+ if (predicate) {
+ fault = thread->dtb->translateAtomic(req, thread->getTC(),
+ BaseTLB::Read);
+ }
// Now do the access.
- if (fault == NoFault && !req->getFlags().isSet(Request::NO_ACCESS)) {
+ if (predicate && fault == NoFault &&
+ !req->getFlags().isSet(Request::NO_ACCESS)) {
Packet pkt(req, Packet::makeReadCmd(req));
pkt.dataStatic(data);
@@ -394,33 +430,29 @@ AtomicSimpleCPU::readMem(Addr addr, uint8_t * data, unsigned size,
}
}
- //If we don't need to access a second cache line, stop now.
- if (secondAddr <= addr)
- {
+ // If we don't need to access further cache lines, stop now.
+ if (size_left == 0) {
if (req->isLockedRMW() && fault == NoFault) {
assert(!locked);
locked = true;
}
-
return fault;
}
/*
- * Set up for accessing the second cache line.
+ * Set up for accessing the next cache line.
*/
+ frag_addr += frag_size;
//Move the pointer we're reading into to the correct location.
- data += size;
- //Adjust the size to get the remaining bytes.
- size = addr + fullSize - secondAddr;
- //And access the right address.
- addr = secondAddr;
+ data += frag_size;
}
}
Fault
AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size, Addr addr,
- Request::Flags flags, uint64_t *res)
+ Request::Flags flags, uint64_t *res,
+ const std::vector<bool>& byteEnable)
{
SimpleExecContext& t_info = *threadInfo[curThread];
SimpleThread* thread = t_info.thread;
@@ -439,32 +471,37 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size, Addr addr,
if (traceData)
traceData->setMem(addr, size, flags);
- //The size of the data we're trying to read.
- int fullSize = size;
-
- //The address of the second part of this access if it needs to be split
- //across a cache line boundary.
- Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());
-
- if (secondAddr > addr)
- size = secondAddr - addr;
-
dcache_latency = 0;
req->taskId(taskId());
+
+ Addr frag_addr = addr;
+ int frag_size = 0;
+ int size_left = size;
+ int curr_frag_id = 0;
+ bool predicate;
+ Fault fault = NoFault;
+
while (1) {
- req->setVirt(0, addr, size, flags, dataMasterId(), thread->pcState().instAddr());
+ predicate = genMemFragmentRequest(req, frag_addr, size, flags,
+ byteEnable, frag_size, size_left);
// translate to physical address
- Fault fault = thread->dtb->translateAtomic(req, thread->getTC(), BaseTLB::Write);
+ if (predicate)
+ fault = thread->dtb->translateAtomic(req, thread->getTC(),
+ BaseTLB::Write);
// Now do the access.
- if (fault == NoFault) {
+ if (predicate && fault == NoFault) {
bool do_access = true; // flag to suppress cache access
if (req->isLLSC()) {
- do_access = TheISA::handleLockedWrite(thread, req, dcachePort.cacheBlockMask);
+ assert(curr_frag_id == 0);
+ do_access =
+ TheISA::handleLockedWrite(thread, req,
+ dcachePort.cacheBlockMask);
} else if (req->isSwap()) {
+ assert(curr_frag_id == 0);
if (req->isCondSwap()) {
assert(res);
req->setExtraData(*res);
@@ -488,8 +525,8 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size, Addr addr,
assert(!pkt.isError());
if (req->isSwap()) {
- assert(res);
- memcpy(res, pkt.getConstPtr<uint8_t>(), fullSize);
+ assert(res && curr_frag_id == 0);
+ memcpy(res, pkt.getConstPtr<uint8_t>(), size);
}
}
@@ -500,14 +537,14 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size, Addr addr,
//If there's a fault or we don't need to access a second cache line,
//stop now.
- if (fault != NoFault || secondAddr <= addr)
+ if (fault != NoFault || size_left == 0)
{
if (req->isLockedRMW() && fault == NoFault) {
- assert(locked);
+ assert(byteEnable.empty());
+ assert(locked && curr_frag_id == 0);
locked = false;
}
-
if (fault != NoFault && req->isPrefetch()) {
return NoFault;
} else {
@@ -516,15 +553,14 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size, Addr addr,
}
/*
- * Set up for accessing the second cache line.
+ * Set up for accessing the next cache line.
*/
+ frag_addr += frag_size;
//Move the pointer we're reading into to the correct location.
- data += size;
- //Adjust the size to get the remaining bytes.
- size = addr + fullSize - secondAddr;
- //And access the right address.
- addr = secondAddr;
+ data += frag_size;
+
+ curr_frag_id++;
}
}
diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh
index 84f379121..100306981 100644
--- a/src/cpu/simple/atomic.hh
+++ b/src/cpu/simple/atomic.hh
@@ -195,11 +195,36 @@ class AtomicSimpleCPU : public BaseSimpleCPU
void activateContext(ThreadID thread_num) override;
void suspendContext(ThreadID thread_num) override;
+ /**
+ * Helper function used to set up the request for a single fragment of a
+ * memory access.
+ *
+ * Takes care of setting up the appropriate byte-enable mask for the
+ * fragment, given the mask for the entire memory access.
+ *
+ * @param req Pointer to the Request object to populate.
+ * @param frag_addr Start address of the fragment.
+ * @param size Total size of the memory access in bytes.
+ * @param flags Request flags.
+ * @param byte_enable Byte-enable mask for the entire memory access.
+ * @param[out] frag_size Fragment size.
+ * @param[in,out] size_left Size left to be processed in the memory access.
+ * @return True if the byte-enable mask for the fragment is not all-false.
+ */
+ bool genMemFragmentRequest(const RequestPtr& req, Addr frag_addr,
+ int size, Request::Flags flags,
+ const std::vector<bool>& byte_enable,
+ int& frag_size, int& size_left) const;
+
Fault readMem(Addr addr, uint8_t *data, unsigned size,
- Request::Flags flags) override;
+ Request::Flags flags,
+ const std::vector<bool>& byteEnable = std::vector<bool>())
+ override;
Fault writeMem(uint8_t *data, unsigned size,
- Addr addr, Request::Flags flags, uint64_t *res) override;
+ Addr addr, Request::Flags flags, uint64_t *res,
+ const std::vector<bool>& byteEnable = std::vector<bool>())
+ override;
Fault amoMem(Addr addr, uint8_t* data, unsigned size,
Request::Flags flags, AtomicOpFunctor *amo_op) override;
diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc
index 298ba9f9e..816add707 100644
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010-2012, 2015, 2017 ARM Limited
+ * Copyright (c) 2010-2012, 2015, 2017, 2018 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -494,6 +494,10 @@ BaseSimpleCPU::preExecute()
thread->setFloatReg(ZeroReg, 0);
#endif // ALPHA_ISA
+ // resets predicates
+ t_info.setPredicate(true);
+ t_info.setMemAccPredicate(true);
+
// check for instruction-count-based events
comInstEventQueue[curThread]->serviceEvents(t_info.numInst);
system->instEventQueue.serviceEvents(system->totalNumInsts);
diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh
index 8060b07ad..5404e5df8 100644
--- a/src/cpu/simple/base.hh
+++ b/src/cpu/simple/base.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011-2012,2015 ARM Limited
+ * Copyright (c) 2011-2012,2015,2018 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -143,15 +143,21 @@ class BaseSimpleCPU : public BaseCPU
void startup() override;
virtual Fault readMem(Addr addr, uint8_t* data, unsigned size,
- Request::Flags flags)
+ Request::Flags flags,
+ const std::vector<bool>& byteEnable =
+ std::vector<bool>())
{ panic("readMem() is not implemented\n"); }
virtual Fault initiateMemRead(Addr addr, unsigned size,
- Request::Flags flags)
+ Request::Flags flags,
+ const std::vector<bool>& byteEnable =
+ std::vector<bool>())
{ panic("initiateMemRead() is not implemented\n"); }
virtual Fault writeMem(uint8_t* data, unsigned size, Addr addr,
- Request::Flags flags, uint64_t* res)
+ Request::Flags flags, uint64_t* res,
+ const std::vector<bool>& byteEnable =
+ std::vector<bool>())
{ panic("writeMem() is not implemented\n"); }
virtual Fault amoMem(Addr addr, uint8_t* data, unsigned size,
diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh
index be7a863c5..de98d6efd 100644
--- a/src/cpu/simple/exec_context.hh
+++ b/src/cpu/simple/exec_context.hh
@@ -434,26 +434,32 @@ class SimpleExecContext : public ExecContext {
thread->pcState(val);
}
-
Fault
readMem(Addr addr, uint8_t *data, unsigned int size,
- Request::Flags flags) override
+ Request::Flags flags,
+ const std::vector<bool>& byteEnable = std::vector<bool>())
+ override
{
- return cpu->readMem(addr, data, size, flags);
+ return cpu->readMem(addr, data, size, flags, byteEnable);
}
Fault
initiateMemRead(Addr addr, unsigned int size,
- Request::Flags flags) override
+ Request::Flags flags,
+ const std::vector<bool>& byteEnable = std::vector<bool>())
+ override
{
- return cpu->initiateMemRead(addr, size, flags);
+ return cpu->initiateMemRead(addr, size, flags, byteEnable);
}
Fault
writeMem(uint8_t *data, unsigned int size, Addr addr,
- Request::Flags flags, uint64_t *res) override
+ Request::Flags flags, uint64_t *res,
+ const std::vector<bool>& byteEnable = std::vector<bool>())
+ override
{
- return cpu->writeMem(data, size, addr, flags, res);
+ assert(byteEnable.empty() || byteEnable.size() == size);
+ return cpu->writeMem(data, size, addr, flags, res, byteEnable);
}
Fault amoMem(Addr addr, uint8_t *data, unsigned int size,
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index 637308a96..454259099 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -1,6 +1,6 @@
/*
* Copyright 2014 Google, Inc.
- * Copyright (c) 2010-2013,2015,2017 ARM Limited
+ * Copyright (c) 2010-2013,2015,2017-2018 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -417,7 +417,8 @@ TimingSimpleCPU::buildSplitPacket(PacketPtr &pkt1, PacketPtr &pkt2,
Fault
TimingSimpleCPU::initiateMemRead(Addr addr, unsigned size,
- Request::Flags flags)
+ Request::Flags flags,
+ const std::vector<bool>& byteEnable)
{
SimpleExecContext &t_info = *threadInfo[curThread];
SimpleThread* thread = t_info.thread;
@@ -434,6 +435,9 @@ TimingSimpleCPU::initiateMemRead(Addr addr, unsigned size,
RequestPtr req = std::make_shared<Request>(
asid, addr, size, flags, dataMasterId(), pc,
thread->contextId());
+ if (!byteEnable.empty()) {
+ req->setByteEnable(byteEnable);
+ }
req->taskId(taskId());
@@ -491,7 +495,8 @@ TimingSimpleCPU::handleWritePacket()
Fault
TimingSimpleCPU::writeMem(uint8_t *data, unsigned size,
- Addr addr, Request::Flags flags, uint64_t *res)
+ Addr addr, Request::Flags flags, uint64_t *res,
+ const std::vector<bool>& byteEnable)
{
SimpleExecContext &t_info = *threadInfo[curThread];
SimpleThread* thread = t_info.thread;
@@ -516,6 +521,9 @@ TimingSimpleCPU::writeMem(uint8_t *data, unsigned size,
RequestPtr req = std::make_shared<Request>(
asid, addr, size, flags, dataMasterId(), pc,
thread->contextId());
+ if (!byteEnable.empty()) {
+ req->setByteEnable(byteEnable);
+ }
req->taskId(taskId());
@@ -523,6 +531,10 @@ TimingSimpleCPU::writeMem(uint8_t *data, unsigned size,
assert(split_addr <= addr || split_addr - addr < block_size);
_status = DTBWaitResponse;
+
+ // TODO: TimingSimpleCPU doesn't support arbitrarily long multi-line mem.
+ // accesses yet
+
if (split_addr > addr) {
RequestPtr req1, req2;
assert(!req->isLLSC() && !req->isSwap());
diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh
index ce0a4dbfc..a49822fc1 100644
--- a/src/cpu/simple/timing.hh
+++ b/src/cpu/simple/timing.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012-2013,2015 ARM Limited
+ * Copyright (c) 2012-2013,2015,2018 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -283,10 +283,14 @@ class TimingSimpleCPU : public BaseSimpleCPU
void suspendContext(ThreadID thread_num) override;
Fault initiateMemRead(Addr addr, unsigned size,
- Request::Flags flags) override;
+ Request::Flags flags,
+ const std::vector<bool>& byteEnable =std::vector<bool>())
+ override;
Fault writeMem(uint8_t *data, unsigned size,
- Addr addr, Request::Flags flags, uint64_t *res) override;
+ Addr addr, Request::Flags flags, uint64_t *res,
+ const std::vector<bool>& byteEnable = std::vector<bool>())
+ override;
Fault initiateMemAMO(Addr addr, unsigned size, Request::Flags flags,
AtomicOpFunctor *amo_op) override;
diff --git a/src/cpu/simple_thread.cc b/src/cpu/simple_thread.cc
index 9067e877b..0936e41ad 100644
--- a/src/cpu/simple_thread.cc
+++ b/src/cpu/simple_thread.cc
@@ -77,7 +77,7 @@ SimpleThread::SimpleThread(BaseCPU *_cpu, int _thread_num, System *_sys,
Process *_process, BaseTLB *_itb,
BaseTLB *_dtb, TheISA::ISA *_isa)
: ThreadState(_cpu, _thread_num, _process), isa(_isa),
- predicate(false), system(_sys),
+ predicate(true), memAccPredicate(true), system(_sys),
itb(_itb), dtb(_dtb), decoder(TheISA::Decoder(_isa))
{
clearArchRegs();
@@ -87,8 +87,9 @@ SimpleThread::SimpleThread(BaseCPU *_cpu, int _thread_num, System *_sys,
SimpleThread::SimpleThread(BaseCPU *_cpu, int _thread_num, System *_sys,
BaseTLB *_itb, BaseTLB *_dtb,
TheISA::ISA *_isa, bool use_kernel_stats)
- : ThreadState(_cpu, _thread_num, NULL), isa(_isa), system(_sys), itb(_itb),
- dtb(_dtb), decoder(TheISA::Decoder(_isa))
+ : ThreadState(_cpu, _thread_num, NULL), isa(_isa),
+ predicate(true), memAccPredicate(true), system(_sys),
+ itb(_itb), dtb(_dtb), decoder(TheISA::Decoder(_isa))
{
quiesceEvent = new EndQuiesceEvent(this);
diff --git a/src/cpu/utils.hh b/src/cpu/utils.hh
new file mode 100644
index 000000000..4c1318174
--- /dev/null
+++ b/src/cpu/utils.hh
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Andrew Bardsley
+ */
+
+#ifndef __CPU_UTILS_HH__
+#define __CPU_UTILS_HH__
+
+#include "base/types.hh"
+
+/**
+ * Calculates the offset of a given address wrt aligned fixed-size blocks.
+ * @param addr Input address.
+ * @param block_size Block size in bytes.
+ * @return Offset of the given address in bytes.
+ */
+inline Addr
+addrBlockOffset(Addr addr, Addr block_size)
+{
+ return addr & (block_size - 1);
+}
+
+/**
+ * Returns the address of the closest aligned fixed-size block to the given
+ * address.
+ * @param addr Input address.
+ * @param block_size Block size in bytes.
+ * @return Address of the closest aligned block.
+ */
+inline Addr
+addrBlockAlign(Addr addr, Addr block_size)
+{
+ return addr & ~(block_size - 1);
+}
+
+/**
+ * Returns true if the given memory access (address, size) needs to be
+ * fragmented across aligned fixed-size blocks.
+ * @param addr Address of the memory access.
+ * @param size Size of the memory access.
+ * @param block_size Block size in bytes.
+ * @return True if the memory access needs to be fragmented.
+ */
+inline bool
+transferNeedsBurst(Addr addr, unsigned int size, unsigned int block_size)
+{
+ return (addrBlockOffset(addr, block_size) + size) > block_size;
+}
+
+/**
+ * Test if there is any active element in an enablement range.
+ */
+inline bool
+isAnyActiveElement(const std::vector<bool>::const_iterator& it_start,
+ const std::vector<bool>::const_iterator& it_end)
+{
+ auto it_tmp = it_start;
+ for (;it_tmp != it_end && !(*it_tmp); ++it_tmp);
+ return (it_tmp != it_end);
+}
+
+#endif // __CPU_UTILS_HH__
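The helpers in the new header are all the CPU models need to split an access into line-sized fragments and to decide whether a fragment can be skipped. A small self-contained sketch of their combined use (values are illustrative; Addr comes from base/types.hh via the header):

    #include <vector>
    #include "cpu/utils.hh"

    void fragmentExample()
    {
        // 12-byte access at 0x3c with 64-byte cache lines.
        Addr addr = 0x3c;
        unsigned size = 12, line = 64;

        bool split = transferNeedsBurst(addr, size, line);      // true
        Addr first_frag = line - addrBlockOffset(addr, line);   // 4 bytes
        Addr next_line = addrBlockAlign(addr, line) + line;     // 0x40

        // A fragment whose byte-enable slice is all-false can be dropped.
        std::vector<bool> be(size, false);
        be[6] = true;  // single active byte, lands in the second fragment
        bool first_active = isAnyActiveElement(
            be.begin(), be.begin() + first_frag);                // false
        bool second_active = isAnyActiveElement(
            be.begin() + first_frag, be.end());                  // true
    }
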
diff --git a/src/mem/abstract_mem.cc b/src/mem/abstract_mem.cc
index f7b02ce17..a998530fd 100644
--- a/src/mem/abstract_mem.cc
+++ b/src/mem/abstract_mem.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010-2012,2017 ARM Limited
+ * Copyright (c) 2010-2012,2017-2018 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
diff --git a/src/mem/cache/cache.cc b/src/mem/cache/cache.cc
index 494a998a5..b72ff4261 100644
--- a/src/mem/cache/cache.cc
+++ b/src/mem/cache/cache.cc
@@ -297,7 +297,8 @@ Cache::promoteWholeLineWrites(PacketPtr pkt)
{
// Cache line clearing instructions
if (doFastWrites && (pkt->cmd == MemCmd::WriteReq) &&
- (pkt->getSize() == blkSize) && (pkt->getOffset(blkSize) == 0)) {
+ (pkt->getSize() == blkSize) && (pkt->getOffset(blkSize) == 0) &&
+ !pkt->isMaskedWrite()) {
pkt->cmd = MemCmd::WriteLineReq;
DPRINTF(Cache, "packet promoted from Write to WriteLineReq\n");
}
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index 93b3ad5de..130cc41ad 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -1092,6 +1092,7 @@ class Packet : public Printable
getPtr()
{
assert(flags.isSet(STATIC_DATA|DYNAMIC_DATA));
+ assert(!isMaskedWrite());
return (T*)data;
}
@@ -1180,10 +1181,11 @@ class Packet : public Printable
// same pointer from source to destination and back
assert(p != getPtr<uint8_t>() || flags.isSet(STATIC_DATA));
- if (p != getPtr<uint8_t>())
+ if (p != getPtr<uint8_t>()) {
// for packet with allocated dynamic data, we copy data from
// one to the other, e.g. a forwarded response to a response
std::memcpy(getPtr<uint8_t>(), p, getSize());
+ }
}
/**
@@ -1203,7 +1205,19 @@ class Packet : public Printable
void
writeData(uint8_t *p) const
{
- std::memcpy(p, getConstPtr<uint8_t>(), getSize());
+ if (!isMaskedWrite()) {
+ std::memcpy(p, getConstPtr<uint8_t>(), getSize());
+ } else {
+ assert(req->getByteEnable().size() == getSize());
+ // Write only the enabled bytes
+ const uint8_t *base = getConstPtr<uint8_t>();
+ for (int i = 0; i < getSize(); i++) {
+ if (req->getByteEnable()[i]) {
+ p[i] = *(base + i);
+ }
+ // Disabled bytes stay untouched
+ }
+ }
}
/**
@@ -1268,6 +1282,17 @@ class Packet : public Printable
bool
trySatisfyFunctional(PacketPtr other)
{
+ if (other->isMaskedWrite()) {
+ // Do not forward data if overlapping with a masked write
+ if (_isSecure == other->isSecure() &&
+ getAddr() <= (other->getAddr() + other->getSize() - 1) &&
+ other->getAddr() <= (getAddr() + getSize() - 1)) {
+ warn("Trying to check against a masked write, skipping."
+ " (addr: 0x%x, other addr: 0x%x)", getAddr(),
+ other->getAddr());
+ }
+ return false;
+ }
// all packets that are carrying a payload should have a valid
// data pointer
return trySatisfyFunctional(other, other->getAddr(), other->isSecure(),
@@ -1296,6 +1321,12 @@ class Packet : public Printable
return cmd == MemCmd::CleanEvict || cmd == MemCmd::WritebackClean;
}
+ bool
+ isMaskedWrite() const
+ {
+ return (cmd == MemCmd::WriteReq && !req->getByteEnable().empty());
+ }
+
/**
* Check a functional request against a memory value represented
* by a base/size pair and an associated data array. If the
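The key behavioural change in Packet is that writeData() honours the byte-enable mask of a masked write: disabled bytes in the destination are left untouched rather than overwritten. A self-contained sketch of that merge semantics on plain buffers (the function name below is illustrative, not gem5 API):

    #include <cstdint>
    #include <vector>

    // Mirrors the masked branch of Packet::writeData(): copy only the
    // bytes whose byte-enable bit is set, leave the rest of 'p' untouched.
    void maskedWrite(uint8_t *p, const uint8_t *data,
                     const std::vector<bool> &byte_enable)
    {
        for (size_t i = 0; i < byte_enable.size(); i++) {
            if (byte_enable[i])
                p[i] = data[i];
        }
    }

    // Example: an 8-byte store with only the odd bytes enabled updates
    // p[1], p[3], p[5], p[7] and preserves the even bytes of p.
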
diff --git a/src/mem/request.hh b/src/mem/request.hh
index 2a53c21a4..324ae382e 100644
--- a/src/mem/request.hh
+++ b/src/mem/request.hh
@@ -320,6 +320,9 @@ class Request
*/
unsigned _size;
+ /** Byte-enable mask for writes. */
+ std::vector<bool> _byteEnable;
+
/** The requestor ID which is unique in the system for all ports
* that are capable of issuing a transaction
*/
@@ -567,6 +570,9 @@ class Request
* Generate two requests as if this request had been split into two
* pieces. The original request can't have been translated already.
*/
+ // TODO: this function is still required by TimingSimpleCPU - should be
+ // removed once TimingSimpleCPU will support arbitrarily long multi-line
+ // mem. accesses
void splitOnVaddr(Addr split_addr, RequestPtr &req1, RequestPtr &req2)
{
assert(privateFlags.isSet(VALID_VADDR));
@@ -577,6 +583,14 @@ class Request
req1->_size = split_addr - _vaddr;
req2->_vaddr = split_addr;
req2->_size = _size - req1->_size;
+ if (!_byteEnable.empty()) {
+ req1->_byteEnable = std::vector<bool>(
+ _byteEnable.begin(),
+ _byteEnable.begin() + req1->_size);
+ req2->_byteEnable = std::vector<bool>(
+ _byteEnable.begin() + req1->_size,
+ _byteEnable.end());
+ }
}
/**
@@ -628,6 +642,19 @@ class Request
return _size;
}
+ const std::vector<bool>&
+ getByteEnable() const
+ {
+ return _byteEnable;
+ }
+
+ void
+ setByteEnable(const std::vector<bool>& be)
+ {
+ assert(be.empty() || be.size() == _size);
+ _byteEnable = be;
+ }
+
/** Accessor for time. */
Tick
time() const
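
Putting the Request-side pieces together: a masked request is an ordinary Request plus a mask whose length equals the request size (setByteEnable() asserts this), and splitOnVaddr() carries the matching slice of the mask into each half. A hedged sketch of constructing such a request, using the seven-argument Request constructor seen elsewhere in this patch and assuming addr, flags, master_id, pc and cid are in scope (illustrative only, not part of the patch):

    // 16-byte store request with only the middle eight bytes enabled.
    auto req = std::make_shared<Request>(0 /* asid */, addr, 16, flags,
                                         master_id, pc, cid);
    std::vector<bool> be(16, false);
    for (int i = 4; i < 12; i++)
        be[i] = true;
    req->setByteEnable(be);        // be.size() must equal the request size

    // splitOnVaddr() (still used by TimingSimpleCPU) propagates the
    // corresponding slice of the mask into each half of the split.
    RequestPtr req1, req2;
    req->splitOnVaddr(addr + 8, req1, req2);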