summaryrefslogtreecommitdiff
path: root/src/cpu/minor
diff options
context:
space:
mode:
authorGabor Dozsa <gabor.dozsa@arm.com>2019-02-27 17:26:56 +0000
committerGiacomo Gabrielli <giacomo.gabrielli@arm.com>2019-07-27 20:51:31 +0000
commit46da8fb805407cdc224abe788e8c666f3b0dadd1 (patch)
tree38368de3852a7263d84e6b7a355cc1485bd6a5f8 /src/cpu/minor
parent7652b2f12c0acdc22d29deb4f786364c80c8528f (diff)
downloadgem5-46da8fb805407cdc224abe788e8c666f3b0dadd1.tar.xz
cpu: Add first-/non-faulting load support to Minor and O3
Some architectures allow masking faults of memory load instructions in some specific circumstances (e.g. first-faulting and non-faulting loads in Arm SVE). This patch adds support for such loads in the Minor and O3 CPU models. Change-Id: I264a81a078f049127779aa834e89f0e693ba0bea Signed-off-by: Gabor Dozsa <gabor.dozsa@arm.com> Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/19178 Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com> Maintainer: Andreas Sandberg <andreas.sandberg@arm.com> Tested-by: kokoro <noreply+kokoro@google.com>
Diffstat (limited to 'src/cpu/minor')
-rw-r--r--src/cpu/minor/dyn_inst.cc6
-rw-r--r--src/cpu/minor/dyn_inst.hh11
-rw-r--r--src/cpu/minor/exec_context.hh9
-rw-r--r--src/cpu/minor/execute.cc20
-rw-r--r--src/cpu/minor/lsq.cc115
-rw-r--r--src/cpu/minor/lsq.hh23
6 files changed, 135 insertions, 49 deletions
diff --git a/src/cpu/minor/dyn_inst.cc b/src/cpu/minor/dyn_inst.cc
index 353163758..087b718d3 100644
--- a/src/cpu/minor/dyn_inst.cc
+++ b/src/cpu/minor/dyn_inst.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013-2014, 2016 ARM Limited
+ * Copyright (c) 2013-2014, 2016,2018 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -108,6 +108,8 @@ MinorDynInst::reportData(std::ostream &os) const
os << "-";
else if (isFault())
os << "F;" << id;
+ else if (translationFault != NoFault)
+ os << "TF;" << id;
else
os << id;
}
@@ -120,6 +122,8 @@ operator <<(std::ostream &os, const MinorDynInst &inst)
if (inst.isFault())
os << "fault: \"" << inst.fault->name() << '"';
+ else if (inst.translationFault != NoFault)
+ os << "translation fault: \"" << inst.translationFault->name() << '"';
else if (inst.staticInst)
os << inst.staticInst->getName();
else
diff --git a/src/cpu/minor/dyn_inst.hh b/src/cpu/minor/dyn_inst.hh
index 0a8ff8acf..3eb7f980f 100644
--- a/src/cpu/minor/dyn_inst.hh
+++ b/src/cpu/minor/dyn_inst.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013-2014 ARM Limited
+ * Copyright (c) 2013-2014,2018 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -194,6 +194,9 @@ class MinorDynInst : public RefCounted
/** This instruction is in the LSQ, not a functional unit */
bool inLSQ;
+ /** Translation fault in case of a mem ref */
+ Fault translationFault;
+
/** The instruction has been sent to the store buffer */
bool inStoreBuffer;
@@ -233,9 +236,9 @@ class MinorDynInst : public RefCounted
staticInst(NULL), id(id_), traceData(NULL),
pc(TheISA::PCState(0)), fault(fault_),
triedToPredict(false), predictedTaken(false),
- fuIndex(0), inLSQ(false), inStoreBuffer(false),
- canEarlyIssue(false), predicate(true), memAccPredicate(true),
- instToWaitFor(0), extraCommitDelay(Cycles(0)),
+ fuIndex(0), inLSQ(false), translationFault(NoFault),
+ inStoreBuffer(false), canEarlyIssue(false), predicate(true),
+ memAccPredicate(true), instToWaitFor(0), extraCommitDelay(Cycles(0)),
extraCommitDelayExpr(NULL), minimumCommitCycle(Cycles(0))
{ }
diff --git a/src/cpu/minor/exec_context.hh b/src/cpu/minor/exec_context.hh
index 9f6fce4cd..1871e2479 100644
--- a/src/cpu/minor/exec_context.hh
+++ b/src/cpu/minor/exec_context.hh
@@ -116,9 +116,8 @@ class ExecContext : public ::ExecContext
const std::vector<bool>& byteEnable = std::vector<bool>())
override
{
- execute.getLSQ().pushRequest(inst, true /* load */, nullptr,
+ return execute.getLSQ().pushRequest(inst, true /* load */, nullptr,
size, addr, flags, nullptr, nullptr, byteEnable);
- return NoFault;
}
Fault
@@ -128,9 +127,8 @@ class ExecContext : public ::ExecContext
override
{
assert(byteEnable.empty() || byteEnable.size() == size);
- execute.getLSQ().pushRequest(inst, false /* store */, data,
+ return execute.getLSQ().pushRequest(inst, false /* store */, data,
size, addr, flags, res, nullptr, byteEnable);
- return NoFault;
}
Fault
@@ -138,9 +136,8 @@ class ExecContext : public ::ExecContext
AtomicOpFunctor *amo_op) override
{
// AMO requests are pushed through the store path
- execute.getLSQ().pushRequest(inst, false /* amo */, nullptr,
+ return execute.getLSQ().pushRequest(inst, false /* amo */, nullptr,
size, addr, flags, nullptr, amo_op);
- return NoFault;
}
RegVal
diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc
index 810ff11c6..c7fda489e 100644
--- a/src/cpu/minor/execute.cc
+++ b/src/cpu/minor/execute.cc
@@ -337,19 +337,19 @@ Execute::handleMemResponse(MinorDynInstPtr inst,
* context predicate, otherwise, it will be set to false */
bool use_context_predicate = true;
- if (response->fault != NoFault) {
+ if (inst->translationFault != NoFault) {
/* Invoke memory faults. */
DPRINTF(MinorMem, "Completing fault from DTLB access: %s\n",
- response->fault->name());
+ inst->translationFault->name());
if (inst->staticInst->isPrefetch()) {
DPRINTF(MinorMem, "Not taking fault on prefetch: %s\n",
- response->fault->name());
+ inst->translationFault->name());
/* Don't assign to fault */
} else {
/* Take the fault raised during the TLB/memory access */
- fault = response->fault;
+ fault = inst->translationFault;
fault->invoke(thread, inst->staticInst);
}
@@ -469,6 +469,18 @@ Execute::executeMemRefInst(MinorDynInstPtr inst, BranchData &branch,
Fault init_fault = inst->staticInst->initiateAcc(&context,
inst->traceData);
+ if (inst->inLSQ) {
+ if (init_fault != NoFault) {
+ assert(inst->translationFault != NoFault);
+ // Translation faults are dealt with in handleMemResponse()
+ init_fault = NoFault;
+ } else {
+ // If we have a translation fault then it got suppressed by
+ // initateAcc()
+ inst->translationFault = NoFault;
+ }
+ }
+
if (init_fault != NoFault) {
DPRINTF(MinorExecute, "Fault on memory inst: %s"
" initiateAcc: %s\n", *inst, init_fault->name());
diff --git a/src/cpu/minor/lsq.cc b/src/cpu/minor/lsq.cc
index 1d9f17e8d..1e5e89647 100644
--- a/src/cpu/minor/lsq.cc
+++ b/src/cpu/minor/lsq.cc
@@ -65,16 +65,52 @@ LSQ::LSQRequest::LSQRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_,
data(data_),
packet(NULL),
request(),
- fault(NoFault),
res(res_),
skipped(false),
issuedToMemory(false),
+ isTranslationDelayed(false),
state(NotIssued)
{
request = std::make_shared<Request>();
}
void
+LSQ::LSQRequest::tryToSuppressFault()
+{
+ SimpleThread &thread = *port.cpu.threads[inst->id.threadId];
+ TheISA::PCState old_pc = thread.pcState();
+ ExecContext context(port.cpu, thread, port.execute, inst);
+ Fault M5_VAR_USED fault = inst->translationFault;
+
+ // Give the instruction a chance to suppress a translation fault
+ inst->translationFault = inst->staticInst->initiateAcc(&context, nullptr);
+ if (inst->translationFault == NoFault) {
+ DPRINTFS(MinorMem, (&port),
+ "Translation fault suppressed for inst:%s\n", *inst);
+ } else {
+ assert(inst->translationFault == fault);
+ }
+ thread.pcState(old_pc);
+}
+
+void
+LSQ::LSQRequest::completeDisabledMemAccess()
+{
+ DPRINTFS(MinorMem, (&port), "Complete disabled mem access for inst:%s\n",
+ *inst);
+
+ SimpleThread &thread = *port.cpu.threads[inst->id.threadId];
+ TheISA::PCState old_pc = thread.pcState();
+
+ ExecContext context(port.cpu, thread, port.execute, inst);
+
+ context.setMemAccPredicate(false);
+ inst->staticInst->completeAcc(nullptr, &context, inst->traceData);
+
+ thread.pcState(old_pc);
+}
+
+void
LSQ::LSQRequest::disableMemAccess()
{
port.cpu.threads[inst->id.threadId]->setMemAccPredicate(false);
@@ -227,16 +263,26 @@ void
LSQ::SingleDataRequest::finish(const Fault &fault_, const RequestPtr &request_,
ThreadContext *tc, BaseTLB::Mode mode)
{
- fault = fault_;
-
port.numAccessesInDTLB--;
DPRINTFS(MinorMem, (&port), "Received translation response for"
- " request: %s\n", *inst);
-
- makePacket();
-
- setState(Translated);
+ " request: %s delayed:%d %s\n", *inst, isTranslationDelayed,
+ fault_ != NoFault ? fault_->name() : "");
+
+ if (fault_ != NoFault) {
+ inst->translationFault = fault_;
+ if (isTranslationDelayed) {
+ tryToSuppressFault();
+ if (inst->translationFault == NoFault) {
+ completeDisabledMemAccess();
+ setState(Complete);
+ }
+ }
+ setState(Translated);
+ } else {
+ setState(Translated);
+ makePacket();
+ }
port.tryToSendToTransfers(this);
/* Let's try and wake up the processor for the next cycle */
@@ -281,8 +327,6 @@ void
LSQ::SplitDataRequest::finish(const Fault &fault_, const RequestPtr &request_,
ThreadContext *tc, BaseTLB::Mode mode)
{
- fault = fault_;
-
port.numAccessesInDTLB--;
unsigned int M5_VAR_USED expected_fragment_index =
@@ -292,7 +336,9 @@ LSQ::SplitDataRequest::finish(const Fault &fault_, const RequestPtr &request_,
numTranslatedFragments++;
DPRINTFS(MinorMem, (&port), "Received translation response for fragment"
- " %d of request: %s\n", expected_fragment_index, *inst);
+ " %d of request: %s delayed:%d %s\n", expected_fragment_index,
+ *inst, isTranslationDelayed,
+ fault_ != NoFault ? fault_->name() : "");
assert(request_ == fragmentRequests[expected_fragment_index]);
@@ -300,18 +346,33 @@ LSQ::SplitDataRequest::finish(const Fault &fault_, const RequestPtr &request_,
* tryToSendToTransfers does take */
port.cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
- if (fault != NoFault) {
+ if (fault_ != NoFault) {
/* tryToSendToTransfers will handle the fault */
+ inst->translationFault = fault_;
DPRINTFS(MinorMem, (&port), "Faulting translation for fragment:"
" %d of request: %s\n",
expected_fragment_index, *inst);
- setState(Translated);
+ if (expected_fragment_index > 0 || isTranslationDelayed)
+ tryToSuppressFault();
+ if (expected_fragment_index == 0) {
+ if (isTranslationDelayed && inst->translationFault == NoFault) {
+ completeDisabledMemAccess();
+ setState(Complete);
+ } else {
+ setState(Translated);
+ }
+ } else if (inst->translationFault == NoFault) {
+ setState(Translated);
+ numTranslatedFragments--;
+ makeFragmentPackets();
+ } else {
+ setState(Translated);
+ }
port.tryToSendToTransfers(this);
} else if (numTranslatedFragments == numFragments) {
makeFragmentPackets();
-
setState(Translated);
port.tryToSendToTransfers(this);
} else {
@@ -562,6 +623,7 @@ LSQ::SplitDataRequest::stepToNextPacket()
void
LSQ::SplitDataRequest::retireResponse(PacketPtr response)
{
+ assert(inst->translationFault == NoFault);
assert(numRetiredFragments < numTranslatedFragments);
DPRINTFS(MinorMem, (&port), "Retiring fragment addr: 0x%x size: %d"
@@ -950,7 +1012,7 @@ LSQ::tryToSendToTransfers(LSQRequestPtr request)
return;
}
- if (request->fault != NoFault) {
+ if (request->inst->translationFault != NoFault) {
if (request->inst->staticInst->isPrefetch()) {
DPRINTF(MinorMem, "Not signalling fault for faulting prefetch\n");
}
@@ -1508,12 +1570,18 @@ LSQ::needsToTick()
return ret;
}
-void
+Fault
LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
unsigned int size, Addr addr, Request::Flags flags,
uint64_t *res, AtomicOpFunctor *amo_op,
const std::vector<bool>& byteEnable)
{
+ assert(inst->translationFault == NoFault || inst->inLSQ);
+
+ if (inst->inLSQ) {
+ return inst->translationFault;
+ }
+
bool needs_burst = transferNeedsBurst(addr, size, lineWidth);
if (needs_burst && inst->staticInst->isAtomic()) {
@@ -1568,12 +1636,13 @@ LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
addr, size, flags, cpu.dataMasterId(),
/* I've no idea why we need the PC, but give it */
inst->pc.instAddr(), amo_op);
- if (!byteEnable.empty()) {
- request->request->setByteEnable(byteEnable);
- }
+ request->request->setByteEnable(byteEnable);
requests.push(request);
+ inst->inLSQ = true;
request->startAddrTranslation();
+
+ return inst->translationFault;
}
void
@@ -1642,16 +1711,12 @@ LSQ::issuedMemBarrierInst(MinorDynInstPtr inst)
void
LSQ::LSQRequest::makePacket()
{
+ assert(inst->translationFault == NoFault);
+
/* Make the function idempotent */
if (packet)
return;
- // if the translation faulted, do not create a packet
- if (fault != NoFault) {
- assert(packet == NULL);
- return;
- }
-
packet = makePacketForRequest(request, isLoad, this, data);
/* Null the ret data so we know not to deallocate it when the
* ret is destroyed. The data now belongs to the ret and
diff --git a/src/cpu/minor/lsq.hh b/src/cpu/minor/lsq.hh
index 23b47c53c..a7c7cb632 100644
--- a/src/cpu/minor/lsq.hh
+++ b/src/cpu/minor/lsq.hh
@@ -145,9 +145,6 @@ class LSQ : public Named
/** The underlying request of this LSQRequest */
RequestPtr request;
- /** Fault generated performing this request */
- Fault fault;
-
/** Res from pushRequest */
uint64_t *res;
@@ -160,6 +157,9 @@ class LSQ : public Named
* that's visited the memory system */
bool issuedToMemory;
+ /** Address translation is delayed due to table walk */
+ bool isTranslationDelayed;
+
enum LSQRequestState
{
NotIssued, /* Newly created */
@@ -186,9 +186,14 @@ class LSQ : public Named
protected:
/** BaseTLB::Translation interface */
- void markDelayed() { }
+ void markDelayed() { isTranslationDelayed = true; }
+
+ /** Instructions may want to suppress translation faults (e.g.
+ * non-faulting vector loads).*/
+ void tryToSuppressFault();
void disableMemAccess();
+ void completeDisabledMemAccess();
public:
LSQRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_,
@@ -701,11 +706,11 @@ class LSQ : public Named
/** Single interface for readMem/writeMem/amoMem to issue requests into
* the LSQ */
- void pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
- unsigned int size, Addr addr, Request::Flags flags,
- uint64_t *res, AtomicOpFunctor *amo_op,
- const std::vector<bool>& byteEnable =
- std::vector<bool>());
+ Fault pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
+ unsigned int size, Addr addr, Request::Flags flags,
+ uint64_t *res, AtomicOpFunctor *amo_op,
+ const std::vector<bool>& byteEnable =
+ std::vector<bool>());
/** Push a predicate failed-representing request into the queues just
* to maintain commit order */