summary | refs | log | tree | commit | diff
path: root/src/cpu/o3
diff options
context:
space:
mode:
author: Gabor Dozsa <gabor.dozsa@arm.com> 2019-02-27 17:26:56 +0000
committer: Giacomo Gabrielli <giacomo.gabrielli@arm.com> 2019-07-27 20:51:31 +0000
commit: 46da8fb805407cdc224abe788e8c666f3b0dadd1 (patch)
tree: 38368de3852a7263d84e6b7a355cc1485bd6a5f8 /src/cpu/o3
parent: 7652b2f12c0acdc22d29deb4f786364c80c8528f (diff)
download: gem5-46da8fb805407cdc224abe788e8c666f3b0dadd1.tar.xz
cpu: Add first-/non-faulting load support to Minor and O3
Some architectures allow masking faults of memory load instructions in some specific circumstances (e.g. first-faulting and non-faulting loads in Arm SVE). This patch adds support for such loads in the Minor and O3 CPU models.

Change-Id: I264a81a078f049127779aa834e89f0e693ba0bea
Signed-off-by: Gabor Dozsa <gabor.dozsa@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/19178
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Diffstat (limited to 'src/cpu/o3')
-rw-r--r-- src/cpu/o3/lsq.hh | 14
-rw-r--r-- src/cpu/o3/lsq_impl.hh | 47
-rw-r--r-- src/cpu/o3/lsq_unit_impl.hh | 10
3 files changed, 54 insertions, 17 deletions
diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh
index 84f1411a5..6f7820113 100644
--- a/src/cpu/o3/lsq.hh
+++ b/src/cpu/o3/lsq.hh
@@ -226,6 +226,7 @@ class LSQ
Complete,
Squashed,
Fault,
+ PartialFault,
};
State _state;
LSQSenderState* _senderState;
@@ -564,6 +565,19 @@ class LSQ
return flags.isSet(Flag::Sent);
}
+ bool
+ isPartialFault()
+ {
+ return _state == State::PartialFault;
+ }
+
+ bool
+ isMemAccessRequired()
+ {
+ return (_state == State::Request ||
+ (isPartialFault() && isLoad()));
+ }
+
/**
* The LSQ entry is cleared
*/
diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh
index d4e0a289e..27a563071 100644
--- a/src/cpu/o3/lsq_impl.hh
+++ b/src/cpu/o3/lsq_impl.hh
@@ -733,7 +733,7 @@ LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
/* This is the place were instructions get the effAddr. */
if (req->isTranslationComplete()) {
- if (inst->getFault() == NoFault) {
+ if (req->isMemAccessRequired()) {
inst->effAddr = req->getVaddr();
inst->effSize = size;
inst->effAddrValid(true);
@@ -741,10 +741,17 @@ LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
if (cpu->checker) {
inst->reqToVerify = std::make_shared<Request>(*req->request());
}
+ Fault fault;
if (isLoad)
- inst->getFault() = cpu->read(req, inst->lqIdx);
+ fault = cpu->read(req, inst->lqIdx);
else
- inst->getFault() = cpu->write(req, data, inst->sqIdx);
+ fault = cpu->write(req, data, inst->sqIdx);
+ // inst->getFault() may have the first-fault of a
+ // multi-access split request at this point.
+ // Overwrite that only if we got another type of fault
+ // (e.g. re-exec).
+ if (fault != NoFault)
+ inst->getFault() = fault;
} else if (isLoad) {
inst->setMemAccPredicate(false);
// Commit will have to clean up whatever happened. Set this
@@ -797,13 +804,16 @@ void
LSQ<Impl>::SplitDataRequest::finish(const Fault &fault, const RequestPtr &req,
ThreadContext* tc, BaseTLB::Mode mode)
{
- _fault.push_back(fault);
- assert(req == _requests[numTranslatedFragments] || this->isDelayed());
+ int i;
+ for (i = 0; i < _requests.size() && _requests[i] != req; i++);
+ assert(i < _requests.size());
+ _fault[i] = fault;
numInTranslationFragments--;
numTranslatedFragments++;
- mainReq->setFlags(req->getFlags());
+ if (fault == NoFault)
+ mainReq->setFlags(req->getFlags());
if (numTranslatedFragments == _requests.size()) {
if (_inst->isSquashed()) {
@@ -811,27 +821,30 @@ LSQ<Impl>::SplitDataRequest::finish(const Fault &fault, const RequestPtr &req,
} else {
_inst->strictlyOrdered(mainReq->isStrictlyOrdered());
flags.set(Flag::TranslationFinished);
- auto fault_it = _fault.begin();
- /* Ffwd to the first NoFault. */
- while (fault_it != _fault.end() && *fault_it == NoFault)
- fault_it++;
- /* If none of the fragments faulted: */
- if (fault_it == _fault.end()) {
- _inst->physEffAddr = request(0)->getPaddr();
+ _inst->translationCompleted(true);
+ for (i = 0; i < _fault.size() && _fault[i] == NoFault; i++);
+ if (i > 0) {
+ _inst->physEffAddr = request(0)->getPaddr();
_inst->memReqFlags = mainReq->getFlags();
if (mainReq->isCondSwap()) {
+ assert (i == _fault.size());
assert(_res);
mainReq->setExtraData(*_res);
}
- setState(State::Request);
- _inst->fault = NoFault;
+ if (i == _fault.size()) {
+ _inst->fault = NoFault;
+ setState(State::Request);
+ } else {
+ _inst->fault = _fault[i];
+ setState(State::PartialFault);
+ }
} else {
+ _inst->fault = _fault[0];
setState(State::Fault);
- _inst->fault = *fault_it;
}
- _inst->translationCompleted(true);
}
+
}
}
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 21bed99fa..b71ed7f78 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -554,6 +554,16 @@ LSQUnit<Impl>::executeLoad(const DynInstPtr &inst)
if (inst->isTranslationDelayed() && load_fault == NoFault)
return load_fault;
+ if (load_fault != NoFault && inst->translationCompleted() &&
+ inst->savedReq->isPartialFault() && !inst->savedReq->isComplete()) {
+ assert(inst->savedReq->isSplit());
+ // If we have a partial fault where the mem access is not complete yet
+ // then the cache must have been blocked. This load will be re-executed
+ // when the cache gets unblocked. We will handle the fault when the
+ // mem access is complete.
+ return NoFault;
+ }
+
// If the instruction faulted or predicated false, then we need to send it
// along to commit without the instruction completing.
if (load_fault != NoFault || !inst->readPredicate()) {