diff options
author | Gabor Dozsa <gabor.dozsa@arm.com> | 2019-02-27 17:26:56 +0000 |
---|---|---|
committer | Giacomo Gabrielli <giacomo.gabrielli@arm.com> | 2019-07-27 20:51:31 +0000 |
commit | 46da8fb805407cdc224abe788e8c666f3b0dadd1 (patch) | |
tree | 38368de3852a7263d84e6b7a355cc1485bd6a5f8 /src/cpu/o3 | |
parent | 7652b2f12c0acdc22d29deb4f786364c80c8528f (diff) | |
download | gem5-46da8fb805407cdc224abe788e8c666f3b0dadd1.tar.xz |
cpu: Add first-/non-faulting load support to Minor and O3
Some architectures allow faults raised by memory load instructions to be
masked in specific circumstances (e.g. first-faulting and non-faulting
loads in Arm SVE). This patch adds support for such loads in the Minor
and O3 CPU models.
Change-Id: I264a81a078f049127779aa834e89f0e693ba0bea
Signed-off-by: Gabor Dozsa <gabor.dozsa@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/19178
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Diffstat (limited to 'src/cpu/o3')
-rw-r--r-- | src/cpu/o3/lsq.hh | 14 | ||||
-rw-r--r-- | src/cpu/o3/lsq_impl.hh | 47 | ||||
-rw-r--r-- | src/cpu/o3/lsq_unit_impl.hh | 10 |
3 files changed, 54 insertions, 17 deletions
diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh index 84f1411a5..6f7820113 100644 --- a/src/cpu/o3/lsq.hh +++ b/src/cpu/o3/lsq.hh @@ -226,6 +226,7 @@ class LSQ Complete, Squashed, Fault, + PartialFault, }; State _state; LSQSenderState* _senderState; @@ -564,6 +565,19 @@ class LSQ return flags.isSet(Flag::Sent); } + bool + isPartialFault() + { + return _state == State::PartialFault; + } + + bool + isMemAccessRequired() + { + return (_state == State::Request || + (isPartialFault() && isLoad())); + } + /** * The LSQ entry is cleared */ diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh index d4e0a289e..27a563071 100644 --- a/src/cpu/o3/lsq_impl.hh +++ b/src/cpu/o3/lsq_impl.hh @@ -733,7 +733,7 @@ LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data, /* This is the place were instructions get the effAddr. */ if (req->isTranslationComplete()) { - if (inst->getFault() == NoFault) { + if (req->isMemAccessRequired()) { inst->effAddr = req->getVaddr(); inst->effSize = size; inst->effAddrValid(true); @@ -741,10 +741,17 @@ LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data, if (cpu->checker) { inst->reqToVerify = std::make_shared<Request>(*req->request()); } + Fault fault; if (isLoad) - inst->getFault() = cpu->read(req, inst->lqIdx); + fault = cpu->read(req, inst->lqIdx); else - inst->getFault() = cpu->write(req, data, inst->sqIdx); + fault = cpu->write(req, data, inst->sqIdx); + // inst->getFault() may have the first-fault of a + // multi-access split request at this point. + // Overwrite that only if we got another type of fault + // (e.g. re-exec). + if (fault != NoFault) + inst->getFault() = fault; } else if (isLoad) { inst->setMemAccPredicate(false); // Commit will have to clean up whatever happened. 
Set this @@ -797,13 +804,16 @@ void LSQ<Impl>::SplitDataRequest::finish(const Fault &fault, const RequestPtr &req, ThreadContext* tc, BaseTLB::Mode mode) { - _fault.push_back(fault); - assert(req == _requests[numTranslatedFragments] || this->isDelayed()); + int i; + for (i = 0; i < _requests.size() && _requests[i] != req; i++); + assert(i < _requests.size()); + _fault[i] = fault; numInTranslationFragments--; numTranslatedFragments++; - mainReq->setFlags(req->getFlags()); + if (fault == NoFault) + mainReq->setFlags(req->getFlags()); if (numTranslatedFragments == _requests.size()) { if (_inst->isSquashed()) { @@ -811,27 +821,30 @@ LSQ<Impl>::SplitDataRequest::finish(const Fault &fault, const RequestPtr &req, } else { _inst->strictlyOrdered(mainReq->isStrictlyOrdered()); flags.set(Flag::TranslationFinished); - auto fault_it = _fault.begin(); - /* Ffwd to the first NoFault. */ - while (fault_it != _fault.end() && *fault_it == NoFault) - fault_it++; - /* If none of the fragments faulted: */ - if (fault_it == _fault.end()) { - _inst->physEffAddr = request(0)->getPaddr(); + _inst->translationCompleted(true); + for (i = 0; i < _fault.size() && _fault[i] == NoFault; i++); + if (i > 0) { + _inst->physEffAddr = request(0)->getPaddr(); _inst->memReqFlags = mainReq->getFlags(); if (mainReq->isCondSwap()) { + assert (i == _fault.size()); assert(_res); mainReq->setExtraData(*_res); } - setState(State::Request); - _inst->fault = NoFault; + if (i == _fault.size()) { + _inst->fault = NoFault; + setState(State::Request); + } else { + _inst->fault = _fault[i]; + setState(State::PartialFault); + } } else { + _inst->fault = _fault[0]; setState(State::Fault); - _inst->fault = *fault_it; } - _inst->translationCompleted(true); } + } } diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index 21bed99fa..b71ed7f78 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -554,6 +554,16 @@ LSQUnit<Impl>::executeLoad(const DynInstPtr &inst) if 
(inst->isTranslationDelayed() && load_fault == NoFault) return load_fault; + if (load_fault != NoFault && inst->translationCompleted() && + inst->savedReq->isPartialFault() && !inst->savedReq->isComplete()) { + assert(inst->savedReq->isSplit()); + // If we have a partial fault where the mem access is not complete yet + // then the cache must have been blocked. This load will be re-executed + // when the cache gets unblocked. We will handle the fault when the + // mem access is complete. + return NoFault; + } + // If the instruction faulted or predicated false, then we need to send it // along to commit without the instruction completing. if (load_fault != NoFault || !inst->readPredicate()) { |