diff options
author | Rekai Gonzalez-Alberquilla <rekai.gonzalezalberquilla@arm.com> | 2017-02-13 09:41:44 +0000 |
---|---|---|
committer | Giacomo Travaglini <giacomo.travaglini@arm.com> | 2019-01-24 09:46:34 +0000 |
commit | 51becd2475748fb5515f261254c48827b3b5c2ba (patch) | |
tree | 2aabad4aad0ee7528ec437783b3077080ddb657a /src/cpu/base_dyn_inst.hh | |
parent | 6379bebd41899ca74ac146e8073aee0bd1781b3f (diff) | |
download | gem5-51becd2475748fb5515f261254c48827b3b5c2ba.tar.xz |
cpu-o3: O3 LSQ Generalisation
This patch does a large modification of the LSQ in the O3 model. The
main goal of the patch is to remove the 'an operation can be served with
one or two memory requests' assumption that is present in the LSQ
and the instruction with the req, reqLow, reqHigh triplet, and
generalise it to operations that can be addressed with one request,
and operations that require many requests, embodied in the
SingleDataRequest and the SplitDataRequest.
This modification has been done mimicking the minor model to an extent,
shifting the responsibilities of dealing with VtoP translation and
tracking the status and resources from the DynInst to the LSQ via the
LSQRequest. The LSQRequest models the information concerning the
operation, handles the creation of fragments for translation and request
as well as assembling/splitting the data accordingly.
With these modifications, the implementation of vector ISAs, particularly
on the memory side, becomes richer, as the new model permits a
dissociation of the ISA characteristics, such as vector length, from the
microarchitectural characteristics that govern how contiguous loads are
executed, allowing exploration of different LSQ to DL1 bus widths to
understand the tradeoffs in complexity and performance.
Part of the complexities introduced stem from the fact that gem5 keeps a
large amount of metadata regarding, in particular, memory operations,
thus, when an instruction is squashed while some operation such as a TLB lookup
or cache access is ongoing, when the relevant structure communicates to
the LSQ that the operation is over, it tries to access some pieces of
data that should have died when the instruction is squashed, leading to
asserts, panics, or memory corruption. To ensure the correct behaviour,
the LSQRequests rely on assessing who their owner is, and self-destructing
if they detect their owner is done with the request, and there will be
no subsequent action. For example, in the case of an instruction
squashed while the TLB is doing a walk to serve the translation, when the
translation is served by the TLB, the LSQRequest detects that the
instruction was squashed, and as the translation is done, no one else
expects to access its information, and therefore, it self-destructs.
Destroying the LSQRequest earlier would lead to wrong behaviour
as the TLB walk may access some fields of it.
Additional authors:
- Gabor Dozsa <gabor.dozsa@arm.com>
Change-Id: I9578a1a3f6b899c390cdd886856a24db68ff7d0c
Signed-off-by: Giacomo Gabrielli <giacomo.gabrielli@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/13516
Reviewed-by: Anthony Gutierrez <anthony.gutierrez@amd.com>
Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com>
Diffstat (limited to 'src/cpu/base_dyn_inst.hh')
-rw-r--r-- | src/cpu/base_dyn_inst.hh | 242 |
1 files changed, 26 insertions, 216 deletions
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index c2a14089a..d81b58bdf 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011,2013,2016 ARM Limited + * Copyright (c) 2011, 2013, 2016-2018 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved. * @@ -84,6 +84,10 @@ class BaseDynInst : public ExecContext, public RefCounted typedef typename ImplCPU::ImplState ImplState; using VecRegContainer = TheISA::VecRegContainer; + using LSQRequestPtr = typename Impl::CPUPol::LSQ::LSQRequest*; + using LQIterator = typename Impl::CPUPol::LSQUnit::LQIterator; + using SQIterator = typename Impl::CPUPol::LSQUnit::SQIterator; + // The DynInstPtr type. typedef typename Impl::DynInstPtr DynInstPtr; typedef RefCountingPtr<BaseDynInst<Impl> > BaseDynInstPtr; @@ -203,12 +207,7 @@ class BaseDynInst : public ExecContext, public RefCounted Addr effAddr; /** The effective physical address. */ - Addr physEffAddrLow; - - /** The effective physical address - * of the second request for a split request - */ - Addr physEffAddrHigh; + Addr physEffAddr; /** The memory request flags (from translation). */ unsigned memReqFlags; @@ -224,19 +223,19 @@ class BaseDynInst : public ExecContext, public RefCounted /** Load queue index. */ int16_t lqIdx; + LQIterator lqIt; /** Store queue index. */ int16_t sqIdx; + SQIterator sqIt; /////////////////////// TLB Miss ////////////////////// /** - * Saved memory requests (needed when the DTB address translation is + * Saved memory request (needed when the DTB address translation is * delayed due to a hw page table walk). */ - RequestPtr savedReq; - RequestPtr savedSreqLow; - RequestPtr savedSreqHigh; + LSQRequestPtr savedReq; /////////////////////// Checker ////////////////////// // Need a copy of main request pointer to verify on writes. @@ -270,6 +269,7 @@ class BaseDynInst : public ExecContext, public RefCounted /** Is the effective virtual address valid. 
*/ bool effAddrValid() const { return instFlags[EffAddrValid]; } + void effAddrValid(bool b) { instFlags[EffAddrValid] = b; } /** Whether or not the memory operation is done. */ bool memOpDone() const { return instFlags[MemOpDone]; } @@ -303,18 +303,6 @@ class BaseDynInst : public ExecContext, public RefCounted Fault writeMem(uint8_t *data, unsigned size, Addr addr, Request::Flags flags, uint64_t *res); - /** Splits a request in two if it crosses a dcache block. */ - void splitRequest(const RequestPtr &req, RequestPtr &sreqLow, - RequestPtr &sreqHigh); - - /** Initiate a DTB address translation. */ - void initiateTranslation(const RequestPtr &req, const RequestPtr &sreqLow, - const RequestPtr &sreqHigh, uint64_t *res, - BaseTLB::Mode mode); - - /** Finish a DTB address translation. */ - void finishTranslation(WholeTranslationState *state); - /** True if the DTB address translation has started. */ bool translationStarted() const { return instFlags[TranslationStarted]; } void translationStarted(bool f) { instFlags[TranslationStarted] = f; } @@ -454,6 +442,9 @@ class BaseDynInst : public ExecContext, public RefCounted /** Returns the fault type. */ Fault getFault() const { return fault; } + /** TODO: This I added for the LSQRequest side to be able to modify the + * fault. There should be a better mechanism in place. */ + Fault& getFault() { return fault; } /** Checks whether or not this instruction has had its branch target * calculated yet. 
For now it is not utilized and is hacked to be @@ -589,7 +580,8 @@ class BaseDynInst : public ExecContext, public RefCounted int8_t numIntDestRegs() const { return staticInst->numIntDestRegs(); } int8_t numCCDestRegs() const { return staticInst->numCCDestRegs(); } int8_t numVecDestRegs() const { return staticInst->numVecDestRegs(); } - int8_t numVecElemDestRegs() const { + int8_t numVecElemDestRegs() const + { return staticInst->numVecElemDestRegs(); } @@ -837,6 +829,7 @@ class BaseDynInst : public ExecContext, public RefCounted /** Sets the ASID. */ void setASID(short addr_space_id) { asid = addr_space_id; } + short getASID() { return asid; } /** Sets the thread id. */ void setTid(ThreadID tid) { threadNumber = tid; } @@ -853,9 +846,12 @@ class BaseDynInst : public ExecContext, public RefCounted /** Is this instruction's memory access strictly ordered? */ bool strictlyOrdered() const { return instFlags[IsStrictlyOrdered]; } + void strictlyOrdered(bool so) { instFlags[IsStrictlyOrdered] = so; } /** Has this instruction generated a memory request. */ bool hasRequest() const { return instFlags[ReqMade]; } + /** Assert this instruction has generated a memory request. */ + void setRequest() { instFlags[ReqMade] = true; } /** Returns iterator to this instruction in the list of all insts. */ ListIt &getInstListIt() { return instListIt; } @@ -887,50 +883,9 @@ Fault BaseDynInst<Impl>::initiateMemRead(Addr addr, unsigned size, Request::Flags flags) { - instFlags[ReqMade] = true; - RequestPtr req = NULL; - RequestPtr sreqLow = NULL; - RequestPtr sreqHigh = NULL; - - if (instFlags[ReqMade] && translationStarted()) { - req = savedReq; - sreqLow = savedSreqLow; - sreqHigh = savedSreqHigh; - } else { - req = std::make_shared<Request>( - asid, addr, size, flags, masterId(), - this->pc.instAddr(), thread->contextId()); - - req->taskId(cpu->taskId()); - - // Only split the request if the ISA supports unaligned accesses. 
- if (TheISA::HasUnalignedMemAcc) { - splitRequest(req, sreqLow, sreqHigh); - } - initiateTranslation(req, sreqLow, sreqHigh, NULL, BaseTLB::Read); - } - - if (translationCompleted()) { - if (fault == NoFault) { - effAddr = req->getVaddr(); - effSize = size; - instFlags[EffAddrValid] = true; - - if (cpu->checker) { - reqToVerify = std::make_shared<Request>(*req); - } - fault = cpu->read(req, sreqLow, sreqHigh, lqIdx); - } else { - // Commit will have to clean up whatever happened. Set this - // instruction as executed. - this->setExecuted(); - } - } - - if (traceData) - traceData->setMem(addr, size, flags); - - return fault; + return cpu->pushRequest( + dynamic_cast<typename DynInstPtr::PtrType>(this), + /* ld */ true, nullptr, size, addr, flags, nullptr); } template<class Impl> @@ -938,154 +893,9 @@ Fault BaseDynInst<Impl>::writeMem(uint8_t *data, unsigned size, Addr addr, Request::Flags flags, uint64_t *res) { - if (traceData) - traceData->setMem(addr, size, flags); - - instFlags[ReqMade] = true; - RequestPtr req = NULL; - RequestPtr sreqLow = NULL; - RequestPtr sreqHigh = NULL; - - if (instFlags[ReqMade] && translationStarted()) { - req = savedReq; - sreqLow = savedSreqLow; - sreqHigh = savedSreqHigh; - } else { - req = std::make_shared<Request>( - asid, addr, size, flags, masterId(), - this->pc.instAddr(), thread->contextId()); - - req->taskId(cpu->taskId()); - - // Only split the request if the ISA supports unaligned accesses. 
- if (TheISA::HasUnalignedMemAcc) { - splitRequest(req, sreqLow, sreqHigh); - } - initiateTranslation(req, sreqLow, sreqHigh, res, BaseTLB::Write); - } - - if (fault == NoFault && translationCompleted()) { - effAddr = req->getVaddr(); - effSize = size; - instFlags[EffAddrValid] = true; - - if (cpu->checker) { - reqToVerify = std::make_shared<Request>(*req); - } - fault = cpu->write(req, sreqLow, sreqHigh, data, sqIdx); - } - - return fault; -} - -template<class Impl> -inline void -BaseDynInst<Impl>::splitRequest(const RequestPtr &req, RequestPtr &sreqLow, - RequestPtr &sreqHigh) -{ - // Check to see if the request crosses the next level block boundary. - unsigned block_size = cpu->cacheLineSize(); - Addr addr = req->getVaddr(); - Addr split_addr = roundDown(addr + req->getSize() - 1, block_size); - assert(split_addr <= addr || split_addr - addr < block_size); - - // Spans two blocks. - if (split_addr > addr) { - req->splitOnVaddr(split_addr, sreqLow, sreqHigh); - } -} - -template<class Impl> -inline void -BaseDynInst<Impl>::initiateTranslation(const RequestPtr &req, - const RequestPtr &sreqLow, - const RequestPtr &sreqHigh, - uint64_t *res, - BaseTLB::Mode mode) -{ - translationStarted(true); - - if (!TheISA::HasUnalignedMemAcc || sreqLow == NULL) { - WholeTranslationState *state = - new WholeTranslationState(req, NULL, res, mode); - - // One translation if the request isn't split. - DataTranslation<BaseDynInstPtr> *trans = - new DataTranslation<BaseDynInstPtr>(this, state); - - cpu->dtb->translateTiming(req, thread->getTC(), trans, mode); - - if (!translationCompleted()) { - // The translation isn't yet complete, so we can't possibly have a - // fault. Overwrite any existing fault we might have from a previous - // execution of this instruction (e.g. an uncachable load that - // couldn't execute because it wasn't at the head of the ROB). - fault = NoFault; - - // Save memory requests. 
- savedReq = state->mainReq; - savedSreqLow = state->sreqLow; - savedSreqHigh = state->sreqHigh; - } - } else { - WholeTranslationState *state = - new WholeTranslationState(req, sreqLow, sreqHigh, NULL, res, mode); - - // Two translations when the request is split. - DataTranslation<BaseDynInstPtr> *stransLow = - new DataTranslation<BaseDynInstPtr>(this, state, 0); - DataTranslation<BaseDynInstPtr> *stransHigh = - new DataTranslation<BaseDynInstPtr>(this, state, 1); - - cpu->dtb->translateTiming(sreqLow, thread->getTC(), stransLow, mode); - cpu->dtb->translateTiming(sreqHigh, thread->getTC(), stransHigh, mode); - - if (!translationCompleted()) { - // The translation isn't yet complete, so we can't possibly have a - // fault. Overwrite any existing fault we might have from a previous - // execution of this instruction (e.g. an uncachable load that - // couldn't execute because it wasn't at the head of the ROB). - fault = NoFault; - - // Save memory requests. - savedReq = state->mainReq; - savedSreqLow = state->sreqLow; - savedSreqHigh = state->sreqHigh; - } - } -} - -template<class Impl> -inline void -BaseDynInst<Impl>::finishTranslation(WholeTranslationState *state) -{ - fault = state->getFault(); - - instFlags[IsStrictlyOrdered] = state->isStrictlyOrdered(); - - if (fault == NoFault) { - // save Paddr for a single req - physEffAddrLow = state->getPaddr(); - - // case for the request that has been split - if (state->isSplit) { - physEffAddrLow = state->sreqLow->getPaddr(); - physEffAddrHigh = state->sreqHigh->getPaddr(); - } - - memReqFlags = state->getFlags(); - - if (state->mainReq->isCondSwap()) { - assert(state->res); - state->mainReq->setExtraData(*state->res); - } - - } else { - state->deleteReqs(); - } - delete state; - - translationCompleted(true); + return cpu->pushRequest( + dynamic_cast<typename DynInstPtr::PtrType>(this), + /* st */ false, data, size, addr, flags, res); } #endif // __CPU_BASE_DYN_INST_HH__ |