cpu-o3: O3 LSQ Generalisation

This patch does a large modification of the LSQ in the O3 model. The main goal of the patch is to remove the 'an operation can be served with one or two memory requests' assumption that is present in the LSQ and the instruction with the req, reqLow, reqHigh triplet, and generalising it to operations that can be addressed with one request, and operations that require many requests, embodied in the SingleDataRequest and the SplitDataRequest. This modification has been done mimicking the minor model to an extent, shifting the responsibilities of dealing with VtoP translation and tracking the status and resources from the DynInst to the LSQ via the LSQRequest. The LSQRequest models the information concerning the operation, handles the creation of fragments for translation and request as well as assembling/splitting the data accordingly. With this modifications, the implementation of vector ISAs, particularly on the memory side, become more rich, as the new model permits a dissociation of the ISA characteristics as vector length, from the microarchitectural characteristics that govern how contiguous loads are executing, allowing exploration of different LSQ to DL1 bus widths to understand the tradeoffs in complexity and performance. Part of the complexities introduced stem from the fact that gem5 keeps a large amount of metadata regarding, in particular, memory operations, thus, when an instruction is squashed while some operation as TLB lookup or cache access is ongoing, when the relevant structure communicates to the LSQ that the operation is over, it tries to access some pieces of data that should have died when the instruction is squashed, leading to asserts, panics, or memory corruption. To ensure the correct behaviour, the LSQRequest rely on assesing who is their owner, and self-destroying if they detect their owner is done with the request, and there will be no subsequent action. For example, in the case of an instruction squashed whal the TLB is doing a walk to serve the translation, when the translation is served by the TLB, the LSQRequest detects that the instruction was squashed, and as the translation is done, no one else expect to access its information, and therefore, it self-destructs. Having destroyed the LSQRequest earlier, would lead to wrong behaviour as the TLB walk may access some fields of it. Additional authors: - Gabor Dozsa <gabor.dozsa@arm.com> Change-Id: I9578a1a3f6b899c390cdd886856a24db68ff7d0c Signed-off-by: Giacomo Gabrielli <giacomo.gabrielli@arm.com> Reviewed-on: https://gem5-review.googlesource.com/c/13516 Reviewed-by: Anthony Gutierrez <anthony.gutierrez@amd.com> Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com>
author: Rekai Gonzalez-Alberquilla <rekai.gonzalezalberquilla@arm.com> 2017-02-13 09:41:44 +0000
committer: Giacomo Travaglini <giacomo.travaglini@arm.com> 2019-01-24 09:46:34 +0000
commit: 51becd2475748fb5515f261254c48827b3b5c2ba (patch)
tree: 2aabad4aad0ee7528ec437783b3077080ddb657a /src/cpu/base_dyn_inst.hh
parent: 6379bebd41899ca74ac146e8073aee0bd1781b3f (diff)
download: gem5-51becd2475748fb5515f261254c48827b3b5c2ba.tar.xz
1 files changed, 26 insertions, 216 deletions
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index c2a14089a..d81b58bdf 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011,2013,2016 ARM Limited
+ * Copyright (c) 2011, 2013, 2016-2018 ARM Limited
  * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved.
  *
@@ -84,6 +84,10 @@ class BaseDynInst : public ExecContext, public RefCounted
     typedef typename ImplCPU::ImplState ImplState;
     using VecRegContainer = TheISA::VecRegContainer;
 
+    using LSQRequestPtr = typename Impl::CPUPol::LSQ::LSQRequest*;
+    using LQIterator = typename Impl::CPUPol::LSQUnit::LQIterator;
+    using SQIterator = typename Impl::CPUPol::LSQUnit::SQIterator;
+
     // The DynInstPtr type.
     typedef typename Impl::DynInstPtr DynInstPtr;
     typedef RefCountingPtr<BaseDynInst<Impl> > BaseDynInstPtr;
@@ -203,12 +207,7 @@ class BaseDynInst : public ExecContext, public RefCounted
     Addr effAddr;
 
     /** The effective physical address. */
-    Addr physEffAddrLow;
-
-    /** The effective physical address
-     *  of the second request for a split request
-     */
-    Addr physEffAddrHigh;
+    Addr physEffAddr;
 
     /** The memory request flags (from translation). */
     unsigned memReqFlags;
@@ -224,19 +223,19 @@ class BaseDynInst : public ExecContext, public RefCounted
 
     /** Load queue index. */
     int16_t lqIdx;
+    LQIterator lqIt;
 
     /** Store queue index. */
     int16_t sqIdx;
+    SQIterator sqIt;
 
 
     /////////////////////// TLB Miss //////////////////////
     /**
-     * Saved memory requests (needed when the DTB address translation is
+     * Saved memory request (needed when the DTB address translation is
      * delayed due to a hw page table walk).
      */
-    RequestPtr savedReq;
-    RequestPtr savedSreqLow;
-    RequestPtr savedSreqHigh;
+    LSQRequestPtr savedReq;
 
     /////////////////////// Checker //////////////////////
     // Need a copy of main request pointer to verify on writes.
@@ -270,6 +269,7 @@ class BaseDynInst : public ExecContext, public RefCounted
 
     /** Is the effective virtual address valid. */
     bool effAddrValid() const { return instFlags[EffAddrValid]; }
+    void effAddrValid(bool b) { instFlags[EffAddrValid] = b; }
 
     /** Whether or not the memory operation is done. */
     bool memOpDone() const { return instFlags[MemOpDone]; }
@@ -303,18 +303,6 @@ class BaseDynInst : public ExecContext, public RefCounted
     Fault writeMem(uint8_t *data, unsigned size, Addr addr,
                    Request::Flags flags, uint64_t *res);
 
-    /** Splits a request in two if it crosses a dcache block. */
-    void splitRequest(const RequestPtr &req, RequestPtr &sreqLow,
-                      RequestPtr &sreqHigh);
-
-    /** Initiate a DTB address translation. */
-    void initiateTranslation(const RequestPtr &req, const RequestPtr &sreqLow,
-                             const RequestPtr &sreqHigh, uint64_t *res,
-                             BaseTLB::Mode mode);
-
-    /** Finish a DTB address translation. */
-    void finishTranslation(WholeTranslationState *state);
-
     /** True if the DTB address translation has started. */
     bool translationStarted() const { return instFlags[TranslationStarted]; }
     void translationStarted(bool f) { instFlags[TranslationStarted] = f; }
@@ -454,6 +442,9 @@ class BaseDynInst : public ExecContext, public RefCounted
 
     /** Returns the fault type. */
     Fault getFault() const { return fault; }
+    /** TODO: This I added for the LSQRequest side to be able to modify the
+     * fault. There should be a better mechanism in place. */
+    Fault& getFault() { return fault; }
 
     /** Checks whether or not this instruction has had its branch target
      *  calculated yet.  For now it is not utilized and is hacked to be
@@ -589,7 +580,8 @@ class BaseDynInst : public ExecContext, public RefCounted
     int8_t numIntDestRegs() const { return staticInst->numIntDestRegs(); }
     int8_t numCCDestRegs() const { return staticInst->numCCDestRegs(); }
     int8_t numVecDestRegs() const { return staticInst->numVecDestRegs(); }
-    int8_t numVecElemDestRegs() const {
+    int8_t numVecElemDestRegs() const
+    {
         return staticInst->numVecElemDestRegs();
     }
 
@@ -837,6 +829,7 @@ class BaseDynInst : public ExecContext, public RefCounted
 
     /** Sets the ASID. */
     void setASID(short addr_space_id) { asid = addr_space_id; }
+    short getASID() { return asid; }
 
     /** Sets the thread id. */
     void setTid(ThreadID tid) { threadNumber = tid; }
@@ -853,9 +846,12 @@ class BaseDynInst : public ExecContext, public RefCounted
 
     /** Is this instruction's memory access strictly ordered? */
     bool strictlyOrdered() const { return instFlags[IsStrictlyOrdered]; }
+    void strictlyOrdered(bool so) { instFlags[IsStrictlyOrdered] = so; }
 
     /** Has this instruction generated a memory request. */
     bool hasRequest() const { return instFlags[ReqMade]; }
+    /** Assert this instruction has generated a memory request. */
+    void setRequest() { instFlags[ReqMade] = true; }
 
     /** Returns iterator to this instruction in the list of all insts. */
     ListIt &getInstListIt() { return instListIt; }
@@ -887,50 +883,9 @@ Fault
 BaseDynInst<Impl>::initiateMemRead(Addr addr, unsigned size,
                                    Request::Flags flags)
 {
-    instFlags[ReqMade] = true;
-    RequestPtr req = NULL;
-    RequestPtr sreqLow = NULL;
-    RequestPtr sreqHigh = NULL;
-
-    if (instFlags[ReqMade] && translationStarted()) {
-        req = savedReq;
-        sreqLow = savedSreqLow;
-        sreqHigh = savedSreqHigh;
-    } else {
-        req = std::make_shared<Request>(
-            asid, addr, size, flags, masterId(),
-            this->pc.instAddr(), thread->contextId());
-
-        req->taskId(cpu->taskId());
-
-        // Only split the request if the ISA supports unaligned accesses.
-        if (TheISA::HasUnalignedMemAcc) {
-            splitRequest(req, sreqLow, sreqHigh);
-        }
-        initiateTranslation(req, sreqLow, sreqHigh, NULL, BaseTLB::Read);
-    }
-
-    if (translationCompleted()) {
-        if (fault == NoFault) {
-            effAddr = req->getVaddr();
-            effSize = size;
-            instFlags[EffAddrValid] = true;
-
-            if (cpu->checker) {
-                reqToVerify = std::make_shared<Request>(*req);
-            }
-            fault = cpu->read(req, sreqLow, sreqHigh, lqIdx);
-        } else {
-            // Commit will have to clean up whatever happened.  Set this
-            // instruction as executed.
-            this->setExecuted();
-        }
-    }
-
-    if (traceData)
-        traceData->setMem(addr, size, flags);
-
-    return fault;
+    return cpu->pushRequest(
+            dynamic_cast<typename DynInstPtr::PtrType>(this),
+            /* ld */ true, nullptr, size, addr, flags, nullptr);
 }
 
 template<class Impl>
@@ -938,154 +893,9 @@ Fault
 BaseDynInst<Impl>::writeMem(uint8_t *data, unsigned size, Addr addr,
                             Request::Flags flags, uint64_t *res)
 {
-    if (traceData)
-        traceData->setMem(addr, size, flags);
-
-    instFlags[ReqMade] = true;
-    RequestPtr req = NULL;
-    RequestPtr sreqLow = NULL;
-    RequestPtr sreqHigh = NULL;
-
-    if (instFlags[ReqMade] && translationStarted()) {
-        req = savedReq;
-        sreqLow = savedSreqLow;
-        sreqHigh = savedSreqHigh;
-    } else {
-        req = std::make_shared<Request>(
-            asid, addr, size, flags, masterId(),
-            this->pc.instAddr(), thread->contextId());
-
-        req->taskId(cpu->taskId());
-
-        // Only split the request if the ISA supports unaligned accesses.
-        if (TheISA::HasUnalignedMemAcc) {
-            splitRequest(req, sreqLow, sreqHigh);
-        }
-        initiateTranslation(req, sreqLow, sreqHigh, res, BaseTLB::Write);
-    }
-
-    if (fault == NoFault && translationCompleted()) {
-        effAddr = req->getVaddr();
-        effSize = size;
-        instFlags[EffAddrValid] = true;
-
-        if (cpu->checker) {
-            reqToVerify = std::make_shared<Request>(*req);
-        }
-        fault = cpu->write(req, sreqLow, sreqHigh, data, sqIdx);
-    }
-
-    return fault;
-}
-
-template<class Impl>
-inline void
-BaseDynInst<Impl>::splitRequest(const RequestPtr &req, RequestPtr &sreqLow,
-                                RequestPtr &sreqHigh)
-{
-    // Check to see if the request crosses the next level block boundary.
-    unsigned block_size = cpu->cacheLineSize();
-    Addr addr = req->getVaddr();
-    Addr split_addr = roundDown(addr + req->getSize() - 1, block_size);
-    assert(split_addr <= addr || split_addr - addr < block_size);
-
-    // Spans two blocks.
-    if (split_addr > addr) {
-        req->splitOnVaddr(split_addr, sreqLow, sreqHigh);
-    }
-}
-
-template<class Impl>
-inline void
-BaseDynInst<Impl>::initiateTranslation(const RequestPtr &req,
-                                       const RequestPtr &sreqLow,
-                                       const RequestPtr &sreqHigh,
-                                       uint64_t *res,
-                                       BaseTLB::Mode mode)
-{
-    translationStarted(true);
-
-    if (!TheISA::HasUnalignedMemAcc || sreqLow == NULL) {
-        WholeTranslationState *state =
-            new WholeTranslationState(req, NULL, res, mode);
-
-        // One translation if the request isn't split.
-        DataTranslation<BaseDynInstPtr> *trans =
-            new DataTranslation<BaseDynInstPtr>(this, state);
-
-        cpu->dtb->translateTiming(req, thread->getTC(), trans, mode);
-
-        if (!translationCompleted()) {
-            // The translation isn't yet complete, so we can't possibly have a
-            // fault. Overwrite any existing fault we might have from a previous
-            // execution of this instruction (e.g. an uncachable load that
-            // couldn't execute because it wasn't at the head of the ROB).
-            fault = NoFault;
-
-            // Save memory requests.
-            savedReq = state->mainReq;
-            savedSreqLow = state->sreqLow;
-            savedSreqHigh = state->sreqHigh;
-        }
-    } else {
-        WholeTranslationState *state =
-            new WholeTranslationState(req, sreqLow, sreqHigh, NULL, res, mode);
-
-        // Two translations when the request is split.
-        DataTranslation<BaseDynInstPtr> *stransLow =
-            new DataTranslation<BaseDynInstPtr>(this, state, 0);
-        DataTranslation<BaseDynInstPtr> *stransHigh =
-            new DataTranslation<BaseDynInstPtr>(this, state, 1);
-
-        cpu->dtb->translateTiming(sreqLow, thread->getTC(), stransLow, mode);
-        cpu->dtb->translateTiming(sreqHigh, thread->getTC(), stransHigh, mode);
-
-        if (!translationCompleted()) {
-            // The translation isn't yet complete, so we can't possibly have a
-            // fault. Overwrite any existing fault we might have from a previous
-            // execution of this instruction (e.g. an uncachable load that
-            // couldn't execute because it wasn't at the head of the ROB).
-            fault = NoFault;
-
-            // Save memory requests.
-            savedReq = state->mainReq;
-            savedSreqLow = state->sreqLow;
-            savedSreqHigh = state->sreqHigh;
-        }
-    }
-}
-
-template<class Impl>
-inline void
-BaseDynInst<Impl>::finishTranslation(WholeTranslationState *state)
-{
-    fault = state->getFault();
-
-    instFlags[IsStrictlyOrdered] = state->isStrictlyOrdered();
-
-    if (fault == NoFault) {
-        // save Paddr for a single req
-        physEffAddrLow = state->getPaddr();
-
-        // case for the request that has been split
-        if (state->isSplit) {
-          physEffAddrLow = state->sreqLow->getPaddr();
-          physEffAddrHigh = state->sreqHigh->getPaddr();
-        }
-
-        memReqFlags = state->getFlags();
-
-        if (state->mainReq->isCondSwap()) {
-            assert(state->res);
-            state->mainReq->setExtraData(*state->res);
-        }
-
-    } else {
-        state->deleteReqs();
-    }
-    delete state;
-
-    translationCompleted(true);
+    return cpu->pushRequest(
+            dynamic_cast<typename DynInstPtr::PtrType>(this),
+            /* st */ false, data, size, addr, flags, res);
 }
 
 #endif // __CPU_BASE_DYN_INST_HH__
author	Rekai Gonzalez-Alberquilla <rekai.gonzalezalberquilla@arm.com>	2017-02-13 09:41:44 +0000
committer	Giacomo Travaglini <giacomo.travaglini@arm.com>	2019-01-24 09:46:34 +0000
commit	51becd2475748fb5515f261254c48827b3b5c2ba (patch)
tree	2aabad4aad0ee7528ec437783b3077080ddb657a /src/cpu/base_dyn_inst.hh
parent	6379bebd41899ca74ac146e8073aee0bd1781b3f (diff)
download	gem5-51becd2475748fb5515f261254c48827b3b5c2ba.tar.xz