diff options
-rw-r--r-- | src/arch/alpha/tlb.cc | 2 | ||||
-rw-r--r-- | src/arch/arm/tlb.cc | 14 | ||||
-rw-r--r-- | src/arch/mips/tlb.cc | 2 | ||||
-rw-r--r-- | src/arch/power/tlb.cc | 2 | ||||
-rw-r--r-- | src/arch/sparc/tlb.cc | 14 | ||||
-rw-r--r-- | src/arch/x86/tlb.cc | 8 | ||||
-rw-r--r-- | src/cpu/base_dyn_inst.hh | 8 | ||||
-rw-r--r-- | src/cpu/minor/lsq.cc | 2 | ||||
-rw-r--r-- | src/cpu/o3/comm.hh | 10 | ||||
-rw-r--r-- | src/cpu/o3/commit_impl.hh | 10 | ||||
-rw-r--r-- | src/cpu/o3/iew_impl.hh | 11 | ||||
-rw-r--r-- | src/cpu/o3/lsq_unit.hh | 16 | ||||
-rw-r--r-- | src/cpu/o3/lsq_unit_impl.hh | 12 | ||||
-rw-r--r-- | src/cpu/translation.hh | 10 | ||||
-rw-r--r-- | src/mem/request.hh | 21 |
15 files changed, 83 insertions, 59 deletions
diff --git a/src/arch/alpha/tlb.cc b/src/arch/alpha/tlb.cc index bcf61f3bf..a740da388 100644 --- a/src/arch/alpha/tlb.cc +++ b/src/arch/alpha/tlb.cc @@ -225,7 +225,7 @@ TLB::checkCacheability(RequestPtr &req, bool itb) "IPR memory space not implemented!"); } else { // mark request as uncacheable - req->setFlags(Request::UNCACHEABLE); + req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER); // Clear bits 42:35 of the physical address (10-2 in // Tsunami manual) diff --git a/src/arch/arm/tlb.cc b/src/arch/arm/tlb.cc index 4674e5889..8c3bb047d 100644 --- a/src/arch/arm/tlb.cc +++ b/src/arch/arm/tlb.cc @@ -985,13 +985,13 @@ TLB::translateFs(RequestPtr req, ThreadContext *tc, Mode mode, if (flags & Request::CLEAR_LL){ // @todo: check implications of security extensions req->setPaddr(0); - req->setFlags(Request::UNCACHEABLE); + req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER); req->setFlags(Request::CLEAR_LL); return NoFault; } if ((req->isInstFetch() && (!sctlr.i)) || ((!req->isInstFetch()) && (!sctlr.c))){ - req->setFlags(Request::UNCACHEABLE); + req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER); } if (!is_fetch) { assert(flags & MustBeOne); @@ -1018,10 +1018,10 @@ TLB::translateFs(RequestPtr req, ThreadContext *tc, Mode mode, // @todo: double check this (ARM ARM issue C B3.2.1) if (long_desc_format || sctlr.tre == 0) { - req->setFlags(Request::UNCACHEABLE); + req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER); } else { if (nmrr.ir0 == 0 || nmrr.or0 == 0 || prrr.tr0 != 0x2) - req->setFlags(Request::UNCACHEABLE); + req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER); } // Set memory attributes @@ -1074,9 +1074,9 @@ TLB::translateFs(RequestPtr req, ThreadContext *tc, Mode mode, te->shareable, te->innerAttrs, te->outerAttrs, static_cast<uint8_t>(te->mtype), isStage2); setAttr(te->attributes); - if (te->nonCacheable) { - req->setFlags(Request::UNCACHEABLE); - } + + if (te->nonCacheable) + req->setFlags(Request::UNCACHEABLE | 
Request::STRICT_ORDER); Addr pa = te->pAddr(vaddr); req->setPaddr(pa); diff --git a/src/arch/mips/tlb.cc b/src/arch/mips/tlb.cc index b43797541..6c46cacc6 100644 --- a/src/arch/mips/tlb.cc +++ b/src/arch/mips/tlb.cc @@ -148,7 +148,7 @@ TLB::checkCacheability(RequestPtr &req) // address or by the TLB entry if ((req->getVaddr() & VAddrUncacheable) == VAddrUncacheable) { // mark request as uncacheable - req->setFlags(Request::UNCACHEABLE); + req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER); } return NoFault; } diff --git a/src/arch/power/tlb.cc b/src/arch/power/tlb.cc index 950483893..458ed29bf 100644 --- a/src/arch/power/tlb.cc +++ b/src/arch/power/tlb.cc @@ -150,7 +150,7 @@ TLB::checkCacheability(RequestPtr &req) if ((req->getVaddr() & VAddrUncacheable) == VAddrUncacheable) { // mark request as uncacheable - req->setFlags(Request::UNCACHEABLE); + req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER); } return NoFault; } diff --git a/src/arch/sparc/tlb.cc b/src/arch/sparc/tlb.cc index c4994657d..84d748dd3 100644 --- a/src/arch/sparc/tlb.cc +++ b/src/arch/sparc/tlb.cc @@ -571,8 +571,10 @@ TLB::translateData(RequestPtr req, ThreadContext *tc, bool write) ce_va < vaddr + size && ce_va + ce->range.size > vaddr && (!write || ce->pte.writable())) { req->setPaddr(ce->pte.translate(vaddr)); - if (ce->pte.sideffect() || (ce->pte.paddr() >> 39) & 1) - req->setFlags(Request::UNCACHEABLE); + if (ce->pte.sideffect() || (ce->pte.paddr() >> 39) & 1) { + req->setFlags( + Request::UNCACHEABLE | Request::STRICT_ORDER); + } DPRINTF(TLB, "TLB: %#X -> %#X\n", vaddr, req->getPaddr()); return NoFault; } // if matched @@ -584,8 +586,10 @@ TLB::translateData(RequestPtr req, ThreadContext *tc, bool write) ce_va < vaddr + size && ce_va + ce->range.size > vaddr && (!write || ce->pte.writable())) { req->setPaddr(ce->pte.translate(vaddr)); - if (ce->pte.sideffect() || (ce->pte.paddr() >> 39) & 1) - req->setFlags(Request::UNCACHEABLE); + if (ce->pte.sideffect() || (ce->pte.paddr() 
>> 39) & 1) { + req->setFlags( + Request::UNCACHEABLE | Request::STRICT_ORDER); + } DPRINTF(TLB, "TLB: %#X -> %#X\n", vaddr, req->getPaddr()); return NoFault; } // if matched @@ -748,7 +752,7 @@ TLB::translateData(RequestPtr req, ThreadContext *tc, bool write) } if (e->pte.sideffect() || (e->pte.paddr() >> 39) & 1) - req->setFlags(Request::UNCACHEABLE); + req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER); // cache translation date for next translation cacheState = tlbdata; diff --git a/src/arch/x86/tlb.cc b/src/arch/x86/tlb.cc index dd0aed770..86e051deb 100644 --- a/src/arch/x86/tlb.cc +++ b/src/arch/x86/tlb.cc @@ -206,7 +206,7 @@ TLB::translateInt(RequestPtr req, ThreadContext *tc) req->setFlags(Request::MMAPPED_IPR); req->setPaddr(MISCREG_PCI_CONFIG_ADDRESS * sizeof(MiscReg)); } else if ((IOPort & ~mask(2)) == 0xCFC) { - req->setFlags(Request::UNCACHEABLE); + req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER); Addr configAddress = tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS); if (bits(configAddress, 31, 31)) { @@ -217,7 +217,7 @@ TLB::translateInt(RequestPtr req, ThreadContext *tc) req->setPaddr(PhysAddrPrefixIO | IOPort); } } else { - req->setFlags(Request::UNCACHEABLE); + req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER); req->setPaddr(PhysAddrPrefixIO | IOPort); } return NoFault; @@ -261,7 +261,7 @@ TLB::finalizePhysical(RequestPtr req, ThreadContext *tc, Mode mode) const return new GeneralProtection(0); */ // Force the access to be uncacheable. 
- req->setFlags(Request::UNCACHEABLE); + req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER); req->setPaddr(x86LocalAPICAddress(tc->contextId(), paddr - apicRange.start())); } @@ -401,7 +401,7 @@ TLB::translate(RequestPtr req, ThreadContext *tc, Translation *translation, DPRINTF(TLB, "Translated %#x -> %#x.\n", vaddr, paddr); req->setPaddr(paddr); if (entry->uncacheable) - req->setFlags(Request::UNCACHEABLE); + req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER); } else { //Use the address which already has segmentation applied. DPRINTF(TLB, "Paging disabled.\n"); diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index 875cb2946..50b1b12ce 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -144,7 +144,7 @@ class BaseDynInst : public ExecContext, public RefCounted * @todo: Consider if this is necessary or not. */ EACalcDone, - IsUncacheable, + IsStrictlyOrdered, ReqMade, MemOpDone, MaxFlags @@ -834,8 +834,8 @@ class BaseDynInst : public ExecContext, public RefCounted /** Returns whether or not the eff. addr. source registers are ready. */ bool eaSrcsReady(); - /** Is this instruction's memory access uncacheable. */ - bool uncacheable() { return instFlags[IsUncacheable]; } + /** Is this instruction's memory access strictly ordered? */ + bool strictlyOrdered() const { return instFlags[IsStrictlyOrdered]; } /** Has this instruction generated a memory request. 
*/ bool hasRequest() { return instFlags[ReqMade]; } @@ -1052,7 +1052,7 @@ BaseDynInst<Impl>::finishTranslation(WholeTranslationState *state) { fault = state->getFault(); - instFlags[IsUncacheable] = state->isUncacheable(); + instFlags[IsStrictlyOrdered] = state->isStrictlyOrdered(); if (fault == NoFault) { physEffAddr = state->getPaddr(); diff --git a/src/cpu/minor/lsq.cc b/src/cpu/minor/lsq.cc index ff609deac..db57daa37 100644 --- a/src/cpu/minor/lsq.cc +++ b/src/cpu/minor/lsq.cc @@ -926,7 +926,7 @@ LSQ::tryToSendToTransfers(LSQRequestPtr request) bool is_load = request->isLoad; bool is_llsc = request->request.isLLSC(); bool is_swap = request->request.isSwap(); - bool bufferable = !(request->request.isUncacheable() || + bool bufferable = !(request->request.isStrictlyOrdered() || is_llsc || is_swap); if (is_load) { diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh index a425484f5..4da251104 100644 --- a/src/cpu/o3/comm.hh +++ b/src/cpu/o3/comm.hh @@ -185,8 +185,9 @@ struct TimeBufStruct { /// Instruction that caused the a non-mispredict squash DynInstPtr squashInst; // *F - /// Hack for now to send back an uncached access to the IEW stage. - DynInstPtr uncachedLoad; // *I + /// Hack for now to send back a strictly ordered access to the + /// IEW stage. + DynInstPtr strictlyOrderedLoad; // *I /// Communication specifically to the IQ to tell the IQ that it can /// schedule a non-speculative instruction. @@ -216,8 +217,9 @@ struct TimeBufStruct { /// If the interrupt ended up being cleared before being handled bool clearInterrupt; // *F - /// Hack for now to send back an uncached access to the IEW stage. - bool uncached; // *I + /// Hack for now to send back a strictly ordered access to + /// the IEW stage. 
+ bool strictlyOrdered; // *I }; diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index aa1948602..5323e1413 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -1145,7 +1145,7 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) // think are possible. assert(head_inst->isNonSpeculative() || head_inst->isStoreConditional() || head_inst->isMemBarrier() || head_inst->isWriteBarrier() || - (head_inst->isLoad() && head_inst->uncacheable())); + (head_inst->isLoad() && head_inst->strictlyOrdered())); DPRINTF(Commit, "Encountered a barrier or non-speculative " "instruction [sn:%lli] at the head of the ROB, PC %s.\n", @@ -1162,11 +1162,11 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) // it is executed. head_inst->clearCanCommit(); - if (head_inst->isLoad() && head_inst->uncacheable()) { - DPRINTF(Commit, "[sn:%lli]: Uncached load, PC %s.\n", + if (head_inst->isLoad() && head_inst->strictlyOrdered()) { + DPRINTF(Commit, "[sn:%lli]: Strictly ordered load, PC %s.\n", head_inst->seqNum, head_inst->pcState()); - toIEW->commitInfo[tid].uncached = true; - toIEW->commitInfo[tid].uncachedLoad = head_inst; + toIEW->commitInfo[tid].strictlyOrdered = true; + toIEW->commitInfo[tid].strictlyOrderedLoad = head_inst; } else { ++commitNonSpecStalls; } diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index 4741df634..730eb0cfe 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -1418,9 +1418,9 @@ DefaultIEW<Impl>::writebackInsts() // Some instructions will be sent to commit without having // executed because they need commit to handle them. - // E.g. Uncached loads have not actually executed when they + // E.g. Strictly ordered loads have not actually executed when they // are first sent to commit. Instead commit must tell the LSQ - // when it's ready to execute the uncached load. + // when it's ready to execute the strictly ordered load. 
if (!inst->isSquashed() && inst->isExecuted() && inst->getFault() == NoFault) { int dependents = instQueue.wakeDependents(inst); @@ -1522,9 +1522,10 @@ DefaultIEW<Impl>::tick() if (fromCommit->commitInfo[tid].nonSpecSeqNum != 0) { //DPRINTF(IEW,"NonspecInst from thread %i",tid); - if (fromCommit->commitInfo[tid].uncached) { - instQueue.replayMemInst(fromCommit->commitInfo[tid].uncachedLoad); - fromCommit->commitInfo[tid].uncachedLoad->setAtCommit(); + if (fromCommit->commitInfo[tid].strictlyOrdered) { + instQueue.replayMemInst( + fromCommit->commitInfo[tid].strictlyOrderedLoad); + fromCommit->commitInfo[tid].strictlyOrderedLoad->setAtCommit(); } else { instQueue.scheduleNonSpec( fromCommit->commitInfo[tid].nonSpecSeqNum); diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index 6fe832bf6..e356dd442 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -559,15 +559,15 @@ LSQUnit<Impl>::read(Request *req, Request *sreqLow, Request *sreqHigh, assert(!load_inst->isExecuted()); - // Make sure this isn't an uncacheable access - // A bit of a hackish way to get uncached accesses to work only if they're - // at the head of the LSQ and are ready to commit (at the head of the ROB - // too). - if (req->isUncacheable() && + // Make sure this isn't a strictly ordered load + // A bit of a hackish way to get strictly ordered accesses to work + // only if they're at the head of the LSQ and are ready to commit + // (at the head of the ROB too). 
+ if (req->isStrictlyOrdered() && (load_idx != loadHead || !load_inst->isAtCommit())) { iewStage->rescheduleMemInst(load_inst); ++lsqRescheduledLoads; - DPRINTF(LSQUnit, "Uncachable load [sn:%lli] PC %s\n", + DPRINTF(LSQUnit, "Strictly ordered load [sn:%lli] PC %s\n", load_inst->seqNum, load_inst->pcState()); // Must delete request now that it wasn't handed off to @@ -579,7 +579,7 @@ LSQUnit<Impl>::read(Request *req, Request *sreqLow, Request *sreqHigh, delete sreqHigh; } return std::make_shared<GenericISA::M5PanicFault>( - "Uncachable load [sn:%llx] PC %s\n", + "Strictly ordered load [sn:%llx] PC %s\n", load_inst->seqNum, load_inst->pcState()); } @@ -653,7 +653,7 @@ LSQUnit<Impl>::read(Request *req, Request *sreqLow, Request *sreqHigh, if (store_size == 0) continue; - else if (storeQueue[store_idx].inst->uncacheable()) + else if (storeQueue[store_idx].inst->strictlyOrdered()) continue; assert(storeQueue[store_idx].inst->effAddrValid()); diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index 9c500443e..3019e80d2 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -471,7 +471,7 @@ LSQUnit<Impl>::checkSnoop(PacketPtr pkt) while (load_idx != loadTail) { DynInstPtr ld_inst = loadQueue[load_idx]; - if (!ld_inst->effAddrValid() || ld_inst->uncacheable()) { + if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered()) { incrLdIdx(load_idx); continue; } @@ -528,7 +528,7 @@ LSQUnit<Impl>::checkViolations(int load_idx, DynInstPtr &inst) */ while (load_idx != loadTail) { DynInstPtr ld_inst = loadQueue[load_idx]; - if (!ld_inst->effAddrValid() || ld_inst->uncacheable()) { + if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered()) { incrLdIdx(load_idx); continue; } @@ -617,15 +617,15 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst) // along to commit without the instruction completing. 
if (load_fault != NoFault || !inst->readPredicate()) { // Send this instruction to commit, also make sure iew stage - // realizes there is activity. - // Mark it as executed unless it is an uncached load that - // needs to hit the head of commit. + // realizes there is activity. Mark it as executed unless it + // is a strictly ordered load that needs to hit the head of + // commit. if (!inst->readPredicate()) inst->forwardOldRegs(); DPRINTF(LSQUnit, "Load [sn:%lli] not executed from %s\n", inst->seqNum, (load_fault != NoFault ? "fault" : "predication")); - if (!(inst->hasRequest() && inst->uncacheable()) || + if (!(inst->hasRequest() && inst->strictlyOrdered()) || inst->isAtCommit()) { inst->setExecuted(); } diff --git a/src/cpu/translation.hh b/src/cpu/translation.hh index 4ff75546a..a7372f3ee 100644 --- a/src/cpu/translation.hh +++ b/src/cpu/translation.hh @@ -153,14 +153,14 @@ class WholeTranslationState } /** - * Check if this request is uncacheable. We only need to check the main - * request because the flags will have been copied here on a split - * translation. + * Check if this request is a strictly ordered device access. We + * only need to check the main request because the flags will have + * been copied here on a split translation. */ bool - isUncacheable() const + isStrictlyOrdered() const { - return mainReq->isUncacheable(); + return mainReq->isStrictlyOrdered(); } /** diff --git a/src/mem/request.hh b/src/mem/request.hh index 029636100..5a2130029 100644 --- a/src/mem/request.hh +++ b/src/mem/request.hh @@ -101,8 +101,24 @@ class Request static const FlagsType INST_FETCH = 0x00000100; /** The virtual address is also the physical address. */ static const FlagsType PHYSICAL = 0x00000200; - /** The request is to an uncacheable address. */ - static const FlagsType UNCACHEABLE = 0x00001000; + /** + * The request is to an uncacheable address. + * + * @note Uncacheable accesses may be reordered by CPU models. 
The + * STRICT_ORDER flag should be set if such reordering is + * undesirable. + */ + static const FlagsType UNCACHEABLE = 0x00000400; + /** + * The request is required to be strictly ordered by <i>CPU + * models</i> and is non-speculative. + * + * A strictly ordered request is guaranteed to never be re-ordered + * or executed speculatively by a CPU model. The memory system may + * still reorder requests in caches unless the UNCACHEABLE flag is + * set as well. + */ + static const FlagsType STRICT_ORDER = 0x00000800; /** This request is to a memory mapped register. */ static const FlagsType MMAPPED_IPR = 0x00002000; /** This request is a clear exclusive. */ @@ -618,6 +634,7 @@ class Request /** Accessor functions for flags. Note that these are for testing only; setting flags should be done via setFlags(). */ bool isUncacheable() const { return _flags.isSet(UNCACHEABLE); } + bool isStrictlyOrdered() const { return _flags.isSet(STRICT_ORDER); } bool isInstFetch() const { return _flags.isSet(INST_FETCH); } bool isPrefetch() const { return _flags.isSet(PREFETCH); } bool isLLSC() const { return _flags.isSet(LLSC); } |