15 files changed, 83 insertions, 59 deletions
diff --git a/src/arch/alpha/tlb.cc b/src/arch/alpha/tlb.cc
index bcf61f3bf..a740da388 100644
--- a/src/arch/alpha/tlb.cc
+++ b/src/arch/alpha/tlb.cc
@@ -225,7 +225,7 @@ TLB::checkCacheability(RequestPtr &req, bool itb)
                 "IPR memory space not implemented!");
         } else {
             // mark request as uncacheable
-            req->setFlags(Request::UNCACHEABLE);
+            req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER);
 
             // Clear bits 42:35 of the physical address (10-2 in
             // Tsunami manual)
diff --git a/src/arch/arm/tlb.cc b/src/arch/arm/tlb.cc
index 4674e5889..8c3bb047d 100644
--- a/src/arch/arm/tlb.cc
+++ b/src/arch/arm/tlb.cc
@@ -985,13 +985,13 @@ TLB::translateFs(RequestPtr req, ThreadContext *tc, Mode mode,
     if (flags & Request::CLEAR_LL){
         // @todo: check implications of security extensions
        req->setPaddr(0);
-       req->setFlags(Request::UNCACHEABLE);
+       req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER);
        req->setFlags(Request::CLEAR_LL);
        return NoFault;
     }
     if ((req->isInstFetch() && (!sctlr.i)) ||
         ((!req->isInstFetch()) && (!sctlr.c))){
-       req->setFlags(Request::UNCACHEABLE);
+       req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER);
     }
     if (!is_fetch) {
         assert(flags & MustBeOne);
@@ -1018,10 +1018,10 @@ TLB::translateFs(RequestPtr req, ThreadContext *tc, Mode mode,
 
         // @todo: double check this (ARM ARM issue C B3.2.1)
         if (long_desc_format || sctlr.tre == 0) {
-            req->setFlags(Request::UNCACHEABLE);
+            req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER);
         } else {
             if (nmrr.ir0 == 0 || nmrr.or0 == 0 || prrr.tr0 != 0x2)
-                req->setFlags(Request::UNCACHEABLE);
+                req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER);
         }
 
         // Set memory attributes
@@ -1074,9 +1074,9 @@ TLB::translateFs(RequestPtr req, ThreadContext *tc, Mode mode,
                 te->shareable, te->innerAttrs, te->outerAttrs,
                 static_cast<uint8_t>(te->mtype), isStage2);
         setAttr(te->attributes);
-        if (te->nonCacheable) {
-            req->setFlags(Request::UNCACHEABLE);
-        }
+
+        if (te->nonCacheable)
+            req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER);
 
         Addr pa = te->pAddr(vaddr);
         req->setPaddr(pa);
diff --git a/src/arch/mips/tlb.cc b/src/arch/mips/tlb.cc
index b43797541..6c46cacc6 100644
--- a/src/arch/mips/tlb.cc
+++ b/src/arch/mips/tlb.cc
@@ -148,7 +148,7 @@ TLB::checkCacheability(RequestPtr &req)
     // address or by the TLB entry
     if ((req->getVaddr() & VAddrUncacheable) == VAddrUncacheable) {
         // mark request as uncacheable
-        req->setFlags(Request::UNCACHEABLE);
+        req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER);
     }
     return NoFault;
 }
diff --git a/src/arch/power/tlb.cc b/src/arch/power/tlb.cc
index 950483893..458ed29bf 100644
--- a/src/arch/power/tlb.cc
+++ b/src/arch/power/tlb.cc
@@ -150,7 +150,7 @@ TLB::checkCacheability(RequestPtr &req)
     if ((req->getVaddr() & VAddrUncacheable) == VAddrUncacheable) {
 
         // mark request as uncacheable
-        req->setFlags(Request::UNCACHEABLE);
+        req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER);
     }
     return NoFault;
 }
diff --git a/src/arch/sparc/tlb.cc b/src/arch/sparc/tlb.cc
index c4994657d..84d748dd3 100644
--- a/src/arch/sparc/tlb.cc
+++ b/src/arch/sparc/tlb.cc
@@ -571,8 +571,10 @@ TLB::translateData(RequestPtr req, ThreadContext *tc, bool write)
                     ce_va < vaddr + size && ce_va + ce->range.size > vaddr &&
                     (!write || ce->pte.writable())) {
                     req->setPaddr(ce->pte.translate(vaddr));
-                    if (ce->pte.sideffect() || (ce->pte.paddr() >> 39) & 1)
-                        req->setFlags(Request::UNCACHEABLE);
+                    if (ce->pte.sideffect() || (ce->pte.paddr() >> 39) & 1) {
+                        req->setFlags(
+                            Request::UNCACHEABLE | Request::STRICT_ORDER);
+                    }
                     DPRINTF(TLB, "TLB: %#X -> %#X\n", vaddr, req->getPaddr());
                     return NoFault;
                 } // if matched
@@ -584,8 +586,10 @@ TLB::translateData(RequestPtr req, ThreadContext *tc, bool write)
                     ce_va < vaddr + size && ce_va + ce->range.size > vaddr &&
                     (!write || ce->pte.writable())) {
                     req->setPaddr(ce->pte.translate(vaddr));
-                    if (ce->pte.sideffect() || (ce->pte.paddr() >> 39) & 1)
-                        req->setFlags(Request::UNCACHEABLE);
+                    if (ce->pte.sideffect() || (ce->pte.paddr() >> 39) & 1) {
+                        req->setFlags(
+                            Request::UNCACHEABLE | Request::STRICT_ORDER);
+                    }
                     DPRINTF(TLB, "TLB: %#X -> %#X\n", vaddr, req->getPaddr());
                     return NoFault;
                 } // if matched
@@ -748,7 +752,7 @@ TLB::translateData(RequestPtr req, ThreadContext *tc, bool write)
     }
 
     if (e->pte.sideffect() || (e->pte.paddr() >> 39) & 1)
-        req->setFlags(Request::UNCACHEABLE);
+        req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER);
 
     // cache translation date for next translation
     cacheState = tlbdata;
diff --git a/src/arch/x86/tlb.cc b/src/arch/x86/tlb.cc
index dd0aed770..86e051deb 100644
--- a/src/arch/x86/tlb.cc
+++ b/src/arch/x86/tlb.cc
@@ -206,7 +206,7 @@ TLB::translateInt(RequestPtr req, ThreadContext *tc)
             req->setFlags(Request::MMAPPED_IPR);
             req->setPaddr(MISCREG_PCI_CONFIG_ADDRESS * sizeof(MiscReg));
         } else if ((IOPort & ~mask(2)) == 0xCFC) {
-            req->setFlags(Request::UNCACHEABLE);
+            req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER);
             Addr configAddress =
                 tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS);
             if (bits(configAddress, 31, 31)) {
@@ -217,7 +217,7 @@ TLB::translateInt(RequestPtr req, ThreadContext *tc)
                 req->setPaddr(PhysAddrPrefixIO | IOPort);
             }
         } else {
-            req->setFlags(Request::UNCACHEABLE);
+            req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER);
             req->setPaddr(PhysAddrPrefixIO | IOPort);
         }
         return NoFault;
@@ -261,7 +261,7 @@ TLB::finalizePhysical(RequestPtr req, ThreadContext *tc, Mode mode) const
                 return new GeneralProtection(0);
             */
             // Force the access to be uncacheable.
-            req->setFlags(Request::UNCACHEABLE);
+            req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER);
             req->setPaddr(x86LocalAPICAddress(tc->contextId(),
                                               paddr - apicRange.start()));
         }
@@ -401,7 +401,7 @@ TLB::translate(RequestPtr req, ThreadContext *tc, Translation *translation,
             DPRINTF(TLB, "Translated %#x -> %#x.\n", vaddr, paddr);
             req->setPaddr(paddr);
             if (entry->uncacheable)
-                req->setFlags(Request::UNCACHEABLE);
+                req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER);
         } else {
             //Use the address which already has segmentation applied.
             DPRINTF(TLB, "Paging disabled.\n");
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index 875cb2946..50b1b12ce 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -144,7 +144,7 @@ class BaseDynInst : public ExecContext, public RefCounted
          *  @todo: Consider if this is necessary or not.
          */
         EACalcDone,
-        IsUncacheable,
+        IsStrictlyOrdered,
         ReqMade,
         MemOpDone,
         MaxFlags
@@ -834,8 +834,8 @@ class BaseDynInst : public ExecContext, public RefCounted
     /** Returns whether or not the eff. addr. source registers are ready. */
     bool eaSrcsReady();
 
-    /** Is this instruction's memory access uncacheable. */
-    bool uncacheable() { return instFlags[IsUncacheable]; }
+    /** Is this instruction's memory access strictly ordered? */
+    bool strictlyOrdered() const { return instFlags[IsStrictlyOrdered]; }
 
     /** Has this instruction generated a memory request. */
     bool hasRequest() { return instFlags[ReqMade]; }
@@ -1052,7 +1052,7 @@ BaseDynInst<Impl>::finishTranslation(WholeTranslationState *state)
 {
     fault = state->getFault();
 
-    instFlags[IsUncacheable] = state->isUncacheable();
+    instFlags[IsStrictlyOrdered] = state->isStrictlyOrdered();
 
     if (fault == NoFault) {
         physEffAddr = state->getPaddr();
diff --git a/src/cpu/minor/lsq.cc b/src/cpu/minor/lsq.cc
index ff609deac..db57daa37 100644
--- a/src/cpu/minor/lsq.cc
+++ b/src/cpu/minor/lsq.cc
@@ -926,7 +926,7 @@ LSQ::tryToSendToTransfers(LSQRequestPtr request)
     bool is_load = request->isLoad;
     bool is_llsc = request->request.isLLSC();
     bool is_swap = request->request.isSwap();
-    bool bufferable = !(request->request.isUncacheable() ||
+    bool bufferable = !(request->request.isStrictlyOrdered() ||
         is_llsc || is_swap);
 
     if (is_load) {
diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh
index a425484f5..4da251104 100644
--- a/src/cpu/o3/comm.hh
+++ b/src/cpu/o3/comm.hh
@@ -185,8 +185,9 @@ struct TimeBufStruct {
         /// Instruction that caused the a non-mispredict squash
         DynInstPtr squashInst; // *F
 
-        /// Hack for now to send back an uncached access to the IEW stage.
-        DynInstPtr uncachedLoad; // *I
+        /// Hack for now to send back a strictly ordered access to the
+        /// IEW stage.
+        DynInstPtr strictlyOrderedLoad; // *I
 
         /// Communication specifically to the IQ to tell the IQ that it can
         /// schedule a non-speculative instruction.
@@ -216,8 +217,9 @@ struct TimeBufStruct {
         /// If the interrupt ended up being cleared before being handled
         bool clearInterrupt; // *F
 
-        /// Hack for now to send back an uncached access to the IEW stage.
-        bool uncached; // *I
+        /// Hack for now to send back a strictly ordered access to
+        /// the IEW stage.
+        bool strictlyOrdered; // *I
 
     };
 
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index aa1948602..5323e1413 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -1145,7 +1145,7 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
         // think are possible.
         assert(head_inst->isNonSpeculative() || head_inst->isStoreConditional()
                || head_inst->isMemBarrier() || head_inst->isWriteBarrier() ||
-               (head_inst->isLoad() && head_inst->uncacheable()));
+               (head_inst->isLoad() && head_inst->strictlyOrdered()));
 
         DPRINTF(Commit, "Encountered a barrier or non-speculative "
                 "instruction [sn:%lli] at the head of the ROB, PC %s.\n",
@@ -1162,11 +1162,11 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
         // it is executed.
         head_inst->clearCanCommit();
 
-        if (head_inst->isLoad() && head_inst->uncacheable()) {
-            DPRINTF(Commit, "[sn:%lli]: Uncached load, PC %s.\n",
+        if (head_inst->isLoad() && head_inst->strictlyOrdered()) {
+            DPRINTF(Commit, "[sn:%lli]: Strictly ordered load, PC %s.\n",
                     head_inst->seqNum, head_inst->pcState());
-            toIEW->commitInfo[tid].uncached = true;
-            toIEW->commitInfo[tid].uncachedLoad = head_inst;
+            toIEW->commitInfo[tid].strictlyOrdered = true;
+            toIEW->commitInfo[tid].strictlyOrderedLoad = head_inst;
         } else {
             ++commitNonSpecStalls;
         }
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index 4741df634..730eb0cfe 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -1418,9 +1418,9 @@ DefaultIEW<Impl>::writebackInsts()
 
         // Some instructions will be sent to commit without having
         // executed because they need commit to handle them.
-        // E.g. Uncached loads have not actually executed when they
+        // E.g. Strictly ordered loads have not actually executed when they
         // are first sent to commit.  Instead commit must tell the LSQ
-        // when it's ready to execute the uncached load.
+        // when it's ready to execute the strictly ordered load.
         if (!inst->isSquashed() && inst->isExecuted() && inst->getFault() == NoFault) {
             int dependents = instQueue.wakeDependents(inst);
 
@@ -1522,9 +1522,10 @@ DefaultIEW<Impl>::tick()
         if (fromCommit->commitInfo[tid].nonSpecSeqNum != 0) {
 
             //DPRINTF(IEW,"NonspecInst from thread %i",tid);
-            if (fromCommit->commitInfo[tid].uncached) {
-                instQueue.replayMemInst(fromCommit->commitInfo[tid].uncachedLoad);
-                fromCommit->commitInfo[tid].uncachedLoad->setAtCommit();
+            if (fromCommit->commitInfo[tid].strictlyOrdered) {
+                instQueue.replayMemInst(
+                    fromCommit->commitInfo[tid].strictlyOrderedLoad);
+                fromCommit->commitInfo[tid].strictlyOrderedLoad->setAtCommit();
             } else {
                 instQueue.scheduleNonSpec(
                     fromCommit->commitInfo[tid].nonSpecSeqNum);
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index 6fe832bf6..e356dd442 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -559,15 +559,15 @@ LSQUnit<Impl>::read(Request *req, Request *sreqLow, Request *sreqHigh,
 
     assert(!load_inst->isExecuted());
 
-    // Make sure this isn't an uncacheable access
-    // A bit of a hackish way to get uncached accesses to work only if they're
-    // at the head of the LSQ and are ready to commit (at the head of the ROB
-    // too).
-    if (req->isUncacheable() &&
+    // Make sure this isn't a strictly ordered load.
+    // A bit of a hackish way to get strictly ordered accesses to work
+    // only if they're at the head of the LSQ and are ready to commit
+    // (at the head of the ROB too).
+    if (req->isStrictlyOrdered() &&
         (load_idx != loadHead || !load_inst->isAtCommit())) {
         iewStage->rescheduleMemInst(load_inst);
         ++lsqRescheduledLoads;
-        DPRINTF(LSQUnit, "Uncachable load [sn:%lli] PC %s\n",
+        DPRINTF(LSQUnit, "Strictly ordered load [sn:%lli] PC %s\n",
                 load_inst->seqNum, load_inst->pcState());
 
         // Must delete request now that it wasn't handed off to
@@ -579,7 +579,7 @@ LSQUnit<Impl>::read(Request *req, Request *sreqLow, Request *sreqHigh,
             delete sreqHigh;
         }
         return std::make_shared<GenericISA::M5PanicFault>(
-            "Uncachable load [sn:%llx] PC %s\n",
+            "Strictly ordered load [sn:%llx] PC %s\n",
             load_inst->seqNum, load_inst->pcState());
     }
 
@@ -653,7 +653,7 @@ LSQUnit<Impl>::read(Request *req, Request *sreqLow, Request *sreqHigh,
 
         if (store_size == 0)
             continue;
-        else if (storeQueue[store_idx].inst->uncacheable())
+        else if (storeQueue[store_idx].inst->strictlyOrdered())
             continue;
 
         assert(storeQueue[store_idx].inst->effAddrValid());
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 9c500443e..3019e80d2 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -471,7 +471,7 @@ LSQUnit<Impl>::checkSnoop(PacketPtr pkt)
     while (load_idx != loadTail) {
         DynInstPtr ld_inst = loadQueue[load_idx];
 
-        if (!ld_inst->effAddrValid() || ld_inst->uncacheable()) {
+        if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered()) {
             incrLdIdx(load_idx);
             continue;
         }
@@ -528,7 +528,7 @@ LSQUnit<Impl>::checkViolations(int load_idx, DynInstPtr &inst)
      */
     while (load_idx != loadTail) {
         DynInstPtr ld_inst = loadQueue[load_idx];
-        if (!ld_inst->effAddrValid() || ld_inst->uncacheable()) {
+        if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered()) {
             incrLdIdx(load_idx);
             continue;
         }
@@ -617,15 +617,15 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
     // along to commit without the instruction completing.
     if (load_fault != NoFault || !inst->readPredicate()) {
         // Send this instruction to commit, also make sure iew stage
-        // realizes there is activity.
-        // Mark it as executed unless it is an uncached load that
-        // needs to hit the head of commit.
+        // realizes there is activity.  Mark it as executed unless it
+        // is a strictly ordered load that needs to hit the head of
+        // commit.
         if (!inst->readPredicate())
             inst->forwardOldRegs();
         DPRINTF(LSQUnit, "Load [sn:%lli] not executed from %s\n",
                 inst->seqNum,
                 (load_fault != NoFault ? "fault" : "predication"));
-        if (!(inst->hasRequest() && inst->uncacheable()) ||
+        if (!(inst->hasRequest() && inst->strictlyOrdered()) ||
             inst->isAtCommit()) {
             inst->setExecuted();
         }
diff --git a/src/cpu/translation.hh b/src/cpu/translation.hh
index 4ff75546a..a7372f3ee 100644
--- a/src/cpu/translation.hh
+++ b/src/cpu/translation.hh
@@ -153,14 +153,14 @@ class WholeTranslationState
     }
 
     /**
-     * Check if this request is uncacheable.  We only need to check the main
-     * request because the flags will have been copied here on a split
-     * translation.
+     * Check if this request is a strictly ordered device access.  We
+     * only need to check the main request because the flags will have
+     * been copied here on a split translation.
      */
     bool
-    isUncacheable() const
+    isStrictlyOrdered() const
     {
-        return mainReq->isUncacheable();
+        return mainReq->isStrictlyOrdered();
     }
 
     /**
diff --git a/src/mem/request.hh b/src/mem/request.hh
index 029636100..5a2130029 100644
--- a/src/mem/request.hh
+++ b/src/mem/request.hh
@@ -101,8 +101,24 @@ class Request
     static const FlagsType INST_FETCH                  = 0x00000100;
     /** The virtual address is also the physical address. */
     static const FlagsType PHYSICAL                    = 0x00000200;
-    /** The request is to an uncacheable address. */
-    static const FlagsType UNCACHEABLE                 = 0x00001000;
+    /**
+     * The request is to an uncacheable address.
+     *
+     * @note Uncacheable accesses may be reordered by CPU models. The
+     * STRICT_ORDER flag should be set if such reordering is
+     * undesirable.
+     */
+    static const FlagsType UNCACHEABLE                = 0x00000400;
+    /**
+     * The request is required to be strictly ordered by <i>CPU
+     * models</i> and is non-speculative.
+     *
+     * A strictly ordered request is guaranteed to never be re-ordered
+     * or executed speculatively by a CPU model. The memory system may
+     * still reorder requests in caches unless the UNCACHEABLE flag is
+     * set as well.
+     */
+    static const FlagsType STRICT_ORDER                = 0x00000800;
     /** This request is to a memory mapped register. */
     static const FlagsType MMAPPED_IPR                 = 0x00002000;
     /** This request is a clear exclusive. */
@@ -618,6 +634,7 @@ class Request
     /** Accessor functions for flags.  Note that these are for testing
        only; setting flags should be done via setFlags(). */
     bool isUncacheable() const { return _flags.isSet(UNCACHEABLE); }
+    bool isStrictlyOrdered() const { return _flags.isSet(STRICT_ORDER); }
     bool isInstFetch() const { return _flags.isSet(INST_FETCH); }
     bool isPrefetch() const { return _flags.isSet(PREFETCH); }
     bool isLLSC() const { return _flags.isSet(LLSC); }