summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAli Saidi <saidi@eecs.umich.edu>2011-09-13 12:58:08 -0400
committerAli Saidi <saidi@eecs.umich.edu>2011-09-13 12:58:08 -0400
commit649c239ceef2d107fae253b1008c6f214f242d73 (patch)
tree06edeb7bfa7b9d1e9167b9cb80f7f9600d41b9a0 /src
parentbb921b1459ef3ec55f9cea4ac8d203cd3c801cfd (diff)
downloadgem5-649c239ceef2d107fae253b1008c6f214f242d73.tar.xz
LSQ: Only trigger a memory violation with a load/load if the value changes.
Only create a memory ordering violation when the value could have changed between two subsequent loads, instead of just when loads go out-of-order to the same address. While not very common in the case of Alpha, with an architecture with a hardware table walker this can happen reasonably frequently beacuse a translation will miss and start a table walk and before the CPU re-schedules the faulting instruction another one will pass it to the same address (or cache block depending on the dendency checking). This patch has been tested with a couple of self-checking hand crafted programs to stress ordering between two cores. The performance improvement on SPEC benchmarks can be substantial (2-10%).
Diffstat (limited to 'src')
-rw-r--r--src/arch/arm/faults.cc14
-rw-r--r--src/arch/arm/faults.hh10
-rw-r--r--src/cpu/base_dyn_inst.hh13
-rw-r--r--src/cpu/base_dyn_inst_impl.hh2
-rw-r--r--src/cpu/o3/lsq_impl.hh14
-rw-r--r--src/cpu/o3/lsq_unit.hh13
-rw-r--r--src/cpu/o3/lsq_unit_impl.hh109
-rw-r--r--src/sim/faults.cc6
-rw-r--r--src/sim/faults.hh10
9 files changed, 150 insertions, 41 deletions
diff --git a/src/arch/arm/faults.cc b/src/arch/arm/faults.cc
index 3c361404e..68c5fa0e8 100644
--- a/src/arch/arm/faults.cc
+++ b/src/arch/arm/faults.cc
@@ -75,9 +75,6 @@ template<> ArmFault::FaultVals ArmFaultVals<FastInterrupt>::vals =
template<> ArmFault::FaultVals ArmFaultVals<FlushPipe>::vals =
{"Pipe Flush", 0x00, MODE_SVC, 0, 0, true, true}; // some dummy values
-template<> ArmFault::FaultVals ArmFaultVals<ReExec>::vals =
- {"ReExec Flush", 0x00, MODE_SVC, 0, 0, true, true}; // some dummy values
-
template<> ArmFault::FaultVals ArmFaultVals<ArmSev>::vals =
{"ArmSev Flush", 0x00, MODE_SVC, 0, 0, true, true}; // some dummy values
Addr
@@ -240,17 +237,6 @@ FlushPipe::invoke(ThreadContext *tc, StaticInstPtr inst) {
tc->pcState(pc);
}
-void
-ReExec::invoke(ThreadContext *tc, StaticInstPtr inst) {
- DPRINTF(Faults, "Invoking ReExec Fault\n");
-
- // Set the PC to then the faulting instruction.
- // Net effect is simply squashing all instructions including this
- // instruction and refetching/rexecuting current instruction
- PCState pc = tc->pcState();
- tc->pcState(pc);
-}
-
template void AbortFault<PrefetchAbort>::invoke(ThreadContext *tc,
StaticInstPtr inst);
template void AbortFault<DataAbort>::invoke(ThreadContext *tc,
diff --git a/src/arch/arm/faults.hh b/src/arch/arm/faults.hh
index 54edb336b..fe1258a16 100644
--- a/src/arch/arm/faults.hh
+++ b/src/arch/arm/faults.hh
@@ -242,16 +242,6 @@ class FlushPipe : public ArmFaultVals<FlushPipe>
StaticInstPtr inst = StaticInst::nullStaticInstPtr);
};
-// A fault that flushes the pipe, including the faulting instructions
-class ReExec : public ArmFaultVals<ReExec>
-{
- public:
- ReExec() {}
- void invoke(ThreadContext *tc,
- StaticInstPtr inst = StaticInst::nullStaticInstPtr);
-};
-
-
static inline Fault genMachineCheckFault()
{
return new Reset();
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index 5f0a6106e..5719fc84d 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -146,6 +146,19 @@ class BaseDynInst : public FastAlloc, public RefCounted
/** True if the DTB address translation has completed. */
bool translationCompleted;
+ /** True if this address was found to match a previous load and they issued
+ * out of order. If that happend, then it's only a problem if an incoming
+ * snoop invalidate modifies the line, in which case we need to squash.
+ * If nothing modified the line the order doesn't matter.
+ */
+ bool possibleLoadViolation;
+
+ /** True if the address hit a external snoop while sitting in the LSQ.
+ * If this is true and a older instruction sees it, this instruction must
+ * reexecute
+ */
+ bool hitExternalSnoop;
+
/**
* Returns true if the DTB address translation is being delayed due to a hw
* page table walk.
diff --git a/src/cpu/base_dyn_inst_impl.hh b/src/cpu/base_dyn_inst_impl.hh
index b2819bb26..a37ec5e25 100644
--- a/src/cpu/base_dyn_inst_impl.hh
+++ b/src/cpu/base_dyn_inst_impl.hh
@@ -110,6 +110,8 @@ BaseDynInst<Impl>::initVars()
translationStarted = false;
translationCompleted = false;
+ possibleLoadViolation = false;
+ hitExternalSnoop = false;
isUncacheable = false;
reqMade = false;
diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh
index 41b0f3041..ef9167d8c 100644
--- a/src/cpu/o3/lsq_impl.hh
+++ b/src/cpu/o3/lsq_impl.hh
@@ -90,11 +90,17 @@ LSQ<Impl>::DcachePort::recvTiming(PacketPtr pkt)
DPRINTF(LSQ, "Got error packet back for address: %#X\n", pkt->getAddr());
if (pkt->isResponse()) {
lsq->thread[pkt->req->threadId()].completeDataAccess(pkt);
- }
- else {
- // must be a snoop
+ } else {
+ DPRINTF(LSQ, "received pkt for addr:%#x %s\n", pkt->getAddr(),
+ pkt->cmdString());
- // @TODO someday may need to process invalidations in LSQ here
+ // must be a snoop
+ if (pkt->isInvalidate()) {
+ DPRINTF(LSQ, "received invalidation for addr:%#x\n", pkt->getAddr());
+ for (ThreadID tid = 0; tid < lsq->numThreads; tid++) {
+ lsq->thread[tid].checkSnoop(pkt);
+ }
+ }
// to provide stronger consistency model
}
return true;
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index 693bee82c..af926759c 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -115,12 +115,20 @@ class LSQUnit {
/** Inserts a store instruction. */
void insertStore(DynInstPtr &store_inst);
- /** Check for ordering violations in the LSQ
+ /** Check for ordering violations in the LSQ. For a store squash if we
+ * ever find a conflicting load. For a load, only squash if we
+ * an external snoop invalidate has been seen for that load address
* @param load_idx index to start checking at
* @param inst the instruction to check
*/
Fault checkViolations(int load_idx, DynInstPtr &inst);
+ /** Check if an incoming invalidate hits in the lsq on a load
+ * that might have issued out of order wrt another load beacuse
+ * of the intermediate invalidate.
+ */
+ void checkSnoop(PacketPtr pkt);
+
/** Executes a load instruction. */
Fault executeLoad(DynInstPtr &inst);
@@ -417,6 +425,9 @@ class LSQUnit {
//list<InstSeqNum> mshrSeqNums;
+ /** Address Mask for a cache block (e.g. ~(cache_block_size-1)) */
+ Addr cacheBlockMask;
+
/** Wire to read information from the issue stage time queue. */
typename TimeBuffer<IssueStruct>::wire fromIssue;
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 79a20a673..f934e2032 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -135,7 +135,7 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
template <class Impl>
LSQUnit<Impl>::LSQUnit()
- : loads(0), stores(0), storesToWB(0), stalled(false),
+ : loads(0), stores(0), storesToWB(0), cacheBlockMask(0), stalled(false),
isStoreBlocked(false), isLoadBlocked(false),
loadBlockedHandled(false), hasPendingPkt(false)
{
@@ -154,6 +154,8 @@ LSQUnit<Impl>::init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params,
switchedOut = false;
+ cacheBlockMask = 0;
+
lsq = lsq_ptr;
lsqID = id;
@@ -297,6 +299,9 @@ LSQUnit<Impl>::takeOverFrom()
stalled = false;
isLoadBlocked = false;
loadBlockedHandled = false;
+
+ // Just incase the memory system changed out from under us
+ cacheBlockMask = 0;
}
template<class Impl>
@@ -443,6 +448,60 @@ LSQUnit<Impl>::numLoadsReady()
}
template <class Impl>
+void
+LSQUnit<Impl>::checkSnoop(PacketPtr pkt)
+{
+ int load_idx = loadHead;
+
+ if (!cacheBlockMask) {
+ assert(dcachePort);
+ Addr bs = dcachePort->peerBlockSize();
+
+ // Make sure we actually got a size
+ assert(bs != 0);
+
+ cacheBlockMask = ~(bs - 1);
+ }
+
+ // If this is the only load in the LSQ we don't care
+ if (load_idx == loadTail)
+ return;
+ incrLdIdx(load_idx);
+
+ DPRINTF(LSQUnit, "Got snoop for address %#x\n", pkt->getAddr());
+ Addr invalidate_addr = pkt->getAddr() & cacheBlockMask;
+ while (load_idx != loadTail) {
+ DynInstPtr ld_inst = loadQueue[load_idx];
+
+ if (!ld_inst->effAddrValid || ld_inst->uncacheable()) {
+ incrLdIdx(load_idx);
+ continue;
+ }
+
+ Addr load_addr = ld_inst->physEffAddr & cacheBlockMask;
+ DPRINTF(LSQUnit, "-- inst [sn:%lli] load_addr: %#x to pktAddr:%#x\n",
+ ld_inst->seqNum, load_addr, invalidate_addr);
+
+ if (load_addr == invalidate_addr) {
+ if (ld_inst->possibleLoadViolation) {
+ DPRINTF(LSQUnit, "Conflicting load at addr %#x [sn:%lli]\n",
+ ld_inst->physEffAddr, pkt->getAddr(), ld_inst->seqNum);
+
+ // Mark the load for re-execution
+ ld_inst->fault = new ReExec;
+ } else {
+ // If a older load checks this and it's true
+ // then we might have missed the snoop
+ // in which case we need to invalidate to be sure
+ ld_inst->hitExternalSnoop = true;
+ }
+ }
+ incrLdIdx(load_idx);
+ }
+ return;
+}
+
+template <class Impl>
Fault
LSQUnit<Impl>::checkViolations(int load_idx, DynInstPtr &inst)
{
@@ -466,20 +525,46 @@ LSQUnit<Impl>::checkViolations(int load_idx, DynInstPtr &inst)
(ld_inst->effAddr + ld_inst->effSize - 1) >> depCheckShift;
if (inst_eff_addr2 >= ld_eff_addr1 && inst_eff_addr1 <= ld_eff_addr2) {
- // A load/store incorrectly passed this load/store.
- // Check if we already have a violator, or if it's newer
- // squash and refetch.
- if (memDepViolator && ld_inst->seqNum > memDepViolator->seqNum)
- break;
+ if (inst->isLoad()) {
+ // If this load is to the same block as an external snoop
+ // invalidate that we've observed then the load needs to be
+ // squashed as it could have newer data
+ if (ld_inst->hitExternalSnoop) {
+ if (!memDepViolator ||
+ ld_inst->seqNum < memDepViolator->seqNum) {
+ DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] "
+ " and [sn:%lli] at address %#x\n", inst->seqNum,
+ ld_inst->seqNum, ld_eff_addr1);
+ memDepViolator = ld_inst;
+
+ ++lsqMemOrderViolation;
+
+ return TheISA::genMachineCheckFault();
+ }
+ }
+
+ // Otherwise, mark the load has a possible load violation
+ // and if we see a snoop before it's commited, we need to squash
+ ld_inst->possibleLoadViolation = true;
+ DPRINTF(LSQUnit, "Found possible load violaiton at addr: %#x"
+ " between instructions [sn:%lli] and [sn:%lli]\n",
+ inst_eff_addr1, inst->seqNum, ld_inst->seqNum);
+ } else {
+ // A load/store incorrectly passed this store.
+ // Check if we already have a violator, or if it's newer
+ // squash and refetch.
+ if (memDepViolator && ld_inst->seqNum > memDepViolator->seqNum)
+ break;
- DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] and [sn:%lli]"
- " at address %#x\n", inst->seqNum, ld_inst->seqNum,
- ld_eff_addr1);
- memDepViolator = ld_inst;
+ DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] and [sn:%lli]"
+ " at address %#x\n", inst->seqNum, ld_inst->seqNum,
+ ld_eff_addr1);
+ memDepViolator = ld_inst;
- ++lsqMemOrderViolation;
+ ++lsqMemOrderViolation;
- return TheISA::genMachineCheckFault();
+ return TheISA::genMachineCheckFault();
+ }
}
incrLdIdx(load_idx);
diff --git a/src/sim/faults.cc b/src/sim/faults.cc
index 3f1369bcc..bd05df834 100644
--- a/src/sim/faults.cc
+++ b/src/sim/faults.cc
@@ -56,6 +56,12 @@ void UnimpFault::invoke(ThreadContext * tc, StaticInstPtr inst)
panic("Unimpfault: %s\n", panicStr.c_str());
}
+void ReExec::invoke(ThreadContext *tc, StaticInstPtr inst)
+{
+ tc->pcState(tc->pcState());
+}
+
+
#if !FULL_SYSTEM
void GenericPageTableFault::invoke(ThreadContext *tc, StaticInstPtr inst)
{
diff --git a/src/sim/faults.hh b/src/sim/faults.hh
index 7f431b313..f5e9aded1 100644
--- a/src/sim/faults.hh
+++ b/src/sim/faults.hh
@@ -75,6 +75,16 @@ class UnimpFault : public FaultBase
StaticInstPtr inst = StaticInst::nullStaticInstPtr);
};
+class ReExec : public FaultBase
+{
+ public:
+ virtual FaultName name() const { return "Re-execution fault";}
+ ReExec() {}
+ void invoke(ThreadContext *tc,
+ StaticInstPtr inst = StaticInst::nullStaticInstPtr);
+};
+
+
#if !FULL_SYSTEM
class GenericPageTableFault : public FaultBase
{