summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAli Saidi <Ali.Saidi@ARM.com>2011-04-04 11:42:23 -0500
committerAli Saidi <Ali.Saidi@ARM.com>2011-04-04 11:42:23 -0500
commit7dde557fdc51140988092962137e1006d1609bea (patch)
tree7511f093667759fb13a88cb204247b416295b3c9
parentee489a541a8a49de4e8ca6e486081149f57de99f (diff)
downloadgem5-7dde557fdc51140988092962137e1006d1609bea.tar.xz
O3: Tighten memory order violation checking to 16 bytes.
The comment in the code suggests that the checking granularity should be 16 bytes, however in reality the shift by 8 is 256 bytes which seems much larger than required.
-rw-r--r--src/cpu/base_dyn_inst.hh5
-rw-r--r--src/cpu/o3/O3CPU.py3
-rw-r--r--src/cpu/o3/lsq_unit.hh14
-rw-r--r--src/cpu/o3/lsq_unit_impl.hh124
4 files changed, 77 insertions, 69 deletions
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index 8b6662d70..a5357a7b0 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -243,6 +243,9 @@ class BaseDynInst : public FastAlloc, public RefCounted
/** The effective virtual address (lds & stores only). */
Addr effAddr;
+ /** The size of the request */
+ Addr effSize;
+
/** Is the effective virtual address valid. */
bool effAddrValid;
@@ -892,6 +895,7 @@ BaseDynInst<Impl>::readBytes(Addr addr, uint8_t *data,
if (translationCompleted) {
if (fault == NoFault) {
effAddr = req->getVaddr();
+ effSize = size;
effAddrValid = true;
fault = cpu->read(req, sreqLow, sreqHigh, data, lqIdx);
} else {
@@ -962,6 +966,7 @@ BaseDynInst<Impl>::writeBytes(uint8_t *data, unsigned size,
if (fault == NoFault && translationCompleted) {
effAddr = req->getVaddr();
+ effSize = size;
effAddrValid = true;
fault = cpu->write(req, sreqLow, sreqHigh, data, sqIdx);
}
diff --git a/src/cpu/o3/O3CPU.py b/src/cpu/o3/O3CPU.py
index f7602cb86..f379fcd8a 100644
--- a/src/cpu/o3/O3CPU.py
+++ b/src/cpu/o3/O3CPU.py
@@ -118,6 +118,9 @@ class DerivO3CPU(BaseCPU):
LQEntries = Param.Unsigned(32, "Number of load queue entries")
SQEntries = Param.Unsigned(32, "Number of store queue entries")
+ LSQDepCheckShift = Param.Unsigned(4, "Number of places to shift addr before check")
+ LSQCheckLoads = Param.Bool(True,
+ "Should dependency violations be checked for loads & stores or just stores")
LFSTSize = Param.Unsigned(1024, "Last fetched store table size")
SSITSize = Param.Unsigned(1024, "Store set ID table size")
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index 2bb42cadc..bdc524dec 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -111,6 +111,12 @@ class LSQUnit {
/** Inserts a store instruction. */
void insertStore(DynInstPtr &store_inst);
+ /** Check for ordering violations in the LSQ
+ * @param load_idx index to start checking at
+ * @param inst the instruction to check
+ */
+ Fault checkViolations(int load_idx, DynInstPtr &inst);
+
/** Executes a load instruction. */
Fault executeLoad(DynInstPtr &inst);
@@ -366,6 +372,14 @@ class LSQUnit {
*/
unsigned SQEntries;
+ /** The number of places to shift addresses in the LSQ before checking
+ * for dependency violations
+ */
+ unsigned depCheckShift;
+
+ /** Should loads be checked for dependency issues */
+ bool checkLoads;
+
/** The number of load instructions in the LQ. */
int loads;
/** The number of store instructions in the SQ. */
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 1a4e686a3..70b87ff26 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -162,6 +162,9 @@ LSQUnit<Impl>::init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params,
loadQueue.resize(LQEntries);
storeQueue.resize(SQEntries);
+ depCheckShift = params->LSQDepCheckShift;
+ checkLoads = params->LSQCheckLoads;
+
loadHead = loadTail = 0;
storeHead = storeWBIdx = storeTail = 0;
@@ -438,6 +441,55 @@ LSQUnit<Impl>::numLoadsReady()
template <class Impl>
Fault
+LSQUnit<Impl>::checkViolations(int load_idx, DynInstPtr &inst)
+{
+ Addr inst_eff_addr1 = inst->effAddr >> depCheckShift;
+ Addr inst_eff_addr2 = (inst->effAddr + inst->effSize - 1) >> depCheckShift;
+
+ /** @todo in theory you only need to check an instruction that has executed
+ * however, there isn't a good way in the pipeline at the moment to check
+ * all instructions that will execute before the store writes back. Thus,
+ * like the implementation that came before it, we're overly conservative.
+ */
+ while (load_idx != loadTail) {
+ DynInstPtr ld_inst = loadQueue[load_idx];
+ if (!ld_inst->effAddrValid || ld_inst->uncacheable()) {
+ incrLdIdx(load_idx);
+ continue;
+ }
+
+ Addr ld_eff_addr1 = ld_inst->effAddr >> depCheckShift;
+ Addr ld_eff_addr2 =
+ (ld_inst->effAddr + ld_inst->effSize - 1) >> depCheckShift;
+
+ if ((inst_eff_addr2 > ld_eff_addr1 && inst_eff_addr1 < ld_eff_addr2) ||
+ inst_eff_addr1 == ld_eff_addr1) {
+ // A load/store incorrectly passed this load/store.
+ // Check if we already have a violator, or if it's newer
+ // squash and refetch.
+ if (memDepViolator && ld_inst->seqNum > memDepViolator->seqNum)
+ break;
+
+ DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] and [sn:%lli]"
+ " at address %#x\n", inst->seqNum, ld_inst->seqNum,
+ ld_eff_addr1);
+ memDepViolator = ld_inst;
+
+ ++lsqMemOrderViolation;
+
+ return TheISA::genMachineCheckFault();
+ }
+
+ incrLdIdx(load_idx);
+ }
+ return NoFault;
+}
+
+
+
+
+template <class Impl>
+Fault
LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
{
using namespace TheISA;
@@ -477,39 +529,9 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
assert(inst->effAddrValid);
int load_idx = inst->lqIdx;
incrLdIdx(load_idx);
- while (load_idx != loadTail) {
- // Really only need to check loads that have actually executed
-
- // @todo: For now this is extra conservative, detecting a
- // violation if the addresses match assuming all accesses
- // are quad word accesses.
-
- // @todo: Fix this, magic number being used here
-
- // @todo: Uncachable load is not executed until it reaches
- // the head of the ROB. Once this if checks only the executed
- // loads(as noted above), this check can be removed
- if (loadQueue[load_idx]->effAddrValid &&
- ((loadQueue[load_idx]->effAddr >> 8)
- == (inst->effAddr >> 8)) &&
- !loadQueue[load_idx]->uncacheable()) {
- // A load incorrectly passed this load. Squash and refetch.
- // For now return a fault to show that it was unsuccessful.
- DynInstPtr violator = loadQueue[load_idx];
- if (!memDepViolator ||
- (violator->seqNum < memDepViolator->seqNum)) {
- memDepViolator = violator;
- } else {
- break;
- }
-
- ++lsqMemOrderViolation;
- return genMachineCheckFault();
- }
-
- incrLdIdx(load_idx);
- }
+ if (checkLoads)
+ return checkViolations(load_idx, inst);
}
return load_fault;
@@ -564,44 +586,8 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
++storesToWB;
}
- assert(store_inst->effAddrValid);
- while (load_idx != loadTail) {
- // Really only need to check loads that have actually executed
- // It's safe to check all loads because effAddr is set to
- // InvalAddr when the dyn inst is created.
-
- // @todo: For now this is extra conservative, detecting a
- // violation if the addresses match assuming all accesses
- // are quad word accesses.
-
- // @todo: Fix this, magic number being used here
-
- // @todo: Uncachable load is not executed until it reaches
- // the head of the ROB. Once this if checks only the executed
- // loads(as noted above), this check can be removed
- if (loadQueue[load_idx]->effAddrValid &&
- ((loadQueue[load_idx]->effAddr >> 8)
- == (store_inst->effAddr >> 8)) &&
- !loadQueue[load_idx]->uncacheable()) {
- // A load incorrectly passed this store. Squash and refetch.
- // For now return a fault to show that it was unsuccessful.
- DynInstPtr violator = loadQueue[load_idx];
- if (!memDepViolator ||
- (violator->seqNum < memDepViolator->seqNum)) {
- memDepViolator = violator;
- } else {
- break;
- }
-
- ++lsqMemOrderViolation;
-
- return genMachineCheckFault();
- }
-
- incrLdIdx(load_idx);
- }
+ return checkViolations(load_idx, store_inst);
- return store_fault;
}
template <class Impl>