summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Nilay Vaish <nilay@cs.wisc.edu>, 2012-01-28 19:09:04 -0600
committer: Nilay Vaish <nilay@cs.wisc.edu>, 2012-01-28 19:09:04 -0600
commit: 5c2fc35e029d8cd8e69e983e1baef6b86e47d64d (patch)
tree: 03e3f37b4cef15a165f79b5dde0a7abee14e0523
parent: 4acca8a0536d4445ed25b67edf571ae460446ab9 (diff)
download: gem5-5c2fc35e029d8cd8e69e983e1baef6b86e47d64d.tar.xz
O3 CPU LSQ: Implement TSO
This patch makes O3's LSQ maintain a total order between stores. Essentially, only the store at the head of the store buffer is allowed to be in flight; the next store is issued to the memory system only after that store completes. TSO is controlled by the new needsTSO parameter, which by default is enabled only when the target ISA is x86.
-rw-r--r-- src/cpu/o3/O3CPU.py 2
-rw-r--r-- src/cpu/o3/lsq_unit.hh 6
-rw-r--r-- src/cpu/o3/lsq_unit_impl.hh 12
3 files changed, 19 insertions, 1 deletion
diff --git a/src/cpu/o3/O3CPU.py b/src/cpu/o3/O3CPU.py
index 9dfcc8b9e..6f721a11b 100644
--- a/src/cpu/o3/O3CPU.py
+++ b/src/cpu/o3/O3CPU.py
@@ -143,3 +143,5 @@ class DerivO3CPU(BaseCPU):
smtROBThreshold = Param.Int(100, "SMT ROB Threshold Sharing Parameter")
smtCommitPolicy = Param.String('RoundRobin', "SMT Commit Policy")
+ needsTSO = Param.Bool(buildEnv['TARGET_ISA'] == 'x86',
+ "Enable TSO Memory model")
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index 3c1af4533..a11d95f3b 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -453,6 +453,9 @@ class LSQUnit {
/** Has the blocked load been handled. */
bool loadBlockedHandled;
+ /** Whether or not a store is in flight. */
+ bool storeInFlight;
+
/** The sequence number of the blocked load. */
InstSeqNum blockedLoadSeqNum;
@@ -466,6 +469,9 @@ class LSQUnit {
/** The packet that is pending free cache ports. */
PacketPtr pendingPkt;
+ /** Flag for memory model. */
+ bool needsTSO;
+
// Will also need how many read/write ports the Dcache has. Or keep track
// of that in stage that is one level up, and only call executeLoad/Store
// the appropriate number of times.
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index a0452b4ae..d0db6f6fe 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -138,7 +138,7 @@ template <class Impl>
LSQUnit<Impl>::LSQUnit()
: loads(0), stores(0), storesToWB(0), cacheBlockMask(0), stalled(false),
isStoreBlocked(false), isLoadBlocked(false),
- loadBlockedHandled(false), hasPendingPkt(false)
+ loadBlockedHandled(false), storeInFlight(false), hasPendingPkt(false)
{
}
@@ -182,6 +182,7 @@ LSQUnit<Impl>::init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params,
memDepViolator = NULL;
blockedLoadSeqNum = 0;
+ needsTSO = params->needsTSO;
}
template<class Impl>
@@ -770,6 +771,7 @@ LSQUnit<Impl>::writebackStores()
storeWBIdx != storeTail &&
storeQueue[storeWBIdx].inst &&
storeQueue[storeWBIdx].canWB &&
+ ((!needsTSO) || (!storeInFlight)) &&
usedPorts < cachePorts) {
if (isStoreBlocked || lsq->cacheBlocked()) {
@@ -1090,6 +1092,10 @@ LSQUnit<Impl>::storePostSend(PacketPtr pkt)
#endif
}
+ if (needsTSO) {
+ storeInFlight = true;
+ }
+
incrStIdx(storeWBIdx);
}
@@ -1163,6 +1169,10 @@ LSQUnit<Impl>::completeStore(int store_idx)
storeQueue[store_idx].inst->setCompleted();
+ if (needsTSO) {
+ storeInFlight = false;
+ }
+
// Tell the checker we've completed this instruction. Some stores
// may get reported twice to the checker, but the checker can
// handle that case.