summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ali Saidi <Ali.Saidi@ARM.com> 2011-08-19 15:08:07 -0500
committer Ali Saidi <Ali.Saidi@ARM.com> 2011-08-19 15:08:07 -0500
commit b6203360ef684a8dc32981221336f5d216ce2668 (patch)
tree e4c8e7b374b2685f19016b0f3e79633a858ef8d2
parent 5f425b8bd1ac70b61fc57b7ec44c52cd7d8de9fb (diff)
download gem5-b6203360ef684a8dc32981221336f5d216ce2668.tar.xz
LSQ: Set store predictor to periodically clear itself as recommended in the storesets paper.
This patch improves performance by as much as 10% on some SPEC benchmarks.
-rw-r--r-- src/cpu/o3/O3CPU.py 2
-rw-r--r-- src/cpu/o3/mem_dep_unit_impl.hh 9
-rw-r--r-- src/cpu/o3/store_set.cc 25
-rw-r--r-- src/cpu/o3/store_set.hh 18
4 files changed, 46 insertions, 8 deletions
diff --git a/src/cpu/o3/O3CPU.py b/src/cpu/o3/O3CPU.py
index f379fcd8a..47b18a3ec 100644
--- a/src/cpu/o3/O3CPU.py
+++ b/src/cpu/o3/O3CPU.py
@@ -121,6 +121,8 @@ class DerivO3CPU(BaseCPU):
LSQDepCheckShift = Param.Unsigned(4, "Number of places to shift addr before check")
LSQCheckLoads = Param.Bool(True,
"Should dependency violations be checked for loads & stores or just stores")
+ store_set_clear_period = Param.Unsigned(250000,
+ "Number of load/store insts before the dep predictor should be invalidated")
LFSTSize = Param.Unsigned(1024, "Last fetched store table size")
SSITSize = Param.Unsigned(1024, "Store set ID table size")
diff --git a/src/cpu/o3/mem_dep_unit_impl.hh b/src/cpu/o3/mem_dep_unit_impl.hh
index 0208a622e..d30dcbd3d 100644
--- a/src/cpu/o3/mem_dep_unit_impl.hh
+++ b/src/cpu/o3/mem_dep_unit_impl.hh
@@ -45,8 +45,10 @@ MemDepUnit<MemDepPred, Impl>::MemDepUnit()
template <class MemDepPred, class Impl>
MemDepUnit<MemDepPred, Impl>::MemDepUnit(DerivO3CPUParams *params)
: _name(params->name + ".memdepunit"),
- depPred(params->SSITSize, params->LFSTSize), loadBarrier(false),
- loadBarrierSN(0), storeBarrier(false), storeBarrierSN(0), iqPtr(NULL)
+ depPred(params->store_set_clear_period, params->SSITSize,
+ params->LFSTSize),
+ loadBarrier(false), loadBarrierSN(0), storeBarrier(false),
+ storeBarrierSN(0), iqPtr(NULL)
{
DPRINTF(MemDepUnit, "Creating MemDepUnit object.\n");
}
@@ -85,7 +87,8 @@ MemDepUnit<MemDepPred, Impl>::init(DerivO3CPUParams *params, ThreadID tid)
_name = csprintf("%s.memDep%d", params->name, tid);
id = tid;
- depPred.init(params->SSITSize, params->LFSTSize);
+ depPred.init(params->store_set_clear_period, params->SSITSize,
+ params->LFSTSize);
}
template <class MemDepPred, class Impl>
diff --git a/src/cpu/o3/store_set.cc b/src/cpu/o3/store_set.cc
index fc87c417e..acd4a8d0a 100644
--- a/src/cpu/o3/store_set.cc
+++ b/src/cpu/o3/store_set.cc
@@ -34,8 +34,8 @@
#include "cpu/o3/store_set.hh"
#include "debug/StoreSet.hh"
-StoreSet::StoreSet(int _SSIT_size, int _LFST_size)
- : SSITSize(_SSIT_size), LFSTSize(_LFST_size)
+StoreSet::StoreSet(uint64_t clear_period, int _SSIT_size, int _LFST_size)
+ : clearPeriod(clear_period), SSITSize(_SSIT_size), LFSTSize(_LFST_size)
{
DPRINTF(StoreSet, "StoreSet: Creating store set object.\n");
DPRINTF(StoreSet, "StoreSet: SSIT size: %i, LFST size: %i.\n",
@@ -68,6 +68,8 @@ StoreSet::StoreSet(int _SSIT_size, int _LFST_size)
indexMask = SSITSize - 1;
offsetBits = 2;
+
+ memOpsPred = 0;
}
StoreSet::~StoreSet()
@@ -75,10 +77,11 @@ StoreSet::~StoreSet()
}
void
-StoreSet::init(int _SSIT_size, int _LFST_size)
+StoreSet::init(uint64_t clear_period, int _SSIT_size, int _LFST_size)
{
SSITSize = _SSIT_size;
LFSTSize = _LFST_size;
+ clearPeriod = clear_period;
DPRINTF(StoreSet, "StoreSet: Creating store set object.\n");
DPRINTF(StoreSet, "StoreSet: SSIT size: %i, LFST size: %i.\n",
@@ -103,6 +106,8 @@ StoreSet::init(int _SSIT_size, int _LFST_size)
indexMask = SSITSize - 1;
offsetBits = 2;
+
+ memOpsPred = 0;
}
@@ -180,8 +185,21 @@ StoreSet::violation(Addr store_PC, Addr load_PC)
}
void
+StoreSet::checkClear()
+{
+ memOpsPred++;
+ if (memOpsPred > clearPeriod) {
+ DPRINTF(StoreSet, "Wiping predictor state beacuse %d ld/st executed\n",
+ clearPeriod);
+ memOpsPred = 0;
+ clear();
+ }
+}
+
+void
StoreSet::insertLoad(Addr load_PC, InstSeqNum load_seq_num)
{
+ checkClear();
// Does nothing.
return;
}
@@ -193,6 +211,7 @@ StoreSet::insertStore(Addr store_PC, InstSeqNum store_seq_num, ThreadID tid)
int store_SSID;
+ checkClear();
assert(index < SSITSize);
if (!validSSIT[index]) {
diff --git a/src/cpu/o3/store_set.hh b/src/cpu/o3/store_set.hh
index ce4591f68..973b83b42 100644
--- a/src/cpu/o3/store_set.hh
+++ b/src/cpu/o3/store_set.hh
@@ -63,18 +63,24 @@ class StoreSet
StoreSet() { };
/** Creates store set predictor with given table sizes. */
- StoreSet(int SSIT_size, int LFST_size);
+ StoreSet(uint64_t clear_period, int SSIT_size, int LFST_size);
/** Default destructor. */
~StoreSet();
/** Initializes the store set predictor with the given table sizes. */
- void init(int SSIT_size, int LFST_size);
+ void init(uint64_t clear_period, int SSIT_size, int LFST_size);
/** Records a memory ordering violation between the younger load
* and the older store. */
void violation(Addr store_PC, Addr load_PC);
+ /** Periodically clears the store set predictor so that its tables
+ * do not become fully occupied, which would cause stores to be
+ * constantly predicted as conflicting.
+ */
+ void checkClear();
+
/** Inserts a load into the store set predictor. This does nothing but
* is included in case other predictors require a similar function.
*/
@@ -130,6 +136,11 @@ class StoreSet
typedef std::map<InstSeqNum, int, ltseqnum>::iterator SeqNumMapIt;
+ /** Number of loads/stores to process before wiping the predictor, so
+ * that its entries do not all become saturated.
+ */
+ uint64_t clearPeriod;
+
/** Store Set ID Table size, in entries. */
int SSITSize;
@@ -141,6 +152,9 @@ class StoreSet
// HACK: Hardcoded for now.
int offsetBits;
+
+ /** Number of memory operations predicted since last clear of predictor */
+ int memOpsPred;
};
#endif // __CPU_O3_STORE_SET_HH__