diff options
author | Ali Saidi <Ali.Saidi@ARM.com> | 2011-08-19 15:08:07 -0500 |
---|---|---|
committer | Ali Saidi <Ali.Saidi@ARM.com> | 2011-08-19 15:08:07 -0500 |
commit | b6203360ef684a8dc32981221336f5d216ce2668 (patch) | |
tree | e4c8e7b374b2685f19016b0f3e79633a858ef8d2 | |
parent | 5f425b8bd1ac70b61fc57b7ec44c52cd7d8de9fb (diff) | |
download | gem5-b6203360ef684a8dc32981221336f5d216ce2668.tar.xz |
LSQ: Set store predictor to periodically clear itself as recommended in the storesets paper.
This patch improves performance by as much as 10% on some spec benchmarks.
-rw-r--r-- | src/cpu/o3/O3CPU.py | 2 | ||||
-rw-r--r-- | src/cpu/o3/mem_dep_unit_impl.hh | 9 | ||||
-rw-r--r-- | src/cpu/o3/store_set.cc | 25 | ||||
-rw-r--r-- | src/cpu/o3/store_set.hh | 18 |
4 files changed, 46 insertions, 8 deletions
diff --git a/src/cpu/o3/O3CPU.py b/src/cpu/o3/O3CPU.py index f379fcd8a..47b18a3ec 100644 --- a/src/cpu/o3/O3CPU.py +++ b/src/cpu/o3/O3CPU.py @@ -121,6 +121,8 @@ class DerivO3CPU(BaseCPU): LSQDepCheckShift = Param.Unsigned(4, "Number of places to shift addr before check") LSQCheckLoads = Param.Bool(True, "Should dependency violations be checked for loads & stores or just stores") + store_set_clear_period = Param.Unsigned(250000, + "Number of load/store insts before the dep predictor should be invalidated") LFSTSize = Param.Unsigned(1024, "Last fetched store table size") SSITSize = Param.Unsigned(1024, "Store set ID table size") diff --git a/src/cpu/o3/mem_dep_unit_impl.hh b/src/cpu/o3/mem_dep_unit_impl.hh index 0208a622e..d30dcbd3d 100644 --- a/src/cpu/o3/mem_dep_unit_impl.hh +++ b/src/cpu/o3/mem_dep_unit_impl.hh @@ -45,8 +45,10 @@ MemDepUnit<MemDepPred, Impl>::MemDepUnit() template <class MemDepPred, class Impl> MemDepUnit<MemDepPred, Impl>::MemDepUnit(DerivO3CPUParams *params) : _name(params->name + ".memdepunit"), - depPred(params->SSITSize, params->LFSTSize), loadBarrier(false), - loadBarrierSN(0), storeBarrier(false), storeBarrierSN(0), iqPtr(NULL) + depPred(params->store_set_clear_period, params->SSITSize, + params->LFSTSize), + loadBarrier(false), loadBarrierSN(0), storeBarrier(false), + storeBarrierSN(0), iqPtr(NULL) { DPRINTF(MemDepUnit, "Creating MemDepUnit object.\n"); } @@ -85,7 +87,8 @@ MemDepUnit<MemDepPred, Impl>::init(DerivO3CPUParams *params, ThreadID tid) _name = csprintf("%s.memDep%d", params->name, tid); id = tid; - depPred.init(params->SSITSize, params->LFSTSize); + depPred.init(params->store_set_clear_period, params->SSITSize, + params->LFSTSize); } template <class MemDepPred, class Impl> diff --git a/src/cpu/o3/store_set.cc b/src/cpu/o3/store_set.cc index fc87c417e..acd4a8d0a 100644 --- a/src/cpu/o3/store_set.cc +++ b/src/cpu/o3/store_set.cc @@ -34,8 +34,8 @@ #include "cpu/o3/store_set.hh" #include "debug/StoreSet.hh" -StoreSet::StoreSet(int _SSIT_size, int _LFST_size) - : SSITSize(_SSIT_size), LFSTSize(_LFST_size) +StoreSet::StoreSet(uint64_t clear_period, int _SSIT_size, int _LFST_size) + : clearPeriod(clear_period), SSITSize(_SSIT_size), LFSTSize(_LFST_size) { DPRINTF(StoreSet, "StoreSet: Creating store set object.\n"); DPRINTF(StoreSet, "StoreSet: SSIT size: %i, LFST size: %i.\n", @@ -68,6 +68,8 @@ StoreSet::StoreSet(int _SSIT_size, int _LFST_size) indexMask = SSITSize - 1; offsetBits = 2; + + memOpsPred = 0; } StoreSet::~StoreSet() @@ -75,10 +77,11 @@ StoreSet::~StoreSet() } void -StoreSet::init(int _SSIT_size, int _LFST_size) +StoreSet::init(uint64_t clear_period, int _SSIT_size, int _LFST_size) { SSITSize = _SSIT_size; LFSTSize = _LFST_size; + clearPeriod = clear_period; DPRINTF(StoreSet, "StoreSet: Creating store set object.\n"); DPRINTF(StoreSet, "StoreSet: SSIT size: %i, LFST size: %i.\n", @@ -103,6 +106,8 @@ StoreSet::init(int _SSIT_size, int _LFST_size) indexMask = SSITSize - 1; offsetBits = 2; + + memOpsPred = 0; } @@ -180,8 +185,21 @@ StoreSet::violation(Addr store_PC, Addr load_PC) } void +StoreSet::checkClear() +{ + memOpsPred++; + if (memOpsPred > clearPeriod) { + DPRINTF(StoreSet, "Wiping predictor state beacuse %d ld/st executed\n", + clearPeriod); + memOpsPred = 0; + clear(); + } +} + +void StoreSet::insertLoad(Addr load_PC, InstSeqNum load_seq_num) { + checkClear(); // Does nothing. return; } @@ -193,6 +211,7 @@ StoreSet::insertStore(Addr store_PC, InstSeqNum store_seq_num, ThreadID tid) int store_SSID; + checkClear(); assert(index < SSITSize); if (!validSSIT[index]) { diff --git a/src/cpu/o3/store_set.hh b/src/cpu/o3/store_set.hh index ce4591f68..973b83b42 100644 --- a/src/cpu/o3/store_set.hh +++ b/src/cpu/o3/store_set.hh @@ -63,18 +63,24 @@ class StoreSet StoreSet() { }; /** Creates store set predictor with given table sizes. */ - StoreSet(int SSIT_size, int LFST_size); + StoreSet(uint64_t clear_period, int SSIT_size, int LFST_size); /** Default destructor. */ ~StoreSet(); /** Initializes the store set predictor with the given table sizes. */ - void init(int SSIT_size, int LFST_size); + void init(uint64_t clear_period, int SSIT_size, int LFST_size); /** Records a memory ordering violation between the younger load * and the older store. */ void violation(Addr store_PC, Addr load_PC); + /** Clears the store set predictor every so often so that all the + * entries aren't used and stores are constantly predicted as + * conflicting. + */ + void checkClear(); + /** Inserts a load into the store set predictor. This does nothing but * is included in case other predictors require a similar function. */ @@ -130,6 +136,11 @@ class StoreSet typedef std::map<InstSeqNum, int, ltseqnum>::iterator SeqNumMapIt; + /** Number of loads/stores to process before wiping predictor so all + * entries don't get saturated + */ + uint64_t clearPeriod; + /** Store Set ID Table size, in entries. */ int SSITSize; @@ -141,6 +152,9 @@ class StoreSet // HACK: Hardcoded for now. int offsetBits; + + /** Number of memory operations predicted since last clear of predictor */ + int memOpsPred; }; #endif // __CPU_O3_STORE_SET_HH__ |