summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJavier Bueno <javier.bueno@metempsy.com>2019-03-07 16:13:03 +0100
committerJavier Bueno Hedo <javier.bueno@metempsy.com>2019-03-20 11:35:16 +0000
commit81e34b308f3738cc9e88ae3066b5ed47c918c9de (patch)
tree80d824dd9019b087e52de0853e8373544ad05f56 /src
parentdee6fe7b6a4ce4f2d1e08af9250b6d1d6ce96836 (diff)
downloadgem5-81e34b308f3738cc9e88ae3066b5ed47c918c9de.tar.xz
mem-cache: Added the STeMS prefetcher
Reference: Stephen Somogyi, Thomas F. Wenisch, Anastasia Ailamaki, and Babak Falsafi. 2009. Spatio-temporal memory streaming. In Proceedings of the 36th annual international symposium on Computer architecture (ISCA '09). ACM, New York, NY, USA, 69-80. Change-Id: I58cea1a7faa9391f8aa4469eb4973feabd31097a Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/16423 Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br> Maintainer: Nikos Nikoleris <nikos.nikoleris@arm.com>
Diffstat (limited to 'src')
-rw-r--r--src/mem/cache/prefetch/Prefetcher.py36
-rw-r--r--src/mem/cache/prefetch/SConscript1
-rw-r--r--src/mem/cache/prefetch/spatio_temporal_memory_streaming.cc257
-rw-r--r--src/mem/cache/prefetch/spatio_temporal_memory_streaming.hh199
4 files changed, 493 insertions, 0 deletions
diff --git a/src/mem/cache/prefetch/Prefetcher.py b/src/mem/cache/prefetch/Prefetcher.py
index eacdf78f7..cb0a1c3ee 100644
--- a/src/mem/cache/prefetch/Prefetcher.py
+++ b/src/mem/cache/prefetch/Prefetcher.py
@@ -408,3 +408,39 @@ class SBOOEPrefetcher(QueuedPrefetcher):
sandbox_entries = Param.Int(1024, "Size of the address buffer")
score_threshold_pct = Param.Percent(25, "Min. threshold to issue a \
prefetch. The value is the percentage of sandbox entries to use")
+
class STeMSPrefetcher(QueuedPrefetcher):
    # Spatio-Temporal Memory Streaming (STeMS) prefetcher.
    # Reference: Somogyi, Wenisch, Ailamaki, Falsafi,
    # "Spatio-temporal memory streaming", ISCA 2009.
    type = "STeMSPrefetcher"
    cxx_class = "STeMSPrefetcher"
    cxx_header = "mem/cache/prefetch/spatio_temporal_memory_streaming.hh"

    # Size in bytes of each spatial region tracked by the prefetcher.
    # Must be a power of 2 (checked in the C++ constructor).
    spatial_region_size = Param.MemorySize("2kB",
        "Memory covered by a hot zone")
    # The Active Generation Table (AGT) records the spatial regions that
    # are currently being accessed (an "active generation").
    active_generation_table_entries = Param.MemorySize("64",
        "Number of entries in the active generation table")
    active_generation_table_assoc = Param.Unsigned(64,
        "Associativity of the active generation table")
    active_generation_table_indexing_policy = Param.BaseIndexingPolicy(
        SetAssociative(entry_size = 1,
            assoc = Parent.active_generation_table_assoc,
            size = Parent.active_generation_table_entries),
        "Indexing policy of the active generation table")
    active_generation_table_replacement_policy = Param.BaseReplacementPolicy(
        LRURP(), "Replacement policy of the active generation table")

    # The Pattern Sequence Table (PST) stores, per trigger PC, the access
    # sequence observed within a spatial region once its generation ends.
    pattern_sequence_table_entries = Param.MemorySize("16384",
        "Number of entries in the pattern sequence table")
    pattern_sequence_table_assoc = Param.Unsigned(16384,
        "Associativity of the pattern sequence table")
    pattern_sequence_table_indexing_policy = Param.BaseIndexingPolicy(
        SetAssociative(entry_size = 1,
            assoc = Parent.pattern_sequence_table_assoc,
            size = Parent.pattern_sequence_table_entries),
        "Indexing policy of the pattern sequence table")
    pattern_sequence_table_replacement_policy = Param.BaseReplacementPolicy(
        LRURP(), "Replacement policy of the pattern sequence table")

    # The Region Miss Order Buffer (RMOB) records the global order of
    # trigger (first) misses to spatial regions.
    region_miss_order_buffer_entries = Param.Unsigned(131072,
        "Number of entries of the Region Miss Order Buffer")
    # Length of the sequence reconstructed when generating prefetches.
    reconstruction_entries = Param.Unsigned(256,
        "Number of reconstruction entries")
diff --git a/src/mem/cache/prefetch/SConscript b/src/mem/cache/prefetch/SConscript
index f533b3c52..5b653382a 100644
--- a/src/mem/cache/prefetch/SConscript
+++ b/src/mem/cache/prefetch/SConscript
@@ -43,5 +43,6 @@ Source('sbooe.cc')
Source('signature_path.cc')
Source('signature_path_v2.cc')
Source('slim_ampm.cc')
+Source('spatio_temporal_memory_streaming.cc')
Source('stride.cc')
Source('tagged.cc')
diff --git a/src/mem/cache/prefetch/spatio_temporal_memory_streaming.cc b/src/mem/cache/prefetch/spatio_temporal_memory_streaming.cc
new file mode 100644
index 000000000..df5190a97
--- /dev/null
+++ b/src/mem/cache/prefetch/spatio_temporal_memory_streaming.cc
@@ -0,0 +1,257 @@
+/**
+ * Copyright (c) 2019 Metempsy Technology Consulting
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Javier Bueno
+ */
+
+#include "mem/cache/prefetch/spatio_temporal_memory_streaming.hh"
+
+#include "debug/HWPrefetch.hh"
+#include "mem/cache/prefetch/associative_set_impl.hh"
+#include "params/STeMSPrefetcher.hh"
+
+STeMSPrefetcher::STeMSPrefetcher(const STeMSPrefetcherParams *p)
+ : QueuedPrefetcher(p), spatialRegionSize(p->spatial_region_size),
+ spatialRegionSizeBits(floorLog2(p->spatial_region_size)),
+ reconstructionEntries(p->reconstruction_entries),
+ activeGenerationTable(p->active_generation_table_assoc,
+ p->active_generation_table_entries,
+ p->active_generation_table_indexing_policy,
+ p->active_generation_table_replacement_policy,
+ ActiveGenerationTableEntry(
+ spatialRegionSize / blkSize)),
+ patternSequenceTable(p->pattern_sequence_table_assoc,
+ p->pattern_sequence_table_entries,
+ p->pattern_sequence_table_indexing_policy,
+ p->pattern_sequence_table_replacement_policy,
+ ActiveGenerationTableEntry(
+ spatialRegionSize / blkSize)),
+ rmob(p->region_miss_order_buffer_entries), rmobHead(0)
+{
+ fatal_if(!isPowerOf2(spatialRegionSize),
+ "The spatial region size must be a power of 2.");
+}
+
+void
+STeMSPrefetcher::checkForActiveGenerationsEnd() {
+ // This prefetcher operates attached to the L1 and it observes all
+ // accesses, this guarantees that no evictions are missed
+
+ // Iterate over all entries, if any recorded cacheline has been evicted,
+ // the generation finishes, move the entry to the PST
+ for (auto &agt_entry : activeGenerationTable) {
+ if (agt_entry.isValid()) {
+ bool generation_ended = false;
+ bool sr_is_secure = agt_entry.isSecure();
+ for (auto &seq_entry : agt_entry.sequence) {
+ if (seq_entry.counter > 0) {
+ Addr cache_addr =
+ agt_entry.paddress + seq_entry.offset * blkSize;
+ if (!inCache(cache_addr, sr_is_secure) &&
+ !inMissQueue(cache_addr, sr_is_secure)) {
+ generation_ended = true;
+ break;
+ }
+ }
+ }
+ if (generation_ended) {
+ // PST is indexed using the PC (secure bit is unused)
+ ActiveGenerationTableEntry *pst_entry =
+ patternSequenceTable.findEntry(agt_entry.pc,
+ false /*unused*/);
+ if (pst_entry == nullptr) {
+ // Tipically an entry will not exist
+ pst_entry = patternSequenceTable.findVictim(agt_entry.pc);
+ assert(pst_entry != nullptr);
+ patternSequenceTable.insertEntry(agt_entry.pc,
+ false /*unused*/, pst_entry);
+ } else {
+ patternSequenceTable.accessEntry(pst_entry);
+ }
+ // If the entry existed, this will update the values, if not,
+ // this also sets the values of the entry
+ pst_entry->update(agt_entry);
+ // Free the AGT entry
+ agt_entry.setInvalid();
+ }
+ }
+ }
+}
+
+void
+STeMSPrefetcher::addToRMOB(Addr sr_addr, Addr pst_addr, unsigned int delta)
+{
+ RegionMissOrderBufferEntry &rmob_entry = rmob[rmobHead];
+ rmobHead = (rmobHead + 1) % rmob.size();
+
+ rmob_entry.srAddress = sr_addr;
+ rmob_entry.pstAddress = pst_addr;
+ rmob_entry.delta = delta;
+ rmob_entry.valid = true;
+}
+
+void
+STeMSPrefetcher::calculatePrefetch(const PrefetchInfo &pfi,
+ std::vector<AddrPriority> &addresses)
+{
+ if (!pfi.hasPC()) {
+ DPRINTF(HWPrefetch, "Ignoring request with no PC.\n");
+ return;
+ }
+
+ Addr pc = pfi.getPC();
+ bool is_secure = pfi.isSecure();
+ // Spatial region address
+ Addr sr_addr = pfi.getAddr() / spatialRegionSize;
+ Addr paddr = pfi.getPaddr();
+
+ // Offset in cachelines within the spatial region
+ Addr sr_offset = (pfi.getAddr() % spatialRegionSize) / blkSize;
+
+ // Check if any active generation has ended
+ checkForActiveGenerationsEnd();
+
+ ActiveGenerationTableEntry *agt_entry =
+ activeGenerationTable.findEntry(sr_addr, is_secure);
+ if (agt_entry != nullptr) {
+ // found an entry in the AGT, entry is currently being recorded,
+ // add the offset
+ activeGenerationTable.accessEntry(agt_entry);
+ agt_entry->addOffset(sr_offset);
+ lastTriggerCounter += 1;
+ } else {
+ // Not found, this is the first access (Trigger access)
+
+ // Add entry to RMOB
+ Addr pst_addr = (pc << spatialRegionSizeBits) + sr_offset;
+ addToRMOB(sr_addr, pst_addr, lastTriggerCounter);
+ // Reset last trigger counter
+ lastTriggerCounter = 0;
+
+ // allocate a new AGT entry
+ agt_entry = activeGenerationTable.findVictim(sr_addr);
+ assert(agt_entry != nullptr);
+ activeGenerationTable.insertEntry(sr_addr, is_secure, agt_entry);
+ agt_entry->pc = pc;
+ agt_entry->paddress = paddr;
+ agt_entry->addOffset(sr_offset);
+ }
+ // increase the seq Counter for other entries
+ for (auto &agt_e : activeGenerationTable) {
+ if (agt_e.isValid() && agt_entry != &agt_e) {
+ agt_e.seqCounter += 1;
+ }
+ }
+
+ // Prefetch generation: if this is a miss, search for the most recent
+ // entry in the RMOB, and reconstruct the registered access sequence
+ if (pfi.isCacheMiss()) {
+ for (unsigned int idx = (rmobHead - 1) % rmob.size();
+ idx != rmobHead && rmob[idx].valid;
+ idx = (idx - 1) % rmob.size())
+ {
+ if (rmob[idx].srAddress == sr_addr) {
+ // reconstruct the access sequence
+ reconstructSequence(idx, addresses);
+ break;
+ }
+ }
+ }
+}
+
+void
+STeMSPrefetcher::reconstructSequence(unsigned int rmob_idx,
+ std::vector<AddrPriority> &addresses)
+{
+ std::vector<Addr> reconstruction(reconstructionEntries, MaxAddr);
+ unsigned int idx = 0;
+ // process rmob entries from rmob_idx (most recent with
+ // address = sr_addr) to the last one (rmobHead)
+ for (int i = rmob_idx;
+ i != rmobHead && idx < reconstructionEntries;
+ i = (i + 1) % rmob.size())
+ {
+ reconstruction[idx] = rmob[i].srAddress * spatialRegionSize;
+ unsigned int next_i = (i + 1) % rmob.size();
+ idx += rmob[next_i].delta + 1;
+ }
+ // Now query the PST with the PC of each RMOB entry
+ idx = 0;
+ for (int i = rmob_idx;
+ i != rmobHead && idx < reconstructionEntries;
+ i = (i + 1) % rmob.size())
+ {
+ ActiveGenerationTableEntry *pst_entry =
+ patternSequenceTable.findEntry(rmob[i].pstAddress,
+ false /* unused */);
+ if (pst_entry != nullptr) {
+ patternSequenceTable.accessEntry(pst_entry);
+ for (auto &seq_entry : pst_entry->sequence) {
+ if (seq_entry.counter > 1) {
+ // 3-bit counter: high enough confidence with a
+ // value greater than 1
+ Addr rec_addr = rmob[i].srAddress * spatialRegionSize +
+ seq_entry.offset;
+ unsigned ridx = idx + seq_entry.delta;
+ // Try to use the corresponding position, if it has been
+ // already used, look the surrounding positions
+ if (ridx < reconstructionEntries &&
+ reconstruction[ridx] == MaxAddr) {
+ reconstruction[ridx] = rec_addr;
+ } else if ((ridx + 1) < reconstructionEntries &&
+ reconstruction[ridx + 1] == MaxAddr) {
+ reconstruction[ridx + 1] = rec_addr;
+ } else if ((ridx + 2) < reconstructionEntries &&
+ reconstruction[ridx + 2] == MaxAddr) {
+ reconstruction[ridx + 2] = rec_addr;
+ } else if ((ridx > 0) &&
+ ((ridx - 1) < reconstructionEntries) &&
+ reconstruction[ridx - 1] == MaxAddr) {
+ reconstruction[ridx - 1] = rec_addr;
+ } else if ((ridx > 1) &&
+ ((ridx - 2) < reconstructionEntries) &&
+ reconstruction[ridx - 2] == MaxAddr) {
+ reconstruction[ridx - 2] = rec_addr;
+ }
+ }
+ }
+ }
+ unsigned int next_i = (i + 1) % rmob.size();
+ idx += rmob[next_i].delta + 1;
+ }
+ for (Addr pf_addr : reconstruction) {
+ if (pf_addr != MaxAddr) {
+ addresses.push_back(AddrPriority(pf_addr, 0));
+ }
+ }
+}
+
// Standard gem5 params factory: builds the prefetcher from this
// params instance (called by the generated Python/C++ param glue)
STeMSPrefetcher *
STeMSPrefetcherParams::create()
{
    return new STeMSPrefetcher(this);
}
diff --git a/src/mem/cache/prefetch/spatio_temporal_memory_streaming.hh b/src/mem/cache/prefetch/spatio_temporal_memory_streaming.hh
new file mode 100644
index 000000000..a7e25fe02
--- /dev/null
+++ b/src/mem/cache/prefetch/spatio_temporal_memory_streaming.hh
@@ -0,0 +1,199 @@
+/**
+ * Copyright (c) 2019 Metempsy Technology Consulting
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Javier Bueno
+ */
+
+ /**
+ * Implementation of the Spatio-Temporal Memory Streaming Prefetcher (STeMS)
+ * Reference:
+ * Spatio-temporal memory streaming.
+ * Somogyi, S., Wenisch, T. F., Ailamaki, A., & Falsafi, B. (2009).
+ * ACM SIGARCH Computer Architecture News, 37(3), 69-80.
+ *
+ * Notes:
+ * - The functionality described in the paper as Streamed Value Buffer (SVB)
+ * is not implemented here, as this is handled by the QueuedPrefetcher class
+ */
+
+#ifndef __MEM_CACHE_PREFETCH_SPATIO_TEMPORAL_MEMORY_STREAMING_HH__
+#define __MEM_CACHE_PREFETCH_SPATIO_TEMPORAL_MEMORY_STREAMING_HH__
+
+#include <vector>
+
+#include "mem/cache/prefetch/associative_set.hh"
+#include "mem/cache/prefetch/queued.hh"
+
+struct STeMSPrefetcherParams;
+
+class STeMSPrefetcher : public QueuedPrefetcher
+{
+ /** Size of each spatial region */
+ const size_t spatialRegionSize;
+ /** log_2 of the spatial region size */
+ const size_t spatialRegionSizeBits;
+ /** Number of reconstruction entries */
+ const unsigned int reconstructionEntries;
+
+ /**
+ * Entry data type for the Active Generation Table (AGT) and the Pattern
+ * Sequence Table (PST)
+ */
+ struct ActiveGenerationTableEntry : public TaggedEntry {
+ /** Physical address of the spatial region */
+ Addr paddress;
+ /** PC that started this generation */
+ Addr pc;
+ /** Counter to keep track of the interleaving between sequences */
+ unsigned int seqCounter;
+
+ /** Sequence entry data type */
+ struct SequenceEntry {
+ /** 2-bit confidence counter */
+ unsigned int counter;
+ /** Offset, in cache lines, within the spatial region */
+ unsigned int offset;
+ /** Intearleaving position on the global access sequence */
+ unsigned int delta;
+ SequenceEntry() : counter(0), offset(0), delta(0)
+ {}
+ };
+ /** Sequence of accesses */
+ std::vector<SequenceEntry> sequence;
+
+ ActiveGenerationTableEntry(int num_positions) : paddress(0), pc(0),
+ seqCounter(0), sequence(num_positions)
+ {}
+
+ void reset() override
+ {
+ paddress = 0;
+ pc = 0;
+ seqCounter = 0;
+ for (auto &seq_entry : sequence) {
+ seq_entry.counter = 0;
+ seq_entry.offset = 0;
+ seq_entry.delta = 0;
+ }
+ }
+
+ /**
+ * Update the entry data with an entry from a generation that just
+ * ended. This operation can not be done with the copy constructor,
+ * becasuse the TaggedEntry component must not be copied.
+ * @param e entry which generation has ended
+ */
+ void update(ActiveGenerationTableEntry const &e)
+ {
+ paddress = e.paddress;
+ pc = e.pc;
+ seqCounter = e.seqCounter;
+ sequence = e.sequence;
+ }
+
+ /**
+ * Add a new access to the sequence
+ * @param offset offset in cachelines within the spatial region
+ */
+ void addOffset(unsigned int offset) {
+ // Search for the offset in the deltas array, if it exist, update
+ // the corresponding counter, if not, add the offset to the array
+ for (auto &seq_entry : sequence) {
+ if (seq_entry.counter > 0) {
+ if (seq_entry.offset == offset) {
+ //2 bit counter, saturates at 3
+ if (seq_entry.counter < 3) {
+ seq_entry.counter += 1;
+ }
+ }
+ } else {
+ // If the counter is 0 it means that this position is not
+ // being used, and we can allocate the new offset here
+ seq_entry.counter = 1;
+ seq_entry.offset = offset;
+ seq_entry.delta = seqCounter;
+ break;
+ }
+ }
+ seqCounter = 0;
+ }
+ };
+
+ /** Active Generation Table (AGT) */
+ AssociativeSet<ActiveGenerationTableEntry> activeGenerationTable;
+ /** Pattern Sequence Table (PST) */
+ AssociativeSet<ActiveGenerationTableEntry> patternSequenceTable;
+
+ /** Data type of the Region Miss Order Buffer entry */
+ struct RegionMissOrderBufferEntry {
+ /** Address of the spatial region */
+ Addr srAddress;
+ /**
+ * Address used to index the PST table, generated using the PC and the
+ * offset within the spatial region
+ */
+ Addr pstAddress;
+ /** Delta within the global miss order sequence */
+ unsigned int delta;
+ /** Valid bit */
+ bool valid;
+ };
+
+ /** Region Miss Order Buffer (RMOB) */
+ std::vector<RegionMissOrderBufferEntry> rmob;
+ /** First free position (or older, if it is full) of the RMOB */
+ unsigned int rmobHead;
+
+ /** Counter to keep the count of accesses between trigger accesses */
+ unsigned int lastTriggerCounter;
+
+ /** Checks if the active generations have ended */
+ void checkForActiveGenerationsEnd();
+ /**
+ * Adds an entry to the RMOB
+ * @param sr_addr Spatial region address
+ * @param pst_addr Corresponding PST address
+ * @param delta Number of entries skipped in the global miss order
+ */
+ void addToRMOB(Addr sr_addr, Addr pst_addr, unsigned int delta);
+
+ /**
+ * Reconstructs a sequence of accesses and generates the prefetch
+ * addresses, adding them to the addresses vector
+ * @param rmob_idx rmob position to start generating from
+ * @param addresses vector to add the addresses to be prefetched
+ */
+ void reconstructSequence(unsigned int rmob_idx,
+ std::vector<AddrPriority> &addresses);
+ public:
+ STeMSPrefetcher(const STeMSPrefetcherParams* p);
+ ~STeMSPrefetcher() {}
+ void calculatePrefetch(const PrefetchInfo &pfi,
+ std::vector<AddrPriority> &addresses) override;
+};
+
+#endif//__MEM_CACHE_PREFETCH_SPATIO_TEMPORAL_MEMORY_STREAMING_HH__