From 60995026786d7e8000d379c0f47c7ee36b9c2444 Mon Sep 17 00:00:00 2001 From: Javier Bueno Date: Thu, 13 Dec 2018 12:08:35 +0100 Subject: mem-cache: Access Map Pattern Matching Prefetcher Implementation of the Access Map Pattern Matching prefetcher Based in the description of the following paper: Access map pattern matching for high performance data cache prefetch. Ishii, Y., Inaba, M., & Hiraki, K. (2011). Journal of Instruction-Level Parallelism, 13, 1-24. Change-Id: I0d4b7f7afc2ab4938bdd8755bfed26e26a28530c Reviewed-on: https://gem5-review.googlesource.com/c/15096 Reviewed-by: Nikos Nikoleris Maintainer: Nikos Nikoleris --- src/mem/cache/prefetch/Prefetcher.py | 34 +++ src/mem/cache/prefetch/SConscript | 1 + .../cache/prefetch/access_map_pattern_matching.cc | 252 +++++++++++++++++++++ .../cache/prefetch/access_map_pattern_matching.hh | 182 +++++++++++++++ 4 files changed, 469 insertions(+) create mode 100644 src/mem/cache/prefetch/access_map_pattern_matching.cc create mode 100644 src/mem/cache/prefetch/access_map_pattern_matching.hh diff --git a/src/mem/cache/prefetch/Prefetcher.py b/src/mem/cache/prefetch/Prefetcher.py index a868a2583..3d9c66565 100644 --- a/src/mem/cache/prefetch/Prefetcher.py +++ b/src/mem/cache/prefetch/Prefetcher.py @@ -179,3 +179,37 @@ class SignaturePathPrefetcher(QueuedPrefetcher): "Minimum confidence to issue prefetches") lookahead_confidence_threshold = Param.Float(0.75, "Minimum confidence to continue exploring lookahead entries") + +class AccessMapPatternMatchingPrefetcher(QueuedPrefetcher): + type = 'AccessMapPatternMatchingPrefetcher' + cxx_class = 'AccessMapPatternMatchingPrefetcher' + cxx_header = "mem/cache/prefetch/access_map_pattern_matching.hh" + + start_degree = Param.Unsigned(4, + "Initial degree (Maximum number of prefetches generated") + hot_zone_size = Param.MemorySize("2kB", "Memory covered by a hot zone") + access_map_table_entries = Param.MemorySize("256", + "Number of entries in the access map table") + access_map_table_assoc = Param.Unsigned(8, + "Associativity of the access map table") + access_map_table_indexing_policy = Param.BaseIndexingPolicy( + SetAssociative(entry_size = 1, assoc = Parent.access_map_table_assoc, + size = Parent.access_map_table_entries), + "Indexing policy of the access map table") + access_map_table_replacement_policy = Param.BaseReplacementPolicy(LRURP(), + "Replacement policy of the access map table") + high_coverage_threshold = Param.Float(0.25, + "A prefetch coverage factor bigger than this is considered high") + low_coverage_threshold = Param.Float(0.125, + "A prefetch coverage factor smaller than this is considered low") + high_accuracy_threshold = Param.Float(0.5, + "A prefetch accuracy factor bigger than this is considered high") + low_accuracy_threshold = Param.Float(0.25, + "A prefetch accuracy factor smaller than this is considered low") + high_cache_hit_threshold = Param.Float(0.875, + "A cache hit ratio bigger than this is considered high") + low_cache_hit_threshold = Param.Float(0.75, + "A cache hit ratio smaller than this is considered low") + epoch_cycles = Param.Cycles(256000, "Cycles in an epoch period") + offchip_memory_latency = Param.Latency("30ns", + "Memory latency used to compute the required memory bandwidth") diff --git a/src/mem/cache/prefetch/SConscript b/src/mem/cache/prefetch/SConscript index ccbc2e3b8..b46158624 100644 --- a/src/mem/cache/prefetch/SConscript +++ b/src/mem/cache/prefetch/SConscript @@ -32,6 +32,7 @@ Import('*') SimObject('Prefetcher.py') +Source('access_map_pattern_matching.cc') Source('base.cc') Source('queued.cc') Source('signature_path.cc') diff --git a/src/mem/cache/prefetch/access_map_pattern_matching.cc b/src/mem/cache/prefetch/access_map_pattern_matching.cc new file mode 100644 index 000000000..0f46effbf --- /dev/null +++ b/src/mem/cache/prefetch/access_map_pattern_matching.cc @@ -0,0 +1,252 @@ +/** + * Copyright (c) 2018 Metempsy Technology Consulting + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Javier Bueno + */ + +#include "mem/cache/prefetch/access_map_pattern_matching.hh" + +#include "debug/HWPrefetch.hh" +#include "mem/cache/prefetch/associative_set_impl.hh" +#include "params/AccessMapPatternMatchingPrefetcher.hh" + +AccessMapPatternMatchingPrefetcher::AccessMapPatternMatchingPrefetcher( + const AccessMapPatternMatchingPrefetcherParams *p) + : QueuedPrefetcher(p), + startDegree(p->start_degree), hotZoneSize(p->hot_zone_size), + highCoverageThreshold(p->high_coverage_threshold), + lowCoverageThreshold(p->low_coverage_threshold), + highAccuracyThreshold(p->high_accuracy_threshold), + lowAccuracyThreshold(p->low_accuracy_threshold), + highCacheHitThreshold(p->high_cache_hit_threshold), + lowCacheHitThreshold(p->low_cache_hit_threshold), + epochCycles(p->epoch_cycles), + offChipMemoryLatency(p->offchip_memory_latency), + accessMapTable(p->access_map_table_assoc, p->access_map_table_entries, + p->access_map_table_indexing_policy, + p->access_map_table_replacement_policy, + AccessMapEntry(hotZoneSize / blkSize)), + numGoodPrefetches(0), numTotalPrefetches(0), numRawCacheMisses(0), + numRawCacheHits(0), degree(startDegree), usefulDegree(startDegree), + epochEvent([this]{ processEpochEvent(); }, name()) +{ + fatal_if(!isPowerOf2(hotZoneSize), + "the hot zone size must be a power of 2"); + if (!epochEvent.scheduled()) { + schedule(epochEvent, clockEdge(epochCycles)); + } +} + +void +AccessMapPatternMatchingPrefetcher::processEpochEvent() +{ + schedule(epochEvent, clockEdge(epochCycles)); + double prefetch_accuracy = + ((double) numGoodPrefetches) / ((double) numTotalPrefetches); + double prefetch_coverage = + ((double) numGoodPrefetches) / ((double) numRawCacheMisses); + double cache_hit_ratio = ((double) numRawCacheHits) / + ((double) (numRawCacheHits + numRawCacheMisses)); + double num_requests = (double) (numRawCacheMisses - numGoodPrefetches + + numTotalPrefetches); + double memory_bandwidth = num_requests * offChipMemoryLatency / + clockEdge(epochCycles); + + if (prefetch_coverage > highCoverageThreshold && + (prefetch_accuracy > highAccuracyThreshold || + cache_hit_ratio < lowCacheHitThreshold)) { + usefulDegree += 1; + } else if ((prefetch_coverage < lowCoverageThreshold && + (prefetch_accuracy < lowAccuracyThreshold || + cache_hit_ratio > highCacheHitThreshold)) || + (prefetch_accuracy < lowAccuracyThreshold && + cache_hit_ratio > highCacheHitThreshold)) { + usefulDegree -= 1; + } + degree = std::min((unsigned) memory_bandwidth, usefulDegree); + // reset epoch stats + numGoodPrefetches = 0.0; + numTotalPrefetches = 0.0; + numRawCacheMisses = 0.0; + numRawCacheHits = 0.0; +} + +AccessMapPatternMatchingPrefetcher::AccessMapEntry * +AccessMapPatternMatchingPrefetcher::getAccessMapEntry(Addr am_addr, + bool is_secure) +{ + AccessMapEntry *am_entry = accessMapTable.findEntry(am_addr, is_secure); + if (am_entry != nullptr) { + accessMapTable.accessEntry(am_entry); + } else { + am_entry = accessMapTable.findVictim(am_addr); + assert(am_entry != nullptr); + + accessMapTable.insertEntry(am_addr, is_secure, am_entry); + } + return am_entry; +} + +void +AccessMapPatternMatchingPrefetcher::setEntryState(AccessMapEntry &entry, + Addr block, enum AccessMapState state) +{ + enum AccessMapState old = entry.states[block]; + entry.states[block] = state; + + //do not update stats when initializing + if (state == AM_INIT) return; + + switch (old) { + case AM_INIT: + if (state == AM_PREFETCH) { + numTotalPrefetches += 1; + } else if (state == AM_ACCESS) { + numRawCacheMisses += 1; + } + break; + case AM_PREFETCH: + if (state == AM_ACCESS) { + numGoodPrefetches += 1; + numRawCacheMisses += 1; + } + break; + case AM_ACCESS: + if (state == AM_ACCESS) { + numRawCacheHits += 1; + } + break; + default: + panic("Impossible path\n"); + break; + } +} + +void +AccessMapPatternMatchingPrefetcher::calculatePrefetch(const PrefetchInfo &pfi, + std::vector &addresses) +{ + assert(addresses.empty()); + bool is_secure = pfi.isSecure(); + Addr am_addr = pfi.getAddr() / hotZoneSize; + Addr current_block = (pfi.getAddr() % hotZoneSize) / blkSize; + uint64_t lines_per_zone = hotZoneSize / blkSize; + + // Get the entries of the curent block (am_addr), the previous, and the + // following ones + AccessMapEntry *am_entry_curr = getAccessMapEntry(am_addr, is_secure); + AccessMapEntry *am_entry_prev = (am_addr > 0) ? + getAccessMapEntry(am_addr-1, is_secure) : nullptr; + AccessMapEntry *am_entry_next = (am_addr < (MaxAddr/hotZoneSize)) ? + getAccessMapEntry(am_addr+1, is_secure) : nullptr; + assert(am_entry_curr != am_entry_prev); + assert(am_entry_curr != am_entry_next); + assert(am_entry_prev != am_entry_next); + assert(am_entry_curr != nullptr); + + //Mark the current access as Accessed + setEntryState(*am_entry_curr, current_block, AM_ACCESS); + + /** + * Create a contiguous copy of the 3 entries states. + * With this, we avoid doing boundaries checking in the loop that looks + * for prefetch candidates, mark out of range positions with AM_INVALID + */ + std::vector states(3 * lines_per_zone); + for (unsigned idx = 0; idx < lines_per_zone; idx += 1) { + states[idx] = + am_entry_prev != nullptr ? am_entry_prev->states[idx] : AM_INVALID; + states[idx + lines_per_zone] = am_entry_curr->states[idx]; + states[idx + 2 * lines_per_zone] = + am_entry_next != nullptr ? am_entry_next->states[idx] : AM_INVALID; + } + + /** + * am_entry_prev->states => states[ 0 .. lines_per_zone-1] + * am_entry_curr->states => states[ lines_per_zone .. 2*lines_per_zone-1] + * am_entry_next->states => states[2*lines_per_zone .. 3*lines_per_zone-1] + */ + + // index of the current_block in the new vector + Addr states_current_block = current_block + lines_per_zone; + // consider strides 1..lines_per_zone/2 + for (int stride = 1; stride < lines_per_zone/2; stride += 1) { + // Test accessed positive strides + if (checkCandidate(states, states_current_block, stride)) { + // candidate found, current_block - stride + Addr pf_addr; + if (stride > current_block) { + // The index (current_block - stride) falls in the range of + // the previous zone (am_entry_prev), adjust the address + // accordingly + Addr blk = states_current_block - stride; + pf_addr = (am_addr - 1) * hotZoneSize + blk * blkSize; + setEntryState(*am_entry_prev, blk, AM_PREFETCH); + } else { + // The index (current_block - stride) falls within + // am_entry_curr + Addr blk = current_block - stride; + pf_addr = am_addr * hotZoneSize + blk * blkSize; + setEntryState(*am_entry_curr, blk, AM_PREFETCH); + } + addresses.push_back(AddrPriority(pf_addr, 0)); + if (addresses.size() == degree) { + break; + } + } + + // Test accessed negative strides + if (checkCandidate(states, states_current_block, -stride)) { + // candidate found, current_block + stride + Addr pf_addr; + if (current_block + stride >= lines_per_zone) { + // The index (current_block + stride) falls in the range of + // the next zone (am_entry_next), adjust the address + // accordingly + Addr blk = (states_current_block + stride) % lines_per_zone; + pf_addr = (am_addr + 1) * hotZoneSize + blk * blkSize; + setEntryState(*am_entry_next, blk, AM_PREFETCH); + } else { + // The index (current_block + stride) falls within + // am_entry_curr + Addr blk = current_block + stride; + pf_addr = am_addr * hotZoneSize + blk * blkSize; + setEntryState(*am_entry_curr, blk, AM_PREFETCH); + } + addresses.push_back(AddrPriority(pf_addr, 0)); + if (addresses.size() == degree) { + break; + } + } + } +} + +AccessMapPatternMatchingPrefetcher* +AccessMapPatternMatchingPrefetcherParams::create() +{ + return new AccessMapPatternMatchingPrefetcher(this); +} diff --git a/src/mem/cache/prefetch/access_map_pattern_matching.hh b/src/mem/cache/prefetch/access_map_pattern_matching.hh new file mode 100644 index 000000000..33a20a27e --- /dev/null +++ b/src/mem/cache/prefetch/access_map_pattern_matching.hh @@ -0,0 +1,182 @@ +/** + * Copyright (c) 2018 Metempsy Technology Consulting + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Javier Bueno + */ + + /** + * Implementation of the Access Map Pattern Matching Prefetcher + * + * References: + * Access map pattern matching for high performance data cache prefetch. + * Ishii, Y., Inaba, M., & Hiraki, K. (2011). + * Journal of Instruction-Level Parallelism, 13, 1-24. + */ + +#ifndef __MEM_CACHE_PREFETCH_ACCESS_MAP_PATTERN_MATCHING_HH__ +#define __MEM_CACHE_PREFETCH_ACCESS_MAP_PATTERN_MATCHING_HH__ + +#include "mem/cache/base.hh" +#include "mem/cache/prefetch/associative_set.hh" +#include "mem/cache/prefetch/queued.hh" +#include "mem/packet.hh" + +struct AccessMapPatternMatchingPrefetcherParams; + +class AccessMapPatternMatchingPrefetcher : public QueuedPrefetcher +{ + /** Maximum number of prefetch generated */ + const unsigned startDegree; + /** Amount of memory covered by a hot zone */ + const uint64_t hotZoneSize; + /** A prefetch coverage factor bigger than this is considered high */ + const double highCoverageThreshold; + /** A prefetch coverage factor smaller than this is considered low */ + const double lowCoverageThreshold; + /** A prefetch accuracy factor bigger than this is considered high */ + const double highAccuracyThreshold; + /** A prefetch accuracy factor smaller than this is considered low */ + const double lowAccuracyThreshold; + /** A cache hit ratio bigger than this is considered high */ + const double highCacheHitThreshold; + /** A cache hit ratio smaller than this is considered low */ + const double lowCacheHitThreshold; + /** Cycles in an epoch period */ + const Cycles epochCycles; + /** Off chip memory latency to use for the epoch bandwidth calculation */ + const Tick offChipMemoryLatency; + + /** Data type representing the state of a cacheline in the access map */ + enum AccessMapState + { + AM_INIT, + AM_PREFETCH, + AM_ACCESS, + AM_INVALID + }; + + /** AccessMapEntry data type */ + struct AccessMapEntry : public TaggedEntry + { + /** vector containing the state of the cachelines in this zone */ + std::vector states; + + AccessMapEntry(size_t num_entries) : states(num_entries, AM_INIT) + {} + + /** Reset the entries to their initial values */ + void reset() override + { + for (auto &entry : states) { + entry = AM_INIT; + } + } + }; + /** Access map table */ + AssociativeSet accessMapTable; + + /** + * Number of good prefetches + * - State transitions from PREFETCH to ACCESS + */ + uint64_t numGoodPrefetches; + /** + * Number of prefetches issued + * - State transitions from INIT to PREFETCH + */ + uint64_t numTotalPrefetches; + /** + * Number of raw cache misses + * - State transitions from INIT or PREFETCH to ACCESS + */ + uint64_t numRawCacheMisses; + /** + * Number of raw cache hits + * - State transitions from ACCESS to ACCESS + */ + uint64_t numRawCacheHits; + /** Current degree */ + unsigned degree; + /** Current useful degree */ + unsigned usefulDegree; + + /** + * Given a target cacheline, this function checks if the cachelines + * that follow the provided stride have been accessed. If so, the line + * is considered a good candidate. + * @param states vector containing the states of three contiguous hot zones + * @param current target block (cacheline) + * @param stride access stride to obtain the reference cachelines + * @return true if current is a prefetch candidate + */ + inline bool checkCandidate(std::vector const &states, + Addr current, int stride) const + { + enum AccessMapState tgt = states[current - stride]; + enum AccessMapState s = states[current + stride]; + enum AccessMapState s2 = states[current + 2 * stride]; + enum AccessMapState s2_p1 = states[current + 2 * stride + 1]; + return (tgt != AM_INVALID && + ((s == AM_ACCESS && s2 == AM_ACCESS) || + (s == AM_ACCESS && s2_p1 == AM_ACCESS))); + } + + /** + * Obtain an AccessMapEntry from the AccessMapTable, if the entry is not + * found a new one is initialized and inserted. + * @param am_addr address of the hot zone + * @param is_secure whether the address belongs to the secure memory area + * @return the corresponding entry + */ + AccessMapEntry *getAccessMapEntry(Addr am_addr, bool is_secure); + + /** + * Updates the state of a block within an AccessMapEntry, also updates + * the prefetcher metrics. + * @param entry AccessMapEntry to update + * @param block cacheline within the hot zone + * @param state new state + */ + void setEntryState(AccessMapEntry &entry, Addr block, + enum AccessMapState state); + + /** + * This event constitues the epoch of the statistics that keep track of + * the prefetcher accuracy, when this event triggers, the prefetcher degree + * is adjusted and the statistics counters are reset. + */ + void processEpochEvent(); + EventFunctionWrapper epochEvent; + + public: + AccessMapPatternMatchingPrefetcher( + const AccessMapPatternMatchingPrefetcherParams* p); + ~AccessMapPatternMatchingPrefetcher() {} + void calculatePrefetch(const PrefetchInfo &pfi, + std::vector &addresses) override; +}; +#endif//__MEM_CACHE_PREFETCH_ACCESS_MAP_PATTERN_MATCHING_HH__ -- cgit v1.2.3