From 7ce9fe0af9f04a6f94bec9542af025a043c46b35 Mon Sep 17 00:00:00 2001 From: "Daniel R. Carvalho" Date: Mon, 15 Jul 2019 16:21:24 +0200 Subject: mem-cache: Factor out CPack's dictionary functionality Factor out dictionary functionality of CPack, so that it can be used easily for other compressors. As a side effect, create an addToDictionary function to allow subclasses to chose how to handle insertion. Change-Id: I02fae4e98b02db5a40467ec470b71020d5e867cb Signed-off-by: Daniel R. Carvalho Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/21147 Tested-by: kokoro Maintainer: Nikos Nikoleris Reviewed-by: Nikos Nikoleris Reviewed-by: Bobby R. Bruce --- src/mem/cache/compressors/Compressors.py | 10 +- src/mem/cache/compressors/SConscript | 1 + src/mem/cache/compressors/cpack.cc | 168 +--------- src/mem/cache/compressors/cpack.hh | 329 ++------------------ src/mem/cache/compressors/dictionary_compressor.cc | 212 +++++++++++++ src/mem/cache/compressors/dictionary_compressor.hh | 346 +++++++++++++++++++++ 6 files changed, 609 insertions(+), 457 deletions(-) create mode 100644 src/mem/cache/compressors/dictionary_compressor.cc create mode 100644 src/mem/cache/compressors/dictionary_compressor.hh diff --git a/src/mem/cache/compressors/Compressors.py b/src/mem/cache/compressors/Compressors.py index 4f86ec282..b6315ad8d 100644 --- a/src/mem/cache/compressors/Compressors.py +++ b/src/mem/cache/compressors/Compressors.py @@ -49,7 +49,15 @@ class BDI(BaseCacheCompressor): "combinations of base and delta for the compressors. False if using" \ "only the lowest possible delta size for each base size."); -class CPack(BaseCacheCompressor): +class DictionaryCompressor(BaseCacheCompressor): + type = 'DictionaryCompressor' + abstract = True + cxx_header = "mem/cache/compressors/dictionary_compressor.hh" + + dictionary_size = Param.Int(Parent.cache_line_size, + "Number of dictionary entries") + +class CPack(DictionaryCompressor): type = 'CPack' cxx_class = 'CPack' cxx_header = "mem/cache/compressors/cpack.hh" diff --git a/src/mem/cache/compressors/SConscript b/src/mem/cache/compressors/SConscript index 0b3ba6451..68cf7ef10 100644 --- a/src/mem/cache/compressors/SConscript +++ b/src/mem/cache/compressors/SConscript @@ -35,3 +35,4 @@ SimObject('Compressors.py') Source('base.cc') Source('bdi.cc') Source('cpack.cc') +Source('dictionary_compressor.cc') diff --git a/src/mem/cache/compressors/cpack.cc b/src/mem/cache/compressors/cpack.cc index 625bedcd8..9e0cc2d42 100644 --- a/src/mem/cache/compressors/cpack.cc +++ b/src/mem/cache/compressors/cpack.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Inria + * Copyright (c) 2018-2019 Inria * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -34,116 +34,25 @@ #include "mem/cache/compressors/cpack.hh" -#include -#include - -#include "debug/CacheComp.hh" #include "params/CPack.hh" -CPack::CompData::CompData(const std::size_t dictionary_size) - : CompressionData() -{ -} - -CPack::CompData::~CompData() -{ -} - CPack::CPack(const Params *p) - : BaseCacheCompressor(p), dictionarySize(2*blkSize/8) + : DictionaryCompressor(p) { - dictionary.resize(dictionarySize); - - resetDictionary(); } void -CPack::resetDictionary() +CPack::addToDictionary(std::array data) { - // Reset number of valid entries - numEntries = 0; - - // Set all entries as 0 - std::array zero_word = {0, 0, 0, 0}; - std::fill(dictionary.begin(), dictionary.end(), zero_word); -} - -std::unique_ptr -CPack::compressWord(const uint32_t data) -{ - // Split data in bytes - const std::array bytes = { - static_cast(data & 0xFF), - static_cast((data >> 8) & 0xFF), - static_cast((data >> 16) & 0xFF), - static_cast((data >> 24) & 0xFF) - }; - - // Start as a no-match pattern. A negative match location is used so that - // patterns that depend on the dictionary entry don't match - std::unique_ptr pattern = - PatternFactory::getPattern(bytes, {0, 0, 0, 0}, -1); - - // Search for word on dictionary - for (std::size_t i = 0; i < numEntries; i++) { - // Try matching input with possible patterns - std::unique_ptr temp_pattern = - PatternFactory::getPattern(bytes, dictionary[i], i); - - // Check if found pattern is better than previous - if (temp_pattern->getSizeBits() < pattern->getSizeBits()) { - pattern = std::move(temp_pattern); - } - } - - // Update stats - patternStats[pattern->getPatternNumber()]++; - - // Push into dictionary - if ((numEntries < dictionarySize) && pattern->shouldAllocate()) { - dictionary[numEntries++] = bytes; - } - - return pattern; + assert(numEntries < dictionarySize); + dictionary[numEntries++] = data; } std::unique_ptr CPack::compress(const uint64_t* data, Cycles& comp_lat, Cycles& decomp_lat) { - std::unique_ptr comp_data = - std::unique_ptr(new CompData(dictionarySize)); - - // Compression size - std::size_t size = 0; - - // Reset dictionary - resetDictionary(); - - // Compress every word sequentially - for (std::size_t i = 0; i < blkSize/8; i++) { - const uint32_t first_word = ((data[i])&0xFFFFFFFF00000000) >> 32; - const uint32_t second_word = (data[i])&0x00000000FFFFFFFF; - - // Compress both words - std::unique_ptr first_pattern = compressWord(first_word); - std::unique_ptr second_pattern = compressWord(second_word); - - // Update total line compression size - size += first_pattern->getSizeBits() + second_pattern->getSizeBits(); - - // Print debug information - DPRINTF(CacheComp, "Compressed %08x to %s\n", first_word, - first_pattern->print()); - DPRINTF(CacheComp, "Compressed %08x to %s\n", second_word, - second_pattern->print()); - - // Append to pattern list - comp_data->entries.push_back(std::move(first_pattern)); - comp_data->entries.push_back(std::move(second_pattern)); - } - - // Set final compression size - comp_data->setSizeBits(size); + std::unique_ptr comp_data = + DictionaryCompressor::compress(data); // Set compression latency (Accounts for pattern matching, length // generation, packaging and shifting) @@ -156,69 +65,6 @@ CPack::compress(const uint64_t* data, Cycles& comp_lat, Cycles& decomp_lat) return std::move(comp_data); } -uint32_t -CPack::decompressWord(const Pattern* pattern) -{ - // Search for matching entry - std::vector>::iterator entry_it = - dictionary.begin(); - std::advance(entry_it, pattern->getMatchLocation()); - - // Decompress the match. If the decompressed value must be added to - // the dictionary, do it - const std::array data = pattern->decompress(*entry_it); - if (pattern->shouldAllocate()) { - dictionary[numEntries++] = data; - } - - // Return word - return (((((data[3] << 8) | data[2]) << 8) | data[1]) << 8) | data[0]; -} - -void -CPack::decompress(const CompressionData* comp_data, uint64_t* data) -{ - const CompData* cpack_comp_data = static_cast(comp_data); - - // Reset dictionary - resetDictionary(); - - // Decompress every entry sequentially - std::vector decomp_words; - for (const auto& entry : cpack_comp_data->entries) { - const uint32_t word = decompressWord(&*entry); - decomp_words.push_back(word); - - // Print debug information - DPRINTF(CacheComp, "Decompressed %s to %x\n", entry->print(), word); - } - - // Concatenate the decompressed words to generate the cache lines - for (std::size_t i = 0; i < blkSize/8; i++) { - data[i] = (static_cast(decomp_words[2*i]) << 32) | - decomp_words[2*i+1]; - } -} - -void -CPack::regStats() -{ - BaseCacheCompressor::regStats(); - - // We store the frequency of each pattern - patternStats - .init(Pattern::getNumPatterns()) - .name(name() + ".pattern") - .desc("Number of data entries that were compressed to this pattern.") - ; - - for (unsigned i = 0; i < Pattern::getNumPatterns(); ++i) { - patternStats.subname(i, Pattern::getName(i)); - patternStats.subdesc(i, "Number of data entries that match pattern " + - Pattern::getName(i)); - } -} - CPack* CPackParams::create() { diff --git a/src/mem/cache/compressors/cpack.hh b/src/mem/cache/compressors/cpack.hh index 1a271eeae..bd258b372 100644 --- a/src/mem/cache/compressors/cpack.hh +++ b/src/mem/cache/compressors/cpack.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Inria + * Copyright (c) 2018-2019 Inria * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,14 +31,6 @@ /** @file * Definition of CPack compression, from "C-Pack: A High-Performance * Microprocessor Cache Compression Algorithm". - * - * The dictionary is composed of 32-bit entries. - * - * The patterns are implemented as individual classes that have a checking - * function isPattern(), to determine if the data fits the pattern, and a - * decompress() function, which decompresses the contents of a pattern. - * Every new pattern must inherit from the Pattern class and be added to the - * patternFactory. */ #ifndef __MEM_CACHE_COMPRESSORS_CPACK_HH__ @@ -48,23 +40,16 @@ #include #include #include -#include #include "base/types.hh" -#include "mem/cache/compressors/base.hh" +#include "mem/cache/compressors/dictionary_compressor.hh" struct CPackParams; -class CPack : public BaseCacheCompressor +class CPack : public DictionaryCompressor { private: - /** - * Compression data for CPack. It consists of a vector of patterns. - */ - class CompData; - // Forward declaration of all possible patterns - class Pattern; class PatternZZZZ; class PatternXXXX; class PatternMMMM; @@ -73,46 +58,14 @@ class CPack : public BaseCacheCompressor class PatternMMMX; /** - * Create a factory to determine if input matches a pattern. The if else - * chains are constructed by recursion. The patterns should be explored - * sorted by size for correct behaviour. - */ - template - struct Factory - { - static std::unique_ptr getPattern( - const std::array& bytes, - const std::array& dict_bytes, const int match_location) - { - // If match this pattern, instantiate it. If a negative match - // location is used, the patterns that use the dictionary bytes - // must return false. This is used when there are no dictionary - // entries yet - if (Head::isPattern(bytes, dict_bytes, match_location)) { - return std::unique_ptr( - new Head(bytes, match_location)); - // Otherwise, go for next pattern - } else { - return Factory::getPattern(bytes, dict_bytes, - match_location); - } - } - }; - - /** - * Specialization to end the recursion. + * The patterns proposed in the paper. Each letter represents a byte: + * Z is a null byte, M is a dictionary match, X is a new value. + * These are used as indexes to reference the pattern data. If a new + * pattern is added, it must be done before NUM_PATTERNS. */ - template - struct Factory - { - static std::unique_ptr getPattern( - const std::array& bytes, - const std::array& dict_bytes, const int match_location) - { - // Instantiate last pattern. Should be the XXXX pattern. - return std::unique_ptr(new Head(bytes, match_location)); - } - }; + typedef enum { + ZZZZ, XXXX, MMMM, MMXX, ZZZX, MMMX, NUM_PATTERNS + } PatternNumber; /** * Convenience factory declaration. The templates must be organized by @@ -121,55 +74,28 @@ class CPack : public BaseCacheCompressor using PatternFactory = Factory; - /** - * The dictionary. - */ - std::vector> dictionary; - - /** - * Dictionary size. - */ - const std::size_t dictionarySize; - - /** - * Number of valid entries in the dictionary. - */ - std::size_t numEntries; - - /** - * @defgroup CompressionStats Compression specific statistics. - * @{ - */ + uint64_t getNumPatterns() const override { return NUM_PATTERNS; } - /** - * Number of data entries that were compressed to each pattern. - */ - Stats::Vector patternStats; - - /** - * @} - */ + std::string + getName(int number) const override + { + static std::map patternNames = { + {ZZZZ, "ZZZZ"}, {XXXX, "XXXX"}, {MMMM, "MMMM"}, + {MMXX, "MMXX"}, {ZZZX, "ZZZX"}, {MMMX, "MMMX"} + }; - /** - * Compress data. - * - * @param data Data to be compressed. - * @return The pattern this data matches. - */ - std::unique_ptr compressWord(const uint32_t data); + return patternNames[number]; + }; - /** - * Decompress a word. - * - * @param pattern The pattern to be decompressed. - * @return The decompressed word. - */ - uint32_t decompressWord(const Pattern* pattern); + std::unique_ptr getPattern( + const std::array& bytes, + const std::array& dict_bytes, + const int match_location) const override + { + return PatternFactory::getPattern(bytes, dict_bytes, match_location); + } - /** - * Clear all dictionary entries. - */ - void resetDictionary(); + void addToDictionary(std::array data) override; /** * Apply compression. @@ -182,14 +108,6 @@ class CPack : public BaseCacheCompressor std::unique_ptr compress( const uint64_t* data, Cycles& comp_lat, Cycles& decomp_lat) override; - /** - * Decompress data. - * - * @param comp_data Compressed cache line. - * @param data The cache line to be decompressed. - */ - void decompress(const CompressionData* comp_data, uint64_t* data) override; - public: /** Convenience typedef. */ typedef CPackParams Params; @@ -203,167 +121,9 @@ class CPack : public BaseCacheCompressor * Default destructor. */ ~CPack() {}; - - /** - * Register local statistics. - */ - void regStats() override; -}; - -/** - * The compressed data is composed of multiple pattern entries. To add a new - * pattern one should inherit from this class and implement isPattern() and - * decompress. Then the new pattern must be added to the PatternFactory - * declaration in crescent order of size (in the CPack class). The pattern - * must be also added to the Name enum in the CPack::Pattern class before - * NUM_PATTERNS. - */ -class CPack::Pattern -{ - protected: - - /** - * The patterns proposed in the paper. Each letter represents a byte: - * Z is a null byte, M is a dictionary match, X is a new value. - * These are used as indexes to reference the pattern data. If a new - * pattern is added, it must be done before NUM_PATTERNS. - */ - typedef enum { - ZZZZ, XXXX, MMMM, MMXX, ZZZX, MMMX, NUM_PATTERNS - } PatternNumber; - - /** - * Pattern enum number. - */ - const PatternNumber patternNumber; - - /** - * Code associated to the pattern. - */ - const uint8_t code; - - /** - * Length, in bits, of the code and match location. - */ - const uint8_t length; - - /** - * Number of unmatched bytes; - */ - const uint8_t numUnmatchedBytes; - - /** - * Index representing the the match location. - */ - const int matchLocation; - - /** - * Wether the pattern allocates a dictionary entry or not. - */ - const bool allocate; - - /** - * Get code of this pattern. - * - * @return The code. - */ - uint8_t getCode() const { return code; } - - public: - /** - * Default constructor. - * - * @param number Pattern number. - * @param code Code associated to this pattern. - * @param metadata_length Length, in bits, of the code and match location. - * @param num_unmatched_bytes Number of unmatched bytes. - * @param match_location Index of the match location. - */ - Pattern(const PatternNumber number, const uint64_t code, - const uint64_t metadata_length, const uint64_t num_unmatched_bytes, - const int match_location, const bool allocate = true) - : patternNumber(number), code(code), length(metadata_length), - numUnmatchedBytes(num_unmatched_bytes), - matchLocation(match_location), allocate(allocate) {}; - - /** - * Default destructor. - */ - virtual ~Pattern() = default; - - /** - * Trick function to get the number of patterns. - * - * @return The number of defined patterns. - */ - static uint64_t getNumPatterns() { return NUM_PATTERNS; }; - - /** - * Get enum number associated to this pattern. - * - * @return The pattern enum number. - */ - PatternNumber getPatternNumber() const { return patternNumber; }; - - /** - * Get meta-name assigned to the given pattern. - * - * @param number The number of the pattern. - * @return The meta-name of the pattern. - */ - static std::string getName(int number) - { - static std::map patternNames = { - {ZZZZ, "ZZZZ"}, {XXXX, "XXXX"}, {MMMM, "MMMM"}, - {MMXX, "MMXX"}, {ZZZX, "ZZZX"}, {MMMX, "MMMX"} - }; - - return patternNames[(PatternNumber)number]; - }; - - /** - * Get the index of the dictionary match location. - * - * @return The index of the match location. - */ - uint8_t getMatchLocation() const { return matchLocation; } - - /** - * Get size, in bits, of the pattern (excluding prefix). Corresponds to - * unmatched_data_size + code_length. - * - * @return The size. - */ - std::size_t getSizeBits() const { - return numUnmatchedBytes*CHAR_BIT + length; - } - - /** - * Determine if pattern allocates a dictionary entry. - * - * @return True if should allocate a dictionary entry. - */ - bool shouldAllocate() const { - return allocate; - } - - std::string print() const { - return csprintf("pattern %s (encoding %x, size %u bits)", - getName(patternNumber), getCode(), getSizeBits()); - } - - /** - * Decompress the pattern. Each pattern has its own way of interpreting - * its data. - * - * @param dict_bytes The bytes in the corresponding matching entry. - * @return The decompressed pattern. - */ - virtual std::array decompress( - const std::array dict_bytes) const = 0; }; -class CPack::PatternZZZZ : public Pattern +class CPack::PatternZZZZ : public DictionaryCompressor::Pattern { public: PatternZZZZ(const std::array bytes, const int match_location) @@ -384,7 +144,7 @@ class CPack::PatternZZZZ : public Pattern } }; -class CPack::PatternXXXX : public Pattern +class CPack::PatternXXXX : public DictionaryCompressor::Pattern { private: /** @@ -412,7 +172,7 @@ class CPack::PatternXXXX : public Pattern } }; -class CPack::PatternMMMM : public Pattern +class CPack::PatternMMMM : public DictionaryCompressor::Pattern { public: PatternMMMM(const std::array bytes, const int match_location) @@ -432,7 +192,7 @@ class CPack::PatternMMMM : public Pattern } }; -class CPack::PatternMMXX : public Pattern +class CPack::PatternMMXX : public DictionaryCompressor::Pattern { private: /** @@ -464,7 +224,7 @@ class CPack::PatternMMXX : public Pattern } }; -class CPack::PatternZZZX : public Pattern +class CPack::PatternZZZX : public DictionaryCompressor::Pattern { private: /** @@ -491,7 +251,7 @@ class CPack::PatternZZZX : public Pattern } }; -class CPack::PatternMMMX : public Pattern +class CPack::PatternMMMX : public DictionaryCompressor::Pattern { private: /** @@ -520,25 +280,4 @@ class CPack::PatternMMMX : public Pattern } }; -class CPack::CompData : public CompressionData -{ - public: - /** - * The patterns matched in the original line. - */ - std::vector> entries; - - /** - * Default constructor. - * - * @param dictionary_size Number of entries in the dictionary. - */ - CompData(const std::size_t dictionary_size); - - /** - * Default destructor. - */ - ~CompData(); -}; - #endif //__MEM_CACHE_COMPRESSORS_CPACK_HH__ diff --git a/src/mem/cache/compressors/dictionary_compressor.cc b/src/mem/cache/compressors/dictionary_compressor.cc new file mode 100644 index 000000000..c53d14a9b --- /dev/null +++ b/src/mem/cache/compressors/dictionary_compressor.cc @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2018-2019 Inria + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Daniel Carvalho + */ + +/** @file + * Implementation of a dictionary based cache compressor. + */ + +#include "mem/cache/compressors/dictionary_compressor.hh" + +#include + +#include "debug/CacheComp.hh" +#include "params/DictionaryCompressor.hh" + +DictionaryCompressor::CompData::CompData(const std::size_t dictionary_size) + : CompressionData() +{ +} + +DictionaryCompressor::CompData::~CompData() +{ +} + +DictionaryCompressor::DictionaryCompressor(const Params *p) + : BaseCacheCompressor(p), dictionarySize(p->dictionary_size) +{ + dictionary.resize(dictionarySize); + + resetDictionary(); +} + +void +DictionaryCompressor::resetDictionary() +{ + // Reset number of valid entries + numEntries = 0; + + // Set all entries as 0 + std::array zero_word = {0, 0, 0, 0}; + std::fill(dictionary.begin(), dictionary.end(), zero_word); +} + +std::unique_ptr +DictionaryCompressor::compressWord(const uint32_t data) +{ + // Split data in bytes + const std::array bytes = { + static_cast(data & 0xFF), + static_cast((data >> 8) & 0xFF), + static_cast((data >> 16) & 0xFF), + static_cast((data >> 24) & 0xFF) + }; + + // Start as a no-match pattern. A negative match location is used so that + // patterns that depend on the dictionary entry don't match + std::unique_ptr pattern = getPattern(bytes, {0, 0, 0, 0}, -1); + + // Search for word on dictionary + for (std::size_t i = 0; i < numEntries; i++) { + // Try matching input with possible patterns + std::unique_ptr temp_pattern = + getPattern(bytes, dictionary[i], i); + + // Check if found pattern is better than previous + if (temp_pattern->getSizeBits() < pattern->getSizeBits()) { + pattern = std::move(temp_pattern); + } + } + + // Update stats + patternStats[pattern->getPatternNumber()]++; + + // Push into dictionary + if (pattern->shouldAllocate()) { + addToDictionary(bytes); + } + + return pattern; +} + +std::unique_ptr +DictionaryCompressor::compress(const uint64_t* data) +{ + std::unique_ptr comp_data = + std::unique_ptr(new CompData(dictionarySize)); + + // Compression size + std::size_t size = 0; + + // Reset dictionary + resetDictionary(); + + // Compress every word sequentially + for (std::size_t i = 0; i < blkSize/8; i++) { + const uint32_t first_word = ((data[i])&0xFFFFFFFF00000000) >> 32; + const uint32_t second_word = (data[i])&0x00000000FFFFFFFF; + + // Compress both words + std::unique_ptr first_pattern = compressWord(first_word); + std::unique_ptr second_pattern = compressWord(second_word); + + // Update total line compression size + size += first_pattern->getSizeBits() + second_pattern->getSizeBits(); + + // Print debug information + DPRINTF(CacheComp, "Compressed %08x to %s\n", first_word, + first_pattern->print()); + DPRINTF(CacheComp, "Compressed %08x to %s\n", second_word, + second_pattern->print()); + + // Append to pattern list + comp_data->entries.push_back(std::move(first_pattern)); + comp_data->entries.push_back(std::move(second_pattern)); + } + + // Set final compression size + comp_data->setSizeBits(size); + + // Return compressed line + return std::move(comp_data); +} + +uint32_t +DictionaryCompressor::decompressWord(const Pattern* pattern) +{ + // Search for matching entry + std::vector>::iterator entry_it = + dictionary.begin(); + std::advance(entry_it, pattern->getMatchLocation()); + + // Decompress the match. If the decompressed value must be added to + // the dictionary, do it + const std::array data = pattern->decompress(*entry_it); + if (pattern->shouldAllocate()) { + addToDictionary(data); + } + + // Return word + return (((((data[3] << 8) | data[2]) << 8) | data[1]) << 8) | data[0]; +} + +void +DictionaryCompressor::decompress(const CompressionData* comp_data, + uint64_t* data) +{ + const CompData* casted_comp_data = static_cast(comp_data); + + // Reset dictionary + resetDictionary(); + + // Decompress every entry sequentially + std::vector decomp_words; + for (const auto& entry : casted_comp_data->entries) { + const uint32_t word = decompressWord(&*entry); + decomp_words.push_back(word); + + // Print debug information + DPRINTF(CacheComp, "Decompressed %s to %x\n", entry->print(), word); + } + + // Concatenate the decompressed words to generate the cache lines + for (std::size_t i = 0; i < blkSize/8; i++) { + data[i] = (static_cast(decomp_words[2*i]) << 32) | + decomp_words[2*i+1]; + } +} + +void +DictionaryCompressor::regStats() +{ + BaseCacheCompressor::regStats(); + + // We store the frequency of each pattern + patternStats + .init(getNumPatterns()) + .name(name() + ".pattern") + .desc("Number of data entries that were compressed to this pattern.") + ; + + for (unsigned i = 0; i < getNumPatterns(); ++i) { + patternStats.subname(i, getName(i)); + patternStats.subdesc(i, "Number of data entries that match pattern " + + getName(i)); + } +} diff --git a/src/mem/cache/compressors/dictionary_compressor.hh b/src/mem/cache/compressors/dictionary_compressor.hh new file mode 100644 index 000000000..87e69ccc8 --- /dev/null +++ b/src/mem/cache/compressors/dictionary_compressor.hh @@ -0,0 +1,346 @@ +/* + * Copyright (c) 2018-2019 Inria + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Daniel Carvalho + */ + +/** @file + * Definition of a dictionary based cache compressor. Each entry is compared + * against a dictionary to search for matches. + * + * The dictionary is composed of 32-bit entries, and the comparison is done + * byte per byte. + * + * The patterns are implemented as individual classes that have a checking + * function isPattern(), to determine if the data fits the pattern, and a + * decompress() function, which decompresses the contents of a pattern. + * Every new pattern must inherit from the Pattern class and be added to the + * patternFactory. + */ + +#ifndef __MEM_CACHE_COMPRESSORS_DICTIONARY_COMPRESSOR_HH__ +#define __MEM_CACHE_COMPRESSORS_DICTIONARY_COMPRESSOR_HH__ + +#include +#include +#include +#include +#include +#include + +#include "base/types.hh" +#include "mem/cache/compressors/base.hh" + +struct DictionaryCompressorParams; + +class DictionaryCompressor : public BaseCacheCompressor +{ + protected: + /** + * Compression data for the dictionary compressor. It consists of a vector + * of patterns. + */ + class CompData; + + // Forward declaration of a pattern + class Pattern; + + /** + * Create a factory to determine if input matches a pattern. The if else + * chains are constructed by recursion. The patterns should be explored + * sorted by size for correct behaviour. + */ + template + struct Factory + { + static std::unique_ptr getPattern( + const std::array& bytes, + const std::array& dict_bytes, const int match_location) + { + // If match this pattern, instantiate it. If a negative match + // location is used, the patterns that use the dictionary bytes + // must return false. This is used when there are no dictionary + // entries yet + if (Head::isPattern(bytes, dict_bytes, match_location)) { + return std::unique_ptr( + new Head(bytes, match_location)); + // Otherwise, go for next pattern + } else { + return Factory::getPattern(bytes, dict_bytes, + match_location); + } + } + }; + + /** Specialization to end the recursion. */ + template + struct Factory + { + static std::unique_ptr getPattern( + const std::array& bytes, + const std::array& dict_bytes, const int match_location) + { + // Instantiate last pattern. Should be the XXXX pattern. + return std::unique_ptr(new Head(bytes, match_location)); + } + }; + + /** The dictionary. */ + std::vector> dictionary; + + /** Dictionary size. */ + const std::size_t dictionarySize; + + /** Number of valid entries in the dictionary. */ + std::size_t numEntries; + + /** + * @defgroup CompressionStats Compression specific statistics. + * @{ + */ + + /** + * Number of data entries that were compressed to each pattern. + */ + Stats::Vector patternStats; + + /** + * @} + */ + + /** + * Trick function to get the number of patterns. + * + * @return The number of defined patterns. + */ + virtual uint64_t getNumPatterns() const = 0; + + /** + * Get meta-name assigned to the given pattern. + * + * @param number The number of the pattern. + * @return The meta-name of the pattern. + */ + virtual std::string getName(int number) const = 0; + + /** + * Since the factory cannot be instantiated here, classes that inherit + * from this base class have to implement the call to their factory's + * getPattern. + */ + virtual std::unique_ptr getPattern( + const std::array& bytes, + const std::array& dict_bytes, + const int match_location) const = 0; + + /** + * Compress data. + * + * @param data Data to be compressed. + * @return The pattern this data matches. + */ + std::unique_ptr compressWord(const uint32_t data); + + /** + * Decompress a word. + * + * @param pattern The pattern to be decompressed. + * @return The decompressed word. + */ + uint32_t decompressWord(const Pattern* pattern); + + /** Clear all dictionary entries. */ + void resetDictionary(); + + /** + * Add an entry to the dictionary. + * + * @param data The new entry. + */ + virtual void addToDictionary(std::array data) = 0; + + /** + * Apply compression. + * + * @param data The cache line to be compressed. + * @return Cache line after compression. + */ + std::unique_ptr compress( + const uint64_t* data); + + /** + * Decompress data. + * + * @param comp_data Compressed cache line. + * @param data The cache line to be decompressed. + */ + void decompress(const CompressionData* comp_data, uint64_t* data) override; + + public: + /** Convenience typedef. */ + typedef DictionaryCompressorParams Params; + + /** Default constructor. */ + DictionaryCompressor(const Params *p); + + /** Default destructor. */ + ~DictionaryCompressor() {}; + + /** Register local statistics. */ + void regStats() override; +}; + +/** + * The compressed data is composed of multiple pattern entries. To add a new + * pattern one should inherit from this class and implement isPattern() and + * decompress(). Then the new pattern must be added to the PatternFactory + * declaration in crescent order of size (in the DictionaryCompressor class). + */ +class DictionaryCompressor::Pattern +{ + protected: + /** Pattern enum number. */ + const int patternNumber; + + /** Code associated to the pattern. */ + const uint8_t code; + + /** Length, in bits, of the code and match location. */ + const uint8_t length; + + /** Number of unmatched bytes. */ + const uint8_t numUnmatchedBytes; + + /** Index representing the the match location. */ + const int matchLocation; + + /** Wether the pattern allocates a dictionary entry or not. */ + const bool allocate; + + public: + /** + * Default constructor. + * + * @param number Pattern number. + * @param code Code associated to this pattern. + * @param metadata_length Length, in bits, of the code and match location. + * @param num_unmatched_bytes Number of unmatched bytes. + * @param match_location Index of the match location. + */ + Pattern(const int number, const uint64_t code, + const uint64_t metadata_length, const uint64_t num_unmatched_bytes, + const int match_location, const bool allocate = true) + : patternNumber(number), code(code), length(metadata_length), + numUnmatchedBytes(num_unmatched_bytes), + matchLocation(match_location), allocate(allocate) + { + } + + /** Default destructor. */ + virtual ~Pattern() = default; + + /** + * Get enum number associated to this pattern. + * + * @return The pattern enum number. + */ + int getPatternNumber() const { return patternNumber; }; + + /** + * Get code of this pattern. + * + * @return The code. + */ + uint8_t getCode() const { return code; } + + /** + * Get the index of the dictionary match location. + * + * @return The index of the match location. + */ + uint8_t getMatchLocation() const { return matchLocation; } + + /** + * Get size, in bits, of the pattern (excluding prefix). Corresponds to + * unmatched_data_size + code_length. + * + * @return The size. + */ + std::size_t + getSizeBits() const + { + return numUnmatchedBytes*CHAR_BIT + length; + } + + /** + * Determine if pattern allocates a dictionary entry. + * + * @return True if should allocate a dictionary entry. + */ + bool shouldAllocate() const { return allocate; } + + /** + * Extract pattern's information to a string. + * + * @return A string containing the relevant pattern metadata. + */ + std::string + print() const + { + return csprintf("pattern %s (encoding %x, size %u bits)", + getPatternNumber(), getCode(), getSizeBits()); + } + + /** + * Decompress the pattern. Each pattern has its own way of interpreting + * its data. + * + * @param dict_bytes The bytes in the corresponding matching entry. + * @return The decompressed pattern. + */ + virtual std::array decompress( + const std::array dict_bytes) const = 0; +}; + +class DictionaryCompressor::CompData : public CompressionData +{ + public: + /** The patterns matched in the original line. */ + std::vector> entries; + + /** + * Default constructor. + * + * @param dictionary_size Number of entries in the dictionary. + */ + CompData(const std::size_t dictionary_size); + + /** Default destructor. */ + ~CompData(); +}; + +#endif //__MEM_CACHE_COMPRESSORS_DICTIONARY_COMPRESSOR_HH__ -- cgit v1.2.3