summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel R. Carvalho <odanrc@yahoo.com.br>2019-07-15 16:21:24 +0200
committerDaniel Carvalho <odanrc@yahoo.com.br>2019-10-29 21:32:02 +0000
commit7ce9fe0af9f04a6f94bec9542af025a043c46b35 (patch)
tree39ee68ae2ad98e3bf1d419686c5721d2715c5adf
parent70dc35a659d024a4362c7b3f08887f04285b34f9 (diff)
downloadgem5-7ce9fe0af9f04a6f94bec9542af025a043c46b35.tar.xz
mem-cache: Factor out CPack's dictionary functionality
Factor out dictionary functionality of CPack, so that it can be used easily for other compressors. As a side effect, create an addToDictionary function to allow subclasses to chose how to handle insertion. Change-Id: I02fae4e98b02db5a40467ec470b71020d5e867cb Signed-off-by: Daniel R. Carvalho <odanrc@yahoo.com.br> Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/21147 Tested-by: kokoro <noreply+kokoro@google.com> Maintainer: Nikos Nikoleris <nikos.nikoleris@arm.com> Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com> Reviewed-by: Bobby R. Bruce <bbruce@ucdavis.edu>
-rw-r--r--src/mem/cache/compressors/Compressors.py10
-rw-r--r--src/mem/cache/compressors/SConscript1
-rw-r--r--src/mem/cache/compressors/cpack.cc168
-rw-r--r--src/mem/cache/compressors/cpack.hh329
-rw-r--r--src/mem/cache/compressors/dictionary_compressor.cc212
-rw-r--r--src/mem/cache/compressors/dictionary_compressor.hh346
6 files changed, 609 insertions, 457 deletions
diff --git a/src/mem/cache/compressors/Compressors.py b/src/mem/cache/compressors/Compressors.py
index 4f86ec282..b6315ad8d 100644
--- a/src/mem/cache/compressors/Compressors.py
+++ b/src/mem/cache/compressors/Compressors.py
@@ -49,7 +49,15 @@ class BDI(BaseCacheCompressor):
"combinations of base and delta for the compressors. False if using" \
"only the lowest possible delta size for each base size.");
-class CPack(BaseCacheCompressor):
+class DictionaryCompressor(BaseCacheCompressor):
+ type = 'DictionaryCompressor'
+ abstract = True
+ cxx_header = "mem/cache/compressors/dictionary_compressor.hh"
+
+ dictionary_size = Param.Int(Parent.cache_line_size,
+ "Number of dictionary entries")
+
+class CPack(DictionaryCompressor):
type = 'CPack'
cxx_class = 'CPack'
cxx_header = "mem/cache/compressors/cpack.hh"
diff --git a/src/mem/cache/compressors/SConscript b/src/mem/cache/compressors/SConscript
index 0b3ba6451..68cf7ef10 100644
--- a/src/mem/cache/compressors/SConscript
+++ b/src/mem/cache/compressors/SConscript
@@ -35,3 +35,4 @@ SimObject('Compressors.py')
Source('base.cc')
Source('bdi.cc')
Source('cpack.cc')
+Source('dictionary_compressor.cc')
diff --git a/src/mem/cache/compressors/cpack.cc b/src/mem/cache/compressors/cpack.cc
index 625bedcd8..9e0cc2d42 100644
--- a/src/mem/cache/compressors/cpack.cc
+++ b/src/mem/cache/compressors/cpack.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Inria
+ * Copyright (c) 2018-2019 Inria
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -34,116 +34,25 @@
#include "mem/cache/compressors/cpack.hh"
-#include <algorithm>
-#include <cstdint>
-
-#include "debug/CacheComp.hh"
#include "params/CPack.hh"
-CPack::CompData::CompData(const std::size_t dictionary_size)
- : CompressionData()
-{
-}
-
-CPack::CompData::~CompData()
-{
-}
-
CPack::CPack(const Params *p)
- : BaseCacheCompressor(p), dictionarySize(2*blkSize/8)
+ : DictionaryCompressor(p)
{
- dictionary.resize(dictionarySize);
-
- resetDictionary();
}
void
-CPack::resetDictionary()
+CPack::addToDictionary(std::array<uint8_t, 4> data)
{
- // Reset number of valid entries
- numEntries = 0;
-
- // Set all entries as 0
- std::array<uint8_t, 4> zero_word = {0, 0, 0, 0};
- std::fill(dictionary.begin(), dictionary.end(), zero_word);
-}
-
-std::unique_ptr<CPack::Pattern>
-CPack::compressWord(const uint32_t data)
-{
- // Split data in bytes
- const std::array<uint8_t, 4> bytes = {
- static_cast<uint8_t>(data & 0xFF),
- static_cast<uint8_t>((data >> 8) & 0xFF),
- static_cast<uint8_t>((data >> 16) & 0xFF),
- static_cast<uint8_t>((data >> 24) & 0xFF)
- };
-
- // Start as a no-match pattern. A negative match location is used so that
- // patterns that depend on the dictionary entry don't match
- std::unique_ptr<Pattern> pattern =
- PatternFactory::getPattern(bytes, {0, 0, 0, 0}, -1);
-
- // Search for word on dictionary
- for (std::size_t i = 0; i < numEntries; i++) {
- // Try matching input with possible patterns
- std::unique_ptr<Pattern> temp_pattern =
- PatternFactory::getPattern(bytes, dictionary[i], i);
-
- // Check if found pattern is better than previous
- if (temp_pattern->getSizeBits() < pattern->getSizeBits()) {
- pattern = std::move(temp_pattern);
- }
- }
-
- // Update stats
- patternStats[pattern->getPatternNumber()]++;
-
- // Push into dictionary
- if ((numEntries < dictionarySize) && pattern->shouldAllocate()) {
- dictionary[numEntries++] = bytes;
- }
-
- return pattern;
+ assert(numEntries < dictionarySize);
+ dictionary[numEntries++] = data;
}
std::unique_ptr<BaseCacheCompressor::CompressionData>
CPack::compress(const uint64_t* data, Cycles& comp_lat, Cycles& decomp_lat)
{
- std::unique_ptr<CompData> comp_data =
- std::unique_ptr<CompData>(new CompData(dictionarySize));
-
- // Compression size
- std::size_t size = 0;
-
- // Reset dictionary
- resetDictionary();
-
- // Compress every word sequentially
- for (std::size_t i = 0; i < blkSize/8; i++) {
- const uint32_t first_word = ((data[i])&0xFFFFFFFF00000000) >> 32;
- const uint32_t second_word = (data[i])&0x00000000FFFFFFFF;
-
- // Compress both words
- std::unique_ptr<Pattern> first_pattern = compressWord(first_word);
- std::unique_ptr<Pattern> second_pattern = compressWord(second_word);
-
- // Update total line compression size
- size += first_pattern->getSizeBits() + second_pattern->getSizeBits();
-
- // Print debug information
- DPRINTF(CacheComp, "Compressed %08x to %s\n", first_word,
- first_pattern->print());
- DPRINTF(CacheComp, "Compressed %08x to %s\n", second_word,
- second_pattern->print());
-
- // Append to pattern list
- comp_data->entries.push_back(std::move(first_pattern));
- comp_data->entries.push_back(std::move(second_pattern));
- }
-
- // Set final compression size
- comp_data->setSizeBits(size);
+ std::unique_ptr<BaseCacheCompressor::CompressionData> comp_data =
+ DictionaryCompressor::compress(data);
// Set compression latency (Accounts for pattern matching, length
// generation, packaging and shifting)
@@ -156,69 +65,6 @@ CPack::compress(const uint64_t* data, Cycles& comp_lat, Cycles& decomp_lat)
return std::move(comp_data);
}
-uint32_t
-CPack::decompressWord(const Pattern* pattern)
-{
- // Search for matching entry
- std::vector<std::array<uint8_t, 4>>::iterator entry_it =
- dictionary.begin();
- std::advance(entry_it, pattern->getMatchLocation());
-
- // Decompress the match. If the decompressed value must be added to
- // the dictionary, do it
- const std::array<uint8_t, 4> data = pattern->decompress(*entry_it);
- if (pattern->shouldAllocate()) {
- dictionary[numEntries++] = data;
- }
-
- // Return word
- return (((((data[3] << 8) | data[2]) << 8) | data[1]) << 8) | data[0];
-}
-
-void
-CPack::decompress(const CompressionData* comp_data, uint64_t* data)
-{
- const CompData* cpack_comp_data = static_cast<const CompData*>(comp_data);
-
- // Reset dictionary
- resetDictionary();
-
- // Decompress every entry sequentially
- std::vector<uint32_t> decomp_words;
- for (const auto& entry : cpack_comp_data->entries) {
- const uint32_t word = decompressWord(&*entry);
- decomp_words.push_back(word);
-
- // Print debug information
- DPRINTF(CacheComp, "Decompressed %s to %x\n", entry->print(), word);
- }
-
- // Concatenate the decompressed words to generate the cache lines
- for (std::size_t i = 0; i < blkSize/8; i++) {
- data[i] = (static_cast<uint64_t>(decomp_words[2*i]) << 32) |
- decomp_words[2*i+1];
- }
-}
-
-void
-CPack::regStats()
-{
- BaseCacheCompressor::regStats();
-
- // We store the frequency of each pattern
- patternStats
- .init(Pattern::getNumPatterns())
- .name(name() + ".pattern")
- .desc("Number of data entries that were compressed to this pattern.")
- ;
-
- for (unsigned i = 0; i < Pattern::getNumPatterns(); ++i) {
- patternStats.subname(i, Pattern::getName(i));
- patternStats.subdesc(i, "Number of data entries that match pattern " +
- Pattern::getName(i));
- }
-}
-
CPack*
CPackParams::create()
{
diff --git a/src/mem/cache/compressors/cpack.hh b/src/mem/cache/compressors/cpack.hh
index 1a271eeae..bd258b372 100644
--- a/src/mem/cache/compressors/cpack.hh
+++ b/src/mem/cache/compressors/cpack.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Inria
+ * Copyright (c) 2018-2019 Inria
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -31,14 +31,6 @@
/** @file
* Definition of CPack compression, from "C-Pack: A High-Performance
* Microprocessor Cache Compression Algorithm".
- *
- * The dictionary is composed of 32-bit entries.
- *
- * The patterns are implemented as individual classes that have a checking
- * function isPattern(), to determine if the data fits the pattern, and a
- * decompress() function, which decompresses the contents of a pattern.
- * Every new pattern must inherit from the Pattern class and be added to the
- * patternFactory.
*/
#ifndef __MEM_CACHE_COMPRESSORS_CPACK_HH__
@@ -48,23 +40,16 @@
#include <cstdint>
#include <map>
#include <memory>
-#include <vector>
#include "base/types.hh"
-#include "mem/cache/compressors/base.hh"
+#include "mem/cache/compressors/dictionary_compressor.hh"
struct CPackParams;
-class CPack : public BaseCacheCompressor
+class CPack : public DictionaryCompressor
{
private:
- /**
- * Compression data for CPack. It consists of a vector of patterns.
- */
- class CompData;
-
// Forward declaration of all possible patterns
- class Pattern;
class PatternZZZZ;
class PatternXXXX;
class PatternMMMM;
@@ -73,46 +58,14 @@ class CPack : public BaseCacheCompressor
class PatternMMMX;
/**
- * Create a factory to determine if input matches a pattern. The if else
- * chains are constructed by recursion. The patterns should be explored
- * sorted by size for correct behaviour.
- */
- template <class Head, class... Tail>
- struct Factory
- {
- static std::unique_ptr<Pattern> getPattern(
- const std::array<uint8_t, 4>& bytes,
- const std::array<uint8_t, 4>& dict_bytes, const int match_location)
- {
- // If match this pattern, instantiate it. If a negative match
- // location is used, the patterns that use the dictionary bytes
- // must return false. This is used when there are no dictionary
- // entries yet
- if (Head::isPattern(bytes, dict_bytes, match_location)) {
- return std::unique_ptr<Pattern>(
- new Head(bytes, match_location));
- // Otherwise, go for next pattern
- } else {
- return Factory<Tail...>::getPattern(bytes, dict_bytes,
- match_location);
- }
- }
- };
-
- /**
- * Specialization to end the recursion.
+ * The patterns proposed in the paper. Each letter represents a byte:
+ * Z is a null byte, M is a dictionary match, X is a new value.
+ * These are used as indexes to reference the pattern data. If a new
+ * pattern is added, it must be done before NUM_PATTERNS.
*/
- template <class Head>
- struct Factory<Head>
- {
- static std::unique_ptr<Pattern> getPattern(
- const std::array<uint8_t, 4>& bytes,
- const std::array<uint8_t, 4>& dict_bytes, const int match_location)
- {
- // Instantiate last pattern. Should be the XXXX pattern.
- return std::unique_ptr<Pattern>(new Head(bytes, match_location));
- }
- };
+ typedef enum {
+ ZZZZ, XXXX, MMMM, MMXX, ZZZX, MMMX, NUM_PATTERNS
+ } PatternNumber;
/**
* Convenience factory declaration. The templates must be organized by
@@ -121,55 +74,28 @@ class CPack : public BaseCacheCompressor
using PatternFactory = Factory<PatternZZZZ, PatternMMMM, PatternZZZX,
PatternMMMX, PatternMMXX, PatternXXXX>;
- /**
- * The dictionary.
- */
- std::vector<std::array<uint8_t, 4>> dictionary;
-
- /**
- * Dictionary size.
- */
- const std::size_t dictionarySize;
-
- /**
- * Number of valid entries in the dictionary.
- */
- std::size_t numEntries;
-
- /**
- * @defgroup CompressionStats Compression specific statistics.
- * @{
- */
+ uint64_t getNumPatterns() const override { return NUM_PATTERNS; }
- /**
- * Number of data entries that were compressed to each pattern.
- */
- Stats::Vector patternStats;
-
- /**
- * @}
- */
+ std::string
+ getName(int number) const override
+ {
+ static std::map<int, std::string> patternNames = {
+ {ZZZZ, "ZZZZ"}, {XXXX, "XXXX"}, {MMMM, "MMMM"},
+ {MMXX, "MMXX"}, {ZZZX, "ZZZX"}, {MMMX, "MMMX"}
+ };
- /**
- * Compress data.
- *
- * @param data Data to be compressed.
- * @return The pattern this data matches.
- */
- std::unique_ptr<Pattern> compressWord(const uint32_t data);
+ return patternNames[number];
+ };
- /**
- * Decompress a word.
- *
- * @param pattern The pattern to be decompressed.
- * @return The decompressed word.
- */
- uint32_t decompressWord(const Pattern* pattern);
+ std::unique_ptr<Pattern> getPattern(
+ const std::array<uint8_t, 4>& bytes,
+ const std::array<uint8_t, 4>& dict_bytes,
+ const int match_location) const override
+ {
+ return PatternFactory::getPattern(bytes, dict_bytes, match_location);
+ }
- /**
- * Clear all dictionary entries.
- */
- void resetDictionary();
+ void addToDictionary(std::array<uint8_t, 4> data) override;
/**
* Apply compression.
@@ -182,14 +108,6 @@ class CPack : public BaseCacheCompressor
std::unique_ptr<BaseCacheCompressor::CompressionData> compress(
const uint64_t* data, Cycles& comp_lat, Cycles& decomp_lat) override;
- /**
- * Decompress data.
- *
- * @param comp_data Compressed cache line.
- * @param data The cache line to be decompressed.
- */
- void decompress(const CompressionData* comp_data, uint64_t* data) override;
-
public:
/** Convenience typedef. */
typedef CPackParams Params;
@@ -203,167 +121,9 @@ class CPack : public BaseCacheCompressor
* Default destructor.
*/
~CPack() {};
-
- /**
- * Register local statistics.
- */
- void regStats() override;
-};
-
-/**
- * The compressed data is composed of multiple pattern entries. To add a new
- * pattern one should inherit from this class and implement isPattern() and
- * decompress. Then the new pattern must be added to the PatternFactory
- * declaration in crescent order of size (in the CPack class). The pattern
- * must be also added to the Name enum in the CPack::Pattern class before
- * NUM_PATTERNS.
- */
-class CPack::Pattern
-{
- protected:
-
- /**
- * The patterns proposed in the paper. Each letter represents a byte:
- * Z is a null byte, M is a dictionary match, X is a new value.
- * These are used as indexes to reference the pattern data. If a new
- * pattern is added, it must be done before NUM_PATTERNS.
- */
- typedef enum {
- ZZZZ, XXXX, MMMM, MMXX, ZZZX, MMMX, NUM_PATTERNS
- } PatternNumber;
-
- /**
- * Pattern enum number.
- */
- const PatternNumber patternNumber;
-
- /**
- * Code associated to the pattern.
- */
- const uint8_t code;
-
- /**
- * Length, in bits, of the code and match location.
- */
- const uint8_t length;
-
- /**
- * Number of unmatched bytes;
- */
- const uint8_t numUnmatchedBytes;
-
- /**
- * Index representing the the match location.
- */
- const int matchLocation;
-
- /**
- * Wether the pattern allocates a dictionary entry or not.
- */
- const bool allocate;
-
- /**
- * Get code of this pattern.
- *
- * @return The code.
- */
- uint8_t getCode() const { return code; }
-
- public:
- /**
- * Default constructor.
- *
- * @param number Pattern number.
- * @param code Code associated to this pattern.
- * @param metadata_length Length, in bits, of the code and match location.
- * @param num_unmatched_bytes Number of unmatched bytes.
- * @param match_location Index of the match location.
- */
- Pattern(const PatternNumber number, const uint64_t code,
- const uint64_t metadata_length, const uint64_t num_unmatched_bytes,
- const int match_location, const bool allocate = true)
- : patternNumber(number), code(code), length(metadata_length),
- numUnmatchedBytes(num_unmatched_bytes),
- matchLocation(match_location), allocate(allocate) {};
-
- /**
- * Default destructor.
- */
- virtual ~Pattern() = default;
-
- /**
- * Trick function to get the number of patterns.
- *
- * @return The number of defined patterns.
- */
- static uint64_t getNumPatterns() { return NUM_PATTERNS; };
-
- /**
- * Get enum number associated to this pattern.
- *
- * @return The pattern enum number.
- */
- PatternNumber getPatternNumber() const { return patternNumber; };
-
- /**
- * Get meta-name assigned to the given pattern.
- *
- * @param number The number of the pattern.
- * @return The meta-name of the pattern.
- */
- static std::string getName(int number)
- {
- static std::map<PatternNumber, std::string> patternNames = {
- {ZZZZ, "ZZZZ"}, {XXXX, "XXXX"}, {MMMM, "MMMM"},
- {MMXX, "MMXX"}, {ZZZX, "ZZZX"}, {MMMX, "MMMX"}
- };
-
- return patternNames[(PatternNumber)number];
- };
-
- /**
- * Get the index of the dictionary match location.
- *
- * @return The index of the match location.
- */
- uint8_t getMatchLocation() const { return matchLocation; }
-
- /**
- * Get size, in bits, of the pattern (excluding prefix). Corresponds to
- * unmatched_data_size + code_length.
- *
- * @return The size.
- */
- std::size_t getSizeBits() const {
- return numUnmatchedBytes*CHAR_BIT + length;
- }
-
- /**
- * Determine if pattern allocates a dictionary entry.
- *
- * @return True if should allocate a dictionary entry.
- */
- bool shouldAllocate() const {
- return allocate;
- }
-
- std::string print() const {
- return csprintf("pattern %s (encoding %x, size %u bits)",
- getName(patternNumber), getCode(), getSizeBits());
- }
-
- /**
- * Decompress the pattern. Each pattern has its own way of interpreting
- * its data.
- *
- * @param dict_bytes The bytes in the corresponding matching entry.
- * @return The decompressed pattern.
- */
- virtual std::array<uint8_t, 4> decompress(
- const std::array<uint8_t, 4> dict_bytes) const = 0;
};
-class CPack::PatternZZZZ : public Pattern
+class CPack::PatternZZZZ : public DictionaryCompressor::Pattern
{
public:
PatternZZZZ(const std::array<uint8_t, 4> bytes, const int match_location)
@@ -384,7 +144,7 @@ class CPack::PatternZZZZ : public Pattern
}
};
-class CPack::PatternXXXX : public Pattern
+class CPack::PatternXXXX : public DictionaryCompressor::Pattern
{
private:
/**
@@ -412,7 +172,7 @@ class CPack::PatternXXXX : public Pattern
}
};
-class CPack::PatternMMMM : public Pattern
+class CPack::PatternMMMM : public DictionaryCompressor::Pattern
{
public:
PatternMMMM(const std::array<uint8_t, 4> bytes, const int match_location)
@@ -432,7 +192,7 @@ class CPack::PatternMMMM : public Pattern
}
};
-class CPack::PatternMMXX : public Pattern
+class CPack::PatternMMXX : public DictionaryCompressor::Pattern
{
private:
/**
@@ -464,7 +224,7 @@ class CPack::PatternMMXX : public Pattern
}
};
-class CPack::PatternZZZX : public Pattern
+class CPack::PatternZZZX : public DictionaryCompressor::Pattern
{
private:
/**
@@ -491,7 +251,7 @@ class CPack::PatternZZZX : public Pattern
}
};
-class CPack::PatternMMMX : public Pattern
+class CPack::PatternMMMX : public DictionaryCompressor::Pattern
{
private:
/**
@@ -520,25 +280,4 @@ class CPack::PatternMMMX : public Pattern
}
};
-class CPack::CompData : public CompressionData
-{
- public:
- /**
- * The patterns matched in the original line.
- */
- std::vector<std::unique_ptr<Pattern>> entries;
-
- /**
- * Default constructor.
- *
- * @param dictionary_size Number of entries in the dictionary.
- */
- CompData(const std::size_t dictionary_size);
-
- /**
- * Default destructor.
- */
- ~CompData();
-};
-
#endif //__MEM_CACHE_COMPRESSORS_CPACK_HH__
diff --git a/src/mem/cache/compressors/dictionary_compressor.cc b/src/mem/cache/compressors/dictionary_compressor.cc
new file mode 100644
index 000000000..c53d14a9b
--- /dev/null
+++ b/src/mem/cache/compressors/dictionary_compressor.cc
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2018-2019 Inria
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Daniel Carvalho
+ */
+
+/** @file
+ * Implementation of a dictionary based cache compressor.
+ */
+
+#include "mem/cache/compressors/dictionary_compressor.hh"
+
+#include <algorithm>
+
+#include "debug/CacheComp.hh"
+#include "params/DictionaryCompressor.hh"
+
+DictionaryCompressor::CompData::CompData(const std::size_t dictionary_size)
+ : CompressionData()
+{
+}
+
+DictionaryCompressor::CompData::~CompData()
+{
+}
+
+DictionaryCompressor::DictionaryCompressor(const Params *p)
+ : BaseCacheCompressor(p), dictionarySize(p->dictionary_size)
+{
+ dictionary.resize(dictionarySize);
+
+ resetDictionary();
+}
+
+void
+DictionaryCompressor::resetDictionary()
+{
+ // Reset number of valid entries
+ numEntries = 0;
+
+ // Set all entries as 0
+ std::array<uint8_t, 4> zero_word = {0, 0, 0, 0};
+ std::fill(dictionary.begin(), dictionary.end(), zero_word);
+}
+
+std::unique_ptr<DictionaryCompressor::Pattern>
+DictionaryCompressor::compressWord(const uint32_t data)
+{
+ // Split data in bytes
+ const std::array<uint8_t, 4> bytes = {
+ static_cast<uint8_t>(data & 0xFF),
+ static_cast<uint8_t>((data >> 8) & 0xFF),
+ static_cast<uint8_t>((data >> 16) & 0xFF),
+ static_cast<uint8_t>((data >> 24) & 0xFF)
+ };
+
+ // Start as a no-match pattern. A negative match location is used so that
+ // patterns that depend on the dictionary entry don't match
+ std::unique_ptr<Pattern> pattern = getPattern(bytes, {0, 0, 0, 0}, -1);
+
+ // Search for word on dictionary
+ for (std::size_t i = 0; i < numEntries; i++) {
+ // Try matching input with possible patterns
+ std::unique_ptr<Pattern> temp_pattern =
+ getPattern(bytes, dictionary[i], i);
+
+ // Check if found pattern is better than previous
+ if (temp_pattern->getSizeBits() < pattern->getSizeBits()) {
+ pattern = std::move(temp_pattern);
+ }
+ }
+
+ // Update stats
+ patternStats[pattern->getPatternNumber()]++;
+
+ // Push into dictionary
+ if (pattern->shouldAllocate()) {
+ addToDictionary(bytes);
+ }
+
+ return pattern;
+}
+
+std::unique_ptr<BaseCacheCompressor::CompressionData>
+DictionaryCompressor::compress(const uint64_t* data)
+{
+ std::unique_ptr<CompData> comp_data =
+ std::unique_ptr<CompData>(new CompData(dictionarySize));
+
+ // Compression size
+ std::size_t size = 0;
+
+ // Reset dictionary
+ resetDictionary();
+
+ // Compress every word sequentially
+ for (std::size_t i = 0; i < blkSize/8; i++) {
+ const uint32_t first_word = ((data[i])&0xFFFFFFFF00000000) >> 32;
+ const uint32_t second_word = (data[i])&0x00000000FFFFFFFF;
+
+ // Compress both words
+ std::unique_ptr<Pattern> first_pattern = compressWord(first_word);
+ std::unique_ptr<Pattern> second_pattern = compressWord(second_word);
+
+ // Update total line compression size
+ size += first_pattern->getSizeBits() + second_pattern->getSizeBits();
+
+ // Print debug information
+ DPRINTF(CacheComp, "Compressed %08x to %s\n", first_word,
+ first_pattern->print());
+ DPRINTF(CacheComp, "Compressed %08x to %s\n", second_word,
+ second_pattern->print());
+
+ // Append to pattern list
+ comp_data->entries.push_back(std::move(first_pattern));
+ comp_data->entries.push_back(std::move(second_pattern));
+ }
+
+ // Set final compression size
+ comp_data->setSizeBits(size);
+
+ // Return compressed line
+ return std::move(comp_data);
+}
+
+uint32_t
+DictionaryCompressor::decompressWord(const Pattern* pattern)
+{
+ // Search for matching entry
+ std::vector<std::array<uint8_t, 4>>::iterator entry_it =
+ dictionary.begin();
+ std::advance(entry_it, pattern->getMatchLocation());
+
+ // Decompress the match. If the decompressed value must be added to
+ // the dictionary, do it
+ const std::array<uint8_t, 4> data = pattern->decompress(*entry_it);
+ if (pattern->shouldAllocate()) {
+ addToDictionary(data);
+ }
+
+ // Return word
+ return (((((data[3] << 8) | data[2]) << 8) | data[1]) << 8) | data[0];
+}
+
+void
+DictionaryCompressor::decompress(const CompressionData* comp_data,
+ uint64_t* data)
+{
+ const CompData* casted_comp_data = static_cast<const CompData*>(comp_data);
+
+ // Reset dictionary
+ resetDictionary();
+
+ // Decompress every entry sequentially
+ std::vector<uint32_t> decomp_words;
+ for (const auto& entry : casted_comp_data->entries) {
+ const uint32_t word = decompressWord(&*entry);
+ decomp_words.push_back(word);
+
+ // Print debug information
+ DPRINTF(CacheComp, "Decompressed %s to %x\n", entry->print(), word);
+ }
+
+ // Concatenate the decompressed words to generate the cache lines
+ for (std::size_t i = 0; i < blkSize/8; i++) {
+ data[i] = (static_cast<uint64_t>(decomp_words[2*i]) << 32) |
+ decomp_words[2*i+1];
+ }
+}
+
+void
+DictionaryCompressor::regStats()
+{
+ BaseCacheCompressor::regStats();
+
+ // We store the frequency of each pattern
+ patternStats
+ .init(getNumPatterns())
+ .name(name() + ".pattern")
+ .desc("Number of data entries that were compressed to this pattern.")
+ ;
+
+ for (unsigned i = 0; i < getNumPatterns(); ++i) {
+ patternStats.subname(i, getName(i));
+ patternStats.subdesc(i, "Number of data entries that match pattern " +
+ getName(i));
+ }
+}
diff --git a/src/mem/cache/compressors/dictionary_compressor.hh b/src/mem/cache/compressors/dictionary_compressor.hh
new file mode 100644
index 000000000..87e69ccc8
--- /dev/null
+++ b/src/mem/cache/compressors/dictionary_compressor.hh
@@ -0,0 +1,346 @@
+/*
+ * Copyright (c) 2018-2019 Inria
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Daniel Carvalho
+ */
+
+/** @file
+ * Definition of a dictionary based cache compressor. Each entry is compared
+ * against a dictionary to search for matches.
+ *
+ * The dictionary is composed of 32-bit entries, and the comparison is done
+ * byte per byte.
+ *
+ * The patterns are implemented as individual classes that have a checking
+ * function isPattern(), to determine if the data fits the pattern, and a
+ * decompress() function, which decompresses the contents of a pattern.
+ * Every new pattern must inherit from the Pattern class and be added to the
+ * patternFactory.
+ */
+
+#ifndef __MEM_CACHE_COMPRESSORS_DICTIONARY_COMPRESSOR_HH__
+#define __MEM_CACHE_COMPRESSORS_DICTIONARY_COMPRESSOR_HH__
+
+#include <array>
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "base/types.hh"
+#include "mem/cache/compressors/base.hh"
+
+struct DictionaryCompressorParams;
+
+class DictionaryCompressor : public BaseCacheCompressor
+{
+ protected:
+ /**
+ * Compression data for the dictionary compressor. It consists of a vector
+ * of patterns.
+ */
+ class CompData;
+
+ // Forward declaration of a pattern
+ class Pattern;
+
+ /**
+ * Create a factory to determine if input matches a pattern. The if else
+ * chains are constructed by recursion. The patterns should be explored
+ * sorted by size for correct behaviour.
+ */
+ template <class Head, class... Tail>
+ struct Factory
+ {
+ static std::unique_ptr<Pattern> getPattern(
+ const std::array<uint8_t, 4>& bytes,
+ const std::array<uint8_t, 4>& dict_bytes, const int match_location)
+ {
+ // If match this pattern, instantiate it. If a negative match
+ // location is used, the patterns that use the dictionary bytes
+ // must return false. This is used when there are no dictionary
+ // entries yet
+ if (Head::isPattern(bytes, dict_bytes, match_location)) {
+ return std::unique_ptr<Pattern>(
+ new Head(bytes, match_location));
+ // Otherwise, go for next pattern
+ } else {
+ return Factory<Tail...>::getPattern(bytes, dict_bytes,
+ match_location);
+ }
+ }
+ };
+
+ /** Specialization to end the recursion. */
+ template <class Head>
+ struct Factory<Head>
+ {
+ static std::unique_ptr<Pattern> getPattern(
+ const std::array<uint8_t, 4>& bytes,
+ const std::array<uint8_t, 4>& dict_bytes, const int match_location)
+ {
+ // Instantiate last pattern. Should be the XXXX pattern.
+ return std::unique_ptr<Pattern>(new Head(bytes, match_location));
+ }
+ };
+
+ /** The dictionary. */
+ std::vector<std::array<uint8_t, 4>> dictionary;
+
+ /** Dictionary size. */
+ const std::size_t dictionarySize;
+
+ /** Number of valid entries in the dictionary. */
+ std::size_t numEntries;
+
+ /**
+ * @defgroup CompressionStats Compression specific statistics.
+ * @{
+ */
+
+ /**
+ * Number of data entries that were compressed to each pattern.
+ */
+ Stats::Vector patternStats;
+
+ /**
+ * @}
+ */
+
+ /**
+ * Trick function to get the number of patterns.
+ *
+ * @return The number of defined patterns.
+ */
+ virtual uint64_t getNumPatterns() const = 0;
+
+ /**
+ * Get meta-name assigned to the given pattern.
+ *
+ * @param number The number of the pattern.
+ * @return The meta-name of the pattern.
+ */
+ virtual std::string getName(int number) const = 0;
+
+ /**
+ * Since the factory cannot be instantiated here, classes that inherit
+ * from this base class have to implement the call to their factory's
+ * getPattern.
+ */
+ virtual std::unique_ptr<Pattern> getPattern(
+ const std::array<uint8_t, 4>& bytes,
+ const std::array<uint8_t, 4>& dict_bytes,
+ const int match_location) const = 0;
+
+ /**
+ * Compress data.
+ *
+ * @param data Data to be compressed.
+ * @return The pattern this data matches.
+ */
+ std::unique_ptr<Pattern> compressWord(const uint32_t data);
+
+ /**
+ * Decompress a word.
+ *
+ * @param pattern The pattern to be decompressed.
+ * @return The decompressed word.
+ */
+ uint32_t decompressWord(const Pattern* pattern);
+
+ /** Clear all dictionary entries. */
+ void resetDictionary();
+
+ /**
+ * Add an entry to the dictionary.
+ *
+ * @param data The new entry.
+ */
+ virtual void addToDictionary(std::array<uint8_t, 4> data) = 0;
+
+ /**
+ * Apply compression.
+ *
+ * @param data The cache line to be compressed.
+ * @return Cache line after compression.
+ */
+ std::unique_ptr<BaseCacheCompressor::CompressionData> compress(
+ const uint64_t* data);
+
+ /**
+ * Decompress data.
+ *
+ * @param comp_data Compressed cache line.
+ * @param data The cache line to be decompressed.
+ */
+ void decompress(const CompressionData* comp_data, uint64_t* data) override;
+
+ public:
+ /** Convenience typedef. */
+ typedef DictionaryCompressorParams Params;
+
+ /** Default constructor. */
+ DictionaryCompressor(const Params *p);
+
+ /** Default destructor. */
+ ~DictionaryCompressor() {};
+
+ /** Register local statistics. */
+ void regStats() override;
+};
+
+/**
+ * The compressed data is composed of multiple pattern entries. To add a new
+ * pattern one should inherit from this class and implement isPattern() and
+ * decompress(). Then the new pattern must be added to the PatternFactory
+ * declaration in crescent order of size (in the DictionaryCompressor class).
+ */
+class DictionaryCompressor::Pattern
+{
+ protected:
+ /** Pattern enum number. */
+ const int patternNumber;
+
+ /** Code associated to the pattern. */
+ const uint8_t code;
+
+ /** Length, in bits, of the code and match location. */
+ const uint8_t length;
+
+ /** Number of unmatched bytes. */
+ const uint8_t numUnmatchedBytes;
+
+ /** Index representing the the match location. */
+ const int matchLocation;
+
+ /** Wether the pattern allocates a dictionary entry or not. */
+ const bool allocate;
+
+ public:
+ /**
+ * Default constructor.
+ *
+ * @param number Pattern number.
+ * @param code Code associated to this pattern.
+ * @param metadata_length Length, in bits, of the code and match location.
+ * @param num_unmatched_bytes Number of unmatched bytes.
+ * @param match_location Index of the match location.
+ */
+ Pattern(const int number, const uint64_t code,
+ const uint64_t metadata_length, const uint64_t num_unmatched_bytes,
+ const int match_location, const bool allocate = true)
+ : patternNumber(number), code(code), length(metadata_length),
+ numUnmatchedBytes(num_unmatched_bytes),
+ matchLocation(match_location), allocate(allocate)
+ {
+ }
+
+ /** Default destructor. */
+ virtual ~Pattern() = default;
+
+ /**
+ * Get enum number associated to this pattern.
+ *
+ * @return The pattern enum number.
+ */
+ int getPatternNumber() const { return patternNumber; };
+
+ /**
+ * Get code of this pattern.
+ *
+ * @return The code.
+ */
+ uint8_t getCode() const { return code; }
+
+ /**
+ * Get the index of the dictionary match location.
+ *
+ * @return The index of the match location.
+ */
+ uint8_t getMatchLocation() const { return matchLocation; }
+
+ /**
+ * Get size, in bits, of the pattern (excluding prefix). Corresponds to
+ * unmatched_data_size + code_length.
+ *
+ * @return The size.
+ */
+ std::size_t
+ getSizeBits() const
+ {
+ return numUnmatchedBytes*CHAR_BIT + length;
+ }
+
+ /**
+ * Determine if pattern allocates a dictionary entry.
+ *
+ * @return True if should allocate a dictionary entry.
+ */
+ bool shouldAllocate() const { return allocate; }
+
+ /**
+ * Extract pattern's information to a string.
+ *
+ * @return A string containing the relevant pattern metadata.
+ */
+ std::string
+ print() const
+ {
+ return csprintf("pattern %s (encoding %x, size %u bits)",
+ getPatternNumber(), getCode(), getSizeBits());
+ }
+
+ /**
+ * Decompress the pattern. Each pattern has its own way of interpreting
+ * its data.
+ *
+ * @param dict_bytes The bytes in the corresponding matching entry.
+ * @return The decompressed pattern.
+ */
+ virtual std::array<uint8_t, 4> decompress(
+ const std::array<uint8_t, 4> dict_bytes) const = 0;
+};
+
+class DictionaryCompressor::CompData : public CompressionData
+{
+ public:
+ /** The patterns matched in the original line. */
+ std::vector<std::unique_ptr<Pattern>> entries;
+
+ /**
+ * Default constructor.
+ *
+ * @param dictionary_size Number of entries in the dictionary.
+ */
+ CompData(const std::size_t dictionary_size);
+
+ /** Default destructor. */
+ ~CompData();
+};
+
+#endif //__MEM_CACHE_COMPRESSORS_DICTIONARY_COMPRESSOR_HH__