diff options
author | Daniel R. Carvalho <odanrc@yahoo.com.br> | 2018-06-13 16:39:38 +0200 |
---|---|---|
committer | Daniel Carvalho <odanrc@yahoo.com.br> | 2019-05-08 17:41:09 +0000 |
commit | 77a49860f98a86f467bae242e6c52f6b7150631c (patch) | |
tree | 3d7bb9f91116086aae535842c201feb3b4d46382 /src/mem/cache/compressors/bdi.cc | |
parent | 0e276f6512cac540ab546baf02fd931b7181d55b (diff) | |
download | gem5-77a49860f98a86f467bae242e6c52f6b7150631c.tar.xz |
mem-cache: Create BDI Compressor
Implement Base-Delta-Immediate compression, as described in
'Base-Delta-Immediate Compression: Practical Data Compression
for On-Chip Caches'
Change-Id: I7980c340ab53a086b748f4b2108de4adc775fac8
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/11412
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com>
Maintainer: Nikos Nikoleris <nikos.nikoleris@arm.com>
Diffstat (limited to 'src/mem/cache/compressors/bdi.cc')
-rw-r--r-- | src/mem/cache/compressors/bdi.cc | 481 |
1 file changed, 481 insertions(+), 0 deletions(-)
diff --git a/src/mem/cache/compressors/bdi.cc b/src/mem/cache/compressors/bdi.cc new file mode 100644 index 000000000..e29179fc0 --- /dev/null +++ b/src/mem/cache/compressors/bdi.cc @@ -0,0 +1,481 @@ +/* + * Copyright (c) 2018 Inria + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Daniel Carvalho + */ + +/** @file + * Implementation of the BDI cache compressor. 
+ */ + +#include "mem/cache/compressors/bdi.hh" + +#include <algorithm> +#include <climits> +#include <cstring> +#include <type_traits> + +#include "debug/CacheComp.hh" +#include "params/BDI.hh" + +// Number of bytes in a qword +#define BYTES_PER_QWORD 8 + +// Declare BDI encoding names +const char* BDI::ENCODING_NAMES[] = + {"Zero", "Repeated_Values", "Base8_1", "Base8_2", "Base8_4", "Base4_1", + "Base4_2", "Base2_1", "Uncompressed"}; + +BDI::BDICompData::BDICompData(const uint8_t encoding) + : CompressionData(), _encoding(encoding) +{ +} + +uint8_t +BDI::BDICompData::getEncoding() const +{ + return _encoding; +} + +std::string +BDI::BDICompData::getName() const +{ + return ENCODING_NAMES[_encoding]; +} + +BDI::BDICompDataZeros::BDICompDataZeros() + : BDICompData(ZERO) +{ + // Calculate compressed size + calculateCompressedSize(); +} + +uint64_t +BDI::BDICompDataZeros::access(const int index) const +{ + return 0; +} + +void +BDI::BDICompDataZeros::calculateCompressedSize() +{ + // Number of bits used by Encoding + std::size_t size = encodingBits; + + setSizeBits(size); +} + +BDI::BDICompDataRep::BDICompDataRep(const uint64_t rep_value) + : BDICompData(REP_VALUES) +{ + // Set base value + base = rep_value; + + // Calculate compressed size + calculateCompressedSize(); +} + +uint64_t +BDI::BDICompDataRep::access(const int index) const +{ + return base; +} + +void +BDI::BDICompDataRep::calculateCompressedSize() +{ + // Number of bits used by Encoding + std::size_t size = encodingBits; + + // Number of bits used by Base + size += sizeof(base)*CHAR_BIT; + + setSizeBits(size); +} + +BDI::BDICompDataUncompressed::BDICompDataUncompressed( + const uint64_t* data, const std::size_t blk_size) + : BDICompData(UNCOMPRESSED), blkSize(blk_size), + _data(data, data + blk_size/CHAR_BIT) +{ + // Calculate compressed size + calculateCompressedSize(); +} + +uint64_t +BDI::BDICompDataUncompressed::access(const int index) const +{ + return _data[index]; +} + +void 
+BDI::BDICompDataUncompressed::calculateCompressedSize() +{ + // Number of bits used by Encoding + std::size_t size = encodingBits; + + // Number of bits used by uncompressed line + size += blkSize*CHAR_BIT; + + setSizeBits(size); +} + +template <class TB, class TD> +BDI::BDICompDataBaseDelta<TB, TD>::BDICompDataBaseDelta(const uint8_t encoding, + const std::size_t blk_size, const std::size_t max_num_bases) + : BDICompData(encoding), maxNumBases(max_num_bases) +{ + // Reserve the maximum possible size for the vectors + bases.reserve(maxNumBases); + bitMask.reserve(blk_size/sizeof(TD)); + deltas.reserve(blk_size/sizeof(TD)); + + // Push virtual base 0 to bases list + bases.push_back(0); +} + +template <class TB, class TD> +void +BDI::BDICompDataBaseDelta<TB, TD>::calculateCompressedSize() +{ + // Number of bits used by Encoding + std::size_t size = encodingBits; + + // Number of bits used by BitMask + size += bitMask.size()*std::ceil(std::log2(bases.size())); + + // Number of bits used by Bases. 
bases[0] is implicit in a hardware + // implementation, therefore its size is 0 + size += (maxNumBases-1)*sizeof(TB)*CHAR_BIT; + + // Number of bits used by Deltas + size += deltas.size()*sizeof(TD)*CHAR_BIT; + + CompressionData::setSizeBits(size); +} + +template <class TB, class TD> +bool +BDI::BDICompDataBaseDelta<TB, TD>::addBase(const TB base) +{ + // Can't add base if reached limit of number of bases + if (bases.size() >= maxNumBases) { + return false; + } + + // Push new base to end of bases list + bases.push_back(base); + + // New delta is 0, as it is a difference between the new base and itself + addDelta(bases.size() - 1, 0); + + return true; +} + +template <class TB, class TD> +void +BDI::BDICompDataBaseDelta<TB, TD>::addDelta(const std::size_t base_index, + const TD delta) +{ + // Insert new delta with respect to the given base + bitMask.push_back(base_index); + + // Insert new delta + deltas.push_back(delta); +} + +template <class TB, class TD> bool +BDI::BDICompDataBaseDelta<TB, TD>::compress(const uint64_t* data, + const std::size_t blk_size) +{ + // Parse through data in a sizeof(TB) granularity + for (std::size_t byte_start = 0; byte_start < blk_size; + byte_start += sizeof(TB)) + { + // Get current value + TB curValue; + std::memcpy(&curValue, ((uint8_t*)data) + byte_start, + sizeof(TB)); + + // Iterate through all bases to search for a valid delta + bool foundDelta = false; + for (int base_index = 0; base_index < bases.size(); base_index++) { + // Calculate delta relative to currently parsed base + typename std::make_signed<TB>::type delta = curValue - + bases[base_index]; + + // Check if the delta is within the limits of the delta size. 
If + // that is the case, add delta to compressed data and keep parsing + // the input data + typename std::make_signed<TB>::type limit = + ULLONG_MAX>>((BYTES_PER_QWORD-sizeof(TD))*CHAR_BIT+1); + if ((delta >= -limit) && (delta <= limit)) { + addDelta(base_index, delta); + foundDelta = true; + break; + } + } + + // If we cannot find a base that can accommodate this new line's data, + // add this value as the new base and insert its respective delta of 0. + // If the new base can't be added, it means that we reached the base + // limit, so line is uncompressible using the given encoding + if (!foundDelta && !addBase(curValue)) { + return false; + } + } + + // Calculate compressed size + calculateCompressedSize(); + + return true; +} + +template <class TB, class TD> +uint64_t +BDI::BDICompDataBaseDelta<TB, TD>::access(const int index) const +{ + // We decompress all base-delta pairs that form the 64-bit entry + // corresponding to the given 64-bit-array index + uint64_t value = 0; + + // Get relationship between the size of an uint64_t base and size of TB + const std::size_t size_diff = sizeof(uint64_t)/sizeof(TB); + + // Mask for a base entry + const uint64_t mask = ULLONG_MAX>>((BYTES_PER_QWORD-sizeof(TB))*CHAR_BIT); + + // Size, in bits, of a base entry. 
Cant be const because compiler will + // optimize and create a 64 bit instance, which will generate a shift size + // compilation error + int base_size_bits = sizeof(TB)*CHAR_BIT; + + // Concatenate all base-delta entries until they form a 64-bit entry + for (int delta_index = size_diff * (index + 1) - 1; + delta_index >= (int)(size_diff * index); delta_index--) { + // Get base and delta entries corresponding to the current delta + assert(delta_index < deltas.size()); + const TD delta = deltas[delta_index]; + const int base_index = bitMask[delta_index]; + assert(base_index < bases.size()); + const TB base = bases[base_index]; + + // Concatenate decompressed value + value <<= base_size_bits; + value |= static_cast<uint64_t>((base + delta) & mask); + } + + return value; +} + +BDI::BDI(const Params *p) + : BaseCacheCompressor(p), useMoreCompressors(p->use_more_compressors), + qwordsPerCacheLine(blkSize/BYTES_PER_QWORD) +{ + static_assert(sizeof(ENCODING_NAMES)/sizeof(char*) == NUM_ENCODINGS, + "Number of encodings doesn't match the number of names"); +} + +bool +BDI::isZeroPackable(const uint64_t* data) const +{ + return std::all_of(data, data + qwordsPerCacheLine, + [](const uint64_t entry){ return entry == 0; }); +} + +bool +BDI::isSameValuePackable(const uint64_t* data) const +{ + // We don't want to copy the whole array to the lambda expression + const uint64_t rep_value = data[0]; + return std::all_of(data, data + qwordsPerCacheLine, + [rep_value](const uint64_t entry) + {return entry == rep_value;}); +} + +template <class TB, class TD> +std::unique_ptr<BDI::BDICompData> +BDI::tryCompress(const uint64_t* data, const uint8_t encoding) const +{ + // Instantiate compressor + auto temp_data = std::unique_ptr<BDICompDataBaseDelta<TB, TD>>( + new BDICompDataBaseDelta<TB, TD>(encoding, blkSize)); + + // Try compressing. 
Return nullptr if compressor can't compress given line + if (temp_data->compress(data, blkSize)) { + return std::move(temp_data); + } else { + return std::unique_ptr<BDICompData>{}; + } +} + +void +BDI::decompress(const BaseCacheCompressor::CompressionData* comp_data, + uint64_t* data) +{ + // Decompress and go back to host endianness + for (std::size_t i = 0; i < qwordsPerCacheLine; i++) + data[i] = static_cast<const BDICompData*>(comp_data)->access(i); +} + +std::unique_ptr<BaseCacheCompressor::CompressionData> +BDI::compress(const uint64_t* data, Cycles& comp_lat, Cycles& decomp_lat) +{ + std::unique_ptr<BDICompData> bdi_data; + + // Check if it is a zero line + if (isZeroPackable(data)) { + bdi_data = std::unique_ptr<BDICompData>(new BDICompDataZeros()); + + // Set compression latency (compare 1 qword per cycle) + comp_lat = Cycles(qwordsPerCacheLine); + // Check if all values in the line are the same + } else if (isSameValuePackable(data)) { + bdi_data = std::unique_ptr<BDICompData>(new BDICompDataRep(data[0])); + + // Set compression latency (compare 1 qword per cycle) + comp_lat = Cycles(qwordsPerCacheLine); + } else { + // Initialize compressed data as an uncompressed instance + bdi_data = std::unique_ptr<BDICompData>( + new BDICompDataUncompressed(data, blkSize)); + + // Base size-delta size ratio. Used to optimize run and try to + // compress less combinations when their size is worse than the + // current best. It does not reflect the compression ratio, as + // it does not take the metadata into account. + int base_delta_ratio = 2; + + // Check which base-delta size combination is the best. This is + // optimized by giving priority to trying the compressor that would + // generate the smallest compression size. 
This way we waste less + // simulation time by not doing all possible combinations + for (int ratio = 8; ratio >= base_delta_ratio; ratio/=2) { + for (int base_size = 8; base_size >= ratio; base_size/=2) { + // If using more compressors, parse all delta sizes from 1 to + // one size smaller than the base size, otherwise just parse + // highest possible delta. When we only instantiate one delta + // size per base size, we use less area and energy, at the + // cost of lower compression efficiency + const int delta_size = base_size/ratio; + if (!useMoreCompressors && delta_size != base_size/2) { + continue; + } + + std::unique_ptr<BDICompData> temp_bdi_data; + + // Get the compression result for the current combination + if ((base_size == 8)&&(delta_size == 4)) { + temp_bdi_data = tryCompress<uint64_t, int32_t>(data, + BASE8_4); + } else if ((base_size == 8)&&(delta_size == 2)) { + temp_bdi_data = tryCompress<uint64_t, int16_t>(data, + BASE8_2); + } else if ((base_size == 8)&&(delta_size == 1)) { + temp_bdi_data = tryCompress<uint64_t, int8_t>(data, + BASE8_1); + } else if ((base_size == 4)&&(delta_size == 2)) { + temp_bdi_data = tryCompress<uint32_t, int16_t>(data, + BASE4_2); + } else if ((base_size == 4)&&(delta_size == 1)) { + temp_bdi_data = tryCompress<uint32_t, int8_t>(data, + BASE4_1); + } else if ((base_size == 2)&&(delta_size == 1)) { + temp_bdi_data = tryCompress<uint16_t, int8_t>(data, + BASE2_1); + } else { + fatal("Invalid combination of base and delta sizes."); + } + + // If compressor was successful, check if new compression + // improves the compression factor + if (temp_bdi_data && + (bdi_data->getSizeBits() > temp_bdi_data->getSizeBits())) + { + bdi_data = std::move(temp_bdi_data); + base_delta_ratio = ratio; + } + + // Clear temp pointer + temp_bdi_data.reset(nullptr); + } + } + + // Set compression latency. A successful compressor will stop all + // other compressors when it knows no other will generate a better + // compression. 
However, this requires either hard-coding, or a complex + // function that can calculate the exact size of every compressor for + // every cache line size. We decide to take a conservative + // optimization: assume that all compressors with a given base size + // delta size ratio (no-metadata ratio) must wait for each other. + // In the worst case scenario the data is left uncompressed, so + // it loses the time of the worst base size (2 bytes per cycle) + comp_lat = Cycles(blkSize/base_delta_ratio); + } + + // Update stats + encodingStats[bdi_data->getEncoding()]++; + + // Pack compression results (1 extra cycle) + comp_lat += Cycles(1); + + // Set decompression latency (latency of an adder) + decomp_lat = Cycles(1); + + // Print debug information + DPRINTF(CacheComp, "BDI: Compressed cache line to encoding %s (%d bits)\n", + bdi_data->getName(), bdi_data->getSizeBits()); + + return std::move(bdi_data); +} + +void +BDI::regStats() +{ + BaseCacheCompressor::regStats(); + + // We store the frequency of each encoding + encodingStats + .init(NUM_ENCODINGS) + .name(name() + ".encoding") + .desc("Number of data entries that were compressed to this encoding.") + ; + + for (unsigned i = 0; i < NUM_ENCODINGS; ++i) { + encodingStats.subname(i, ENCODING_NAMES[i]); + encodingStats.subdesc(i, "Number of data entries that match " \ + "encoding " + std::string(ENCODING_NAMES[i])); + } +} + +BDI* +BDIParams::create() +{ + return new BDI(this); +} + +#undef BYTES_PER_QWORD |