diff options
author | Daniel R. Carvalho <odanrc@yahoo.com.br> | 2019-09-09 18:24:20 +0200 |
---|---|---|
committer | Daniel Carvalho <odanrc@yahoo.com.br> | 2019-11-04 21:32:22 +0000 |
commit | 3029ef270c75e7521bd7e14fe3a5d465568a9bb5 (patch) | |
tree | 4052cca80d4611ea1b875e997e653418c0d028b9 /src/mem/cache/compressors/bdi.cc | |
parent | b8a4a911eed17315320a6948d369f791617adfeb (diff) | |
download | gem5-3029ef270c75e7521bd7e14fe3a5d465568a9bb5.tar.xz |
mem-cache: Make BDI a multi compressor
BDI is a compressor containing multiple sub-compressors.
Change-Id: I98411e2ef9dcc2182801a172dfc59ed7a8ee7dd4
Signed-off-by: Daniel R. Carvalho <odanrc@yahoo.com.br>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/21159
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <jason@lowepower.com>
Reviewed-by: Bobby R. Bruce <bbruce@ucdavis.edu>
Diffstat (limited to 'src/mem/cache/compressors/bdi.cc')
-rw-r--r-- | src/mem/cache/compressors/bdi.cc | 481 |
1 files changed, 0 insertions, 481 deletions
diff --git a/src/mem/cache/compressors/bdi.cc b/src/mem/cache/compressors/bdi.cc deleted file mode 100644 index a8068c614..000000000 --- a/src/mem/cache/compressors/bdi.cc +++ /dev/null @@ -1,481 +0,0 @@ -/* - * Copyright (c) 2018 Inria - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Authors: Daniel Carvalho - */ - -/** @file - * Implementation of the BDI cache compressor. - */ - -#include "mem/cache/compressors/bdi.hh" - -#include <algorithm> -#include <climits> -#include <cstring> -#include <type_traits> - -#include "debug/CacheComp.hh" -#include "params/BDI.hh" - -// Number of bytes in a qword -#define BYTES_PER_QWORD 8 - -// Declare BDI encoding names -const char* BDI::ENCODING_NAMES[] = - {"Zero", "Repeated_Values", "Base8_1", "Base8_2", "Base8_4", "Base4_1", - "Base4_2", "Base2_1", "Uncompressed"}; - -BDI::BDICompData::BDICompData(const uint8_t encoding) - : CompressionData(), _encoding(encoding) -{ -} - -uint8_t -BDI::BDICompData::getEncoding() const -{ - return _encoding; -} - -std::string -BDI::BDICompData::getName() const -{ - return ENCODING_NAMES[_encoding]; -} - -BDI::BDICompDataZeros::BDICompDataZeros() - : BDICompData(ZERO) -{ - // Calculate compressed size - calculateCompressedSize(); -} - -uint64_t -BDI::BDICompDataZeros::access(const int index) const -{ - return 0; -} - -void -BDI::BDICompDataZeros::calculateCompressedSize() -{ - // Number of bits used by Encoding - std::size_t size = encodingBits; - - setSizeBits(size); -} - -BDI::BDICompDataRep::BDICompDataRep(const uint64_t rep_value) - : BDICompData(REP_VALUES) -{ - // Set base value - base = rep_value; - - // Calculate compressed size - calculateCompressedSize(); -} - -uint64_t -BDI::BDICompDataRep::access(const int index) const -{ - return base; -} - -void -BDI::BDICompDataRep::calculateCompressedSize() -{ - // Number of bits used by Encoding - std::size_t size = encodingBits; - - // Number of bits used by Base - size += sizeof(base)*CHAR_BIT; - - setSizeBits(size); -} - -BDI::BDICompDataUncompressed::BDICompDataUncompressed( - const uint64_t* data, const std::size_t blk_size) - : BDICompData(UNCOMPRESSED), blkSize(blk_size), - _data(data, data + blk_size/CHAR_BIT) -{ - // Calculate compressed size - calculateCompressedSize(); -} - -uint64_t -BDI::BDICompDataUncompressed::access(const int index) const -{ - return _data[index]; -} - -void -BDI::BDICompDataUncompressed::calculateCompressedSize() -{ - // Number of bits used by Encoding - std::size_t size = encodingBits; - - // Number of bits used by uncompressed line - size += blkSize*CHAR_BIT; - - setSizeBits(size); -} - -template <class TB, class TD> -BDI::BDICompDataBaseDelta<TB, TD>::BDICompDataBaseDelta(const uint8_t encoding, - const std::size_t blk_size, const std::size_t max_num_bases) - : BDICompData(encoding), maxNumBases(max_num_bases) -{ - // Reserve the maximum possible size for the vectors - bases.reserve(maxNumBases); - bitMask.reserve(blk_size/sizeof(TD)); - deltas.reserve(blk_size/sizeof(TD)); - - // Push virtual base 0 to bases list - bases.push_back(0); -} - -template <class TB, class TD> -void -BDI::BDICompDataBaseDelta<TB, TD>::calculateCompressedSize() -{ - // Number of bits used by Encoding - std::size_t size = encodingBits; - - // Number of bits used by BitMask - size += bitMask.size()*std::ceil(std::log2(maxNumBases)); - - // Number of bits used by Bases. bases[0] is implicit in a hardware - // implementation, therefore its size is 0 - size += (maxNumBases-1)*sizeof(TB)*CHAR_BIT; - - // Number of bits used by Deltas - size += deltas.size()*sizeof(TD)*CHAR_BIT; - - CompressionData::setSizeBits(size); -} - -template <class TB, class TD> -bool -BDI::BDICompDataBaseDelta<TB, TD>::addBase(const TB base) -{ - // Can't add base if reached limit of number of bases - if (bases.size() >= maxNumBases) { - return false; - } - - // Push new base to end of bases list - bases.push_back(base); - - // New delta is 0, as it is a difference between the new base and itself - addDelta(bases.size() - 1, 0); - - return true; -} - -template <class TB, class TD> -void -BDI::BDICompDataBaseDelta<TB, TD>::addDelta(const std::size_t base_index, - const TD delta) -{ - // Insert new delta with respect to the given base - bitMask.push_back(base_index); - - // Insert new delta - deltas.push_back(delta); -} - -template <class TB, class TD> bool -BDI::BDICompDataBaseDelta<TB, TD>::compress(const uint64_t* data, - const std::size_t blk_size) -{ - // Parse through data in a sizeof(TB) granularity - for (std::size_t byte_start = 0; byte_start < blk_size; - byte_start += sizeof(TB)) - { - // Get current value - TB curValue; - std::memcpy(&curValue, ((uint8_t*)data) + byte_start, - sizeof(TB)); - - // Iterate through all bases to search for a valid delta - bool foundDelta = false; - for (int base_index = 0; base_index < bases.size(); base_index++) { - // Calculate delta relative to currently parsed base - typename std::make_signed<TB>::type delta = curValue - - bases[base_index]; - - // Check if the delta is within the limits of the delta size. If - // that is the case, add delta to compressed data and keep parsing - // the input data - typename std::make_signed<TB>::type limit = - ULLONG_MAX>>((BYTES_PER_QWORD-sizeof(TD))*CHAR_BIT+1); - if ((delta >= -limit) && (delta <= limit)) { - addDelta(base_index, delta); - foundDelta = true; - break; - } - } - - // If we cannot find a base that can accommodate this new line's data, - // add this value as the new base and insert its respective delta of 0. - // If the new base can't be added, it means that we reached the base - // limit, so line is uncompressible using the given encoding - if (!foundDelta && !addBase(curValue)) { - return false; - } - } - - // Calculate compressed size - calculateCompressedSize(); - - return true; -} - -template <class TB, class TD> -uint64_t -BDI::BDICompDataBaseDelta<TB, TD>::access(const int index) const -{ - // We decompress all base-delta pairs that form the 64-bit entry - // corresponding to the given 64-bit-array index - uint64_t value = 0; - - // Get relationship between the size of an uint64_t base and size of TB - const std::size_t size_diff = sizeof(uint64_t)/sizeof(TB); - - // Mask for a base entry - const uint64_t mask = ULLONG_MAX>>((BYTES_PER_QWORD-sizeof(TB))*CHAR_BIT); - - // Size, in bits, of a base entry. Cant be const because compiler will - // optimize and create a 64 bit instance, which will generate a shift size - // compilation error - int base_size_bits = sizeof(TB)*CHAR_BIT; - - // Concatenate all base-delta entries until they form a 64-bit entry - for (int delta_index = size_diff * (index + 1) - 1; - delta_index >= (int)(size_diff * index); delta_index--) { - // Get base and delta entries corresponding to the current delta - assert(delta_index < deltas.size()); - const TD delta = deltas[delta_index]; - const int base_index = bitMask[delta_index]; - assert(base_index < bases.size()); - const TB base = bases[base_index]; - - // Concatenate decompressed value - value <<= base_size_bits; - value |= static_cast<uint64_t>((base + delta) & mask); - } - - return value; -} - -BDI::BDI(const Params *p) - : BaseCacheCompressor(p), useMoreCompressors(p->use_more_compressors), - qwordsPerCacheLine(blkSize/BYTES_PER_QWORD) -{ - static_assert(sizeof(ENCODING_NAMES)/sizeof(char*) == NUM_ENCODINGS, - "Number of encodings doesn't match the number of names"); -} - -bool -BDI::isZeroPackable(const uint64_t* data) const -{ - return std::all_of(data, data + qwordsPerCacheLine, - [](const uint64_t entry){ return entry == 0; }); -} - -bool -BDI::isSameValuePackable(const uint64_t* data) const -{ - // We don't want to copy the whole array to the lambda expression - const uint64_t rep_value = data[0]; - return std::all_of(data, data + qwordsPerCacheLine, - [rep_value](const uint64_t entry) - {return entry == rep_value;}); -} - -template <class TB, class TD> -std::unique_ptr<BDI::BDICompData> -BDI::tryCompress(const uint64_t* data, const uint8_t encoding) const -{ - // Instantiate compressor - auto temp_data = std::unique_ptr<BDICompDataBaseDelta<TB, TD>>( - new BDICompDataBaseDelta<TB, TD>(encoding, blkSize)); - - // Try compressing. Return nullptr if compressor can't compress given line - if (temp_data->compress(data, blkSize)) { - return std::move(temp_data); - } else { - return std::unique_ptr<BDICompData>{}; - } -} - -void -BDI::decompress(const BaseCacheCompressor::CompressionData* comp_data, - uint64_t* data) -{ - // Decompress and go back to host endianness - for (std::size_t i = 0; i < qwordsPerCacheLine; i++) - data[i] = static_cast<const BDICompData*>(comp_data)->access(i); -} - -std::unique_ptr<BaseCacheCompressor::CompressionData> -BDI::compress(const uint64_t* data, Cycles& comp_lat, Cycles& decomp_lat) -{ - std::unique_ptr<BDICompData> bdi_data; - - // Check if it is a zero line - if (isZeroPackable(data)) { - bdi_data = std::unique_ptr<BDICompData>(new BDICompDataZeros()); - - // Set compression latency (compare 1 qword per cycle) - comp_lat = Cycles(qwordsPerCacheLine); - // Check if all values in the line are the same - } else if (isSameValuePackable(data)) { - bdi_data = std::unique_ptr<BDICompData>(new BDICompDataRep(data[0])); - - // Set compression latency (compare 1 qword per cycle) - comp_lat = Cycles(qwordsPerCacheLine); - } else { - // Initialize compressed data as an uncompressed instance - bdi_data = std::unique_ptr<BDICompData>( - new BDICompDataUncompressed(data, blkSize)); - - // Base size-delta size ratio. Used to optimize run and try to - // compress less combinations when their size is worse than the - // current best. It does not reflect the compression ratio, as - // it does not take the metadata into account. - int base_delta_ratio = 2; - - // Check which base-delta size combination is the best. This is - // optimized by giving priority to trying the compressor that would - // generate the smallest compression size. This way we waste less - // simulation time by not doing all possible combinations - for (int ratio = 8; ratio >= base_delta_ratio; ratio/=2) { - for (int base_size = 8; base_size >= ratio; base_size/=2) { - // If using more compressors, parse all delta sizes from 1 to - // one size smaller than the base size, otherwise just parse - // highest possible delta. When we only instantiate one delta - // size per base size, we use less area and energy, at the - // cost of lower compression efficiency - const int delta_size = base_size/ratio; - if (!useMoreCompressors && delta_size != base_size/2) { - continue; - } - - std::unique_ptr<BDICompData> temp_bdi_data; - - // Get the compression result for the current combination - if ((base_size == 8)&&(delta_size == 4)) { - temp_bdi_data = tryCompress<uint64_t, int32_t>(data, - BASE8_4); - } else if ((base_size == 8)&&(delta_size == 2)) { - temp_bdi_data = tryCompress<uint64_t, int16_t>(data, - BASE8_2); - } else if ((base_size == 8)&&(delta_size == 1)) { - temp_bdi_data = tryCompress<uint64_t, int8_t>(data, - BASE8_1); - } else if ((base_size == 4)&&(delta_size == 2)) { - temp_bdi_data = tryCompress<uint32_t, int16_t>(data, - BASE4_2); - } else if ((base_size == 4)&&(delta_size == 1)) { - temp_bdi_data = tryCompress<uint32_t, int8_t>(data, - BASE4_1); - } else if ((base_size == 2)&&(delta_size == 1)) { - temp_bdi_data = tryCompress<uint16_t, int8_t>(data, - BASE2_1); - } else { - fatal("Invalid combination of base and delta sizes."); - } - - // If compressor was successful, check if new compression - // improves the compression factor - if (temp_bdi_data && - (bdi_data->getSizeBits() > temp_bdi_data->getSizeBits())) - { - bdi_data = std::move(temp_bdi_data); - base_delta_ratio = ratio; - } - - // Clear temp pointer - temp_bdi_data.reset(nullptr); - } - } - - // Set compression latency. A successful compressor will stop all - // other compressors when it knows no other will generate a better - // compression. However, this requires either hard-coding, or a complex - // function that can calculate the exact size of every compressor for - // every cache line size. We decide to take a conservative - // optimization: assume that all compressors with a given base size - // delta size ratio (no-metadata ratio) must wait for each other. - // In the worst case scenario the data is left uncompressed, so - // it loses the time of the worst base size (2 bytes per cycle) - comp_lat = Cycles(blkSize/base_delta_ratio); - } - - // Update stats - encodingStats[bdi_data->getEncoding()]++; - - // Pack compression results (1 extra cycle) - comp_lat += Cycles(1); - - // Set decompression latency (latency of an adder) - decomp_lat = Cycles(1); - - // Print debug information - DPRINTF(CacheComp, "BDI: Compressed cache line to encoding %s (%d bits)\n", - bdi_data->getName(), bdi_data->getSizeBits()); - - return std::move(bdi_data); -} - -void -BDI::regStats() -{ - BaseCacheCompressor::regStats(); - - // We store the frequency of each encoding - encodingStats - .init(NUM_ENCODINGS) - .name(name() + ".encoding") - .desc("Number of data entries that were compressed to this encoding.") - ; - - for (unsigned i = 0; i < NUM_ENCODINGS; ++i) { - encodingStats.subname(i, ENCODING_NAMES[i]); - encodingStats.subdesc(i, "Number of data entries that match " \ - "encoding " + std::string(ENCODING_NAMES[i])); - } -} - -BDI* -BDIParams::create() -{ - return new BDI(this); -} - -#undef BYTES_PER_QWORD |