From 0747a432d25ade2c197ca6393270e12606419872 Mon Sep 17 00:00:00 2001 From: Rekai Gonzalez-Alberquilla Date: Wed, 5 Apr 2017 13:20:30 -0500 Subject: arch: added generic vector register This commit adds a new generic vector register to have a cleaner implementation of SIMD ISAs. Nathanael's idea, Rekai's implementation. Change-Id: I60b250bba6423153b7e04d2e6988d517a70a3e6b Reviewed-by: Andreas Sandberg Reviewed-on: https://gem5-review.googlesource.com/2704 Reviewed-by: Anthony Gutierrez Reviewed-by: Tony Gutierrez Maintainer: Andreas Sandberg --- src/arch/generic/vec_reg.hh | 648 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 648 insertions(+) create mode 100644 src/arch/generic/vec_reg.hh (limited to 'src/arch') diff --git a/src/arch/generic/vec_reg.hh b/src/arch/generic/vec_reg.hh new file mode 100644 index 000000000..7a993ea73 --- /dev/null +++ b/src/arch/generic/vec_reg.hh @@ -0,0 +1,648 @@ +/* + * Copyright (c) 2015-2016 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Giacomo Gabrielli + * Nathanael Premillieu + * Rekai Gonzalez + */ + +/** \file arch/generic/vec_reg.hh + * Vector Registers layout specification. + * + * This register type is to be used to model the SIMD registers. + * It takes into account the possibility that different architectural names + * may overlap (like for ARMv8 AArch32 for example). + * + * The design is having a basic vector register container that holds the + * bytes, unaware of anything else. 
This is implemented by VecRegContainer. + * As the (maximum) length of the physical vector register is a compile-time + * constant, it is defined as a template parameter. + * + * This file also describes two views of the container that have semantic + * information about the bytes. The first of these views is VecRegT. + * A VecRegT is a view of a VecRegContainer (by reference). The VecRegT has + * a type (VecElem) to which bytes are cast, and the amount of such + * elements that the vector contains (NumElems). The size of a view, + * calculated as sizeof(VecElem) * NumElems, must not exceed the size of the + * underlying container. As VecRegT has some degree of type information it + * has vector semantics, and defines the index operator ([]) to get + * references to particular bytes understood as a VecElem. + * The second view of a container implemented in this file is VecLaneT, which + * is a view of a subset of the container. + * A VecLaneT is a view of a lane of a vector register, where a lane is + * identified by a type (VecElem) and an index (although the view is + * unaware of its index). Operations on the lane are directly applied to + * the corresponding bytes of the underlying VecRegContainer through a + * reference. + * + * The intended usage is requesting views of the VecRegContainer via the + * member 'as' for VecRegT and the member 'laneView' for VecLaneT. An + * example of usage follows. + * + * + * // We declare 512-bit vectors + * using Vec512 = VecRegContainer<64>; + * ... + * // We implement the physical vector register file + * Vec512 physicalVecRegFile[NUM_VREGS]; + * ... + * // Usage example, for a macro op: + * VecFloat8Add(ExecContext* xc) { + * // Request source vector register from the execution context (const as it + * // is read only).
+ * const Vec512& vsrc1raw = xc->readVecRegOperand(this, 0); + * // View it as a vector of floats (we could just specify the first + * // template parameter, the second has a default value that works, and the + * // last one is derived by the constness of vsrc1raw). + * VecRegT<float, 8, true>& vsrc1 = vsrc1raw->as<float, 8>(); + * + * // Second source and view + * const Vec512& vsrc2raw = xc->readVecRegOperand(this, 1); + * VecRegT<float, 8, true>& vsrc2 = vsrc2raw->as<float, 8>(); + * + * // Destination and view + * Vec512 vdstraw; + * VecRegT<float, 8, false>& vdst = vdstraw->as<float, 8>(); + * + * for (auto i = 0; i < 8; i++) { + * // This assignment sets the bits in the underlying Vec512: vdstraw + * vdst[i] = vsrc1[i] + vsrc2[i]; + * } + * xc->setWriteRegOperand(this, 0, vdstraw); + * } + * + * // Usage example, for a micro op that operates over lane number _lidx: + * VecFloatLaneAdd(ExecContext* xc) { + * // Request source vector register from the execution context (const as it + * // is read only). + * const Vec512& vsrc1raw = xc->readVecRegOperand(this, 0); + * // View it as a lane of a vector of floats (we could just specify the + * // first template parameter, the second is derived by the constness of + * // vsrc1raw). + * VecLaneT<float, true>& src1 = vsrc1raw->laneView<float>(this->_lidx); + * + * // Second source and view + * const Vec512& vsrc2raw = xc->readVecRegOperand(this, 1); + * VecLaneT<float, true>& src2 = vsrc2raw->laneView<float>(this->_lidx); + * + * // (Writable) destination and view + * // As this is a partial write, we need the exec context to support that + * // through, e.g., 'readVecRegOperandToWrite' returning a writable + * // reference to the register + * Vec512& vdstraw = xc->readVecRegOperandToWrite(this, 3); + * VecLaneT<float, false>& dst = vdstraw->laneView<float>(this->_lidx); + * + * dst = src1 + src2; + * // There is no need to copy the value back into the exec context, as + * // the assignment to dst modifies the appropriate bytes in vdstraw which + * // is, in turn, a reference to the register in the cpu model.
+ * // For operations that do conditional writeback, we can decouple the + * // write by doing: + * // auto tmp = src1 + src2; + * // if (test) { + * // dst = tmp; // do writeback + * // } else { + * // // do not do writeback + * // } + * } + * + */ + +#ifndef __ARCH_GENERIC_VEC_REG_HH__ +#define __ARCH_GENERIC_VEC_REG_HH__ + +#include +#include +#include +#include +#include +#include + +#include "base/cprintf.hh" +#include "base/misc.hh" +/* Forward declaration of the container that VecRegT views. */ +template +class VecRegContainer; + +/** Vector Register Abstraction + * This generic class is a view in a particularization of MVC, to vector + * registers. There is a VecRegContainer that implements the model, and + * contains the data. To that model we can interpose different instantiations + * of VecRegT to view the container as a vector of NumElems elems of type + * VecElem. + * @tparam VecElem Type of each element of the vector. + * @tparam NumElems Amount of components of the vector. + * @tparam Const Indicate if the underlying container can be modified through + * the view. + */ +template +class VecRegT +{ + /** Size of the register in bytes. */ + static constexpr size_t SIZE = sizeof(VecElem) * NumElems; + public: + /** Container type alias. */ + using Container = typename std::conditional, + VecRegContainer>::type; + private: + /** My type alias. */ + using MyClass = VecRegT; + /** Reference to container. */ + Container& container; + + public: + /** Constructor. */ + VecRegT(Container& cnt) : container(cnt) {}; + + /** Zero the container. */ + template + typename std::enable_if::type + zero() { container.zero(); } + /** Assignment: copies the container viewed by that into the container viewed by this, then returns this view. */ + template + typename std::enable_if::type + operator=(const MyClass& that) + { + container = that.container; + return *this; + } + + /** Index operator. */ + const VecElem& operator[](size_t idx) const + { + return container.template raw_ptr()[idx]; + } + + /** Index operator. 
*/ + template + typename std::enable_if::type + operator[](size_t idx) + { + return container.template raw_ptr()[idx]; + } + + /** Equality operator. + * Required to compare thread contexts. + */ + template + bool + operator==(const VecRegT& that) const + { + return container == that.container; + } + /** Inequality operator. + * Required to compare thread contexts. + */ + template + bool + operator!=(const VecRegT& that) const + { + return !operator==(that); + } + + /** Output stream operator. + * Writes an opening bracket, the indexed entries in hexadecimal (each cast + * to uint32_t) separated by spaces, and a closing bracket. + * NOTE(review): the loop bound is SIZE, which is sizeof(VecElem) * NumElems + * (a byte count), while vr[e] indexes VecElem entries; confirm the bound + * is intended for element types wider than one byte. + */ + friend std::ostream& + operator<<(std::ostream& os, const MyClass& vr) + { + /* 0-sized is not allowed */ + os << "[" << std::hex << (uint32_t)vr[0]; + for (uint32_t e = 1; e < vr.SIZE; e++) + os << " " << std::hex << (uint32_t)vr[e]; + os << ']'; + return os; + } + /** Format this view as a string by handing it to csprintf. */ + const std::string print() const { return csprintf("%s", *this); } + /** + * Cast to VecRegContainer& + * It is useful to get the reference to the container for ISA tricks, + * because casting to reference prevents unnecessary copies. + */ + operator Container&() { return container; } +}; + +/* Forward declaration. */ +template +class VecLaneT; + +/** + * Vector Register Abstraction + * This generic class is the model in a particularization of MVC, to vector + * registers. The model has functionality to create views of itself, or of a + * portion of itself, through the methods 'as' and 'laneView'. + * @tparam Sz Size of the container in bytes. 
+ */ +template +class VecRegContainer +{ + static_assert(Sz > 0, + "Cannot create Vector Register Container of zero size"); + public: + static constexpr size_t SIZE = Sz; + using Container = std::array; + private: + Container container; + using MyClass = VecRegContainer; + + public: + VecRegContainer() {} + /* This is required for de-serialisation. + * Copies the first SIZE bytes of that; asserts that.size() >= SIZE. */ + VecRegContainer(const std::vector& that) + { + assert(that.size() >= SIZE); + std::memcpy(container.data(), &that[0], SIZE); + } + + /** Zero the container. */ + void zero() { memset(container.data(), 0, SIZE); } + + /** Assignment operators. 
*/ + /** @{ */ + /** From VecRegContainer; self-assignment returns without copying. */ + MyClass& operator=(const MyClass& that) + { + if (&that == this) + return *this; + memcpy(container.data(), that.container.data(), SIZE); + return *this; + } + + /** From appropriately sized uint8_t[]. */ + MyClass& operator=(const Container& that) + { + std::memcpy(container.data(), that.data(), SIZE); + return *this; + } + + /** From vector. + * This is required for de-serialisation. + * Copies the first SIZE bytes of that; asserts that.size() >= SIZE. + * */ + MyClass& operator=(const std::vector& that) + { + assert(that.size() >= SIZE); + std::memcpy(container.data(), that.data(), SIZE); + return *this; + } + /** @} */ + + /** Copy the contents into the input buffer. */ + /** @{ */ + /** To appropriately sized uint8_t[] */ + void copyTo(Container& dst) const + { + std::memcpy(dst.data(), container.data(), SIZE); + } + + /** To vector + * This is required for serialisation. + * dst is resized to SIZE before the bytes are copied into it. + * */ + void copyTo(std::vector& dst) const + { + dst.resize(SIZE); + std::memcpy(dst.data(), container.data(), SIZE); + } + /** @} */ + + /** Equality operator. + * Required to compare thread contexts. + * Containers whose sizes differ compare unequal; otherwise the bytes are + * compared with memcmp. + */ + template + inline bool + operator==(const VecRegContainer& that) const + { + return SIZE == S2 && + !memcmp(container.data(), that.container.data(), SIZE); + } + /** Inequality operator. + * Required to compare thread contexts. + */ + template + bool + operator!=(const VecRegContainer& that) const + { + return !operator==(that); + } + /** Format this container as a string by handing it to csprintf. */ + const std::string print() const { return csprintf("%s", *this); } + /** Get pointer to bytes, cast to a pointer to const Ret. */ + template + const Ret* raw_ptr() const { return (const Ret*)container.data(); } + /** Get pointer to bytes, cast to a pointer to (non-const) Ret. */ + template + Ret* raw_ptr() { return (Ret*)container.data(); } + + /** + * View interposers. + * Create a view of this container as a vector of VecElems with an + * optional amount of elements. If the amount of elements is provided, + * the size of the container is checked, to test bounds. If it is not + * provided, the length is inferred from the container size and the + * element size. 
+ * @tparam VecElem Type of each element of the vector for the view. + * @tparam NumElems Amount of elements in the view. + */ + /** @{ */ + /** Overload for const containers. */ template + VecRegT as() const + { + static_assert(SIZE % sizeof(VecElem) == 0, + "VecElem does not evenly divide the register size"); + static_assert(sizeof(VecElem) * NumElems <= SIZE, + "Viewing VecReg as something bigger than it is"); + return VecRegT(*this); + } + /** Overload for non-const containers; same compile-time checks. */ + template + VecRegT as() + { + static_assert(SIZE % sizeof(VecElem) == 0, + "VecElem does not evenly divide the register size"); + static_assert(sizeof(VecElem) * NumElems <= SIZE, + "Viewing VecReg as something bigger than it is"); + return VecRegT(*this); + } + /** Lane views; declared here and defined after VecLaneT, later in this file. */ + template + VecLaneT laneView(); + template + VecLaneT laneView() const; + template + VecLaneT laneView(int laneIdx); + template + VecLaneT laneView(int laneIdx) const; + /** @} */ + /** + * Output operator. + * Used for serialization. + * Each byte is written through csprintf as two hexadecimal digits, with + * no separator between bytes. + */ + friend std::ostream& operator<<(std::ostream& os, const MyClass& v) + { + for (auto& b: v.container) { + os << csprintf("%02x", b); + } + return os; + } +}; + +/** We define an auxiliary abstraction for LaneData. The ISA should care + * about the semantics of a, e.g., 32-bit element, treating it as a signed or + * unsigned int, or a float depending on the semantics of a particular + * instruction. On the other hand, the cpu model should only care about it + * being a 32-bit value. */ +enum class LaneSize +{ + Empty = 0, + Byte, + TwoByte, + FourByte, + EightByte, +}; + +/** LaneData is an abstraction of a LS byte value for the execution and thread + * contexts to handle values just depending on its width. That way, the ISA + * can request, for example, the second 4 byte lane of register 5 to the model. + * The model serves that value, agnostic of the semantics of those bits. Then, + * it is up to the ISA to interpret those bits as a float, or as an uint. 
+ * To maximize the utility, this class implements the assignment operator and + * the casting to equal-size types. + * As opposed to a VecLaneT, LaneData is not 'backed' by a VecRegContainer. + * The idea is: + * When data is passed and is susceptible to being copied, use LaneData, as + * copying the primitive type it is built on is cheap. + * When data is passed as references (const or not), use VecLaneT, as all + * operations happen 'in place', avoiding any copies (no copies is always + * cheaper than cheap copies), especially when things are inlined, and + * references are not explicitly passed. + */ +template +class LaneData +{ + public: + /** Alias to the native type of the appropriate size. */ + using UnderlyingType = + typename std::conditional::type + >::type + >::type + >::type; + private: + static constexpr auto ByteSz = sizeof(UnderlyingType); + UnderlyingType _val; + using MyClass = LaneData; + + public: + /** Explicit constructor: initialises the stored value from t. */ template explicit + LaneData(typename std::enable_if::type t) + : _val(t) {} + /** Assignment: stores that in the held value and returns this object. */ + template + typename std::enable_if::type + operator=(const T& that) + { + _val = that; + return *this; + } + /** Conversion to T: casts the address of the held value and dereferences it. */ template::type I = 0> + operator T() const { + return *static_cast(&_val); + } +}; + +/** Output operator overload for LaneData. */ +template +inline std::ostream& +operator<<(std::ostream& os, const LaneData& d) +{ + return os << static_cast::UnderlyingType>(d); +} + +/** Vector Lane abstraction + * Another view of a container. This time only a partial part of it is exposed. + * @tparam VecElem Type of each element of the vector. + * @tparam Const Indicate if the underlying container can be modified through + * the view. + */ +/** @{ */ +/* General */ +template +class VecLaneT +{ + public: + /** VecRegContainer friendship to access private VecLaneT constructors. + * Only VecRegContainers can build VecLanes. 
+ */ + /** @{ */ + friend VecLaneT; + /** The generic friend declaration is commented out; friendship is declared only for the container sizes listed below. */ + /*template + friend class VecRegContainer;*/ + friend class VecRegContainer<8>; + friend class VecRegContainer<16>; + friend class VecRegContainer<32>; + friend class VecRegContainer<64>; + friend class VecRegContainer<128>; + + /** My type alias. */ + using MyClass = VecLaneT; + + private: + using Cont = typename std::conditional::type; + static_assert(!std::is_const::value || Const, + "Asked for non-const lane of const type!"); + static_assert(std::is_integral::value, + "VecElem type is not integral!"); + /** Reference to data. */ + Cont& container; + + /** Constructor */ + VecLaneT(Cont& cont) : container(cont) { } + + public: + /** Assignment operators. + * Assignment operators are only enabled if the underlying container is + * non-constant. + */ + /** @{ */ + template + typename std::enable_if::type + operator=(const VecElem& that) { + container = that; + return *this; + } + /** + * Generic. + * Generic bitwise assignment. Narrowing and widening assignments are + * not allowed, pre-treatment of the rhs is required to conform. + * The two static_asserts below together require + * sizeof(T) == sizeof(VecElem). + */ + template + typename std::enable_if::type + operator=(const T& that) { + static_assert(sizeof(T) >= sizeof(VecElem), + "Attempt to perform widening bitwise copy."); + static_assert(sizeof(T) <= sizeof(VecElem), + "Attempt to perform narrowing bitwise copy."); + container = static_cast(that); + return *this; + } + /** @} */ + /** Cast to VecElem. */ + operator VecElem() const { return container; } + + /** Constification. */ + template ::type = 0> + operator VecLaneT::type, true>() + { + return VecLaneT(container); + } +}; +/** NOTE(review): this adds a std::add_const specialisation for VecLaneT inside namespace std; confirm that specialising a type trait is acceptable for the toolchains in use. */ +namespace std { + template + struct add_const> { typedef VecLaneT type; }; +} + +/** View as the Nth lane of type VecElem. */ +template +template +VecLaneT +VecRegContainer::laneView() +{ + return VecLaneT(as()[LaneIdx]); +} + +/** View as the const Nth lane of type VecElem. 
*/ +template +template +VecLaneT +VecRegContainer::laneView() const +{ + return VecLaneT(as()[LaneIdx]); +} + +/** View as the lane of type VecElem selected at run time by laneIdx. */ +template +template +VecLaneT +VecRegContainer::laneView(int laneIdx) +{ + return VecLaneT(as()[laneIdx]); +} + +/** View as the const lane of type VecElem selected at run time by laneIdx. */ +template +template +VecLaneT +VecRegContainer::laneView(int laneIdx) const +{ + return VecLaneT(as()[laneIdx]); +} +/** Lane view aliases. */ +using VecLane8 = VecLaneT; +using VecLane16 = VecLaneT; +using VecLane32 = VecLaneT; +using VecLane64 = VecLaneT; +/** Lane view aliases with the Const name prefix. */ +using ConstVecLane8 = VecLaneT; +using ConstVecLane16 = VecLaneT; +using ConstVecLane32 = VecLaneT; +using ConstVecLane64 = VecLaneT; + +/** + * Calls required for serialization/deserialization + * to_number reads value two characters at a time, parses each pair as a + * hexadecimal number and stores it at index i of the container bytes, for + * i from 0 up to Sz. It always returns true. + * NOTE(review): the length of value is not checked here; confirm that + * callers always supply at least two hexadecimal characters per byte. + */ +/** @{ */ +template +inline bool +to_number(const std::string& value, VecRegContainer& v) +{ + int i = 0; + while (i < Sz) { + std::string byte = value.substr(i<<1, 2); + v.template raw_ptr()[i] = stoul(byte, 0, 16); + i++; + } + return true; +} +/** @} */ + +#endif /* __ARCH_GENERIC_VEC_REG_HH__ */ -- cgit v1.2.3