diff options
65 files changed, 2525 insertions, 75 deletions
diff --git a/configs/common/cores/arm/O3_ARM_v7a.py b/configs/common/cores/arm/O3_ARM_v7a.py index f5c2c711a..fde4d3c60 100644 --- a/configs/common/cores/arm/O3_ARM_v7a.py +++ b/configs/common/cores/arm/O3_ARM_v7a.py @@ -139,6 +139,7 @@ class O3_ARM_v7a_3(DerivO3CPU): forwardComSize = 5 numPhysIntRegs = 128 numPhysFloatRegs = 192 + numPhysVecRegs = 48 numIQEntries = 32 numROBEntries = 40 diff --git a/src/arch/SConscript b/src/arch/SConscript index ea940560d..e30069c04 100644 --- a/src/arch/SConscript +++ b/src/arch/SConscript @@ -1,5 +1,17 @@ # -*- mode:python -*- +# Copyright (c) 2016 ARM Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# # Copyright (c) 2006 The Regents of The University of Michigan # All rights reserved. 
# @@ -202,6 +214,7 @@ env.Append(BUILDERS = {'ScanISA' : DebugFlag('IntRegs') DebugFlag('FloatRegs') +DebugFlag('VecRegs') DebugFlag('CCRegs') DebugFlag('MiscRegs') CompoundFlag('Registers', [ 'IntRegs', 'FloatRegs', 'CCRegs', 'MiscRegs' ]) diff --git a/src/arch/alpha/isa.hh b/src/arch/alpha/isa.hh index 80d8ab149..36e708450 100644 --- a/src/arch/alpha/isa.hh +++ b/src/arch/alpha/isa.hh @@ -110,6 +110,18 @@ namespace AlphaISA return reg; } + int + flattenVecIndex(int reg) const + { + return reg; + } + + int + flattenVecElemIndex(int reg) const + { + return reg; + } + // dummy int flattenCCIndex(int reg) const diff --git a/src/arch/alpha/registers.hh b/src/arch/alpha/registers.hh index 03bbd8aaf..151ea7d7c 100644 --- a/src/arch/alpha/registers.hh +++ b/src/arch/alpha/registers.hh @@ -34,6 +34,7 @@ #include "arch/alpha/generated/max_inst_regs.hh" #include "arch/alpha/ipr.hh" #include "arch/generic/types.hh" +#include "arch/generic/vec_reg.hh" #include "base/types.hh" namespace AlphaISA { @@ -56,6 +57,15 @@ typedef uint64_t MiscReg; // dummy typedef since we don't have CC regs typedef uint8_t CCReg; +// dummy typedefs since we don't have vector regs +constexpr unsigned NumVecElemPerVecReg = 2; +using VecElem = uint32_t; +using VecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, false>; +using ConstVecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, true>; +using VecRegContainer = VecReg::Container; +// This has to be one to prevent warnings that are treated as errors +constexpr unsigned NumVecRegs = 1; + union AnyReg { IntReg intreg; diff --git a/src/arch/arm/ArmISA.py b/src/arch/arm/ArmISA.py index 73ef4a09d..7956570bd 100644 --- a/src/arch/arm/ArmISA.py +++ b/src/arch/arm/ArmISA.py @@ -41,6 +41,7 @@ from m5.proxy import * from m5.SimObject import SimObject from ArmPMU import ArmPMU +from ISACommon import VecRegRenameMode # Enum for DecoderFlavour class DecoderFlavour(Enum): vals = ['Generic'] @@ -86,6 +87,10 @@ class ArmISA(SimObject): id_aa64afr1_el1 = 
Param.UInt64(0x0000000000000000, "AArch64 Auxiliary Feature Register 1") + # Initial vector register rename mode + vecRegRenameMode = Param.VecRegRenameMode('Full', + "Initial rename mode for vecregs") + # 1 CTX CMPs | 2 WRPs | 2 BRPs | !PMU | !Trace | Debug v8-A id_aa64dfr0_el1 = Param.UInt64(0x0000000000101006, "AArch64 Debug Feature Register 0") diff --git a/src/arch/arm/insts/static_inst.cc b/src/arch/arm/insts/static_inst.cc index 99d1b817d..8501715d5 100644 --- a/src/arch/arm/insts/static_inst.cc +++ b/src/arch/arm/insts/static_inst.cc @@ -331,6 +331,12 @@ ArmStaticInst::printFloatReg(std::ostream &os, RegIndex reg_idx) const } void +ArmStaticInst::printVecReg(std::ostream &os, RegIndex reg_idx) const +{ + ccprintf(os, "v%d", reg_idx); +} + +void ArmStaticInst::printCCReg(std::ostream &os, RegIndex reg_idx) const { ccprintf(os, "cc_%s", ArmISA::ccRegName[reg_idx]); diff --git a/src/arch/arm/insts/static_inst.hh b/src/arch/arm/insts/static_inst.hh index 19af99a0f..486d30fe4 100644 --- a/src/arch/arm/insts/static_inst.hh +++ b/src/arch/arm/insts/static_inst.hh @@ -157,6 +157,7 @@ class ArmStaticInst : public StaticInst /// dependence tag number (FP or int). 
void printIntReg(std::ostream &os, RegIndex reg_idx) const; void printFloatReg(std::ostream &os, RegIndex reg_idx) const; + void printVecReg(std::ostream &os, RegIndex reg_idx) const; void printCCReg(std::ostream &os, RegIndex reg_idx) const; void printMiscReg(std::ostream &os, RegIndex reg_idx) const; void printMnemonic(std::ostream &os, diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc index c54d7746d..a490e5fb7 100644 --- a/src/arch/arm/isa.cc +++ b/src/arch/arm/isa.cc @@ -209,6 +209,7 @@ ISA::ISA(Params *p) : SimObject(p), system(NULL), _decoderFlavour(p->decoderFlavour), + _vecRegRenameMode(p->vecRegRenameMode), pmu(p->pmu), lookUpMiscReg(NUM_MISCREGS, {0,0}) { diff --git a/src/arch/arm/isa.hh b/src/arch/arm/isa.hh index 8de90dc93..e96de7922 100644 --- a/src/arch/arm/isa.hh +++ b/src/arch/arm/isa.hh @@ -48,7 +48,9 @@ #include "arch/arm/system.hh" #include "arch/arm/tlb.hh" #include "arch/arm/types.hh" +#include "arch/generic/traits.hh" #include "debug/Checkpoint.hh" +#include "enums/VecRegRenameMode.hh" #include "sim/sim_object.hh" #include "enums/DecoderFlavour.hh" @@ -68,6 +70,7 @@ namespace ArmISA // Micro Architecture const Enums::DecoderFlavour _decoderFlavour; + const Enums::VecRegRenameMode _vecRegRenameMode; /** Dummy device for to handle non-existing ISA devices */ DummyISADevice dummyDevice; @@ -185,6 +188,10 @@ namespace ArmISA return RegId(IntRegClass, flattenIntIndex(regId.index())); case FloatRegClass: return RegId(FloatRegClass, flattenFloatIndex(regId.index())); + case VecRegClass: + return RegId(VecRegClass, flattenVecIndex(regId.index())); + case VecElemClass: + return RegId(VecElemClass, flattenVecElemIndex(regId.index())); case CCRegClass: return RegId(CCRegClass, flattenCCIndex(regId.index())); case MiscRegClass: @@ -233,6 +240,20 @@ namespace ArmISA } int + flattenVecIndex(int reg) const + { + assert(reg >= 0); + return reg; + } + + int + flattenVecElemIndex(int reg) const + { + assert(reg >= 0); + return reg; + } + + int 
flattenCCIndex(int reg) const { assert(reg >= 0); @@ -406,6 +427,12 @@ namespace ArmISA Enums::DecoderFlavour decoderFlavour() const { return _decoderFlavour; } + Enums::VecRegRenameMode + vecRegRenameMode() const + { + return _vecRegRenameMode; + } + /// Explicitly import the otherwise hidden startup using SimObject::startup; @@ -417,4 +444,17 @@ namespace ArmISA }; } +template<> +struct initRenameMode<ArmISA::ISA> +{ + static Enums::VecRegRenameMode mode(const ArmISA::ISA* isa) + { + return isa->vecRegRenameMode(); + } + static bool equals(const ArmISA::ISA* isa1, const ArmISA::ISA* isa2) + { + return mode(isa1) == mode(isa2); + } +}; + #endif diff --git a/src/arch/arm/nativetrace.cc b/src/arch/arm/nativetrace.cc index fcb13fb2a..395232e00 100644 --- a/src/arch/arm/nativetrace.cc +++ b/src/arch/arm/nativetrace.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2011, 2014 ARM Limited + * Copyright (c) 2010-2011, 2014, 2016 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -125,10 +125,10 @@ Trace::ArmNativeTrace::ThreadState::update(ThreadContext *tc) newState[STATE_CPSR] = cpsr; changed[STATE_CPSR] = (newState[STATE_CPSR] != oldState[STATE_CPSR]); - for (int i = 0; i < NumFloatV7ArchRegs; i += 2) { - newState[STATE_F0 + (i >> 1)] = - static_cast<uint64_t>(tc->readFloatRegBits(i + 1)) << 32 | - tc->readFloatRegBits(i); + for (int i = 0; i < NumVecV7ArchRegs; i++) { + auto vec(tc->readVecReg(RegId(VecRegClass,i)).as<uint64_t, 2>()); + newState[STATE_F0 + 2*i] = vec[0]; + newState[STATE_F0 + 2*i + 1] = vec[1]; } newState[STATE_FPSCR] = tc->readMiscRegNoEffect(MISCREG_FPSCR) | tc->readCCReg(CCREG_FP); diff --git a/src/arch/arm/registers.hh b/src/arch/arm/registers.hh index 2e1ad1881..0a617e4dc 100644 --- a/src/arch/arm/registers.hh +++ b/src/arch/arm/registers.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2011, 2014 ARM Limited + * Copyright (c) 2010-2011, 2014, 2016 ARM Limited * All rights reserved * * The 
license below extends only to copyright in the software and shall @@ -43,10 +43,11 @@ #ifndef __ARCH_ARM_REGISTERS_HH__ #define __ARCH_ARM_REGISTERS_HH__ +#include "arch/arm/ccregs.hh" #include "arch/arm/generated/max_inst_regs.hh" #include "arch/arm/intregs.hh" -#include "arch/arm/ccregs.hh" #include "arch/arm/miscregs.hh" +#include "arch/generic/vec_reg.hh" namespace ArmISA { @@ -64,6 +65,13 @@ typedef uint64_t IntReg; typedef uint32_t FloatRegBits; typedef float FloatReg; +// Number of VecElem per Vector Register, computed based on the vector length +constexpr unsigned NumVecElemPerVecReg = 4; +using VecElem = uint32_t; +using VecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, false>; +using ConstVecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, true>; +using VecRegContainer = VecReg::Container; + // cop-0/cop-1 system control register typedef uint64_t MiscReg; @@ -76,15 +84,19 @@ const int NumIntArchRegs = NUM_ARCH_INTREGS; const int NumFloatV7ArchRegs = 64; const int NumFloatV8ArchRegs = 128; const int NumFloatSpecialRegs = 32; +const int NumVecV7ArchRegs = 64; +const int NumVecV8ArchRegs = 32; +const int NumVecSpecialRegs = 8; const int NumIntRegs = NUM_INTREGS; const int NumFloatRegs = NumFloatV8ArchRegs + NumFloatSpecialRegs; +const int NumVecRegs = NumVecV8ArchRegs + NumVecSpecialRegs; const int NumCCRegs = NUM_CCREGS; const int NumMiscRegs = NUM_MISCREGS; #define ISA_HAS_CC_REGS -const int TotalNumRegs = NumIntRegs + NumFloatRegs + NumMiscRegs; +const int TotalNumRegs = NumIntRegs + NumFloatRegs + NumVecRegs + NumMiscRegs; // semantically meaningful register indices const int ReturnValueReg = 0; diff --git a/src/arch/arm/remote_gdb.cc b/src/arch/arm/remote_gdb.cc index eefe62b42..d934d53d3 100644 --- a/src/arch/arm/remote_gdb.cc +++ b/src/arch/arm/remote_gdb.cc @@ -1,7 +1,7 @@ /* * Copyright 2015 LabWare * Copyright 2014 Google Inc. 
- * Copyright (c) 2010, 2013 ARM Limited + * Copyright (c) 2010, 2013, 2016 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -212,6 +212,10 @@ RemoteGDB::AArch64GdbRegCache::getRegs(ThreadContext *context) r.v[i + 2] = context->readFloatRegBits(i + 0); r.v[i + 3] = context->readFloatRegBits(i + 1); } + + for (int i = 0; i < 32; i ++) { + r.vec[i] = context->readVecReg(RegId(VecRegClass,i)); + } } void @@ -234,6 +238,10 @@ RemoteGDB::AArch64GdbRegCache::setRegs(ThreadContext *context) const context->setFloatRegBits(i + 0, r.v[i + 2]); context->setFloatRegBits(i + 1, r.v[i + 3]); } + + for (int i = 0; i < 32; i ++) { + context->setVecReg(RegId(VecRegClass, i), r.vec[i]); + } } void diff --git a/src/arch/arm/remote_gdb.hh b/src/arch/arm/remote_gdb.hh index acd6f32d2..328fbadb3 100644 --- a/src/arch/arm/remote_gdb.hh +++ b/src/arch/arm/remote_gdb.hh @@ -1,7 +1,7 @@ /* * Copyright 2015 LabWare * Copyright 2014 Google, Inc. - * Copyright (c) 2013 ARM Limited + * Copyright (c) 2013, 2016 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -51,6 +51,7 @@ #include <algorithm> #include "arch/arm/utility.hh" +#include "arch/generic/vec_reg.hh" #include "base/remote_gdb.hh" class System; @@ -96,6 +97,7 @@ class RemoteGDB : public BaseRemoteGDB uint64_t pc; uint64_t cpsr; uint32_t v[32*4]; + ArmISA::VecRegContainer vec[32]; } r; public: char *data() const { return (char *)&r; } diff --git a/src/arch/arm/utility.cc b/src/arch/arm/utility.cc index 1437801a2..a58ca8111 100644 --- a/src/arch/arm/utility.cc +++ b/src/arch/arm/utility.cc @@ -154,6 +154,9 @@ copyRegs(ThreadContext *src, ThreadContext *dest) for (int i = 0; i < NumFloatRegs; i++) dest->setFloatRegFlat(i, src->readFloatRegFlat(i)); + for (int i = 0; i < NumVecRegs; i++) + dest->setVecRegFlat(i, src->readVecRegFlat(i)); + for (int i = 0; i < NumCCRegs; i++) dest->setCCReg(i, src->readCCReg(i)); diff 
--git a/src/arch/generic/ISACommon.py b/src/arch/generic/ISACommon.py new file mode 100644 index 000000000..7777dc27e --- /dev/null +++ b/src/arch/generic/ISACommon.py @@ -0,0 +1,50 @@ +# Copyright (c) 2016 ARM Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Rekai Gonzalez + +from m5.params import * +from m5.proxy import * +from m5.SimObject import SimObject + +class VecRegRenameMode(Enum): + '''Enum for Rename Mode in rename map + Elem: Each native-elem in a vector register is renamed independently. + Full: Vectors are renamed as one unit.''' + + vals = ['Full', 'Elem'] + + +__all__ = ['VecRegRenameMode'] diff --git a/src/arch/generic/SConscript b/src/arch/generic/SConscript index c87ad671f..7123eaf4a 100644 --- a/src/arch/generic/SConscript +++ b/src/arch/generic/SConscript @@ -1,3 +1,15 @@ +# Copyright (c) 2016 ARM Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# # Copyright (c) 2012 Google # All rights reserved. 
# @@ -36,6 +48,7 @@ Source('mmapped_ipr.cc') Source('tlb.cc') SimObject('BaseTLB.py') +SimObject('ISACommon.py') DebugFlag('TLB') Source('pseudo_inst.cc') diff --git a/src/arch/generic/traits.hh b/src/arch/generic/traits.hh new file mode 100644 index 000000000..3dc6b30ee --- /dev/null +++ b/src/arch/generic/traits.hh @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2016 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Rekai Gonzalez + */ + +/* Auxiliary structs for architecture traits. */ + +#ifndef __ARCH_COMMON_TRAITS_HH__ +#define __ARCH_COMMON_TRAITS_HH__ + +#include "enums/VecRegRenameMode.hh" + +/** Helper structure to get the vector register mode for a given ISA. + * This way we implement a default 'full' mode, and only those ISA that care + * have to actually specialise the template to forward the call to the + * appropriate member of the ISA. + */ +template <typename ISA> +struct initRenameMode +{ + static Enums::VecRegRenameMode mode(const ISA*) { return Enums::Full; } + /** + * Compare the initial rename mode of two instances of the ISA. + * Result is true by definition, as the default mode is Full. + * */ + static bool equals(const ISA*, const ISA*) { return true; } +}; + +#endif /* __ARCH_COMMON_TRAITS_HH__ */ diff --git a/src/arch/generic/types.hh b/src/arch/generic/types.hh index bb6eafd66..353112913 100644 --- a/src/arch/generic/types.hh +++ b/src/arch/generic/types.hh @@ -40,6 +40,9 @@ // Logical register index type. typedef uint16_t RegIndex; +/** Logical vector register elem index type. 
*/ +using ElemIndex = uint16_t; + namespace GenericISA { diff --git a/src/arch/isa_parser.py b/src/arch/isa_parser.py index 610197e38..759b50c0d 100755 --- a/src/arch/isa_parser.py +++ b/src/arch/isa_parser.py @@ -1,4 +1,4 @@ -# Copyright (c) 2014 ARM Limited +# Copyright (c) 2014, 2016 ARM Limited # All rights reserved # # The license below extends only to copyright in the software and shall @@ -865,8 +865,8 @@ class OperandList(object): op_desc = self.find_base(op_base) if op_desc: if op_desc.ext != op_ext: - error('Inconsistent extensions for operand %s' % \ - op_base) + error ('Inconsistent extensions for operand %s' % \ + op_base) op_desc.is_src = op_desc.is_src or is_src op_desc.is_dest = op_desc.is_dest or is_dest else: diff --git a/src/arch/mips/isa.hh b/src/arch/mips/isa.hh index c751cb168..c38b7cec0 100644 --- a/src/arch/mips/isa.hh +++ b/src/arch/mips/isa.hh @@ -180,6 +180,18 @@ namespace MipsISA return reg; } + int + flattenVecIndex(int reg) const + { + return reg; + } + + int + flattenVecElemIndex(int reg) const + { + return reg; + } + // dummy int flattenCCIndex(int reg) const diff --git a/src/arch/mips/registers.hh b/src/arch/mips/registers.hh index c7cdb6522..f5dd325cd 100644 --- a/src/arch/mips/registers.hh +++ b/src/arch/mips/registers.hh @@ -32,6 +32,7 @@ #ifndef __ARCH_MIPS_REGISTERS_HH__ #define __ARCH_MIPS_REGISTERS_HH__ +#include "arch/generic/vec_reg.hh" #include "arch/mips/generated/max_inst_regs.hh" #include "base/misc.hh" #include "base/types.hh" @@ -289,6 +290,15 @@ typedef uint64_t MiscReg; // dummy typedef since we don't have CC regs typedef uint8_t CCReg; +// dummy typedefs since we don't have vector regs +constexpr unsigned NumVecElemPerVecReg = 2; +using VecElem = uint32_t; +using VecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, false>; +using ConstVecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, true>; +using VecRegContainer = VecReg::Container; +// This has to be one to prevent warnings that are treated as errors +constexpr 
unsigned NumVecRegs = 1; + typedef union { IntReg intreg; FloatReg fpreg; diff --git a/src/arch/null/registers.hh b/src/arch/null/registers.hh index 6d1ecf1c5..3d27d95a2 100644 --- a/src/arch/null/registers.hh +++ b/src/arch/null/registers.hh @@ -40,6 +40,7 @@ #ifndef __ARCH_NULL_REGISTERS_HH__ #define __ARCH_NULL_REGISTERS_HH__ +#include "arch/generic/vec_reg.hh" #include "arch/types.hh" #include "base/types.hh" @@ -52,6 +53,15 @@ typedef uint8_t CCReg; typedef uint64_t MiscReg; const RegIndex ZeroReg = 0; +// dummy typedefs since we don't have vector regs +constexpr unsigned NumVecElemPerVecReg = 2; +using VecElem = uint32_t; +using VecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, false>; +using ConstVecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, true>; +using VecRegContainer = VecReg::Container; +// This has to be one to prevent warnings that are treated as errors +constexpr unsigned NumVecRegs = 1; + } #endif // __ARCH_NULL_REGISTERS_HH__ diff --git a/src/arch/power/isa.hh b/src/arch/power/isa.hh index 475b4d2f8..edac96d59 100644 --- a/src/arch/power/isa.hh +++ b/src/arch/power/isa.hh @@ -101,6 +101,18 @@ class ISA : public SimObject return reg; } + int + flattenVecIndex(int reg) const + { + return reg; + } + + int + flattenVecElemIndex(int reg) const + { + return reg; + } + // dummy int flattenCCIndex(int reg) const diff --git a/src/arch/power/registers.hh b/src/arch/power/registers.hh index 742809db1..4e8c9e9f4 100644 --- a/src/arch/power/registers.hh +++ b/src/arch/power/registers.hh @@ -31,6 +31,7 @@ #ifndef __ARCH_POWER_REGISTERS_HH__ #define __ARCH_POWER_REGISTERS_HH__ +#include "arch/generic/vec_reg.hh" #include "arch/power/generated/max_inst_regs.hh" #include "arch/power/miscregs.hh" @@ -53,6 +54,15 @@ typedef uint64_t MiscReg; // dummy typedef since we don't have CC regs typedef uint8_t CCReg; +// dummy typedefs since we don't have vector regs +constexpr unsigned NumVecElemPerVecReg = 2; +using VecElem = uint32_t; +using VecReg = ::VecRegT<VecElem, 
NumVecElemPerVecReg, false>; +using ConstVecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, true>; +using VecRegContainer = VecReg::Container; +// This has to be one to prevent warnings that are treated as errors +constexpr unsigned NumVecRegs = 1; + // Constants Related to the number of registers const int NumIntArchRegs = 32; diff --git a/src/arch/riscv/isa.hh b/src/arch/riscv/isa.hh index 3f2412303..578057aa0 100644 --- a/src/arch/riscv/isa.hh +++ b/src/arch/riscv/isa.hh @@ -97,6 +97,18 @@ class ISA : public SimObject return reg; } + int + flattenVecIndex(int reg) const + { + return reg; + } + + int + flattenVecElemIndex(int reg) const + { + return reg; + } + // dummy int flattenCCIndex(int reg) const diff --git a/src/arch/riscv/registers.hh b/src/arch/riscv/registers.hh index 2666784e5..6ae1c1691 100644 --- a/src/arch/riscv/registers.hh +++ b/src/arch/riscv/registers.hh @@ -51,6 +51,7 @@ #include <string> #include "arch/generic/types.hh" +#include "arch/generic/vec_reg.hh" #include "arch/isa_traits.hh" #include "arch/riscv/generated/max_inst_regs.hh" #include "base/types.hh" @@ -67,10 +68,19 @@ typedef double FloatReg; typedef uint8_t CCReg; // Not applicable to Riscv typedef uint64_t MiscReg; +// dummy typedefs since we don't have vector regs +const unsigned NumVecElemPerVecReg = 2; +using VecElem = uint32_t; +using VecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, false>; +using ConstVecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, true>; +using VecRegContainer = VecReg::Container; + const int NumIntArchRegs = 32; const int NumMicroIntRegs = 1; const int NumIntRegs = NumIntArchRegs + NumMicroIntRegs; const int NumFloatRegs = 32; +// This has to be one to prevent warnings that are treated as errors +const unsigned NumVecRegs = 1; const int NumCCRegs = 0; const int NumMiscRegs = 4096; diff --git a/src/arch/sparc/isa.hh b/src/arch/sparc/isa.hh index ded5b34ff..82fee0d00 100644 --- a/src/arch/sparc/isa.hh +++ b/src/arch/sparc/isa.hh @@ -202,6 +202,8 @@ class ISA : 
public SimObject return RegId(CCRegClass, flattenCCIndex(regId.index())); case MiscRegClass: return RegId(MiscRegClass, flattenMiscIndex(regId.index())); + default: + break; } return regId; } @@ -221,6 +223,18 @@ class ISA : public SimObject return reg; } + int + flattenVecIndex(int reg) const + { + return reg; + } + + int + flattenVecElemIndex(int reg) const + { + return reg; + } + // dummy int flattenCCIndex(int reg) const diff --git a/src/arch/sparc/registers.hh b/src/arch/sparc/registers.hh index 62c876f3d..596fdf4d0 100644 --- a/src/arch/sparc/registers.hh +++ b/src/arch/sparc/registers.hh @@ -32,6 +32,7 @@ #ifndef __ARCH_SPARC_REGISTERS_HH__ #define __ARCH_SPARC_REGISTERS_HH__ +#include "arch/generic/vec_reg.hh" #include "arch/sparc/generated/max_inst_regs.hh" #include "arch/sparc/miscregs.hh" #include "arch/sparc/sparc_traits.hh" @@ -52,6 +53,15 @@ typedef uint32_t FloatRegBits; // dummy typedef since we don't have CC regs typedef uint8_t CCReg; +// dummy typedefs since we don't have vector regs +constexpr unsigned NumVecElemPerVecReg = 2; +using VecElem = uint32_t; +using VecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, false>; +using ConstVecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, true>; +using VecRegContainer = VecReg::Container; +// This has to be one to prevent warnings that are treated as errors +constexpr unsigned NumVecRegs = 1; + typedef union { IntReg intReg; diff --git a/src/arch/x86/isa.hh b/src/arch/x86/isa.hh index 099d27c7c..b61face09 100644 --- a/src/arch/x86/isa.hh +++ b/src/arch/x86/isa.hh @@ -82,6 +82,8 @@ namespace X86ISA return RegId(CCRegClass, flattenCCIndex(regId.index())); case MiscRegClass: return RegId(MiscRegClass, flattenMiscIndex(regId.index())); + default: + break; } return regId; } @@ -103,6 +105,18 @@ namespace X86ISA } int + flattenVecIndex(int reg) const + { + return reg; + } + + int + flattenVecElemIndex(int reg) const + { + return reg; + } + + int flattenCCIndex(int reg) const { return reg; diff --git 
a/src/arch/x86/registers.hh b/src/arch/x86/registers.hh index d23731977..9db3349f0 100644 --- a/src/arch/x86/registers.hh +++ b/src/arch/x86/registers.hh @@ -41,6 +41,7 @@ #ifndef __ARCH_X86_REGISTERS_HH__ #define __ARCH_X86_REGISTERS_HH__ +#include "arch/generic/vec_reg.hh" #include "arch/x86/generated/max_inst_regs.hh" #include "arch/x86/regs/int.hh" #include "arch/x86/regs/ccr.hh" @@ -93,6 +94,15 @@ typedef uint64_t IntReg; typedef uint64_t CCReg; typedef uint64_t MiscReg; +// dummy typedefs since we don't have vector regs +constexpr unsigned NumVecElemPerVecReg = 2; +using VecElem = uint32_t; +using VecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, false>; +using ConstVecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, true>; +using VecRegContainer = VecReg::Container; +// This has to be one to prevent warnings that are treated as errors +constexpr unsigned NumVecRegs = 1; + //These floating point types are correct for mmx, but not //technically for x87 (80 bits) or at all for xmm (128 bits) typedef double FloatReg; diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index a8e619cd9..132c390b3 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -48,8 +48,8 @@ #include <array> #include <bitset> +#include <deque> #include <list> -#include <queue> #include <string> #include "arch/generic/tlb.hh" @@ -82,6 +82,7 @@ class BaseDynInst : public ExecContext, public RefCounted // Typedef for the CPU. typedef typename Impl::CPUType ImplCPU; typedef typename ImplCPU::ImplState ImplState; + using VecRegContainer = TheISA::VecRegContainer; // The DynInstPtr type. 
typedef typename Impl::DynInstPtr DynInstPtr; @@ -591,6 +592,10 @@ class BaseDynInst : public ExecContext, public RefCounted int8_t numFPDestRegs() const { return staticInst->numFPDestRegs(); } int8_t numIntDestRegs() const { return staticInst->numIntDestRegs(); } int8_t numCCDestRegs() const { return staticInst->numCCDestRegs(); } + int8_t numVecDestRegs() const { return staticInst->numVecDestRegs(); } + int8_t numVecElemDestRegs() const { + return staticInst->numVecElemDestRegs(); + } /** Returns the logical register index of the i'th destination register. */ const RegId& destRegIdx(int i) const { return staticInst->destRegIdx(i); } @@ -615,6 +620,8 @@ class BaseDynInst : public ExecContext, public RefCounted } /** Pushes a result onto the instResult queue. */ + /** @{ */ + /** Scalar result. */ template<typename T> void setScalarResult(T&& t) { @@ -624,6 +631,27 @@ class BaseDynInst : public ExecContext, public RefCounted } } + /** Full vector result. */ + template<typename T> + void setVecResult(T&& t) + { + if (instFlags[RecordResult]) { + instResult.push(InstResult(std::forward<T>(t), + InstResult::ResultType::VecReg)); + } + } + + /** Vector element result. */ + template<typename T> + void setVecElemResult(T&& t) + { + if (instFlags[RecordResult]) { + instResult.push(InstResult(std::forward<T>(t), + InstResult::ResultType::VecElem)); + } + } + /** @} */ + /** Records an integer register being set to a value. */ void setIntRegOperand(const StaticInst *si, int idx, IntReg val) { @@ -642,6 +670,13 @@ class BaseDynInst : public ExecContext, public RefCounted setScalarResult(val); } + /** Record a vector register being set to a value */ + void setVecRegOperand(const StaticInst *si, int idx, + const VecRegContainer& val) + { + setVecResult(val); + } + /** Records an fp register being set to an integer value. 
*/ void setFloatRegOperandBits(const StaticInst *si, int idx, FloatRegBits val) @@ -649,6 +684,12 @@ class BaseDynInst : public ExecContext, public RefCounted setScalarResult(val); } + /** Record a vector register element being set to a value */ + void setVecElemOperand(const StaticInst *si, int idx, const VecElem val) + { + setVecElemResult(val); + } + /** Records that one of the source registers is ready. */ void markSrcRegReady(); diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh index 6571d034a..213106bd2 100644 --- a/src/cpu/checker/cpu.hh +++ b/src/cpu/checker/cpu.hh @@ -96,6 +96,7 @@ class CheckerCPU : public BaseCPU, public ExecContext typedef TheISA::FloatReg FloatReg; typedef TheISA::FloatRegBits FloatRegBits; typedef TheISA::MiscReg MiscReg; + using VecRegContainer = TheISA::VecRegContainer; /** id attached to all issued requests */ MasterID masterId; @@ -225,6 +226,111 @@ class CheckerCPU : public BaseCPU, public ExecContext return thread->readFloatRegBits(reg.index()); } + /** + * Read source vector register operand. + */ + const VecRegContainer& readVecRegOperand(const StaticInst *si, + int idx) const override + { + const RegId& reg = si->srcRegIdx(idx); + assert(reg.isVecReg()); + return thread->readVecReg(reg); + } + + /** + * Read destination vector register operand for modification. + */ + VecRegContainer& getWritableVecRegOperand(const StaticInst *si, + int idx) override + { + const RegId& reg = si->destRegIdx(idx); + assert(reg.isVecReg()); + return thread->getWritableVecReg(reg); + } + + /** Vector Register Lane Interfaces. */ + /** @{ */ + /** Reads source vector 8bit operand. */ + virtual ConstVecLane8 + readVec8BitLaneOperand(const StaticInst *si, int idx) const + override + { + const RegId& reg = si->srcRegIdx(idx); + assert(reg.isVecReg()); + return thread->readVec8BitLaneReg(reg); + } + + /** Reads source vector 16bit operand. 
*/ + virtual ConstVecLane16 + readVec16BitLaneOperand(const StaticInst *si, int idx) const + override + { + const RegId& reg = si->srcRegIdx(idx); + assert(reg.isVecReg()); + return thread->readVec16BitLaneReg(reg); + } + + /** Reads source vector 32bit operand. */ + virtual ConstVecLane32 + readVec32BitLaneOperand(const StaticInst *si, int idx) const + override + { + const RegId& reg = si->srcRegIdx(idx); + assert(reg.isVecReg()); + return thread->readVec32BitLaneReg(reg); + } + + /** Reads source vector 64bit operand. */ + virtual ConstVecLane64 + readVec64BitLaneOperand(const StaticInst *si, int idx) const + override + { + const RegId& reg = si->srcRegIdx(idx); + assert(reg.isVecReg()); + return thread->readVec64BitLaneReg(reg); + } + + /** Write a lane of the destination vector operand. */ + template <typename LD> + void + setVecLaneOperandT(const StaticInst *si, int idx, const LD& val) + { + const RegId& reg = si->destRegIdx(idx); + assert(reg.isVecReg()); + return thread->setVecLane(reg, val); + } + virtual void + setVecLaneOperand(const StaticInst *si, int idx, + const LaneData<LaneSize::Byte>& val) override + { + setVecLaneOperandT(si, idx, val); + } + virtual void + setVecLaneOperand(const StaticInst *si, int idx, + const LaneData<LaneSize::TwoByte>& val) override + { + setVecLaneOperandT(si, idx, val); + } + virtual void + setVecLaneOperand(const StaticInst *si, int idx, + const LaneData<LaneSize::FourByte>& val) override + { + setVecLaneOperandT(si, idx, val); + } + virtual void + setVecLaneOperand(const StaticInst *si, int idx, + const LaneData<LaneSize::EightByte>& val) override + { + setVecLaneOperandT(si, idx, val); + } + /** @} */ + + VecElem readVecElemOperand(const StaticInst *si, int idx) const override + { + const RegId& reg = si->srcRegIdx(idx); + return thread->readVecElem(reg); + } + CCReg readCCRegOperand(const StaticInst *si, int idx) override { const RegId& reg = si->srcRegIdx(idx); @@ -239,6 +345,20 @@ class CheckerCPU : public
BaseCPU, public ExecContext InstResult::ResultType::Scalar)); } + template<typename T> + void setVecResult(T&& t) + { + result.push(InstResult(std::forward<T>(t), + InstResult::ResultType::VecReg)); + } + + template<typename T> + void setVecElemResult(T&& t) + { + result.push(InstResult(std::forward<T>(t), + InstResult::ResultType::VecElem)); + } + void setIntRegOperand(const StaticInst *si, int idx, IntReg val) override { @@ -274,6 +394,24 @@ class CheckerCPU : public BaseCPU, public ExecContext setScalarResult((uint64_t)val); } + void setVecRegOperand(const StaticInst *si, int idx, + const VecRegContainer& val) override + { + const RegId& reg = si->destRegIdx(idx); + assert(reg.isVecReg()); + thread->setVecReg(reg, val); + setVecResult(val); + } + + void setVecElemOperand(const StaticInst *si, int idx, + const VecElem val) override + { + const RegId& reg = si->destRegIdx(idx); + assert(reg.isVecElem()); + thread->setVecElem(reg, val); + setVecElemResult(val); + } + bool readPredicate() override { return thread->readPredicate(); } void setPredicate(bool val) override { diff --git a/src/cpu/checker/cpu_impl.hh b/src/cpu/checker/cpu_impl.hh index ed86aec84..d81858c14 100644 --- a/src/cpu/checker/cpu_impl.hh +++ b/src/cpu/checker/cpu_impl.hh @@ -486,6 +486,7 @@ Checker<Impl>::validateExecution(DynInstPtr &inst) int idx = -1; bool result_mismatch = false; bool scalar_mismatch = false; + bool vector_mismatch = false; if (inst->isUnverifiable()) { // Unverifiable instructions assume they were executed @@ -503,8 +504,10 @@ Checker<Impl>::validateExecution(DynInstPtr &inst) if (checker_val != inst_val) { result_mismatch = true; idx = i; - scalar_mismatch = true; - break; + scalar_mismatch = checker_val.isScalar(); + vector_mismatch = checker_val.isVector(); + panic_if(!(scalar_mismatch || vector_mismatch), + "Unknown type of result\n"); } } } // Checker CPU checks all the saved results in the dyninst passed by @@ -610,6 +613,15 @@ Checker<Impl>::copyResult(DynInstPtr 
&inst, const InstResult& mismatch_val, panic_if(!mismatch_val.isScalar(), "Unexpected type of result"); thread->setFloatRegBits(idx.index(), mismatch_val.asInteger()); break; + case VecRegClass: + panic_if(!mismatch_val.isVector(), "Unexpected type of result"); + thread->setVecReg(idx, mismatch_val.asVector()); + break; + case VecElemClass: + panic_if(!mismatch_val.isVecElem(), + "Unexpected type of result"); + thread->setVecElem(idx, mismatch_val.asVectorElem()); + break; case CCRegClass: panic_if(!mismatch_val.isScalar(), "Unexpected type of result"); thread->setCCReg(idx.index(), mismatch_val.asInteger()); @@ -618,6 +630,8 @@ Checker<Impl>::copyResult(DynInstPtr &inst, const InstResult& mismatch_val, panic_if(!mismatch_val.isScalar(), "Unexpected type of result"); thread->setMiscReg(idx.index(), mismatch_val.asInteger()); break; + default: + panic("Unknown register class: %d", (int)idx.classValue()); } } start_idx++; @@ -634,6 +648,14 @@ Checker<Impl>::copyResult(DynInstPtr &inst, const InstResult& mismatch_val, panic_if(!res.isScalar(), "Unexpected type of result"); thread->setFloatRegBits(idx.index(), res.asInteger()); break; + case VecRegClass: + panic_if(!res.isVector(), "Unexpected type of result"); + thread->setVecReg(idx, res.asVector()); + break; + case VecElemClass: + panic_if(!res.isVecElem(), "Unexpected type of result"); + thread->setVecElem(idx, res.asVectorElem()); + break; case CCRegClass: panic_if(!res.isScalar(), "Unexpected type of result"); thread->setCCReg(idx.index(), res.asInteger()); @@ -644,6 +666,8 @@ Checker<Impl>::copyResult(DynInstPtr &inst, const InstResult& mismatch_val, thread->setMiscReg(idx.index(), 0); break; // else Register is out of range... 
+ default: + panic("Unknown register class: %d", (int)idx.classValue()); } } } diff --git a/src/cpu/checker/thread_context.hh b/src/cpu/checker/thread_context.hh index e48f5936b..5208932de 100644 --- a/src/cpu/checker/thread_context.hh +++ b/src/cpu/checker/thread_context.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2012 ARM Limited + * Copyright (c) 2011-2012, 2016 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved * @@ -215,6 +215,55 @@ class CheckerThreadContext : public ThreadContext FloatRegBits readFloatRegBits(int reg_idx) { return actualTC->readFloatRegBits(reg_idx); } + const VecRegContainer& readVecReg(const RegId& reg) const + { return actualTC->readVecReg(reg); } + + /** + * Read vector register for modification, hierarchical indexing. + */ + VecRegContainer& getWritableVecReg(const RegId& reg) + { return actualTC->getWritableVecReg(reg); } + + /** Vector Register Lane Interfaces. */ + /** @{ */ + /** Reads source vector 8bit operand. */ + ConstVecLane8 + readVec8BitLaneReg(const RegId& reg) const + { return actualTC->readVec8BitLaneReg(reg); } + + /** Reads source vector 16bit operand. */ + ConstVecLane16 + readVec16BitLaneReg(const RegId& reg) const + { return actualTC->readVec16BitLaneReg(reg); } + + /** Reads source vector 32bit operand. */ + ConstVecLane32 + readVec32BitLaneReg(const RegId& reg) const + { return actualTC->readVec32BitLaneReg(reg); } + + /** Reads source vector 64bit operand. */ + ConstVecLane64 + readVec64BitLaneReg(const RegId& reg) const + { return actualTC->readVec64BitLaneReg(reg); } + + /** Write a lane of the destination vector register. 
*/ + virtual void setVecLane(const RegId& reg, + const LaneData<LaneSize::Byte>& val) + { return actualTC->setVecLane(reg, val); } + virtual void setVecLane(const RegId& reg, + const LaneData<LaneSize::TwoByte>& val) + { return actualTC->setVecLane(reg, val); } + virtual void setVecLane(const RegId& reg, + const LaneData<LaneSize::FourByte>& val) + { return actualTC->setVecLane(reg, val); } + virtual void setVecLane(const RegId& reg, + const LaneData<LaneSize::EightByte>& val) + { return actualTC->setVecLane(reg, val); } + /** @} */ + + const VecElem& readVecElem(const RegId& reg) const + { return actualTC->readVecElem(reg); } + CCReg readCCReg(int reg_idx) { return actualTC->readCCReg(reg_idx); } @@ -236,6 +285,18 @@ class CheckerThreadContext : public ThreadContext checkerTC->setFloatRegBits(reg_idx, val); } + void setVecReg(const RegId& reg, const VecRegContainer& val) + { + actualTC->setVecReg(reg, val); + checkerTC->setVecReg(reg, val); + } + + void setVecElem(const RegId& reg, const VecElem& val) + { + actualTC->setVecElem(reg, val); + checkerTC->setVecElem(reg, val); + } + void setCCReg(int reg_idx, CCReg val) { actualTC->setCCReg(reg_idx, val); @@ -333,6 +394,26 @@ class CheckerThreadContext : public ThreadContext void setFloatRegBitsFlat(int idx, FloatRegBits val) { actualTC->setFloatRegBitsFlat(idx, val); } + const VecRegContainer& readVecRegFlat(int idx) const + { return actualTC->readVecRegFlat(idx); } + + /** + * Read vector register for modification, flat indexing. 
+ */ + VecRegContainer& getWritableVecRegFlat(int idx) + { return actualTC->getWritableVecRegFlat(idx); } + + void setVecRegFlat(int idx, const VecRegContainer& val) + { actualTC->setVecRegFlat(idx, val); } + + const VecElem& readVecElemFlat(const RegIndex& idx, + const ElemIndex& elem_idx) const + { return actualTC->readVecElemFlat(idx, elem_idx); } + + void setVecElemFlat(const RegIndex& idx, + const ElemIndex& elem_idx, const VecElem& val) + { actualTC->setVecElemFlat(idx, elem_idx, val); } + CCReg readCCRegFlat(int idx) { return actualTC->readCCRegFlat(idx); } diff --git a/src/cpu/exec_context.hh b/src/cpu/exec_context.hh index d33147240..e7f5d37ac 100644 --- a/src/cpu/exec_context.hh +++ b/src/cpu/exec_context.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 ARM Limited + * Copyright (c) 2014, 2016 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -79,6 +79,8 @@ class ExecContext { typedef TheISA::MiscReg MiscReg; typedef TheISA::CCReg CCReg; + using VecRegContainer = TheISA::VecRegContainer; + using VecElem = TheISA::VecElem; public: /** @@ -121,6 +123,63 @@ class ExecContext { /** @} */ + /** Vector Register Interfaces. */ + /** @{ */ + /** Reads source vector register operand. */ + virtual const VecRegContainer& + readVecRegOperand(const StaticInst *si, int idx) const = 0; + + /** Gets destination vector register operand for modification. */ + virtual VecRegContainer& + getWritableVecRegOperand(const StaticInst *si, int idx) = 0; + + /** Sets a destination vector register operand to a value. */ + virtual void + setVecRegOperand(const StaticInst *si, int idx, + const VecRegContainer& val) = 0; + /** @} */ + + /** Vector Register Lane Interfaces. */ + /** @{ */ + /** Reads source vector 8bit operand. */ + virtual ConstVecLane8 + readVec8BitLaneOperand(const StaticInst *si, int idx) const = 0; + + /** Reads source vector 16bit operand. 
*/ + virtual ConstVecLane16 + readVec16BitLaneOperand(const StaticInst *si, int idx) const = 0; + + /** Reads source vector 32bit operand. */ + virtual ConstVecLane32 + readVec32BitLaneOperand(const StaticInst *si, int idx) const = 0; + + /** Reads source vector 64bit operand. */ + virtual ConstVecLane64 + readVec64BitLaneOperand(const StaticInst *si, int idx) const = 0; + + /** Write a lane of the destination vector operand. */ + /** @{ */ + virtual void setVecLaneOperand(const StaticInst *si, int idx, + const LaneData<LaneSize::Byte>& val) = 0; + virtual void setVecLaneOperand(const StaticInst *si, int idx, + const LaneData<LaneSize::TwoByte>& val) = 0; + virtual void setVecLaneOperand(const StaticInst *si, int idx, + const LaneData<LaneSize::FourByte>& val) = 0; + virtual void setVecLaneOperand(const StaticInst *si, int idx, + const LaneData<LaneSize::EightByte>& val) = 0; + /** @} */ + + /** Vector Elem Interfaces. */ + /** @{ */ + /** Reads an element of a vector register. */ + virtual VecElem readVecElemOperand(const StaticInst *si, + int idx) const = 0; + + /** Sets a vector register to a value. */ + virtual void setVecElemOperand(const StaticInst *si, int idx, + const VecElem val) = 0; + /** @} */ + /** * @{ * @name Condition Code Registers diff --git a/src/cpu/inst_res.hh b/src/cpu/inst_res.hh index f6f14fe19..9b6a23d95 100644 --- a/src/cpu/inst_res.hh +++ b/src/cpu/inst_res.hh @@ -43,17 +43,24 @@ #include <type_traits> #include "arch/generic/types.hh" +#include "arch/generic/vec_reg.hh" class InstResult { + using VecRegContainer = TheISA::VecRegContainer; + using VecElem = TheISA::VecElem; public: union MultiResult { uint64_t integer; double dbl; + VecRegContainer vector; + VecElem vecElem; MultiResult() {} }; enum class ResultType { Scalar, + VecElem, + VecReg, NumResultTypes, Invalid }; @@ -77,7 +84,32 @@ class InstResult { result.dbl = i; } } + /** Vector result. 
*/ + explicit InstResult(const VecRegContainer& v, const ResultType& t) + : type(t) { result.vector = v; } + InstResult& operator=(const InstResult& that) { + type = that.type; + switch (type) { + /* Given that misc regs are not written to, there may be invalids in + * the result stack. */ + case ResultType::Invalid: + break; + case ResultType::Scalar: + result.integer = that.result.integer; + break; + case ResultType::VecElem: + result.vecElem = that.result.vecElem; + break; + case ResultType::VecReg: + result.vector = that.result.vector; + break; + default: + panic("Assigning result from unknown result type"); + break; + } + return *this; + } /** * Result comparison * Two invalid results always differ. @@ -88,6 +120,10 @@ class InstResult { switch (type) { case ResultType::Scalar: return result.integer == that.result.integer; + case ResultType::VecElem: + return result.vecElem == that.result.vecElem; + case ResultType::VecReg: + return result.vector == that.result.vector; case ResultType::Invalid: return false; default: @@ -103,6 +139,10 @@ class InstResult { /** @{ */ /** Is this a scalar result?. */ bool isScalar() const { return type == ResultType::Scalar; } + /** Is this a vector result?. */ + bool isVector() const { return type == ResultType::VecReg; } + /** Is this a vector element result?. */ + bool isVecElem() const { return type == ResultType::VecElem; } /** Is this a valid result?. 
*/ bool isValid() const { return type != ResultType::Invalid; } /** @} */ @@ -125,6 +165,18 @@ class InstResult { { return result.integer; } + const VecRegContainer& + asVector() const + { + panic_if(!isVector(), "Converting scalar (or invalid) to vector!!"); + return result.vector; + } + const VecElem& + asVectorElem() const + { + panic_if(!isVecElem(), "Converting scalar (or invalid) to vector!!"); + return result.vecElem; + } /** @} */ }; diff --git a/src/cpu/minor/dyn_inst.cc b/src/cpu/minor/dyn_inst.cc index 1ed598833..756b214bd 100644 --- a/src/cpu/minor/dyn_inst.cc +++ b/src/cpu/minor/dyn_inst.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2014 ARM Limited + * Copyright (c) 2013-2014, 2016 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -153,6 +153,13 @@ printRegName(std::ostream &os, const RegId& reg) case FloatRegClass: os << 'f' << static_cast<unsigned int>(reg.index()); break; + case VecRegClass: + os << 'v' << static_cast<unsigned int>(reg.index()); + break; + case VecElemClass: + os << 'v' << static_cast<unsigned int>(reg.index()) << '[' << + static_cast<unsigned int>(reg.elemIndex()) << ']'; + break; case IntRegClass: if (reg.isZeroReg()) { os << 'z'; @@ -162,6 +169,8 @@ printRegName(std::ostream &os, const RegId& reg) break; case CCRegClass: os << 'c' << static_cast<unsigned int>(reg.index()); + default: + panic("Unknown register class: %d", (int)reg.classValue()); } } diff --git a/src/cpu/minor/exec_context.hh b/src/cpu/minor/exec_context.hh index e91b7a6dd..4b3a02fca 100644 --- a/src/cpu/minor/exec_context.hh +++ b/src/cpu/minor/exec_context.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2014 ARM Limited + * Copyright (c) 2011-2014, 2016 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. 
* All rights reserved * @@ -145,6 +145,30 @@ class ExecContext : public ::ExecContext return thread.readFloatRegBits(reg.index()); } + const TheISA::VecRegContainer& + readVecRegOperand(const StaticInst *si, int idx) const override + { + const RegId& reg = si->srcRegIdx(idx); + assert(reg.isVecReg()); + return thread.readVecReg(reg); + } + + TheISA::VecRegContainer& + getWritableVecRegOperand(const StaticInst *si, int idx) override + { + const RegId& reg = si->destRegIdx(idx); + assert(reg.isVecReg()); + return thread.getWritableVecReg(reg); + } + + TheISA::VecElem + readVecElemOperand(const StaticInst *si, int idx) const override + { + const RegId& reg = si->srcRegIdx(idx); + assert(reg.isVecReg()); + return thread.readVecElem(reg); + } + void setIntRegOperand(const StaticInst *si, int idx, IntReg val) override { @@ -171,6 +195,102 @@ class ExecContext : public ::ExecContext thread.setFloatRegBits(reg.index(), val); } + void + setVecRegOperand(const StaticInst *si, int idx, + const TheISA::VecRegContainer& val) override + { + const RegId& reg = si->destRegIdx(idx); + assert(reg.isVecReg()); + thread.setVecReg(reg, val); + } + + /** Vector Register Lane Interfaces. */ + /** @{ */ + /** Reads source vector 8bit operand. */ + ConstVecLane8 + readVec8BitLaneOperand(const StaticInst *si, int idx) const + override + { + const RegId& reg = si->srcRegIdx(idx); + assert(reg.isVecReg()); + return thread.readVec8BitLaneReg(reg); + } + + /** Reads source vector 16bit operand. */ + ConstVecLane16 + readVec16BitLaneOperand(const StaticInst *si, int idx) const + override + { + const RegId& reg = si->srcRegIdx(idx); + assert(reg.isVecReg()); + return thread.readVec16BitLaneReg(reg); + } + + /** Reads source vector 32bit operand. */ + ConstVecLane32 + readVec32BitLaneOperand(const StaticInst *si, int idx) const + override + { + const RegId& reg = si->srcRegIdx(idx); + assert(reg.isVecReg()); + return thread.readVec32BitLaneReg(reg); + } + + /** Reads source vector 64bit operand. 
*/ + ConstVecLane64 + readVec64BitLaneOperand(const StaticInst *si, int idx) const + override + { + const RegId& reg = si->srcRegIdx(idx); + assert(reg.isVecReg()); + return thread.readVec64BitLaneReg(reg); + } + + /** Write a lane of the destination vector operand. */ + template <typename LD> + void + setVecLaneOperandT(const StaticInst *si, int idx, + const LD& val) + { + const RegId& reg = si->destRegIdx(idx); + assert(reg.isVecReg()); + return thread.setVecLane(reg, val); + } + virtual void + setVecLaneOperand(const StaticInst *si, int idx, + const LaneData<LaneSize::Byte>& val) override + { + setVecLaneOperandT(si, idx, val); + } + virtual void + setVecLaneOperand(const StaticInst *si, int idx, + const LaneData<LaneSize::TwoByte>& val) override + { + setVecLaneOperandT(si, idx, val); + } + virtual void + setVecLaneOperand(const StaticInst *si, int idx, + const LaneData<LaneSize::FourByte>& val) override + { + setVecLaneOperandT(si, idx, val); + } + virtual void + setVecLaneOperand(const StaticInst *si, int idx, + const LaneData<LaneSize::EightByte>& val) override + { + setVecLaneOperandT(si, idx, val); + } + /** @} */ + + void + setVecElemOperand(const StaticInst *si, int idx, + const TheISA::VecElem val) override + { + const RegId& reg = si->destRegIdx(idx); + assert(reg.isVecReg()); + thread.setVecElem(reg, val); + } + bool readPredicate() override { diff --git a/src/cpu/minor/scoreboard.cc b/src/cpu/minor/scoreboard.cc index e3497a5cf..c56d3b303 100644 --- a/src/cpu/minor/scoreboard.cc +++ b/src/cpu/minor/scoreboard.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2014 ARM Limited + * Copyright (c) 2013-2014, 2016 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -67,6 +67,16 @@ Scoreboard::findIndex(const RegId& reg, Index &scoreboard_index) reg.index(); ret = true; break; + case VecRegClass: + scoreboard_index = TheISA::NumIntRegs + TheISA::NumCCRegs + + TheISA::NumFloatRegs + reg.index(); + ret = 
true; + break; + case VecElemClass: + scoreboard_index = TheISA::NumIntRegs + TheISA::NumCCRegs + + TheISA::NumFloatRegs + TheISA::NumVecRegs + reg.index(); + ret = true; + break; case CCRegClass: scoreboard_index = TheISA::NumIntRegs + reg.index(); ret = true; @@ -75,6 +85,9 @@ Scoreboard::findIndex(const RegId& reg, Index &scoreboard_index) /* Don't bother with Misc registers */ ret = false; break; + default: + panic("Unknown register class: %d", + static_cast<int>(reg.classValue())); } } diff --git a/src/cpu/minor/scoreboard.hh b/src/cpu/minor/scoreboard.hh index 7fe5002f9..9e42c2a6b 100644 --- a/src/cpu/minor/scoreboard.hh +++ b/src/cpu/minor/scoreboard.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2014 ARM Limited + * Copyright (c) 2013-2014, 2016 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -94,7 +94,7 @@ class Scoreboard : public Named Scoreboard(const std::string &name) : Named(name), numRegs(TheISA::NumIntRegs + TheISA::NumCCRegs + - TheISA::NumFloatRegs), + TheISA::NumFloatRegs + TheISA::NumVecRegs), numResults(numRegs, 0), numUnpredictableResults(numRegs, 0), fuIndices(numRegs, 0), diff --git a/src/cpu/o3/O3CPU.py b/src/cpu/o3/O3CPU.py index cab2cf34f..8507ab6ff 100644 --- a/src/cpu/o3/O3CPU.py +++ b/src/cpu/o3/O3CPU.py @@ -1,3 +1,15 @@ +# Copyright (c) 2016 ARM Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. 
+# # Copyright (c) 2005-2007 The Regents of The University of Michigan # All rights reserved. # @@ -125,6 +137,8 @@ class DerivO3CPU(BaseCPU): # (it's a side effect of int reg renaming), so they should # never be the bottleneck here. _defaultNumPhysCCRegs = Self.numPhysIntRegs * 5 + numPhysVecRegs = Param.Unsigned(256, "Number of physical vector " + "registers") numPhysCCRegs = Param.Unsigned(_defaultNumPhysCCRegs, "Number of physical cc registers") numIQEntries = Param.Unsigned(64, "Number of instruction queue entries") diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh index 49e153a52..f5be5a804 100644 --- a/src/cpu/o3/comm.hh +++ b/src/cpu/o3/comm.hh @@ -75,12 +75,18 @@ class PhysRegId : private RegId { : RegId(_regClass, _regIdx), flatIdx(_flatIdx) {} + /** Vector PhysRegId constructor (w/ elemIndex). */ + explicit PhysRegId(RegClass _regClass, PhysRegIndex _regIdx, + ElemIndex elem_idx, PhysRegIndex flat_idx) + : RegId(_regClass, _regIdx, elem_idx), flatIdx(flat_idx) { } + /** Visible RegId methods */ /** @{ */ using RegId::index; using RegId::classValue; using RegId::isZeroReg; using RegId::className; + using RegId::elemIndex; /** @} */ /** * Explicit forward methods, to prevent comparisons of PhysRegId with @@ -109,6 +115,12 @@ class PhysRegId : private RegId { /** @Return true if it is a condition-code physical register. */ bool isCCPhysReg() const { return isCCReg(); } + /** @Return true if it is a vector physical register. */ + bool isVectorPhysReg() const { return isVecReg(); } + + /** @Return true if it is a vector element physical register. */ + bool isVectorPhysElem() const { return isVecElem(); } + /** @Return true if it is a condition-code physical register. 
*/ bool isMiscPhysReg() const { return isMiscReg(); } @@ -123,11 +135,18 @@ class PhysRegId : private RegId { /** Flat index accessor */ const PhysRegIndex& flatIndex() const { return flatIdx; } + + static PhysRegId elemId(const PhysRegId* vid, ElemIndex elem) + { + assert(vid->isVectorPhysReg()); + return PhysRegId(VecElemClass, vid->index(), elem); + } }; -// PhysRegIds only need to be created once and then we can use the following -// to work with them -typedef const PhysRegId* PhysRegIdPtr; +/** Constant pointer definition. + * PhysRegIds only need to be created once and then we can just share + * pointers */ +using PhysRegIdPtr = const PhysRegId*; /** Struct that defines the information passed from fetch to decode. */ template<class Impl> diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index a7a39b72a..c249d90ba 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2012, 2014 ARM Limited + * Copyright (c) 2011-2012, 2014, 2016 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. 
* All rights reserved * @@ -46,6 +46,7 @@ #include "cpu/o3/cpu.hh" +#include "arch/generic/traits.hh" #include "arch/kernel_stats.hh" #include "config/the_isa.hh" #include "cpu/activity.hh" @@ -171,9 +172,14 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params) iew(this, params), commit(this, params), + /* It is mandatory that all SMT threads use the same renaming mode as + * they are sharing registers and rename */ + vecMode(initRenameMode<TheISA::ISA>::mode(params->isa[0])), regFile(params->numPhysIntRegs, params->numPhysFloatRegs, - params->numPhysCCRegs), + params->numPhysVecRegs, + params->numPhysCCRegs, + vecMode), freeList(name() + ".freelist", &regFile), @@ -270,6 +276,7 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params) //Make Sure That this a Valid Architeture assert(params->numPhysIntRegs >= numThreads * TheISA::NumIntRegs); assert(params->numPhysFloatRegs >= numThreads * TheISA::NumFloatRegs); + assert(params->numPhysVecRegs >= numThreads * TheISA::NumVecRegs); assert(params->numPhysCCRegs >= numThreads * TheISA::NumCCRegs); rename.setScoreboard(&scoreboard); @@ -278,6 +285,7 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params) // Setup the rename map for whichever stages need it. for (ThreadID tid = 0; tid < numThreads; tid++) { isa[tid] = params->isa[tid]; + assert(initRenameMode<TheISA::ISA>::equals(isa[tid], isa[0])); // Only Alpha has an FP zero register, so for other ISAs we // use an invalid FP register index to avoid special treatment @@ -287,10 +295,11 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params) (THE_ISA == ALPHA_ISA) ?
TheISA::ZeroReg : invalidFPReg; commitRenameMap[tid].init(&regFile, TheISA::ZeroReg, fpZeroReg, - &freeList); + &freeList, + vecMode); renameMap[tid].init(&regFile, TheISA::ZeroReg, fpZeroReg, - &freeList); + &freeList, vecMode); } // Initialize rename map to assign physical registers to the @@ -311,6 +320,30 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params) RegId(FloatRegClass, ridx), phys_reg); } + /* Here we need two 'interfaces' the 'whole register' and the + * 'register element'. At any point only one of them will be + * active. */ + if (vecMode == Enums::Full) { + /* Initialize the full-vector interface */ + for (RegIndex ridx = 0; ridx < TheISA::NumVecRegs; ++ridx) { + RegId rid = RegId(VecRegClass, ridx); + PhysRegIdPtr phys_reg = freeList.getVecReg(); + renameMap[tid].setEntry(rid, phys_reg); + commitRenameMap[tid].setEntry(rid, phys_reg); + } + } else { + /* Initialize the vector-element interface */ + for (RegIndex ridx = 0; ridx < TheISA::NumVecRegs; ++ridx) { + for (ElemIndex ldx = 0; ldx < TheISA::NumVecElemPerVecReg; + ++ldx) { + RegId lrid = RegId(VecElemClass, ridx, ldx); + PhysRegIdPtr phys_elem = freeList.getVecElem(); + renameMap[tid].setEntry(lrid, phys_elem); + commitRenameMap[tid].setEntry(lrid, phys_elem); + } + } + } + for (RegIndex ridx = 0; ridx < TheISA::NumCCRegs; ++ridx) { PhysRegIdPtr phys_reg = freeList.getCCReg(); renameMap[tid].setEntry(RegId(CCRegClass, ridx), phys_reg); @@ -514,6 +547,16 @@ FullO3CPU<Impl>::regStats() .desc("number of floating regfile writes") .prereq(fpRegfileWrites); + vecRegfileReads + .name(name() + ".vec_regfile_reads") + .desc("number of vector regfile reads") + .prereq(vecRegfileReads); + + vecRegfileWrites + .name(name() + ".vec_regfile_writes") + .desc("number of vector regfile writes") + .prereq(vecRegfileWrites); + ccRegfileReads .name(name() + ".cc_regfile_reads") .desc("number of cc regfile reads") @@ -1257,6 +1300,32 @@ FullO3CPU<Impl>::readFloatRegBits(PhysRegIdPtr phys_reg) } template <class Impl>
+auto +FullO3CPU<Impl>::readVecReg(PhysRegIdPtr phys_reg) const + -> const VecRegContainer& +{ + vecRegfileReads++; + return regFile.readVecReg(phys_reg); +} + +template <class Impl> +auto +FullO3CPU<Impl>::getWritableVecReg(PhysRegIdPtr phys_reg) + -> VecRegContainer& +{ + vecRegfileWrites++; + return regFile.getWritableVecReg(phys_reg); +} + +template <class Impl> +auto +FullO3CPU<Impl>::readVecElem(PhysRegIdPtr phys_reg) const -> const VecElem& +{ + vecRegfileReads++; + return regFile.readVecElem(phys_reg); +} + +template <class Impl> CCReg FullO3CPU<Impl>::readCCReg(PhysRegIdPtr phys_reg) { @@ -1290,6 +1359,22 @@ FullO3CPU<Impl>::setFloatRegBits(PhysRegIdPtr phys_reg, FloatRegBits val) template <class Impl> void +FullO3CPU<Impl>::setVecReg(PhysRegIdPtr phys_reg, const VecRegContainer& val) +{ + vecRegfileWrites++; + regFile.setVecReg(phys_reg, val); +} + +template <class Impl> +void +FullO3CPU<Impl>::setVecElem(PhysRegIdPtr phys_reg, const VecElem& val) +{ + vecRegfileWrites++; + regFile.setVecElem(phys_reg, val); +} + +template <class Impl> +void FullO3CPU<Impl>::setCCReg(PhysRegIdPtr phys_reg, CCReg val) { ccRegfileWrites++; @@ -1330,6 +1415,36 @@ FullO3CPU<Impl>::readArchFloatRegInt(int reg_idx, ThreadID tid) } template <class Impl> +auto +FullO3CPU<Impl>::readArchVecReg(int reg_idx, ThreadID tid) const + -> const VecRegContainer& +{ + PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( + RegId(VecRegClass, reg_idx)); + return readVecReg(phys_reg); +} + +template <class Impl> +auto +FullO3CPU<Impl>::getWritableArchVecReg(int reg_idx, ThreadID tid) + -> VecRegContainer& +{ + PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( + RegId(VecRegClass, reg_idx)); + return getWritableVecReg(phys_reg); +} + +template <class Impl> +auto +FullO3CPU<Impl>::readArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx, + ThreadID tid) const -> const VecElem& +{ + PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( + RegId(VecRegClass, reg_idx, ldx)); + return 
readVecElem(phys_reg); +} + +template <class Impl> CCReg FullO3CPU<Impl>::readArchCCReg(int reg_idx, ThreadID tid) { @@ -1375,6 +1490,26 @@ FullO3CPU<Impl>::setArchFloatRegInt(int reg_idx, uint64_t val, ThreadID tid) template <class Impl> void +FullO3CPU<Impl>::setArchVecReg(int reg_idx, const VecRegContainer& val, + ThreadID tid) +{ + PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( + RegId(VecRegClass, reg_idx)); + setVecReg(phys_reg, val); +} + +template <class Impl> +void +FullO3CPU<Impl>::setArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx, + const VecElem& val, ThreadID tid) +{ + PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( + RegId(VecRegClass, reg_idx, ldx)); + setVecElem(phys_reg, val); +} + +template <class Impl> +void FullO3CPU<Impl>::setArchCCReg(int reg_idx, CCReg val, ThreadID tid) { ccRegfileWrites++; diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index b5cbc5fe2..d78d1b9d3 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2013 ARM Limited + * Copyright (c) 2011-2013, 2016 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved * @@ -53,6 +53,7 @@ #include <set> #include <vector> +#include "arch/generic/types.hh" #include "arch/types.hh" #include "base/statistics.hh" #include "config/the_isa.hh" @@ -103,6 +104,9 @@ class FullO3CPU : public BaseO3CPU typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::O3CPU O3CPU; + using VecElem = TheISA::VecElem; + using VecRegContainer = TheISA::VecRegContainer; + typedef O3ThreadState<Impl> ImplState; typedef O3ThreadState<Impl> Thread; @@ -417,6 +421,46 @@ class FullO3CPU : public BaseO3CPU TheISA::FloatRegBits readFloatRegBits(PhysRegIdPtr phys_reg); + const VecRegContainer& readVecReg(PhysRegIdPtr reg_idx) const; + + /** + * Read physical vector register for modification. 
+ */ + VecRegContainer& getWritableVecReg(PhysRegIdPtr reg_idx); + + /** + * Read physical vector register lane + */ + template<typename VecElem, int LaneIdx> + VecLaneT<VecElem, true> + readVecLane(PhysRegIdPtr phys_reg) const + { + vecRegfileReads++; + return regFile.readVecLane<VecElem, LaneIdx>(phys_reg); + } + + /** + * Read physical vector register lane + */ + template<typename VecElem> + VecLaneT<VecElem, true> + readVecLane(PhysRegIdPtr phys_reg) const + { + vecRegfileReads++; + return regFile.readVecLane<VecElem>(phys_reg); + } + + /** Write a lane of the destination vector register. */ + template<typename LD> + void + setVecLane(PhysRegIdPtr phys_reg, const LD& val) + { + vecRegfileWrites++; + return regFile.setVecLane(phys_reg, val); + } + + const VecElem& readVecElem(PhysRegIdPtr reg_idx) const; + TheISA::CCReg readCCReg(PhysRegIdPtr phys_reg); void setIntReg(PhysRegIdPtr phys_reg, uint64_t val); @@ -425,6 +469,10 @@ class FullO3CPU : public BaseO3CPU void setFloatRegBits(PhysRegIdPtr phys_reg, TheISA::FloatRegBits val); + void setVecReg(PhysRegIdPtr reg_idx, const VecRegContainer& val); + + void setVecElem(PhysRegIdPtr reg_idx, const VecElem& val); + void setCCReg(PhysRegIdPtr phys_reg, TheISA::CCReg val); uint64_t readArchIntReg(int reg_idx, ThreadID tid); @@ -433,6 +481,34 @@ class FullO3CPU : public BaseO3CPU uint64_t readArchFloatRegInt(int reg_idx, ThreadID tid); + const VecRegContainer& readArchVecReg(int reg_idx, ThreadID tid) const; + /** Read architectural vector register for modification. */ + VecRegContainer& getWritableArchVecReg(int reg_idx, ThreadID tid); + + /** Read architectural vector register lane. */ + template<typename VecElem> + VecLaneT<VecElem, true> + readArchVecLane(int reg_idx, int lId, ThreadID tid) const + { + PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( + RegId(VecRegClass, reg_idx)); + return readVecLane<VecElem>(phys_reg); + } + + + /** Write a lane of the destination vector register. 
*/ + template<typename LD> + void + setArchVecLane(int reg_idx, int lId, ThreadID tid, const LD& val) + { + PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( + RegId(VecRegClass, reg_idx)); + setVecLane(phys_reg, val); + } + + const VecElem& readArchVecElem(const RegIndex& reg_idx, + const ElemIndex& ldx, ThreadID tid) const; + TheISA::CCReg readArchCCReg(int reg_idx, ThreadID tid); /** Architectural register accessors. Looks up in the commit @@ -446,6 +522,11 @@ class FullO3CPU : public BaseO3CPU void setArchFloatRegInt(int reg_idx, uint64_t val, ThreadID tid); + void setArchVecReg(int reg_idx, const VecRegContainer& val, ThreadID tid); + + void setArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx, + const VecElem& val, ThreadID tid); + void setArchCCReg(int reg_idx, TheISA::CCReg val, ThreadID tid); /** Sets the commit PC state of a specific thread. */ @@ -540,6 +621,9 @@ class FullO3CPU : public BaseO3CPU /** The commit stage. */ typename CPUPolicy::Commit commit; + /** The rename mode of the vector registers */ + Enums::VecRegRenameMode vecMode; + /** The register file. */ PhysRegFile regFile; @@ -722,6 +806,9 @@ class FullO3CPU : public BaseO3CPU //number of float register file accesses Stats::Scalar fpRegfileReads; Stats::Scalar fpRegfileWrites; + //number of vector register file accesses + mutable Stats::Scalar vecRegfileReads; + Stats::Scalar vecRegfileWrites; //number of CC register file accesses Stats::Scalar ccRegfileReads; Stats::Scalar ccRegfileWrites; diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh index a6adb4c20..0643e7e30 100644 --- a/src/cpu/o3/dyn_inst.hh +++ b/src/cpu/o3/dyn_inst.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2016 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. 
* All rights reserved * @@ -72,6 +72,9 @@ class BaseO3DynInst : public BaseDynInst<Impl> typedef TheISA::FloatReg FloatReg; typedef TheISA::FloatRegBits FloatRegBits; typedef TheISA::CCReg CCReg; + using VecRegContainer = TheISA::VecRegContainer; + using VecElem = TheISA::VecElem; + static constexpr auto NumVecElemPerVecReg = TheISA::NumVecElemPerVecReg; /** Misc register type. */ typedef TheISA::MiscReg MiscReg; @@ -83,9 +86,9 @@ class BaseO3DynInst : public BaseDynInst<Impl> public: /** BaseDynInst constructor given a binary instruction. */ - BaseO3DynInst(const StaticInstPtr &staticInst, const StaticInstPtr &macroop, - TheISA::PCState pc, TheISA::PCState predPC, - InstSeqNum seq_num, O3CPU *cpu); + BaseO3DynInst(const StaticInstPtr &staticInst, const StaticInstPtr + &macroop, TheISA::PCState pc, TheISA::PCState predPC, + InstSeqNum seq_num, O3CPU *cpu); /** BaseDynInst constructor given a static inst pointer. */ BaseO3DynInst(const StaticInstPtr &_staticInst, @@ -107,6 +110,11 @@ class BaseO3DynInst : public BaseDynInst<Impl> void initVars(); protected: + /** Explicitation of dependent names. */ + using BaseDynInst<Impl>::cpu; + using BaseDynInst<Impl>::_srcRegIdx; + using BaseDynInst<Impl>::_destRegIdx; + /** Values to be written to the destination misc. registers. 
*/ std::array<MiscReg, TheISA::MaxMiscDestRegs> _destMiscRegVal; @@ -213,19 +221,30 @@ class BaseO3DynInst : public BaseDynInst<Impl> switch (original_dest_reg.classValue()) { case IntRegClass: this->setIntRegOperand(this->staticInst.get(), idx, - this->cpu->readIntReg(prev_phys_reg)); + this->cpu->readIntReg(prev_phys_reg)); break; case FloatRegClass: this->setFloatRegOperandBits(this->staticInst.get(), idx, - this->cpu->readFloatRegBits(prev_phys_reg)); + this->cpu->readFloatRegBits(prev_phys_reg)); + break; + case VecRegClass: + this->setVecRegOperand(this->staticInst.get(), idx, + this->cpu->readVecReg(prev_phys_reg)); + break; + case VecElemClass: + this->setVecElemOperand(this->staticInst.get(), idx, + this->cpu->readVecElem(prev_phys_reg)); break; case CCRegClass: this->setCCRegOperand(this->staticInst.get(), idx, - this->cpu->readCCReg(prev_phys_reg)); + this->cpu->readCCReg(prev_phys_reg)); break; case MiscRegClass: // no need to forward misc reg values break; + default: + panic("Unknown register class: %d", + (int)original_dest_reg.classValue()); } } } @@ -266,6 +285,89 @@ class BaseO3DynInst : public BaseDynInst<Impl> return this->cpu->readFloatRegBits(this->_srcRegIdx[idx]); } + const VecRegContainer& + readVecRegOperand(const StaticInst *si, int idx) const + { + return this->cpu->readVecReg(this->_srcRegIdx[idx]); + } + + /** + * Read destination vector register operand for modification. + */ + VecRegContainer& + getWritableVecRegOperand(const StaticInst *si, int idx) + { + return this->cpu->getWritableVecReg(this->_destRegIdx[idx]); + } + + /** Vector Register Lane Interfaces. */ + /** @{ */ + /** Reads source vector 8bit operand. */ + ConstVecLane8 + readVec8BitLaneOperand(const StaticInst *si, int idx) const + { + return cpu->template readVecLane<uint8_t>(_srcRegIdx[idx]); + } + + /** Reads source vector 16bit operand. 
*/ + ConstVecLane16 + readVec16BitLaneOperand(const StaticInst *si, int idx) const + { + return cpu->template readVecLane<uint16_t>(_srcRegIdx[idx]); + } + + /** Reads source vector 32bit operand. */ + ConstVecLane32 + readVec32BitLaneOperand(const StaticInst *si, int idx) const + { + return cpu->template readVecLane<uint32_t>(_srcRegIdx[idx]); + } + + /** Reads source vector 64bit operand. */ + ConstVecLane64 + readVec64BitLaneOperand(const StaticInst *si, int idx) const + { + return cpu->template readVecLane<uint64_t>(_srcRegIdx[idx]); + } + + /** Write a lane of the destination vector operand. */ + template <typename LD> + void + setVecLaneOperandT(const StaticInst *si, int idx, const LD& val) + { + return cpu->template setVecLane(_destRegIdx[idx], val); + } + virtual void + setVecLaneOperand(const StaticInst *si, int idx, + const LaneData<LaneSize::Byte>& val) + { + return setVecLaneOperandT(si, idx, val); + } + virtual void + setVecLaneOperand(const StaticInst *si, int idx, + const LaneData<LaneSize::TwoByte>& val) + { + return setVecLaneOperandT(si, idx, val); + } + virtual void + setVecLaneOperand(const StaticInst *si, int idx, + const LaneData<LaneSize::FourByte>& val) + { + return setVecLaneOperandT(si, idx, val); + } + virtual void + setVecLaneOperand(const StaticInst *si, int idx, + const LaneData<LaneSize::EightByte>& val) + { + return setVecLaneOperandT(si, idx, val); + } + /** @} */ + + VecElem readVecElemOperand(const StaticInst *si, int idx) const + { + return this->cpu->readVecElem(this->_srcRegIdx[idx]); + } + CCReg readCCRegOperand(const StaticInst *si, int idx) { return this->cpu->readCCReg(this->_srcRegIdx[idx]); @@ -293,6 +395,22 @@ class BaseO3DynInst : public BaseDynInst<Impl> BaseDynInst<Impl>::setFloatRegOperandBits(si, idx, val); } + void + setVecRegOperand(const StaticInst *si, int idx, + const VecRegContainer& val) + { + this->cpu->setVecReg(this->_destRegIdx[idx], val); + BaseDynInst<Impl>::setVecRegOperand(si, idx, val); + } + + void 
setVecElemOperand(const StaticInst *si, int idx, + const VecElem val) + { + int reg_idx = idx; + this->cpu->setVecElem(this->_destRegIdx[reg_idx], val); + BaseDynInst<Impl>::setVecElemOperand(si, idx, val); + } + void setCCRegOperand(const StaticInst *si, int idx, CCReg val) { this->cpu->setCCReg(this->_destRegIdx[idx], val); diff --git a/src/cpu/o3/free_list.hh b/src/cpu/o3/free_list.hh index 6fc6cc909..f4c26a697 100644 --- a/src/cpu/o3/free_list.hh +++ b/src/cpu/o3/free_list.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2016 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2004-2005 The Regents of The University of Michigan * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved. @@ -63,6 +75,16 @@ class SimpleFreeList /** Add a physical register to the free list */ void addReg(PhysRegIdPtr reg) { freeRegs.push(reg); } + /** Add physical registers to the free list */ + template<class InputIt> + void + addRegs(InputIt first, InputIt last) { + std::for_each(first, last, + [this](const typename InputIt::value_type& reg) { + this->freeRegs.push(&reg); + }); + } + /** Get the next available register from the free list */ PhysRegIdPtr getReg() { @@ -107,6 +129,15 @@ class UnifiedFreeList /** The list of free floating point registers. */ SimpleFreeList floatList; + /** The following two are exclusive interfaces. */ + /** @{ */ + /** The list of free vector registers. 
*/ + SimpleFreeList vecList; + + /** The list of free vector element registers. */ + SimpleFreeList vecElemList; + /** @} */ + /** The list of free condition-code registers. */ SimpleFreeList ccList; @@ -146,18 +177,36 @@ class UnifiedFreeList /** Gets a free fp register. */ PhysRegIdPtr getFloatReg() { return floatList.getReg(); } + /** Gets a free vector register. */ + PhysRegIdPtr getVecReg() { return vecList.getReg(); } + + /** Gets a free vector element register. */ + PhysRegIdPtr getVecElem() { return vecElemList.getReg(); } + /** Gets a free cc register. */ PhysRegIdPtr getCCReg() { return ccList.getReg(); } /** Adds a register back to the free list. */ void addReg(PhysRegIdPtr freed_reg); + /** Adds a range of registers back to the free list. */ + template<class InputIt> + void addRegs(InputIt first, InputIt last); + /** Adds an integer register back to the free list. */ void addIntReg(PhysRegIdPtr freed_reg) { intList.addReg(freed_reg); } /** Adds a fp register back to the free list. */ void addFloatReg(PhysRegIdPtr freed_reg) { floatList.addReg(freed_reg); } + /** Adds a vector register back to the free list. */ + void addVecReg(PhysRegIdPtr freed_reg) { vecList.addReg(freed_reg); } + + /** Adds a vector element register back to the free list. */ + void addVecElem(PhysRegIdPtr freed_reg) { + vecElemList.addReg(freed_reg); + } + /** Adds a cc register back to the free list. */ void addCCReg(PhysRegIdPtr freed_reg) { ccList.addReg(freed_reg); } @@ -167,6 +216,12 @@ class UnifiedFreeList /** Checks if there are any free fp registers. */ bool hasFreeFloatRegs() const { return floatList.hasFreeRegs(); } + /** Checks if there are any free vector registers. */ + bool hasFreeVecRegs() const { return vecList.hasFreeRegs(); } + + /** Checks if there are any free vector element registers. */ + bool hasFreeVecElems() const { return vecElemList.hasFreeRegs(); } + /** Checks if there are any free cc registers. 
*/ bool hasFreeCCRegs() const { return ccList.hasFreeRegs(); } @@ -176,10 +231,49 @@ class UnifiedFreeList /** Returns the number of free fp registers. */ unsigned numFreeFloatRegs() const { return floatList.numFreeRegs(); } + /** Returns the number of free vector registers. */ + unsigned numFreeVecRegs() const { return vecList.numFreeRegs(); } + /** Returns the number of free cc registers. */ unsigned numFreeCCRegs() const { return ccList.numFreeRegs(); } }; +template<class InputIt> +inline void +UnifiedFreeList::addRegs(InputIt first, InputIt last) +{ + // Are there any registers to add? + if (first == last) + return; + + panic_if((first != last) && + first->classValue() != (last-1)->classValue(), + "Attempt to add mixed type regs: %s and %s", + first->className(), + (last-1)->className()); + switch (first->classValue()) { + case IntRegClass: + intList.addRegs(first, last); + break; + case FloatRegClass: + floatList.addRegs(first, last); + break; + case VecRegClass: + vecList.addRegs(first, last); + break; + case VecElemClass: + vecElemList.addRegs(first, last); + break; + case CCRegClass: + ccList.addRegs(first, last); + break; + default: + panic("Unexpected RegClass (%s)", + first->className()); + } + +} + inline void UnifiedFreeList::addReg(PhysRegIdPtr freed_reg) { @@ -194,6 +288,12 @@ UnifiedFreeList::addReg(PhysRegIdPtr freed_reg) case FloatRegClass: floatList.addReg(freed_reg); break; + case VecRegClass: + vecList.addReg(freed_reg); + break; + case VecElemClass: + vecElemList.addReg(freed_reg); + break; case CCRegClass: ccList.addReg(freed_reg); break; diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index f52cf2d6c..2b113ae04 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -98,8 +98,11 @@ InstructionQueue<Impl>::InstructionQueue(O3CPU *cpu_ptr, IEW *iew_ptr, numThreads = params->numThreads; // Set the number of total physical registers + // As the vector registers have two addressing modes, they 
are added twice numPhysRegs = params->numPhysIntRegs + params->numPhysFloatRegs + - params->numPhysCCRegs; + params->numPhysVecRegs + + params->numPhysVecRegs * TheISA::NumVecElemPerVecReg + + params->numPhysCCRegs; //Create an entry for each physical register within the //dependency graph. diff --git a/src/cpu/o3/regfile.cc b/src/cpu/o3/regfile.cc index ea4370f48..2f41e2ac2 100644 --- a/src/cpu/o3/regfile.cc +++ b/src/cpu/o3/regfile.cc @@ -1,4 +1,16 @@ /* + * Copyright (c) 2016 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2004-2005 The Regents of The University of Michigan * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved. 
@@ -34,19 +46,30 @@ #include "cpu/o3/regfile.hh" #include "cpu/o3/free_list.hh" +#include "arch/generic/types.hh" +#include "cpu/o3/free_list.hh" PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs, unsigned _numPhysicalFloatRegs, - unsigned _numPhysicalCCRegs) + unsigned _numPhysicalVecRegs, + unsigned _numPhysicalCCRegs, + VecMode vmode) : intRegFile(_numPhysicalIntRegs), floatRegFile(_numPhysicalFloatRegs), + vectorRegFile(_numPhysicalVecRegs), ccRegFile(_numPhysicalCCRegs), numPhysicalIntRegs(_numPhysicalIntRegs), numPhysicalFloatRegs(_numPhysicalFloatRegs), + numPhysicalVecRegs(_numPhysicalVecRegs), + numPhysicalVecElemRegs(_numPhysicalVecRegs * + NumVecElemPerVecReg), numPhysicalCCRegs(_numPhysicalCCRegs), totalNumRegs(_numPhysicalIntRegs + _numPhysicalFloatRegs - + _numPhysicalCCRegs) + + _numPhysicalVecRegs + + _numPhysicalVecRegs * NumVecElemPerVecReg + + _numPhysicalCCRegs), + vecMode(vmode) { PhysRegIndex phys_reg; PhysRegIndex flat_reg_idx = 0; @@ -68,6 +91,23 @@ PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs, floatRegIds.emplace_back(FloatRegClass, phys_reg, flat_reg_idx++); } + // The next batch of the registers are the vector physical + // registers; put them onto the vector free list. + for (phys_reg = 0; phys_reg < numPhysicalVecRegs; phys_reg++) { + vectorRegFile[phys_reg].zero(); + vecRegIds.emplace_back(VecRegClass, phys_reg, flat_reg_idx++); + } + // The next batch of the registers are the vector element physical + // registers; they refer to the same containers as the vector + // registers, just a different (and incompatible) way to access + // them; put them onto the vector free list. + for (phys_reg = 0; phys_reg < numPhysicalVecRegs; phys_reg++) { + for (ElemIndex eIdx = 0; eIdx < NumVecElemPerVecReg; eIdx++) { + vecElemIds.emplace_back(VecElemClass, phys_reg, + eIdx, flat_reg_idx++); + } + } + // The rest of the registers are the condition-code physical // registers; put them onto the condition-code free list. 
for (phys_reg = 0; phys_reg < numPhysicalCCRegs; phys_reg++) { @@ -90,20 +130,90 @@ PhysRegFile::initFreeList(UnifiedFreeList *freeList) // The initial batch of registers are the integer ones for (reg_idx = 0; reg_idx < numPhysicalIntRegs; reg_idx++) { assert(intRegIds[reg_idx].index() == reg_idx); - freeList->addIntReg(&intRegIds[reg_idx]); } + freeList->addRegs(intRegIds.begin(), intRegIds.end()); // The next batch of the registers are the floating-point physical // registers; put them onto the floating-point free list. for (reg_idx = 0; reg_idx < numPhysicalFloatRegs; reg_idx++) { assert(floatRegIds[reg_idx].index() == reg_idx); - freeList->addFloatReg(&floatRegIds[reg_idx]); } + freeList->addRegs(floatRegIds.begin(), floatRegIds.end()); + + /* The next batch of the registers are the vector physical + * registers; put them onto the vector free list. */ + for (reg_idx = 0; reg_idx < numPhysicalVecRegs; reg_idx++) { + assert(vecRegIds[reg_idx].index() == reg_idx); + for (ElemIndex elemIdx = 0; elemIdx < NumVecElemPerVecReg; elemIdx++) { + assert(vecElemIds[reg_idx * NumVecElemPerVecReg + + elemIdx].index() == reg_idx); + assert(vecElemIds[reg_idx * NumVecElemPerVecReg + + elemIdx].elemIndex() == elemIdx); + } + } + + /* depending on the mode we add the vector registers as whole units or + * as different elements. */ + if (vecMode == Enums::Full) + freeList->addRegs(vecRegIds.begin(), vecRegIds.end()); + else + freeList->addRegs(vecElemIds.begin(), vecElemIds.end()); // The rest of the registers are the condition-code physical // registers; put them onto the condition-code free list. 
for (reg_idx = 0; reg_idx < numPhysicalCCRegs; reg_idx++) { assert(ccRegIds[reg_idx].index() == reg_idx); - freeList->addCCReg(&ccRegIds[reg_idx]); } + freeList->addRegs(ccRegIds.begin(), ccRegIds.end()); } + +auto +PhysRegFile::getRegElemIds(PhysRegIdPtr reg) -> IdRange +{ + panic_if(!reg->isVectorPhysReg(), + "Trying to get elems of a %s register", reg->className()); + auto idx = reg->index(); + return std::make_pair( + vecElemIds.begin() + idx * NumVecElemPerVecReg, + vecElemIds.begin() + (idx+1) * NumVecElemPerVecReg); +} + +auto +PhysRegFile::getRegIds(RegClass cls) -> IdRange +{ + switch (cls) + { + case IntRegClass: + return std::make_pair(intRegIds.begin(), intRegIds.end()); + case FloatRegClass: + return std::make_pair(floatRegIds.begin(), floatRegIds.end()); + case VecRegClass: + return std::make_pair(vecRegIds.begin(), vecRegIds.end()); + case VecElemClass: + return std::make_pair(vecElemIds.begin(), vecElemIds.end()); + case CCRegClass: + return std::make_pair(ccRegIds.begin(), ccRegIds.end()); + case MiscRegClass: + return std::make_pair(miscRegIds.begin(), miscRegIds.end()); + } + /* There is no way to make an empty iterator */ + return std::make_pair(PhysIds::const_iterator(), + PhysIds::const_iterator()); +} + +PhysRegIdPtr +PhysRegFile::getTrueId(PhysRegIdPtr reg) +{ + switch (reg->classValue()) { + case VecRegClass: + return &vecRegIds[reg->index()]; + case VecElemClass: + return &vecElemIds[reg->index() * NumVecElemPerVecReg + + reg->elemIndex()]; + default: + panic_if(!reg->isVectorPhysElem(), + "Trying to get the register of a %s register", reg->className()); + } + return nullptr; +} + diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh index c353b2746..7feec933f 100644 --- a/src/cpu/o3/regfile.hh +++ b/src/cpu/o3/regfile.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2016 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other 
intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2004-2005 The Regents of The University of Michigan * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved. @@ -42,6 +54,7 @@ #include "config/the_isa.hh" #include "cpu/o3/comm.hh" #include "debug/IEW.hh" +#include "enums/VecRegRenameMode.hh" class UnifiedFreeList; @@ -56,6 +69,15 @@ class PhysRegFile typedef TheISA::FloatReg FloatReg; typedef TheISA::FloatRegBits FloatRegBits; typedef TheISA::CCReg CCReg; + using VecElem = TheISA::VecElem; + using VecRegContainer = TheISA::VecRegContainer; + using PhysIds = std::vector<PhysRegId>; + using VecMode = Enums::VecRegRenameMode; + public: + using IdRange = std::pair<PhysIds::const_iterator, + PhysIds::const_iterator>; + private: + static constexpr auto NumVecElemPerVecReg = TheISA::NumVecElemPerVecReg; typedef union { FloatReg d; @@ -70,6 +92,11 @@ class PhysRegFile std::vector<PhysFloatReg> floatRegFile; std::vector<PhysRegId> floatRegIds; + /** Vector register file. */ + std::vector<VecRegContainer> vectorRegFile; + std::vector<PhysRegId> vecRegIds; + std::vector<PhysRegId> vecElemIds; + /** Condition-code register file. 
*/ std::vector<CCReg> ccRegFile; std::vector<PhysRegId> ccRegIds; @@ -83,18 +110,31 @@ class PhysRegFile unsigned numPhysicalIntRegs; /** - * Number of physical general purpose registers + * Number of physical floating point registers */ unsigned numPhysicalFloatRegs; /** - * Number of physical general purpose registers + * Number of physical vector registers + */ + unsigned numPhysicalVecRegs; + + /** + * Number of physical vector element registers + */ + unsigned numPhysicalVecElemRegs; + + /** + * Number of physical CC registers */ unsigned numPhysicalCCRegs; /** Total number of physical registers. */ unsigned totalNumRegs; + /** Mode in which vector registers are addressed. */ + VecMode vecMode; + public: /** * Constructs a physical register file with the specified amount of @@ -102,7 +142,10 @@ class PhysRegFile */ PhysRegFile(unsigned _numPhysicalIntRegs, unsigned _numPhysicalFloatRegs, - unsigned _numPhysicalCCRegs); + unsigned _numPhysicalVecRegs, + unsigned _numPhysicalCCRegs, + VecMode vmode + ); /** * Destructor to free resources @@ -117,6 +160,11 @@ class PhysRegFile /** @return the number of floating-point physical registers. */ unsigned numFloatPhysRegs() const { return numPhysicalFloatRegs; } + /** @return the number of vector physical registers. */ + unsigned numVecPhysRegs() const { return numPhysicalVecRegs; } + + /** @return the number of vector element physical registers. */ + unsigned numVecElemPhysRegs() const { return numPhysicalVecElemRegs; } /** @return the number of condition-code physical registers. */ unsigned numCCPhysRegs() const { return numPhysicalCCRegs; } @@ -164,6 +212,68 @@ class PhysRegFile return floatRegBits; } + /** Reads a vector register. 
*/ + const VecRegContainer& readVecReg(PhysRegIdPtr phys_reg) const + { + assert(phys_reg->isVectorPhysReg()); + + DPRINTF(IEW, "RegFile: Access to vector register %i, has " + "data %s\n", int(phys_reg->index()), + vectorRegFile[phys_reg->index()].as<VecElem>().print()); + + return vectorRegFile[phys_reg->index()]; + } + + /** Reads a vector register for modification. */ + VecRegContainer& getWritableVecReg(PhysRegIdPtr phys_reg) + { + /* const_cast for not duplicating code above. */ + return const_cast<VecRegContainer&>(readVecReg(phys_reg)); + } + + /** Reads a vector register lane. */ + template <typename VecElem, int LaneIdx> + VecLaneT<VecElem, true> + readVecLane(PhysRegIdPtr phys_reg) const + { + return readVecReg(phys_reg).laneView<VecElem, LaneIdx>(); + } + + /** Reads a vector register lane. */ + template <typename VecElem> + VecLaneT<VecElem, true> + readVecLane(PhysRegIdPtr phys_reg) const + { + return readVecReg(phys_reg).laneView<VecElem>(phys_reg->elemIndex()); + } + + /** Get a vector register lane for modification. */ + template <typename LD> + void + setVecLane(PhysRegIdPtr phys_reg, const LD& val) + { + assert(phys_reg->isVectorPhysReg()); + + DPRINTF(IEW, "RegFile: Setting vector register %i[%d] to %lx\n", + int(phys_reg->index()), phys_reg->elemIndex(), val); + + vectorRegFile[phys_reg->index()].laneView<typename LD::UnderlyingType>( + phys_reg->elemIndex()) = val; + } + + /** Reads a vector element. */ + const VecElem& readVecElem(PhysRegIdPtr phys_reg) const + { + assert(phys_reg->isVectorPhysElem()); + auto ret = vectorRegFile[phys_reg->index()].as<VecElem>(); + const VecElem& val = ret[phys_reg->elemIndex()]; + DPRINTF(IEW, "RegFile: Access to element %d of vector register %i," + " has data %#x\n", phys_reg->elemIndex(), + int(phys_reg->index()), val); + + return val; + } + /** Reads a condition-code register. 
*/ CCReg readCCReg(PhysRegIdPtr phys_reg) { @@ -207,7 +317,31 @@ class PhysRegFile DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n", phys_reg->index(), (uint64_t)val); - floatRegFile[phys_reg->index()].q = val; + if (!phys_reg->isZeroReg()) + floatRegFile[phys_reg->index()].q = val; + } + + /** Sets a vector register to the given value. */ + void setVecReg(PhysRegIdPtr phys_reg, const VecRegContainer& val) + { + assert(phys_reg->isVectorPhysReg()); + + DPRINTF(IEW, "RegFile: Setting vector register %i to %s\n", + int(phys_reg->index()), val.print()); + + vectorRegFile[phys_reg->index()] = val; + } + + /** Sets a vector element to the given value. */ + void setVecElem(PhysRegIdPtr phys_reg, const VecElem val) + { + assert(phys_reg->isVectorPhysElem()); + + DPRINTF(IEW, "RegFile: Setting element %d of vector register %i to" + " %#x\n", phys_reg->elemIndex(), int(phys_reg->index()), val); + + vectorRegFile[phys_reg->index()].as<VecElem>()[phys_reg->elemIndex()] = + val; } /** Sets a condition-code register to the given value. */ @@ -220,6 +354,25 @@ class PhysRegFile ccRegFile[phys_reg->index()] = val; } + + /** Get the PhysRegIds of the elems of a vector register. + * Auxiliary function to transition from Full vector mode to Elem mode. + */ + IdRange getRegElemIds(PhysRegIdPtr reg); + + /** + * Get the PhysRegIds of all the registers of the given register class. + * Auxiliary function to transition from Full vector mode to Elem mode + * and to initialise the rename map. + */ + IdRange getRegIds(RegClass cls); + + /** + * Get the true physical register id. + * As many parts work with PhysRegIdPtr, we need to be able to produce + * the pointer out of just class and register idx. + */ + PhysRegIdPtr getTrueId(PhysRegIdPtr reg); }; diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh index 6d3861ba6..d0f6ba13d 100644 --- a/src/cpu/o3/rename.hh +++ b/src/cpu/o3/rename.hh @@ -464,8 +464,6 @@ class DefaultRename /** The maximum skid buffer size. 
*/ unsigned skidBufferMax; - PhysRegIndex maxPhysicalRegs; - /** Enum to record the source of a structure full stall. Can come from * either ROB, IQ, LSQ, and it is priortized in that order. */ @@ -515,6 +513,7 @@ class DefaultRename Stats::Scalar renameRenameLookups; Stats::Scalar intRenameLookups; Stats::Scalar fpRenameLookups; + Stats::Scalar vecRenameLookups; /** Stat for total number of committed renaming mappings. */ Stats::Scalar renameCommittedMaps; /** Stat for total number of mappings that were undone due to a squash. */ diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index 9c9b030f5..b9adcdff7 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -67,9 +67,7 @@ DefaultRename<Impl>::DefaultRename(O3CPU *_cpu, DerivO3CPUParams *params) commitToRenameDelay(params->commitToRenameDelay), renameWidth(params->renameWidth), commitWidth(params->commitWidth), - numThreads(params->numThreads), - maxPhysicalRegs(params->numPhysIntRegs + params->numPhysFloatRegs - + params->numPhysCCRegs) + numThreads(params->numThreads) { if (renameWidth > Impl::MaxWidth) fatal("renameWidth (%d) is larger than compiled limit (%d),\n" @@ -182,6 +180,10 @@ DefaultRename<Impl>::regStats() .name(name() + ".fp_rename_lookups") .desc("Number of floating rename lookups") .prereq(fpRenameLookups); + vecRenameLookups + .name(name() + ".vec_rename_lookups") + .desc("Number of vector rename lookups") + .prereq(vecRenameLookups); } template <class Impl> @@ -645,6 +647,8 @@ DefaultRename<Impl>::renameInsts(ThreadID tid) // to rename to. Otherwise block. 
if (!renameMap[tid]->canRename(inst->numIntDestRegs(), inst->numFPDestRegs(), + inst->numVecDestRegs(), + inst->numVecElemDestRegs(), inst->numCCDestRegs())) { DPRINTF(Rename, "Blocking due to lack of free " "physical registers to rename to.\n"); @@ -1239,12 +1243,17 @@ DefaultRename<Impl>::readFreeEntries(ThreadID tid) } DPRINTF(Rename, "[tid:%i]: Free IQ: %i, Free ROB: %i, " - "Free LQ: %i, Free SQ: %i\n", + "Free LQ: %i, Free SQ: %i, FreeRM %i(%i %i %i %i)\n", tid, freeEntries[tid].iqEntries, freeEntries[tid].robEntries, freeEntries[tid].lqEntries, - freeEntries[tid].sqEntries); + freeEntries[tid].sqEntries, + renameMap[tid]->numFreeEntries(), + renameMap[tid]->numFreeIntEntries(), + renameMap[tid]->numFreeFloatEntries(), + renameMap[tid]->numFreeVecEntries(), + renameMap[tid]->numFreeCCEntries()); DPRINTF(Rename, "[tid:%i]: %i instructions not yet in ROB\n", tid, instsInProgress[tid]); diff --git a/src/cpu/o3/rename_map.cc b/src/cpu/o3/rename_map.cc index 38ccc7ec9..bde2a6921 100644 --- a/src/cpu/o3/rename_map.cc +++ b/src/cpu/o3/rename_map.cc @@ -1,4 +1,16 @@ /* + * Copyright (c) 2016 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2004-2005 The Regents of The University of Michigan * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved. 
@@ -93,15 +105,92 @@ void UnifiedRenameMap::init(PhysRegFile *_regFile, RegIndex _intZeroReg, RegIndex _floatZeroReg, - UnifiedFreeList *freeList) + UnifiedFreeList *freeList, + VecMode _mode) { regFile = _regFile; + vecMode = _mode; intMap.init(TheISA::NumIntRegs, &(freeList->intList), _intZeroReg); floatMap.init(TheISA::NumFloatRegs, &(freeList->floatList), _floatZeroReg); + vecMap.init(TheISA::NumVecRegs, &(freeList->vecList), (RegIndex)-1); + + vecElemMap.init(TheISA::NumVecRegs * NVecElems, + &(freeList->vecElemList), (RegIndex)-1); + ccMap.init(TheISA::NumCCRegs, &(freeList->ccList), (RegIndex)-1); } +void +UnifiedRenameMap::switchMode(VecMode newVecMode, UnifiedFreeList* freeList) +{ + if (newVecMode == Enums::Elem && vecMode == Enums::Full) { + /* Switch to vector element rename mode. */ + /* The free list should currently be tracking full registers. */ + panic_if(freeList->hasFreeVecElems(), + "The free list is already tracking Vec elems"); + panic_if(freeList->numFreeVecRegs() != + regFile->numVecPhysRegs() - TheISA::NumVecRegs, + "The free list has lost vector registers"); + /* Split the mapping of each arch reg. */ + int reg = 0; + for (auto &e: vecMap) { + PhysRegFile::IdRange range = this->regFile->getRegElemIds(e); + uint32_t i; + for (i = 0; range.first != range.second; i++, range.first++) { + vecElemMap.setEntry(RegId(VecElemClass, reg, i), + &(*range.first)); + } + panic_if(i != NVecElems, + "Wrong name of elems: expecting %u, got %d\n", + TheISA::NumVecElemPerVecReg, i); + reg++; + } + /* Split the free regs. */ + while (freeList->hasFreeVecRegs()) { + auto vr = freeList->getVecReg(); + auto range = this->regFile->getRegElemIds(vr); + freeList->addRegs(range.first, range.second); + } + vecMode = Enums::Elem; + } else if (newVecMode == Enums::Full && vecMode == Enums::Elem) { + /* Switch to full vector register rename mode. */ + /* The free list should currently be tracking register elems. 
*/ + panic_if(freeList->hasFreeVecRegs(), + "The free list is already tracking full Vec"); + panic_if(freeList->numFreeVecRegs() != + regFile->numVecElemPhysRegs() - TheISA::NumFloatRegs, + "The free list has lost vector register elements"); + /* To rebuild the arch regs we take the easy road: + * 1.- Stitch the elems together into vectors. + * 2.- Replace the contents of the register file with the vectors + * 3.- Set the remaining registers as free + */ + TheISA::VecRegContainer new_RF[TheISA::NumVecRegs]; + for (uint32_t i = 0; i < TheISA::NumVecRegs; i++) { + VecReg dst = new_RF[i].as<TheISA::VecElem>(); + for (uint32_t l = 0; l < NVecElems; l++) { + RegId s_rid(VecElemClass, i, l); + PhysRegIdPtr s_prid = vecElemMap.lookup(s_rid); + dst[l] = regFile->readVecElem(s_prid); + } + } + + for (uint32_t i = 0; i < TheISA::NumVecRegs; i++) { + PhysRegId pregId(VecRegClass, i, 0); + regFile->setVecReg(regFile->getTrueId(&pregId), new_RF[i]); + } + + auto range = regFile->getRegIds(VecRegClass); + freeList->addRegs(range.first + TheISA::NumVecRegs, range.second); + + /* We remove the elems from the free list. 
*/ + while (freeList->hasFreeVecElems()) + freeList->getVecElem(); + vecMode = Enums::Full; + } +} + diff --git a/src/cpu/o3/rename_map.hh b/src/cpu/o3/rename_map.hh index 028c32e3a..ab909f090 100644 --- a/src/cpu/o3/rename_map.hh +++ b/src/cpu/o3/rename_map.hh @@ -54,6 +54,7 @@ #include "cpu/o3/free_list.hh" #include "cpu/o3/regfile.hh" #include "cpu/reg_class.hh" +#include "enums/VecRegRenameMode.hh" /** * Register rename map for a single class of registers (e.g., integer @@ -68,6 +69,10 @@ class SimpleRenameMap using Arch2PhysMap = std::vector<PhysRegIdPtr>; /** The acutal arch-to-phys register map */ Arch2PhysMap map; + public: + using iterator = Arch2PhysMap::iterator; + using const_iterator = Arch2PhysMap::const_iterator; + private: /** * Pointer to the free list from which new physical registers @@ -139,6 +144,20 @@ class SimpleRenameMap /** Return the number of free entries on the associated free list. */ unsigned numFreeEntries() const { return freeList->numFreeRegs(); } + + /** Forward begin/cbegin to the map. */ + /** @{ */ + iterator begin() { return map.begin(); } + const_iterator begin() const { return map.begin(); } + const_iterator cbegin() const { return map.cbegin(); } + /** @} */ + + /** Forward end/cend to the map. 
*/ + /** @{ */ + iterator end() { return map.end(); } + const_iterator end() const { return map.end(); } + const_iterator cend() const { return map.cend(); } + /** @} */ }; @@ -152,6 +171,8 @@ class SimpleRenameMap class UnifiedRenameMap { private: + static constexpr uint32_t NVecElems = TheISA::NumVecElemPerVecReg; + using VecReg = TheISA::VecReg; /** The integer register rename map */ SimpleRenameMap intMap; @@ -162,6 +183,15 @@ class UnifiedRenameMap /** The condition-code register rename map */ SimpleRenameMap ccMap; + /** The vector register rename map */ + SimpleRenameMap vecMap; + + /** The vector element register rename map */ + SimpleRenameMap vecElemMap; + + using VecMode = Enums::VecRegRenameMode; + VecMode vecMode; + /** * The register file object is used only to get PhysRegIdPtr * on MiscRegs, as they are stored in it. @@ -182,7 +212,8 @@ class UnifiedRenameMap void init(PhysRegFile *_regFile, RegIndex _intZeroReg, RegIndex _floatZeroReg, - UnifiedFreeList *freeList); + UnifiedFreeList *freeList, + VecMode _mode); /** * Tell rename map to get a new free physical register to remap @@ -199,6 +230,12 @@ class UnifiedRenameMap return intMap.rename(arch_reg); case FloatRegClass: return floatMap.rename(arch_reg); + case VecRegClass: + assert(vecMode == Enums::Full); + return vecMap.rename(arch_reg); + case VecElemClass: + assert(vecMode == Enums::Elem); + return vecElemMap.rename(arch_reg); case CCRegClass: return ccMap.rename(arch_reg); case MiscRegClass: @@ -232,6 +269,14 @@ class UnifiedRenameMap case FloatRegClass: return floatMap.lookup(arch_reg); + case VecRegClass: + assert(vecMode == Enums::Full); + return vecMap.lookup(arch_reg); + + case VecElemClass: + assert(vecMode == Enums::Elem); + return vecElemMap.lookup(arch_reg); + case CCRegClass: return ccMap.lookup(arch_reg); @@ -265,6 +310,16 @@ class UnifiedRenameMap assert(phys_reg->isFloatPhysReg()); return floatMap.setEntry(arch_reg, phys_reg); + case VecRegClass: + 
assert(phys_reg->isVectorPhysReg()); + assert(vecMode == Enums::Full); + return vecMap.setEntry(arch_reg, phys_reg); + + case VecElemClass: + assert(phys_reg->isVectorPhysElem()); + assert(vecMode == Enums::Elem); + return vecElemMap.setEntry(arch_reg, phys_reg); + case CCRegClass: assert(phys_reg->isCCPhysReg()); return ccMap.setEntry(arch_reg, phys_reg); @@ -291,18 +346,39 @@ class UnifiedRenameMap */ unsigned numFreeEntries() const { - return std::min(intMap.numFreeEntries(), floatMap.numFreeEntries()); + return std::min( + std::min(intMap.numFreeEntries(), floatMap.numFreeEntries()), + vecMode == Enums::Full ? vecMap.numFreeEntries() + : vecElemMap.numFreeEntries()); } + unsigned numFreeIntEntries() const { return intMap.numFreeEntries(); } + unsigned numFreeFloatEntries() const { return floatMap.numFreeEntries(); } + unsigned numFreeVecEntries() const + { + return vecMode == Enums::Full + ? vecMap.numFreeEntries() + : vecElemMap.numFreeEntries(); + } + unsigned numFreeCCEntries() const { return ccMap.numFreeEntries(); } + /** * Return whether there are enough registers to serve the request. */ - bool canRename(uint32_t intRegs, uint32_t floatRegs, uint32_t ccRegs) const + bool canRename(uint32_t intRegs, uint32_t floatRegs, uint32_t vectorRegs, + uint32_t vecElemRegs, uint32_t ccRegs) const { return intRegs <= intMap.numFreeEntries() && floatRegs <= floatMap.numFreeEntries() && + vectorRegs <= vecMap.numFreeEntries() && + vecElemRegs <= vecElemMap.numFreeEntries() && ccRegs <= ccMap.numFreeEntries(); } + /** + * Set vector mode to Full or Elem. + * Ignore 'silent' modifications. 
+ */ + void switchMode(VecMode newVecMode, UnifiedFreeList* freeList); }; diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh index 161d70b28..ac4ceed02 100755 --- a/src/cpu/o3/thread_context.hh +++ b/src/cpu/o3/thread_context.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2012 ARM Limited + * Copyright (c) 2011-2012, 2016 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved * @@ -194,6 +194,70 @@ class O3ThreadContext : public ThreadContext reg_idx)).index()); } + virtual const VecRegContainer& readVecReg(const RegId& id) const { + return readVecRegFlat(flattenRegId(id).index()); + } + + /** + * Read vector register operand for modification, hierarchical indexing. + */ + virtual VecRegContainer& getWritableVecReg(const RegId& id) { + return getWritableVecRegFlat(flattenRegId(id).index()); + } + + /** Vector Register Lane Interfaces. */ + /** @{ */ + /** Reads source vector 8bit operand. */ + virtual ConstVecLane8 + readVec8BitLaneReg(const RegId& id) const + { + return readVecLaneFlat<uint8_t>(flattenRegId(id).index(), + id.elemIndex()); + } + + /** Reads source vector 16bit operand. */ + virtual ConstVecLane16 + readVec16BitLaneReg(const RegId& id) const + { + return readVecLaneFlat<uint16_t>(flattenRegId(id).index(), + id.elemIndex()); + } + + /** Reads source vector 32bit operand. */ + virtual ConstVecLane32 + readVec32BitLaneReg(const RegId& id) const + { + return readVecLaneFlat<uint32_t>(flattenRegId(id).index(), + id.elemIndex()); + } + + /** Reads source vector 64bit operand. */ + virtual ConstVecLane64 + readVec64BitLaneReg(const RegId& id) const + { + return readVecLaneFlat<uint64_t>(flattenRegId(id).index(), + id.elemIndex()); + } + + /** Write a lane of the destination vector register. 
*/ + virtual void setVecLane(const RegId& reg, + const LaneData<LaneSize::Byte>& val) + { return setVecLaneFlat(flattenRegId(reg).index(), reg.elemIndex(), val); } + virtual void setVecLane(const RegId& reg, + const LaneData<LaneSize::TwoByte>& val) + { return setVecLaneFlat(flattenRegId(reg).index(), reg.elemIndex(), val); } + virtual void setVecLane(const RegId& reg, + const LaneData<LaneSize::FourByte>& val) + { return setVecLaneFlat(flattenRegId(reg).index(), reg.elemIndex(), val); } + virtual void setVecLane(const RegId& reg, + const LaneData<LaneSize::EightByte>& val) + { return setVecLaneFlat(flattenRegId(reg).index(), reg.elemIndex(), val); } + /** @} */ + + virtual const VecElem& readVecElem(const RegId& reg) const { + return readVecElemFlat(flattenRegId(reg).index(), reg.elemIndex()); + } + virtual CCReg readCCReg(int reg_idx) { return readCCRegFlat(flattenRegId(RegId(CCRegClass, reg_idx)).index()); @@ -214,6 +278,14 @@ class O3ThreadContext : public ThreadContext reg_idx)).index(), val); } + virtual void setVecReg(const RegId& reg, const VecRegContainer& val) { + setVecRegFlat(flattenRegId(reg).index(), val); + } + + virtual void setVecElem(const RegId& reg, const VecElem& val) { + setVecElemFlat(flattenRegId(reg).index(), reg.elemIndex(), val); + } + virtual void setCCReg(int reg_idx, CCReg val) { setCCRegFlat(flattenRegId(RegId(CCRegClass, reg_idx)).index(), val); } @@ -298,6 +370,29 @@ class O3ThreadContext : public ThreadContext virtual FloatRegBits readFloatRegBitsFlat(int idx); virtual void setFloatRegBitsFlat(int idx, FloatRegBits val); + virtual const VecRegContainer& readVecRegFlat(int idx) const; + /** Read vector register operand for modification, flat indexing. 
*/ + virtual VecRegContainer& getWritableVecRegFlat(int idx); + virtual void setVecRegFlat(int idx, const VecRegContainer& val); + + template <typename VecElem> + VecLaneT<VecElem, true> readVecLaneFlat(int idx, int lId) const + { + return cpu->template readArchVecLane<VecElem>(idx, lId, + thread->threadId()); + } + + template <typename LD> + void setVecLaneFlat(int idx, int lId, const LD& val) + { + cpu->template setArchVecLane(idx, lId, thread->threadId(), val); + } + + virtual const VecElem& readVecElemFlat(const RegIndex& idx, + const ElemIndex& elemIndex) const; + virtual void setVecElemFlat(const RegIndex& idx, const ElemIndex& elemIdx, + const VecElem& val); + virtual CCReg readCCRegFlat(int idx); virtual void setCCRegFlat(int idx, CCReg val); }; diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh index c3f894275..2d109aea9 100755 --- a/src/cpu/o3/thread_context_impl.hh +++ b/src/cpu/o3/thread_context_impl.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2012 ARM Limited + * Copyright (c) 2010-2012, 2016 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. 
* All rights reserved * @@ -209,6 +209,28 @@ O3ThreadContext<Impl>::readFloatRegBitsFlat(int reg_idx) } template <class Impl> +const TheISA::VecRegContainer& +O3ThreadContext<Impl>::readVecRegFlat(int reg_id) const +{ + return cpu->readArchVecReg(reg_id, thread->threadId()); +} + +template <class Impl> +TheISA::VecRegContainer& +O3ThreadContext<Impl>::getWritableVecRegFlat(int reg_id) +{ + return cpu->getWritableArchVecReg(reg_id, thread->threadId()); +} + +template <class Impl> +const TheISA::VecElem& +O3ThreadContext<Impl>::readVecElemFlat(const RegIndex& idx, + const ElemIndex& elemIndex) const +{ + return cpu->readArchVecElem(idx, elemIndex, thread->threadId()); +} + +template <class Impl> TheISA::CCReg O3ThreadContext<Impl>::readCCRegFlat(int reg_idx) { @@ -244,6 +266,24 @@ O3ThreadContext<Impl>::setFloatRegBitsFlat(int reg_idx, FloatRegBits val) template <class Impl> void +O3ThreadContext<Impl>::setVecRegFlat(int reg_idx, const VecRegContainer& val) +{ + cpu->setArchVecReg(reg_idx, val, thread->threadId()); + + conditionalSquash(); +} + +template <class Impl> +void +O3ThreadContext<Impl>::setVecElemFlat(const RegIndex& idx, + const ElemIndex& elemIndex, const VecElem& val) +{ + cpu->setArchVecElem(idx, elemIndex, val, thread->threadId()); + conditionalSquash(); +} + +template <class Impl> +void O3ThreadContext<Impl>::setCCRegFlat(int reg_idx, TheISA::CCReg val) { cpu->setArchCCReg(reg_idx, val, thread->threadId()); diff --git a/src/cpu/reg_class.cc b/src/cpu/reg_class.cc index 53a50ce8e..16c1949ee 100644 --- a/src/cpu/reg_class.cc +++ b/src/cpu/reg_class.cc @@ -1,4 +1,16 @@ /* + * Copyright (c) 2016 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. 
You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved * @@ -33,6 +45,8 @@ const char *RegId::regClassStrings[] = { "IntRegClass", "FloatRegClass", + "VecRegClass", + "VecElemClass", "CCRegClass", "MiscRegClass" }; diff --git a/src/cpu/reg_class.hh b/src/cpu/reg_class.hh index 05869e8fb..27bf59b19 100644 --- a/src/cpu/reg_class.hh +++ b/src/cpu/reg_class.hh @@ -39,6 +39,7 @@ * * Authors: Steve Reinhardt * Nathanael Premillieu + * Rekai Gonzalez */ #ifndef __CPU__REG_CLASS_HH__ @@ -55,6 +56,10 @@ enum RegClass { IntRegClass, ///< Integer register FloatRegClass, ///< Floating-point register + /** Vector Register. */ + VecRegClass, + /** Vector Register Native Elem lane. */ + VecElemClass, CCRegClass, ///< Condition-code register MiscRegClass ///< Control (misc) register }; @@ -75,14 +80,27 @@ class RegId { static const char* regClassStrings[]; RegClass regClass; RegIndex regIdx; + ElemIndex elemIdx; + static constexpr size_t Scale = TheISA::NumVecElemPerVecReg; public: RegId() {}; RegId(RegClass reg_class, RegIndex reg_idx) - : regClass(reg_class), regIdx(reg_idx) - {} + : regClass(reg_class), regIdx(reg_idx), elemIdx(-1) + { + panic_if(regClass == VecElemClass, + "Creating vector physical index w/o element index"); + } + + explicit RegId(RegClass reg_class, RegIndex reg_idx, ElemIndex elem_idx) + : regClass(reg_class), regIdx(reg_idx), elemIdx(elem_idx) + { + panic_if(regClass != VecElemClass, + "Creating non-vector physical index w/ element index"); + } bool operator==(const RegId& that) const { - return regClass == that.classValue() && regIdx == that.index(); + return regClass == that.classValue() && regIdx == that.index() + && elemIdx == that.elemIndex(); } bool operator!=(const RegId& that) 
const { @@ -94,7 +112,9 @@ class RegId { */ bool operator<(const RegId& that) const { return regClass < that.classValue() || - (regClass == that.classValue() && regIdx < that.index()); + (regClass == that.classValue() && ( + regIdx < that.index() || + (regIdx == that.index() && elemIdx < that.elemIndex()))); } /** @@ -120,11 +140,25 @@ class RegId { bool isFloatReg() const { return regClass == FloatRegClass; } /** @Return true if it is a condition-code physical register. */ + bool isVecReg() const { return regClass == VecRegClass; } + + /** @Return true if it is a condition-code physical register. */ + bool isVecElem() const { return regClass == VecElemClass; } + + /** @Return true if it is a condition-code physical register. */ bool isCCReg() const { return regClass == CCRegClass; } /** @Return true if it is a condition-code physical register. */ bool isMiscReg() const { return regClass == MiscRegClass; } + /** + * Return true if this register can be renamed + */ + bool isRenameable() + { + return regClass != MiscRegClass; + } + /** Index accessors */ /** @{ */ const RegIndex& index() const { return regIdx; } @@ -136,6 +170,8 @@ class RegId { inline RegIndex flatIndex() const; /** @} */ + /** Elem accessor */ + const RegIndex& elemIndex() const { return elemIdx; } /** Class accessor */ const RegClass& classValue() const { return regClass; } /** Return a const char* with the register class name. 
*/ diff --git a/src/cpu/reg_class_impl.hh b/src/cpu/reg_class_impl.hh index a47328b10..98b341e86 100644 --- a/src/cpu/reg_class_impl.hh +++ b/src/cpu/reg_class_impl.hh @@ -55,13 +55,18 @@ bool RegId::isZeroReg() const regIdx == TheISA::ZeroReg)); } +static constexpr size_t Scale = TheISA::NumVecElemPerVecReg; + RegIndex RegId::flatIndex() const { switch (regClass) { case IntRegClass: case FloatRegClass: + case VecRegClass: case CCRegClass: case MiscRegClass: return regIdx; + case VecElemClass: + return Scale*regIdx + elemIdx; } panic("Trying to flatten a register without class!"); return -1; diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc index 77d2fb4ac..57cea4ba7 100644 --- a/src/cpu/simple/base.cc +++ b/src/cpu/simple/base.cc @@ -292,6 +292,16 @@ BaseSimpleCPU::regStats() .desc("number of times the floating registers were written") ; + t_info.numVecRegReads + .name(thread_str + ".num_vec_register_reads") + .desc("number of times the vector registers were read") + ; + + t_info.numVecRegWrites + .name(thread_str + ".num_vec_register_writes") + .desc("number of times the vector registers were written") + ; + t_info.numCCRegReads .name(thread_str + ".num_cc_register_reads") .desc("number of times the CC registers were read") diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh index f221d6c93..0f546407d 100644 --- a/src/cpu/simple/exec_context.hh +++ b/src/cpu/simple/exec_context.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2015 ARM Limited + * Copyright (c) 2014-2016 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -64,6 +64,8 @@ class SimpleExecContext : public ExecContext { typedef TheISA::FloatReg FloatReg; typedef TheISA::FloatRegBits FloatRegBits; typedef TheISA::CCReg CCReg; + using VecRegContainer = TheISA::VecRegContainer; + using VecElem = TheISA::VecElem; public: BaseSimpleCPU *cpu; @@ -112,6 +114,10 @@ class SimpleExecContext : public ExecContext { 
Stats::Scalar numFpRegReads; Stats::Scalar numFpRegWrites; + // Number of vector register file accesses + mutable Stats::Scalar numVecRegReads; + Stats::Scalar numVecRegWrites; + // Number of condition code register file accesses Stats::Scalar numCCRegReads; Stats::Scalar numCCRegWrites; @@ -219,6 +225,124 @@ class SimpleExecContext : public ExecContext { thread->setFloatRegBits(reg.index(), val); } + /** Reads a vector register. */ + const VecRegContainer& + readVecRegOperand(const StaticInst *si, int idx) const override + { + numVecRegReads++; + const RegId& reg = si->srcRegIdx(idx); + assert(reg.isVecReg()); + return thread->readVecReg(reg); + } + + /** Reads a vector register for modification. */ + VecRegContainer& + getWritableVecRegOperand(const StaticInst *si, int idx) override + { + numVecRegWrites++; + const RegId& reg = si->destRegIdx(idx); + assert(reg.isVecReg()); + return thread->getWritableVecReg(reg); + } + + /** Sets a vector register to a value. */ + void setVecRegOperand(const StaticInst *si, int idx, + const VecRegContainer& val) override + { + numVecRegWrites++; + const RegId& reg = si->destRegIdx(idx); + assert(reg.isVecReg()); + thread->setVecReg(reg, val); + } + + /** Vector Register Lane Interfaces. */ + /** @{ */ + /** Reads source vector lane. */ + template <typename VecElem> + VecLaneT<VecElem, true> + readVecLaneOperand(const StaticInst *si, int idx) const + { + numVecRegReads++; + const RegId& reg = si->srcRegIdx(idx); + assert(reg.isVecReg()); + return thread->readVecLane<VecElem>(reg); + } + /** Reads source vector 8bit operand. */ + virtual ConstVecLane8 + readVec8BitLaneOperand(const StaticInst *si, int idx) const + override + { return readVecLaneOperand<uint8_t>(si, idx); } + + /** Reads source vector 16bit operand. */ + virtual ConstVecLane16 + readVec16BitLaneOperand(const StaticInst *si, int idx) const + override + { return readVecLaneOperand<uint16_t>(si, idx); } + + /** Reads source vector 32bit operand. 
*/ + virtual ConstVecLane32 + readVec32BitLaneOperand(const StaticInst *si, int idx) const + override + { return readVecLaneOperand<uint32_t>(si, idx); } + + /** Reads source vector 64bit operand. */ + virtual ConstVecLane64 + readVec64BitLaneOperand(const StaticInst *si, int idx) const + override + { return readVecLaneOperand<uint64_t>(si, idx); } + + /** Write a lane of the destination vector operand. */ + template <typename LD> + void + setVecLaneOperandT(const StaticInst *si, int idx, + const LD& val) + { + numVecRegWrites++; + const RegId& reg = si->destRegIdx(idx); + assert(reg.isVecReg()); + return thread->setVecLane(reg, val); + } + /** Write a lane of the destination vector operand. */ + virtual void + setVecLaneOperand(const StaticInst *si, int idx, + const LaneData<LaneSize::Byte>& val) override + { return setVecLaneOperandT(si, idx, val); } + /** Write a lane of the destination vector operand. */ + virtual void + setVecLaneOperand(const StaticInst *si, int idx, + const LaneData<LaneSize::TwoByte>& val) override + { return setVecLaneOperandT(si, idx, val); } + /** Write a lane of the destination vector operand. */ + virtual void + setVecLaneOperand(const StaticInst *si, int idx, + const LaneData<LaneSize::FourByte>& val) override + { return setVecLaneOperandT(si, idx, val); } + /** Write a lane of the destination vector operand. */ + virtual void + setVecLaneOperand(const StaticInst *si, int idx, + const LaneData<LaneSize::EightByte>& val) override + { return setVecLaneOperandT(si, idx, val); } + /** @} */ + + /** Reads an element of a vector register. */ + VecElem readVecElemOperand(const StaticInst *si, int idx) const override + { + numVecRegReads++; + const RegId& reg = si->destRegIdx(idx); + assert(reg.isVecElem()); + return thread->readVecElem(reg); + } + + /** Sets an element of a vector register to a value. 
*/ + void setVecElemOperand(const StaticInst *si, int idx, + const VecElem val) override + { + numVecRegWrites++; + const RegId& reg = si->destRegIdx(idx); + assert(reg.isVecElem()); + thread->setVecElem(reg, val); + } + CCReg readCCRegOperand(const StaticInst *si, int idx) override { numCCRegReads++; diff --git a/src/cpu/simple_thread.hh b/src/cpu/simple_thread.hh index 286d91766..4ea8b91ba 100644 --- a/src/cpu/simple_thread.hh +++ b/src/cpu/simple_thread.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2012 ARM Limited + * Copyright (c) 2011-2012, 2016 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved * @@ -58,6 +58,7 @@ #include "debug/CCRegs.hh" #include "debug/FloatRegs.hh" #include "debug/IntRegs.hh" +#include "debug/VecRegs.hh" #include "mem/page_table.hh" #include "mem/request.hh" #include "sim/byteswap.hh" @@ -102,6 +103,8 @@ class SimpleThread : public ThreadState typedef TheISA::FloatReg FloatReg; typedef TheISA::FloatRegBits FloatRegBits; typedef TheISA::CCReg CCReg; + using VecRegContainer = TheISA::VecRegContainer; + using VecElem = TheISA::VecElem; public: typedef ThreadContext::Status Status; @@ -111,6 +114,7 @@ class SimpleThread : public ThreadState FloatRegBits i[TheISA::NumFloatRegs]; } floatRegs; TheISA::IntReg intRegs[TheISA::NumIntRegs]; + VecRegContainer vecRegs[TheISA::NumVecRegs]; #ifdef ISA_HAS_CC_REGS TheISA::CCReg ccRegs[TheISA::NumCCRegs]; #endif @@ -227,6 +231,9 @@ class SimpleThread : public ThreadState _pcState = 0; memset(intRegs, 0, sizeof(intRegs)); memset(floatRegs.i, 0, sizeof(floatRegs.i)); + for (int i = 0; i < TheISA::NumVecRegs; i++) { + vecRegs[i].zero(); + } #ifdef ISA_HAS_CC_REGS memset(ccRegs, 0, sizeof(ccRegs)); #endif @@ -266,6 +273,98 @@ class SimpleThread : public ThreadState return regVal; } + const VecRegContainer& + readVecReg(const RegId& reg) const + { + int flatIndex = isa->flattenVecIndex(reg.index()); + assert(flatIndex < TheISA::NumVecRegs); + const VecRegContainer& regVal = 
readVecRegFlat(flatIndex); + DPRINTF(VecRegs, "Reading vector reg %d (%d) as %s.\n", + reg.index(), flatIndex, regVal.as<TheISA::VecElem>().print()); + return regVal; + } + + VecRegContainer& + getWritableVecReg(const RegId& reg) + { + int flatIndex = isa->flattenVecIndex(reg.index()); + assert(flatIndex < TheISA::NumVecRegs); + VecRegContainer& regVal = getWritableVecRegFlat(flatIndex); + DPRINTF(VecRegs, "Reading vector reg %d (%d) as %s for modify.\n", + reg.index(), flatIndex, regVal.as<TheISA::VecElem>().print()); + return regVal; + } + + /** Vector Register Lane Interfaces. */ + /** @{ */ + /** Reads source vector <T> operand. */ + template <typename T> + VecLaneT<T, true> + readVecLane(const RegId& reg) const + { + int flatIndex = isa->flattenVecIndex(reg.index()); + assert(flatIndex < TheISA::NumVecRegs); + auto regVal = readVecLaneFlat<T>(flatIndex, reg.elemIndex()); + DPRINTF(VecRegs, "Reading vector lane %d (%d)[%d] as %lx.\n", + reg.index(), flatIndex, reg.elemIndex(), regVal); + return regVal; + } + + /** Reads source vector 8bit operand. */ + virtual ConstVecLane8 + readVec8BitLaneReg(const RegId& reg) const + { return readVecLane<uint8_t>(reg); } + + /** Reads source vector 16bit operand. */ + virtual ConstVecLane16 + readVec16BitLaneReg(const RegId& reg) const + { return readVecLane<uint16_t>(reg); } + + /** Reads source vector 32bit operand. */ + virtual ConstVecLane32 + readVec32BitLaneReg(const RegId& reg) const + { return readVecLane<uint32_t>(reg); } + + /** Reads source vector 64bit operand. */ + virtual ConstVecLane64 + readVec64BitLaneReg(const RegId& reg) const + { return readVecLane<uint64_t>(reg); } + + /** Write a lane of the destination vector register. 
*/ + template <typename LD> + void setVecLaneT(const RegId& reg, const LD& val) + { + int flatIndex = isa->flattenVecIndex(reg.index()); + assert(flatIndex < TheISA::NumVecRegs); + setVecLaneFlat(flatIndex, reg.elemIndex(), val); + DPRINTF(VecRegs, "Reading vector lane %d (%d)[%d] to %lx.\n", + reg.index(), flatIndex, reg.elemIndex(), val); + } + virtual void setVecLane(const RegId& reg, + const LaneData<LaneSize::Byte>& val) + { return setVecLaneT(reg, val); } + virtual void setVecLane(const RegId& reg, + const LaneData<LaneSize::TwoByte>& val) + { return setVecLaneT(reg, val); } + virtual void setVecLane(const RegId& reg, + const LaneData<LaneSize::FourByte>& val) + { return setVecLaneT(reg, val); } + virtual void setVecLane(const RegId& reg, + const LaneData<LaneSize::EightByte>& val) + { return setVecLaneT(reg, val); } + /** @} */ + + const VecElem& readVecElem(const RegId& reg) const + { + int flatIndex = isa->flattenVecElemIndex(reg.index()); + assert(flatIndex < TheISA::NumVecRegs); + const VecElem& regVal = readVecElemFlat(flatIndex, reg.elemIndex()); + DPRINTF(VecRegs, "Reading element %d of vector reg %d (%d) as" + " %#x.\n", reg.elemIndex(), reg.index(), flatIndex, regVal); + return regVal; + } + + CCReg readCCReg(int reg_idx) { #ifdef ISA_HAS_CC_REGS @@ -312,6 +411,24 @@ class SimpleThread : public ThreadState reg_idx, flatIndex, val, floatRegs.f[flatIndex]); } + void setVecReg(const RegId& reg, const VecRegContainer& val) + { + int flatIndex = isa->flattenVecIndex(reg.index()); + assert(flatIndex < TheISA::NumVecRegs); + setVecRegFlat(flatIndex, val); + DPRINTF(VecRegs, "Setting vector reg %d (%d) to %s.\n", + reg.index(), flatIndex, val.print()); + } + + void setVecElem(const RegId& reg, const VecElem& val) + { + int flatIndex = isa->flattenVecElemIndex(reg.index()); + assert(flatIndex < TheISA::NumVecRegs); + setVecElemFlat(flatIndex, reg.elemIndex(), val); + DPRINTF(VecRegs, "Setting element %d of vector reg %d (%d) to" + " %#x.\n", reg.elemIndex(), 
reg.index(), flatIndex, val); + } + void setCCReg(int reg_idx, CCReg val) { #ifdef ISA_HAS_CC_REGS @@ -428,6 +545,45 @@ class SimpleThread : public ThreadState floatRegs.i[idx] = val; } + const VecRegContainer& readVecRegFlat(const RegIndex& reg) const + { + return vecRegs[reg]; + } + + VecRegContainer& getWritableVecRegFlat(const RegIndex& reg) + { + return vecRegs[reg]; + } + + void setVecRegFlat(const RegIndex& reg, const VecRegContainer& val) + { + vecRegs[reg] = val; + } + + template <typename T> + VecLaneT<T, true> readVecLaneFlat(const RegIndex& reg, int lId) const + { + return vecRegs[reg].laneView<T>(lId); + } + + template <typename LD> + void setVecLaneFlat(const RegIndex& reg, int lId, const LD& val) + { + vecRegs[reg].laneView<typename LD::UnderlyingType>(lId) = val; + } + + const VecElem& readVecElemFlat(const RegIndex& reg, + const ElemIndex& elemIndex) const + { + return vecRegs[reg].as<TheISA::VecElem>()[elemIndex]; + } + + void setVecElemFlat(const RegIndex& reg, const ElemIndex& elemIndex, + const VecElem val) + { + vecRegs[reg].as<TheISA::VecElem>()[elemIndex] = val; + } + #ifdef ISA_HAS_CC_REGS CCReg readCCRegFlat(int idx) { return ccRegs[idx]; } void setCCRegFlat(int idx, CCReg val) { ccRegs[idx] = val; } diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh index d60afc019..e7507c6a6 100644 --- a/src/cpu/static_inst.hh +++ b/src/cpu/static_inst.hh @@ -100,13 +100,20 @@ class StaticInst : public RefCounted, public StaticInstFlags int8_t _numCCDestRegs; //@} + /** To use in architectures with vector register file. */ + /** @{ */ + int8_t _numVecDestRegs; + int8_t _numVecElemDestRegs; + /** @} */ + public: /// @name Register information. - /// The sum of numFPDestRegs() and numIntDestRegs() equals - /// numDestRegs(). The former two functions are used to track - /// physical register usage for machines with separate int & FP - /// reg files. 
+ /// The sum of numFPDestRegs(), numIntDestRegs(), numVecDestRegs() and + /// numVecelemDestRegs() equals numDestRegs(). The former two functions + /// are used to track physical register usage for machines with separate + /// int & FP reg files, the next two is for machines with vector register + /// file. //@{ /// Number of source registers. int8_t numSrcRegs() const { return _numSrcRegs; } @@ -116,7 +123,10 @@ class StaticInst : public RefCounted, public StaticInstFlags int8_t numFPDestRegs() const { return _numFPDestRegs; } /// Number of integer destination regs. int8_t numIntDestRegs() const { return _numIntDestRegs; } - //@} + /// Number of vector destination regs. + int8_t numVecDestRegs() const { return _numVecDestRegs; } + /// Number of vector element destination regs. + int8_t numVecElemDestRegs() const { return _numVecElemDestRegs; } /// Number of coprocesor destination regs. int8_t numCCDestRegs() const { return _numCCDestRegs; } //@} @@ -252,7 +262,8 @@ class StaticInst : public RefCounted, public StaticInstFlags StaticInst(const char *_mnemonic, ExtMachInst _machInst, OpClass __opClass) : _opClass(__opClass), _numSrcRegs(0), _numDestRegs(0), _numFPDestRegs(0), _numIntDestRegs(0), _numCCDestRegs(0), - machInst(_machInst), mnemonic(_mnemonic), cachedDisassembly(0) + _numVecDestRegs(0), _numVecElemDestRegs(0), machInst(_machInst), + mnemonic(_mnemonic), cachedDisassembly(0) { } public: diff --git a/src/cpu/thread_context.cc b/src/cpu/thread_context.cc index cea21e790..0d288de6f 100644 --- a/src/cpu/thread_context.cc +++ b/src/cpu/thread_context.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012 ARM Limited + * Copyright (c) 2012, 2016 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved * @@ -76,6 +76,16 @@ ThreadContext::compare(ThreadContext *one, ThreadContext *two) panic("Float reg idx %d doesn't match, one: %#x, two: %#x", i, t1, t2); } + + // Then loop through the vector registers. 
+ for (int i = 0; i < TheISA::NumVecRegs; ++i) { + RegId rid(VecRegClass, i); + const TheISA::VecRegContainer& t1 = one->readVecReg(rid); + const TheISA::VecRegContainer& t2 = two->readVecReg(rid); + if (t1 != t2) + panic("Vec reg idx %d doesn't match, one: %#x, two: %#x", + i, t1, t2); + } for (int i = 0; i < TheISA::NumMiscRegs; ++i) { TheISA::MiscReg t1 = one->readMiscRegNoEffect(i); TheISA::MiscReg t2 = two->readMiscRegNoEffect(i); @@ -152,6 +162,12 @@ serialize(ThreadContext &tc, CheckpointOut &cp) // compatibility. arrayParamOut(cp, "floatRegs.i", floatRegs, NumFloatRegs); + std::vector<TheISA::VecRegContainer> vecRegs(NumVecRegs); + for (int i = 0; i < NumVecRegs; ++i) { + vecRegs[i] = tc.readVecRegFlat(i); + } + SERIALIZE_CONTAINER(vecRegs); + IntReg intRegs[NumIntRegs]; for (int i = 0; i < NumIntRegs; ++i) intRegs[i] = tc.readIntRegFlat(i); @@ -181,6 +197,12 @@ unserialize(ThreadContext &tc, CheckpointIn &cp) for (int i = 0; i < NumFloatRegs; ++i) tc.setFloatRegBitsFlat(i, floatRegs[i]); + std::vector<TheISA::VecRegContainer> vecRegs(NumVecRegs); + UNSERIALIZE_CONTAINER(vecRegs); + for (int i = 0; i < NumVecRegs; ++i) { + tc.setVecRegFlat(i, vecRegs[i]); + } + IntReg intRegs[NumIntRegs]; UNSERIALIZE_ARRAY(intRegs, NumIntRegs); for (int i = 0; i < NumIntRegs; ++i) diff --git a/src/cpu/thread_context.hh b/src/cpu/thread_context.hh index 43c40481e..66b2f7554 100644 --- a/src/cpu/thread_context.hh +++ b/src/cpu/thread_context.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2012 ARM Limited + * Copyright (c) 2011-2012, 2016 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. 
* All rights reserved * @@ -100,6 +100,8 @@ class ThreadContext typedef TheISA::FloatRegBits FloatRegBits; typedef TheISA::CCReg CCReg; typedef TheISA::MiscReg MiscReg; + using VecRegContainer = TheISA::VecRegContainer; + using VecElem = TheISA::VecElem; public: enum Status @@ -212,6 +214,40 @@ class ThreadContext virtual FloatRegBits readFloatRegBits(int reg_idx) = 0; + virtual const VecRegContainer& readVecReg(const RegId& reg) const = 0; + virtual VecRegContainer& getWritableVecReg(const RegId& reg) = 0; + + /** Vector Register Lane Interfaces. */ + /** @{ */ + /** Reads source vector 8bit operand. */ + virtual ConstVecLane8 + readVec8BitLaneReg(const RegId& reg) const = 0; + + /** Reads source vector 16bit operand. */ + virtual ConstVecLane16 + readVec16BitLaneReg(const RegId& reg) const = 0; + + /** Reads source vector 32bit operand. */ + virtual ConstVecLane32 + readVec32BitLaneReg(const RegId& reg) const = 0; + + /** Reads source vector 64bit operand. */ + virtual ConstVecLane64 + readVec64BitLaneReg(const RegId& reg) const = 0; + + /** Write a lane of the destination vector register. 
*/ + virtual void setVecLane(const RegId& reg, + const LaneData<LaneSize::Byte>& val) = 0; + virtual void setVecLane(const RegId& reg, + const LaneData<LaneSize::TwoByte>& val) = 0; + virtual void setVecLane(const RegId& reg, + const LaneData<LaneSize::FourByte>& val) = 0; + virtual void setVecLane(const RegId& reg, + const LaneData<LaneSize::EightByte>& val) = 0; + /** @} */ + + virtual const VecElem& readVecElem(const RegId& reg) const = 0; + virtual CCReg readCCReg(int reg_idx) = 0; virtual void setIntReg(int reg_idx, uint64_t val) = 0; @@ -220,6 +256,10 @@ class ThreadContext virtual void setFloatRegBits(int reg_idx, FloatRegBits val) = 0; + virtual void setVecReg(const RegId& reg, const VecRegContainer& val) = 0; + + virtual void setVecElem(const RegId& reg, const VecElem& val) = 0; + virtual void setCCReg(int reg_idx, CCReg val) = 0; virtual TheISA::PCState pcState() = 0; @@ -303,6 +343,15 @@ class ThreadContext virtual FloatRegBits readFloatRegBitsFlat(int idx) = 0; virtual void setFloatRegBitsFlat(int idx, FloatRegBits val) = 0; + virtual const VecRegContainer& readVecRegFlat(int idx) const = 0; + virtual VecRegContainer& getWritableVecRegFlat(int idx) = 0; + virtual void setVecRegFlat(int idx, const VecRegContainer& val) = 0; + + virtual const VecElem& readVecElemFlat(const RegIndex& idx, + const ElemIndex& elemIdx) const = 0; + virtual void setVecElemFlat(const RegIndex& idx, const ElemIndex& elemIdx, + const VecElem& val) = 0; + virtual CCReg readCCRegFlat(int idx) = 0; virtual void setCCRegFlat(int idx, CCReg val) = 0; /** @} */ @@ -421,6 +470,52 @@ class ProxyThreadContext : public ThreadContext FloatRegBits readFloatRegBits(int reg_idx) { return actualTC->readFloatRegBits(reg_idx); } + const VecRegContainer& readVecReg(const RegId& reg) const + { return actualTC->readVecReg(reg); } + + VecRegContainer& getWritableVecReg(const RegId& reg) + { return actualTC->getWritableVecReg(reg); } + + /** Vector Register Lane Interfaces. 
*/ + /** @{ */ + /** Reads source vector 8bit operand. */ + ConstVecLane8 + readVec8BitLaneReg(const RegId& reg) const + { return actualTC->readVec8BitLaneReg(reg); } + + /** Reads source vector 16bit operand. */ + ConstVecLane16 + readVec16BitLaneReg(const RegId& reg) const + { return actualTC->readVec16BitLaneReg(reg); } + + /** Reads source vector 32bit operand. */ + ConstVecLane32 + readVec32BitLaneReg(const RegId& reg) const + { return actualTC->readVec32BitLaneReg(reg); } + + /** Reads source vector 64bit operand. */ + ConstVecLane64 + readVec64BitLaneReg(const RegId& reg) const + { return actualTC->readVec64BitLaneReg(reg); } + + /** Write a lane of the destination vector register. */ + virtual void setVecLane(const RegId& reg, + const LaneData<LaneSize::Byte>& val) + { return actualTC->setVecLane(reg, val); } + virtual void setVecLane(const RegId& reg, + const LaneData<LaneSize::TwoByte>& val) + { return actualTC->setVecLane(reg, val); } + virtual void setVecLane(const RegId& reg, + const LaneData<LaneSize::FourByte>& val) + { return actualTC->setVecLane(reg, val); } + virtual void setVecLane(const RegId& reg, + const LaneData<LaneSize::EightByte>& val) + { return actualTC->setVecLane(reg, val); } + /** @} */ + + const VecElem& readVecElem(const RegId& reg) const + { return actualTC->readVecElem(reg); } + CCReg readCCReg(int reg_idx) { return actualTC->readCCReg(reg_idx); } @@ -433,6 +528,12 @@ class ProxyThreadContext : public ThreadContext void setFloatRegBits(int reg_idx, FloatRegBits val) { actualTC->setFloatRegBits(reg_idx, val); } + void setVecReg(const RegId& reg, const VecRegContainer& val) + { actualTC->setVecReg(reg, val); } + + void setVecElem(const RegId& reg, const VecElem& val) + { actualTC->setVecElem(reg, val); } + void setCCReg(int reg_idx, CCReg val) { actualTC->setCCReg(reg_idx, val); } @@ -495,6 +596,23 @@ class ProxyThreadContext : public ThreadContext void setFloatRegBitsFlat(int idx, FloatRegBits val) { 
actualTC->setFloatRegBitsFlat(idx, val); } + const VecRegContainer& readVecRegFlat(int id) const + { return actualTC->readVecRegFlat(id); } + + VecRegContainer& getWritableVecRegFlat(int id) + { return actualTC->getWritableVecRegFlat(id); } + + void setVecRegFlat(int idx, const VecRegContainer& val) + { actualTC->setVecRegFlat(idx, val); } + + const VecElem& readVecElemFlat(const RegIndex& id, + const ElemIndex& elemIndex) const + { return actualTC->readVecElemFlat(id, elemIndex); } + + void setVecElemFlat(const RegIndex& id, const ElemIndex& elemIndex, + const VecElem& val) + { actualTC->setVecElemFlat(id, elemIndex, val); } + CCReg readCCRegFlat(int idx) { return actualTC->readCCRegFlat(idx); } diff --git a/src/sim/serialize.cc b/src/sim/serialize.cc index f06aaf899..f49092d4d 100644 --- a/src/sim/serialize.cc +++ b/src/sim/serialize.cc @@ -57,6 +57,7 @@ #include <string> #include <vector> +#include "arch/generic/vec_reg.hh" #include "base/framebuffer.hh" #include "base/inifile.hh" #include "base/misc.hh" @@ -471,6 +472,8 @@ INSTANTIATE_PARAM_TEMPLATES(float) INSTANTIATE_PARAM_TEMPLATES(double) INSTANTIATE_PARAM_TEMPLATES(string) INSTANTIATE_PARAM_TEMPLATES(Pixel) +INSTANTIATE_PARAM_TEMPLATES(VecRegContainer<8>) +INSTANTIATE_PARAM_TEMPLATES(VecRegContainer<16>) // set is only used with strings and furthermore doesn't agree with Pixel template void |