diff options
Diffstat (limited to 'src/cpu')
30 files changed, 635 insertions, 36 deletions
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index d81b58bdf..b87fd8b4e 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -584,6 +584,11 @@ class BaseDynInst : public ExecContext, public RefCounted { return staticInst->numVecElemDestRegs(); } + int8_t + numVecPredDestRegs() const + { + return staticInst->numVecPredDestRegs(); + } /** Returns the logical register index of the i'th destination register. */ const RegId& destRegIdx(int i) const { return staticInst->destRegIdx(i); } @@ -638,6 +643,16 @@ class BaseDynInst : public ExecContext, public RefCounted InstResult::ResultType::VecElem)); } } + + /** Predicate result. */ + template<typename T> + void setVecPredResult(T&& t) + { + if (instFlags[RecordResult]) { + instResult.push(InstResult(std::forward<T>(t), + InstResult::ResultType::VecPredReg)); + } + } /** @} */ /** Records an integer register being set to a value. */ @@ -672,6 +687,13 @@ class BaseDynInst : public ExecContext, public RefCounted setVecElemResult(val); } + /** Records a predicate register being set to a value. */ + void setVecPredRegOperand(const StaticInst *si, int idx, + const VecPredRegContainer& val) + { + setVecPredResult(val); + } + /** Records that one of the source registers is ready. */ void markSrcRegReady(); diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh index 4468689bd..9d6061ad8 100644 --- a/src/cpu/checker/cpu.hh +++ b/src/cpu/checker/cpu.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, 2016 ARM Limited + * Copyright (c) 2011, 2016-2017 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. 
* All rights reserved * @@ -304,6 +304,22 @@ class CheckerCPU : public BaseCPU, public ExecContext return thread->readVecElem(reg); } + const VecPredRegContainer& + readVecPredRegOperand(const StaticInst *si, int idx) const override + { + const RegId& reg = si->srcRegIdx(idx); + assert(reg.isVecPredReg()); + return thread->readVecPredReg(reg); + } + + VecPredRegContainer& + getWritableVecPredRegOperand(const StaticInst *si, int idx) override + { + const RegId& reg = si->destRegIdx(idx); + assert(reg.isVecPredReg()); + return thread->getWritableVecPredReg(reg); + } + CCReg readCCRegOperand(const StaticInst *si, int idx) override { @@ -336,6 +352,14 @@ class CheckerCPU : public BaseCPU, public ExecContext InstResult::ResultType::VecElem)); } + template<typename T> + void + setVecPredResult(T&& t) + { + result.push(InstResult(std::forward<T>(t), + InstResult::ResultType::VecPredReg)); + } + void setIntRegOperand(const StaticInst *si, int idx, RegVal val) override { @@ -383,6 +407,15 @@ class CheckerCPU : public BaseCPU, public ExecContext setVecElemResult(val); } + void setVecPredRegOperand(const StaticInst *si, int idx, + const VecPredRegContainer& val) override + { + const RegId& reg = si->destRegIdx(idx); + assert(reg.isVecPredReg()); + thread->setVecPredReg(reg, val); + setVecPredResult(val); + } + bool readPredicate() const override { return thread->readPredicate(); } void diff --git a/src/cpu/checker/thread_context.hh b/src/cpu/checker/thread_context.hh index b5a2079ea..8ce5a740d 100644 --- a/src/cpu/checker/thread_context.hh +++ b/src/cpu/checker/thread_context.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2012, 2016 ARM Limited + * Copyright (c) 2011-2012, 2016-2018 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. 
* All rights reserved * @@ -263,6 +263,12 @@ class CheckerThreadContext : public ThreadContext const VecElem& readVecElem(const RegId& reg) const { return actualTC->readVecElem(reg); } + const VecPredRegContainer& readVecPredReg(const RegId& reg) const override + { return actualTC->readVecPredReg(reg); } + + VecPredRegContainer& getWritableVecPredReg(const RegId& reg) override + { return actualTC->getWritableVecPredReg(reg); } + CCReg readCCReg(int reg_idx) { return actualTC->readCCReg(reg_idx); } @@ -295,6 +301,13 @@ class CheckerThreadContext : public ThreadContext } void + setVecPredReg(const RegId& reg, const VecPredRegContainer& val) + { + actualTC->setVecPredReg(reg, val); + checkerTC->setVecPredReg(reg, val); + } + + void setCCReg(int reg_idx, CCReg val) { actualTC->setCCReg(reg_idx, val); @@ -428,6 +441,15 @@ class CheckerThreadContext : public ThreadContext const ElemIndex& elem_idx, const VecElem& val) { actualTC->setVecElemFlat(idx, elem_idx, val); } + const VecPredRegContainer& readVecPredRegFlat(int idx) const override + { return actualTC->readVecPredRegFlat(idx); } + + VecPredRegContainer& getWritableVecPredRegFlat(int idx) override + { return actualTC->getWritableVecPredRegFlat(idx); } + + void setVecPredRegFlat(int idx, const VecPredRegContainer& val) override + { actualTC->setVecPredRegFlat(idx, val); } + CCReg readCCRegFlat(int idx) { return actualTC->readCCRegFlat(idx); } diff --git a/src/cpu/exec_context.hh b/src/cpu/exec_context.hh index 75f428b87..87af91623 100644 --- a/src/cpu/exec_context.hh +++ b/src/cpu/exec_context.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2016 ARM Limited + * Copyright (c) 2014, 2016-2017 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -77,6 +77,7 @@ class ExecContext { typedef TheISA::CCReg CCReg; using VecRegContainer = TheISA::VecRegContainer; using VecElem = TheISA::VecElem; + using VecPredRegContainer = TheISA::VecPredRegContainer; public: /** @@ 
-168,6 +169,22 @@ class ExecContext { const VecElem val) = 0; /** @} */ + /** Predicate registers interface. */ + /** @{ */ + /** Reads source predicate register operand. */ + virtual const VecPredRegContainer& + readVecPredRegOperand(const StaticInst *si, int idx) const = 0; + + /** Gets destination predicate register operand for modification. */ + virtual VecPredRegContainer& + getWritableVecPredRegOperand(const StaticInst *si, int idx) = 0; + + /** Sets a destination predicate register operand to a value. */ + virtual void + setVecPredRegOperand(const StaticInst *si, int idx, + const VecPredRegContainer& val) = 0; + /** @} */ + /** * @{ * @name Condition Code Registers diff --git a/src/cpu/inst_res.hh b/src/cpu/inst_res.hh index 9b6a23d95..bf9c649ef 100644 --- a/src/cpu/inst_res.hh +++ b/src/cpu/inst_res.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016 ARM Limited + * Copyright (c) 2016-2017 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -48,12 +48,14 @@ class InstResult { using VecRegContainer = TheISA::VecRegContainer; using VecElem = TheISA::VecElem; + using VecPredRegContainer = TheISA::VecPredRegContainer; public: union MultiResult { uint64_t integer; double dbl; VecRegContainer vector; VecElem vecElem; + VecPredRegContainer pred; MultiResult() {} }; @@ -61,6 +63,7 @@ class InstResult { Scalar, VecElem, VecReg, + VecPredReg, NumResultTypes, Invalid }; @@ -87,6 +90,9 @@ class InstResult { /** Vector result. */ explicit InstResult(const VecRegContainer& v, const ResultType& t) : type(t) { result.vector = v; } + /** Predicate result. 
*/ + explicit InstResult(const VecPredRegContainer& v, const ResultType& t) + : type(t) { result.pred = v; } InstResult& operator=(const InstResult& that) { type = that.type; @@ -104,6 +110,10 @@ class InstResult { case ResultType::VecReg: result.vector = that.result.vector; break; + case ResultType::VecPredReg: + result.pred = that.result.pred; + break; + default: panic("Assigning result from unknown result type"); break; @@ -124,6 +134,8 @@ class InstResult { return result.vecElem == that.result.vecElem; case ResultType::VecReg: return result.vector == that.result.vector; + case ResultType::VecPredReg: + return result.pred == that.result.pred; case ResultType::Invalid: return false; default: @@ -143,6 +155,8 @@ class InstResult { bool isVector() const { return type == ResultType::VecReg; } /** Is this a vector element result?. */ bool isVecElem() const { return type == ResultType::VecElem; } + /** Is this a predicate result?. */ + bool isPred() const { return type == ResultType::VecPredReg; } /** Is this a valid result?. */ bool isValid() const { return type != ResultType::Invalid; } /** @} */ @@ -177,6 +191,14 @@ class InstResult { panic_if(!isVecElem(), "Converting scalar (or invalid) to vector!!"); return result.vecElem; } + + const VecPredRegContainer& + asPred() const + { + panic_if(!isPred(), "Converting scalar (or invalid) to predicate!!"); + return result.pred; + } + /** @} */ }; diff --git a/src/cpu/minor/exec_context.hh b/src/cpu/minor/exec_context.hh index b9ed3971f..4cb67372e 100644 --- a/src/cpu/minor/exec_context.hh +++ b/src/cpu/minor/exec_context.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2014, 2016 ARM Limited + * Copyright (c) 2011-2014, 2016-2017 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. 
* All rights reserved * @@ -161,6 +161,22 @@ class ExecContext : public ::ExecContext return thread.readVecElem(reg); } + const TheISA::VecPredRegContainer& + readVecPredRegOperand(const StaticInst *si, int idx) const override + { + const RegId& reg = si->srcRegIdx(idx); + assert(reg.isVecPredReg()); + return thread.readVecPredReg(reg); + } + + TheISA::VecPredRegContainer& + getWritableVecPredRegOperand(const StaticInst *si, int idx) override + { + const RegId& reg = si->destRegIdx(idx); + assert(reg.isVecPredReg()); + return thread.getWritableVecPredReg(reg); + } + void setIntRegOperand(const StaticInst *si, int idx, RegVal val) override { @@ -186,6 +202,15 @@ class ExecContext : public ::ExecContext thread.setVecReg(reg, val); } + void + setVecPredRegOperand(const StaticInst *si, int idx, + const TheISA::VecPredRegContainer& val) + { + const RegId& reg = si->destRegIdx(idx); + assert(reg.isVecPredReg()); + thread.setVecPredReg(reg, val); + } + /** Vector Register Lane Interfaces. */ /** @{ */ /** Reads source vector 8bit operand. 
*/ diff --git a/src/cpu/minor/scoreboard.cc b/src/cpu/minor/scoreboard.cc index 196d035eb..5c0e86a67 100644 --- a/src/cpu/minor/scoreboard.cc +++ b/src/cpu/minor/scoreboard.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2014, 2016 ARM Limited + * Copyright (c) 2013-2014, 2016-2017 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -77,6 +77,11 @@ Scoreboard::findIndex(const RegId& reg, Index &scoreboard_index) TheISA::NumFloatRegs + reg.flatIndex(); ret = true; break; + case VecPredRegClass: + scoreboard_index = TheISA::NumIntRegs + TheISA::NumCCRegs + + TheISA::NumFloatRegs + TheISA::NumVecRegs + reg.index(); + ret = true; + break; case CCRegClass: scoreboard_index = TheISA::NumIntRegs + reg.index(); ret = true; diff --git a/src/cpu/minor/scoreboard.hh b/src/cpu/minor/scoreboard.hh index 37ae8da0a..b21e14e24 100644 --- a/src/cpu/minor/scoreboard.hh +++ b/src/cpu/minor/scoreboard.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2014, 2016 ARM Limited + * Copyright (c) 2013-2014, 2016-2017 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -95,7 +95,8 @@ class Scoreboard : public Named Named(name), numRegs(TheISA::NumIntRegs + TheISA::NumCCRegs + TheISA::NumFloatRegs + - (TheISA::NumVecRegs * TheISA::NumVecElemPerVecReg)), + (TheISA::NumVecRegs * TheISA::NumVecElemPerVecReg) + + TheISA::NumVecPredRegs), numResults(numRegs, 0), numUnpredictableResults(numRegs, 0), fuIndices(numRegs, 0), diff --git a/src/cpu/o3/O3CPU.py b/src/cpu/o3/O3CPU.py index 32cc19010..e73c09334 100644 --- a/src/cpu/o3/O3CPU.py +++ b/src/cpu/o3/O3CPU.py @@ -150,6 +150,8 @@ class DerivO3CPU(BaseCPU): _defaultNumPhysCCRegs = Self.numPhysIntRegs * 5 numPhysVecRegs = Param.Unsigned(256, "Number of physical vector " "registers") + numPhysVecPredRegs = Param.Unsigned(32, "Number of physical predicate " + "registers") numPhysCCRegs = Param.Unsigned(_defaultNumPhysCCRegs, "Number of physical 
cc registers") numIQEntries = Param.Unsigned(64, "Number of instruction queue entries") diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh index f5be5a804..df518b1e4 100644 --- a/src/cpu/o3/comm.hh +++ b/src/cpu/o3/comm.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, 2016 ARM Limited + * Copyright (c) 2011, 2016-2017 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved * @@ -121,6 +121,9 @@ class PhysRegId : private RegId { /** @Return true if it is a vector element physical register. */ bool isVectorPhysElem() const { return isVecElem(); } + /** @return true if it is a vector predicate physical register. */ + bool isVecPredPhysReg() const { return isVecPredReg(); } + /** @Return true if it is a condition-code physical register. */ bool isMiscPhysReg() const { return isMiscReg(); } diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 5d92d92dc..ef3b17202 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -159,6 +159,7 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params) regFile(params->numPhysIntRegs, params->numPhysFloatRegs, params->numPhysVecRegs, + params->numPhysVecPredRegs, params->numPhysCCRegs, vecMode), @@ -258,6 +259,7 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params) assert(params->numPhysIntRegs >= numThreads * TheISA::NumIntRegs); assert(params->numPhysFloatRegs >= numThreads * TheISA::NumFloatRegs); assert(params->numPhysVecRegs >= numThreads * TheISA::NumVecRegs); + assert(params->numPhysVecPredRegs >= numThreads * TheISA::NumVecPredRegs); assert(params->numPhysCCRegs >= numThreads * TheISA::NumCCRegs); rename.setScoreboard(&scoreboard); @@ -325,6 +327,13 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params) } } + for (RegIndex ridx = 0; ridx < TheISA::NumVecPredRegs; ++ridx) { + PhysRegIdPtr phys_reg = freeList.getVecPredReg(); + renameMap[tid].setEntry(RegId(VecPredRegClass, ridx), phys_reg); + commitRenameMap[tid].setEntry( + RegId(VecPredRegClass, ridx), phys_reg); + } + for (RegIndex ridx = 0; 
ridx < TheISA::NumCCRegs; ++ridx) { PhysRegIdPtr phys_reg = freeList.getCCReg(); renameMap[tid].setEntry(RegId(CCRegClass, ridx), phys_reg); @@ -538,6 +547,16 @@ FullO3CPU<Impl>::regStats() .desc("number of vector regfile writes") .prereq(vecRegfileWrites); + vecPredRegfileReads + .name(name() + ".pred_regfile_reads") + .desc("number of predicate regfile reads") + .prereq(vecPredRegfileReads); + + vecPredRegfileWrites + .name(name() + ".pred_regfile_writes") + .desc("number of predicate regfile writes") + .prereq(vecPredRegfileWrites); + ccRegfileReads .name(name() + ".cc_regfile_reads") .desc("number of cc regfile reads") @@ -883,6 +902,14 @@ FullO3CPU<Impl>::removeThread(ThreadID tid) freeList.addReg(phys_reg); } + // Unbind predicate Regs from Rename Map + for (unsigned preg = 0; preg < TheISA::NumVecPredRegs; preg++) { + PhysRegIdPtr phys_reg = renameMap[tid].lookup( + RegId(VecPredRegClass, preg)); + scoreboard.unsetReg(phys_reg); + freeList.addReg(phys_reg); + } + // Unbind condition-code Regs from Rename Map for (RegId reg_id(CCRegClass, 0); reg_id.index() < TheISA::NumCCRegs; reg_id.index()++) { @@ -1334,6 +1361,24 @@ FullO3CPU<Impl>::readVecElem(PhysRegIdPtr phys_reg) const -> const VecElem& } template <class Impl> +auto +FullO3CPU<Impl>::readVecPredReg(PhysRegIdPtr phys_reg) const + -> const VecPredRegContainer& +{ + vecPredRegfileReads++; + return regFile.readVecPredReg(phys_reg); +} + +template <class Impl> +auto +FullO3CPU<Impl>::getWritableVecPredReg(PhysRegIdPtr phys_reg) + -> VecPredRegContainer& +{ + vecPredRegfileWrites++; + return regFile.getWritableVecPredReg(phys_reg); +} + +template <class Impl> CCReg FullO3CPU<Impl>::readCCReg(PhysRegIdPtr phys_reg) { @@ -1375,6 +1420,15 @@ FullO3CPU<Impl>::setVecElem(PhysRegIdPtr phys_reg, const VecElem& val) template <class Impl> void +FullO3CPU<Impl>::setVecPredReg(PhysRegIdPtr phys_reg, + const VecPredRegContainer& val) +{ + vecPredRegfileWrites++; + regFile.setVecPredReg(phys_reg, val); +} + +template <class 
Impl> +void FullO3CPU<Impl>::setCCReg(PhysRegIdPtr phys_reg, CCReg val) { ccRegfileWrites++; @@ -1434,6 +1488,26 @@ FullO3CPU<Impl>::readArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx, } template <class Impl> +auto +FullO3CPU<Impl>::readArchVecPredReg(int reg_idx, ThreadID tid) const + -> const VecPredRegContainer& +{ + PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( + RegId(VecPredRegClass, reg_idx)); + return readVecPredReg(phys_reg); +} + +template <class Impl> +auto +FullO3CPU<Impl>::getWritableArchVecPredReg(int reg_idx, ThreadID tid) + -> VecPredRegContainer& +{ + PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( + RegId(VecPredRegClass, reg_idx)); + return getWritableVecPredReg(phys_reg); +} + +template <class Impl> CCReg FullO3CPU<Impl>::readArchCCReg(int reg_idx, ThreadID tid) { @@ -1488,6 +1562,16 @@ FullO3CPU<Impl>::setArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx, template <class Impl> void +FullO3CPU<Impl>::setArchVecPredReg(int reg_idx, const VecPredRegContainer& val, + ThreadID tid) +{ + PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( + RegId(VecPredRegClass, reg_idx)); + setVecPredReg(phys_reg, val); +} + +template <class Impl> +void FullO3CPU<Impl>::setArchCCReg(int reg_idx, CCReg val, ThreadID tid) { ccRegfileWrites++; diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index b5f754056..30ed4ef3b 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -107,6 +107,8 @@ class FullO3CPU : public BaseO3CPU using VecElem = TheISA::VecElem; using VecRegContainer = TheISA::VecRegContainer; + using VecPredRegContainer = TheISA::VecPredRegContainer; + typedef O3ThreadState<Impl> ImplState; typedef O3ThreadState<Impl> Thread; @@ -457,6 +459,10 @@ class FullO3CPU : public BaseO3CPU const VecElem& readVecElem(PhysRegIdPtr reg_idx) const; + const VecPredRegContainer& readVecPredReg(PhysRegIdPtr reg_idx) const; + + VecPredRegContainer& getWritableVecPredReg(PhysRegIdPtr reg_idx); + TheISA::CCReg readCCReg(PhysRegIdPtr 
phys_reg); void setIntReg(PhysRegIdPtr phys_reg, RegVal val); @@ -467,6 +473,8 @@ class FullO3CPU : public BaseO3CPU void setVecElem(PhysRegIdPtr reg_idx, const VecElem& val); + void setVecPredReg(PhysRegIdPtr reg_idx, const VecPredRegContainer& val); + void setCCReg(PhysRegIdPtr phys_reg, TheISA::CCReg val); RegVal readArchIntReg(int reg_idx, ThreadID tid); @@ -501,6 +509,11 @@ class FullO3CPU : public BaseO3CPU const VecElem& readArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx, ThreadID tid) const; + const VecPredRegContainer& readArchVecPredReg(int reg_idx, + ThreadID tid) const; + + VecPredRegContainer& getWritableArchVecPredReg(int reg_idx, ThreadID tid); + TheISA::CCReg readArchCCReg(int reg_idx, ThreadID tid); /** Architectural register accessors. Looks up in the commit @@ -512,6 +525,9 @@ class FullO3CPU : public BaseO3CPU void setArchFloatRegBits(int reg_idx, RegVal val, ThreadID tid); + void setArchVecPredReg(int reg_idx, const VecPredRegContainer& val, + ThreadID tid); + void setArchVecReg(int reg_idx, const VecRegContainer& val, ThreadID tid); void setArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx, @@ -805,6 +821,9 @@ class FullO3CPU : public BaseO3CPU //number of vector register file accesses mutable Stats::Scalar vecRegfileReads; Stats::Scalar vecRegfileWrites; + //number of predicate register file accesses + mutable Stats::Scalar vecPredRegfileReads; + Stats::Scalar vecPredRegfileWrites; //number of CC register file accesses Stats::Scalar ccRegfileReads; Stats::Scalar ccRegfileWrites; diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh index 5bd0f8e47..9793f4ead 100644 --- a/src/cpu/o3/dyn_inst.hh +++ b/src/cpu/o3/dyn_inst.hh @@ -70,6 +70,7 @@ class BaseO3DynInst : public BaseDynInst<Impl> using VecRegContainer = TheISA::VecRegContainer; using VecElem = TheISA::VecElem; static constexpr auto NumVecElemPerVecReg = TheISA::NumVecElemPerVecReg; + using VecPredRegContainer = TheISA::VecPredRegContainer; enum { MaxInstSrcRegs 
= TheISA::MaxInstSrcRegs, //< Max source regs @@ -231,6 +232,10 @@ class BaseO3DynInst : public BaseDynInst<Impl> this->setVecElemOperand(this->staticInst.get(), idx, this->cpu->readVecElem(prev_phys_reg)); break; + case VecPredRegClass: + this->setVecPredRegOperand(this->staticInst.get(), idx, + this->cpu->readVecPredReg(prev_phys_reg)); + break; case CCRegClass: this->setCCRegOperand(this->staticInst.get(), idx, this->cpu->readCCReg(prev_phys_reg)); @@ -361,6 +366,18 @@ class BaseO3DynInst : public BaseDynInst<Impl> return this->cpu->readVecElem(this->_srcRegIdx[idx]); } + const VecPredRegContainer& + readVecPredRegOperand(const StaticInst *si, int idx) const override + { + return this->cpu->readVecPredReg(this->_srcRegIdx[idx]); + } + + VecPredRegContainer& + getWritableVecPredRegOperand(const StaticInst *si, int idx) override + { + return this->cpu->getWritableVecPredReg(this->_destRegIdx[idx]); + } + CCReg readCCRegOperand(const StaticInst *si, int idx) { return this->cpu->readCCReg(this->_srcRegIdx[idx]); @@ -399,6 +416,14 @@ class BaseO3DynInst : public BaseDynInst<Impl> BaseDynInst<Impl>::setVecElemOperand(si, idx, val); } + void + setVecPredRegOperand(const StaticInst *si, int idx, + const VecPredRegContainer& val) override + { + this->cpu->setVecPredReg(this->_destRegIdx[idx], val); + BaseDynInst<Impl>::setVecPredRegOperand(si, idx, val); + } + void setCCRegOperand(const StaticInst *si, int idx, CCReg val) { this->cpu->setCCReg(this->_destRegIdx[idx], val); diff --git a/src/cpu/o3/free_list.hh b/src/cpu/o3/free_list.hh index e7a899cdf..46bebf30d 100644 --- a/src/cpu/o3/free_list.hh +++ b/src/cpu/o3/free_list.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016 ARM Limited + * Copyright (c) 2016-2017 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -138,6 +138,9 @@ class UnifiedFreeList SimpleFreeList vecElemList; /** @} */ + /** The list of free predicate registers. 
*/ + SimpleFreeList predList; + /** The list of free condition-code registers. */ SimpleFreeList ccList; @@ -183,6 +186,9 @@ class UnifiedFreeList /** Gets a free vector elemenet register. */ PhysRegIdPtr getVecElem() { return vecElemList.getReg(); } + /** Gets a free predicate register. */ + PhysRegIdPtr getVecPredReg() { return predList.getReg(); } + /** Gets a free cc register. */ PhysRegIdPtr getCCReg() { return ccList.getReg(); } @@ -207,6 +213,9 @@ class UnifiedFreeList vecElemList.addReg(freed_reg); } + /** Adds a predicate register back to the free list. */ + void addVecPredReg(PhysRegIdPtr freed_reg) { predList.addReg(freed_reg); } + /** Adds a cc register back to the free list. */ void addCCReg(PhysRegIdPtr freed_reg) { ccList.addReg(freed_reg); } @@ -222,6 +231,9 @@ class UnifiedFreeList /** Checks if there are any free vector registers. */ bool hasFreeVecElems() const { return vecElemList.hasFreeRegs(); } + /** Checks if there are any free predicate registers. */ + bool hasFreeVecPredRegs() const { return predList.hasFreeRegs(); } + /** Checks if there are any free cc registers. */ bool hasFreeCCRegs() const { return ccList.hasFreeRegs(); } @@ -237,6 +249,9 @@ class UnifiedFreeList /** Returns the number of free vector registers. */ unsigned numFreeVecElems() const { return vecElemList.numFreeRegs(); } + /** Returns the number of free predicate registers. */ + unsigned numFreeVecPredRegs() const { return predList.numFreeRegs(); } + /** Returns the number of free cc registers. 
*/ unsigned numFreeCCRegs() const { return ccList.numFreeRegs(); } }; @@ -267,6 +282,9 @@ UnifiedFreeList::addRegs(InputIt first, InputIt last) case VecElemClass: vecElemList.addRegs(first, last); break; + case VecPredRegClass: + predList.addRegs(first, last); + break; case CCRegClass: ccList.addRegs(first, last); break; @@ -297,6 +315,9 @@ UnifiedFreeList::addReg(PhysRegIdPtr freed_reg) case VecElemClass: vecElemList.addReg(freed_reg); break; + case VecPredRegClass: + predList.addReg(freed_reg); + break; case CCRegClass: ccList.addReg(freed_reg); break; diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index 4a55a91ea..ddd7b6d5f 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -104,6 +104,7 @@ InstructionQueue<Impl>::InstructionQueue(O3CPU *cpu_ptr, IEW *iew_ptr, numPhysRegs = params->numPhysIntRegs + params->numPhysFloatRegs + params->numPhysVecRegs + params->numPhysVecRegs * TheISA::NumVecElemPerVecReg + + params->numPhysVecPredRegs + params->numPhysCCRegs; //Create an entry for each physical register within the diff --git a/src/cpu/o3/regfile.cc b/src/cpu/o3/regfile.cc index 2f41e2ac2..cc4bba6b0 100644 --- a/src/cpu/o3/regfile.cc +++ b/src/cpu/o3/regfile.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016 ARM Limited + * Copyright (c) 2016-2017 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -52,22 +52,26 @@ PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs, unsigned _numPhysicalFloatRegs, unsigned _numPhysicalVecRegs, + unsigned _numPhysicalVecPredRegs, unsigned _numPhysicalCCRegs, VecMode vmode) : intRegFile(_numPhysicalIntRegs), floatRegFile(_numPhysicalFloatRegs), vectorRegFile(_numPhysicalVecRegs), + vecPredRegFile(_numPhysicalVecPredRegs), ccRegFile(_numPhysicalCCRegs), numPhysicalIntRegs(_numPhysicalIntRegs), numPhysicalFloatRegs(_numPhysicalFloatRegs), numPhysicalVecRegs(_numPhysicalVecRegs), numPhysicalVecElemRegs(_numPhysicalVecRegs * 
NumVecElemPerVecReg), + numPhysicalVecPredRegs(_numPhysicalVecPredRegs), numPhysicalCCRegs(_numPhysicalCCRegs), totalNumRegs(_numPhysicalIntRegs + _numPhysicalFloatRegs + _numPhysicalVecRegs + _numPhysicalVecRegs * NumVecElemPerVecReg + + _numPhysicalVecPredRegs + _numPhysicalCCRegs), vecMode(vmode) { @@ -108,6 +112,12 @@ PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs, } } + // The next batch of the registers are the predicate physical + // registers; put them onto the predicate free list. + for (phys_reg = 0; phys_reg < numPhysicalVecPredRegs; phys_reg++) { + vecPredRegIds.emplace_back(VecPredRegClass, phys_reg, flat_reg_idx++); + } + // The rest of the registers are the condition-code physical // registers; put them onto the condition-code free list. for (phys_reg = 0; phys_reg < numPhysicalCCRegs; phys_reg++) { @@ -159,6 +169,13 @@ PhysRegFile::initFreeList(UnifiedFreeList *freeList) else freeList->addRegs(vecElemIds.begin(), vecElemIds.end()); + // The next batch of the registers are the predicate physical + // registers; put them onto the predicate free list. + for (reg_idx = 0; reg_idx < numPhysicalVecPredRegs; reg_idx++) { + assert(vecPredRegIds[reg_idx].index() == reg_idx); + } + freeList->addRegs(vecPredRegIds.begin(), vecPredRegIds.end()); + // The rest of the registers are the condition-code physical // registers; put them onto the condition-code free list. 
for (reg_idx = 0; reg_idx < numPhysicalCCRegs; reg_idx++) { @@ -191,6 +208,8 @@ PhysRegFile::getRegIds(RegClass cls) -> IdRange return std::make_pair(vecRegIds.begin(), vecRegIds.end()); case VecElemClass: return std::make_pair(vecElemIds.begin(), vecElemIds.end()); + case VecPredRegClass: + return std::make_pair(vecPredRegIds.begin(), vecPredRegIds.end()); case CCRegClass: return std::make_pair(ccRegIds.begin(), ccRegIds.end()); case MiscRegClass: diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh index 9d9113240..4077c99a4 100644 --- a/src/cpu/o3/regfile.hh +++ b/src/cpu/o3/regfile.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016 ARM Limited + * Copyright (c) 2016-2017 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -70,6 +70,7 @@ class PhysRegFile using VecRegContainer = TheISA::VecRegContainer; using PhysIds = std::vector<PhysRegId>; using VecMode = Enums::VecRegRenameMode; + using VecPredRegContainer = TheISA::VecPredRegContainer; public: using IdRange = std::pair<PhysIds::const_iterator, PhysIds::const_iterator>; @@ -89,6 +90,10 @@ class PhysRegFile std::vector<PhysRegId> vecRegIds; std::vector<PhysRegId> vecElemIds; + /** Predicate register file. */ + std::vector<VecPredRegContainer> vecPredRegFile; + std::vector<PhysRegId> vecPredRegIds; + /** Condition-code register file. 
*/ std::vector<CCReg> ccRegFile; std::vector<PhysRegId> ccRegIds; @@ -117,6 +122,11 @@ class PhysRegFile unsigned numPhysicalVecElemRegs; /** + * Number of physical predicate registers + */ + unsigned numPhysicalVecPredRegs; + + /** * Number of physical CC registers */ unsigned numPhysicalCCRegs; @@ -135,6 +145,7 @@ class PhysRegFile PhysRegFile(unsigned _numPhysicalIntRegs, unsigned _numPhysicalFloatRegs, unsigned _numPhysicalVecRegs, + unsigned _numPhysicalVecPredRegs, unsigned _numPhysicalCCRegs, VecMode vmode ); @@ -154,6 +165,8 @@ class PhysRegFile unsigned numFloatPhysRegs() const { return numPhysicalFloatRegs; } /** @return the number of vector physical registers. */ unsigned numVecPhysRegs() const { return numPhysicalVecRegs; } + /** @return the number of predicate physical registers. */ + unsigned numPredPhysRegs() const { return numPhysicalVecPredRegs; } /** @return the number of vector physical registers. */ unsigned numVecElemPhysRegs() const { return numPhysicalVecElemRegs; } @@ -201,7 +214,7 @@ class PhysRegFile DPRINTF(IEW, "RegFile: Access to vector register %i, has " "data %s\n", int(phys_reg->index()), - vectorRegFile[phys_reg->index()].as<VecElem>().print()); + vectorRegFile[phys_reg->index()].print()); return vectorRegFile[phys_reg->index()]; } @@ -258,6 +271,24 @@ class PhysRegFile return val; } + /** Reads a predicate register. */ + const VecPredRegContainer& readVecPredReg(PhysRegIdPtr phys_reg) const + { + assert(phys_reg->isVecPredPhysReg()); + + DPRINTF(IEW, "RegFile: Access to predicate register %i, has " + "data %s\n", int(phys_reg->index()), + vecPredRegFile[phys_reg->index()].print()); + + return vecPredRegFile[phys_reg->index()]; + } + + VecPredRegContainer& getWritableVecPredReg(PhysRegIdPtr phys_reg) + { + /* const_cast for not duplicating code above. */ + return const_cast<VecPredRegContainer&>(readVecPredReg(phys_reg)); + } + /** Reads a condition-code register. 
*/ CCReg readCCReg(PhysRegIdPtr phys_reg) @@ -321,6 +352,17 @@ class PhysRegFile val; } + /** Sets a predicate register to the given value. */ + void setVecPredReg(PhysRegIdPtr phys_reg, const VecPredRegContainer& val) + { + assert(phys_reg->isVecPredPhysReg()); + + DPRINTF(IEW, "RegFile: Setting predicate register %i to %s\n", + int(phys_reg->index()), val.print()); + + vecPredRegFile[phys_reg->index()] = val; + } + /** Sets a condition-code register to the given value. */ void setCCReg(PhysRegIdPtr phys_reg, CCReg val) diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh index bd5e72dec..a091c0908 100644 --- a/src/cpu/o3/rename.hh +++ b/src/cpu/o3/rename.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012 ARM Limited + * Copyright (c) 2012, 2017 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -514,6 +514,7 @@ class DefaultRename Stats::Scalar intRenameLookups; Stats::Scalar fpRenameLookups; Stats::Scalar vecRenameLookups; + Stats::Scalar vecPredRenameLookups; /** Stat for total number of committed renaming mappings. */ Stats::Scalar renameCommittedMaps; /** Stat for total number of mappings that were undone due to a squash. 
*/ diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index ed5dfb6eb..b63163f04 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -196,6 +196,10 @@ DefaultRename<Impl>::regStats() .name(name() + ".vec_rename_lookups") .desc("Number of vector rename lookups") .prereq(vecRenameLookups); + vecPredRenameLookups + .name(name() + ".vec_pred_rename_lookups") + .desc("Number of vector predicate rename lookups") + .prereq(vecPredRenameLookups); } template <class Impl> @@ -659,6 +663,7 @@ DefaultRename<Impl>::renameInsts(ThreadID tid) inst->numFPDestRegs(), inst->numVecDestRegs(), inst->numVecElemDestRegs(), + inst->numVecPredDestRegs(), inst->numCCDestRegs())) { DPRINTF(Rename, "Blocking due to lack of free " "physical registers to rename to.\n"); @@ -1041,6 +1046,9 @@ DefaultRename<Impl>::renameSrcRegs(const DynInstPtr &inst, ThreadID tid) case VecElemClass: vecRenameLookups++; break; + case VecPredRegClass: + vecPredRenameLookups++; + break; case CCRegClass: case MiscRegClass: break; @@ -1256,7 +1264,7 @@ DefaultRename<Impl>::readFreeEntries(ThreadID tid) } DPRINTF(Rename, "[tid:%i]: Free IQ: %i, Free ROB: %i, " - "Free LQ: %i, Free SQ: %i, FreeRM %i(%i %i %i %i)\n", + "Free LQ: %i, Free SQ: %i, FreeRM %i(%i %i %i %i %i)\n", tid, freeEntries[tid].iqEntries, freeEntries[tid].robEntries, @@ -1266,6 +1274,7 @@ DefaultRename<Impl>::readFreeEntries(ThreadID tid) renameMap[tid]->numFreeIntEntries(), renameMap[tid]->numFreeFloatEntries(), renameMap[tid]->numFreeVecEntries(), + renameMap[tid]->numFreePredEntries(), renameMap[tid]->numFreeCCEntries()); DPRINTF(Rename, "[tid:%i]: %i instructions not yet in ROB\n", diff --git a/src/cpu/o3/rename_map.cc b/src/cpu/o3/rename_map.cc index 86c43932c..603f1ff36 100644 --- a/src/cpu/o3/rename_map.cc +++ b/src/cpu/o3/rename_map.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016,2019 ARM Limited + * Copyright (c) 2016-2017,2019 ARM Limited * All rights reserved * * The license below extends only to 
copyright in the software and shall @@ -120,6 +120,8 @@ UnifiedRenameMap::init(PhysRegFile *_regFile, vecElemMap.init(TheISA::NumVecRegs * NVecElems, &(freeList->vecElemList), (RegIndex)-1); + predMap.init(TheISA::NumVecPredRegs, &(freeList->predList), (RegIndex)-1); + ccMap.init(TheISA::NumCCRegs, &(freeList->ccList), (RegIndex)-1); } diff --git a/src/cpu/o3/rename_map.hh b/src/cpu/o3/rename_map.hh index d30668027..5424633e5 100644 --- a/src/cpu/o3/rename_map.hh +++ b/src/cpu/o3/rename_map.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016 ARM Limited + * Copyright (c) 2015-2017 ARM Limited * All rights reserved. * * The license below extends only to copyright in the software and shall @@ -172,6 +172,7 @@ class UnifiedRenameMap private: static constexpr uint32_t NVecElems = TheISA::NumVecElemPerVecReg; using VecReg = TheISA::VecReg; + using VecPredReg = TheISA::VecPredReg; /** The integer register rename map */ SimpleRenameMap intMap; @@ -188,6 +189,9 @@ class UnifiedRenameMap /** The vector element register rename map */ SimpleRenameMap vecElemMap; + /** The predicate register rename map */ + SimpleRenameMap predMap; + using VecMode = Enums::VecRegRenameMode; VecMode vecMode; @@ -235,6 +239,8 @@ class UnifiedRenameMap case VecElemClass: assert(vecMode == Enums::Elem); return vecElemMap.rename(arch_reg); + case VecPredRegClass: + return predMap.rename(arch_reg); case CCRegClass: return ccMap.rename(arch_reg); case MiscRegClass: @@ -276,6 +282,9 @@ class UnifiedRenameMap assert(vecMode == Enums::Elem); return vecElemMap.lookup(arch_reg); + case VecPredRegClass: + return predMap.lookup(arch_reg); + case CCRegClass: return ccMap.lookup(arch_reg); @@ -319,6 +328,10 @@ class UnifiedRenameMap assert(vecMode == Enums::Elem); return vecElemMap.setEntry(arch_reg, phys_reg); + case VecPredRegClass: + assert(phys_reg->isVecPredPhysReg()); + return predMap.setEntry(arch_reg, phys_reg); + case CCRegClass: assert(phys_reg->isCCPhysReg()); return ccMap.setEntry(arch_reg, 
phys_reg); @@ -345,10 +358,11 @@ class UnifiedRenameMap */ unsigned numFreeEntries() const { - return std::min( + return std::min(std::min( std::min(intMap.numFreeEntries(), floatMap.numFreeEntries()), vecMode == Enums::Full ? vecMap.numFreeEntries() - : vecElemMap.numFreeEntries()); + : vecElemMap.numFreeEntries()), + predMap.numFreeEntries()); } unsigned numFreeIntEntries() const { return intMap.numFreeEntries(); } @@ -359,18 +373,21 @@ class UnifiedRenameMap ? vecMap.numFreeEntries() : vecElemMap.numFreeEntries(); } + unsigned numFreePredEntries() const { return predMap.numFreeEntries(); } unsigned numFreeCCEntries() const { return ccMap.numFreeEntries(); } /** * Return whether there are enough registers to serve the request. */ bool canRename(uint32_t intRegs, uint32_t floatRegs, uint32_t vectorRegs, - uint32_t vecElemRegs, uint32_t ccRegs) const + uint32_t vecElemRegs, uint32_t vecPredRegs, + uint32_t ccRegs) const { return intRegs <= intMap.numFreeEntries() && floatRegs <= floatMap.numFreeEntries() && vectorRegs <= vecMap.numFreeEntries() && vecElemRegs <= vecElemMap.numFreeEntries() && + vecPredRegs <= predMap.numFreeEntries() && ccRegs <= ccMap.numFreeEntries(); } /** diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh index c74936469..7858f5a0a 100644 --- a/src/cpu/o3/thread_context.hh +++ b/src/cpu/o3/thread_context.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2012, 2016 ARM Limited + * Copyright (c) 2011-2012, 2016-2018 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. 
* All rights reserved * @@ -263,6 +263,14 @@ class O3ThreadContext : public ThreadContext return readVecElemFlat(flattenRegId(reg).index(), reg.elemIndex()); } + virtual const VecPredRegContainer& readVecPredReg(const RegId& id) const { + return readVecPredRegFlat(flattenRegId(id).index()); + } + + virtual VecPredRegContainer& getWritableVecPredReg(const RegId& id) { + return getWritableVecPredRegFlat(flattenRegId(id).index()); + } + virtual CCReg readCCReg(int reg_idx) { return readCCRegFlat(flattenRegId(RegId(CCRegClass, reg_idx)).index()); @@ -295,6 +303,13 @@ class O3ThreadContext : public ThreadContext } virtual void + setVecPredReg(const RegId& reg, + const VecPredRegContainer& val) + { + setVecPredRegFlat(flattenRegId(reg).index(), val); + } + + virtual void setCCReg(int reg_idx, CCReg val) { setCCRegFlat(flattenRegId(RegId(CCRegClass, reg_idx)).index(), val); @@ -403,6 +418,12 @@ class O3ThreadContext : public ThreadContext virtual void setVecElemFlat(const RegIndex& idx, const ElemIndex& elemIdx, const VecElem& val); + virtual const VecPredRegContainer& readVecPredRegFlat(int idx) + const override; + virtual VecPredRegContainer& getWritableVecPredRegFlat(int idx) override; + virtual void setVecPredRegFlat(int idx, + const VecPredRegContainer& val) override; + virtual CCReg readCCRegFlat(int idx); virtual void setCCRegFlat(int idx, CCReg val); }; diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh index bd2bf63b0..59562ba3b 100644 --- a/src/cpu/o3/thread_context_impl.hh +++ b/src/cpu/o3/thread_context_impl.hh @@ -233,6 +233,20 @@ O3ThreadContext<Impl>::readVecElemFlat(const RegIndex& idx, } template <class Impl> +const TheISA::VecPredRegContainer& +O3ThreadContext<Impl>::readVecPredRegFlat(int reg_id) const +{ + return cpu->readArchVecPredReg(reg_id, thread->threadId()); +} + +template <class Impl> +TheISA::VecPredRegContainer& +O3ThreadContext<Impl>::getWritableVecPredRegFlat(int reg_id) +{ + return 
cpu->getWritableArchVecPredReg(reg_id, thread->threadId()); +} + +template <class Impl> TheISA::CCReg O3ThreadContext<Impl>::readCCRegFlat(int reg_idx) { @@ -277,6 +291,16 @@ O3ThreadContext<Impl>::setVecElemFlat(const RegIndex& idx, template <class Impl> void +O3ThreadContext<Impl>::setVecPredRegFlat(int reg_idx, + const VecPredRegContainer& val) +{ + cpu->setArchVecPredReg(reg_idx, val, thread->threadId()); + + conditionalSquash(); +} + +template <class Impl> +void O3ThreadContext<Impl>::setCCRegFlat(int reg_idx, TheISA::CCReg val) { cpu->setArchCCReg(reg_idx, val, thread->threadId()); diff --git a/src/cpu/reg_class.cc b/src/cpu/reg_class.cc index 16c1949ee..eeb06adcc 100644 --- a/src/cpu/reg_class.cc +++ b/src/cpu/reg_class.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016 ARM Limited + * Copyright (c) 2016-2017 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -47,6 +47,7 @@ const char *RegId::regClassStrings[] = { "FloatRegClass", "VecRegClass", "VecElemClass", + "VecPredRegClass", "CCRegClass", "MiscRegClass" }; diff --git a/src/cpu/reg_class.hh b/src/cpu/reg_class.hh index 69da9cf7e..70cfab39e 100644 --- a/src/cpu/reg_class.hh +++ b/src/cpu/reg_class.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016 ARM Limited + * Copyright (c) 2016-2017 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -60,6 +60,7 @@ enum RegClass { VecRegClass, /** Vector Register Native Elem lane. */ VecElemClass, + VecPredRegClass, CCRegClass, ///< Condition-code register MiscRegClass ///< Control (misc) register }; @@ -151,6 +152,9 @@ class RegId { /** @Return true if it is a vector element physical register. */ bool isVecElem() const { return regClass == VecElemClass; } + /** @Return true if it is a predicate physical register. */ + bool isVecPredReg() const { return regClass == VecPredRegClass; } + /** @Return true if it is a condition-code physical register.
*/ bool isCCReg() const { return regClass == CCRegClass; } @@ -179,6 +183,7 @@ class RegId { case IntRegClass: case FloatRegClass: case VecRegClass: + case VecPredRegClass: case CCRegClass: case MiscRegClass: return regIdx; diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh index cbca34123..d2107b89a 100644 --- a/src/cpu/simple/exec_context.hh +++ b/src/cpu/simple/exec_context.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2016 ARM Limited + * Copyright (c) 2014-2017 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -121,6 +121,10 @@ class SimpleExecContext : public ExecContext { mutable Stats::Scalar numVecRegReads; Stats::Scalar numVecRegWrites; + // Number of predicate register file accesses + mutable Stats::Scalar numVecPredRegReads; + Stats::Scalar numVecPredRegWrites; + // Number of condition code register file accesses Stats::Scalar numCCRegReads; Stats::Scalar numCCRegWrites; @@ -333,6 +337,34 @@ class SimpleExecContext : public ExecContext { thread->setVecElem(reg, val); } + const VecPredRegContainer& + readVecPredRegOperand(const StaticInst *si, int idx) const override + { + numVecPredRegReads++; + const RegId& reg = si->srcRegIdx(idx); + assert(reg.isVecPredReg()); + return thread->readVecPredReg(reg); + } + + VecPredRegContainer& + getWritableVecPredRegOperand(const StaticInst *si, int idx) override + { + numVecPredRegWrites++; + const RegId& reg = si->destRegIdx(idx); + assert(reg.isVecPredReg()); + return thread->getWritableVecPredReg(reg); + } + + void + setVecPredRegOperand(const StaticInst *si, int idx, + const VecPredRegContainer& val) override + { + numVecPredRegWrites++; + const RegId& reg = si->destRegIdx(idx); + assert(reg.isVecPredReg()); + thread->setVecPredReg(reg, val); + } + CCReg readCCRegOperand(const StaticInst *si, int idx) override { diff --git a/src/cpu/simple_thread.hh b/src/cpu/simple_thread.hh index 211a4c89f..00355c602 100644 --- 
a/src/cpu/simple_thread.hh +++ b/src/cpu/simple_thread.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2012, 2016 ARM Limited + * Copyright (c) 2011-2012, 2016-2018 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved * @@ -58,6 +58,7 @@ #include "debug/CCRegs.hh" #include "debug/FloatRegs.hh" #include "debug/IntRegs.hh" +#include "debug/VecPredRegs.hh" #include "debug/VecRegs.hh" #include "mem/page_table.hh" #include "mem/request.hh" @@ -102,6 +103,7 @@ class SimpleThread : public ThreadState typedef TheISA::CCReg CCReg; using VecRegContainer = TheISA::VecRegContainer; using VecElem = TheISA::VecElem; + using VecPredRegContainer = TheISA::VecPredRegContainer; public: typedef ThreadContext::Status Status; @@ -109,6 +111,7 @@ class SimpleThread : public ThreadState RegVal floatRegs[TheISA::NumFloatRegs]; RegVal intRegs[TheISA::NumIntRegs]; VecRegContainer vecRegs[TheISA::NumVecRegs]; + VecPredRegContainer vecPredRegs[TheISA::NumVecPredRegs]; #ifdef ISA_HAS_CC_REGS TheISA::CCReg ccRegs[TheISA::NumCCRegs]; #endif @@ -228,6 +231,9 @@ class SimpleThread : public ThreadState for (int i = 0; i < TheISA::NumVecRegs; i++) { vecRegs[i].zero(); } + for (int i = 0; i < TheISA::NumVecPredRegs; i++) { + vecPredRegs[i].reset(); + } #ifdef ISA_HAS_CC_REGS memset(ccRegs, 0, sizeof(ccRegs)); #endif @@ -266,7 +272,7 @@ class SimpleThread : public ThreadState assert(flatIndex < TheISA::NumVecRegs); const VecRegContainer& regVal = readVecRegFlat(flatIndex); DPRINTF(VecRegs, "Reading vector reg %d (%d) as %s.\n", - reg.index(), flatIndex, regVal.as<TheISA::VecElem>().print()); + reg.index(), flatIndex, regVal.print()); return regVal; } @@ -277,7 +283,7 @@ class SimpleThread : public ThreadState assert(flatIndex < TheISA::NumVecRegs); VecRegContainer& regVal = getWritableVecRegFlat(flatIndex); DPRINTF(VecRegs, "Reading vector reg %d (%d) as %s for modify.\n", - reg.index(), flatIndex, regVal.as<TheISA::VecElem>().print()); + reg.index(), flatIndex, 
regVal.print()); return regVal; } @@ -350,6 +356,28 @@ class SimpleThread : public ThreadState return regVal; } + const VecPredRegContainer& + readVecPredReg(const RegId& reg) const + { + int flatIndex = isa->flattenVecPredIndex(reg.index()); + assert(flatIndex < TheISA::NumVecPredRegs); + const VecPredRegContainer& regVal = readVecPredRegFlat(flatIndex); + DPRINTF(VecPredRegs, "Reading predicate reg %d (%d) as %s.\n", + reg.index(), flatIndex, regVal.print()); + return regVal; + } + + VecPredRegContainer& + getWritableVecPredReg(const RegId& reg) + { + int flatIndex = isa->flattenVecPredIndex(reg.index()); + assert(flatIndex < TheISA::NumVecPredRegs); + VecPredRegContainer& regVal = getWritableVecPredRegFlat(flatIndex); + DPRINTF(VecPredRegs, + "Reading predicate reg %d (%d) as %s for modify.\n", + reg.index(), flatIndex, regVal.print()); + return regVal; + } CCReg readCCReg(int reg_idx) { @@ -411,6 +439,16 @@ class SimpleThread : public ThreadState } void + setVecPredReg(const RegId& reg, const VecPredRegContainer& val) + { + int flatIndex = isa->flattenVecPredIndex(reg.index()); + assert(flatIndex < TheISA::NumVecPredRegs); + setVecPredRegFlat(flatIndex, val); + DPRINTF(VecPredRegs, "Setting predicate reg %d (%d) to %s.\n", + reg.index(), flatIndex, val.print()); + } + + void setCCReg(int reg_idx, CCReg val) { #ifdef ISA_HAS_CC_REGS @@ -568,6 +606,21 @@ class SimpleThread : public ThreadState vecRegs[reg].as<TheISA::VecElem>()[elemIndex] = val; } + const VecPredRegContainer& readVecPredRegFlat(const RegIndex& reg) const + { + return vecPredRegs[reg]; + } + + VecPredRegContainer& getWritableVecPredRegFlat(const RegIndex& reg) + { + return vecPredRegs[reg]; + } + + void setVecPredRegFlat(const RegIndex& reg, const VecPredRegContainer& val) + { + vecPredRegs[reg] = val; + } + #ifdef ISA_HAS_CC_REGS CCReg readCCRegFlat(int idx) { return ccRegs[idx]; } void setCCRegFlat(int idx, CCReg val) { ccRegs[idx] = val; } diff --git a/src/cpu/static_inst.hh 
b/src/cpu/static_inst.hh index 70d933c31..bcb53f5ea 100644 --- a/src/cpu/static_inst.hh +++ b/src/cpu/static_inst.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2017 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2003-2005 The Regents of The University of Michigan * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved. @@ -105,16 +117,17 @@ class StaticInst : public RefCounted, public StaticInstFlags /** @{ */ int8_t _numVecDestRegs; int8_t _numVecElemDestRegs; + int8_t _numVecPredDestRegs; /** @} */ public: /// @name Register information. - /// The sum of numFPDestRegs(), numIntDestRegs(), numVecDestRegs() and - /// numVecelemDestRegs() equals numDestRegs(). The former two functions - /// are used to track physical register usage for machines with separate - /// int & FP reg files, the next two is for machines with vector register - /// file. + /// The sum of numFPDestRegs(), numIntDestRegs(), numVecDestRegs(), + /// numVecElemDestRegs() and numVecPredDestRegs() equals numDestRegs(). + /// The former two functions are used to track physical register usage for + /// machines with separate int & FP reg files, the next three are for + /// machines with vector and predicate register files. //@{ /// Number of source registers. 
int8_t numSrcRegs() const { return _numSrcRegs; } @@ -128,6 +141,8 @@ class StaticInst : public RefCounted, public StaticInstFlags int8_t numVecDestRegs() const { return _numVecDestRegs; } /// Number of vector element destination regs. int8_t numVecElemDestRegs() const { return _numVecElemDestRegs; } + /// Number of predicate destination regs. + int8_t numVecPredDestRegs() const { return _numVecPredDestRegs; } /// Number of condition-code destination regs. int8_t numCCDestRegs() const { return _numCCDestRegs; } //@} @@ -248,8 +263,8 @@ class StaticInst : public RefCounted, public StaticInstFlags StaticInst(const char *_mnemonic, ExtMachInst _machInst, OpClass __opClass) : _opClass(__opClass), _numSrcRegs(0), _numDestRegs(0), _numFPDestRegs(0), _numIntDestRegs(0), _numCCDestRegs(0), - _numVecDestRegs(0), _numVecElemDestRegs(0), machInst(_machInst), - mnemonic(_mnemonic), cachedDisassembly(0) + _numVecDestRegs(0), _numVecElemDestRegs(0), _numVecPredDestRegs(0), + machInst(_machInst), mnemonic(_mnemonic), cachedDisassembly(0) { } public: diff --git a/src/cpu/thread_context.cc b/src/cpu/thread_context.cc index 2d907a055..7597dbfb2 100644 --- a/src/cpu/thread_context.cc +++ b/src/cpu/thread_context.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2016 ARM Limited + * Copyright (c) 2012, 2016-2017 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved * @@ -43,6 +43,7 @@ #include "cpu/thread_context.hh" +#include "arch/generic/vec_pred_reg.hh" #include "arch/kernel_stats.hh" #include "base/logging.hh" #include "base/trace.hh" @@ -86,6 +87,17 @@ ThreadContext::compare(ThreadContext *one, ThreadContext *two) panic("Vec reg idx %d doesn't match, one: %#x, two: %#x", i, t1, t2); } + + // Then loop through the predicate registers.
+ for (int i = 0; i < TheISA::NumVecPredRegs; ++i) { + RegId rid(VecPredRegClass, i); + const TheISA::VecPredRegContainer& t1 = one->readVecPredReg(rid); + const TheISA::VecPredRegContainer& t2 = two->readVecPredReg(rid); + if (t1 != t2) + panic("Pred reg idx %d doesn't match, one: %#x, two: %#x", + i, t1, t2); + } + for (int i = 0; i < TheISA::NumMiscRegs; ++i) { RegVal t1 = one->readMiscRegNoEffect(i); RegVal t2 = two->readMiscRegNoEffect(i); @@ -168,6 +180,12 @@ serialize(ThreadContext &tc, CheckpointOut &cp) } SERIALIZE_CONTAINER(vecRegs); + std::vector<TheISA::VecPredRegContainer> vecPredRegs(NumVecPredRegs); + for (int i = 0; i < NumVecPredRegs; ++i) { + vecPredRegs[i] = tc.readVecPredRegFlat(i); + } + SERIALIZE_CONTAINER(vecPredRegs); + RegVal intRegs[NumIntRegs]; for (int i = 0; i < NumIntRegs; ++i) intRegs[i] = tc.readIntRegFlat(i); @@ -203,6 +221,12 @@ unserialize(ThreadContext &tc, CheckpointIn &cp) tc.setVecRegFlat(i, vecRegs[i]); } + std::vector<TheISA::VecPredRegContainer> vecPredRegs(NumVecPredRegs); + UNSERIALIZE_CONTAINER(vecPredRegs); + for (int i = 0; i < NumVecPredRegs; ++i) { + tc.setVecPredRegFlat(i, vecPredRegs[i]); + } + RegVal intRegs[NumIntRegs]; UNSERIALIZE_ARRAY(intRegs, NumIntRegs); for (int i = 0; i < NumIntRegs; ++i) diff --git a/src/cpu/thread_context.hh b/src/cpu/thread_context.hh index cad073b4f..6dde68650 100644 --- a/src/cpu/thread_context.hh +++ b/src/cpu/thread_context.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2012, 2016 ARM Limited + * Copyright (c) 2011-2012, 2016-2018 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. 
* All rights reserved * @@ -98,6 +98,8 @@ class ThreadContext typedef TheISA::CCReg CCReg; using VecRegContainer = TheISA::VecRegContainer; using VecElem = TheISA::VecElem; + using VecPredRegContainer = TheISA::VecPredRegContainer; + public: enum Status @@ -242,6 +244,10 @@ class ThreadContext virtual const VecElem& readVecElem(const RegId& reg) const = 0; + virtual const VecPredRegContainer& readVecPredReg(const RegId& reg) + const = 0; + virtual VecPredRegContainer& getWritableVecPredReg(const RegId& reg) = 0; + virtual CCReg readCCReg(int reg_idx) = 0; virtual void setIntReg(int reg_idx, RegVal val) = 0; @@ -252,6 +258,9 @@ class ThreadContext virtual void setVecElem(const RegId& reg, const VecElem& val) = 0; + virtual void setVecPredReg(const RegId& reg, + const VecPredRegContainer& val) = 0; + virtual void setCCReg(int reg_idx, CCReg val) = 0; virtual TheISA::PCState pcState() = 0; @@ -341,6 +350,11 @@ class ThreadContext virtual void setVecElemFlat(const RegIndex& idx, const ElemIndex& elemIdx, const VecElem& val) = 0; + virtual const VecPredRegContainer& readVecPredRegFlat(int idx) const = 0; + virtual VecPredRegContainer& getWritableVecPredRegFlat(int idx) = 0; + virtual void setVecPredRegFlat(int idx, + const VecPredRegContainer& val) = 0; + virtual CCReg readCCRegFlat(int idx) = 0; virtual void setCCRegFlat(int idx, CCReg val) = 0; /** @} */ @@ -502,6 +516,12 @@ class ProxyThreadContext : public ThreadContext const VecElem& readVecElem(const RegId& reg) const { return actualTC->readVecElem(reg); } + const VecPredRegContainer& readVecPredReg(const RegId& reg) const + { return actualTC->readVecPredReg(reg); } + + VecPredRegContainer& getWritableVecPredReg(const RegId& reg) + { return actualTC->getWritableVecPredReg(reg); } + CCReg readCCReg(int reg_idx) { return actualTC->readCCReg(reg_idx); } @@ -514,6 +534,9 @@ class ProxyThreadContext : public ThreadContext void setVecReg(const RegId& reg, const VecRegContainer& val) { actualTC->setVecReg(reg, val); } + 
void setVecPredReg(const RegId& reg, const VecPredRegContainer& val) + { actualTC->setVecPredReg(reg, val); } + void setVecElem(const RegId& reg, const VecElem& val) { actualTC->setVecElem(reg, val); } @@ -590,6 +613,15 @@ class ProxyThreadContext : public ThreadContext const VecElem& val) { actualTC->setVecElemFlat(id, elemIndex, val); } + const VecPredRegContainer& readVecPredRegFlat(int id) const + { return actualTC->readVecPredRegFlat(id); } + + VecPredRegContainer& getWritableVecPredRegFlat(int id) + { return actualTC->getWritableVecPredRegFlat(id); } + + void setVecPredRegFlat(int idx, const VecPredRegContainer& val) + { actualTC->setVecPredRegFlat(idx, val); } + CCReg readCCRegFlat(int idx) { return actualTC->readCCRegFlat(idx); } |