diff options
author | Giacomo Gabrielli <giacomo.gabrielli@arm.com> | 2018-10-16 16:04:08 +0100 |
---|---|---|
committer | Giacomo Gabrielli <giacomo.gabrielli@arm.com> | 2019-01-30 16:57:54 +0000 |
commit | 25474167e5b247d1b91fbf802c5b396a63ae705e (patch) | |
tree | b509597b23d792734f55c33b8125eebfbd9cd3a5 /src/cpu/o3 | |
parent | c6f5db8743f19b02a38146d9cf2a829883387008 (diff) | |
download | gem5-25474167e5b247d1b91fbf802c5b396a63ae705e.tar.xz |
arch,cpu: Add vector predicate registers
Latest-gen. vector/SIMD extensions, including the Arm Scalable Vector
Extension (SVE), introduce the notion of a predicate register file.
This changeset adds this feature across architectures and CPU models.
Change-Id: Iebcadbad89c0a582ff8b1b70de353305db603946
Signed-off-by: Giacomo Gabrielli <giacomo.gabrielli@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/13715
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
Reviewed-by: Jason Lowe-Power <jason@lowepower.com>
Diffstat (limited to 'src/cpu/o3')
-rw-r--r-- | src/cpu/o3/O3CPU.py | 2 | ||||
-rw-r--r-- | src/cpu/o3/comm.hh | 5 | ||||
-rw-r--r-- | src/cpu/o3/cpu.cc | 84 | ||||
-rw-r--r-- | src/cpu/o3/cpu.hh | 19 | ||||
-rw-r--r-- | src/cpu/o3/dyn_inst.hh | 25 | ||||
-rw-r--r-- | src/cpu/o3/free_list.hh | 23 | ||||
-rw-r--r-- | src/cpu/o3/inst_queue_impl.hh | 1 | ||||
-rw-r--r-- | src/cpu/o3/regfile.cc | 21 | ||||
-rw-r--r-- | src/cpu/o3/regfile.hh | 46 | ||||
-rw-r--r-- | src/cpu/o3/rename.hh | 3 | ||||
-rw-r--r-- | src/cpu/o3/rename_impl.hh | 11 | ||||
-rw-r--r-- | src/cpu/o3/rename_map.cc | 4 | ||||
-rw-r--r-- | src/cpu/o3/rename_map.hh | 25 | ||||
-rw-r--r-- | src/cpu/o3/thread_context.hh | 23 | ||||
-rw-r--r-- | src/cpu/o3/thread_context_impl.hh | 24 |
15 files changed, 303 insertions, 13 deletions
diff --git a/src/cpu/o3/O3CPU.py b/src/cpu/o3/O3CPU.py index 32cc19010..e73c09334 100644 --- a/src/cpu/o3/O3CPU.py +++ b/src/cpu/o3/O3CPU.py @@ -150,6 +150,8 @@ class DerivO3CPU(BaseCPU): _defaultNumPhysCCRegs = Self.numPhysIntRegs * 5 numPhysVecRegs = Param.Unsigned(256, "Number of physical vector " "registers") + numPhysVecPredRegs = Param.Unsigned(32, "Number of physical predicate " + "registers") numPhysCCRegs = Param.Unsigned(_defaultNumPhysCCRegs, "Number of physical cc registers") numIQEntries = Param.Unsigned(64, "Number of instruction queue entries") diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh index f5be5a804..df518b1e4 100644 --- a/src/cpu/o3/comm.hh +++ b/src/cpu/o3/comm.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, 2016 ARM Limited + * Copyright (c) 2011, 2016-2017 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved * @@ -121,6 +121,9 @@ class PhysRegId : private RegId { /** @Return true if it is a vector element physical register. */ bool isVectorPhysElem() const { return isVecElem(); } + /** @return true if it is a vector predicate physical register. */ + bool isVecPredPhysReg() const { return isVecPredReg(); } + /** @Return true if it is a condition-code physical register. 
*/ bool isMiscPhysReg() const { return isMiscReg(); } diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 5d92d92dc..ef3b17202 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -159,6 +159,7 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params) regFile(params->numPhysIntRegs, params->numPhysFloatRegs, params->numPhysVecRegs, + params->numPhysVecPredRegs, params->numPhysCCRegs, vecMode), @@ -258,6 +259,7 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params) assert(params->numPhysIntRegs >= numThreads * TheISA::NumIntRegs); assert(params->numPhysFloatRegs >= numThreads * TheISA::NumFloatRegs); assert(params->numPhysVecRegs >= numThreads * TheISA::NumVecRegs); + assert(params->numPhysVecPredRegs >= numThreads * TheISA::NumVecPredRegs); assert(params->numPhysCCRegs >= numThreads * TheISA::NumCCRegs); rename.setScoreboard(&scoreboard); @@ -325,6 +327,13 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params) } } + for (RegIndex ridx = 0; ridx < TheISA::NumVecPredRegs; ++ridx) { + PhysRegIdPtr phys_reg = freeList.getVecPredReg(); + renameMap[tid].setEntry(RegId(VecPredRegClass, ridx), phys_reg); + commitRenameMap[tid].setEntry( + RegId(VecPredRegClass, ridx), phys_reg); + } + for (RegIndex ridx = 0; ridx < TheISA::NumCCRegs; ++ridx) { PhysRegIdPtr phys_reg = freeList.getCCReg(); renameMap[tid].setEntry(RegId(CCRegClass, ridx), phys_reg); @@ -538,6 +547,16 @@ FullO3CPU<Impl>::regStats() .desc("number of vector regfile writes") .prereq(vecRegfileWrites); + vecPredRegfileReads + .name(name() + ".pred_regfile_reads") + .desc("number of predicate regfile reads") + .prereq(vecPredRegfileReads); + + vecPredRegfileWrites + .name(name() + ".pred_regfile_writes") + .desc("number of predicate regfile writes") + .prereq(vecPredRegfileWrites); + ccRegfileReads .name(name() + ".cc_regfile_reads") .desc("number of cc regfile reads") @@ -883,6 +902,14 @@ FullO3CPU<Impl>::removeThread(ThreadID tid) freeList.addReg(phys_reg); } + // Unbind vector predicate Regs from Rename Map + for 
(unsigned preg = 0; preg < TheISA::NumVecPredRegs; preg++) { + PhysRegIdPtr phys_reg = renameMap[tid].lookup( + RegId(VecPredRegClass, preg)); + scoreboard.unsetReg(phys_reg); + freeList.addReg(phys_reg); + } + // Unbind condition-code Regs from Rename Map for (RegId reg_id(CCRegClass, 0); reg_id.index() < TheISA::NumCCRegs; reg_id.index()++) { @@ -1334,6 +1361,24 @@ FullO3CPU<Impl>::readVecElem(PhysRegIdPtr phys_reg) const -> const VecElem& } template <class Impl> +auto +FullO3CPU<Impl>::readVecPredReg(PhysRegIdPtr phys_reg) const + -> const VecPredRegContainer& +{ + vecPredRegfileReads++; + return regFile.readVecPredReg(phys_reg); +} + +template <class Impl> +auto +FullO3CPU<Impl>::getWritableVecPredReg(PhysRegIdPtr phys_reg) + -> VecPredRegContainer& +{ + vecPredRegfileWrites++; + return regFile.getWritableVecPredReg(phys_reg); +} + +template <class Impl> CCReg FullO3CPU<Impl>::readCCReg(PhysRegIdPtr phys_reg) { @@ -1375,6 +1420,15 @@ FullO3CPU<Impl>::setVecElem(PhysRegIdPtr phys_reg, const VecElem& val) template <class Impl> void +FullO3CPU<Impl>::setVecPredReg(PhysRegIdPtr phys_reg, + const VecPredRegContainer& val) +{ + vecPredRegfileWrites++; + regFile.setVecPredReg(phys_reg, val); +} + +template <class Impl> +void FullO3CPU<Impl>::setCCReg(PhysRegIdPtr phys_reg, CCReg val) { ccRegfileWrites++; @@ -1434,6 +1488,26 @@ FullO3CPU<Impl>::readArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx, } template <class Impl> +auto +FullO3CPU<Impl>::readArchVecPredReg(int reg_idx, ThreadID tid) const + -> const VecPredRegContainer& +{ + PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( + RegId(VecPredRegClass, reg_idx)); + return readVecPredReg(phys_reg); +} + +template <class Impl> +auto +FullO3CPU<Impl>::getWritableArchVecPredReg(int reg_idx, ThreadID tid) + -> VecPredRegContainer& +{ + PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( + RegId(VecPredRegClass, reg_idx)); + return getWritableVecPredReg(phys_reg); +} + +template <class Impl> CCReg 
FullO3CPU<Impl>::readArchCCReg(int reg_idx, ThreadID tid) { @@ -1488,6 +1562,16 @@ FullO3CPU<Impl>::setArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx, template <class Impl> void +FullO3CPU<Impl>::setArchVecPredReg(int reg_idx, const VecPredRegContainer& val, + ThreadID tid) +{ + PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( + RegId(VecPredRegClass, reg_idx)); + setVecPredReg(phys_reg, val); +} + +template <class Impl> +void FullO3CPU<Impl>::setArchCCReg(int reg_idx, CCReg val, ThreadID tid) { ccRegfileWrites++; diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index b5f754056..30ed4ef3b 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -107,6 +107,8 @@ class FullO3CPU : public BaseO3CPU using VecElem = TheISA::VecElem; using VecRegContainer = TheISA::VecRegContainer; + using VecPredRegContainer = TheISA::VecPredRegContainer; + typedef O3ThreadState<Impl> ImplState; typedef O3ThreadState<Impl> Thread; @@ -457,6 +459,10 @@ class FullO3CPU : public BaseO3CPU const VecElem& readVecElem(PhysRegIdPtr reg_idx) const; + const VecPredRegContainer& readVecPredReg(PhysRegIdPtr reg_idx) const; + + VecPredRegContainer& getWritableVecPredReg(PhysRegIdPtr reg_idx); + TheISA::CCReg readCCReg(PhysRegIdPtr phys_reg); void setIntReg(PhysRegIdPtr phys_reg, RegVal val); @@ -467,6 +473,8 @@ class FullO3CPU : public BaseO3CPU void setVecElem(PhysRegIdPtr reg_idx, const VecElem& val); + void setVecPredReg(PhysRegIdPtr reg_idx, const VecPredRegContainer& val); + void setCCReg(PhysRegIdPtr phys_reg, TheISA::CCReg val); RegVal readArchIntReg(int reg_idx, ThreadID tid); @@ -501,6 +509,11 @@ class FullO3CPU : public BaseO3CPU const VecElem& readArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx, ThreadID tid) const; + const VecPredRegContainer& readArchVecPredReg(int reg_idx, + ThreadID tid) const; + + VecPredRegContainer& getWritableArchVecPredReg(int reg_idx, ThreadID tid); + TheISA::CCReg readArchCCReg(int reg_idx, ThreadID tid); /** Architectural register 
accessors. Looks up in the commit @@ -512,6 +525,9 @@ class FullO3CPU : public BaseO3CPU void setArchFloatRegBits(int reg_idx, RegVal val, ThreadID tid); + void setArchVecPredReg(int reg_idx, const VecPredRegContainer& val, + ThreadID tid); + void setArchVecReg(int reg_idx, const VecRegContainer& val, ThreadID tid); void setArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx, @@ -805,6 +821,9 @@ class FullO3CPU : public BaseO3CPU //number of vector register file accesses mutable Stats::Scalar vecRegfileReads; Stats::Scalar vecRegfileWrites; + //number of predicate register file accesses + mutable Stats::Scalar vecPredRegfileReads; + Stats::Scalar vecPredRegfileWrites; //number of CC register file accesses Stats::Scalar ccRegfileReads; Stats::Scalar ccRegfileWrites; diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh index 5bd0f8e47..9793f4ead 100644 --- a/src/cpu/o3/dyn_inst.hh +++ b/src/cpu/o3/dyn_inst.hh @@ -70,6 +70,7 @@ class BaseO3DynInst : public BaseDynInst<Impl> using VecRegContainer = TheISA::VecRegContainer; using VecElem = TheISA::VecElem; static constexpr auto NumVecElemPerVecReg = TheISA::NumVecElemPerVecReg; + using VecPredRegContainer = TheISA::VecPredRegContainer; enum { MaxInstSrcRegs = TheISA::MaxInstSrcRegs, //< Max source regs @@ -231,6 +232,10 @@ class BaseO3DynInst : public BaseDynInst<Impl> this->setVecElemOperand(this->staticInst.get(), idx, this->cpu->readVecElem(prev_phys_reg)); break; + case VecPredRegClass: + this->setVecPredRegOperand(this->staticInst.get(), idx, + this->cpu->readVecPredReg(prev_phys_reg)); + break; case CCRegClass: this->setCCRegOperand(this->staticInst.get(), idx, this->cpu->readCCReg(prev_phys_reg)); @@ -361,6 +366,18 @@ class BaseO3DynInst : public BaseDynInst<Impl> return this->cpu->readVecElem(this->_srcRegIdx[idx]); } + const VecPredRegContainer& + readVecPredRegOperand(const StaticInst *si, int idx) const override + { + return this->cpu->readVecPredReg(this->_srcRegIdx[idx]); + } + + 
VecPredRegContainer& + getWritableVecPredRegOperand(const StaticInst *si, int idx) override + { + return this->cpu->getWritableVecPredReg(this->_destRegIdx[idx]); + } + CCReg readCCRegOperand(const StaticInst *si, int idx) { return this->cpu->readCCReg(this->_srcRegIdx[idx]); @@ -399,6 +416,14 @@ class BaseO3DynInst : public BaseDynInst<Impl> BaseDynInst<Impl>::setVecElemOperand(si, idx, val); } + void + setVecPredRegOperand(const StaticInst *si, int idx, + const VecPredRegContainer& val) override + { + this->cpu->setVecPredReg(this->_destRegIdx[idx], val); + BaseDynInst<Impl>::setVecPredRegOperand(si, idx, val); + } + void setCCRegOperand(const StaticInst *si, int idx, CCReg val) { this->cpu->setCCReg(this->_destRegIdx[idx], val); diff --git a/src/cpu/o3/free_list.hh b/src/cpu/o3/free_list.hh index e7a899cdf..46bebf30d 100644 --- a/src/cpu/o3/free_list.hh +++ b/src/cpu/o3/free_list.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016 ARM Limited + * Copyright (c) 2016-2017 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -138,6 +138,9 @@ class UnifiedFreeList SimpleFreeList vecElemList; /** @} */ + /** The list of free predicate registers. */ + SimpleFreeList predList; + /** The list of free condition-code registers. */ SimpleFreeList ccList; @@ -183,6 +186,9 @@ class UnifiedFreeList /** Gets a free vector elemenet register. */ PhysRegIdPtr getVecElem() { return vecElemList.getReg(); } + /** Gets a free predicate register. */ + PhysRegIdPtr getVecPredReg() { return predList.getReg(); } + /** Gets a free cc register. */ PhysRegIdPtr getCCReg() { return ccList.getReg(); } @@ -207,6 +213,9 @@ class UnifiedFreeList vecElemList.addReg(freed_reg); } + /** Adds a predicate register back to the free list. */ + void addVecPredReg(PhysRegIdPtr freed_reg) { predList.addReg(freed_reg); } + /** Adds a cc register back to the free list. 
*/ void addCCReg(PhysRegIdPtr freed_reg) { ccList.addReg(freed_reg); } @@ -222,6 +231,9 @@ class UnifiedFreeList /** Checks if there are any free vector registers. */ bool hasFreeVecElems() const { return vecElemList.hasFreeRegs(); } + /** Checks if there are any free predicate registers. */ + bool hasFreeVecPredRegs() const { return predList.hasFreeRegs(); } + /** Checks if there are any free cc registers. */ bool hasFreeCCRegs() const { return ccList.hasFreeRegs(); } @@ -237,6 +249,9 @@ class UnifiedFreeList /** Returns the number of free vector registers. */ unsigned numFreeVecElems() const { return vecElemList.numFreeRegs(); } + /** Returns the number of free predicate registers. */ + unsigned numFreeVecPredRegs() const { return predList.numFreeRegs(); } + /** Returns the number of free cc registers. */ unsigned numFreeCCRegs() const { return ccList.numFreeRegs(); } }; @@ -267,6 +282,9 @@ UnifiedFreeList::addRegs(InputIt first, InputIt last) case VecElemClass: vecElemList.addRegs(first, last); break; + case VecPredRegClass: + predList.addRegs(first, last); + break; case CCRegClass: ccList.addRegs(first, last); break; @@ -297,6 +315,9 @@ UnifiedFreeList::addReg(PhysRegIdPtr freed_reg) case VecElemClass: vecElemList.addReg(freed_reg); break; + case VecPredRegClass: + predList.addReg(freed_reg); + break; case CCRegClass: ccList.addReg(freed_reg); break; diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index 4a55a91ea..ddd7b6d5f 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -104,6 +104,7 @@ InstructionQueue<Impl>::InstructionQueue(O3CPU *cpu_ptr, IEW *iew_ptr, numPhysRegs = params->numPhysIntRegs + params->numPhysFloatRegs + params->numPhysVecRegs + params->numPhysVecRegs * TheISA::NumVecElemPerVecReg + + params->numPhysVecPredRegs + params->numPhysCCRegs; //Create an entry for each physical register within the diff --git a/src/cpu/o3/regfile.cc b/src/cpu/o3/regfile.cc index 2f41e2ac2..cc4bba6b0 100644 
--- a/src/cpu/o3/regfile.cc +++ b/src/cpu/o3/regfile.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016 ARM Limited + * Copyright (c) 2016-2017 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -52,22 +52,26 @@ PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs, unsigned _numPhysicalFloatRegs, unsigned _numPhysicalVecRegs, + unsigned _numPhysicalVecPredRegs, unsigned _numPhysicalCCRegs, VecMode vmode) : intRegFile(_numPhysicalIntRegs), floatRegFile(_numPhysicalFloatRegs), vectorRegFile(_numPhysicalVecRegs), + vecPredRegFile(_numPhysicalVecPredRegs), ccRegFile(_numPhysicalCCRegs), numPhysicalIntRegs(_numPhysicalIntRegs), numPhysicalFloatRegs(_numPhysicalFloatRegs), numPhysicalVecRegs(_numPhysicalVecRegs), numPhysicalVecElemRegs(_numPhysicalVecRegs * NumVecElemPerVecReg), + numPhysicalVecPredRegs(_numPhysicalVecPredRegs), numPhysicalCCRegs(_numPhysicalCCRegs), totalNumRegs(_numPhysicalIntRegs + _numPhysicalFloatRegs + _numPhysicalVecRegs + _numPhysicalVecRegs * NumVecElemPerVecReg + + _numPhysicalVecPredRegs + _numPhysicalCCRegs), vecMode(vmode) { @@ -108,6 +112,12 @@ PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs, } } + // The next batch of the registers are the predicate physical + // registers; put them onto the predicate free list. + for (phys_reg = 0; phys_reg < numPhysicalVecPredRegs; phys_reg++) { + vecPredRegIds.emplace_back(VecPredRegClass, phys_reg, flat_reg_idx++); + } + // The rest of the registers are the condition-code physical // registers; put them onto the condition-code free list. for (phys_reg = 0; phys_reg < numPhysicalCCRegs; phys_reg++) { @@ -159,6 +169,13 @@ PhysRegFile::initFreeList(UnifiedFreeList *freeList) else freeList->addRegs(vecElemIds.begin(), vecElemIds.end()); + // The next batch of the registers are the predicate physical + // registers; put them onto the predicate free list. 
+ for (reg_idx = 0; reg_idx < numPhysicalVecPredRegs; reg_idx++) { + assert(vecPredRegIds[reg_idx].index() == reg_idx); + } + freeList->addRegs(vecPredRegIds.begin(), vecPredRegIds.end()); + // The rest of the registers are the condition-code physical // registers; put them onto the condition-code free list. for (reg_idx = 0; reg_idx < numPhysicalCCRegs; reg_idx++) { @@ -191,6 +208,8 @@ PhysRegFile::getRegIds(RegClass cls) -> IdRange return std::make_pair(vecRegIds.begin(), vecRegIds.end()); case VecElemClass: return std::make_pair(vecElemIds.begin(), vecElemIds.end()); + case VecPredRegClass: + return std::make_pair(vecPredRegIds.begin(), vecPredRegIds.end()); case CCRegClass: return std::make_pair(ccRegIds.begin(), ccRegIds.end()); case MiscRegClass: diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh index 9d9113240..4077c99a4 100644 --- a/src/cpu/o3/regfile.hh +++ b/src/cpu/o3/regfile.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016 ARM Limited + * Copyright (c) 2016-2017 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -70,6 +70,7 @@ class PhysRegFile using VecRegContainer = TheISA::VecRegContainer; using PhysIds = std::vector<PhysRegId>; using VecMode = Enums::VecRegRenameMode; + using VecPredRegContainer = TheISA::VecPredRegContainer; public: using IdRange = std::pair<PhysIds::const_iterator, PhysIds::const_iterator>; @@ -89,6 +90,10 @@ class PhysRegFile std::vector<PhysRegId> vecRegIds; std::vector<PhysRegId> vecElemIds; + /** Predicate register file. */ + std::vector<VecPredRegContainer> vecPredRegFile; + std::vector<PhysRegId> vecPredRegIds; + /** Condition-code register file. 
*/ std::vector<CCReg> ccRegFile; std::vector<PhysRegId> ccRegIds; @@ -117,6 +122,11 @@ class PhysRegFile unsigned numPhysicalVecElemRegs; /** + * Number of physical predicate registers + */ + unsigned numPhysicalVecPredRegs; + + /** * Number of physical CC registers */ unsigned numPhysicalCCRegs; @@ -135,6 +145,7 @@ class PhysRegFile PhysRegFile(unsigned _numPhysicalIntRegs, unsigned _numPhysicalFloatRegs, unsigned _numPhysicalVecRegs, + unsigned _numPhysicalVecPredRegs, unsigned _numPhysicalCCRegs, VecMode vmode ); @@ -154,6 +165,8 @@ class PhysRegFile unsigned numFloatPhysRegs() const { return numPhysicalFloatRegs; } /** @return the number of vector physical registers. */ unsigned numVecPhysRegs() const { return numPhysicalVecRegs; } + /** @return the number of predicate physical registers. */ + unsigned numPredPhysRegs() const { return numPhysicalVecPredRegs; } /** @return the number of vector physical registers. */ unsigned numVecElemPhysRegs() const { return numPhysicalVecElemRegs; } @@ -201,7 +214,7 @@ class PhysRegFile DPRINTF(IEW, "RegFile: Access to vector register %i, has " "data %s\n", int(phys_reg->index()), - vectorRegFile[phys_reg->index()].as<VecElem>().print()); + vectorRegFile[phys_reg->index()].print()); return vectorRegFile[phys_reg->index()]; } @@ -258,6 +271,24 @@ class PhysRegFile return val; } + /** Reads a predicate register. */ + const VecPredRegContainer& readVecPredReg(PhysRegIdPtr phys_reg) const + { + assert(phys_reg->isVecPredPhysReg()); + + DPRINTF(IEW, "RegFile: Access to predicate register %i, has " + "data %s\n", int(phys_reg->index()), + vecPredRegFile[phys_reg->index()].print()); + + return vecPredRegFile[phys_reg->index()]; + } + + VecPredRegContainer& getWritableVecPredReg(PhysRegIdPtr phys_reg) + { + /* const_cast for not duplicating code above. */ + return const_cast<VecPredRegContainer&>(readVecPredReg(phys_reg)); + } + /** Reads a condition-code register. 
*/ CCReg readCCReg(PhysRegIdPtr phys_reg) @@ -321,6 +352,17 @@ class PhysRegFile val; } + /** Sets a predicate register to the given value. */ + void setVecPredReg(PhysRegIdPtr phys_reg, const VecPredRegContainer& val) + { + assert(phys_reg->isVecPredPhysReg()); + + DPRINTF(IEW, "RegFile: Setting predicate register %i to %s\n", + int(phys_reg->index()), val.print()); + + vecPredRegFile[phys_reg->index()] = val; + } + /** Sets a condition-code register to the given value. */ void setCCReg(PhysRegIdPtr phys_reg, CCReg val) diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh index bd5e72dec..a091c0908 100644 --- a/src/cpu/o3/rename.hh +++ b/src/cpu/o3/rename.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012 ARM Limited + * Copyright (c) 2012, 2017 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -514,6 +514,7 @@ class DefaultRename Stats::Scalar intRenameLookups; Stats::Scalar fpRenameLookups; Stats::Scalar vecRenameLookups; + Stats::Scalar vecPredRenameLookups; /** Stat for total number of committed renaming mappings. */ Stats::Scalar renameCommittedMaps; /** Stat for total number of mappings that were undone due to a squash. 
*/ diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index ed5dfb6eb..b63163f04 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -196,6 +196,10 @@ DefaultRename<Impl>::regStats() .name(name() + ".vec_rename_lookups") .desc("Number of vector rename lookups") .prereq(vecRenameLookups); + vecPredRenameLookups + .name(name() + ".vec_pred_rename_lookups") + .desc("Number of vector predicate rename lookups") + .prereq(vecPredRenameLookups); } template <class Impl> @@ -659,6 +663,7 @@ DefaultRename<Impl>::renameInsts(ThreadID tid) inst->numFPDestRegs(), inst->numVecDestRegs(), inst->numVecElemDestRegs(), + inst->numVecPredDestRegs(), inst->numCCDestRegs())) { DPRINTF(Rename, "Blocking due to lack of free " "physical registers to rename to.\n"); @@ -1041,6 +1046,9 @@ DefaultRename<Impl>::renameSrcRegs(const DynInstPtr &inst, ThreadID tid) case VecElemClass: vecRenameLookups++; break; + case VecPredRegClass: + vecPredRenameLookups++; + break; case CCRegClass: case MiscRegClass: break; @@ -1256,7 +1264,7 @@ DefaultRename<Impl>::readFreeEntries(ThreadID tid) } DPRINTF(Rename, "[tid:%i]: Free IQ: %i, Free ROB: %i, " - "Free LQ: %i, Free SQ: %i, FreeRM %i(%i %i %i %i)\n", + "Free LQ: %i, Free SQ: %i, FreeRM %i(%i %i %i %i %i)\n", tid, freeEntries[tid].iqEntries, freeEntries[tid].robEntries, @@ -1266,6 +1274,7 @@ DefaultRename<Impl>::readFreeEntries(ThreadID tid) renameMap[tid]->numFreeIntEntries(), renameMap[tid]->numFreeFloatEntries(), renameMap[tid]->numFreeVecEntries(), + renameMap[tid]->numFreePredEntries(), renameMap[tid]->numFreeCCEntries()); DPRINTF(Rename, "[tid:%i]: %i instructions not yet in ROB\n", diff --git a/src/cpu/o3/rename_map.cc b/src/cpu/o3/rename_map.cc index 86c43932c..603f1ff36 100644 --- a/src/cpu/o3/rename_map.cc +++ b/src/cpu/o3/rename_map.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016,2019 ARM Limited + * Copyright (c) 2016-2017,2019 ARM Limited * All rights reserved * * The license below extends only to 
copyright in the software and shall @@ -120,6 +120,8 @@ UnifiedRenameMap::init(PhysRegFile *_regFile, vecElemMap.init(TheISA::NumVecRegs * NVecElems, &(freeList->vecElemList), (RegIndex)-1); + predMap.init(TheISA::NumVecPredRegs, &(freeList->predList), (RegIndex)-1); + ccMap.init(TheISA::NumCCRegs, &(freeList->ccList), (RegIndex)-1); } diff --git a/src/cpu/o3/rename_map.hh b/src/cpu/o3/rename_map.hh index d30668027..5424633e5 100644 --- a/src/cpu/o3/rename_map.hh +++ b/src/cpu/o3/rename_map.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016 ARM Limited + * Copyright (c) 2015-2017 ARM Limited * All rights reserved. * * The license below extends only to copyright in the software and shall @@ -172,6 +172,7 @@ class UnifiedRenameMap private: static constexpr uint32_t NVecElems = TheISA::NumVecElemPerVecReg; using VecReg = TheISA::VecReg; + using VecPredReg = TheISA::VecPredReg; /** The integer register rename map */ SimpleRenameMap intMap; @@ -188,6 +189,9 @@ class UnifiedRenameMap /** The vector element register rename map */ SimpleRenameMap vecElemMap; + /** The predicate register rename map */ + SimpleRenameMap predMap; + using VecMode = Enums::VecRegRenameMode; VecMode vecMode; @@ -235,6 +239,8 @@ class UnifiedRenameMap case VecElemClass: assert(vecMode == Enums::Elem); return vecElemMap.rename(arch_reg); + case VecPredRegClass: + return predMap.rename(arch_reg); case CCRegClass: return ccMap.rename(arch_reg); case MiscRegClass: @@ -276,6 +282,9 @@ class UnifiedRenameMap assert(vecMode == Enums::Elem); return vecElemMap.lookup(arch_reg); + case VecPredRegClass: + return predMap.lookup(arch_reg); + case CCRegClass: return ccMap.lookup(arch_reg); @@ -319,6 +328,10 @@ class UnifiedRenameMap assert(vecMode == Enums::Elem); return vecElemMap.setEntry(arch_reg, phys_reg); + case VecPredRegClass: + assert(phys_reg->isVecPredPhysReg()); + return predMap.setEntry(arch_reg, phys_reg); + case CCRegClass: assert(phys_reg->isCCPhysReg()); return ccMap.setEntry(arch_reg, 
phys_reg); @@ -345,10 +358,11 @@ class UnifiedRenameMap */ unsigned numFreeEntries() const { - return std::min( + return std::min(std::min( std::min(intMap.numFreeEntries(), floatMap.numFreeEntries()), vecMode == Enums::Full ? vecMap.numFreeEntries() - : vecElemMap.numFreeEntries()); + : vecElemMap.numFreeEntries()), + predMap.numFreeEntries()); } unsigned numFreeIntEntries() const { return intMap.numFreeEntries(); } @@ -359,18 +373,21 @@ class UnifiedRenameMap ? vecMap.numFreeEntries() : vecElemMap.numFreeEntries(); } + unsigned numFreePredEntries() const { return predMap.numFreeEntries(); } unsigned numFreeCCEntries() const { return ccMap.numFreeEntries(); } /** * Return whether there are enough registers to serve the request. */ bool canRename(uint32_t intRegs, uint32_t floatRegs, uint32_t vectorRegs, - uint32_t vecElemRegs, uint32_t ccRegs) const + uint32_t vecElemRegs, uint32_t vecPredRegs, + uint32_t ccRegs) const { return intRegs <= intMap.numFreeEntries() && floatRegs <= floatMap.numFreeEntries() && vectorRegs <= vecMap.numFreeEntries() && vecElemRegs <= vecElemMap.numFreeEntries() && + vecPredRegs <= predMap.numFreeEntries() && ccRegs <= ccMap.numFreeEntries(); } /** diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh index c74936469..7858f5a0a 100644 --- a/src/cpu/o3/thread_context.hh +++ b/src/cpu/o3/thread_context.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2012, 2016 ARM Limited + * Copyright (c) 2011-2012, 2016-2018 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. 
* All rights reserved * @@ -263,6 +263,14 @@ class O3ThreadContext : public ThreadContext return readVecElemFlat(flattenRegId(reg).index(), reg.elemIndex()); } + virtual const VecPredRegContainer& readVecPredReg(const RegId& id) const { + return readVecPredRegFlat(flattenRegId(id).index()); + } + + virtual VecPredRegContainer& getWritableVecPredReg(const RegId& id) { + return getWritableVecPredRegFlat(flattenRegId(id).index()); + } + virtual CCReg readCCReg(int reg_idx) { return readCCRegFlat(flattenRegId(RegId(CCRegClass, reg_idx)).index()); @@ -295,6 +303,13 @@ class O3ThreadContext : public ThreadContext } virtual void + setVecPredReg(const RegId& reg, + const VecPredRegContainer& val) + { + setVecPredRegFlat(flattenRegId(reg).index(), val); + } + + virtual void setCCReg(int reg_idx, CCReg val) { setCCRegFlat(flattenRegId(RegId(CCRegClass, reg_idx)).index(), val); @@ -403,6 +418,12 @@ class O3ThreadContext : public ThreadContext virtual void setVecElemFlat(const RegIndex& idx, const ElemIndex& elemIdx, const VecElem& val); + virtual const VecPredRegContainer& readVecPredRegFlat(int idx) + const override; + virtual VecPredRegContainer& getWritableVecPredRegFlat(int idx) override; + virtual void setVecPredRegFlat(int idx, + const VecPredRegContainer& val) override; + virtual CCReg readCCRegFlat(int idx); virtual void setCCRegFlat(int idx, CCReg val); }; diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh index bd2bf63b0..59562ba3b 100644 --- a/src/cpu/o3/thread_context_impl.hh +++ b/src/cpu/o3/thread_context_impl.hh @@ -233,6 +233,20 @@ O3ThreadContext<Impl>::readVecElemFlat(const RegIndex& idx, } template <class Impl> +const TheISA::VecPredRegContainer& +O3ThreadContext<Impl>::readVecPredRegFlat(int reg_id) const +{ + return cpu->readArchVecPredReg(reg_id, thread->threadId()); +} + +template <class Impl> +TheISA::VecPredRegContainer& +O3ThreadContext<Impl>::getWritableVecPredRegFlat(int reg_id) +{ + return 
cpu->getWritableArchVecPredReg(reg_id, thread->threadId()); +} + +template <class Impl> TheISA::CCReg O3ThreadContext<Impl>::readCCRegFlat(int reg_idx) { @@ -277,6 +291,16 @@ O3ThreadContext<Impl>::setVecElemFlat(const RegIndex& idx, template <class Impl> void +O3ThreadContext<Impl>::setVecPredRegFlat(int reg_idx, + const VecPredRegContainer& val) +{ + cpu->setArchVecPredReg(reg_idx, val, thread->threadId()); + + conditionalSquash(); +} + +template <class Impl> +void O3ThreadContext<Impl>::setCCRegFlat(int reg_idx, TheISA::CCReg val) { cpu->setArchCCReg(reg_idx, val, thread->threadId()); |