diff options
author | Nilay Vaish <nilay@cs.wisc.edu> | 2015-07-26 10:21:20 -0500 |
---|---|---|
committer | Nilay Vaish <nilay@cs.wisc.edu> | 2015-07-26 10:21:20 -0500 |
commit | 608641e23c7f2288810c3f23a1a63790b664f2ab (patch) | |
tree | 0656aaf9653e8d263f5daac0d5f0fe3190193ae5 /src/cpu/o3 | |
parent | 6e354e82d9395b20f5f148cd545d0666b626e8ac (diff) | |
download | gem5-608641e23c7f2288810c3f23a1a63790b664f2ab.tar.xz |
cpu: implements vector registers
This adds a vector register type. The type is defined as a std::array of a
fixed number of uint64_ts. The isa_parser.py has been modified to parse vector
register operands and generate the required code. Different cpus have vector
register files now.
Diffstat (limited to 'src/cpu/o3')
-rw-r--r-- | src/cpu/o3/O3CPU.py | 7 | ||||
-rw-r--r-- | src/cpu/o3/cpu.cc | 76 | ||||
-rw-r--r-- | src/cpu/o3/cpu.hh | 12 | ||||
-rw-r--r-- | src/cpu/o3/dyn_inst.hh | 19 | ||||
-rw-r--r-- | src/cpu/o3/free_list.hh | 21 | ||||
-rw-r--r-- | src/cpu/o3/inst_queue_impl.hh | 2 | ||||
-rw-r--r-- | src/cpu/o3/regfile.cc | 26 | ||||
-rw-r--r-- | src/cpu/o3/regfile.hh | 52 | ||||
-rw-r--r-- | src/cpu/o3/rename_impl.hh | 20 | ||||
-rw-r--r-- | src/cpu/o3/rename_map.cc | 12 | ||||
-rw-r--r-- | src/cpu/o3/rename_map.hh | 41 | ||||
-rwxr-xr-x | src/cpu/o3/thread_context.hh | 12 | ||||
-rwxr-xr-x | src/cpu/o3/thread_context_impl.hh | 23 |
13 files changed, 305 insertions, 18 deletions
diff --git a/src/cpu/o3/O3CPU.py b/src/cpu/o3/O3CPU.py index 92f96a3b6..d2220de82 100644 --- a/src/cpu/o3/O3CPU.py +++ b/src/cpu/o3/O3CPU.py @@ -114,6 +114,7 @@ class DerivO3CPU(BaseCPU): numPhysIntRegs = Param.Unsigned(256, "Number of physical integer registers") numPhysFloatRegs = Param.Unsigned(256, "Number of physical floating point " "registers") + # most ISAs don't use condition-code regs, so default is 0 _defaultNumPhysCCRegs = 0 if buildEnv['TARGET_ISA'] in ('arm','x86'): @@ -126,6 +127,12 @@ class DerivO3CPU(BaseCPU): _defaultNumPhysCCRegs = Self.numPhysIntRegs * 5 numPhysCCRegs = Param.Unsigned(_defaultNumPhysCCRegs, "Number of physical cc registers") + + # most ISAs don't use vector regs, so default is 0 + _defaultNumPhysVectorRegs = 0 + numPhysVectorRegs = Param.Unsigned(_defaultNumPhysVectorRegs, + "Number of physical vector registers") + numIQEntries = Param.Unsigned(64, "Number of instruction queue entries") numROBEntries = Param.Unsigned(192, "Number of reorder buffer entries") diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 026907a94..d8f39bbe4 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -170,7 +170,8 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params) regFile(params->numPhysIntRegs, params->numPhysFloatRegs, - params->numPhysCCRegs), + params->numPhysCCRegs, + params->numPhysVectorRegs), freeList(name() + ".freelist", ®File), @@ -269,6 +270,7 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params) assert(params->numPhysIntRegs >= numThreads * TheISA::NumIntRegs); assert(params->numPhysFloatRegs >= numThreads * TheISA::NumFloatRegs); assert(params->numPhysCCRegs >= numThreads * TheISA::NumCCRegs); + assert(params->numPhysVectorRegs >= numThreads * TheISA::NumVectorRegs); rename.setScoreboard(&scoreboard); iew.setScoreboard(&scoreboard); @@ -313,6 +315,12 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params) renameMap[tid].setCCEntry(ridx, phys_reg); commitRenameMap[tid].setCCEntry(ridx, phys_reg); } + + for (RegIndex ridx = 0; ridx < TheISA::NumVectorRegs; ++ridx) { + PhysRegIndex phys_reg = freeList.getVectorReg(); + renameMap[tid].setVectorEntry(ridx, phys_reg); + commitRenameMap[tid].setVectorEntry(ridx, phys_reg); + } } rename.setRenameMap(renameMap); @@ -521,6 +529,16 @@ FullO3CPU<Impl>::regStats() .desc("number of cc regfile writes") .prereq(ccRegfileWrites); + vectorRegfileReads + .name(name() + ".vector_regfile_reads") + .desc("number of vector regfile reads") + .prereq(vectorRegfileReads); + + vectorRegfileWrites + .name(name() + ".vector_regfile_writes") + .desc("number of vector regfile writes") + .prereq(vectorRegfileWrites); + miscRegfileReads .name(name() + ".misc_regfile_reads") .desc("number of misc regfile reads") @@ -807,6 +825,18 @@ FullO3CPU<Impl>::insertThread(ThreadID tid) scoreboard.setReg(phys_reg); } + //Bind vector Regs to Rename Map + max_reg = TheISA::NumIntRegs + TheISA::NumFloatRegs + TheISA::NumCCRegs + + TheISA::NumVectorRegs; + for (int vreg = TheISA::NumIntRegs + TheISA::NumFloatRegs + + TheISA::NumCCRegs; + vreg < max_reg; vreg++) { + PhysRegIndex phys_reg = freeList.getVectorReg(); + + renameMap[tid].setEntry(vreg, phys_reg); + scoreboard.setReg(phys_reg); + } + //Copy Thread Data Into RegFile //this->copyFromTC(tid); @@ -860,6 +890,14 @@ FullO3CPU<Impl>::removeThread(ThreadID tid) freeList.addReg(phys_reg); } + // Unbind condition-code Regs from Rename Map + max_reg = TheISA::Vector_Reg_Base + TheISA::NumVectorRegs; + for (int vreg = TheISA::Vector_Reg_Base; vreg < max_reg; vreg++) { + PhysRegIndex phys_reg = renameMap[tid].lookup(vreg); + scoreboard.unsetReg(phys_reg); + freeList.addReg(phys_reg); + } + // Squash Throughout Pipeline DynInstPtr inst = commit.rob->readHeadInst(tid); InstSeqNum squash_seq_num = inst->seqNum; @@ -1259,6 +1297,14 @@ FullO3CPU<Impl>::readCCReg(int reg_idx) } template <class Impl> +const VectorReg & +FullO3CPU<Impl>::readVectorReg(int reg_idx) +{ + vectorRegfileReads++; + return regFile.readVectorReg(reg_idx); +} + +template <class Impl> void FullO3CPU<Impl>::setIntReg(int reg_idx, uint64_t val) { @@ -1291,6 +1337,14 @@ FullO3CPU<Impl>::setCCReg(int reg_idx, CCReg val) } template <class Impl> +void +FullO3CPU<Impl>::setVectorReg(int reg_idx, const VectorReg &val) +{ + vectorRegfileWrites++; + regFile.setVectorReg(reg_idx, val); +} + +template <class Impl> uint64_t FullO3CPU<Impl>::readArchIntReg(int reg_idx, ThreadID tid) { @@ -1331,6 +1385,16 @@ FullO3CPU<Impl>::readArchCCReg(int reg_idx, ThreadID tid) } template <class Impl> +const VectorReg& +FullO3CPU<Impl>::readArchVectorReg(int reg_idx, ThreadID tid) +{ + vectorRegfileReads++; + PhysRegIndex phys_reg = commitRenameMap[tid].lookupVector(reg_idx); + + return regFile.readVectorReg(phys_reg); +} + +template <class Impl> void FullO3CPU<Impl>::setArchIntReg(int reg_idx, uint64_t val, ThreadID tid) { @@ -1371,6 +1435,16 @@ FullO3CPU<Impl>::setArchCCReg(int reg_idx, CCReg val, ThreadID tid) } template <class Impl> +void +FullO3CPU<Impl>::setArchVectorReg(int reg_idx, const VectorReg &val, + ThreadID tid) +{ + vectorRegfileWrites++; + PhysRegIndex phys_reg = commitRenameMap[tid].lookupVector(reg_idx); + regFile.setVectorReg(phys_reg, val); +} + +template <class Impl> TheISA::PCState FullO3CPU<Impl>::pcState(ThreadID tid) { diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index aa02ee2ea..f16450d19 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -427,6 +427,8 @@ class FullO3CPU : public BaseO3CPU TheISA::CCReg readCCReg(int reg_idx); + const TheISA::VectorReg &readVectorReg(int reg_idx); + void setIntReg(int reg_idx, uint64_t val); void setFloatReg(int reg_idx, TheISA::FloatReg val); @@ -435,6 +437,8 @@ class FullO3CPU : public BaseO3CPU void setCCReg(int reg_idx, TheISA::CCReg val); + void setVectorReg(int reg_idx, const TheISA::VectorReg &val); + uint64_t readArchIntReg(int reg_idx, ThreadID tid); float readArchFloatReg(int reg_idx, ThreadID tid); @@ -443,6 +447,8 @@ class FullO3CPU : public BaseO3CPU TheISA::CCReg readArchCCReg(int reg_idx, ThreadID tid); + const TheISA::VectorReg &readArchVectorReg(int reg_idx, ThreadID tid); + /** Architectural register accessors. Looks up in the commit * rename table to obtain the true physical index of the * architected register first, then accesses that physical @@ -456,6 +462,9 @@ class FullO3CPU : public BaseO3CPU void setArchCCReg(int reg_idx, TheISA::CCReg val, ThreadID tid); + void setArchVectorReg(int reg_idx, const TheISA::VectorReg &val, + ThreadID tid); + /** Sets the commit PC state of a specific thread. */ void pcState(const TheISA::PCState &newPCState, ThreadID tid); @@ -734,6 +743,9 @@ class FullO3CPU : public BaseO3CPU //number of CC register file accesses Stats::Scalar ccRegfileReads; Stats::Scalar ccRegfileWrites; + //number of integer register file accesses + Stats::Scalar vectorRegfileReads; + Stats::Scalar vectorRegfileWrites; //number of misc Stats::Scalar miscRegfileReads; Stats::Scalar miscRegfileWrites; diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh index 6740c601d..d19e4d461 100644 --- a/src/cpu/o3/dyn_inst.hh +++ b/src/cpu/o3/dyn_inst.hh @@ -74,6 +74,7 @@ class BaseO3DynInst : public BaseDynInst<Impl> typedef TheISA::FloatReg FloatReg; typedef TheISA::FloatRegBits FloatRegBits; typedef TheISA::CCReg CCReg; + typedef TheISA::VectorReg VectorReg; /** Misc register index type. */ typedef TheISA::MiscReg MiscReg; @@ -206,7 +207,6 @@ class BaseO3DynInst : public BaseDynInst<Impl> void forwardOldRegs() { - for (int idx = 0; idx < this->numDestRegs(); idx++) { PhysRegIndex prev_phys_reg = this->prevDestRegIdx(idx); TheISA::RegIndex original_dest_reg = @@ -224,6 +224,11 @@ class BaseO3DynInst : public BaseDynInst<Impl> this->setCCRegOperand(this->staticInst.get(), idx, this->cpu->readCCReg(prev_phys_reg)); break; + case VectorRegClass: + this->setVectorRegOperand(this->staticInst.get(), idx, + this->cpu->readVectorReg(prev_phys_reg)); + break; + case MiscRegClass: // no need to forward misc reg values break; @@ -272,6 +277,11 @@ class BaseO3DynInst : public BaseDynInst<Impl> return this->cpu->readCCReg(this->_srcRegIdx[idx]); } + const VectorReg &readVectorRegOperand(const StaticInst *si, int idx) + { + return this->cpu->readVectorReg(this->_srcRegIdx[idx]); + } + /** @todo: Make results into arrays so they can handle multiple dest * registers. */ @@ -300,6 +310,13 @@ class BaseO3DynInst : public BaseDynInst<Impl> BaseDynInst<Impl>::setCCRegOperand(si, idx, val); } + void setVectorRegOperand(const StaticInst *si, int idx, + const VectorReg &val) + { + this->cpu->setVectorReg(this->_destRegIdx[idx], val); + BaseDynInst<Impl>::setVectorRegOperand(si, idx, val); + } + #if THE_ISA == MIPS_ISA MiscReg readRegOtherThread(int misc_reg, ThreadID tid) { diff --git a/src/cpu/o3/free_list.hh b/src/cpu/o3/free_list.hh index aa805e26e..d345d7ac8 100644 --- a/src/cpu/o3/free_list.hh +++ b/src/cpu/o3/free_list.hh @@ -109,6 +109,9 @@ class UnifiedFreeList /** The list of free condition-code registers. */ SimpleFreeList ccList; + /** The list of free vector registers. */ + SimpleFreeList vectorList; + /** * The register file object is used only to distinguish integer * from floating-point physical register indices. @@ -148,6 +151,9 @@ class UnifiedFreeList /** Gets a free cc register. */ PhysRegIndex getCCReg() { return ccList.getReg(); } + /** Gets a free vector register. */ + PhysRegIndex getVectorReg() { return vectorList.getReg(); } + /** Adds a register back to the free list. */ void addReg(PhysRegIndex freed_reg); @@ -160,6 +166,9 @@ class UnifiedFreeList /** Adds a cc register back to the free list. */ void addCCReg(PhysRegIndex freed_reg) { ccList.addReg(freed_reg); } + /** Adds a vector register back to the free list. */ + void addVectorReg(PhysRegIndex freed_reg) { vectorList.addReg(freed_reg); } + /** Checks if there are any free integer registers. */ bool hasFreeIntRegs() const { return intList.hasFreeRegs(); } @@ -169,6 +178,9 @@ class UnifiedFreeList /** Checks if there are any free cc registers. */ bool hasFreeCCRegs() const { return ccList.hasFreeRegs(); } + /** Checks if there are any free vector registers. */ + bool hasFreeVectorRegs() const { return vectorList.hasFreeRegs(); } + /** Returns the number of free integer registers. */ unsigned numFreeIntRegs() const { return intList.numFreeRegs(); } @@ -177,6 +189,9 @@ class UnifiedFreeList /** Returns the number of free cc registers. */ unsigned numFreeCCRegs() const { return ccList.numFreeRegs(); } + + /** Returns the number of free vector registers. */ + unsigned numFreeVectorRegs() const { return vectorList.numFreeRegs(); } }; inline void @@ -189,9 +204,11 @@ UnifiedFreeList::addReg(PhysRegIndex freed_reg) intList.addReg(freed_reg); } else if (regFile->isFloatPhysReg(freed_reg)) { floatList.addReg(freed_reg); - } else { - assert(regFile->isCCPhysReg(freed_reg)); + } else if (regFile->isCCPhysReg(freed_reg)) { ccList.addReg(freed_reg); + } else { + assert(regFile->isVectorPhysReg(freed_reg)); + vectorList.addReg(freed_reg); } // These assert conditions ensure that the number of free diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index 7d359b992..e16843160 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -99,7 +99,7 @@ InstructionQueue<Impl>::InstructionQueue(O3CPU *cpu_ptr, IEW *iew_ptr, // Set the number of total physical registers numPhysRegs = params->numPhysIntRegs + params->numPhysFloatRegs + - params->numPhysCCRegs; + params->numPhysCCRegs + params->numPhysVectorRegs; //Create an entry for each physical register within the //dependency graph. diff --git a/src/cpu/o3/regfile.cc b/src/cpu/o3/regfile.cc index 96ce44bdd..a7476c5ec 100644 --- a/src/cpu/o3/regfile.cc +++ b/src/cpu/o3/regfile.cc @@ -37,15 +37,20 @@ PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs, unsigned _numPhysicalFloatRegs, - unsigned _numPhysicalCCRegs) + unsigned _numPhysicalCCRegs, + unsigned _numPhysicalVectorRegs) : intRegFile(_numPhysicalIntRegs), floatRegFile(_numPhysicalFloatRegs), ccRegFile(_numPhysicalCCRegs), + vectorRegFile(_numPhysicalVectorRegs), baseFloatRegIndex(_numPhysicalIntRegs), baseCCRegIndex(_numPhysicalIntRegs + _numPhysicalFloatRegs), + baseVectorRegIndex(_numPhysicalIntRegs + _numPhysicalFloatRegs + + _numPhysicalCCRegs), totalNumRegs(_numPhysicalIntRegs + _numPhysicalFloatRegs - + _numPhysicalCCRegs) + + _numPhysicalCCRegs + + _numPhysicalVectorRegs) { if (TheISA::NumCCRegs == 0 && _numPhysicalCCRegs != 0) { // Just make this a warning and go ahead and allocate them @@ -53,6 +58,13 @@ PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs, warn("Non-zero number of physical CC regs specified, even though\n" " ISA does not use them.\n"); } + + if (TheISA::NumVectorRegs == 0 && _numPhysicalVectorRegs != 0) { + // Just make this a warning and go ahead and allocate them + // anyway, to keep from having to add checks everywhere + warn("Non-zero number of physical vector regs specified, even though\n" + " ISA does not use them.\n"); + } } @@ -73,9 +85,15 @@ PhysRegFile::initFreeList(UnifiedFreeList *freeList) freeList->addFloatReg(reg_idx++); } - // The rest of the registers are the condition-code physical + // The next batch of registers are the condition-code physical // registers; put them onto the condition-code free list. - while (reg_idx < totalNumRegs) { + while (reg_idx < baseVectorRegIndex) { freeList->addCCReg(reg_idx++); } + + // The rest of the registers are the vector physical + // registers; put them onto the vector free list. + while (reg_idx < totalNumRegs) { + freeList->addVectorReg(reg_idx++); + } } diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh index 8b87725ca..71ca5015f 100644 --- a/src/cpu/o3/regfile.hh +++ b/src/cpu/o3/regfile.hh @@ -56,6 +56,7 @@ class PhysRegFile typedef TheISA::FloatReg FloatReg; typedef TheISA::FloatRegBits FloatRegBits; typedef TheISA::CCReg CCReg; + typedef TheISA::VectorReg VectorReg; typedef union { FloatReg d; @@ -71,6 +72,9 @@ class PhysRegFile /** Condition-code register file. */ std::vector<CCReg> ccRegFile; + /** Vector register file. */ + std::vector<VectorReg> vectorRegFile; + /** * The first floating-point physical register index. The physical * register file has a single continuous index space, with the @@ -93,6 +97,12 @@ class PhysRegFile */ unsigned baseCCRegIndex; + /** + * The first vector physical register index. The vector registers follow + * the condition-code registers. + */ + unsigned baseVectorRegIndex; + /** Total number of physical registers. */ unsigned totalNumRegs; @@ -103,7 +113,8 @@ class PhysRegFile */ PhysRegFile(unsigned _numPhysicalIntRegs, unsigned _numPhysicalFloatRegs, - unsigned _numPhysicalCCRegs); + unsigned _numPhysicalCCRegs, + unsigned _numPhysicalVectorRegs); /** * Destructor to free resources @@ -122,7 +133,11 @@ class PhysRegFile /** @return the number of condition-code physical registers. */ unsigned numCCPhysRegs() const - { return totalNumRegs - baseCCRegIndex; } + { return baseVectorRegIndex - baseCCRegIndex; } + + /** @return the number of vector physical registers. */ + unsigned numVectorPhysRegs() const + { return totalNumRegs - baseVectorRegIndex; } /** @return the total number of physical registers. */ unsigned totalNumPhysRegs() const { return totalNumRegs; } @@ -151,7 +166,16 @@ class PhysRegFile */ bool isCCPhysReg(PhysRegIndex reg_idx) { - return (baseCCRegIndex <= reg_idx && reg_idx < totalNumRegs); + return (baseCCRegIndex <= reg_idx && reg_idx < baseVectorRegIndex); + } + + /** + * @return true if the specified physical register index + * corresponds to a vector physical register. + */ + bool isVectorPhysReg(PhysRegIndex reg_idx) const + { + return baseVectorRegIndex <= reg_idx && reg_idx < totalNumRegs; } /** Reads an integer register. */ @@ -207,6 +231,18 @@ class PhysRegFile return ccRegFile[reg_offset]; } + /** Reads a vector register. */ + const VectorReg &readVectorReg(PhysRegIndex reg_idx) const + { + assert(isVectorPhysReg(reg_idx)); + + // Remove the base vector reg dependency. + PhysRegIndex reg_offset = reg_idx - baseVectorRegIndex; + + DPRINTF(IEW, "RegFile: Access to vector register %i\n", int(reg_idx)); + return vectorRegFile[reg_offset]; + } + /** Sets an integer register to the given value. */ void setIntReg(PhysRegIndex reg_idx, uint64_t val) { @@ -262,6 +298,16 @@ class PhysRegFile ccRegFile[reg_offset] = val; } + + /** Sets a vector register to the given value. */ + void setVectorReg(PhysRegIndex reg_idx, const VectorReg &val) + { + assert(isVectorPhysReg(reg_idx)); + // Remove the base vector reg dependency. + PhysRegIndex reg_offset = reg_idx - baseVectorRegIndex; + DPRINTF(IEW, "RegFile: Setting vector register %i\n", int(reg_idx)); + vectorRegFile[reg_offset] = val; + } }; diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index 43b7ba9aa..3da6fd4fa 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -69,7 +69,7 @@ DefaultRename<Impl>::DefaultRename(O3CPU *_cpu, DerivO3CPUParams *params) commitWidth(params->commitWidth), numThreads(params->numThreads), maxPhysicalRegs(params->numPhysIntRegs + params->numPhysFloatRegs - + params->numPhysCCRegs) + + params->numPhysCCRegs + params->numPhysVectorRegs) { if (renameWidth > Impl::MaxWidth) fatal("renameWidth (%d) is larger than compiled limit (%d),\n" @@ -635,7 +635,8 @@ DefaultRename<Impl>::renameInsts(ThreadID tid) // to rename to. Otherwise block. if (!renameMap[tid]->canRename(inst->numIntDestRegs(), inst->numFPDestRegs(), - inst->numCCDestRegs())) { + inst->numCCDestRegs(), + inst->numVectorDestRegs())) { DPRINTF(Rename, "Blocking due to lack of free " "physical registers to rename to.\n"); blockThisCycle = true; @@ -1016,6 +1017,11 @@ DefaultRename<Impl>::renameSrcRegs(DynInstPtr &inst, ThreadID tid) renamed_reg = map->lookupCC(flat_rel_src_reg); break; + case VectorRegClass: + flat_rel_src_reg = tc->flattenVectorIndex(rel_src_reg); + renamed_reg = map->lookupVector(flat_rel_src_reg); + break; + case MiscRegClass: // misc regs don't get flattened flat_rel_src_reg = rel_src_reg; @@ -1082,6 +1088,12 @@ DefaultRename<Impl>::renameDestRegs(DynInstPtr &inst, ThreadID tid) flat_uni_dest_reg = flat_rel_dest_reg + TheISA::CC_Reg_Base; break; + case VectorRegClass: + flat_rel_dest_reg = tc->flattenVectorIndex(rel_dest_reg); + rename_result = map->renameVector(flat_rel_dest_reg); + flat_uni_dest_reg = flat_rel_dest_reg + TheISA::Vector_Reg_Base; + break; + case MiscRegClass: // misc regs don't get flattened flat_rel_dest_reg = rel_dest_reg; @@ -1156,7 +1168,7 @@ inline int DefaultRename<Impl>::calcFreeLQEntries(ThreadID tid) { int num_free = freeEntries[tid].lqEntries - - (loadsInProgress[tid] - fromIEW->iewInfo[tid].dispatchedToLQ); + (loadsInProgress[tid] - fromIEW->iewInfo[tid].dispatchedToLQ); DPRINTF(Rename, "calcFreeLQEntries: free lqEntries: %d, loadsInProgress: %d, " "loads dispatchedToLQ: %d\n", freeEntries[tid].lqEntries, loadsInProgress[tid], fromIEW->iewInfo[tid].dispatchedToLQ); @@ -1168,7 +1180,7 @@ inline int DefaultRename<Impl>::calcFreeSQEntries(ThreadID tid) { int num_free = freeEntries[tid].sqEntries - - (storesInProgress[tid] - fromIEW->iewInfo[tid].dispatchedToSQ); + (storesInProgress[tid] - fromIEW->iewInfo[tid].dispatchedToSQ); DPRINTF(Rename, "calcFreeSQEntries: free sqEntries: %d, storesInProgress: %d, " "stores dispatchedToSQ: %d\n", freeEntries[tid].sqEntries, storesInProgress[tid], fromIEW->iewInfo[tid].dispatchedToSQ); diff --git a/src/cpu/o3/rename_map.cc b/src/cpu/o3/rename_map.cc index b0232df20..27ddd8c63 100644 --- a/src/cpu/o3/rename_map.cc +++ b/src/cpu/o3/rename_map.cc @@ -99,6 +99,9 @@ UnifiedRenameMap::init(PhysRegFile *_regFile, floatMap.init(TheISA::NumFloatRegs, &(freeList->floatList), _floatZeroReg); ccMap.init(TheISA::NumCCRegs, &(freeList->ccList), (RegIndex)-1); + + vectorMap.init(TheISA::NumVectorRegs, &(freeList->vectorList), + (RegIndex)-1); } @@ -117,6 +120,9 @@ UnifiedRenameMap::rename(RegIndex arch_reg) case CCRegClass: return renameCC(rel_arch_reg); + case VectorRegClass: + return renameVector(rel_arch_reg); + case MiscRegClass: return renameMisc(rel_arch_reg); @@ -142,6 +148,9 @@ UnifiedRenameMap::lookup(RegIndex arch_reg) const case CCRegClass: return lookupCC(rel_arch_reg); + case VectorRegClass: + return lookupVector(rel_arch_reg); + case MiscRegClass: return lookupMisc(rel_arch_reg); @@ -166,6 +175,9 @@ UnifiedRenameMap::setEntry(RegIndex arch_reg, PhysRegIndex phys_reg) case CCRegClass: return setCCEntry(rel_arch_reg, phys_reg); + case VectorRegClass: + return setVectorEntry(rel_arch_reg, phys_reg); + case MiscRegClass: // Misc registers do not actually rename, so don't change // their mappings. We end up here when a commit or squash diff --git a/src/cpu/o3/rename_map.hh b/src/cpu/o3/rename_map.hh index 9d91f232e..37487c3d3 100644 --- a/src/cpu/o3/rename_map.hh +++ b/src/cpu/o3/rename_map.hh @@ -178,6 +178,9 @@ class UnifiedRenameMap /** The condition-code register rename map */ SimpleRenameMap ccMap; + /** The vector register rename map */ + SimpleRenameMap vectorMap; + public: typedef TheISA::RegIndex RegIndex; @@ -240,6 +243,17 @@ class UnifiedRenameMap } /** + * Perform rename() on a vector register, given a relative vector register + * index. + */ + RenameInfo renameVector(RegIndex rel_arch_reg) + { + RenameInfo info = vectorMap.rename(rel_arch_reg); + assert(regFile->isVectorPhysReg(info.first)); + return info; + } + + /** * Perform rename() on a misc register, given a relative * misc register index. */ @@ -297,6 +311,17 @@ class UnifiedRenameMap } /** + * Perform lookup() on a vector register, given a relative + * vector register index. + */ + PhysRegIndex lookupVector(RegIndex rel_arch_reg) const + { + PhysRegIndex phys_reg = vectorMap.lookup(rel_arch_reg); + assert(regFile->isVectorPhysReg(phys_reg)); + return phys_reg; + } + + /** * Perform lookup() on a misc register, given a relative * misc register index. */ @@ -349,6 +374,16 @@ class UnifiedRenameMap } /** + * Perform setEntry() on a vector register, given a relative vector + * register index. + */ + void setVectorEntry(RegIndex arch_reg, PhysRegIndex phys_reg) + { + assert(regFile->isVectorPhysReg(phys_reg)); + vectorMap.setEntry(arch_reg, phys_reg); + } + + /** * Return the minimum number of free entries across all of the * register classes. The minimum is used so we guarantee that * this number of entries is available regardless of which class @@ -362,11 +397,13 @@ class UnifiedRenameMap /** * Return whether there are enough registers to serve the request. */ - bool canRename(uint32_t intRegs, uint32_t floatRegs, uint32_t ccRegs) const + bool canRename(uint32_t intRegs, uint32_t floatRegs, uint32_t ccRegs, + uint32_t vectorRegs) const { return intRegs <= intMap.numFreeEntries() && floatRegs <= floatMap.numFreeEntries() && - ccRegs <= ccMap.numFreeEntries(); + ccRegs <= ccMap.numFreeEntries() && + vectorRegs <= vectorMap.numFreeEntries(); } }; diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh index 87d87900c..6e9b054da 100755 --- a/src/cpu/o3/thread_context.hh +++ b/src/cpu/o3/thread_context.hh @@ -189,6 +189,10 @@ class O3ThreadContext : public ThreadContext return readCCRegFlat(flattenCCIndex(reg_idx)); } + virtual const VectorReg &readVectorReg(int reg_idx) { + return readVectorRegFlat(flattenVectorIndex(reg_idx)); + } + /** Sets an integer register to a value. */ virtual void setIntReg(int reg_idx, uint64_t val) { setIntRegFlat(flattenIntIndex(reg_idx), val); @@ -206,6 +210,10 @@ class O3ThreadContext : public ThreadContext setCCRegFlat(flattenCCIndex(reg_idx), val); } + virtual void setVectorReg(int reg_idx, const VectorReg &val) { + setVectorRegFlat(flattenVectorIndex(reg_idx), val); + } + /** Reads this thread's PC state. */ virtual TheISA::PCState pcState() { return cpu->pcState(thread->threadId()); } @@ -246,6 +254,7 @@ class O3ThreadContext : public ThreadContext virtual int flattenIntIndex(int reg); virtual int flattenFloatIndex(int reg); virtual int flattenCCIndex(int reg); + virtual int flattenVectorIndex(int reg); virtual int flattenMiscIndex(int reg); /** Returns the number of consecutive store conditional failures. */ @@ -291,6 +300,9 @@ class O3ThreadContext : public ThreadContext virtual CCReg readCCRegFlat(int idx); virtual void setCCRegFlat(int idx, CCReg val); + + virtual const VectorReg &readVectorRegFlat(int idx); + virtual void setVectorRegFlat(int idx, const VectorReg &val); }; #endif diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh index e6a3d5083..ecdd9ebb9 100755 --- a/src/cpu/o3/thread_context_impl.hh +++ b/src/cpu/o3/thread_context_impl.hh @@ -216,6 +216,13 @@ O3ThreadContext<Impl>::readCCRegFlat(int reg_idx) } template <class Impl> +const TheISA::VectorReg & +O3ThreadContext<Impl>::readVectorRegFlat(int reg_idx) +{ + return cpu->readArchVectorReg(reg_idx, thread->threadId()); +} + +template <class Impl> void O3ThreadContext<Impl>::setIntRegFlat(int reg_idx, uint64_t val) { @@ -253,6 +260,15 @@ O3ThreadContext<Impl>::setCCRegFlat(int reg_idx, TheISA::CCReg val) template <class Impl> void +O3ThreadContext<Impl>::setVectorRegFlat(int reg_idx, + const TheISA::VectorReg &val) +{ + cpu->setArchVectorReg(reg_idx, val, thread->threadId()); + conditionalSquash(); +} + +template <class Impl> +void O3ThreadContext<Impl>::pcState(const TheISA::PCState &val) { cpu->pcState(val, thread->threadId()); @@ -292,6 +308,13 @@ O3ThreadContext<Impl>::flattenCCIndex(int reg) template <class Impl> int +O3ThreadContext<Impl>::flattenVectorIndex(int reg) +{ + return cpu->isa[thread->threadId()]->flattenVectorIndex(reg); +} + +template <class Impl> +int O3ThreadContext<Impl>::flattenMiscIndex(int reg) { return cpu->isa[thread->threadId()]->flattenMiscIndex(reg); |