summary refs log tree commit diff
path: root/src/cpu/o3
diff options
context:
space:
mode:
author Nilay Vaish <nilay@cs.wisc.edu> 2015-07-26 10:21:20 -0500
committer Nilay Vaish <nilay@cs.wisc.edu> 2015-07-26 10:21:20 -0500
commit 608641e23c7f2288810c3f23a1a63790b664f2ab (patch)
tree 0656aaf9653e8d263f5daac0d5f0fe3190193ae5 /src/cpu/o3
parent 6e354e82d9395b20f5f148cd545d0666b626e8ac (diff)
download gem5-608641e23c7f2288810c3f23a1a63790b664f2ab.tar.xz
cpu: implements vector registers
This adds a vector register type. The type is defined as a std::array of a fixed number of uint64_ts. The isa_parser.py has been modified to parse vector register operands and generate the required code. Different cpus have vector register files now.
Diffstat (limited to 'src/cpu/o3')
-rw-r--r-- src/cpu/o3/O3CPU.py 7
-rw-r--r-- src/cpu/o3/cpu.cc 76
-rw-r--r-- src/cpu/o3/cpu.hh 12
-rw-r--r-- src/cpu/o3/dyn_inst.hh 19
-rw-r--r-- src/cpu/o3/free_list.hh 21
-rw-r--r-- src/cpu/o3/inst_queue_impl.hh 2
-rw-r--r-- src/cpu/o3/regfile.cc 26
-rw-r--r-- src/cpu/o3/regfile.hh 52
-rw-r--r-- src/cpu/o3/rename_impl.hh 20
-rw-r--r-- src/cpu/o3/rename_map.cc 12
-rw-r--r-- src/cpu/o3/rename_map.hh 41
-rwxr-xr-x src/cpu/o3/thread_context.hh 12
-rwxr-xr-x src/cpu/o3/thread_context_impl.hh 23
13 files changed, 305 insertions, 18 deletions
diff --git a/src/cpu/o3/O3CPU.py b/src/cpu/o3/O3CPU.py
index 92f96a3b6..d2220de82 100644
--- a/src/cpu/o3/O3CPU.py
+++ b/src/cpu/o3/O3CPU.py
@@ -114,6 +114,7 @@ class DerivO3CPU(BaseCPU):
numPhysIntRegs = Param.Unsigned(256, "Number of physical integer registers")
numPhysFloatRegs = Param.Unsigned(256, "Number of physical floating point "
"registers")
+
# most ISAs don't use condition-code regs, so default is 0
_defaultNumPhysCCRegs = 0
if buildEnv['TARGET_ISA'] in ('arm','x86'):
@@ -126,6 +127,12 @@ class DerivO3CPU(BaseCPU):
_defaultNumPhysCCRegs = Self.numPhysIntRegs * 5
numPhysCCRegs = Param.Unsigned(_defaultNumPhysCCRegs,
"Number of physical cc registers")
+
+ # most ISAs don't use vector regs, so default is 0
+ _defaultNumPhysVectorRegs = 0
+ numPhysVectorRegs = Param.Unsigned(_defaultNumPhysVectorRegs,
+ "Number of physical vector registers")
+
numIQEntries = Param.Unsigned(64, "Number of instruction queue entries")
numROBEntries = Param.Unsigned(192, "Number of reorder buffer entries")
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 026907a94..d8f39bbe4 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -170,7 +170,8 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params)
regFile(params->numPhysIntRegs,
params->numPhysFloatRegs,
- params->numPhysCCRegs),
+ params->numPhysCCRegs,
+ params->numPhysVectorRegs),
freeList(name() + ".freelist", &regFile),
@@ -269,6 +270,7 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params)
assert(params->numPhysIntRegs >= numThreads * TheISA::NumIntRegs);
assert(params->numPhysFloatRegs >= numThreads * TheISA::NumFloatRegs);
assert(params->numPhysCCRegs >= numThreads * TheISA::NumCCRegs);
+ assert(params->numPhysVectorRegs >= numThreads * TheISA::NumVectorRegs);
rename.setScoreboard(&scoreboard);
iew.setScoreboard(&scoreboard);
@@ -313,6 +315,12 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params)
renameMap[tid].setCCEntry(ridx, phys_reg);
commitRenameMap[tid].setCCEntry(ridx, phys_reg);
}
+
+ for (RegIndex ridx = 0; ridx < TheISA::NumVectorRegs; ++ridx) {
+ PhysRegIndex phys_reg = freeList.getVectorReg();
+ renameMap[tid].setVectorEntry(ridx, phys_reg);
+ commitRenameMap[tid].setVectorEntry(ridx, phys_reg);
+ }
}
rename.setRenameMap(renameMap);
@@ -521,6 +529,16 @@ FullO3CPU<Impl>::regStats()
.desc("number of cc regfile writes")
.prereq(ccRegfileWrites);
+ vectorRegfileReads
+ .name(name() + ".vector_regfile_reads")
+ .desc("number of vector regfile reads")
+ .prereq(vectorRegfileReads);
+
+ vectorRegfileWrites
+ .name(name() + ".vector_regfile_writes")
+ .desc("number of vector regfile writes")
+ .prereq(vectorRegfileWrites);
+
miscRegfileReads
.name(name() + ".misc_regfile_reads")
.desc("number of misc regfile reads")
@@ -807,6 +825,18 @@ FullO3CPU<Impl>::insertThread(ThreadID tid)
scoreboard.setReg(phys_reg);
}
+ //Bind vector Regs to Rename Map
+ max_reg = TheISA::NumIntRegs + TheISA::NumFloatRegs + TheISA::NumCCRegs +
+ TheISA::NumVectorRegs;
+ for (int vreg = TheISA::NumIntRegs + TheISA::NumFloatRegs +
+ TheISA::NumCCRegs;
+ vreg < max_reg; vreg++) {
+ PhysRegIndex phys_reg = freeList.getVectorReg();
+
+ renameMap[tid].setEntry(vreg, phys_reg);
+ scoreboard.setReg(phys_reg);
+ }
+
//Copy Thread Data Into RegFile
//this->copyFromTC(tid);
@@ -860,6 +890,14 @@ FullO3CPU<Impl>::removeThread(ThreadID tid)
freeList.addReg(phys_reg);
}
+ // Unbind vector Regs from Rename Map
+ max_reg = TheISA::Vector_Reg_Base + TheISA::NumVectorRegs;
+ for (int vreg = TheISA::Vector_Reg_Base; vreg < max_reg; vreg++) {
+ PhysRegIndex phys_reg = renameMap[tid].lookup(vreg);
+ scoreboard.unsetReg(phys_reg);
+ freeList.addReg(phys_reg);
+ }
+
// Squash Throughout Pipeline
DynInstPtr inst = commit.rob->readHeadInst(tid);
InstSeqNum squash_seq_num = inst->seqNum;
@@ -1259,6 +1297,14 @@ FullO3CPU<Impl>::readCCReg(int reg_idx)
}
template <class Impl>
+const VectorReg &
+FullO3CPU<Impl>::readVectorReg(int reg_idx)
+{
+ vectorRegfileReads++;
+ return regFile.readVectorReg(reg_idx);
+}
+
+template <class Impl>
void
FullO3CPU<Impl>::setIntReg(int reg_idx, uint64_t val)
{
@@ -1291,6 +1337,14 @@ FullO3CPU<Impl>::setCCReg(int reg_idx, CCReg val)
}
template <class Impl>
+void
+FullO3CPU<Impl>::setVectorReg(int reg_idx, const VectorReg &val)
+{
+ vectorRegfileWrites++;
+ regFile.setVectorReg(reg_idx, val);
+}
+
+template <class Impl>
uint64_t
FullO3CPU<Impl>::readArchIntReg(int reg_idx, ThreadID tid)
{
@@ -1331,6 +1385,16 @@ FullO3CPU<Impl>::readArchCCReg(int reg_idx, ThreadID tid)
}
template <class Impl>
+const VectorReg&
+FullO3CPU<Impl>::readArchVectorReg(int reg_idx, ThreadID tid)
+{
+ vectorRegfileReads++;
+ PhysRegIndex phys_reg = commitRenameMap[tid].lookupVector(reg_idx);
+
+ return regFile.readVectorReg(phys_reg);
+}
+
+template <class Impl>
void
FullO3CPU<Impl>::setArchIntReg(int reg_idx, uint64_t val, ThreadID tid)
{
@@ -1371,6 +1435,16 @@ FullO3CPU<Impl>::setArchCCReg(int reg_idx, CCReg val, ThreadID tid)
}
template <class Impl>
+void
+FullO3CPU<Impl>::setArchVectorReg(int reg_idx, const VectorReg &val,
+ ThreadID tid)
+{
+ vectorRegfileWrites++;
+ PhysRegIndex phys_reg = commitRenameMap[tid].lookupVector(reg_idx);
+ regFile.setVectorReg(phys_reg, val);
+}
+
+template <class Impl>
TheISA::PCState
FullO3CPU<Impl>::pcState(ThreadID tid)
{
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index aa02ee2ea..f16450d19 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -427,6 +427,8 @@ class FullO3CPU : public BaseO3CPU
TheISA::CCReg readCCReg(int reg_idx);
+ const TheISA::VectorReg &readVectorReg(int reg_idx);
+
void setIntReg(int reg_idx, uint64_t val);
void setFloatReg(int reg_idx, TheISA::FloatReg val);
@@ -435,6 +437,8 @@ class FullO3CPU : public BaseO3CPU
void setCCReg(int reg_idx, TheISA::CCReg val);
+ void setVectorReg(int reg_idx, const TheISA::VectorReg &val);
+
uint64_t readArchIntReg(int reg_idx, ThreadID tid);
float readArchFloatReg(int reg_idx, ThreadID tid);
@@ -443,6 +447,8 @@ class FullO3CPU : public BaseO3CPU
TheISA::CCReg readArchCCReg(int reg_idx, ThreadID tid);
+ const TheISA::VectorReg &readArchVectorReg(int reg_idx, ThreadID tid);
+
/** Architectural register accessors. Looks up in the commit
* rename table to obtain the true physical index of the
* architected register first, then accesses that physical
@@ -456,6 +462,9 @@ class FullO3CPU : public BaseO3CPU
void setArchCCReg(int reg_idx, TheISA::CCReg val, ThreadID tid);
+ void setArchVectorReg(int reg_idx, const TheISA::VectorReg &val,
+ ThreadID tid);
+
/** Sets the commit PC state of a specific thread. */
void pcState(const TheISA::PCState &newPCState, ThreadID tid);
@@ -734,6 +743,9 @@ class FullO3CPU : public BaseO3CPU
//number of CC register file accesses
Stats::Scalar ccRegfileReads;
Stats::Scalar ccRegfileWrites;
+ //number of vector register file accesses
+ Stats::Scalar vectorRegfileReads;
+ Stats::Scalar vectorRegfileWrites;
//number of misc
Stats::Scalar miscRegfileReads;
Stats::Scalar miscRegfileWrites;
diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh
index 6740c601d..d19e4d461 100644
--- a/src/cpu/o3/dyn_inst.hh
+++ b/src/cpu/o3/dyn_inst.hh
@@ -74,6 +74,7 @@ class BaseO3DynInst : public BaseDynInst<Impl>
typedef TheISA::FloatReg FloatReg;
typedef TheISA::FloatRegBits FloatRegBits;
typedef TheISA::CCReg CCReg;
+ typedef TheISA::VectorReg VectorReg;
/** Misc register index type. */
typedef TheISA::MiscReg MiscReg;
@@ -206,7 +207,6 @@ class BaseO3DynInst : public BaseDynInst<Impl>
void forwardOldRegs()
{
-
for (int idx = 0; idx < this->numDestRegs(); idx++) {
PhysRegIndex prev_phys_reg = this->prevDestRegIdx(idx);
TheISA::RegIndex original_dest_reg =
@@ -224,6 +224,11 @@ class BaseO3DynInst : public BaseDynInst<Impl>
this->setCCRegOperand(this->staticInst.get(), idx,
this->cpu->readCCReg(prev_phys_reg));
break;
+ case VectorRegClass:
+ this->setVectorRegOperand(this->staticInst.get(), idx,
+ this->cpu->readVectorReg(prev_phys_reg));
+ break;
+
case MiscRegClass:
// no need to forward misc reg values
break;
@@ -272,6 +277,11 @@ class BaseO3DynInst : public BaseDynInst<Impl>
return this->cpu->readCCReg(this->_srcRegIdx[idx]);
}
+ const VectorReg &readVectorRegOperand(const StaticInst *si, int idx)
+ {
+ return this->cpu->readVectorReg(this->_srcRegIdx[idx]);
+ }
+
/** @todo: Make results into arrays so they can handle multiple dest
* registers.
*/
@@ -300,6 +310,13 @@ class BaseO3DynInst : public BaseDynInst<Impl>
BaseDynInst<Impl>::setCCRegOperand(si, idx, val);
}
+ void setVectorRegOperand(const StaticInst *si, int idx,
+ const VectorReg &val)
+ {
+ this->cpu->setVectorReg(this->_destRegIdx[idx], val);
+ BaseDynInst<Impl>::setVectorRegOperand(si, idx, val);
+ }
+
#if THE_ISA == MIPS_ISA
MiscReg readRegOtherThread(int misc_reg, ThreadID tid)
{
diff --git a/src/cpu/o3/free_list.hh b/src/cpu/o3/free_list.hh
index aa805e26e..d345d7ac8 100644
--- a/src/cpu/o3/free_list.hh
+++ b/src/cpu/o3/free_list.hh
@@ -109,6 +109,9 @@ class UnifiedFreeList
/** The list of free condition-code registers. */
SimpleFreeList ccList;
+ /** The list of free vector registers. */
+ SimpleFreeList vectorList;
+
/**
* The register file object is used only to distinguish integer
* from floating-point physical register indices.
@@ -148,6 +151,9 @@ class UnifiedFreeList
/** Gets a free cc register. */
PhysRegIndex getCCReg() { return ccList.getReg(); }
+ /** Gets a free vector register. */
+ PhysRegIndex getVectorReg() { return vectorList.getReg(); }
+
/** Adds a register back to the free list. */
void addReg(PhysRegIndex freed_reg);
@@ -160,6 +166,9 @@ class UnifiedFreeList
/** Adds a cc register back to the free list. */
void addCCReg(PhysRegIndex freed_reg) { ccList.addReg(freed_reg); }
+ /** Adds a vector register back to the free list. */
+ void addVectorReg(PhysRegIndex freed_reg) { vectorList.addReg(freed_reg); }
+
/** Checks if there are any free integer registers. */
bool hasFreeIntRegs() const { return intList.hasFreeRegs(); }
@@ -169,6 +178,9 @@ class UnifiedFreeList
/** Checks if there are any free cc registers. */
bool hasFreeCCRegs() const { return ccList.hasFreeRegs(); }
+ /** Checks if there are any free vector registers. */
+ bool hasFreeVectorRegs() const { return vectorList.hasFreeRegs(); }
+
/** Returns the number of free integer registers. */
unsigned numFreeIntRegs() const { return intList.numFreeRegs(); }
@@ -177,6 +189,9 @@ class UnifiedFreeList
/** Returns the number of free cc registers. */
unsigned numFreeCCRegs() const { return ccList.numFreeRegs(); }
+
+ /** Returns the number of free vector registers. */
+ unsigned numFreeVectorRegs() const { return vectorList.numFreeRegs(); }
};
inline void
@@ -189,9 +204,11 @@ UnifiedFreeList::addReg(PhysRegIndex freed_reg)
intList.addReg(freed_reg);
} else if (regFile->isFloatPhysReg(freed_reg)) {
floatList.addReg(freed_reg);
- } else {
- assert(regFile->isCCPhysReg(freed_reg));
+ } else if (regFile->isCCPhysReg(freed_reg)) {
ccList.addReg(freed_reg);
+ } else {
+ assert(regFile->isVectorPhysReg(freed_reg));
+ vectorList.addReg(freed_reg);
}
// These assert conditions ensure that the number of free
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh
index 7d359b992..e16843160 100644
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -99,7 +99,7 @@ InstructionQueue<Impl>::InstructionQueue(O3CPU *cpu_ptr, IEW *iew_ptr,
// Set the number of total physical registers
numPhysRegs = params->numPhysIntRegs + params->numPhysFloatRegs +
- params->numPhysCCRegs;
+ params->numPhysCCRegs + params->numPhysVectorRegs;
//Create an entry for each physical register within the
//dependency graph.
diff --git a/src/cpu/o3/regfile.cc b/src/cpu/o3/regfile.cc
index 96ce44bdd..a7476c5ec 100644
--- a/src/cpu/o3/regfile.cc
+++ b/src/cpu/o3/regfile.cc
@@ -37,15 +37,20 @@
PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs,
unsigned _numPhysicalFloatRegs,
- unsigned _numPhysicalCCRegs)
+ unsigned _numPhysicalCCRegs,
+ unsigned _numPhysicalVectorRegs)
: intRegFile(_numPhysicalIntRegs),
floatRegFile(_numPhysicalFloatRegs),
ccRegFile(_numPhysicalCCRegs),
+ vectorRegFile(_numPhysicalVectorRegs),
baseFloatRegIndex(_numPhysicalIntRegs),
baseCCRegIndex(_numPhysicalIntRegs + _numPhysicalFloatRegs),
+ baseVectorRegIndex(_numPhysicalIntRegs + _numPhysicalFloatRegs
+ + _numPhysicalCCRegs),
totalNumRegs(_numPhysicalIntRegs
+ _numPhysicalFloatRegs
- + _numPhysicalCCRegs)
+ + _numPhysicalCCRegs
+ + _numPhysicalVectorRegs)
{
if (TheISA::NumCCRegs == 0 && _numPhysicalCCRegs != 0) {
// Just make this a warning and go ahead and allocate them
@@ -53,6 +58,13 @@ PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs,
warn("Non-zero number of physical CC regs specified, even though\n"
" ISA does not use them.\n");
}
+
+ if (TheISA::NumVectorRegs == 0 && _numPhysicalVectorRegs != 0) {
+ // Just make this a warning and go ahead and allocate them
+ // anyway, to keep from having to add checks everywhere
+ warn("Non-zero number of physical vector regs specified, even though\n"
+ " ISA does not use them.\n");
+ }
}
@@ -73,9 +85,15 @@ PhysRegFile::initFreeList(UnifiedFreeList *freeList)
freeList->addFloatReg(reg_idx++);
}
- // The rest of the registers are the condition-code physical
+ // The next batch of registers are the condition-code physical
// registers; put them onto the condition-code free list.
- while (reg_idx < totalNumRegs) {
+ while (reg_idx < baseVectorRegIndex) {
freeList->addCCReg(reg_idx++);
}
+
+ // The rest of the registers are the vector physical
+ // registers; put them onto the vector free list.
+ while (reg_idx < totalNumRegs) {
+ freeList->addVectorReg(reg_idx++);
+ }
}
diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh
index 8b87725ca..71ca5015f 100644
--- a/src/cpu/o3/regfile.hh
+++ b/src/cpu/o3/regfile.hh
@@ -56,6 +56,7 @@ class PhysRegFile
typedef TheISA::FloatReg FloatReg;
typedef TheISA::FloatRegBits FloatRegBits;
typedef TheISA::CCReg CCReg;
+ typedef TheISA::VectorReg VectorReg;
typedef union {
FloatReg d;
@@ -71,6 +72,9 @@ class PhysRegFile
/** Condition-code register file. */
std::vector<CCReg> ccRegFile;
+ /** Vector register file. */
+ std::vector<VectorReg> vectorRegFile;
+
/**
* The first floating-point physical register index. The physical
* register file has a single continuous index space, with the
@@ -93,6 +97,12 @@ class PhysRegFile
*/
unsigned baseCCRegIndex;
+ /**
+ * The first vector physical register index. The vector registers follow
+ * the condition-code registers.
+ */
+ unsigned baseVectorRegIndex;
+
/** Total number of physical registers. */
unsigned totalNumRegs;
@@ -103,7 +113,8 @@ class PhysRegFile
*/
PhysRegFile(unsigned _numPhysicalIntRegs,
unsigned _numPhysicalFloatRegs,
- unsigned _numPhysicalCCRegs);
+ unsigned _numPhysicalCCRegs,
+ unsigned _numPhysicalVectorRegs);
/**
* Destructor to free resources
@@ -122,7 +133,11 @@ class PhysRegFile
/** @return the number of condition-code physical registers. */
unsigned numCCPhysRegs() const
- { return totalNumRegs - baseCCRegIndex; }
+ { return baseVectorRegIndex - baseCCRegIndex; }
+
+ /** @return the number of vector physical registers. */
+ unsigned numVectorPhysRegs() const
+ { return totalNumRegs - baseVectorRegIndex; }
/** @return the total number of physical registers. */
unsigned totalNumPhysRegs() const { return totalNumRegs; }
@@ -151,7 +166,16 @@ class PhysRegFile
*/
bool isCCPhysReg(PhysRegIndex reg_idx)
{
- return (baseCCRegIndex <= reg_idx && reg_idx < totalNumRegs);
+ return (baseCCRegIndex <= reg_idx && reg_idx < baseVectorRegIndex);
+ }
+
+ /**
+ * @return true if the specified physical register index
+ * corresponds to a vector physical register.
+ */
+ bool isVectorPhysReg(PhysRegIndex reg_idx) const
+ {
+ return baseVectorRegIndex <= reg_idx && reg_idx < totalNumRegs;
}
/** Reads an integer register. */
@@ -207,6 +231,18 @@ class PhysRegFile
return ccRegFile[reg_offset];
}
+ /** Reads a vector register. */
+ const VectorReg &readVectorReg(PhysRegIndex reg_idx) const
+ {
+ assert(isVectorPhysReg(reg_idx));
+
+ // Remove the base vector reg dependency.
+ PhysRegIndex reg_offset = reg_idx - baseVectorRegIndex;
+
+ DPRINTF(IEW, "RegFile: Access to vector register %i\n", int(reg_idx));
+ return vectorRegFile[reg_offset];
+ }
+
/** Sets an integer register to the given value. */
void setIntReg(PhysRegIndex reg_idx, uint64_t val)
{
@@ -262,6 +298,16 @@ class PhysRegFile
ccRegFile[reg_offset] = val;
}
+
+ /** Sets a vector register to the given value. */
+ void setVectorReg(PhysRegIndex reg_idx, const VectorReg &val)
+ {
+ assert(isVectorPhysReg(reg_idx));
+ // Remove the base vector reg dependency.
+ PhysRegIndex reg_offset = reg_idx - baseVectorRegIndex;
+ DPRINTF(IEW, "RegFile: Setting vector register %i\n", int(reg_idx));
+ vectorRegFile[reg_offset] = val;
+ }
};
diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh
index 43b7ba9aa..3da6fd4fa 100644
--- a/src/cpu/o3/rename_impl.hh
+++ b/src/cpu/o3/rename_impl.hh
@@ -69,7 +69,7 @@ DefaultRename<Impl>::DefaultRename(O3CPU *_cpu, DerivO3CPUParams *params)
commitWidth(params->commitWidth),
numThreads(params->numThreads),
maxPhysicalRegs(params->numPhysIntRegs + params->numPhysFloatRegs
- + params->numPhysCCRegs)
+ + params->numPhysCCRegs + params->numPhysVectorRegs)
{
if (renameWidth > Impl::MaxWidth)
fatal("renameWidth (%d) is larger than compiled limit (%d),\n"
@@ -635,7 +635,8 @@ DefaultRename<Impl>::renameInsts(ThreadID tid)
// to rename to. Otherwise block.
if (!renameMap[tid]->canRename(inst->numIntDestRegs(),
inst->numFPDestRegs(),
- inst->numCCDestRegs())) {
+ inst->numCCDestRegs(),
+ inst->numVectorDestRegs())) {
DPRINTF(Rename, "Blocking due to lack of free "
"physical registers to rename to.\n");
blockThisCycle = true;
@@ -1016,6 +1017,11 @@ DefaultRename<Impl>::renameSrcRegs(DynInstPtr &inst, ThreadID tid)
renamed_reg = map->lookupCC(flat_rel_src_reg);
break;
+ case VectorRegClass:
+ flat_rel_src_reg = tc->flattenVectorIndex(rel_src_reg);
+ renamed_reg = map->lookupVector(flat_rel_src_reg);
+ break;
+
case MiscRegClass:
// misc regs don't get flattened
flat_rel_src_reg = rel_src_reg;
@@ -1082,6 +1088,12 @@ DefaultRename<Impl>::renameDestRegs(DynInstPtr &inst, ThreadID tid)
flat_uni_dest_reg = flat_rel_dest_reg + TheISA::CC_Reg_Base;
break;
+ case VectorRegClass:
+ flat_rel_dest_reg = tc->flattenVectorIndex(rel_dest_reg);
+ rename_result = map->renameVector(flat_rel_dest_reg);
+ flat_uni_dest_reg = flat_rel_dest_reg + TheISA::Vector_Reg_Base;
+ break;
+
case MiscRegClass:
// misc regs don't get flattened
flat_rel_dest_reg = rel_dest_reg;
@@ -1156,7 +1168,7 @@ inline int
DefaultRename<Impl>::calcFreeLQEntries(ThreadID tid)
{
int num_free = freeEntries[tid].lqEntries -
- (loadsInProgress[tid] - fromIEW->iewInfo[tid].dispatchedToLQ);
+ (loadsInProgress[tid] - fromIEW->iewInfo[tid].dispatchedToLQ);
DPRINTF(Rename, "calcFreeLQEntries: free lqEntries: %d, loadsInProgress: %d, "
"loads dispatchedToLQ: %d\n", freeEntries[tid].lqEntries,
loadsInProgress[tid], fromIEW->iewInfo[tid].dispatchedToLQ);
@@ -1168,7 +1180,7 @@ inline int
DefaultRename<Impl>::calcFreeSQEntries(ThreadID tid)
{
int num_free = freeEntries[tid].sqEntries -
- (storesInProgress[tid] - fromIEW->iewInfo[tid].dispatchedToSQ);
+ (storesInProgress[tid] - fromIEW->iewInfo[tid].dispatchedToSQ);
DPRINTF(Rename, "calcFreeSQEntries: free sqEntries: %d, storesInProgress: %d, "
"stores dispatchedToSQ: %d\n", freeEntries[tid].sqEntries,
storesInProgress[tid], fromIEW->iewInfo[tid].dispatchedToSQ);
diff --git a/src/cpu/o3/rename_map.cc b/src/cpu/o3/rename_map.cc
index b0232df20..27ddd8c63 100644
--- a/src/cpu/o3/rename_map.cc
+++ b/src/cpu/o3/rename_map.cc
@@ -99,6 +99,9 @@ UnifiedRenameMap::init(PhysRegFile *_regFile,
floatMap.init(TheISA::NumFloatRegs, &(freeList->floatList), _floatZeroReg);
ccMap.init(TheISA::NumCCRegs, &(freeList->ccList), (RegIndex)-1);
+
+ vectorMap.init(TheISA::NumVectorRegs, &(freeList->vectorList),
+ (RegIndex)-1);
}
@@ -117,6 +120,9 @@ UnifiedRenameMap::rename(RegIndex arch_reg)
case CCRegClass:
return renameCC(rel_arch_reg);
+ case VectorRegClass:
+ return renameVector(rel_arch_reg);
+
case MiscRegClass:
return renameMisc(rel_arch_reg);
@@ -142,6 +148,9 @@ UnifiedRenameMap::lookup(RegIndex arch_reg) const
case CCRegClass:
return lookupCC(rel_arch_reg);
+ case VectorRegClass:
+ return lookupVector(rel_arch_reg);
+
case MiscRegClass:
return lookupMisc(rel_arch_reg);
@@ -166,6 +175,9 @@ UnifiedRenameMap::setEntry(RegIndex arch_reg, PhysRegIndex phys_reg)
case CCRegClass:
return setCCEntry(rel_arch_reg, phys_reg);
+ case VectorRegClass:
+ return setVectorEntry(rel_arch_reg, phys_reg);
+
case MiscRegClass:
// Misc registers do not actually rename, so don't change
// their mappings. We end up here when a commit or squash
diff --git a/src/cpu/o3/rename_map.hh b/src/cpu/o3/rename_map.hh
index 9d91f232e..37487c3d3 100644
--- a/src/cpu/o3/rename_map.hh
+++ b/src/cpu/o3/rename_map.hh
@@ -178,6 +178,9 @@ class UnifiedRenameMap
/** The condition-code register rename map */
SimpleRenameMap ccMap;
+ /** The vector register rename map */
+ SimpleRenameMap vectorMap;
+
public:
typedef TheISA::RegIndex RegIndex;
@@ -240,6 +243,17 @@ class UnifiedRenameMap
}
/**
+ * Perform rename() on a vector register, given a relative vector register
+ * index.
+ */
+ RenameInfo renameVector(RegIndex rel_arch_reg)
+ {
+ RenameInfo info = vectorMap.rename(rel_arch_reg);
+ assert(regFile->isVectorPhysReg(info.first));
+ return info;
+ }
+
+ /**
* Perform rename() on a misc register, given a relative
* misc register index.
*/
@@ -297,6 +311,17 @@ class UnifiedRenameMap
}
/**
+ * Perform lookup() on a vector register, given a relative
+ * vector register index.
+ */
+ PhysRegIndex lookupVector(RegIndex rel_arch_reg) const
+ {
+ PhysRegIndex phys_reg = vectorMap.lookup(rel_arch_reg);
+ assert(regFile->isVectorPhysReg(phys_reg));
+ return phys_reg;
+ }
+
+ /**
* Perform lookup() on a misc register, given a relative
* misc register index.
*/
@@ -349,6 +374,16 @@ class UnifiedRenameMap
}
/**
+ * Perform setEntry() on a vector register, given a relative vector
+ * register index.
+ */
+ void setVectorEntry(RegIndex arch_reg, PhysRegIndex phys_reg)
+ {
+ assert(regFile->isVectorPhysReg(phys_reg));
+ vectorMap.setEntry(arch_reg, phys_reg);
+ }
+
+ /**
* Return the minimum number of free entries across all of the
* register classes. The minimum is used so we guarantee that
* this number of entries is available regardless of which class
@@ -362,11 +397,13 @@ class UnifiedRenameMap
/**
* Return whether there are enough registers to serve the request.
*/
- bool canRename(uint32_t intRegs, uint32_t floatRegs, uint32_t ccRegs) const
+ bool canRename(uint32_t intRegs, uint32_t floatRegs, uint32_t ccRegs,
+ uint32_t vectorRegs) const
{
return intRegs <= intMap.numFreeEntries() &&
floatRegs <= floatMap.numFreeEntries() &&
- ccRegs <= ccMap.numFreeEntries();
+ ccRegs <= ccMap.numFreeEntries() &&
+ vectorRegs <= vectorMap.numFreeEntries();
}
};
diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh
index 87d87900c..6e9b054da 100755
--- a/src/cpu/o3/thread_context.hh
+++ b/src/cpu/o3/thread_context.hh
@@ -189,6 +189,10 @@ class O3ThreadContext : public ThreadContext
return readCCRegFlat(flattenCCIndex(reg_idx));
}
+ virtual const VectorReg &readVectorReg(int reg_idx) {
+ return readVectorRegFlat(flattenVectorIndex(reg_idx));
+ }
+
/** Sets an integer register to a value. */
virtual void setIntReg(int reg_idx, uint64_t val) {
setIntRegFlat(flattenIntIndex(reg_idx), val);
@@ -206,6 +210,10 @@ class O3ThreadContext : public ThreadContext
setCCRegFlat(flattenCCIndex(reg_idx), val);
}
+ virtual void setVectorReg(int reg_idx, const VectorReg &val) {
+ setVectorRegFlat(flattenVectorIndex(reg_idx), val);
+ }
+
/** Reads this thread's PC state. */
virtual TheISA::PCState pcState()
{ return cpu->pcState(thread->threadId()); }
@@ -246,6 +254,7 @@ class O3ThreadContext : public ThreadContext
virtual int flattenIntIndex(int reg);
virtual int flattenFloatIndex(int reg);
virtual int flattenCCIndex(int reg);
+ virtual int flattenVectorIndex(int reg);
virtual int flattenMiscIndex(int reg);
/** Returns the number of consecutive store conditional failures. */
@@ -291,6 +300,9 @@ class O3ThreadContext : public ThreadContext
virtual CCReg readCCRegFlat(int idx);
virtual void setCCRegFlat(int idx, CCReg val);
+
+ virtual const VectorReg &readVectorRegFlat(int idx);
+ virtual void setVectorRegFlat(int idx, const VectorReg &val);
};
#endif
diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh
index e6a3d5083..ecdd9ebb9 100755
--- a/src/cpu/o3/thread_context_impl.hh
+++ b/src/cpu/o3/thread_context_impl.hh
@@ -216,6 +216,13 @@ O3ThreadContext<Impl>::readCCRegFlat(int reg_idx)
}
template <class Impl>
+const TheISA::VectorReg &
+O3ThreadContext<Impl>::readVectorRegFlat(int reg_idx)
+{
+ return cpu->readArchVectorReg(reg_idx, thread->threadId());
+}
+
+template <class Impl>
void
O3ThreadContext<Impl>::setIntRegFlat(int reg_idx, uint64_t val)
{
@@ -253,6 +260,15 @@ O3ThreadContext<Impl>::setCCRegFlat(int reg_idx, TheISA::CCReg val)
template <class Impl>
void
+O3ThreadContext<Impl>::setVectorRegFlat(int reg_idx,
+ const TheISA::VectorReg &val)
+{
+ cpu->setArchVectorReg(reg_idx, val, thread->threadId());
+ conditionalSquash();
+}
+
+template <class Impl>
+void
O3ThreadContext<Impl>::pcState(const TheISA::PCState &val)
{
cpu->pcState(val, thread->threadId());
@@ -292,6 +308,13 @@ O3ThreadContext<Impl>::flattenCCIndex(int reg)
template <class Impl>
int
+O3ThreadContext<Impl>::flattenVectorIndex(int reg)
+{
+ return cpu->isa[thread->threadId()]->flattenVectorIndex(reg);
+}
+
+template <class Impl>
+int
O3ThreadContext<Impl>::flattenMiscIndex(int reg)
{
return cpu->isa[thread->threadId()]->flattenMiscIndex(reg);