diff options
Diffstat (limited to 'src/cpu')
30 files changed, 627 insertions, 68 deletions
diff --git a/src/cpu/StaticInstFlags.py b/src/cpu/StaticInstFlags.py index ef29726fc..3b00e5df8 100644 --- a/src/cpu/StaticInstFlags.py +++ b/src/cpu/StaticInstFlags.py @@ -55,8 +55,8 @@ class StaticInstFlags(Enum): vals = [ 'IsNop', # Is a no-op (no effect at all). - 'IsInteger', # References integer regs. - 'IsFloating', # References FP regs. + 'IsInteger', # References scalar integer regs. + 'IsFloating', # References scalar FP regs. 'IsCC', # References CC regs. 'IsMemRef', # References memory (load, store, or prefetch) @@ -108,5 +108,6 @@ class StaticInstFlags(Enum): 'IsMicroBranch', # This microop branches within the microcode for # a macroop 'IsDspOp', - 'IsSquashAfter' # Squash all uncommitted state after executed + 'IsSquashAfter', # Squash all uncommitted state after executed + 'IsVector', # References vector register. ] diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index 5b54679c9..515df6821 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -99,10 +99,19 @@ class BaseDynInst : public ExecContext, public RefCounted union Result { uint64_t integer; double dbl; + + // I am assuming that vector register type is different from the two + // types used above. Else it seems useless to have a separate typedef + // for vector registers. 
+ VectorReg vector; + void set(uint64_t i) { integer = i; } void set(double d) { dbl = d; } + void set(const VectorReg &v) { vector = v; } + void get(uint64_t& i) { i = integer; } void get(double& d) { d = dbl; } + void get(VectorReg& v) { v = vector; } }; protected: @@ -521,6 +530,9 @@ class BaseDynInst : public ExecContext, public RefCounted bool isDataPrefetch() const { return staticInst->isDataPrefetch(); } bool isInteger() const { return staticInst->isInteger(); } bool isFloating() const { return staticInst->isFloating(); } + bool isVector() const { return staticInst->isVector(); } + bool isCC() const { return staticInst->isCC(); } + bool isControl() const { return staticInst->isControl(); } bool isCall() const { return staticInst->isCall(); } bool isReturn() const { return staticInst->isReturn(); } @@ -550,6 +562,11 @@ class BaseDynInst : public ExecContext, public RefCounted bool isFirstMicroop() const { return staticInst->isFirstMicroop(); } bool isMicroBranch() const { return staticInst->isMicroBranch(); } + void printFlags(std::ostream &outs, const std::string &separator) const + { staticInst->printFlags(outs, separator); } + + std::string getName() const { return staticInst->getName(); } + /** Temporarily sets this instruction as a serialize before instruction. */ void setSerializeBefore() { status.set(SerializeBefore); } @@ -596,6 +613,8 @@ class BaseDynInst : public ExecContext, public RefCounted int8_t numFPDestRegs() const { return staticInst->numFPDestRegs(); } int8_t numIntDestRegs() const { return staticInst->numIntDestRegs(); } int8_t numCCDestRegs() const { return staticInst->numCCDestRegs(); } + int8_t numVectorDestRegs() const + { return staticInst->numVectorDestRegs(); } /** Returns the logical register index of the i'th destination register. 
*/ RegIndex destRegIdx(int i) const { return staticInst->destRegIdx(i); } @@ -655,6 +674,13 @@ class BaseDynInst : public ExecContext, public RefCounted setResult<uint64_t>(val); } + /** Records a vector register being set to a value. */ + void setVectorRegOperand(const StaticInst *si, int idx, + const VectorReg &val) + { + setResult<const VectorReg &>(val); + } + /** Records that one of the source registers is ready. */ void markSrcRegReady(); diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh index a363b6d0f..6d75f7c12 100644 --- a/src/cpu/checker/cpu.hh +++ b/src/cpu/checker/cpu.hh @@ -94,6 +94,7 @@ class CheckerCPU : public BaseCPU, public ExecContext typedef TheISA::FloatReg FloatReg; typedef TheISA::FloatRegBits FloatRegBits; typedef TheISA::MiscReg MiscReg; + typedef TheISA::VectorReg VectorReg; /** id attached to all issued requests */ MasterID masterId; @@ -145,10 +146,19 @@ class CheckerCPU : public BaseCPU, public ExecContext union Result { uint64_t integer; double dbl; + + // I am assuming that vector register type is different from the two + // types used above. Else it seems useless to have a separate typedef + // for vector registers. 
+ VectorReg vector; + void set(uint64_t i) { integer = i; } void set(double d) { dbl = d; } + void set(const VectorReg &v) { vector = v; } + void get(uint64_t& i) { i = integer; } void get(double& d) { d = dbl; } + void get(VectorReg& v) { v = vector; } }; // ISAs like ARM can have multiple destination registers to check, @@ -231,6 +241,11 @@ class CheckerCPU : public BaseCPU, public ExecContext return thread->readCCReg(reg_idx); } + const VectorReg &readVectorRegOperand(const StaticInst *si, int idx) + { + return thread->readVectorReg(si->srcRegIdx(idx)); + } + template <class T> void setResult(T t) { @@ -267,6 +282,13 @@ class CheckerCPU : public BaseCPU, public ExecContext setResult<uint64_t>(val); } + void setVectorRegOperand(const StaticInst *si, int idx, + const VectorReg &val) + { + thread->setVectorReg(si->destRegIdx(idx), val); + setResult<VectorReg>(val); + } + bool readPredicate() { return thread->readPredicate(); } void setPredicate(bool val) { @@ -441,7 +463,7 @@ class Checker : public CheckerCPU void validateExecution(DynInstPtr &inst); void validateState(); - void copyResult(DynInstPtr &inst, uint64_t mismatch_val, int start_idx); + void copyResult(DynInstPtr &inst, Result mismatch_val, int start_idx); void handlePendingInt(); private: diff --git a/src/cpu/checker/cpu_impl.hh b/src/cpu/checker/cpu_impl.hh index 289861521..d6a467358 100644 --- a/src/cpu/checker/cpu_impl.hh +++ b/src/cpu/checker/cpu_impl.hh @@ -491,7 +491,9 @@ Checker<Impl>::validateExecution(DynInstPtr &inst) // Unverifiable instructions assume they were executed // properly by the CPU. Grab the result from the // instruction and write it to the register. 
- copyResult(inst, 0, idx); + Result r; + r.integer = 0; + copyResult(inst, r, idx); } else if (inst->numDestRegs() > 0 && !result.empty()) { DPRINTF(Checker, "Dest regs %d, number of checker dest regs %d\n", inst->numDestRegs(), result.size()); @@ -525,7 +527,9 @@ Checker<Impl>::validateExecution(DynInstPtr &inst) // The load/store queue in Detailed CPU can also cause problems // if load/store forwarding is allowed. if (inst->isLoad() && warnOnlyOnLoadError) { - copyResult(inst, inst_val, idx); + Result r; + r.integer = inst_val; + copyResult(inst, r, idx); } else { handleError(inst); } @@ -590,7 +594,7 @@ Checker<Impl>::validateState() template <class Impl> void -Checker<Impl>::copyResult(DynInstPtr &inst, uint64_t mismatch_val, +Checker<Impl>::copyResult(DynInstPtr &inst, Result mismatch_val, int start_idx) { // We've already popped one dest off the queue, @@ -599,39 +603,65 @@ Checker<Impl>::copyResult(DynInstPtr &inst, uint64_t mismatch_val, RegIndex idx = inst->destRegIdx(start_idx); switch (regIdxToClass(idx)) { case IntRegClass: - thread->setIntReg(idx, mismatch_val); + thread->setIntReg(idx, mismatch_val.integer); break; case FloatRegClass: - thread->setFloatRegBits(idx - TheISA::FP_Reg_Base, mismatch_val); + thread->setFloatRegBits(idx - TheISA::FP_Reg_Base, + mismatch_val.integer); break; case CCRegClass: - thread->setCCReg(idx - TheISA::CC_Reg_Base, mismatch_val); + thread->setCCReg(idx - TheISA::CC_Reg_Base, mismatch_val.integer); + break; + case VectorRegClass: + thread->setVectorReg(idx - TheISA::Vector_Reg_Base, + mismatch_val.vector); break; case MiscRegClass: thread->setMiscReg(idx - TheISA::Misc_Reg_Base, - mismatch_val); + mismatch_val.integer); break; } } + start_idx++; - uint64_t res = 0; for (int i = start_idx; i < inst->numDestRegs(); i++) { RegIndex idx = inst->destRegIdx(i); - inst->template popResult<uint64_t>(res); switch (regIdxToClass(idx)) { - case IntRegClass: - thread->setIntReg(idx, res); - break; - case FloatRegClass: - 
thread->setFloatRegBits(idx - TheISA::FP_Reg_Base, res); - break; - case CCRegClass: - thread->setCCReg(idx - TheISA::CC_Reg_Base, res); - break; - case MiscRegClass: - // Try to get the proper misc register index for ARM here... - thread->setMiscReg(idx - TheISA::Misc_Reg_Base, res); - break; + case IntRegClass: { + uint64_t res = 0; + inst->template popResult<uint64_t>(res); + thread->setIntReg(idx, res); + } + break; + + case FloatRegClass: { + uint64_t res = 0; + inst->template popResult<uint64_t>(res); + thread->setFloatRegBits(idx - TheISA::FP_Reg_Base, res); + } + break; + + case CCRegClass: { + uint64_t res = 0; + inst->template popResult<uint64_t>(res); + thread->setCCReg(idx - TheISA::CC_Reg_Base, res); + } + break; + + case VectorRegClass: { + VectorReg res; + inst->template popResult<VectorReg>(res); + thread->setVectorReg(idx - TheISA::Vector_Reg_Base, res); + } + break; + + case MiscRegClass: { + // Try to get the proper misc register index for ARM here... + uint64_t res = 0; + inst->template popResult<uint64_t>(res); + thread->setMiscReg(idx - TheISA::Misc_Reg_Base, res); + } + break; // else Register is out of range... } } diff --git a/src/cpu/checker/thread_context.hh b/src/cpu/checker/thread_context.hh index 71c231ba0..436c97847 100644 --- a/src/cpu/checker/thread_context.hh +++ b/src/cpu/checker/thread_context.hh @@ -216,6 +216,9 @@ class CheckerThreadContext : public ThreadContext CCReg readCCReg(int reg_idx) { return actualTC->readCCReg(reg_idx); } + const VectorReg &readVectorReg(int reg_idx) + { return actualTC->readVectorReg(reg_idx); } + void setIntReg(int reg_idx, uint64_t val) { actualTC->setIntReg(reg_idx, val); @@ -240,6 +243,12 @@ class CheckerThreadContext : public ThreadContext checkerTC->setCCReg(reg_idx, val); } + void setVectorReg(int reg_idx, const VectorReg &val) + { + actualTC->setVectorReg(reg_idx, val); + checkerTC->setVectorReg(reg_idx, val); + } + /** Reads this thread's PC state. 
*/ TheISA::PCState pcState() { return actualTC->pcState(); } @@ -296,6 +305,7 @@ class CheckerThreadContext : public ThreadContext int flattenIntIndex(int reg) { return actualTC->flattenIntIndex(reg); } int flattenFloatIndex(int reg) { return actualTC->flattenFloatIndex(reg); } int flattenCCIndex(int reg) { return actualTC->flattenCCIndex(reg); } + int flattenVectorIndex(int reg) { return actualTC->flattenVectorIndex(reg); } int flattenMiscIndex(int reg) { return actualTC->flattenMiscIndex(reg); } unsigned readStCondFailures() @@ -331,6 +341,12 @@ class CheckerThreadContext : public ThreadContext void setCCRegFlat(int idx, CCReg val) { actualTC->setCCRegFlat(idx, val); } + + const VectorReg &readVectorRegFlat(int idx) + { return actualTC->readVectorRegFlat(idx); } + + void setVectorRegFlat(int idx, const VectorReg &val) + { actualTC->setVectorRegFlat(idx, val); } }; #endif // __CPU_CHECKER_EXEC_CONTEXT_HH__ diff --git a/src/cpu/exec_context.hh b/src/cpu/exec_context.hh index c65841db2..5c6b3fad7 100644 --- a/src/cpu/exec_context.hh +++ b/src/cpu/exec_context.hh @@ -76,6 +76,7 @@ class ExecContext { typedef TheISA::MiscReg MiscReg; typedef TheISA::CCReg CCReg; + typedef TheISA::VectorReg VectorReg; public: /** @@ -128,6 +129,22 @@ class ExecContext { /** * @{ + * @name Vector Register Interfaces + * + */ + + /** Reads a vector register. */ + virtual const VectorReg &readVectorRegOperand (const StaticInst *si, + int idx) = 0; + + /** Sets a vector register to a value. 
*/ + virtual void setVectorRegOperand(const StaticInst *si, + int idx, const VectorReg &val) = 0; + + /** @} */ + + /** + * @{ * @name Misc Register Interfaces */ virtual MiscReg readMiscRegOperand(const StaticInst *si, int idx) = 0; diff --git a/src/cpu/minor/dyn_inst.cc b/src/cpu/minor/dyn_inst.cc index ab08e6b4a..03cf785ef 100644 --- a/src/cpu/minor/dyn_inst.cc +++ b/src/cpu/minor/dyn_inst.cc @@ -157,6 +157,8 @@ printRegName(std::ostream &os, TheISA::RegIndex reg) break; case CCRegClass: os << 'c' << static_cast<unsigned int>(reg - TheISA::CC_Reg_Base); + case VectorRegClass: + os << 'v' << static_cast<unsigned int>(reg - TheISA::Vector_Reg_Base); } } diff --git a/src/cpu/minor/exec_context.hh b/src/cpu/minor/exec_context.hh index 80d5d9872..6ea74047c 100644 --- a/src/cpu/minor/exec_context.hh +++ b/src/cpu/minor/exec_context.hh @@ -140,6 +140,20 @@ class ExecContext : public ::ExecContext return thread.readFloatRegBits(reg_idx); } + TheISA::CCReg + readCCRegOperand(const StaticInst *si, int idx) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::CC_Reg_Base; + return thread.readCCReg(reg_idx); + } + + const TheISA::VectorReg & + readVectorRegOperand(const StaticInst *si, int idx) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::Vector_Reg_Base; + return thread.readVectorReg(reg_idx); + } + void setIntRegOperand(const StaticInst *si, int idx, IntReg val) { @@ -162,6 +176,21 @@ class ExecContext : public ::ExecContext thread.setFloatRegBits(reg_idx, val); } + void + setCCRegOperand(const StaticInst *si, int idx, TheISA::CCReg val) + { + int reg_idx = si->destRegIdx(idx) - TheISA::CC_Reg_Base; + thread.setCCReg(reg_idx, val); + } + + void + setVectorRegOperand(const StaticInst *si, int idx, + const TheISA::VectorReg &val) + { + int reg_idx = si->destRegIdx(idx) - TheISA::Vector_Reg_Base; + thread.setVectorReg(reg_idx, val); + } + bool readPredicate() { @@ -265,20 +294,6 @@ class ExecContext : public ::ExecContext thread.getDTBPtr()->demapPage(vaddr, asn); } - 
TheISA::CCReg - readCCRegOperand(const StaticInst *si, int idx) - { - int reg_idx = si->srcRegIdx(idx) - TheISA::CC_Reg_Base; - return thread.readCCReg(reg_idx); - } - - void - setCCRegOperand(const StaticInst *si, int idx, TheISA::CCReg val) - { - int reg_idx = si->destRegIdx(idx) - TheISA::CC_Reg_Base; - thread.setCCReg(reg_idx, val); - } - void demapInstPage(Addr vaddr, uint64_t asn) { diff --git a/src/cpu/minor/scoreboard.cc b/src/cpu/minor/scoreboard.cc index f6b1f7944..3eb09271a 100644 --- a/src/cpu/minor/scoreboard.cc +++ b/src/cpu/minor/scoreboard.cc @@ -71,6 +71,11 @@ Scoreboard::findIndex(RegIndex reg, Index &scoreboard_index) scoreboard_index = TheISA::NumIntRegs + reg - TheISA::FP_Reg_Base; ret = true; break; + case VectorRegClass: + scoreboard_index = TheISA::NumIntRegs + TheISA::NumCCRegs + + TheISA::NumFloatRegs + reg - TheISA::Vector_Reg_Base; + ret = true; + break; case MiscRegClass: /* Don't bother with Misc registers */ ret = false; @@ -99,6 +104,9 @@ flattenRegIndex(TheISA::RegIndex reg, ThreadContext *thread_context) case CCRegClass: ret = thread_context->flattenCCIndex(reg); break; + case VectorRegClass: + ret = thread_context->flattenVectorIndex(reg); + break; case MiscRegClass: /* Don't bother to flatten misc regs as we don't need them here */ /* return thread_context->flattenMiscIndex(reg); */ diff --git a/src/cpu/minor/scoreboard.hh b/src/cpu/minor/scoreboard.hh index 711bcafb2..3a3a9d3c3 100644 --- a/src/cpu/minor/scoreboard.hh +++ b/src/cpu/minor/scoreboard.hh @@ -60,11 +60,13 @@ class Scoreboard : public Named { public: /** The number of registers in the Scoreboard. 
These - * are just the integer, CC and float registers packed + * are just the integer, CC, float and vector registers packed * together with integer regs in the range [0,NumIntRegs-1], - * CC regs in the range [NumIntRegs, NumIntRegs+NumCCRegs-1] - * and float regs in the range - * [NumIntRegs+NumCCRegs, NumFloatRegs+NumIntRegs+NumCCRegs-1] */ + * CC regs in the range [NumIntRegs, NumIntRegs + NumCCRegs - 1], + * float regs in the range + * [NumIntRegs + NumCCRegs, NumFloatRegs + NumIntRegs + NumCCRegs - 1] + * and vector regs in the range [NumFloatRegs + NumIntRegs + NumCCRegs, + * NumFloatRegs + NumIntRegs + NumCCRegs + NumVectorRegs - 1]*/ const unsigned numRegs; /** Type to use for thread context registers */ @@ -97,7 +99,7 @@ class Scoreboard : public Named Scoreboard(const std::string &name) : Named(name), numRegs(TheISA::NumIntRegs + TheISA::NumCCRegs + - TheISA::NumFloatRegs), + TheISA::NumFloatRegs + TheISA::NumVectorRegs), numResults(numRegs, 0), numUnpredictableResults(numRegs, 0), fuIndices(numRegs, 0), diff --git a/src/cpu/o3/O3CPU.py b/src/cpu/o3/O3CPU.py index 92f96a3b6..d2220de82 100644 --- a/src/cpu/o3/O3CPU.py +++ b/src/cpu/o3/O3CPU.py @@ -114,6 +114,7 @@ class DerivO3CPU(BaseCPU): numPhysIntRegs = Param.Unsigned(256, "Number of physical integer registers") numPhysFloatRegs = Param.Unsigned(256, "Number of physical floating point " "registers") + # most ISAs don't use condition-code regs, so default is 0 _defaultNumPhysCCRegs = 0 if buildEnv['TARGET_ISA'] in ('arm','x86'): @@ -126,6 +127,12 @@ class DerivO3CPU(BaseCPU): _defaultNumPhysCCRegs = Self.numPhysIntRegs * 5 numPhysCCRegs = Param.Unsigned(_defaultNumPhysCCRegs, "Number of physical cc registers") + + # most ISAs don't use vector regs, so default is 0 + _defaultNumPhysVectorRegs = 0 + numPhysVectorRegs = Param.Unsigned(_defaultNumPhysVectorRegs, + "Number of physical vector registers") + numIQEntries = Param.Unsigned(64, "Number of instruction queue entries") numROBEntries = 
Param.Unsigned(192, "Number of reorder buffer entries") diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 026907a94..d8f39bbe4 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -170,7 +170,8 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params) regFile(params->numPhysIntRegs, params->numPhysFloatRegs, - params->numPhysCCRegs), + params->numPhysCCRegs, + params->numPhysVectorRegs), freeList(name() + ".freelist", &regFile), @@ -269,6 +270,7 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params) assert(params->numPhysIntRegs >= numThreads * TheISA::NumIntRegs); assert(params->numPhysFloatRegs >= numThreads * TheISA::NumFloatRegs); assert(params->numPhysCCRegs >= numThreads * TheISA::NumCCRegs); + assert(params->numPhysVectorRegs >= numThreads * TheISA::NumVectorRegs); rename.setScoreboard(&scoreboard); iew.setScoreboard(&scoreboard); @@ -313,6 +315,12 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params) renameMap[tid].setCCEntry(ridx, phys_reg); commitRenameMap[tid].setCCEntry(ridx, phys_reg); } + + for (RegIndex ridx = 0; ridx < TheISA::NumVectorRegs; ++ridx) { + PhysRegIndex phys_reg = freeList.getVectorReg(); + renameMap[tid].setVectorEntry(ridx, phys_reg); + commitRenameMap[tid].setVectorEntry(ridx, phys_reg); + } } rename.setRenameMap(renameMap); @@ -521,6 +529,16 @@ FullO3CPU<Impl>::regStats() .desc("number of cc regfile writes") .prereq(ccRegfileWrites); + vectorRegfileReads + .name(name() + ".vector_regfile_reads") + .desc("number of vector regfile reads") + .prereq(vectorRegfileReads); + + vectorRegfileWrites + .name(name() + ".vector_regfile_writes") + .desc("number of vector regfile writes") + .prereq(vectorRegfileWrites); + miscRegfileReads .name(name() + ".misc_regfile_reads") .desc("number of misc regfile reads") @@ -807,6 +825,18 @@ FullO3CPU<Impl>::insertThread(ThreadID tid) scoreboard.setReg(phys_reg); } + //Bind vector Regs to Rename Map + max_reg = TheISA::NumIntRegs + TheISA::NumFloatRegs + TheISA::NumCCRegs + +
TheISA::NumVectorRegs; + for (int vreg = TheISA::NumIntRegs + TheISA::NumFloatRegs + + TheISA::NumCCRegs; + vreg < max_reg; vreg++) { + PhysRegIndex phys_reg = freeList.getVectorReg(); + + renameMap[tid].setEntry(vreg, phys_reg); + scoreboard.setReg(phys_reg); + } + //Copy Thread Data Into RegFile //this->copyFromTC(tid); @@ -860,6 +890,14 @@ FullO3CPU<Impl>::removeThread(ThreadID tid) freeList.addReg(phys_reg); } + // Unbind vector Regs from Rename Map + max_reg = TheISA::Vector_Reg_Base + TheISA::NumVectorRegs; + for (int vreg = TheISA::Vector_Reg_Base; vreg < max_reg; vreg++) { + PhysRegIndex phys_reg = renameMap[tid].lookup(vreg); + scoreboard.unsetReg(phys_reg); + freeList.addReg(phys_reg); + } + // Squash Throughout Pipeline DynInstPtr inst = commit.rob->readHeadInst(tid); InstSeqNum squash_seq_num = inst->seqNum; @@ -1259,6 +1297,14 @@ FullO3CPU<Impl>::readCCReg(int reg_idx) } template <class Impl> +const VectorReg & +FullO3CPU<Impl>::readVectorReg(int reg_idx) +{ + vectorRegfileReads++; + return regFile.readVectorReg(reg_idx); +} + +template <class Impl> void FullO3CPU<Impl>::setIntReg(int reg_idx, uint64_t val) { @@ -1291,6 +1337,14 @@ FullO3CPU<Impl>::setCCReg(int reg_idx, CCReg val) } template <class Impl> +void +FullO3CPU<Impl>::setVectorReg(int reg_idx, const VectorReg &val) +{ + vectorRegfileWrites++; + regFile.setVectorReg(reg_idx, val); +} + +template <class Impl> uint64_t FullO3CPU<Impl>::readArchIntReg(int reg_idx, ThreadID tid) { @@ -1331,6 +1385,16 @@ FullO3CPU<Impl>::readArchCCReg(int reg_idx, ThreadID tid) } template <class Impl> +const VectorReg& +FullO3CPU<Impl>::readArchVectorReg(int reg_idx, ThreadID tid) +{ + vectorRegfileReads++; + PhysRegIndex phys_reg = commitRenameMap[tid].lookupVector(reg_idx); + + return regFile.readVectorReg(phys_reg); +} + +template <class Impl> void FullO3CPU<Impl>::setArchIntReg(int reg_idx, uint64_t val, ThreadID tid) { @@ -1371,6 +1435,16 @@ FullO3CPU<Impl>::setArchCCReg(int reg_idx, CCReg val,
ThreadID tid) } template <class Impl> +void +FullO3CPU<Impl>::setArchVectorReg(int reg_idx, const VectorReg &val, + ThreadID tid) +{ + vectorRegfileWrites++; + PhysRegIndex phys_reg = commitRenameMap[tid].lookupVector(reg_idx); + regFile.setVectorReg(phys_reg, val); +} + +template <class Impl> TheISA::PCState FullO3CPU<Impl>::pcState(ThreadID tid) { diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index aa02ee2ea..f16450d19 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -427,6 +427,8 @@ class FullO3CPU : public BaseO3CPU TheISA::CCReg readCCReg(int reg_idx); + const TheISA::VectorReg &readVectorReg(int reg_idx); + void setIntReg(int reg_idx, uint64_t val); void setFloatReg(int reg_idx, TheISA::FloatReg val); @@ -435,6 +437,8 @@ class FullO3CPU : public BaseO3CPU void setCCReg(int reg_idx, TheISA::CCReg val); + void setVectorReg(int reg_idx, const TheISA::VectorReg &val); + uint64_t readArchIntReg(int reg_idx, ThreadID tid); float readArchFloatReg(int reg_idx, ThreadID tid); @@ -443,6 +447,8 @@ class FullO3CPU : public BaseO3CPU TheISA::CCReg readArchCCReg(int reg_idx, ThreadID tid); + const TheISA::VectorReg &readArchVectorReg(int reg_idx, ThreadID tid); + /** Architectural register accessors. Looks up in the commit * rename table to obtain the true physical index of the * architected register first, then accesses that physical @@ -456,6 +462,9 @@ class FullO3CPU : public BaseO3CPU void setArchCCReg(int reg_idx, TheISA::CCReg val, ThreadID tid); + void setArchVectorReg(int reg_idx, const TheISA::VectorReg &val, + ThreadID tid); + /** Sets the commit PC state of a specific thread. 
*/ void pcState(const TheISA::PCState &newPCState, ThreadID tid); @@ -734,6 +743,9 @@ class FullO3CPU : public BaseO3CPU //number of CC register file accesses Stats::Scalar ccRegfileReads; Stats::Scalar ccRegfileWrites; + //number of vector register file accesses + Stats::Scalar vectorRegfileReads; + Stats::Scalar vectorRegfileWrites; //number of misc Stats::Scalar miscRegfileReads; Stats::Scalar miscRegfileWrites; diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh index 6740c601d..d19e4d461 100644 --- a/src/cpu/o3/dyn_inst.hh +++ b/src/cpu/o3/dyn_inst.hh @@ -74,6 +74,7 @@ class BaseO3DynInst : public BaseDynInst<Impl> typedef TheISA::FloatReg FloatReg; typedef TheISA::FloatRegBits FloatRegBits; typedef TheISA::CCReg CCReg; + typedef TheISA::VectorReg VectorReg; /** Misc register index type. */ typedef TheISA::MiscReg MiscReg; @@ -206,7 +207,6 @@ class BaseO3DynInst : public BaseDynInst<Impl> void forwardOldRegs() { - for (int idx = 0; idx < this->numDestRegs(); idx++) { PhysRegIndex prev_phys_reg = this->prevDestRegIdx(idx); TheISA::RegIndex original_dest_reg = @@ -224,6 +224,11 @@ class BaseO3DynInst : public BaseDynInst<Impl> this->setCCRegOperand(this->staticInst.get(), idx, this->cpu->readCCReg(prev_phys_reg)); break; + case VectorRegClass: + this->setVectorRegOperand(this->staticInst.get(), idx, + this->cpu->readVectorReg(prev_phys_reg)); + break; + case MiscRegClass: // no need to forward misc reg values break; @@ -272,6 +277,11 @@ class BaseO3DynInst : public BaseDynInst<Impl> return this->cpu->readCCReg(this->_srcRegIdx[idx]); } + const VectorReg &readVectorRegOperand(const StaticInst *si, int idx) + { + return this->cpu->readVectorReg(this->_srcRegIdx[idx]); + } + /** @todo: Make results into arrays so they can handle multiple dest * registers.
*/ @@ -300,6 +310,13 @@ class BaseO3DynInst : public BaseDynInst<Impl> BaseDynInst<Impl>::setCCRegOperand(si, idx, val); } + void setVectorRegOperand(const StaticInst *si, int idx, + const VectorReg &val) + { + this->cpu->setVectorReg(this->_destRegIdx[idx], val); + BaseDynInst<Impl>::setVectorRegOperand(si, idx, val); + } + #if THE_ISA == MIPS_ISA MiscReg readRegOtherThread(int misc_reg, ThreadID tid) { diff --git a/src/cpu/o3/free_list.hh b/src/cpu/o3/free_list.hh index aa805e26e..d345d7ac8 100644 --- a/src/cpu/o3/free_list.hh +++ b/src/cpu/o3/free_list.hh @@ -109,6 +109,9 @@ class UnifiedFreeList /** The list of free condition-code registers. */ SimpleFreeList ccList; + /** The list of free vector registers. */ + SimpleFreeList vectorList; + /** * The register file object is used only to distinguish integer * from floating-point physical register indices. @@ -148,6 +151,9 @@ class UnifiedFreeList /** Gets a free cc register. */ PhysRegIndex getCCReg() { return ccList.getReg(); } + /** Gets a free vector register. */ + PhysRegIndex getVectorReg() { return vectorList.getReg(); } + /** Adds a register back to the free list. */ void addReg(PhysRegIndex freed_reg); @@ -160,6 +166,9 @@ class UnifiedFreeList /** Adds a cc register back to the free list. */ void addCCReg(PhysRegIndex freed_reg) { ccList.addReg(freed_reg); } + /** Adds a vector register back to the free list. */ + void addVectorReg(PhysRegIndex freed_reg) { vectorList.addReg(freed_reg); } + /** Checks if there are any free integer registers. */ bool hasFreeIntRegs() const { return intList.hasFreeRegs(); } @@ -169,6 +178,9 @@ class UnifiedFreeList /** Checks if there are any free cc registers. */ bool hasFreeCCRegs() const { return ccList.hasFreeRegs(); } + /** Checks if there are any free vector registers. */ + bool hasFreeVectorRegs() const { return vectorList.hasFreeRegs(); } + /** Returns the number of free integer registers. 
*/ unsigned numFreeIntRegs() const { return intList.numFreeRegs(); } @@ -177,6 +189,9 @@ class UnifiedFreeList /** Returns the number of free cc registers. */ unsigned numFreeCCRegs() const { return ccList.numFreeRegs(); } + + /** Returns the number of free vector registers. */ + unsigned numFreeVectorRegs() const { return vectorList.numFreeRegs(); } }; inline void @@ -189,9 +204,11 @@ UnifiedFreeList::addReg(PhysRegIndex freed_reg) intList.addReg(freed_reg); } else if (regFile->isFloatPhysReg(freed_reg)) { floatList.addReg(freed_reg); - } else { - assert(regFile->isCCPhysReg(freed_reg)); + } else if (regFile->isCCPhysReg(freed_reg)) { ccList.addReg(freed_reg); + } else { + assert(regFile->isVectorPhysReg(freed_reg)); + vectorList.addReg(freed_reg); } // These assert conditions ensure that the number of free diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index 7d359b992..e16843160 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -99,7 +99,7 @@ InstructionQueue<Impl>::InstructionQueue(O3CPU *cpu_ptr, IEW *iew_ptr, // Set the number of total physical registers numPhysRegs = params->numPhysIntRegs + params->numPhysFloatRegs + - params->numPhysCCRegs; + params->numPhysCCRegs + params->numPhysVectorRegs; //Create an entry for each physical register within the //dependency graph. 
diff --git a/src/cpu/o3/regfile.cc b/src/cpu/o3/regfile.cc index 96ce44bdd..a7476c5ec 100644 --- a/src/cpu/o3/regfile.cc +++ b/src/cpu/o3/regfile.cc @@ -37,15 +37,20 @@ PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs, unsigned _numPhysicalFloatRegs, - unsigned _numPhysicalCCRegs) + unsigned _numPhysicalCCRegs, + unsigned _numPhysicalVectorRegs) : intRegFile(_numPhysicalIntRegs), floatRegFile(_numPhysicalFloatRegs), ccRegFile(_numPhysicalCCRegs), + vectorRegFile(_numPhysicalVectorRegs), baseFloatRegIndex(_numPhysicalIntRegs), baseCCRegIndex(_numPhysicalIntRegs + _numPhysicalFloatRegs), + baseVectorRegIndex(_numPhysicalIntRegs + _numPhysicalFloatRegs + + _numPhysicalCCRegs), totalNumRegs(_numPhysicalIntRegs + _numPhysicalFloatRegs - + _numPhysicalCCRegs) + + _numPhysicalCCRegs + + _numPhysicalVectorRegs) { if (TheISA::NumCCRegs == 0 && _numPhysicalCCRegs != 0) { // Just make this a warning and go ahead and allocate them @@ -53,6 +58,13 @@ PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs, warn("Non-zero number of physical CC regs specified, even though\n" " ISA does not use them.\n"); } + + if (TheISA::NumVectorRegs == 0 && _numPhysicalVectorRegs != 0) { + // Just make this a warning and go ahead and allocate them + // anyway, to keep from having to add checks everywhere + warn("Non-zero number of physical vector regs specified, even though\n" + " ISA does not use them.\n"); + } } @@ -73,9 +85,15 @@ PhysRegFile::initFreeList(UnifiedFreeList *freeList) freeList->addFloatReg(reg_idx++); } - // The rest of the registers are the condition-code physical + // The next batch of registers are the condition-code physical // registers; put them onto the condition-code free list. - while (reg_idx < totalNumRegs) { + while (reg_idx < baseVectorRegIndex) { freeList->addCCReg(reg_idx++); } + + // The rest of the registers are the vector physical + // registers; put them onto the vector free list. 
+ while (reg_idx < totalNumRegs) { + freeList->addVectorReg(reg_idx++); + } } diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh index 8b87725ca..71ca5015f 100644 --- a/src/cpu/o3/regfile.hh +++ b/src/cpu/o3/regfile.hh @@ -56,6 +56,7 @@ class PhysRegFile typedef TheISA::FloatReg FloatReg; typedef TheISA::FloatRegBits FloatRegBits; typedef TheISA::CCReg CCReg; + typedef TheISA::VectorReg VectorReg; typedef union { FloatReg d; @@ -71,6 +72,9 @@ class PhysRegFile /** Condition-code register file. */ std::vector<CCReg> ccRegFile; + /** Vector register file. */ + std::vector<VectorReg> vectorRegFile; + /** * The first floating-point physical register index. The physical * register file has a single continuous index space, with the @@ -93,6 +97,12 @@ class PhysRegFile */ unsigned baseCCRegIndex; + /** + * The first vector physical register index. The vector registers follow + * the condition-code registers. + */ + unsigned baseVectorRegIndex; + /** Total number of physical registers. */ unsigned totalNumRegs; @@ -103,7 +113,8 @@ class PhysRegFile */ PhysRegFile(unsigned _numPhysicalIntRegs, unsigned _numPhysicalFloatRegs, - unsigned _numPhysicalCCRegs); + unsigned _numPhysicalCCRegs, + unsigned _numPhysicalVectorRegs); /** * Destructor to free resources @@ -122,7 +133,11 @@ class PhysRegFile /** @return the number of condition-code physical registers. */ unsigned numCCPhysRegs() const - { return totalNumRegs - baseCCRegIndex; } + { return baseVectorRegIndex - baseCCRegIndex; } + + /** @return the number of vector physical registers. */ + unsigned numVectorPhysRegs() const + { return totalNumRegs - baseVectorRegIndex; } /** @return the total number of physical registers. 
*/ unsigned totalNumPhysRegs() const { return totalNumRegs; } @@ -151,7 +166,16 @@ class PhysRegFile */ bool isCCPhysReg(PhysRegIndex reg_idx) { - return (baseCCRegIndex <= reg_idx && reg_idx < totalNumRegs); + return (baseCCRegIndex <= reg_idx && reg_idx < baseVectorRegIndex); + } + + /** + * @return true if the specified physical register index + * corresponds to a vector physical register. + */ + bool isVectorPhysReg(PhysRegIndex reg_idx) const + { + return baseVectorRegIndex <= reg_idx && reg_idx < totalNumRegs; } /** Reads an integer register. */ @@ -207,6 +231,18 @@ class PhysRegFile return ccRegFile[reg_offset]; } + /** Reads a vector register. */ + const VectorReg &readVectorReg(PhysRegIndex reg_idx) const + { + assert(isVectorPhysReg(reg_idx)); + + // Remove the base vector reg dependency. + PhysRegIndex reg_offset = reg_idx - baseVectorRegIndex; + + DPRINTF(IEW, "RegFile: Access to vector register %i\n", int(reg_idx)); + return vectorRegFile[reg_offset]; + } + /** Sets an integer register to the given value. */ void setIntReg(PhysRegIndex reg_idx, uint64_t val) { @@ -262,6 +298,16 @@ class PhysRegFile ccRegFile[reg_offset] = val; } + + /** Sets a vector register to the given value. */ + void setVectorReg(PhysRegIndex reg_idx, const VectorReg &val) + { + assert(isVectorPhysReg(reg_idx)); + // Remove the base vector reg dependency. 
+ PhysRegIndex reg_offset = reg_idx - baseVectorRegIndex; + DPRINTF(IEW, "RegFile: Setting vector register %i\n", int(reg_idx)); + vectorRegFile[reg_offset] = val; + } }; diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index 43b7ba9aa..3da6fd4fa 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -69,7 +69,7 @@ DefaultRename<Impl>::DefaultRename(O3CPU *_cpu, DerivO3CPUParams *params) commitWidth(params->commitWidth), numThreads(params->numThreads), maxPhysicalRegs(params->numPhysIntRegs + params->numPhysFloatRegs - + params->numPhysCCRegs) + + params->numPhysCCRegs + params->numPhysVectorRegs) { if (renameWidth > Impl::MaxWidth) fatal("renameWidth (%d) is larger than compiled limit (%d),\n" @@ -635,7 +635,8 @@ DefaultRename<Impl>::renameInsts(ThreadID tid) // to rename to. Otherwise block. if (!renameMap[tid]->canRename(inst->numIntDestRegs(), inst->numFPDestRegs(), - inst->numCCDestRegs())) { + inst->numCCDestRegs(), + inst->numVectorDestRegs())) { DPRINTF(Rename, "Blocking due to lack of free " "physical registers to rename to.\n"); blockThisCycle = true; @@ -1016,6 +1017,11 @@ DefaultRename<Impl>::renameSrcRegs(DynInstPtr &inst, ThreadID tid) renamed_reg = map->lookupCC(flat_rel_src_reg); break; + case VectorRegClass: + flat_rel_src_reg = tc->flattenVectorIndex(rel_src_reg); + renamed_reg = map->lookupVector(flat_rel_src_reg); + break; + case MiscRegClass: // misc regs don't get flattened flat_rel_src_reg = rel_src_reg; @@ -1082,6 +1088,12 @@ DefaultRename<Impl>::renameDestRegs(DynInstPtr &inst, ThreadID tid) flat_uni_dest_reg = flat_rel_dest_reg + TheISA::CC_Reg_Base; break; + case VectorRegClass: + flat_rel_dest_reg = tc->flattenVectorIndex(rel_dest_reg); + rename_result = map->renameVector(flat_rel_dest_reg); + flat_uni_dest_reg = flat_rel_dest_reg + TheISA::Vector_Reg_Base; + break; + case MiscRegClass: // misc regs don't get flattened flat_rel_dest_reg = rel_dest_reg; @@ -1156,7 +1168,7 @@ inline int 
DefaultRename<Impl>::calcFreeLQEntries(ThreadID tid) { int num_free = freeEntries[tid].lqEntries - - (loadsInProgress[tid] - fromIEW->iewInfo[tid].dispatchedToLQ); + (loadsInProgress[tid] - fromIEW->iewInfo[tid].dispatchedToLQ); DPRINTF(Rename, "calcFreeLQEntries: free lqEntries: %d, loadsInProgress: %d, " "loads dispatchedToLQ: %d\n", freeEntries[tid].lqEntries, loadsInProgress[tid], fromIEW->iewInfo[tid].dispatchedToLQ); @@ -1168,7 +1180,7 @@ inline int DefaultRename<Impl>::calcFreeSQEntries(ThreadID tid) { int num_free = freeEntries[tid].sqEntries - - (storesInProgress[tid] - fromIEW->iewInfo[tid].dispatchedToSQ); + (storesInProgress[tid] - fromIEW->iewInfo[tid].dispatchedToSQ); DPRINTF(Rename, "calcFreeSQEntries: free sqEntries: %d, storesInProgress: %d, " "stores dispatchedToSQ: %d\n", freeEntries[tid].sqEntries, storesInProgress[tid], fromIEW->iewInfo[tid].dispatchedToSQ); diff --git a/src/cpu/o3/rename_map.cc b/src/cpu/o3/rename_map.cc index b0232df20..27ddd8c63 100644 --- a/src/cpu/o3/rename_map.cc +++ b/src/cpu/o3/rename_map.cc @@ -99,6 +99,9 @@ UnifiedRenameMap::init(PhysRegFile *_regFile, floatMap.init(TheISA::NumFloatRegs, &(freeList->floatList), _floatZeroReg); ccMap.init(TheISA::NumCCRegs, &(freeList->ccList), (RegIndex)-1); + + vectorMap.init(TheISA::NumVectorRegs, &(freeList->vectorList), + (RegIndex)-1); } @@ -117,6 +120,9 @@ UnifiedRenameMap::rename(RegIndex arch_reg) case CCRegClass: return renameCC(rel_arch_reg); + case VectorRegClass: + return renameVector(rel_arch_reg); + case MiscRegClass: return renameMisc(rel_arch_reg); @@ -142,6 +148,9 @@ UnifiedRenameMap::lookup(RegIndex arch_reg) const case CCRegClass: return lookupCC(rel_arch_reg); + case VectorRegClass: + return lookupVector(rel_arch_reg); + case MiscRegClass: return lookupMisc(rel_arch_reg); @@ -166,6 +175,9 @@ UnifiedRenameMap::setEntry(RegIndex arch_reg, PhysRegIndex phys_reg) case CCRegClass: return setCCEntry(rel_arch_reg, phys_reg); + case VectorRegClass: + return 
setVectorEntry(rel_arch_reg, phys_reg); + case MiscRegClass: // Misc registers do not actually rename, so don't change // their mappings. We end up here when a commit or squash diff --git a/src/cpu/o3/rename_map.hh b/src/cpu/o3/rename_map.hh index 9d91f232e..37487c3d3 100644 --- a/src/cpu/o3/rename_map.hh +++ b/src/cpu/o3/rename_map.hh @@ -178,6 +178,9 @@ class UnifiedRenameMap /** The condition-code register rename map */ SimpleRenameMap ccMap; + /** The vector register rename map */ + SimpleRenameMap vectorMap; + public: typedef TheISA::RegIndex RegIndex; @@ -240,6 +243,17 @@ class UnifiedRenameMap } /** + * Perform rename() on a vector register, given a relative vector register + * index. + */ + RenameInfo renameVector(RegIndex rel_arch_reg) + { + RenameInfo info = vectorMap.rename(rel_arch_reg); + assert(regFile->isVectorPhysReg(info.first)); + return info; + } + + /** * Perform rename() on a misc register, given a relative * misc register index. */ @@ -297,6 +311,17 @@ class UnifiedRenameMap } /** + * Perform lookup() on a vector register, given a relative + * vector register index. + */ + PhysRegIndex lookupVector(RegIndex rel_arch_reg) const + { + PhysRegIndex phys_reg = vectorMap.lookup(rel_arch_reg); + assert(regFile->isVectorPhysReg(phys_reg)); + return phys_reg; + } + + /** * Perform lookup() on a misc register, given a relative * misc register index. */ @@ -349,6 +374,16 @@ class UnifiedRenameMap } /** + * Perform setEntry() on a vector register, given a relative vector + * register index. + */ + void setVectorEntry(RegIndex arch_reg, PhysRegIndex phys_reg) + { + assert(regFile->isVectorPhysReg(phys_reg)); + vectorMap.setEntry(arch_reg, phys_reg); + } + + /** * Return the minimum number of free entries across all of the * register classes. 
The minimum is used so we guarantee that * this number of entries is available regardless of which class @@ -362,11 +397,13 @@ class UnifiedRenameMap /** * Return whether there are enough registers to serve the request. */ - bool canRename(uint32_t intRegs, uint32_t floatRegs, uint32_t ccRegs) const + bool canRename(uint32_t intRegs, uint32_t floatRegs, uint32_t ccRegs, + uint32_t vectorRegs) const { return intRegs <= intMap.numFreeEntries() && floatRegs <= floatMap.numFreeEntries() && - ccRegs <= ccMap.numFreeEntries(); + ccRegs <= ccMap.numFreeEntries() && + vectorRegs <= vectorMap.numFreeEntries(); } }; diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh index 87d87900c..6e9b054da 100755 --- a/src/cpu/o3/thread_context.hh +++ b/src/cpu/o3/thread_context.hh @@ -189,6 +189,10 @@ class O3ThreadContext : public ThreadContext return readCCRegFlat(flattenCCIndex(reg_idx)); } + virtual const VectorReg &readVectorReg(int reg_idx) { + return readVectorRegFlat(flattenVectorIndex(reg_idx)); + } + /** Sets an integer register to a value. */ virtual void setIntReg(int reg_idx, uint64_t val) { setIntRegFlat(flattenIntIndex(reg_idx), val); @@ -206,6 +210,10 @@ class O3ThreadContext : public ThreadContext setCCRegFlat(flattenCCIndex(reg_idx), val); } + virtual void setVectorReg(int reg_idx, const VectorReg &val) { + setVectorRegFlat(flattenVectorIndex(reg_idx), val); + } + /** Reads this thread's PC state. */ virtual TheISA::PCState pcState() { return cpu->pcState(thread->threadId()); } @@ -246,6 +254,7 @@ class O3ThreadContext : public ThreadContext virtual int flattenIntIndex(int reg); virtual int flattenFloatIndex(int reg); virtual int flattenCCIndex(int reg); + virtual int flattenVectorIndex(int reg); virtual int flattenMiscIndex(int reg); /** Returns the number of consecutive store conditional failures. 
*/ @@ -291,6 +300,9 @@ class O3ThreadContext : public ThreadContext virtual CCReg readCCRegFlat(int idx); virtual void setCCRegFlat(int idx, CCReg val); + + virtual const VectorReg &readVectorRegFlat(int idx); + virtual void setVectorRegFlat(int idx, const VectorReg &val); }; #endif diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh index e6a3d5083..ecdd9ebb9 100755 --- a/src/cpu/o3/thread_context_impl.hh +++ b/src/cpu/o3/thread_context_impl.hh @@ -216,6 +216,13 @@ O3ThreadContext<Impl>::readCCRegFlat(int reg_idx) } template <class Impl> +const TheISA::VectorReg & +O3ThreadContext<Impl>::readVectorRegFlat(int reg_idx) +{ + return cpu->readArchVectorReg(reg_idx, thread->threadId()); +} + +template <class Impl> void O3ThreadContext<Impl>::setIntRegFlat(int reg_idx, uint64_t val) { @@ -253,6 +260,15 @@ O3ThreadContext<Impl>::setCCRegFlat(int reg_idx, TheISA::CCReg val) template <class Impl> void +O3ThreadContext<Impl>::setVectorRegFlat(int reg_idx, + const TheISA::VectorReg &val) +{ + cpu->setArchVectorReg(reg_idx, val, thread->threadId()); + conditionalSquash(); +} + +template <class Impl> +void O3ThreadContext<Impl>::pcState(const TheISA::PCState &val) { cpu->pcState(val, thread->threadId()); @@ -292,6 +308,13 @@ O3ThreadContext<Impl>::flattenCCIndex(int reg) template <class Impl> int +O3ThreadContext<Impl>::flattenVectorIndex(int reg) +{ + return cpu->isa[thread->threadId()]->flattenVectorIndex(reg); +} + +template <class Impl> +int O3ThreadContext<Impl>::flattenMiscIndex(int reg) { return cpu->isa[thread->threadId()]->flattenMiscIndex(reg); diff --git a/src/cpu/reg_class.cc b/src/cpu/reg_class.cc index 1805eae13..0cb789fe1 100644 --- a/src/cpu/reg_class.cc +++ b/src/cpu/reg_class.cc @@ -34,5 +34,6 @@ const char *RegClassStrings[] = { "IntRegClass", "FloatRegClass", "CCRegClass", + "VectorRegClass", "MiscRegClass" }; diff --git a/src/cpu/reg_class.hh b/src/cpu/reg_class.hh index 549ebab26..6c7b1b55d 100644 --- a/src/cpu/reg_class.hh 
+++ b/src/cpu/reg_class.hh @@ -42,6 +42,7 @@ enum RegClass { IntRegClass, ///< Integer register FloatRegClass, ///< Floating-point register CCRegClass, ///< Condition-code register + VectorRegClass, ///< Vector register MiscRegClass ///< Control (misc) register }; @@ -76,12 +77,15 @@ RegClass regIdxToClass(TheISA::RegIndex reg_idx, } else if (reg_idx < TheISA::CC_Reg_Base) { cl = FloatRegClass; offset = TheISA::FP_Reg_Base; - } else if (reg_idx < TheISA::Misc_Reg_Base) { + } else if (reg_idx < TheISA::Vector_Reg_Base) { // if there are no CC regs, the ISA should set // CC_Reg_Base == Misc_Reg_Base so the if above // never succeeds cl = CCRegClass; offset = TheISA::CC_Reg_Base; + } else if (reg_idx < TheISA::Misc_Reg_Base) { + cl = VectorRegClass; + offset = TheISA::Vector_Reg_Base; } else { cl = MiscRegClass; offset = TheISA::Misc_Reg_Base; diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh index 2f7247010..27e434132 100644 --- a/src/cpu/simple/base.hh +++ b/src/cpu/simple/base.hh @@ -87,6 +87,7 @@ class BaseSimpleCPU : public BaseCPU, public ExecContext typedef TheISA::FloatReg FloatReg; typedef TheISA::FloatRegBits FloatRegBits; typedef TheISA::CCReg CCReg; + typedef TheISA::VectorReg VectorReg; BPredUnit *branchPred; @@ -239,6 +240,10 @@ class BaseSimpleCPU : public BaseCPU, public ExecContext Stats::Scalar numCCRegReads; Stats::Scalar numCCRegWrites; + //number of vector register file accesses + Stats::Scalar numVectorRegReads; + Stats::Scalar numVectorRegWrites; + // number of simulated memory references Stats::Scalar numMemRefs; Stats::Scalar numLoadInsts; @@ -325,6 +330,13 @@ class BaseSimpleCPU : public BaseCPU, public ExecContext return thread->readCCReg(reg_idx); } + const VectorReg &readVectorRegOperand(const StaticInst *si, int idx) + { + numVectorRegReads++; + int reg_idx = si->srcRegIdx(idx) - TheISA::Vector_Reg_Base; + return thread->readVectorReg(reg_idx); + } + void setIntRegOperand(const StaticInst *si, int idx, IntReg val) { 
numIntRegWrites++; @@ -353,6 +365,14 @@ class BaseSimpleCPU : public BaseCPU, public ExecContext thread->setCCReg(reg_idx, val); } + void setVectorRegOperand(const StaticInst *si, int idx, + const VectorReg &val) + { + numVectorRegWrites++; + int reg_idx = si->destRegIdx(idx) - TheISA::Vector_Reg_Base; + thread->setVectorReg(reg_idx, val); + } + bool readPredicate() { return thread->readPredicate(); } void setPredicate(bool val) { diff --git a/src/cpu/simple_thread.hh b/src/cpu/simple_thread.hh index 20acff6ee..070a00dc8 100644 --- a/src/cpu/simple_thread.hh +++ b/src/cpu/simple_thread.hh @@ -58,6 +58,7 @@ #include "debug/CCRegs.hh" #include "debug/FloatRegs.hh" #include "debug/IntRegs.hh" +#include "debug/VectorRegs.hh" #include "mem/page_table.hh" #include "mem/request.hh" #include "sim/byteswap.hh" @@ -102,6 +103,8 @@ class SimpleThread : public ThreadState typedef TheISA::FloatReg FloatReg; typedef TheISA::FloatRegBits FloatRegBits; typedef TheISA::CCReg CCReg; + typedef TheISA::VectorReg VectorReg; + public: typedef ThreadContext::Status Status; @@ -111,9 +114,15 @@ class SimpleThread : public ThreadState FloatRegBits i[TheISA::NumFloatRegs]; } floatRegs; TheISA::IntReg intRegs[TheISA::NumIntRegs]; + #ifdef ISA_HAS_CC_REGS TheISA::CCReg ccRegs[TheISA::NumCCRegs]; #endif + +#ifdef ISA_HAS_VECTOR_REGS + TheISA::VectorReg vectorRegs[TheISA::NumVectorRegs]; +#endif + TheISA::ISA *const isa; // one "instance" of the current ISA. 
TheISA::PCState _pcState; @@ -282,6 +291,16 @@ class SimpleThread : public ThreadState #endif } + const VectorReg &readVectorReg(int reg_idx) + { + int flatIndex = isa->flattenVectorIndex(reg_idx); + assert(0 <= flatIndex); + assert(flatIndex < TheISA::NumVectorRegs); + DPRINTF(VectorRegs, "Reading vector reg %d (%d).\n", + reg_idx, flatIndex); + return readVectorRegFlat(flatIndex); + } + void setIntReg(int reg_idx, uint64_t val) { int flatIndex = isa->flattenIntIndex(reg_idx); @@ -325,6 +344,19 @@ class SimpleThread : public ThreadState #endif } + void setVectorReg(int reg_idx, const VectorReg &val) + { +#ifdef ISA_HAS_VECTOR_REGS + int flatIndex = isa->flattenVectorIndex(reg_idx); + assert(flatIndex < TheISA::NumVectorRegs); + DPRINTF(VectorRegs, "Setting vector reg %d (%d).\n", + reg_idx, flatIndex); + setVectorRegFlat(flatIndex, val); +#else + panic("Tried to set a vector register."); +#endif + } + TheISA::PCState pcState() { @@ -414,6 +446,12 @@ class SimpleThread : public ThreadState } int + flattenVectorIndex(int reg) + { + return isa->flattenVectorIndex(reg); + } + + int flattenMiscIndex(int reg) { return isa->flattenMiscIndex(reg); @@ -450,6 +488,18 @@ class SimpleThread : public ThreadState void setCCRegFlat(int idx, CCReg val) { panic("setCCRegFlat w/no CC regs!\n"); } #endif + +#ifdef ISA_HAS_VECTOR_REGS + const VectorReg &readVectorRegFlat(int idx) { return vectorRegs[idx]; } + void setVectorRegFlat(int idx, const VectorReg &val) + { vectorRegs[idx] = val; } +#else + const VectorReg &readVectorRegFlat(int idx) + { panic("readVectorRegFlat w/no Vector regs!\n"); } + + void setVectorRegFlat(int idx, const VectorReg &val) + { panic("setVectorRegFlat w/no Vector regs!\n"); } +#endif }; diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh index 684a22856..58cf752b7 100644 --- a/src/cpu/static_inst.hh +++ b/src/cpu/static_inst.hh @@ -98,6 +98,7 @@ class StaticInst : public RefCounted, public StaticInstFlags int8_t _numFPDestRegs; int8_t 
_numIntDestRegs; int8_t _numCCDestRegs; + int8_t _numVectorDestRegs; //@} public: @@ -116,9 +117,10 @@ class StaticInst : public RefCounted, public StaticInstFlags int8_t numFPDestRegs() const { return _numFPDestRegs; } /// Number of integer destination regs. int8_t numIntDestRegs() const { return _numIntDestRegs; } - //@} - /// Number of coprocesor destination regs. + /// Number of condition code destination regs. int8_t numCCDestRegs() const { return _numCCDestRegs; } + /// Number of vector destination regs. + int8_t numVectorDestRegs() const { return _numVectorDestRegs; } //@} /// @name Flag accessors. @@ -140,6 +142,7 @@ class StaticInst : public RefCounted, public StaticInstFlags bool isInteger() const { return flags[IsInteger]; } bool isFloating() const { return flags[IsFloating]; } + bool isVector() const { return flags[IsVector]; } bool isCC() const { return flags[IsCC]; } bool isControl() const { return flags[IsControl]; } @@ -252,7 +255,8 @@ class StaticInst : public RefCounted, public StaticInstFlags StaticInst(const char *_mnemonic, ExtMachInst _machInst, OpClass __opClass) : _opClass(__opClass), _numSrcRegs(0), _numDestRegs(0), _numFPDestRegs(0), _numIntDestRegs(0), _numCCDestRegs(0), - machInst(_machInst), mnemonic(_mnemonic), cachedDisassembly(0) + _numVectorDestRegs(0), machInst(_machInst), mnemonic(_mnemonic), + cachedDisassembly(0) { } public: @@ -326,7 +330,7 @@ class StaticInst : public RefCounted, public StaticInstFlags void printFlags(std::ostream &outs, const std::string &separator) const; /// Return name of machine instruction - std::string getName() { return mnemonic; } + std::string getName() const { return mnemonic; } }; #endif // __CPU_STATIC_INST_HH__ diff --git a/src/cpu/thread_context.cc b/src/cpu/thread_context.cc index fe1ae69dd..ce7604d3c 100644 --- a/src/cpu/thread_context.cc +++ b/src/cpu/thread_context.cc @@ -88,6 +88,15 @@ ThreadContext::compare(ThreadContext *one, ThreadContext *two) panic("CC reg idx %d doesn't match, one: 
%#x, two: %#x", i, t1, t2); } + + // loop through the Vector registers. + for (int i = 0; i < TheISA::NumVectorRegs; ++i) { + const TheISA::VectorReg &t1 = one->readVectorReg(i); + const TheISA::VectorReg &t2 = two->readVectorReg(i); + if (t1 != t2) + panic("Vector reg idx %d doesn't match", i); + } + if (!(one->pcState() == two->pcState())) panic("PC state doesn't match."); int id1 = one->cpuId(); @@ -127,6 +136,16 @@ serialize(ThreadContext &tc, CheckpointOut &cp) SERIALIZE_ARRAY(ccRegs, NumCCRegs); #endif +#ifdef ISA_HAS_VECTOR_REGS + VectorRegElement vectorRegs[NumVectorRegs * NumVectorRegElements]; + for (int i = 0; i < NumVectorRegs; ++i) { + const VectorReg &v = tc.readVectorRegFlat(i); + for (int j = 0; j < NumVectorRegElements; ++j) + vectorRegs[i * NumVectorRegElements + j] = v[j]; + } + SERIALIZE_ARRAY(vectorRegs, NumVectorRegs * NumVectorRegElements); +#endif + tc.pcState().serialize(cp); // thread_num and cpu_id are deterministic from the config @@ -156,6 +175,17 @@ unserialize(ThreadContext &tc, CheckpointIn &cp) tc.setCCRegFlat(i, ccRegs[i]); #endif +#ifdef ISA_HAS_VECTOR_REGS + VectorRegElement vectorRegs[NumVectorRegs * NumVectorRegElements]; + UNSERIALIZE_ARRAY(vectorRegs, NumVectorRegs * NumVectorRegElements); + for (int i = 0; i < NumVectorRegs; ++i) { + VectorReg v; + for (int j = 0; j < NumVectorRegElements; ++j) + v[j] = vectorRegs[i * NumVectorRegElements + j]; + tc.setVectorRegFlat(i, v); + } +#endif + PCState pcState; pcState.unserialize(cp); tc.pcState(pcState); diff --git a/src/cpu/thread_context.hh b/src/cpu/thread_context.hh index 2544b19c6..cd8b98f0c 100644 --- a/src/cpu/thread_context.hh +++ b/src/cpu/thread_context.hh @@ -98,6 +98,7 @@ class ThreadContext typedef TheISA::FloatReg FloatReg; typedef TheISA::FloatRegBits FloatRegBits; typedef TheISA::CCReg CCReg; + typedef TheISA::VectorReg VectorReg; typedef TheISA::MiscReg MiscReg; public: @@ -205,6 +206,8 @@ class ThreadContext virtual CCReg readCCReg(int reg_idx) = 0; + virtual 
const VectorReg &readVectorReg(int reg_idx) = 0; + virtual void setIntReg(int reg_idx, uint64_t val) = 0; virtual void setFloatReg(int reg_idx, FloatReg val) = 0; @@ -213,6 +216,8 @@ class ThreadContext virtual void setCCReg(int reg_idx, CCReg val) = 0; + virtual void setVectorReg(int reg_idx, const VectorReg &val) = 0; + virtual TheISA::PCState pcState() = 0; virtual void pcState(const TheISA::PCState &val) = 0; @@ -236,6 +241,7 @@ class ThreadContext virtual int flattenIntIndex(int reg) = 0; virtual int flattenFloatIndex(int reg) = 0; virtual int flattenCCIndex(int reg) = 0; + virtual int flattenVectorIndex(int reg) = 0; virtual int flattenMiscIndex(int reg) = 0; virtual uint64_t @@ -291,6 +297,9 @@ class ThreadContext virtual CCReg readCCRegFlat(int idx) = 0; virtual void setCCRegFlat(int idx, CCReg val) = 0; + + virtual const VectorReg &readVectorRegFlat(int idx) = 0; + virtual void setVectorRegFlat(int idx, const VectorReg &val) = 0; /** @} */ }; @@ -402,6 +411,9 @@ class ProxyThreadContext : public ThreadContext CCReg readCCReg(int reg_idx) { return actualTC->readCCReg(reg_idx); } + const VectorReg &readVectorReg(int reg_idx) + { return actualTC->readVectorReg(reg_idx); } + void setIntReg(int reg_idx, uint64_t val) { actualTC->setIntReg(reg_idx, val); } @@ -414,6 +426,9 @@ class ProxyThreadContext : public ThreadContext void setCCReg(int reg_idx, CCReg val) { actualTC->setCCReg(reg_idx, val); } + void setVectorReg(int reg_idx, const VectorReg &val) + { actualTC->setVectorReg(reg_idx, val); } + TheISA::PCState pcState() { return actualTC->pcState(); } void pcState(const TheISA::PCState &val) { actualTC->pcState(val); } @@ -450,6 +465,9 @@ class ProxyThreadContext : public ThreadContext int flattenCCIndex(int reg) { return actualTC->flattenCCIndex(reg); } + int flattenVectorIndex(int reg) + { return actualTC->flattenVectorIndex(reg); } + int flattenMiscIndex(int reg) { return actualTC->flattenMiscIndex(reg); } @@ -487,6 +505,12 @@ class ProxyThreadContext : 
public ThreadContext void setCCRegFlat(int idx, CCReg val) { actualTC->setCCRegFlat(idx, val); } + + const VectorReg &readVectorRegFlat(int idx) + { return actualTC->readVectorRegFlat(idx); } + + void setVectorRegFlat(int idx, const VectorReg &val) + { actualTC->setVectorRegFlat(idx, val); } }; /** @{ */ |