diff options
author | Nilay Vaish <nilay@cs.wisc.edu> | 2015-07-26 10:21:20 -0500 |
---|---|---|
committer | Nilay Vaish <nilay@cs.wisc.edu> | 2015-07-26 10:21:20 -0500 |
commit | 608641e23c7f2288810c3f23a1a63790b664f2ab (patch) | |
tree | 0656aaf9653e8d263f5daac0d5f0fe3190193ae5 /src | |
parent | 6e354e82d9395b20f5f148cd545d0666b626e8ac (diff) | |
download | gem5-608641e23c7f2288810c3f23a1a63790b664f2ab.tar.xz |
cpu: implements vector registers

This adds a vector register type. The type is defined as a std::array of a
fixed number of uint64_t elements. The isa_parser.py has been modified to parse
vector register operands and generate the required code. The different CPU
models now have vector register files.
Diffstat (limited to 'src')
55 files changed, 876 insertions, 78 deletions
diff --git a/src/arch/SConscript b/src/arch/SConscript index e0d6845f5..89ecdfa73 100644 --- a/src/arch/SConscript +++ b/src/arch/SConscript @@ -196,5 +196,7 @@ env.Append(BUILDERS = {'ScanISA' : DebugFlag('IntRegs') DebugFlag('FloatRegs') DebugFlag('CCRegs') +DebugFlag('VectorRegs') DebugFlag('MiscRegs') -CompoundFlag('Registers', [ 'IntRegs', 'FloatRegs', 'CCRegs', 'MiscRegs' ]) +CompoundFlag('Registers', [ 'IntRegs', 'FloatRegs', 'CCRegs', 'VectorRegs', + 'MiscRegs' ]) diff --git a/src/arch/alpha/isa.hh b/src/arch/alpha/isa.hh index 6a88ee40b..b5964e622 100644 --- a/src/arch/alpha/isa.hh +++ b/src/arch/alpha/isa.hh @@ -114,6 +114,13 @@ namespace AlphaISA return reg; } + // dummy + int + flattenVectorIndex(int reg) const + { + return reg; + } + int flattenMiscIndex(int reg) const { diff --git a/src/arch/alpha/registers.hh b/src/arch/alpha/registers.hh index 3fd774cf7..665ea30c7 100644 --- a/src/arch/alpha/registers.hh +++ b/src/arch/alpha/registers.hh @@ -56,6 +56,12 @@ typedef uint64_t MiscReg; // dummy typedef since we don't have CC regs typedef uint8_t CCReg; +// vector register file entry type +typedef uint64_t VectorRegElement; +const int NumVectorRegElements = 0; +const int VectorRegBytes = NumVectorRegElements * sizeof(VectorRegElement); +typedef std::array<VectorRegElement, NumVectorRegElements> VectorReg; + union AnyReg { IntReg intreg; @@ -95,6 +101,7 @@ const int NumFloatArchRegs = 32; const int NumIntRegs = NumIntArchRegs + NumPALShadowRegs; const int NumFloatRegs = NumFloatArchRegs; const int NumCCRegs = 0; +const int NumVectorRegs = 0; const int NumMiscRegs = NUM_MISCREGS; const int TotalNumRegs = @@ -106,7 +113,8 @@ enum DependenceTags { // 32..63 are the FP regs 0..31, i.e. 
use (reg + FP_Reg_Base) FP_Reg_Base = NumIntRegs, CC_Reg_Base = FP_Reg_Base + NumFloatRegs, - Misc_Reg_Base = CC_Reg_Base + NumCCRegs, // NumCCRegs == 0 + Vector_Reg_Base = CC_Reg_Base + NumCCRegs, // NumCCRegs == 0 + Misc_Reg_Base = Vector_Reg_Base + NumCCRegs, // NumVectorRegs == 0 Max_Reg_Index = Misc_Reg_Base + NumMiscRegs + NumInternalProcRegs }; diff --git a/src/arch/alpha/utility.cc b/src/arch/alpha/utility.cc index 2dfe00f96..b0a503828 100644 --- a/src/arch/alpha/utility.cc +++ b/src/arch/alpha/utility.cc @@ -73,6 +73,7 @@ copyRegs(ThreadContext *src, ThreadContext *dest) // Would need to add condition-code regs if implemented assert(NumCCRegs == 0); + assert(NumVectorRegs == 0); // Copy misc. registers copyMiscRegs(src, dest); diff --git a/src/arch/arm/insts/static_inst.cc b/src/arch/arm/insts/static_inst.cc index 9f878ac4d..417496579 100644 --- a/src/arch/arm/insts/static_inst.cc +++ b/src/arch/arm/insts/static_inst.cc @@ -337,6 +337,8 @@ ArmStaticInst::printReg(std::ostream &os, int reg) const case CCRegClass: ccprintf(os, "cc_%s", ArmISA::ccRegName[rel_reg]); break; + case VectorRegClass: + panic("ARM ISA does not have any vector registers yet!"); } } diff --git a/src/arch/arm/isa.hh b/src/arch/arm/isa.hh index a07017c17..1e7edd637 100644 --- a/src/arch/arm/isa.hh +++ b/src/arch/arm/isa.hh @@ -288,6 +288,13 @@ namespace ArmISA } int + flattenVectorIndex(int reg) const + { + assert(reg >= 0); + return reg; + } + + int flattenMiscIndex(int reg) const { assert(reg >= 0); diff --git a/src/arch/arm/registers.hh b/src/arch/arm/registers.hh index 23fc20450..e57802e53 100644 --- a/src/arch/arm/registers.hh +++ b/src/arch/arm/registers.hh @@ -72,6 +72,12 @@ typedef uint64_t MiscReg; // condition code register; must be at least 32 bits for FpCondCodes typedef uint64_t CCReg; +// vector register file entry type +typedef uint64_t VectorRegElement; +const int NumVectorRegElements = 0; +const int VectorRegBytes = NumVectorRegElements * sizeof(VectorRegElement); 
+typedef std::array<VectorRegElement, NumVectorRegElements> VectorReg; + // Constants Related to the number of registers const int NumIntArchRegs = NUM_ARCH_INTREGS; // The number of single precision floating point registers @@ -82,6 +88,7 @@ const int NumFloatSpecialRegs = 32; const int NumIntRegs = NUM_INTREGS; const int NumFloatRegs = NumFloatV8ArchRegs + NumFloatSpecialRegs; const int NumCCRegs = NUM_CCREGS; +const int NumVectorRegs = 0; const int NumMiscRegs = NUM_MISCREGS; #define ISA_HAS_CC_REGS @@ -112,7 +119,8 @@ const int SyscallSuccessReg = ReturnValueReg; // These help enumerate all the registers for dependence tracking. const int FP_Reg_Base = NumIntRegs * (MODE_MAXMODE + 1); const int CC_Reg_Base = FP_Reg_Base + NumFloatRegs; -const int Misc_Reg_Base = CC_Reg_Base + NumCCRegs; +const int Vector_Reg_Base = CC_Reg_Base + NumCCRegs; +const int Misc_Reg_Base = Vector_Reg_Base + NumVectorRegs; const int Max_Reg_Index = Misc_Reg_Base + NumMiscRegs; typedef union { diff --git a/src/arch/arm/utility.cc b/src/arch/arm/utility.cc index 34fcfd482..e1f9dfe04 100644 --- a/src/arch/arm/utility.cc +++ b/src/arch/arm/utility.cc @@ -156,6 +156,9 @@ copyRegs(ThreadContext *src, ThreadContext *dest) for (int i = 0; i < NumCCRegs; i++) dest->setCCReg(i, src->readCCReg(i)); + // Copy vector registers when vector registers put to use. 
+ assert(NumVectorRegs == 0); + for (int i = 0; i < NumMiscRegs; i++) dest->setMiscRegNoEffect(i, src->readMiscRegNoEffect(i)); diff --git a/src/arch/isa_parser.py b/src/arch/isa_parser.py index f756161ea..5050d24d4 100755 --- a/src/arch/isa_parser.py +++ b/src/arch/isa_parser.py @@ -515,6 +515,9 @@ class Operand(object): def isCCReg(self): return 0 + def isVectorReg(self): + return 0 + def isControlReg(self): return 0 @@ -751,6 +754,106 @@ class CCRegOperand(Operand): return wb +class VectorRegOperand(Operand): + def isReg(self): + return 1 + + def isVectorReg(self): + return 1 + + def __init__(self, parser, full_name, ext, is_src, is_dest): + ## Vector registers are always treated as source registers since + ## not the whole of them might be written, in which case we need + ## to retain the earlier value. + super(VectorRegOperand, self).__init__(parser, full_name, ext, + True, is_dest) + self.size = 0 + + def finalize(self, predRead, predWrite): + self.flags = self.getFlags() + self.constructor = self.makeConstructor(predRead, predWrite) + self.op_decl = self.makeDecl() + + if self.is_src: + self.op_rd = self.makeRead(predRead) + self.op_src_decl = self.makeDecl() + else: + self.op_rd = '' + self.op_src_decl = '' + + if self.is_dest: + self.op_wb = self.makeWrite(predWrite) + self.op_dest_decl = self.makeDecl() + else: + self.op_wb = '' + self.op_dest_decl = '' + + def makeConstructor(self, predRead, predWrite): + c_src = '' + c_dest = '' + + if self.is_src: + c_src = '\n\t_srcRegIdx[_numSrcRegs++] = %s + Vector_Reg_Base;' % \ + (self.reg_spec) + if self.hasReadPred(): + c_src = '\n\tif (%s) {%s\n\t}' % \ + (self.read_predicate, c_src) + + if self.is_dest: + c_dest = '\n\t_destRegIdx[_numDestRegs++] = %s + Vector_Reg_Base;' % \ + (self.reg_spec) + c_dest += '\n\t_numVectorDestRegs++;' + if self.hasWritePred(): + c_dest = '\n\tif (%s) {%s\n\t}' % \ + (self.write_predicate, c_dest) + + return c_src + c_dest + + def makeRead(self, predRead): + if self.read_code != 
None: + return self.buildReadCode('readVectorRegOperand') + + vector_reg_val = '' + if predRead: + vector_reg_val = 'xc->readVectorRegOperand(this, _sourceIndex++)' + if self.hasReadPred(): + vector_reg_val = '(%s) ? %s : 0' % \ + (self.read_predicate, vector_reg_val) + else: + vector_reg_val = 'xc->readVectorRegOperand(this, %d)' % \ + self.src_reg_idx + + return '%s = %s;\n' % (self.base_name, vector_reg_val) + + def makeWrite(self, predWrite): + if self.write_code != None: + return self.buildWriteCode('setVectorRegOperand') + + if predWrite: + wp = 'true' + if self.hasWritePred(): + wp = self.write_predicate + + wcond = 'if (%s)' % (wp) + windex = '_destIndex++' + else: + wcond = '' + windex = '%d' % self.dest_reg_idx + + wb = ''' + %s + { + TheISA::VectorReg final_val = %s; + xc->setVectorRegOperand(this, %s, final_val);\n + if (traceData) { traceData->setData(final_val); } + }''' % (wcond, self.base_name, windex) + + return wb + + def makeDecl(self): + ctype = 'TheISA::VectorReg' + return '%s %s;\n' % (ctype, self.base_name) + class ControlRegOperand(Operand): def isReg(self): return 1 @@ -818,7 +921,10 @@ class MemOperand(Operand): # Note that initializations in the declarations are solely # to avoid 'uninitialized variable' errors from the compiler. # Declare memory data variable. 
- return '%s %s = 0;\n' % (self.ctype, self.base_name) + if 'IsVector' in self.flags: + return 'TheISA::VectorReg %s;\n' % self.base_name + else: + return '%s %s = 0;\n' % (self.ctype, self.base_name) def makeRead(self, predRead): if self.read_code != None: @@ -909,6 +1015,7 @@ class OperandList(object): self.numFPDestRegs = 0 self.numIntDestRegs = 0 self.numCCDestRegs = 0 + self.numVectorDestRegs = 0 self.numMiscDestRegs = 0 self.memOperand = None @@ -931,6 +1038,8 @@ class OperandList(object): self.numIntDestRegs += 1 elif op_desc.isCCReg(): self.numCCDestRegs += 1 + elif op_desc.isVectorReg(): + self.numVectorDestRegs += 1 elif op_desc.isControlReg(): self.numMiscDestRegs += 1 elif op_desc.isMem(): @@ -1127,6 +1236,7 @@ class InstObjParams(object): header += '\n\t_numFPDestRegs = 0;' header += '\n\t_numIntDestRegs = 0;' header += '\n\t_numCCDestRegs = 0;' + header += '\n\t_numVectorDestRegs = 0;' self.constructor = header + \ self.operands.concatAttrStrings('constructor') @@ -2292,7 +2402,8 @@ StaticInstPtr operandsREString = r''' (?<!\w) # neg. lookbehind assertion: prevent partial matches - ((%s)(?:_(%s))?) # match: operand with optional '_' then suffix + ((%s)(?:_(%s))?(?:\[\w+\])?) # match: operand with optional '_' + # then suffix, and then an optional array index. (?!\w) # neg. 
lookahead assertion: prevent partial matches ''' % (string.join(operands, '|'), string.join(extensions, '|')) diff --git a/src/arch/mips/isa.hh b/src/arch/mips/isa.hh index feb55e473..f61db6d57 100644 --- a/src/arch/mips/isa.hh +++ b/src/arch/mips/isa.hh @@ -184,6 +184,13 @@ namespace MipsISA return reg; } + // dummy + int + flattenVectorIndex(int reg) const + { + return reg; + } + int flattenMiscIndex(int reg) const { diff --git a/src/arch/mips/registers.hh b/src/arch/mips/registers.hh index 0ac84cc7f..e7d5e346c 100644 --- a/src/arch/mips/registers.hh +++ b/src/arch/mips/registers.hh @@ -55,6 +55,7 @@ const int MaxShadowRegSets = 16; // Maximum number of shadow register sets const int NumIntRegs = NumIntArchRegs + NumIntSpecialRegs; //HI & LO Regs const int NumFloatRegs = NumFloatArchRegs + NumFloatSpecialRegs;// const int NumCCRegs = 0; +const int NumVectorRegs = 0; const uint32_t MIPS32_QNAN = 0x7fbfffff; const uint64_t MIPS64_QNAN = ULL(0x7ff7ffffffffffff); @@ -278,7 +279,8 @@ const int NumMiscRegs = MISCREG_NUMREGS; // These help enumerate all the registers for dependence tracking. 
const int FP_Reg_Base = NumIntRegs; const int CC_Reg_Base = FP_Reg_Base + NumFloatRegs; -const int Misc_Reg_Base = CC_Reg_Base + NumCCRegs; // NumCCRegs == 0 +const int Vector_Reg_Base = CC_Reg_Base + NumCCRegs; // NumCCRegs == 0 +const int Misc_Reg_Base = Vector_Reg_Base + NumVectorRegs; const int Max_Reg_Index = Misc_Reg_Base + NumMiscRegs; const int TotalNumRegs = NumIntRegs + NumFloatRegs + NumMiscRegs; @@ -297,6 +299,12 @@ typedef uint64_t MiscReg; // dummy typedef since we don't have CC regs typedef uint8_t CCReg; +// vector register file entry type +typedef uint64_t VectorRegElement; +const int NumVectorRegElements = 0; +const int VectorRegBytes = NumVectorRegElements * sizeof(VectorRegElement); +typedef std::array<VectorRegElement, NumVectorRegElements> VectorReg; + typedef union { IntReg intreg; FloatReg fpreg; diff --git a/src/arch/mips/utility.cc b/src/arch/mips/utility.cc index 80047fbfd..92ca8c6f0 100644 --- a/src/arch/mips/utility.cc +++ b/src/arch/mips/utility.cc @@ -252,6 +252,9 @@ copyRegs(ThreadContext *src, ThreadContext *dest) // Would need to add condition-code regs if implemented assert(NumCCRegs == 0); + // Copy vector registers when vector registers put to use. + assert(NumVectorRegs == 0); + // Copy misc. 
registers for (int i = 0; i < NumMiscRegs; i++) dest->setMiscRegNoEffect(i, src->readMiscRegNoEffect(i)); diff --git a/src/arch/null/registers.hh b/src/arch/null/registers.hh index 1e52fc5a6..3f1524554 100644 --- a/src/arch/null/registers.hh +++ b/src/arch/null/registers.hh @@ -49,6 +49,8 @@ typedef uint32_t FloatRegBits; typedef float FloatReg; typedef uint8_t CCReg; typedef uint64_t MiscReg; +typedef uint64_t VectorRegElement; +typedef std::array<VectorRegElement, 0> VectorReg; } diff --git a/src/arch/power/insts/static_inst.cc b/src/arch/power/insts/static_inst.cc index 087e1f740..5bd16b40d 100644 --- a/src/arch/power/insts/static_inst.cc +++ b/src/arch/power/insts/static_inst.cc @@ -57,6 +57,8 @@ PowerStaticInst::printReg(std::ostream &os, int reg) const } case CCRegClass: panic("printReg: POWER does not implement CCRegClass\n"); + case VectorRegClass: + panic("printReg: POWER does not implement VectorRegClass\n"); } } diff --git a/src/arch/power/isa.hh b/src/arch/power/isa.hh index aaf5bd92a..08ee82d5d 100644 --- a/src/arch/power/isa.hh +++ b/src/arch/power/isa.hh @@ -105,6 +105,13 @@ class ISA : public SimObject return reg; } + // dummy + int + flattenVectorIndex(int reg) const + { + return reg; + } + int flattenMiscIndex(int reg) const { diff --git a/src/arch/power/registers.hh b/src/arch/power/registers.hh index abee516fc..1d0b4a21f 100644 --- a/src/arch/power/registers.hh +++ b/src/arch/power/registers.hh @@ -55,6 +55,12 @@ typedef uint64_t MiscReg; // dummy typedef since we don't have CC regs typedef uint8_t CCReg; +// typedefs for Vector registers +const int NumVectorRegElements = 0; +typedef uint64_t VectorRegElement; +const int VectorRegBytes = NumVectorRegElements * sizeof(VectorRegElement); +typedef std::array<VectorRegElement, NumVectorRegElements> VectorReg; + // Constants Related to the number of registers const int NumIntArchRegs = 32; @@ -68,6 +74,7 @@ const int NumInternalProcRegs = 0; const int NumIntRegs = NumIntArchRegs + NumIntSpecialRegs; 
const int NumFloatRegs = NumFloatArchRegs + NumFloatSpecialRegs; const int NumCCRegs = 0; +const int NumVectorRegs = 0; const int NumMiscRegs = NUM_MISCREGS; // Semantically meaningful register indices @@ -90,7 +97,8 @@ const int SyscallSuccessReg = 3; // These help enumerate all the registers for dependence tracking. const int FP_Reg_Base = NumIntRegs; const int CC_Reg_Base = FP_Reg_Base + NumFloatRegs; -const int Misc_Reg_Base = CC_Reg_Base + NumCCRegs; // NumCCRegs == 0 +const int Vector_Reg_Base = CC_Reg_Base + NumCCRegs; // NumCCRegs == 0 +const int Misc_Reg_Base = Vector_Reg_Base + NumVectorRegs; // NumVectorRegs == 0 const int Max_Reg_Index = Misc_Reg_Base + NumMiscRegs; typedef union { diff --git a/src/arch/power/utility.cc b/src/arch/power/utility.cc index 7be195b8d..fa2a1d89b 100644 --- a/src/arch/power/utility.cc +++ b/src/arch/power/utility.cc @@ -51,6 +51,9 @@ copyRegs(ThreadContext *src, ThreadContext *dest) // Would need to add condition-code regs if implemented assert(NumCCRegs == 0); + // Copy vector registers when vector registers put to use. + assert(NumVectorRegs == 0); + // Copy misc. 
registers copyMiscRegs(src, dest); diff --git a/src/arch/sparc/isa.hh b/src/arch/sparc/isa.hh index 1d2a457d2..51e797c90 100644 --- a/src/arch/sparc/isa.hh +++ b/src/arch/sparc/isa.hh @@ -211,6 +211,13 @@ class ISA : public SimObject return reg; } + // dummy + int + flattenVectorIndex(int reg) const + { + return reg; + } + int flattenMiscIndex(int reg) const { diff --git a/src/arch/sparc/registers.hh b/src/arch/sparc/registers.hh index b25f34584..a59139ba2 100644 --- a/src/arch/sparc/registers.hh +++ b/src/arch/sparc/registers.hh @@ -51,6 +51,11 @@ typedef uint32_t FloatRegBits; // dummy typedef since we don't have CC regs typedef uint8_t CCReg; +// vector register file entry type +typedef uint64_t VectorRegElement; +const int NumVectorRegElements = 0; +const int VectorRegBytes = NumVectorRegElements * sizeof(VectorRegElement); +typedef std::array<VectorRegElement, NumVectorRegElements> VectorReg; typedef union { @@ -75,6 +80,7 @@ const int SyscallPseudoReturnReg = 9; const int NumIntArchRegs = 32; const int NumIntRegs = (MaxGL + 1) * 8 + NWindows * 16 + NumMicroIntRegs; const int NumCCRegs = 0; +const int NumVectorRegs = 0; const int TotalNumRegs = NumIntRegs + NumFloatRegs + NumMiscRegs; @@ -82,7 +88,8 @@ const int TotalNumRegs = NumIntRegs + NumFloatRegs + NumMiscRegs; enum DependenceTags { FP_Reg_Base = NumIntRegs, CC_Reg_Base = FP_Reg_Base + NumFloatRegs, - Misc_Reg_Base = CC_Reg_Base + NumCCRegs, // NumCCRegs == 0 + Vector_Reg_Base = CC_Reg_Base + NumCCRegs, // NumCCRegs == 0 + Misc_Reg_Base = Vector_Reg_Base + NumVectorRegs, // NumVectorRegs == 0 Max_Reg_Index = Misc_Reg_Base + NumMiscRegs, }; diff --git a/src/arch/sparc/utility.cc b/src/arch/sparc/utility.cc index 34d4f79b3..6d7a1ba95 100644 --- a/src/arch/sparc/utility.cc +++ b/src/arch/sparc/utility.cc @@ -237,6 +237,9 @@ copyRegs(ThreadContext *src, ThreadContext *dest) // Would need to add condition-code regs if implemented assert(NumCCRegs == 0); + // Copy vector registers when vector registers put to 
use. + assert(NumVectorRegs == 0); + // Copy misc. registers copyMiscRegs(src, dest); diff --git a/src/arch/x86/insts/static_inst.cc b/src/arch/x86/insts/static_inst.cc index 39091289f..49ea6ef4e 100644 --- a/src/arch/x86/insts/static_inst.cc +++ b/src/arch/x86/insts/static_inst.cc @@ -225,12 +225,19 @@ namespace X86ISA ccprintf(os, "%%cc%d", rel_reg); break; + case VectorRegClass: + ccprintf(os, "%%cc%d", rel_reg); + break; + case MiscRegClass: switch (rel_reg) { default: ccprintf(os, "%%ctrl%d", rel_reg); } break; + + default: + panic("Invalid register class!\n"); } } diff --git a/src/arch/x86/isa.hh b/src/arch/x86/isa.hh index 88f4980ae..779241c55 100644 --- a/src/arch/x86/isa.hh +++ b/src/arch/x86/isa.hh @@ -92,6 +92,12 @@ namespace X86ISA } int + flattenVectorIndex(int reg) const + { + return reg; + } + + int flattenMiscIndex(int reg) const { return reg; diff --git a/src/arch/x86/registers.hh b/src/arch/x86/registers.hh index ebd88136e..ad40fe17f 100644 --- a/src/arch/x86/registers.hh +++ b/src/arch/x86/registers.hh @@ -57,6 +57,7 @@ const int NumMiscRegs = NUM_MISCREGS; const int NumIntArchRegs = NUM_INTREGS; const int NumIntRegs = NumIntArchRegs + NumMicroIntRegs + NumImplicitIntRegs; const int NumCCRegs = NUM_CCREGS; +const int NumVectorRegs = 0; #define ISA_HAS_CC_REGS @@ -72,7 +73,8 @@ enum DependenceTags { // we just start at (1 << 7) == 128. 
FP_Reg_Base = 128, CC_Reg_Base = FP_Reg_Base + NumFloatRegs, - Misc_Reg_Base = CC_Reg_Base + NumCCRegs, + Vector_Reg_Base = CC_Reg_Base + NumCCRegs, + Misc_Reg_Base = Vector_Reg_Base + NumVectorRegs, Max_Reg_Index = Misc_Reg_Base + NumMiscRegs }; @@ -91,6 +93,13 @@ const int SyscallPseudoReturnReg = INTREG_RDX; typedef uint64_t IntReg; typedef uint64_t CCReg; + +// vector register file entry type +typedef uint64_t VectorRegElement; +const int NumVectorRegElements = 0; +const int VectorRegBytes = NumVectorRegElements * sizeof(VectorRegElement); +typedef std::array<VectorRegElement, NumVectorRegElements> VectorReg; + //XXX Should this be a 128 bit structure for XMM memory ops? typedef uint64_t LargestRead; typedef uint64_t MiscReg; diff --git a/src/arch/x86/utility.cc b/src/arch/x86/utility.cc index f7d0f816e..e1be61180 100644 --- a/src/arch/x86/utility.cc +++ b/src/arch/x86/utility.cc @@ -245,6 +245,10 @@ copyRegs(ThreadContext *src, ThreadContext *dest) //copy condition-code regs for (int i = 0; i < NumCCRegs; ++i) dest->setCCRegFlat(i, src->readCCRegFlat(i)); + + // copy vector regs when added to the architecture + assert(NumVectorRegs == 0); + copyMiscRegs(src, dest); dest->pcState(src->pcState()); } diff --git a/src/cpu/StaticInstFlags.py b/src/cpu/StaticInstFlags.py index ef29726fc..3b00e5df8 100644 --- a/src/cpu/StaticInstFlags.py +++ b/src/cpu/StaticInstFlags.py @@ -55,8 +55,8 @@ class StaticInstFlags(Enum): vals = [ 'IsNop', # Is a no-op (no effect at all). - 'IsInteger', # References integer regs. - 'IsFloating', # References FP regs. + 'IsInteger', # References scalar integer regs. + 'IsFloating', # References scalar FP regs. 'IsCC', # References CC regs. 
'IsMemRef', # References memory (load, store, or prefetch) @@ -108,5 +108,6 @@ class StaticInstFlags(Enum): 'IsMicroBranch', # This microop branches within the microcode for # a macroop 'IsDspOp', - 'IsSquashAfter' # Squash all uncommitted state after executed + 'IsSquashAfter', # Squash all uncommitted state after executed + 'IsVector', # References vector register. ] diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index 5b54679c9..515df6821 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -99,10 +99,19 @@ class BaseDynInst : public ExecContext, public RefCounted union Result { uint64_t integer; double dbl; + + // I am assuming that vector register type is different from the two + // types used above. Else it seems useless to have a separate typedef + // for vector registers. + VectorReg vector; + void set(uint64_t i) { integer = i; } void set(double d) { dbl = d; } + void set(const VectorReg &v) { vector = v; } + void get(uint64_t& i) { i = integer; } void get(double& d) { d = dbl; } + void get(VectorReg& v) { v = vector; } }; protected: @@ -521,6 +530,9 @@ class BaseDynInst : public ExecContext, public RefCounted bool isDataPrefetch() const { return staticInst->isDataPrefetch(); } bool isInteger() const { return staticInst->isInteger(); } bool isFloating() const { return staticInst->isFloating(); } + bool isVector() const { return staticInst->isVector(); } + bool isCC() const { return staticInst->isCC(); } + bool isControl() const { return staticInst->isControl(); } bool isCall() const { return staticInst->isCall(); } bool isReturn() const { return staticInst->isReturn(); } @@ -550,6 +562,11 @@ class BaseDynInst : public ExecContext, public RefCounted bool isFirstMicroop() const { return staticInst->isFirstMicroop(); } bool isMicroBranch() const { return staticInst->isMicroBranch(); } + void printFlags(std::ostream &outs, const std::string &separator) const + { staticInst->printFlags(outs, separator); } + + std::string 
getName() const { return staticInst->getName(); } + /** Temporarily sets this instruction as a serialize before instruction. */ void setSerializeBefore() { status.set(SerializeBefore); } @@ -596,6 +613,8 @@ class BaseDynInst : public ExecContext, public RefCounted int8_t numFPDestRegs() const { return staticInst->numFPDestRegs(); } int8_t numIntDestRegs() const { return staticInst->numIntDestRegs(); } int8_t numCCDestRegs() const { return staticInst->numCCDestRegs(); } + int8_t numVectorDestRegs() const + { return staticInst->numVectorDestRegs(); } /** Returns the logical register index of the i'th destination register. */ RegIndex destRegIdx(int i) const { return staticInst->destRegIdx(i); } @@ -655,6 +674,13 @@ class BaseDynInst : public ExecContext, public RefCounted setResult<uint64_t>(val); } + /** Records a vector register being set to a value. */ + void setVectorRegOperand(const StaticInst *si, int idx, + const VectorReg &val) + { + setResult<const VectorReg &>(val); + } + /** Records that one of the source registers is ready. */ void markSrcRegReady(); diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh index a363b6d0f..6d75f7c12 100644 --- a/src/cpu/checker/cpu.hh +++ b/src/cpu/checker/cpu.hh @@ -94,6 +94,7 @@ class CheckerCPU : public BaseCPU, public ExecContext typedef TheISA::FloatReg FloatReg; typedef TheISA::FloatRegBits FloatRegBits; typedef TheISA::MiscReg MiscReg; + typedef TheISA::VectorReg VectorReg; /** id attached to all issued requests */ MasterID masterId; @@ -145,10 +146,19 @@ class CheckerCPU : public BaseCPU, public ExecContext union Result { uint64_t integer; double dbl; + + // I am assuming that vector register type is different from the two + // types used above. Else it seems useless to have a separate typedef + // for vector registers. 
+ VectorReg vector; + void set(uint64_t i) { integer = i; } void set(double d) { dbl = d; } + void set(const VectorReg &v) { vector = v; } + void get(uint64_t& i) { i = integer; } void get(double& d) { d = dbl; } + void get(VectorReg& v) { v = vector; } }; // ISAs like ARM can have multiple destination registers to check, @@ -231,6 +241,11 @@ class CheckerCPU : public BaseCPU, public ExecContext return thread->readCCReg(reg_idx); } + const VectorReg &readVectorRegOperand(const StaticInst *si, int idx) + { + return thread->readVectorReg(si->srcRegIdx(idx)); + } + template <class T> void setResult(T t) { @@ -267,6 +282,13 @@ class CheckerCPU : public BaseCPU, public ExecContext setResult<uint64_t>(val); } + void setVectorRegOperand(const StaticInst *si, int idx, + const VectorReg &val) + { + thread->setVectorReg(si->destRegIdx(idx), val); + setResult<VectorReg>(val); + } + bool readPredicate() { return thread->readPredicate(); } void setPredicate(bool val) { @@ -441,7 +463,7 @@ class Checker : public CheckerCPU void validateExecution(DynInstPtr &inst); void validateState(); - void copyResult(DynInstPtr &inst, uint64_t mismatch_val, int start_idx); + void copyResult(DynInstPtr &inst, Result mismatch_val, int start_idx); void handlePendingInt(); private: diff --git a/src/cpu/checker/cpu_impl.hh b/src/cpu/checker/cpu_impl.hh index 289861521..d6a467358 100644 --- a/src/cpu/checker/cpu_impl.hh +++ b/src/cpu/checker/cpu_impl.hh @@ -491,7 +491,9 @@ Checker<Impl>::validateExecution(DynInstPtr &inst) // Unverifiable instructions assume they were executed // properly by the CPU. Grab the result from the // instruction and write it to the register. 
- copyResult(inst, 0, idx); + Result r; + r.integer = 0; + copyResult(inst, r, idx); } else if (inst->numDestRegs() > 0 && !result.empty()) { DPRINTF(Checker, "Dest regs %d, number of checker dest regs %d\n", inst->numDestRegs(), result.size()); @@ -525,7 +527,9 @@ Checker<Impl>::validateExecution(DynInstPtr &inst) // The load/store queue in Detailed CPU can also cause problems // if load/store forwarding is allowed. if (inst->isLoad() && warnOnlyOnLoadError) { - copyResult(inst, inst_val, idx); + Result r; + r.integer = inst_val; + copyResult(inst, r, idx); } else { handleError(inst); } @@ -590,7 +594,7 @@ Checker<Impl>::validateState() template <class Impl> void -Checker<Impl>::copyResult(DynInstPtr &inst, uint64_t mismatch_val, +Checker<Impl>::copyResult(DynInstPtr &inst, Result mismatch_val, int start_idx) { // We've already popped one dest off the queue, @@ -599,39 +603,65 @@ Checker<Impl>::copyResult(DynInstPtr &inst, uint64_t mismatch_val, RegIndex idx = inst->destRegIdx(start_idx); switch (regIdxToClass(idx)) { case IntRegClass: - thread->setIntReg(idx, mismatch_val); + thread->setIntReg(idx, mismatch_val.integer); break; case FloatRegClass: - thread->setFloatRegBits(idx - TheISA::FP_Reg_Base, mismatch_val); + thread->setFloatRegBits(idx - TheISA::FP_Reg_Base, + mismatch_val.integer); break; case CCRegClass: - thread->setCCReg(idx - TheISA::CC_Reg_Base, mismatch_val); + thread->setCCReg(idx - TheISA::CC_Reg_Base, mismatch_val.integer); + break; + case VectorRegClass: + thread->setVectorReg(idx - TheISA::Vector_Reg_Base, + mismatch_val.vector); break; case MiscRegClass: thread->setMiscReg(idx - TheISA::Misc_Reg_Base, - mismatch_val); + mismatch_val.integer); break; } } + start_idx++; - uint64_t res = 0; for (int i = start_idx; i < inst->numDestRegs(); i++) { RegIndex idx = inst->destRegIdx(i); - inst->template popResult<uint64_t>(res); switch (regIdxToClass(idx)) { - case IntRegClass: - thread->setIntReg(idx, res); - break; - case FloatRegClass: - 
thread->setFloatRegBits(idx - TheISA::FP_Reg_Base, res); - break; - case CCRegClass: - thread->setCCReg(idx - TheISA::CC_Reg_Base, res); - break; - case MiscRegClass: - // Try to get the proper misc register index for ARM here... - thread->setMiscReg(idx - TheISA::Misc_Reg_Base, res); - break; + case IntRegClass: { + uint64_t res = 0; + inst->template popResult<uint64_t>(res); + thread->setIntReg(idx, res); + } + break; + + case FloatRegClass: { + uint64_t res = 0; + inst->template popResult<uint64_t>(res); + thread->setFloatRegBits(idx - TheISA::FP_Reg_Base, res); + } + break; + + case CCRegClass: { + uint64_t res = 0; + inst->template popResult<uint64_t>(res); + thread->setCCReg(idx - TheISA::CC_Reg_Base, res); + } + break; + + case VectorRegClass: { + VectorReg res; + inst->template popResult<VectorReg>(res); + thread->setVectorReg(idx - TheISA::Vector_Reg_Base, res); + } + break; + + case MiscRegClass: { + // Try to get the proper misc register index for ARM here... + uint64_t res = 0; + inst->template popResult<uint64_t>(res); + thread->setMiscReg(idx - TheISA::Misc_Reg_Base, res); + } + break; // else Register is out of range... } } diff --git a/src/cpu/checker/thread_context.hh b/src/cpu/checker/thread_context.hh index 71c231ba0..436c97847 100644 --- a/src/cpu/checker/thread_context.hh +++ b/src/cpu/checker/thread_context.hh @@ -216,6 +216,9 @@ class CheckerThreadContext : public ThreadContext CCReg readCCReg(int reg_idx) { return actualTC->readCCReg(reg_idx); } + const VectorReg &readVectorReg(int reg_idx) + { return actualTC->readVectorReg(reg_idx); } + void setIntReg(int reg_idx, uint64_t val) { actualTC->setIntReg(reg_idx, val); @@ -240,6 +243,12 @@ class CheckerThreadContext : public ThreadContext checkerTC->setCCReg(reg_idx, val); } + void setVectorReg(int reg_idx, const VectorReg &val) + { + actualTC->setVectorReg(reg_idx, val); + checkerTC->setVectorReg(reg_idx, val); + } + /** Reads this thread's PC state. 
*/ TheISA::PCState pcState() { return actualTC->pcState(); } @@ -296,6 +305,7 @@ class CheckerThreadContext : public ThreadContext int flattenIntIndex(int reg) { return actualTC->flattenIntIndex(reg); } int flattenFloatIndex(int reg) { return actualTC->flattenFloatIndex(reg); } int flattenCCIndex(int reg) { return actualTC->flattenCCIndex(reg); } + int flattenVectorIndex(int reg) { return actualTC->flattenVectorIndex(reg); } int flattenMiscIndex(int reg) { return actualTC->flattenMiscIndex(reg); } unsigned readStCondFailures() @@ -331,6 +341,12 @@ class CheckerThreadContext : public ThreadContext void setCCRegFlat(int idx, CCReg val) { actualTC->setCCRegFlat(idx, val); } + + const VectorReg &readVectorRegFlat(int idx) + { return actualTC->readVectorRegFlat(idx); } + + void setVectorRegFlat(int idx, const VectorReg &val) + { actualTC->setVectorRegFlat(idx, val); } }; #endif // __CPU_CHECKER_EXEC_CONTEXT_HH__ diff --git a/src/cpu/exec_context.hh b/src/cpu/exec_context.hh index c65841db2..5c6b3fad7 100644 --- a/src/cpu/exec_context.hh +++ b/src/cpu/exec_context.hh @@ -76,6 +76,7 @@ class ExecContext { typedef TheISA::MiscReg MiscReg; typedef TheISA::CCReg CCReg; + typedef TheISA::VectorReg VectorReg; public: /** @@ -128,6 +129,22 @@ class ExecContext { /** * @{ + * @name Vector Register Interfaces + * + */ + + /** Reads a vector register. */ + virtual const VectorReg &readVectorRegOperand (const StaticInst *si, + int idx) = 0; + + /** Sets a vector register to a value. 
*/ + virtual void setVectorRegOperand(const StaticInst *si, + int idx, const VectorReg &val) = 0; + + /** @} */ + + /** + * @{ * @name Misc Register Interfaces */ virtual MiscReg readMiscRegOperand(const StaticInst *si, int idx) = 0; diff --git a/src/cpu/minor/dyn_inst.cc b/src/cpu/minor/dyn_inst.cc index ab08e6b4a..03cf785ef 100644 --- a/src/cpu/minor/dyn_inst.cc +++ b/src/cpu/minor/dyn_inst.cc @@ -157,6 +157,8 @@ printRegName(std::ostream &os, TheISA::RegIndex reg) break; case CCRegClass: os << 'c' << static_cast<unsigned int>(reg - TheISA::CC_Reg_Base); + case VectorRegClass: + os << 'v' << static_cast<unsigned int>(reg - TheISA::Vector_Reg_Base); } } diff --git a/src/cpu/minor/exec_context.hh b/src/cpu/minor/exec_context.hh index 80d5d9872..6ea74047c 100644 --- a/src/cpu/minor/exec_context.hh +++ b/src/cpu/minor/exec_context.hh @@ -140,6 +140,20 @@ class ExecContext : public ::ExecContext return thread.readFloatRegBits(reg_idx); } + TheISA::CCReg + readCCRegOperand(const StaticInst *si, int idx) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::CC_Reg_Base; + return thread.readCCReg(reg_idx); + } + + const TheISA::VectorReg & + readVectorRegOperand(const StaticInst *si, int idx) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::Vector_Reg_Base; + return thread.readVectorReg(reg_idx); + } + void setIntRegOperand(const StaticInst *si, int idx, IntReg val) { @@ -162,6 +176,21 @@ class ExecContext : public ::ExecContext thread.setFloatRegBits(reg_idx, val); } + void + setCCRegOperand(const StaticInst *si, int idx, TheISA::CCReg val) + { + int reg_idx = si->destRegIdx(idx) - TheISA::CC_Reg_Base; + thread.setCCReg(reg_idx, val); + } + + void + setVectorRegOperand(const StaticInst *si, int idx, + const TheISA::VectorReg &val) + { + int reg_idx = si->destRegIdx(idx) - TheISA::Vector_Reg_Base; + thread.setVectorReg(reg_idx, val); + } + bool readPredicate() { @@ -265,20 +294,6 @@ class ExecContext : public ::ExecContext thread.getDTBPtr()->demapPage(vaddr, asn); } - 
TheISA::CCReg - readCCRegOperand(const StaticInst *si, int idx) - { - int reg_idx = si->srcRegIdx(idx) - TheISA::CC_Reg_Base; - return thread.readCCReg(reg_idx); - } - - void - setCCRegOperand(const StaticInst *si, int idx, TheISA::CCReg val) - { - int reg_idx = si->destRegIdx(idx) - TheISA::CC_Reg_Base; - thread.setCCReg(reg_idx, val); - } - void demapInstPage(Addr vaddr, uint64_t asn) { diff --git a/src/cpu/minor/scoreboard.cc b/src/cpu/minor/scoreboard.cc index f6b1f7944..3eb09271a 100644 --- a/src/cpu/minor/scoreboard.cc +++ b/src/cpu/minor/scoreboard.cc @@ -71,6 +71,11 @@ Scoreboard::findIndex(RegIndex reg, Index &scoreboard_index) scoreboard_index = TheISA::NumIntRegs + reg - TheISA::FP_Reg_Base; ret = true; break; + case VectorRegClass: + scoreboard_index = TheISA::NumIntRegs + TheISA::NumCCRegs + + TheISA::NumFloatRegs + reg - TheISA::Vector_Reg_Base; + ret = true; + break; case MiscRegClass: /* Don't bother with Misc registers */ ret = false; @@ -99,6 +104,9 @@ flattenRegIndex(TheISA::RegIndex reg, ThreadContext *thread_context) case CCRegClass: ret = thread_context->flattenCCIndex(reg); break; + case VectorRegClass: + ret = thread_context->flattenVectorIndex(reg); + break; case MiscRegClass: /* Don't bother to flatten misc regs as we don't need them here */ /* return thread_context->flattenMiscIndex(reg); */ diff --git a/src/cpu/minor/scoreboard.hh b/src/cpu/minor/scoreboard.hh index 711bcafb2..3a3a9d3c3 100644 --- a/src/cpu/minor/scoreboard.hh +++ b/src/cpu/minor/scoreboard.hh @@ -60,11 +60,13 @@ class Scoreboard : public Named { public: /** The number of registers in the Scoreboard. 
These - * are just the integer, CC and float registers packed + * are just the integer, CC, float and vector registers packed * together with integer regs in the range [0,NumIntRegs-1], - * CC regs in the range [NumIntRegs, NumIntRegs+NumCCRegs-1] - * and float regs in the range - * [NumIntRegs+NumCCRegs, NumFloatRegs+NumIntRegs+NumCCRegs-1] */ + * CC regs in the range [NumIntRegs, NumIntRegs + NumCCRegs - 1], + * float regs in the range + * [NumIntRegs + NumCCRegs, NumFloatRegs + NumIntRegs + NumCCRegs - 1] + * and vector regs in the range [NumFloatRegs + NumIntRegs + NumCCRegs, + * NumFloatRegs + NumIntRegs + NumCCRegs + NumVectorRegs - 1]*/ const unsigned numRegs; /** Type to use for thread context registers */ @@ -97,7 +99,7 @@ class Scoreboard : public Named Scoreboard(const std::string &name) : Named(name), numRegs(TheISA::NumIntRegs + TheISA::NumCCRegs + - TheISA::NumFloatRegs), + TheISA::NumFloatRegs + TheISA::NumVectorRegs), numResults(numRegs, 0), numUnpredictableResults(numRegs, 0), fuIndices(numRegs, 0), diff --git a/src/cpu/o3/O3CPU.py b/src/cpu/o3/O3CPU.py index 92f96a3b6..d2220de82 100644 --- a/src/cpu/o3/O3CPU.py +++ b/src/cpu/o3/O3CPU.py @@ -114,6 +114,7 @@ class DerivO3CPU(BaseCPU): numPhysIntRegs = Param.Unsigned(256, "Number of physical integer registers") numPhysFloatRegs = Param.Unsigned(256, "Number of physical floating point " "registers") + # most ISAs don't use condition-code regs, so default is 0 _defaultNumPhysCCRegs = 0 if buildEnv['TARGET_ISA'] in ('arm','x86'): @@ -126,6 +127,12 @@ class DerivO3CPU(BaseCPU): _defaultNumPhysCCRegs = Self.numPhysIntRegs * 5 numPhysCCRegs = Param.Unsigned(_defaultNumPhysCCRegs, "Number of physical cc registers") + + # most ISAs don't use vector regs, so default is 0 + _defaultNumPhysVectorRegs = 0 + numPhysVectorRegs = Param.Unsigned(_defaultNumPhysVectorRegs, + "Number of physical vector registers") + numIQEntries = Param.Unsigned(64, "Number of instruction queue entries") numROBEntries = 
Param.Unsigned(192, "Number of reorder buffer entries") diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 026907a94..d8f39bbe4 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -170,7 +170,8 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params) regFile(params->numPhysIntRegs, params->numPhysFloatRegs, - params->numPhysCCRegs), + params->numPhysCCRegs, + params->numPhysVectorRegs), freeList(name() + ".freelist", &regFile), @@ -269,6 +270,7 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params) assert(params->numPhysIntRegs >= numThreads * TheISA::NumIntRegs); assert(params->numPhysFloatRegs >= numThreads * TheISA::NumFloatRegs); assert(params->numPhysCCRegs >= numThreads * TheISA::NumCCRegs); + assert(params->numPhysVectorRegs >= numThreads * TheISA::NumVectorRegs); rename.setScoreboard(&scoreboard); iew.setScoreboard(&scoreboard); @@ -313,6 +315,12 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params) renameMap[tid].setCCEntry(ridx, phys_reg); commitRenameMap[tid].setCCEntry(ridx, phys_reg); } + + for (RegIndex ridx = 0; ridx < TheISA::NumVectorRegs; ++ridx) { + PhysRegIndex phys_reg = freeList.getVectorReg(); + renameMap[tid].setVectorEntry(ridx, phys_reg); + commitRenameMap[tid].setVectorEntry(ridx, phys_reg); + } } rename.setRenameMap(renameMap); @@ -521,6 +529,16 @@ FullO3CPU<Impl>::regStats() .desc("number of cc regfile writes") .prereq(ccRegfileWrites); + vectorRegfileReads + .name(name() + ".vector_regfile_reads") + .desc("number of vector regfile reads") + .prereq(vectorRegfileReads); + + vectorRegfileWrites + .name(name() + ".vector_regfile_writes") + .desc("number of vector regfile writes") + .prereq(vectorRegfileWrites); + miscRegfileReads .name(name() + ".misc_regfile_reads") .desc("number of misc regfile reads") @@ -807,6 +825,18 @@ FullO3CPU<Impl>::insertThread(ThreadID tid) scoreboard.setReg(phys_reg); } + //Bind vector Regs to Rename Map + max_reg = TheISA::NumIntRegs + TheISA::NumFloatRegs + TheISA::NumCCRegs + +
TheISA::NumVectorRegs; + for (int vreg = TheISA::NumIntRegs + TheISA::NumFloatRegs + + TheISA::NumCCRegs; + vreg < max_reg; vreg++) { + PhysRegIndex phys_reg = freeList.getVectorReg(); + + renameMap[tid].setEntry(vreg, phys_reg); + scoreboard.setReg(phys_reg); + } + //Copy Thread Data Into RegFile //this->copyFromTC(tid); @@ -860,6 +890,14 @@ FullO3CPU<Impl>::removeThread(ThreadID tid) freeList.addReg(phys_reg); } + // Unbind condition-code Regs from Rename Map + max_reg = TheISA::Vector_Reg_Base + TheISA::NumVectorRegs; + for (int vreg = TheISA::Vector_Reg_Base; vreg < max_reg; vreg++) { + PhysRegIndex phys_reg = renameMap[tid].lookup(vreg); + scoreboard.unsetReg(phys_reg); + freeList.addReg(phys_reg); + } + // Squash Throughout Pipeline DynInstPtr inst = commit.rob->readHeadInst(tid); InstSeqNum squash_seq_num = inst->seqNum; @@ -1259,6 +1297,14 @@ FullO3CPU<Impl>::readCCReg(int reg_idx) } template <class Impl> +const VectorReg & +FullO3CPU<Impl>::readVectorReg(int reg_idx) +{ + vectorRegfileReads++; + return regFile.readVectorReg(reg_idx); +} + +template <class Impl> void FullO3CPU<Impl>::setIntReg(int reg_idx, uint64_t val) { @@ -1291,6 +1337,14 @@ FullO3CPU<Impl>::setCCReg(int reg_idx, CCReg val) } template <class Impl> +void +FullO3CPU<Impl>::setVectorReg(int reg_idx, const VectorReg &val) +{ + vectorRegfileWrites++; + regFile.setVectorReg(reg_idx, val); +} + +template <class Impl> uint64_t FullO3CPU<Impl>::readArchIntReg(int reg_idx, ThreadID tid) { @@ -1331,6 +1385,16 @@ FullO3CPU<Impl>::readArchCCReg(int reg_idx, ThreadID tid) } template <class Impl> +const VectorReg& +FullO3CPU<Impl>::readArchVectorReg(int reg_idx, ThreadID tid) +{ + vectorRegfileReads++; + PhysRegIndex phys_reg = commitRenameMap[tid].lookupVector(reg_idx); + + return regFile.readVectorReg(phys_reg); +} + +template <class Impl> void FullO3CPU<Impl>::setArchIntReg(int reg_idx, uint64_t val, ThreadID tid) { @@ -1371,6 +1435,16 @@ FullO3CPU<Impl>::setArchCCReg(int reg_idx, CCReg val, 
ThreadID tid) } template <class Impl> +void +FullO3CPU<Impl>::setArchVectorReg(int reg_idx, const VectorReg &val, + ThreadID tid) +{ + vectorRegfileWrites++; + PhysRegIndex phys_reg = commitRenameMap[tid].lookupVector(reg_idx); + regFile.setVectorReg(phys_reg, val); +} + +template <class Impl> TheISA::PCState FullO3CPU<Impl>::pcState(ThreadID tid) { diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index aa02ee2ea..f16450d19 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -427,6 +427,8 @@ class FullO3CPU : public BaseO3CPU TheISA::CCReg readCCReg(int reg_idx); + const TheISA::VectorReg &readVectorReg(int reg_idx); + void setIntReg(int reg_idx, uint64_t val); void setFloatReg(int reg_idx, TheISA::FloatReg val); @@ -435,6 +437,8 @@ class FullO3CPU : public BaseO3CPU void setCCReg(int reg_idx, TheISA::CCReg val); + void setVectorReg(int reg_idx, const TheISA::VectorReg &val); + uint64_t readArchIntReg(int reg_idx, ThreadID tid); float readArchFloatReg(int reg_idx, ThreadID tid); @@ -443,6 +447,8 @@ class FullO3CPU : public BaseO3CPU TheISA::CCReg readArchCCReg(int reg_idx, ThreadID tid); + const TheISA::VectorReg &readArchVectorReg(int reg_idx, ThreadID tid); + /** Architectural register accessors. Looks up in the commit * rename table to obtain the true physical index of the * architected register first, then accesses that physical @@ -456,6 +462,9 @@ class FullO3CPU : public BaseO3CPU void setArchCCReg(int reg_idx, TheISA::CCReg val, ThreadID tid); + void setArchVectorReg(int reg_idx, const TheISA::VectorReg &val, + ThreadID tid); + /** Sets the commit PC state of a specific thread. 
*/ void pcState(const TheISA::PCState &newPCState, ThreadID tid); @@ -734,6 +743,9 @@ class FullO3CPU : public BaseO3CPU //number of CC register file accesses Stats::Scalar ccRegfileReads; Stats::Scalar ccRegfileWrites; + //number of integer register file accesses + Stats::Scalar vectorRegfileReads; + Stats::Scalar vectorRegfileWrites; //number of misc Stats::Scalar miscRegfileReads; Stats::Scalar miscRegfileWrites; diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh index 6740c601d..d19e4d461 100644 --- a/src/cpu/o3/dyn_inst.hh +++ b/src/cpu/o3/dyn_inst.hh @@ -74,6 +74,7 @@ class BaseO3DynInst : public BaseDynInst<Impl> typedef TheISA::FloatReg FloatReg; typedef TheISA::FloatRegBits FloatRegBits; typedef TheISA::CCReg CCReg; + typedef TheISA::VectorReg VectorReg; /** Misc register index type. */ typedef TheISA::MiscReg MiscReg; @@ -206,7 +207,6 @@ class BaseO3DynInst : public BaseDynInst<Impl> void forwardOldRegs() { - for (int idx = 0; idx < this->numDestRegs(); idx++) { PhysRegIndex prev_phys_reg = this->prevDestRegIdx(idx); TheISA::RegIndex original_dest_reg = @@ -224,6 +224,11 @@ class BaseO3DynInst : public BaseDynInst<Impl> this->setCCRegOperand(this->staticInst.get(), idx, this->cpu->readCCReg(prev_phys_reg)); break; + case VectorRegClass: + this->setVectorRegOperand(this->staticInst.get(), idx, + this->cpu->readVectorReg(prev_phys_reg)); + break; + case MiscRegClass: // no need to forward misc reg values break; @@ -272,6 +277,11 @@ class BaseO3DynInst : public BaseDynInst<Impl> return this->cpu->readCCReg(this->_srcRegIdx[idx]); } + const VectorReg &readVectorRegOperand(const StaticInst *si, int idx) + { + return this->cpu->readVectorReg(this->_srcRegIdx[idx]); + } + /** @todo: Make results into arrays so they can handle multiple dest * registers. 
*/ @@ -300,6 +310,13 @@ class BaseO3DynInst : public BaseDynInst<Impl> BaseDynInst<Impl>::setCCRegOperand(si, idx, val); } + void setVectorRegOperand(const StaticInst *si, int idx, + const VectorReg &val) + { + this->cpu->setVectorReg(this->_destRegIdx[idx], val); + BaseDynInst<Impl>::setVectorRegOperand(si, idx, val); + } + #if THE_ISA == MIPS_ISA MiscReg readRegOtherThread(int misc_reg, ThreadID tid) { diff --git a/src/cpu/o3/free_list.hh b/src/cpu/o3/free_list.hh index aa805e26e..d345d7ac8 100644 --- a/src/cpu/o3/free_list.hh +++ b/src/cpu/o3/free_list.hh @@ -109,6 +109,9 @@ class UnifiedFreeList /** The list of free condition-code registers. */ SimpleFreeList ccList; + /** The list of free vector registers. */ + SimpleFreeList vectorList; + /** * The register file object is used only to distinguish integer * from floating-point physical register indices. @@ -148,6 +151,9 @@ class UnifiedFreeList /** Gets a free cc register. */ PhysRegIndex getCCReg() { return ccList.getReg(); } + /** Gets a free vector register. */ + PhysRegIndex getVectorReg() { return vectorList.getReg(); } + /** Adds a register back to the free list. */ void addReg(PhysRegIndex freed_reg); @@ -160,6 +166,9 @@ class UnifiedFreeList /** Adds a cc register back to the free list. */ void addCCReg(PhysRegIndex freed_reg) { ccList.addReg(freed_reg); } + /** Adds a vector register back to the free list. */ + void addVectorReg(PhysRegIndex freed_reg) { vectorList.addReg(freed_reg); } + /** Checks if there are any free integer registers. */ bool hasFreeIntRegs() const { return intList.hasFreeRegs(); } @@ -169,6 +178,9 @@ class UnifiedFreeList /** Checks if there are any free cc registers. */ bool hasFreeCCRegs() const { return ccList.hasFreeRegs(); } + /** Checks if there are any free vector registers. */ + bool hasFreeVectorRegs() const { return vectorList.hasFreeRegs(); } + /** Returns the number of free integer registers. 
*/ unsigned numFreeIntRegs() const { return intList.numFreeRegs(); } @@ -177,6 +189,9 @@ class UnifiedFreeList /** Returns the number of free cc registers. */ unsigned numFreeCCRegs() const { return ccList.numFreeRegs(); } + + /** Returns the number of free vector registers. */ + unsigned numFreeVectorRegs() const { return vectorList.numFreeRegs(); } }; inline void @@ -189,9 +204,11 @@ UnifiedFreeList::addReg(PhysRegIndex freed_reg) intList.addReg(freed_reg); } else if (regFile->isFloatPhysReg(freed_reg)) { floatList.addReg(freed_reg); - } else { - assert(regFile->isCCPhysReg(freed_reg)); + } else if (regFile->isCCPhysReg(freed_reg)) { ccList.addReg(freed_reg); + } else { + assert(regFile->isVectorPhysReg(freed_reg)); + vectorList.addReg(freed_reg); } // These assert conditions ensure that the number of free diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index 7d359b992..e16843160 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -99,7 +99,7 @@ InstructionQueue<Impl>::InstructionQueue(O3CPU *cpu_ptr, IEW *iew_ptr, // Set the number of total physical registers numPhysRegs = params->numPhysIntRegs + params->numPhysFloatRegs + - params->numPhysCCRegs; + params->numPhysCCRegs + params->numPhysVectorRegs; //Create an entry for each physical register within the //dependency graph. 
diff --git a/src/cpu/o3/regfile.cc b/src/cpu/o3/regfile.cc index 96ce44bdd..a7476c5ec 100644 --- a/src/cpu/o3/regfile.cc +++ b/src/cpu/o3/regfile.cc @@ -37,15 +37,20 @@ PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs, unsigned _numPhysicalFloatRegs, - unsigned _numPhysicalCCRegs) + unsigned _numPhysicalCCRegs, + unsigned _numPhysicalVectorRegs) : intRegFile(_numPhysicalIntRegs), floatRegFile(_numPhysicalFloatRegs), ccRegFile(_numPhysicalCCRegs), + vectorRegFile(_numPhysicalVectorRegs), baseFloatRegIndex(_numPhysicalIntRegs), baseCCRegIndex(_numPhysicalIntRegs + _numPhysicalFloatRegs), + baseVectorRegIndex(_numPhysicalIntRegs + _numPhysicalFloatRegs + + _numPhysicalCCRegs), totalNumRegs(_numPhysicalIntRegs + _numPhysicalFloatRegs - + _numPhysicalCCRegs) + + _numPhysicalCCRegs + + _numPhysicalVectorRegs) { if (TheISA::NumCCRegs == 0 && _numPhysicalCCRegs != 0) { // Just make this a warning and go ahead and allocate them @@ -53,6 +58,13 @@ PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs, warn("Non-zero number of physical CC regs specified, even though\n" " ISA does not use them.\n"); } + + if (TheISA::NumVectorRegs == 0 && _numPhysicalVectorRegs != 0) { + // Just make this a warning and go ahead and allocate them + // anyway, to keep from having to add checks everywhere + warn("Non-zero number of physical vector regs specified, even though\n" + " ISA does not use them.\n"); + } } @@ -73,9 +85,15 @@ PhysRegFile::initFreeList(UnifiedFreeList *freeList) freeList->addFloatReg(reg_idx++); } - // The rest of the registers are the condition-code physical + // The next batch of registers are the condition-code physical // registers; put them onto the condition-code free list. - while (reg_idx < totalNumRegs) { + while (reg_idx < baseVectorRegIndex) { freeList->addCCReg(reg_idx++); } + + // The rest of the registers are the vector physical + // registers; put them onto the vector free list. 
+ while (reg_idx < totalNumRegs) { + freeList->addVectorReg(reg_idx++); + } } diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh index 8b87725ca..71ca5015f 100644 --- a/src/cpu/o3/regfile.hh +++ b/src/cpu/o3/regfile.hh @@ -56,6 +56,7 @@ class PhysRegFile typedef TheISA::FloatReg FloatReg; typedef TheISA::FloatRegBits FloatRegBits; typedef TheISA::CCReg CCReg; + typedef TheISA::VectorReg VectorReg; typedef union { FloatReg d; @@ -71,6 +72,9 @@ class PhysRegFile /** Condition-code register file. */ std::vector<CCReg> ccRegFile; + /** Vector register file. */ + std::vector<VectorReg> vectorRegFile; + /** * The first floating-point physical register index. The physical * register file has a single continuous index space, with the @@ -93,6 +97,12 @@ class PhysRegFile */ unsigned baseCCRegIndex; + /** + * The first vector physical register index. The vector registers follow + * the condition-code registers. + */ + unsigned baseVectorRegIndex; + /** Total number of physical registers. */ unsigned totalNumRegs; @@ -103,7 +113,8 @@ class PhysRegFile */ PhysRegFile(unsigned _numPhysicalIntRegs, unsigned _numPhysicalFloatRegs, - unsigned _numPhysicalCCRegs); + unsigned _numPhysicalCCRegs, + unsigned _numPhysicalVectorRegs); /** * Destructor to free resources @@ -122,7 +133,11 @@ class PhysRegFile /** @return the number of condition-code physical registers. */ unsigned numCCPhysRegs() const - { return totalNumRegs - baseCCRegIndex; } + { return baseVectorRegIndex - baseCCRegIndex; } + + /** @return the number of vector physical registers. */ + unsigned numVectorPhysRegs() const + { return totalNumRegs - baseVectorRegIndex; } /** @return the total number of physical registers. 
*/ unsigned totalNumPhysRegs() const { return totalNumRegs; } @@ -151,7 +166,16 @@ class PhysRegFile */ bool isCCPhysReg(PhysRegIndex reg_idx) { - return (baseCCRegIndex <= reg_idx && reg_idx < totalNumRegs); + return (baseCCRegIndex <= reg_idx && reg_idx < baseVectorRegIndex); + } + + /** + * @return true if the specified physical register index + * corresponds to a vector physical register. + */ + bool isVectorPhysReg(PhysRegIndex reg_idx) const + { + return baseVectorRegIndex <= reg_idx && reg_idx < totalNumRegs; } /** Reads an integer register. */ @@ -207,6 +231,18 @@ class PhysRegFile return ccRegFile[reg_offset]; } + /** Reads a vector register. */ + const VectorReg &readVectorReg(PhysRegIndex reg_idx) const + { + assert(isVectorPhysReg(reg_idx)); + + // Remove the base vector reg dependency. + PhysRegIndex reg_offset = reg_idx - baseVectorRegIndex; + + DPRINTF(IEW, "RegFile: Access to vector register %i\n", int(reg_idx)); + return vectorRegFile[reg_offset]; + } + /** Sets an integer register to the given value. */ void setIntReg(PhysRegIndex reg_idx, uint64_t val) { @@ -262,6 +298,16 @@ class PhysRegFile ccRegFile[reg_offset] = val; } + + /** Sets a vector register to the given value. */ + void setVectorReg(PhysRegIndex reg_idx, const VectorReg &val) + { + assert(isVectorPhysReg(reg_idx)); + // Remove the base vector reg dependency. 
+ PhysRegIndex reg_offset = reg_idx - baseVectorRegIndex; + DPRINTF(IEW, "RegFile: Setting vector register %i\n", int(reg_idx)); + vectorRegFile[reg_offset] = val; + } }; diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index 43b7ba9aa..3da6fd4fa 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -69,7 +69,7 @@ DefaultRename<Impl>::DefaultRename(O3CPU *_cpu, DerivO3CPUParams *params) commitWidth(params->commitWidth), numThreads(params->numThreads), maxPhysicalRegs(params->numPhysIntRegs + params->numPhysFloatRegs - + params->numPhysCCRegs) + + params->numPhysCCRegs + params->numPhysVectorRegs) { if (renameWidth > Impl::MaxWidth) fatal("renameWidth (%d) is larger than compiled limit (%d),\n" @@ -635,7 +635,8 @@ DefaultRename<Impl>::renameInsts(ThreadID tid) // to rename to. Otherwise block. if (!renameMap[tid]->canRename(inst->numIntDestRegs(), inst->numFPDestRegs(), - inst->numCCDestRegs())) { + inst->numCCDestRegs(), + inst->numVectorDestRegs())) { DPRINTF(Rename, "Blocking due to lack of free " "physical registers to rename to.\n"); blockThisCycle = true; @@ -1016,6 +1017,11 @@ DefaultRename<Impl>::renameSrcRegs(DynInstPtr &inst, ThreadID tid) renamed_reg = map->lookupCC(flat_rel_src_reg); break; + case VectorRegClass: + flat_rel_src_reg = tc->flattenVectorIndex(rel_src_reg); + renamed_reg = map->lookupVector(flat_rel_src_reg); + break; + case MiscRegClass: // misc regs don't get flattened flat_rel_src_reg = rel_src_reg; @@ -1082,6 +1088,12 @@ DefaultRename<Impl>::renameDestRegs(DynInstPtr &inst, ThreadID tid) flat_uni_dest_reg = flat_rel_dest_reg + TheISA::CC_Reg_Base; break; + case VectorRegClass: + flat_rel_dest_reg = tc->flattenVectorIndex(rel_dest_reg); + rename_result = map->renameVector(flat_rel_dest_reg); + flat_uni_dest_reg = flat_rel_dest_reg + TheISA::Vector_Reg_Base; + break; + case MiscRegClass: // misc regs don't get flattened flat_rel_dest_reg = rel_dest_reg; @@ -1156,7 +1168,7 @@ inline int 
DefaultRename<Impl>::calcFreeLQEntries(ThreadID tid) { int num_free = freeEntries[tid].lqEntries - - (loadsInProgress[tid] - fromIEW->iewInfo[tid].dispatchedToLQ); + (loadsInProgress[tid] - fromIEW->iewInfo[tid].dispatchedToLQ); DPRINTF(Rename, "calcFreeLQEntries: free lqEntries: %d, loadsInProgress: %d, " "loads dispatchedToLQ: %d\n", freeEntries[tid].lqEntries, loadsInProgress[tid], fromIEW->iewInfo[tid].dispatchedToLQ); @@ -1168,7 +1180,7 @@ inline int DefaultRename<Impl>::calcFreeSQEntries(ThreadID tid) { int num_free = freeEntries[tid].sqEntries - - (storesInProgress[tid] - fromIEW->iewInfo[tid].dispatchedToSQ); + (storesInProgress[tid] - fromIEW->iewInfo[tid].dispatchedToSQ); DPRINTF(Rename, "calcFreeSQEntries: free sqEntries: %d, storesInProgress: %d, " "stores dispatchedToSQ: %d\n", freeEntries[tid].sqEntries, storesInProgress[tid], fromIEW->iewInfo[tid].dispatchedToSQ); diff --git a/src/cpu/o3/rename_map.cc b/src/cpu/o3/rename_map.cc index b0232df20..27ddd8c63 100644 --- a/src/cpu/o3/rename_map.cc +++ b/src/cpu/o3/rename_map.cc @@ -99,6 +99,9 @@ UnifiedRenameMap::init(PhysRegFile *_regFile, floatMap.init(TheISA::NumFloatRegs, &(freeList->floatList), _floatZeroReg); ccMap.init(TheISA::NumCCRegs, &(freeList->ccList), (RegIndex)-1); + + vectorMap.init(TheISA::NumVectorRegs, &(freeList->vectorList), + (RegIndex)-1); } @@ -117,6 +120,9 @@ UnifiedRenameMap::rename(RegIndex arch_reg) case CCRegClass: return renameCC(rel_arch_reg); + case VectorRegClass: + return renameVector(rel_arch_reg); + case MiscRegClass: return renameMisc(rel_arch_reg); @@ -142,6 +148,9 @@ UnifiedRenameMap::lookup(RegIndex arch_reg) const case CCRegClass: return lookupCC(rel_arch_reg); + case VectorRegClass: + return lookupVector(rel_arch_reg); + case MiscRegClass: return lookupMisc(rel_arch_reg); @@ -166,6 +175,9 @@ UnifiedRenameMap::setEntry(RegIndex arch_reg, PhysRegIndex phys_reg) case CCRegClass: return setCCEntry(rel_arch_reg, phys_reg); + case VectorRegClass: + return 
setVectorEntry(rel_arch_reg, phys_reg); + case MiscRegClass: // Misc registers do not actually rename, so don't change // their mappings. We end up here when a commit or squash diff --git a/src/cpu/o3/rename_map.hh b/src/cpu/o3/rename_map.hh index 9d91f232e..37487c3d3 100644 --- a/src/cpu/o3/rename_map.hh +++ b/src/cpu/o3/rename_map.hh @@ -178,6 +178,9 @@ class UnifiedRenameMap /** The condition-code register rename map */ SimpleRenameMap ccMap; + /** The vector register rename map */ + SimpleRenameMap vectorMap; + public: typedef TheISA::RegIndex RegIndex; @@ -240,6 +243,17 @@ class UnifiedRenameMap } /** + * Perform rename() on a vector register, given a relative vector register + * index. + */ + RenameInfo renameVector(RegIndex rel_arch_reg) + { + RenameInfo info = vectorMap.rename(rel_arch_reg); + assert(regFile->isVectorPhysReg(info.first)); + return info; + } + + /** * Perform rename() on a misc register, given a relative * misc register index. */ @@ -297,6 +311,17 @@ class UnifiedRenameMap } /** + * Perform lookup() on a vector register, given a relative + * vector register index. + */ + PhysRegIndex lookupVector(RegIndex rel_arch_reg) const + { + PhysRegIndex phys_reg = vectorMap.lookup(rel_arch_reg); + assert(regFile->isVectorPhysReg(phys_reg)); + return phys_reg; + } + + /** * Perform lookup() on a misc register, given a relative * misc register index. */ @@ -349,6 +374,16 @@ class UnifiedRenameMap } /** + * Perform setEntry() on a vector register, given a relative vector + * register index. + */ + void setVectorEntry(RegIndex arch_reg, PhysRegIndex phys_reg) + { + assert(regFile->isVectorPhysReg(phys_reg)); + vectorMap.setEntry(arch_reg, phys_reg); + } + + /** * Return the minimum number of free entries across all of the * register classes. 
The minimum is used so we guarantee that * this number of entries is available regardless of which class @@ -362,11 +397,13 @@ class UnifiedRenameMap /** * Return whether there are enough registers to serve the request. */ - bool canRename(uint32_t intRegs, uint32_t floatRegs, uint32_t ccRegs) const + bool canRename(uint32_t intRegs, uint32_t floatRegs, uint32_t ccRegs, + uint32_t vectorRegs) const { return intRegs <= intMap.numFreeEntries() && floatRegs <= floatMap.numFreeEntries() && - ccRegs <= ccMap.numFreeEntries(); + ccRegs <= ccMap.numFreeEntries() && + vectorRegs <= vectorMap.numFreeEntries(); } }; diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh index 87d87900c..6e9b054da 100755 --- a/src/cpu/o3/thread_context.hh +++ b/src/cpu/o3/thread_context.hh @@ -189,6 +189,10 @@ class O3ThreadContext : public ThreadContext return readCCRegFlat(flattenCCIndex(reg_idx)); } + virtual const VectorReg &readVectorReg(int reg_idx) { + return readVectorRegFlat(flattenVectorIndex(reg_idx)); + } + /** Sets an integer register to a value. */ virtual void setIntReg(int reg_idx, uint64_t val) { setIntRegFlat(flattenIntIndex(reg_idx), val); @@ -206,6 +210,10 @@ class O3ThreadContext : public ThreadContext setCCRegFlat(flattenCCIndex(reg_idx), val); } + virtual void setVectorReg(int reg_idx, const VectorReg &val) { + setVectorRegFlat(flattenVectorIndex(reg_idx), val); + } + /** Reads this thread's PC state. */ virtual TheISA::PCState pcState() { return cpu->pcState(thread->threadId()); } @@ -246,6 +254,7 @@ class O3ThreadContext : public ThreadContext virtual int flattenIntIndex(int reg); virtual int flattenFloatIndex(int reg); virtual int flattenCCIndex(int reg); + virtual int flattenVectorIndex(int reg); virtual int flattenMiscIndex(int reg); /** Returns the number of consecutive store conditional failures. 
*/ @@ -291,6 +300,9 @@ class O3ThreadContext : public ThreadContext virtual CCReg readCCRegFlat(int idx); virtual void setCCRegFlat(int idx, CCReg val); + + virtual const VectorReg &readVectorRegFlat(int idx); + virtual void setVectorRegFlat(int idx, const VectorReg &val); }; #endif diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh index e6a3d5083..ecdd9ebb9 100755 --- a/src/cpu/o3/thread_context_impl.hh +++ b/src/cpu/o3/thread_context_impl.hh @@ -216,6 +216,13 @@ O3ThreadContext<Impl>::readCCRegFlat(int reg_idx) } template <class Impl> +const TheISA::VectorReg & +O3ThreadContext<Impl>::readVectorRegFlat(int reg_idx) +{ + return cpu->readArchVectorReg(reg_idx, thread->threadId()); +} + +template <class Impl> void O3ThreadContext<Impl>::setIntRegFlat(int reg_idx, uint64_t val) { @@ -253,6 +260,15 @@ O3ThreadContext<Impl>::setCCRegFlat(int reg_idx, TheISA::CCReg val) template <class Impl> void +O3ThreadContext<Impl>::setVectorRegFlat(int reg_idx, + const TheISA::VectorReg &val) +{ + cpu->setArchVectorReg(reg_idx, val, thread->threadId()); + conditionalSquash(); +} + +template <class Impl> +void O3ThreadContext<Impl>::pcState(const TheISA::PCState &val) { cpu->pcState(val, thread->threadId()); @@ -292,6 +308,13 @@ O3ThreadContext<Impl>::flattenCCIndex(int reg) template <class Impl> int +O3ThreadContext<Impl>::flattenVectorIndex(int reg) +{ + return cpu->isa[thread->threadId()]->flattenVectorIndex(reg); +} + +template <class Impl> +int O3ThreadContext<Impl>::flattenMiscIndex(int reg) { return cpu->isa[thread->threadId()]->flattenMiscIndex(reg); diff --git a/src/cpu/reg_class.cc b/src/cpu/reg_class.cc index 1805eae13..0cb789fe1 100644 --- a/src/cpu/reg_class.cc +++ b/src/cpu/reg_class.cc @@ -34,5 +34,6 @@ const char *RegClassStrings[] = { "IntRegClass", "FloatRegClass", "CCRegClass", + "VectorRegClass", "MiscRegClass" }; diff --git a/src/cpu/reg_class.hh b/src/cpu/reg_class.hh index 549ebab26..6c7b1b55d 100644 --- a/src/cpu/reg_class.hh 
+++ b/src/cpu/reg_class.hh @@ -42,6 +42,7 @@ enum RegClass { IntRegClass, ///< Integer register FloatRegClass, ///< Floating-point register CCRegClass, ///< Condition-code register + VectorRegClass, ///< Vector register MiscRegClass ///< Control (misc) register }; @@ -76,12 +77,15 @@ RegClass regIdxToClass(TheISA::RegIndex reg_idx, } else if (reg_idx < TheISA::CC_Reg_Base) { cl = FloatRegClass; offset = TheISA::FP_Reg_Base; - } else if (reg_idx < TheISA::Misc_Reg_Base) { + } else if (reg_idx < TheISA::Vector_Reg_Base) { // if there are no CC regs, the ISA should set // CC_Reg_Base == Misc_Reg_Base so the if above // never succeeds cl = CCRegClass; offset = TheISA::CC_Reg_Base; + } else if (reg_idx < TheISA::Misc_Reg_Base) { + cl = VectorRegClass; + offset = TheISA::Vector_Reg_Base; } else { cl = MiscRegClass; offset = TheISA::Misc_Reg_Base; diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh index 2f7247010..27e434132 100644 --- a/src/cpu/simple/base.hh +++ b/src/cpu/simple/base.hh @@ -87,6 +87,7 @@ class BaseSimpleCPU : public BaseCPU, public ExecContext typedef TheISA::FloatReg FloatReg; typedef TheISA::FloatRegBits FloatRegBits; typedef TheISA::CCReg CCReg; + typedef TheISA::VectorReg VectorReg; BPredUnit *branchPred; @@ -239,6 +240,10 @@ class BaseSimpleCPU : public BaseCPU, public ExecContext Stats::Scalar numCCRegReads; Stats::Scalar numCCRegWrites; + //number of vector register file accesses + Stats::Scalar numVectorRegReads; + Stats::Scalar numVectorRegWrites; + // number of simulated memory references Stats::Scalar numMemRefs; Stats::Scalar numLoadInsts; @@ -325,6 +330,13 @@ class BaseSimpleCPU : public BaseCPU, public ExecContext return thread->readCCReg(reg_idx); } + const VectorReg &readVectorRegOperand(const StaticInst *si, int idx) + { + numVectorRegReads++; + int reg_idx = si->srcRegIdx(idx) - TheISA::Vector_Reg_Base; + return thread->readVectorReg(reg_idx); + } + void setIntRegOperand(const StaticInst *si, int idx, IntReg val) { 
numIntRegWrites++; @@ -353,6 +365,14 @@ class BaseSimpleCPU : public BaseCPU, public ExecContext thread->setCCReg(reg_idx, val); } + void setVectorRegOperand(const StaticInst *si, int idx, + const VectorReg &val) + { + numVectorRegWrites++; + int reg_idx = si->destRegIdx(idx) - TheISA::Vector_Reg_Base; + thread->setVectorReg(reg_idx, val); + } + bool readPredicate() { return thread->readPredicate(); } void setPredicate(bool val) { diff --git a/src/cpu/simple_thread.hh b/src/cpu/simple_thread.hh index 20acff6ee..070a00dc8 100644 --- a/src/cpu/simple_thread.hh +++ b/src/cpu/simple_thread.hh @@ -58,6 +58,7 @@ #include "debug/CCRegs.hh" #include "debug/FloatRegs.hh" #include "debug/IntRegs.hh" +#include "debug/VectorRegs.hh" #include "mem/page_table.hh" #include "mem/request.hh" #include "sim/byteswap.hh" @@ -102,6 +103,8 @@ class SimpleThread : public ThreadState typedef TheISA::FloatReg FloatReg; typedef TheISA::FloatRegBits FloatRegBits; typedef TheISA::CCReg CCReg; + typedef TheISA::VectorReg VectorReg; + public: typedef ThreadContext::Status Status; @@ -111,9 +114,15 @@ class SimpleThread : public ThreadState FloatRegBits i[TheISA::NumFloatRegs]; } floatRegs; TheISA::IntReg intRegs[TheISA::NumIntRegs]; + #ifdef ISA_HAS_CC_REGS TheISA::CCReg ccRegs[TheISA::NumCCRegs]; #endif + +#ifdef ISA_HAS_VECTOR_REGS + TheISA::VectorReg vectorRegs[TheISA::NumVectorRegs]; +#endif + TheISA::ISA *const isa; // one "instance" of the current ISA. 
TheISA::PCState _pcState; @@ -282,6 +291,16 @@ class SimpleThread : public ThreadState #endif } + const VectorReg &readVectorReg(int reg_idx) + { + int flatIndex = isa->flattenVectorIndex(reg_idx); + assert(0 <= flatIndex); + assert(flatIndex < TheISA::NumVectorRegs); + DPRINTF(VectorRegs, "Reading vector reg %d (%d).\n", + reg_idx, flatIndex); + return readVectorRegFlat(flatIndex); + } + void setIntReg(int reg_idx, uint64_t val) { int flatIndex = isa->flattenIntIndex(reg_idx); @@ -325,6 +344,19 @@ class SimpleThread : public ThreadState #endif } + void setVectorReg(int reg_idx, const VectorReg &val) + { +#ifdef ISA_HAS_VECTOR_REGS + int flatIndex = isa->flattenVectorIndex(reg_idx); + assert(flatIndex < TheISA::NumVectorRegs); + DPRINTF(VectorRegs, "Setting vector reg %d (%d).\n", + reg_idx, flatIndex); + setVectorRegFlat(flatIndex, val); +#else + panic("Tried to set a vector register."); +#endif + } + TheISA::PCState pcState() { @@ -414,6 +446,12 @@ class SimpleThread : public ThreadState } int + flattenVectorIndex(int reg) + { + return isa->flattenVectorIndex(reg); + } + + int flattenMiscIndex(int reg) { return isa->flattenMiscIndex(reg); @@ -450,6 +488,18 @@ class SimpleThread : public ThreadState void setCCRegFlat(int idx, CCReg val) { panic("setCCRegFlat w/no CC regs!\n"); } #endif + +#ifdef ISA_HAS_VECTOR_REGS + const VectorReg &readVectorRegFlat(int idx) { return vectorRegs[idx]; } + void setVectorRegFlat(int idx, const VectorReg &val) + { vectorRegs[idx] = val; } +#else + const VectorReg &readVectorRegFlat(int idx) + { panic("readVectorRegFlat w/no Vector regs!\n"); } + + void setVectorRegFlat(int idx, const VectorReg &val) + { panic("setVectorRegFlat w/no Vector regs!\n"); } +#endif }; diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh index 684a22856..58cf752b7 100644 --- a/src/cpu/static_inst.hh +++ b/src/cpu/static_inst.hh @@ -98,6 +98,7 @@ class StaticInst : public RefCounted, public StaticInstFlags int8_t _numFPDestRegs; int8_t 
_numIntDestRegs; int8_t _numCCDestRegs; + int8_t _numVectorDestRegs; //@} public: @@ -116,9 +117,10 @@ class StaticInst : public RefCounted, public StaticInstFlags int8_t numFPDestRegs() const { return _numFPDestRegs; } /// Number of integer destination regs. int8_t numIntDestRegs() const { return _numIntDestRegs; } - //@} - /// Number of coprocesor destination regs. + /// Number of condition code destination regs. int8_t numCCDestRegs() const { return _numCCDestRegs; } + /// Number of vector destination regs. + int8_t numVectorDestRegs() const { return _numVectorDestRegs; } //@} /// @name Flag accessors. @@ -140,6 +142,7 @@ class StaticInst : public RefCounted, public StaticInstFlags bool isInteger() const { return flags[IsInteger]; } bool isFloating() const { return flags[IsFloating]; } + bool isVector() const { return flags[IsVector]; } bool isCC() const { return flags[IsCC]; } bool isControl() const { return flags[IsControl]; } @@ -252,7 +255,8 @@ class StaticInst : public RefCounted, public StaticInstFlags StaticInst(const char *_mnemonic, ExtMachInst _machInst, OpClass __opClass) : _opClass(__opClass), _numSrcRegs(0), _numDestRegs(0), _numFPDestRegs(0), _numIntDestRegs(0), _numCCDestRegs(0), - machInst(_machInst), mnemonic(_mnemonic), cachedDisassembly(0) + _numVectorDestRegs(0), machInst(_machInst), mnemonic(_mnemonic), + cachedDisassembly(0) { } public: @@ -326,7 +330,7 @@ class StaticInst : public RefCounted, public StaticInstFlags void printFlags(std::ostream &outs, const std::string &separator) const; /// Return name of machine instruction - std::string getName() { return mnemonic; } + std::string getName() const { return mnemonic; } }; #endif // __CPU_STATIC_INST_HH__ diff --git a/src/cpu/thread_context.cc b/src/cpu/thread_context.cc index fe1ae69dd..ce7604d3c 100644 --- a/src/cpu/thread_context.cc +++ b/src/cpu/thread_context.cc @@ -88,6 +88,15 @@ ThreadContext::compare(ThreadContext *one, ThreadContext *two) panic("CC reg idx %d doesn't match, one: 
%#x, two: %#x", i, t1, t2); } + + // loop through the Vector registers. + for (int i = 0; i < TheISA::NumVectorRegs; ++i) { + const TheISA::VectorReg &t1 = one->readVectorReg(i); + const TheISA::VectorReg &t2 = two->readVectorReg(i); + if (t1 != t2) + panic("Vector reg idx %d doesn't match", i); + } + if (!(one->pcState() == two->pcState())) panic("PC state doesn't match."); int id1 = one->cpuId(); @@ -127,6 +136,16 @@ serialize(ThreadContext &tc, CheckpointOut &cp) SERIALIZE_ARRAY(ccRegs, NumCCRegs); #endif +#ifdef ISA_HAS_VECTOR_REGS + VectorRegElement vectorRegs[NumVectorRegs * NumVectorRegElements]; + for (int i = 0; i < NumVectorRegs; ++i) { + const VectorReg &v = tc.readVectorRegFlat(i); + for (int j = 0; j < NumVectorRegElements; ++j) + vectorRegs[i * NumVectorRegElements + j] = v[j]; + } + SERIALIZE_ARRAY(vectorRegs, NumVectorRegs * NumVectorRegElements); +#endif + tc.pcState().serialize(cp); // thread_num and cpu_id are deterministic from the config @@ -156,6 +175,17 @@ unserialize(ThreadContext &tc, CheckpointIn &cp) tc.setCCRegFlat(i, ccRegs[i]); #endif +#ifdef ISA_HAS_VECTOR_REGS + VectorRegElement vectorRegs[NumVectorRegs * NumVectorRegElements]; + UNSERIALIZE_ARRAY(vectorRegs, NumVectorRegs * NumVectorRegElements); + for (int i = 0; i < NumVectorRegs; ++i) { + VectorReg v; + for (int j = 0; j < NumVectorRegElements; ++j) + v[j] = vectorRegs[i * NumVectorRegElements + j]; + tc.setVectorRegFlat(i, v); + } +#endif + PCState pcState; pcState.unserialize(cp); tc.pcState(pcState); diff --git a/src/cpu/thread_context.hh b/src/cpu/thread_context.hh index 2544b19c6..cd8b98f0c 100644 --- a/src/cpu/thread_context.hh +++ b/src/cpu/thread_context.hh @@ -98,6 +98,7 @@ class ThreadContext typedef TheISA::FloatReg FloatReg; typedef TheISA::FloatRegBits FloatRegBits; typedef TheISA::CCReg CCReg; + typedef TheISA::VectorReg VectorReg; typedef TheISA::MiscReg MiscReg; public: @@ -205,6 +206,8 @@ class ThreadContext virtual CCReg readCCReg(int reg_idx) = 0; + virtual 
const VectorReg &readVectorReg(int reg_idx) = 0; + virtual void setIntReg(int reg_idx, uint64_t val) = 0; virtual void setFloatReg(int reg_idx, FloatReg val) = 0; @@ -213,6 +216,8 @@ class ThreadContext virtual void setCCReg(int reg_idx, CCReg val) = 0; + virtual void setVectorReg(int reg_idx, const VectorReg &val) = 0; + virtual TheISA::PCState pcState() = 0; virtual void pcState(const TheISA::PCState &val) = 0; @@ -236,6 +241,7 @@ class ThreadContext virtual int flattenIntIndex(int reg) = 0; virtual int flattenFloatIndex(int reg) = 0; virtual int flattenCCIndex(int reg) = 0; + virtual int flattenVectorIndex(int reg) = 0; virtual int flattenMiscIndex(int reg) = 0; virtual uint64_t @@ -291,6 +297,9 @@ class ThreadContext virtual CCReg readCCRegFlat(int idx) = 0; virtual void setCCRegFlat(int idx, CCReg val) = 0; + + virtual const VectorReg &readVectorRegFlat(int idx) = 0; + virtual void setVectorRegFlat(int idx, const VectorReg &val) = 0; /** @} */ }; @@ -402,6 +411,9 @@ class ProxyThreadContext : public ThreadContext CCReg readCCReg(int reg_idx) { return actualTC->readCCReg(reg_idx); } + const VectorReg &readVectorReg(int reg_idx) + { return actualTC->readVectorReg(reg_idx); } + void setIntReg(int reg_idx, uint64_t val) { actualTC->setIntReg(reg_idx, val); } @@ -414,6 +426,9 @@ class ProxyThreadContext : public ThreadContext void setCCReg(int reg_idx, CCReg val) { actualTC->setCCReg(reg_idx, val); } + void setVectorReg(int reg_idx, const VectorReg &val) + { actualTC->setVectorReg(reg_idx, val); } + TheISA::PCState pcState() { return actualTC->pcState(); } void pcState(const TheISA::PCState &val) { actualTC->pcState(val); } @@ -450,6 +465,9 @@ class ProxyThreadContext : public ThreadContext int flattenCCIndex(int reg) { return actualTC->flattenCCIndex(reg); } + int flattenVectorIndex(int reg) + { return actualTC->flattenVectorIndex(reg); } + int flattenMiscIndex(int reg) { return actualTC->flattenMiscIndex(reg); } @@ -487,6 +505,12 @@ class ProxyThreadContext : 
public ThreadContext void setCCRegFlat(int idx, CCReg val) { actualTC->setCCRegFlat(idx, val); } + + const VectorReg &readVectorRegFlat(int idx) + { return actualTC->readVectorRegFlat(idx); } + + void setVectorRegFlat(int idx, const VectorReg &val) + { actualTC->setVectorRegFlat(idx, val); } }; /** @{ */ diff --git a/src/sim/insttracer.hh b/src/sim/insttracer.hh index 6819c2199..3c954df26 100644 --- a/src/sim/insttracer.hh +++ b/src/sim/insttracer.hh @@ -58,6 +58,8 @@ namespace Trace { class InstRecord { protected: + typedef TheISA::VectorReg VectorReg; + Tick when; // The following fields are initialized by the constructor and @@ -97,6 +99,7 @@ class InstRecord union { uint64_t as_int; double as_double; + VectorReg as_vector; } data; /** @defgroup fetch_seq @@ -120,7 +123,8 @@ class InstRecord DataInt16 = 2, DataInt32 = 4, DataInt64 = 8, - DataDouble = 3 + DataDouble = 3, + DataVector = sizeof(VectorReg), } data_status; /** @ingroup memory @@ -173,6 +177,8 @@ class InstRecord void setData(int8_t d) { setData((uint8_t)d); } void setData(double d) { data.as_double = d; data_status = DataDouble; } + void setData(const VectorReg& v) + { data.as_vector = v; data_status = DataVector; } void setFetchSeq(InstSeqNum seq) { fetch_seq = seq; fetch_seq_valid = true; } @@ -198,6 +204,7 @@ class InstRecord uint64_t getIntData() const { return data.as_int; } double getFloatData() const { return data.as_double; } + const VectorReg &getVectorData() const { return data.as_vector; } int getDataStatus() const { return data_status; } InstSeqNum getFetchSeq() const { return fetch_seq; } |