From 4d8f4db8d135a23ceb5d54d3096e0598dd31e2fe Mon Sep 17 00:00:00 2001 From: Gene WU Date: Wed, 25 Aug 2010 19:10:42 -0500 Subject: ARM: Use fewer micro-ops for register update loads if possible. Allow some loads that update the base register to use just two micro-ops. Three micro-ops are only used if the destination register matches the offset register or the PC is the destination register. If the PC is updated, it needs to be the last micro-op; otherwise O3 will mispredict. --- src/arch/arm/isa/insts/ldr.isa | 46 ++++++++++++++++++++++++------------- src/arch/arm/isa/insts/macromem.isa | 38 ++++++++++++++++++++++++++---- src/arch/arm/isa/insts/mem.isa | 42 +++++++++++++++++++++++++-------- src/arch/arm/isa/insts/str.isa | 37 +++++++++++++++++++---------- 4 files changed, 121 insertions(+), 42 deletions(-) (limited to 'src/arch/arm/isa/insts') diff --git a/src/arch/arm/isa/insts/ldr.isa b/src/arch/arm/isa/insts/ldr.isa index a936ffaaf..38a458b23 100644 --- a/src/arch/arm/isa/insts/ldr.isa +++ b/src/arch/arm/isa/insts/ldr.isa @@ -67,7 +67,7 @@ let {{ self.memFlags = ["ArmISA::TLB::MustBeOne"] self.codeBlobs = {"postacc_code" : ""} - def emitHelper(self, base = 'Memory'): + def emitHelper(self, base = 'Memory', wbDecl = None): global header_output, decoder_output, exec_output @@ -76,7 +76,7 @@ let {{ (newHeader, newDecoder, newExec) = self.fillTemplates(self.name, self.Name, codeBlobs, - self.memFlags, [], base) + self.memFlags, [], base, wbDecl) header_output += newHeader decoder_output += newDecoder @@ -113,22 +113,36 @@ let {{ Cpsr = ~CondCodesMask & newCpsr; CondCodes = CondCodesMask & newCpsr; ''' - if self.writeback: - accCode += "Base = Base + %s;\n" % wbDiff self.codeBlobs["memacc_code"] = accCode - self.emitHelper('RfeOp') + wbDecl = None + if self.writeback: + wbDecl = "MicroAddiUop(machInst, base, base, %d);" % wbDiff + self.emitHelper('RfeOp', wbDecl) class LoadImmInst(LoadInst): def __init__(self, *args, **kargs): super(LoadImmInst, self).__init__(*args, 
**kargs) self.offset = self.op + " imm" + if self.add: + self.wbDecl = "MicroAddiUop(machInst, base, base, imm);" + else: + self.wbDecl = "MicroSubiUop(machInst, base, base, imm);" + class LoadRegInst(LoadInst): def __init__(self, *args, **kargs): super(LoadRegInst, self).__init__(*args, **kargs) self.offset = self.op + " shift_rm_imm(Index, shiftAmt," + \ " shiftType, CondCodes<29:>)" + if self.add: + self.wbDecl = ''' + MicroAddUop(machInst, base, base, wbIndexReg, shiftAmt, shiftType); + ''' + else: + self.wbDecl = ''' + MicroSubUop(machInst, base, base, wbIndexReg, shiftAmt, shiftType); + ''' class LoadSingle(LoadInst): def __init__(self, *args, **kargs): @@ -175,20 +189,20 @@ let {{ accCode = "IWDest = cSwap(Mem%s, ((CPSR)Cpsr).e);" accCode = accCode % buildMemSuffix(self.sign, self.size) - if self.writeback: - accCode += "Base = Base %s;\n" % self.offset - self.codeBlobs["memacc_code"] = accCode # Push it out to the output files base = buildMemBase(self.basePrefix, self.post, self.writeback) - self.emitHelper(base) + wbDecl = None + if self.writeback: + wbDecl = self.wbDecl + self.emitHelper(base, wbDecl) def loadImmClassName(post, add, writeback, size=4, sign=False, user=False): return memClassName("LOAD_IMM", post, add, writeback, size, sign, user) class LoadImm(LoadImmInst, LoadSingle): - decConstBase = 'LoadStoreImm' + decConstBase = 'LoadImm' basePrefix = 'MemoryImm' nameFunc = staticmethod(loadImmClassName) @@ -196,7 +210,7 @@ let {{ return memClassName("LOAD_REG", post, add, writeback, size, sign, user) class LoadReg(LoadRegInst, LoadSingle): - decConstBase = 'LoadStoreReg' + decConstBase = 'LoadReg' basePrefix = 'MemoryReg' nameFunc = staticmethod(loadRegClassName) @@ -244,14 +258,14 @@ let {{ FpDest2.uw = (uint32_t)(swappedMem >> 32); ''' - if self.writeback: - accCode += "Base = Base %s;\n" % self.offset - self.codeBlobs["memacc_code"] = accCode # Push it out to the output files base = buildMemBase(self.basePrefix, self.post, self.writeback) - 
self.emitHelper(base) + wbDecl = None + if self.writeback: + wbDecl = self.wbDecl + self.emitHelper(base, wbDecl) def loadDoubleImmClassName(post, add, writeback): return memClassName("LOAD_IMMD", post, add, writeback, 4, False, False) @@ -265,7 +279,7 @@ let {{ return memClassName("LOAD_REGD", post, add, writeback, 4, False, False) class LoadDoubleReg(LoadRegInst, LoadDouble): - decConstBase = 'LoadStoreDReg' + decConstBase = 'LoadDReg' basePrefix = 'MemoryDReg' nameFunc = staticmethod(loadDoubleRegClassName) diff --git a/src/arch/arm/isa/insts/macromem.isa b/src/arch/arm/isa/insts/macromem.isa index bcb1e26b8..f595f4043 100644 --- a/src/arch/arm/isa/insts/macromem.isa +++ b/src/arch/arm/isa/insts/macromem.isa @@ -575,8 +575,12 @@ let {{ ['IsMicroop']) microAddUopIop = InstObjParams('add_uop', 'MicroAddUop', - 'MicroIntOp', - {'code': 'Ra = Rb + Rc;', + 'MicroIntRegOp', + {'code': + '''Ra = Rb + shift_rm_imm(Rc, shiftAmt, + shiftType, + CondCodes<29:>); + ''', 'predicate_test': predicateTest}, ['IsMicroop']) @@ -586,15 +590,39 @@ let {{ 'predicate_test': predicateTest}, ['IsMicroop']) + microSubUopIop = InstObjParams('sub_uop', 'MicroSubUop', + 'MicroIntRegOp', + {'code': + '''Ra = Rb - shift_rm_imm(Rc, shiftAmt, + shiftType, + CondCodes<29:>); + ''', + 'predicate_test': predicateTest}, + ['IsMicroop']) + + microUopRegMovIop = InstObjParams('uopReg_uop', 'MicroUopRegMov', + 'MicroIntMov', + {'code': 'IWRa = Rb;', + 'predicate_test': predicateTest}, + ['IsMicroop']) + header_output = MicroIntImmDeclare.subst(microAddiUopIop) + \ MicroIntImmDeclare.subst(microSubiUopIop) + \ - MicroIntDeclare.subst(microAddUopIop) + MicroIntRegDeclare.subst(microAddUopIop) + \ + MicroIntRegDeclare.subst(microSubUopIop) + \ + MicroIntMovDeclare.subst(microUopRegMovIop) + decoder_output = MicroIntImmConstructor.subst(microAddiUopIop) + \ MicroIntImmConstructor.subst(microSubiUopIop) + \ - MicroIntConstructor.subst(microAddUopIop) + MicroIntRegConstructor.subst(microAddUopIop) + \ + 
MicroIntRegConstructor.subst(microSubUopIop) + \ + MicroIntMovConstructor.subst(microUopRegMovIop) + exec_output = PredOpExecute.subst(microAddiUopIop) + \ PredOpExecute.subst(microSubiUopIop) + \ - PredOpExecute.subst(microAddUopIop) + PredOpExecute.subst(microAddUopIop) + \ + PredOpExecute.subst(microSubUopIop) + \ + PredOpExecute.subst(microUopRegMovIop) }}; let {{ diff --git a/src/arch/arm/isa/insts/mem.isa b/src/arch/arm/isa/insts/mem.isa index aa47d5b7f..507f8cd4b 100644 --- a/src/arch/arm/isa/insts/mem.isa +++ b/src/arch/arm/isa/insts/mem.isa @@ -48,7 +48,7 @@ let {{ self.constructTemplate = eval(self.decConstBase + 'Constructor') def fillTemplates(self, name, Name, codeBlobs, memFlags, instFlags, - base = 'Memory'): + base = 'Memory', wbDecl = None): # Make sure flags are in lists (convert to lists if not). memFlags = makeList(memFlags) instFlags = makeList(instFlags) @@ -62,14 +62,38 @@ let {{ codeBlobs["ea_code"] = eaCode - iop = InstObjParams(name, Name, base, codeBlobs, instFlags) - - # (header_output, decoder_output, decode_block, exec_output) - return (self.declareTemplate.subst(iop), - self.constructTemplate.subst(iop), - self.fullExecTemplate.subst(iop) - + self.initiateAccTemplate.subst(iop) - + self.completeAccTemplate.subst(iop)) + macroName = Name + instFlagsCopy = list(instFlags) + codeBlobsCopy = dict(codeBlobs) + if wbDecl is not None: + instFlagsCopy.append('IsMicroop') + Name = Name + 'Acc' + codeBlobsCopy['acc_name'] = Name + codeBlobsCopy['wb_decl'] = wbDecl + codeBlobsCopy['use_uops'] = 0 + + iop = InstObjParams(name, Name, base, + codeBlobsCopy, instFlagsCopy) + + header_output = self.declareTemplate.subst(iop) + decoder_output = self.constructTemplate.subst(iop) + exec_output = self.fullExecTemplate.subst(iop) + \ + self.initiateAccTemplate.subst(iop) + \ + self.completeAccTemplate.subst(iop) + + if wbDecl is not None: + iop = InstObjParams(name, macroName, base, + { "wb_decl" : wbDecl, + "acc_name" : Name, + "use_uops" : 1 }, + 
['IsMacroop']) + header_output += self.declareTemplate.subst(iop) + decoder_output += self.constructTemplate.subst(iop) + exec_output += PanicExecute.subst(iop) + \ + PanicInitiateAcc.subst(iop) + \ + PanicCompleteAcc.subst(iop) + + return (header_output, decoder_output, exec_output) def pickPredicate(blobs): for val in blobs.values(): diff --git a/src/arch/arm/isa/insts/str.isa b/src/arch/arm/isa/insts/str.isa index 66a486ecf..ff98c58d2 100644 --- a/src/arch/arm/isa/insts/str.isa +++ b/src/arch/arm/isa/insts/str.isa @@ -67,7 +67,7 @@ let {{ self.memFlags = ["ArmISA::TLB::MustBeOne"] self.codeBlobs = { "postacc_code" : "" } - def emitHelper(self, base = 'Memory'): + def emitHelper(self, base = 'Memory', wbDecl = None): global header_output, decoder_output, exec_output @@ -76,7 +76,7 @@ let {{ (newHeader, newDecoder, newExec) = self.fillTemplates(self.name, self.Name, codeBlobs, - self.memFlags, [], base) + self.memFlags, [], base, wbDecl) header_output += newHeader decoder_output += newDecoder @@ -137,11 +137,24 @@ let {{ super(StoreImmInst, self).__init__(*args, **kargs) self.offset = self.op + " imm" + if self.add: + self.wbDecl = "MicroAddiUop(machInst, base, base, imm);" + else: + self.wbDecl = "MicroSubiUop(machInst, base, base, imm);" + class StoreRegInst(StoreInst): def __init__(self, *args, **kargs): super(StoreRegInst, self).__init__(*args, **kargs) self.offset = self.op + " shift_rm_imm(Index, shiftAmt," + \ " shiftType, CondCodes<29:>)" + if self.add: + self.wbDecl = ''' + MicroAddUop(machInst, base, base, index, shiftAmt, shiftType); + ''' + else: + self.wbDecl = ''' + MicroSubUop(machInst, base, base, index, shiftAmt, shiftType); + ''' class StoreSingle(StoreInst): def __init__(self, *args, **kargs): @@ -186,14 +199,14 @@ let {{ accCode = accCode % \ { "suffix" : buildMemSuffix(self.sign, self.size) } - if self.writeback: - accCode += "Base = Base %s;\n" % self.offset - self.codeBlobs["memacc_code"] = accCode # Push it out to the output files base = 
buildMemBase(self.basePrefix, self.post, self.writeback) - self.emitHelper(base) + wbDecl = None + if self.writeback: + wbDecl = self.wbDecl + self.emitHelper(base, wbDecl) def storeImmClassName(post, add, writeback, size=4, sign=False, user=False): return memClassName("STORE_IMM", post, add, writeback, size, sign, user) @@ -217,7 +230,7 @@ let {{ return memClassName("STORE_REG", post, add, writeback, size, sign, user) class StoreReg(StoreRegInst, StoreSingle): - decConstBase = 'LoadStoreReg' + decConstBase = 'StoreReg' basePrefix = 'MemoryReg' nameFunc = staticmethod(storeRegClassName) @@ -265,14 +278,14 @@ let {{ ((uint64_t)cSwap(Dest2.uw, cpsr.e) << 32); ''' - if self.writeback: - accCode += "Base = Base %s;\n" % self.offset - self.codeBlobs["memacc_code"] = accCode # Push it out to the output files base = buildMemBase(self.basePrefix, self.post, self.writeback) - self.emitHelper(base) + wbDecl = None + if self.writeback: + wbDecl = self.wbDecl + self.emitHelper(base, wbDecl) def storeDoubleImmClassName(post, add, writeback): return memClassName("STORE_IMMD", post, add, writeback, 4, False, False) @@ -296,7 +309,7 @@ let {{ return memClassName("STORE_REGD", post, add, writeback, 4, False, False) class StoreDoubleReg(StoreRegInst, StoreDouble): - decConstBase = 'LoadStoreDReg' + decConstBase = 'StoreDReg' basePrefix = 'MemoryDReg' nameFunc = staticmethod(storeDoubleRegClassName) -- cgit v1.2.3