diff options
author | Ali Saidi <Ali.Saidi@ARM.com> | 2011-03-17 19:24:37 -0500 |
---|---|---|
committer | Ali Saidi <Ali.Saidi@ARM.com> | 2011-03-17 19:24:37 -0500 |
commit | 6daf44dae6dbe931e2a1493cd0e33ca9732509dd (patch) | |
tree | f8815e56c08a92059b5d576728f564c7bfce7bf0 /src | |
parent | c4de6a05229bbc42ae4b247541c823edb8d4ca76 (diff) | |
parent | 63eb337b3b93ab71ab3157ec6487901d4fc6cda6 (diff) | |
download | gem5-6daf44dae6dbe931e2a1493cd0e33ca9732509dd.tar.xz |
Automated merge with ssh://hg@repo.m5sim.org/m5
Diffstat (limited to 'src')
52 files changed, 697 insertions, 178 deletions
diff --git a/src/arch/alpha/predecoder.hh b/src/arch/alpha/predecoder.hh index a8788051f..2f8c4c2ef 100644 --- a/src/arch/alpha/predecoder.hh +++ b/src/arch/alpha/predecoder.hh @@ -76,6 +76,12 @@ class Predecoder emiIsReady = false; } + void + reset(const ExtMachInst &old_emi) + { + reset(); + } + // Use this to give data to the predecoder. This should be used // when there is control flow. void diff --git a/src/arch/arm/insts/branch.hh b/src/arch/arm/insts/branch.hh index fbdd10d68..0e33a9214 100644 --- a/src/arch/arm/insts/branch.hh +++ b/src/arch/arm/insts/branch.hh @@ -57,6 +57,7 @@ class BranchImm : public PredOp int32_t _imm) : PredOp(mnem, _machInst, __opClass), imm(_imm) {} + }; // Conditionally Branch to a target computed with an immediate diff --git a/src/arch/arm/insts/macromem.cc b/src/arch/arm/insts/macromem.cc index 2a45cf2e6..8a82bd319 100644 --- a/src/arch/arm/insts/macromem.cc +++ b/src/arch/arm/insts/macromem.cc @@ -58,8 +58,13 @@ MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst, { uint32_t regs = reglist; uint32_t ones = number_of_ones(reglist); - // Remember that writeback adds a uop - numMicroops = ones + (writeback ? 1 : 0) + 1; + // Remember that writeback adds a uop or two and the temp register adds one + numMicroops = ones + (writeback ? (load ? 2 : 1) : 0) + 1; + + // It's technically legal to do a lot of nothing + if (!ones) + numMicroops = 1; + microOps = new StaticInstPtr[numMicroops]; uint32_t addr = 0; @@ -70,28 +75,13 @@ MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst, addr += 4; StaticInstPtr *uop = microOps; - StaticInstPtr wbUop; - if (writeback) { - if (up) { - wbUop = new MicroAddiUop(machInst, rn, rn, ones * 4); - } else { - wbUop = new MicroSubiUop(machInst, rn, rn, ones * 4); - } - } // Add 0 to Rn and stick it in ureg0. // This is equivalent to a move. *uop = new MicroAddiUop(machInst, INTREG_UREG0, rn, 0); - // Write back at the start for loads. This covers the ldm exception return - // case where the base needs to be written in the old mode. Stores may need - // the original value of the base, but they don't change mode and can - // write back at the end like before. - if (load && writeback) { - *++uop = wbUop; - } - unsigned reg = 0; + unsigned regIdx = 0; bool force_user = user & !bits(reglist, 15); bool exception_ret = user & bits(reglist, 15); @@ -101,19 +91,28 @@ MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst, reg++; replaceBits(regs, reg, 0); - unsigned regIdx = reg; + regIdx = reg; if (force_user) { regIdx = intRegInMode(MODE_USER, regIdx); } if (load) { - if (reg == INTREG_PC && exception_ret) { - // This must be the exception return form of ldm. - *++uop = new MicroLdrRetUop(machInst, regIdx, - INTREG_UREG0, up, addr); + if (writeback && i == ones - 1) { + // If it's a writeback and this is the last register + // do the load into a temporary register which we'll move + // into the final one later + *++uop = new MicroLdrUop(machInst, INTREG_UREG1, INTREG_UREG0, + up, addr); } else { - *++uop = new MicroLdrUop(machInst, regIdx, - INTREG_UREG0, up, addr); + // Otherwise just do it normally + if (reg == INTREG_PC && exception_ret) { + // This must be the exception return form of ldm. + *++uop = new MicroLdrRetUop(machInst, regIdx, + INTREG_UREG0, up, addr); + } else { + *++uop = new MicroLdrUop(machInst, regIdx, + INTREG_UREG0, up, addr); + } } } else { *++uop = new MicroStrUop(machInst, regIdx, INTREG_UREG0, up, addr); @@ -125,8 +124,32 @@ MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst, addr -= 4; } - if (!load && writeback) { - *++uop = wbUop; + if (writeback && ones) { + // put the register update after we're done all loading + if (up) + *++uop = new MicroAddiUop(machInst, rn, rn, ones * 4); + else + *++uop = new MicroSubiUop(machInst, rn, rn, ones * 4); + + // If this was a load move the last temporary value into place + // this way we can't take an exception after we update the base + // register. + if (load && reg == INTREG_PC && exception_ret) { + *++uop = new MicroUopRegMovRet(machInst, 0, INTREG_UREG1); + warn("creating instruction with exception return at curTick:%d\n", + curTick()); + } else if (load) { + *++uop = new MicroUopRegMov(machInst, regIdx, INTREG_UREG1); + if (reg == INTREG_PC) { + (*uop)->setFlag(StaticInstBase::IsControl); + (*uop)->setFlag(StaticInstBase::IsCondControl); + (*uop)->setFlag(StaticInstBase::IsIndirectControl); + // This is created as a RAS POP + if (rn == INTREG_SP) + (*uop)->setFlag(StaticInstBase::IsReturn); + + } + } } (*uop)->setLastMicroop(); @@ -896,6 +919,15 @@ MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const } std::string +MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss); + ss << "[PC,CPSR]"; + return ss.str(); +} + +std::string MicroIntMov::generateDisassembly(Addr pc, const SymbolTable *symtab) const { std::stringstream ss; diff --git a/src/arch/arm/insts/macromem.hh b/src/arch/arm/insts/macromem.hh index 1a2db8b9a..4933a1e7c 100644 --- a/src/arch/arm/insts/macromem.hh +++ b/src/arch/arm/insts/macromem.hh @@ -134,6 +134,27 @@ class MicroNeonMixLaneOp : public MicroNeonMixOp { } }; + +/** + * Microops of the form + * PC = IntRegA + * CPSR = IntRegB + */ +class MicroSetPCCPSR : public MicroOp +{ + protected: + IntRegIndex ura, urb, urc; + + MicroSetPCCPSR(const char *mnem, ExtMachInst machInst, OpClass __opClass, + IntRegIndex _ura, IntRegIndex _urb, IntRegIndex _urc) + : MicroOp(mnem, machInst, __opClass), + ura(_ura), urb(_urb), urc(_urc) + { + } + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + /** * Microops of the form IntRegA = IntRegB */ diff --git a/src/arch/arm/insts/mem.hh b/src/arch/arm/insts/mem.hh index a4fc62603..324d86fed 100644 --- a/src/arch/arm/insts/mem.hh +++ b/src/arch/arm/insts/mem.hh @@ -97,14 +97,18 @@ class RfeOp : public MightBeMicro IntRegIndex base; AddrMode mode; bool wb; - static const unsigned numMicroops = 2; + IntRegIndex ura, urb, urc; + static const unsigned numMicroops = 3; StaticInstPtr *uops; RfeOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, IntRegIndex _base, AddrMode _mode, bool _wb) : MightBeMicro(mnem, _machInst, __opClass), - base(_base), mode(_mode), wb(_wb), uops(NULL) + base(_base), mode(_mode), wb(_wb), + ura(INTREG_UREG0), urb(INTREG_UREG1), + urc(INTREG_UREG2), + uops(NULL) {} virtual diff --git a/src/arch/arm/intregs.hh b/src/arch/arm/intregs.hh index 4b2cc560d..2cbed6c59 100644 --- a/src/arch/arm/intregs.hh +++ b/src/arch/arm/intregs.hh @@ -110,6 +110,8 @@ enum IntRegIndex INTREG_ZERO, // Dummy zero reg since there has to be one. INTREG_UREG0, + INTREG_UREG1, + INTREG_UREG2, INTREG_CONDCODES, INTREG_FPCONDCODES, diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc index 4bdbe77ce..d720becba 100644 --- a/src/arch/arm/isa.cc +++ b/src/arch/arm/isa.cc @@ -143,6 +143,16 @@ ISA::clear() miscRegs[MISCREG_CPACR] = 0; miscRegs[MISCREG_FPSID] = 0x410430A0; + + // See section B4.1.84 of ARM ARM + // All values are latest for ARMv7-A profile + miscRegs[MISCREG_ID_ISAR0] = 0x01101111; + miscRegs[MISCREG_ID_ISAR1] = 0x02112111; + miscRegs[MISCREG_ID_ISAR2] = 0x21232141; + miscRegs[MISCREG_ID_ISAR3] = 0x01112131; + miscRegs[MISCREG_ID_ISAR4] = 0x10010142; + miscRegs[MISCREG_ID_ISAR5] = 0x00000000; + //XXX We need to initialize the rest of the state. } diff --git a/src/arch/arm/isa/formats/fp.isa b/src/arch/arm/isa/formats/fp.isa index 3a0cad1c5..2267ee34f 100644 --- a/src/arch/arm/isa/formats/fp.isa +++ b/src/arch/arm/isa/formats/fp.isa @@ -143,7 +143,7 @@ let {{ bits(machInst, 22) << 4); const uint32_t type = bits(machInst, 11, 8); uint32_t size = 0; - uint32_t align = 0; + uint32_t align = TLB::MustBeOne; unsigned inc = 1; unsigned regs = 1; unsigned lane = 0; diff --git a/src/arch/arm/isa/insts/branch.isa b/src/arch/arm/isa/insts/branch.isa index d8ea2118e..84b9bb720 100644 --- a/src/arch/arm/isa/insts/branch.isa +++ b/src/arch/arm/isa/insts/branch.isa @@ -48,6 +48,8 @@ let {{ bCode = ''' NPC = (uint32_t)(PC + imm); ''' + br_tgt_code = '''pcs.instNPC(branchPC.instPC() + imm);''' + instFlags = ["IsDirectControl"] if (link): bCode += ''' if (Thumb) @@ -55,12 +57,15 @@ let {{ else LR = PC - 4; ''' + instFlags += ["IsCall"] + bIop = InstObjParams(mnem, mnem.capitalize(), "BranchImmCond", - {"code": bCode, - "predicate_test": predicateTest}) + {"code": bCode, "predicate_test": predicateTest, + "brTgtCode" : br_tgt_code}, instFlags) header_output += BranchImmCondDeclare.subst(bIop) - decoder_output += BranchImmCondConstructor.subst(bIop) + decoder_output += BranchImmCondConstructor.subst(bIop) + \ + BranchTarget.subst(bIop) exec_output += PredOpExecute.subst(bIop) # BX, BLX @@ -81,15 +86,22 @@ let {{ # Since we're switching ISAs, the target ISA will be the opposite # of the current ISA. Thumb is whether the target is ARM. newPC = '(Thumb ? (roundDown(PC, 4) + imm) : (PC + imm))' + br_tgt_code = ''' + pcs.instNPC((branchPC.thumb() ? (roundDown(branchPC.instPC(),4) + imm) : + (branchPC.instPC() + imm))); + ''' base = "BranchImmCond" declare = BranchImmCondDeclare constructor = BranchImmCondConstructor + instFlags = ["IsDirectControl"] else: Name += "Reg" newPC = 'Op1' + br_tgt_code = '' base = "BranchRegCond" declare = BranchRegCondDeclare constructor = BranchRegCondConstructor + instFlags = ["IsIndirectControl"] if link and imm: linkStr = ''' // The immediate version of the blx thumb instruction @@ -100,6 +112,7 @@ let {{ else LR = PC - 4; ''' + instFlags += ["IsCall"] elif link: linkStr = ''' if (Thumb) @@ -107,14 +120,18 @@ let {{ else LR = PC - 4; ''' + instFlags += ["IsCall"] else: linkStr = "" + instFlags += ["IsReturn"] if imm and link: #blx with imm branchStr = ''' NextThumb = !Thumb; NPC = %(newPC)s; ''' + br_tgt_code = '''pcs.nextThumb(!branchPC.thumb());\n''' + \ + br_tgt_code else: branchStr = "IWNPC = %(newPC)s;" branchStr = branchStr % { "newPC" : newPC } @@ -123,11 +140,13 @@ let {{ "newPC": newPC, "branch": branchStr} blxIop = InstObjParams(mnem, Name, base, - {"code": code, - "predicate_test": predicateTest}) + {"code": code, "brTgtCode" : br_tgt_code, + "predicate_test": predicateTest}, instFlags) header_output += declare.subst(blxIop) decoder_output += constructor.subst(blxIop) exec_output += PredOpExecute.subst(blxIop) + if imm: + decoder_output += BranchTarget.subst(blxIop) #Ignore BXJ for now @@ -136,7 +155,8 @@ let {{ code = 'NPC = (uint32_t)(PC + imm);\n' predTest = "Op1 %(test)s 0" % {"test": test} iop = InstObjParams(mnem, mnem.capitalize(), "BranchImmReg", - {"code": code, "predicate_test": predTest}) + {"code": code, "predicate_test": predTest}, + ["IsIndirectControl"]) header_output += BranchImmRegDeclare.subst(iop) decoder_output += BranchImmRegConstructor.subst(iop) exec_output += PredOpExecute.subst(iop) @@ -164,7 +184,8 @@ let {{ iop = InstObjParams(mnem, mnem.capitalize(), "BranchRegReg", {'ea_code': eaCode, 'memacc_code': accCode, - 'predicate_test': predicateTest}) + 'predicate_test': predicateTest}, + ["IsIndirectControl"]) header_output += BranchTableDeclare.subst(iop) decoder_output += BranchRegRegConstructor.subst(iop) exec_output += LoadExecute.subst(iop) + \ diff --git a/src/arch/arm/isa/insts/ldr.isa b/src/arch/arm/isa/insts/ldr.isa index c60a91a50..2e45f2875 100644 --- a/src/arch/arm/isa/insts/ldr.isa +++ b/src/arch/arm/isa/insts/ldr.isa @@ -67,7 +67,7 @@ let {{ self.memFlags = ["ArmISA::TLB::MustBeOne"] self.codeBlobs = {"postacc_code" : ""} - def emitHelper(self, base = 'Memory', wbDecl = None, instFlags = []): + def emitHelper(self, base = 'Memory', wbDecl = None, instFlags = [], pcDecl = None): global header_output, decoder_output, exec_output @@ -76,7 +76,8 @@ let {{ (newHeader, newDecoder, newExec) = self.fillTemplates(self.name, self.Name, codeBlobs, - self.memFlags, instFlags, base, wbDecl) + self.memFlags, instFlags, base, + wbDecl, pcDecl) header_output += newHeader decoder_output += newDecoder @@ -104,26 +105,18 @@ let {{ wbDiff = 8 accCode = ''' CPSR cpsr = Cpsr; - SCTLR sctlr = Sctlr; - // Use the version of NPC that gets set before NextThumb - pNPC = cSwap<uint32_t>(Mem.ud, cpsr.e); - uint32_t tempSpsr = cSwap<uint32_t>(Mem.ud >> 32, cpsr.e); - uint32_t newCpsr = - cpsrWriteByInstr(cpsr | CondCodes, tempSpsr, - 0xF, true, sctlr.nmfi); - Cpsr = ~CondCodesMask & newCpsr; - NextThumb = ((CPSR)newCpsr).t; - NextJazelle = ((CPSR)newCpsr).j; - ForcedItState = ((((CPSR)tempSpsr).it2 << 2) & 0xFC) - | (((CPSR)tempSpsr).it1 & 0x3); - CondCodes = CondCodesMask & newCpsr; + URc = cpsr | CondCodes; + URa = cSwap<uint32_t>(Mem.ud, cpsr.e); + URb = cSwap<uint32_t>(Mem.ud >> 32, cpsr.e); ''' self.codeBlobs["memacc_code"] = accCode wbDecl = None + pcDecl = "MicroUopSetPCCPSR(machInst, INTREG_UREG0, INTREG_UREG1, INTREG_UREG2);" + if self.writeback: wbDecl = "MicroAddiUop(machInst, base, base, %d);" % wbDiff - self.emitHelper('RfeOp', wbDecl, ["IsSerializeAfter", "IsNonSpeculative"]) + self.emitHelper('RfeOp', wbDecl, ["IsSerializeAfter", "IsNonSpeculative"], pcDecl) class LoadImmInst(LoadInst): def __init__(self, *args, **kargs): diff --git a/src/arch/arm/isa/insts/m5ops.isa b/src/arch/arm/isa/insts/m5ops.isa index da3609bbc..8521cbc97 100644 --- a/src/arch/arm/isa/insts/m5ops.isa +++ b/src/arch/arm/isa/insts/m5ops.isa @@ -66,7 +66,7 @@ let {{ ["IsNonSpeculative", "IsQuiesce"]) header_output += BasicDeclare.subst(quiesceIop) decoder_output += BasicConstructor.subst(quiesceIop) - exec_output += PredOpExecute.subst(quiesceIop) + exec_output += QuiescePredOpExecute.subst(quiesceIop) quiesceNsCode = ''' #if FULL_SYSTEM @@ -80,7 +80,7 @@ let {{ ["IsNonSpeculative", "IsQuiesce"]) header_output += BasicDeclare.subst(quiesceNsIop) decoder_output += BasicConstructor.subst(quiesceNsIop) - exec_output += PredOpExecute.subst(quiesceNsIop) + exec_output += QuiescePredOpExecute.subst(quiesceNsIop) quiesceCyclesCode = ''' #if FULL_SYSTEM @@ -94,7 +94,7 @@ let {{ ["IsNonSpeculative", "IsQuiesce", "IsUnverifiable"]) header_output += BasicDeclare.subst(quiesceCyclesIop) decoder_output += BasicConstructor.subst(quiesceCyclesIop) - exec_output += PredOpExecute.subst(quiesceCyclesIop) + exec_output += QuiescePredOpExecute.subst(quiesceCyclesIop) quiesceTimeCode = ''' #if FULL_SYSTEM diff --git a/src/arch/arm/isa/insts/macromem.isa b/src/arch/arm/isa/insts/macromem.isa index 33d57a60b..15879e0e3 100644 --- a/src/arch/arm/isa/insts/macromem.isa +++ b/src/arch/arm/isa/insts/macromem.isa @@ -51,7 +51,7 @@ let {{ microLdrUopIop = InstObjParams('ldr_uop', 'MicroLdrUop', 'MicroMemOp', {'memacc_code': microLdrUopCode, - 'ea_code': 'EA = Rb + (up ? imm : -imm);', + 'ea_code': 'EA = URb + (up ? imm : -imm);', 'predicate_test': predicateTest}, ['IsMicroop']) @@ -60,7 +60,7 @@ let {{ 'MicroMemOp', {'memacc_code': microLdrFpUopCode, 'ea_code': vfpEnabledCheckCode + - 'EA = Rb + (up ? imm : -imm);', + 'EA = URb + (up ? imm : -imm);', 'predicate_test': predicateTest}, ['IsMicroop']) @@ -69,7 +69,7 @@ let {{ 'MicroMemOp', {'memacc_code': microLdrFpUopCode, 'ea_code': vfpEnabledCheckCode + ''' - EA = Rb + (up ? imm : -imm) + + EA = URb + (up ? imm : -imm) + (((CPSR)Cpsr).e ? 4 : 0); ''', 'predicate_test': predicateTest}, @@ -80,37 +80,40 @@ let {{ 'MicroMemOp', {'memacc_code': microLdrFpUopCode, 'ea_code': vfpEnabledCheckCode + ''' - EA = Rb + (up ? imm : -imm) - + EA = URb + (up ? imm : -imm) - (((CPSR)Cpsr).e ? 4 : 0); ''', 'predicate_test': predicateTest}, ['IsMicroop']) - microLdrRetUopCode = ''' + microRetUopCode = ''' CPSR cpsr = Cpsr; SCTLR sctlr = Sctlr; uint32_t newCpsr = cpsrWriteByInstr(cpsr | CondCodes, Spsr, 0xF, true, sctlr.nmfi); Cpsr = ~CondCodesMask & newCpsr; CondCodes = CondCodesMask & newCpsr; - IWNPC = cSwap(Mem.uw, cpsr.e) | ((Spsr & 0x20) ? 1 : 0); + IWNPC = cSwap(%s, cpsr.e) | ((Spsr & 0x20) ? 1 : 0); ForcedItState = ((((CPSR)Spsr).it2 << 2) & 0xFC) | (((CPSR)Spsr).it1 & 0x3); ''' + microLdrRetUopIop = InstObjParams('ldr_ret_uop', 'MicroLdrRetUop', 'MicroMemOp', - {'memacc_code': microLdrRetUopCode, + {'memacc_code': + microRetUopCode % 'Mem.uw', 'ea_code': - 'EA = Rb + (up ? imm : -imm);', + 'EA = URb + (up ? imm : -imm);', 'predicate_test': condPredicateTest}, - ['IsMicroop','IsNonSpeculative','IsSerializeAfter']) + ['IsMicroop','IsNonSpeculative', + 'IsSerializeAfter']) - microStrUopCode = "Mem = cSwap(Ra.uw, ((CPSR)Cpsr).e);" + microStrUopCode = "Mem = cSwap(URa.uw, ((CPSR)Cpsr).e);" microStrUopIop = InstObjParams('str_uop', 'MicroStrUop', 'MicroMemOp', {'memacc_code': microStrUopCode, 'postacc_code': "", - 'ea_code': 'EA = Rb + (up ? imm : -imm);', + 'ea_code': 'EA = URb + (up ? imm : -imm);', 'predicate_test': predicateTest}, ['IsMicroop']) @@ -120,7 +123,7 @@ let {{ {'memacc_code': microStrFpUopCode, 'postacc_code': "", 'ea_code': vfpEnabledCheckCode + - 'EA = Rb + (up ? imm : -imm);', + 'EA = URb + (up ? imm : -imm);', 'predicate_test': predicateTest}, ['IsMicroop']) @@ -130,7 +133,7 @@ let {{ {'memacc_code': microStrFpUopCode, 'postacc_code': "", 'ea_code': vfpEnabledCheckCode + ''' - EA = Rb + (up ? imm : -imm) + + EA = URb + (up ? imm : -imm) + (((CPSR)Cpsr).e ? 4 : 0); ''', 'predicate_test': predicateTest}, @@ -142,7 +145,7 @@ let {{ {'memacc_code': microStrFpUopCode, 'postacc_code': "", 'ea_code': vfpEnabledCheckCode + ''' - EA = Rb + (up ? imm : -imm) - + EA = URb + (up ? imm : -imm) - (((CPSR)Cpsr).e ? 4 : 0); ''', 'predicate_test': predicateTest}, @@ -170,7 +173,7 @@ let {{ let {{ exec_output = header_output = '' - eaCode = 'EA = Ra + imm;' + eaCode = 'EA = URa + imm;' for size in (1, 2, 3, 4, 6, 8, 12, 16): # Set up the memory access. @@ -572,14 +575,14 @@ let {{ let {{ microAddiUopIop = InstObjParams('addi_uop', 'MicroAddiUop', 'MicroIntImmOp', - {'code': 'Ra = Rb + imm;', + {'code': 'URa = URb + imm;', 'predicate_test': predicateTest}, ['IsMicroop']) microAddUopIop = InstObjParams('add_uop', 'MicroAddUop', 'MicroIntRegOp', {'code': - '''Ra = Rb + shift_rm_imm(Rc, shiftAmt, + '''URa = URb + shift_rm_imm(URc, shiftAmt, shiftType, CondCodes<29:>); ''', @@ -588,14 +591,14 @@ let {{ microSubiUopIop = InstObjParams('subi_uop', 'MicroSubiUop', 'MicroIntImmOp', - {'code': 'Ra = Rb - imm;', + {'code': 'URa = URb - imm;', 'predicate_test': predicateTest}, ['IsMicroop']) microSubUopIop = InstObjParams('sub_uop', 'MicroSubUop', 'MicroIntRegOp', {'code': - '''Ra = Rb - shift_rm_imm(Rc, shiftAmt, + '''URa = URb - shift_rm_imm(URc, shiftAmt, shiftType, CondCodes<29:>); ''', @@ -604,27 +607,62 @@ let {{ microUopRegMovIop = InstObjParams('uopReg_uop', 'MicroUopRegMov', 'MicroIntMov', - {'code': 'IWRa = Rb;', + {'code': 'IWRa = URb;', 'predicate_test': predicateTest}, ['IsMicroop']) + microUopRegMovRetIop = InstObjParams('movret_uop', 'MicroUopRegMovRet', + 'MicroIntMov', + {'code': microRetUopCode % 'URb', + 'predicate_test': predicateTest}, + ['IsMicroop', 'IsNonSpeculative', + 'IsSerializeAfter']) + + setPCCPSRDecl = ''' + CPSR cpsrOrCondCodes = URc; + SCTLR sctlr = Sctlr; + pNPC = URa; + uint32_t newCpsr = + cpsrWriteByInstr(cpsrOrCondCodes, URb, + 0xF, true, sctlr.nmfi); + Cpsr = ~CondCodesMask & newCpsr; + NextThumb = ((CPSR)newCpsr).t; + NextJazelle = ((CPSR)newCpsr).j; + ForcedItState = ((((CPSR)URb).it2 << 2) & 0xFC) + | (((CPSR)URb).it1 & 0x3); + CondCodes = CondCodesMask & newCpsr; + ''' + + microUopSetPCCPSRIop = InstObjParams('uopSet_uop', 'MicroUopSetPCCPSR', + 'MicroSetPCCPSR', + {'code': setPCCPSRDecl, + 'predicate_test': predicateTest}, + ['IsMicroop']) + header_output = MicroIntImmDeclare.subst(microAddiUopIop) + \ MicroIntImmDeclare.subst(microSubiUopIop) + \ MicroIntRegDeclare.subst(microAddUopIop) + \ MicroIntRegDeclare.subst(microSubUopIop) + \ - MicroIntMovDeclare.subst(microUopRegMovIop) + MicroIntMovDeclare.subst(microUopRegMovIop) + \ + MicroIntMovDeclare.subst(microUopRegMovRetIop) + \ + MicroSetPCCPSRDeclare.subst(microUopSetPCCPSRIop) decoder_output = MicroIntImmConstructor.subst(microAddiUopIop) + \ MicroIntImmConstructor.subst(microSubiUopIop) + \ MicroIntRegConstructor.subst(microAddUopIop) + \ MicroIntRegConstructor.subst(microSubUopIop) + \ - MicroIntMovConstructor.subst(microUopRegMovIop) + MicroIntMovConstructor.subst(microUopRegMovIop) + \ + MicroIntMovConstructor.subst(microUopRegMovRetIop) + \ + MicroSetPCCPSRConstructor.subst(microUopSetPCCPSRIop) exec_output = PredOpExecute.subst(microAddiUopIop) + \ PredOpExecute.subst(microSubiUopIop) + \ PredOpExecute.subst(microAddUopIop) + \ PredOpExecute.subst(microSubUopIop) + \ - PredOpExecute.subst(microUopRegMovIop) + PredOpExecute.subst(microUopRegMovIop) + \ + PredOpExecute.subst(microUopRegMovRetIop) + \ + PredOpExecute.subst(microUopSetPCCPSRIop) + }}; let {{ diff --git a/src/arch/arm/isa/insts/mem.isa b/src/arch/arm/isa/insts/mem.isa index 507f8cd4b..d0c0f0710 100644 --- a/src/arch/arm/isa/insts/mem.isa +++ b/src/arch/arm/isa/insts/mem.isa @@ -48,7 +48,7 @@ let {{ self.constructTemplate = eval(self.decConstBase + 'Constructor') def fillTemplates(self, name, Name, codeBlobs, memFlags, instFlags, - base = 'Memory', wbDecl = None): + base = 'Memory', wbDecl = None, pcDecl = None): # Make sure flags are in lists (convert to lists if not). memFlags = makeList(memFlags) instFlags = makeList(instFlags) @@ -65,12 +65,26 @@ let {{ macroName = Name instFlagsCopy = list(instFlags) codeBlobsCopy = dict(codeBlobs) - if wbDecl is not None: + + use_uops = 0 + if wbDecl is not None or pcDecl is not None: instFlagsCopy.append('IsMicroop') Name = Name + 'Acc' + use_uops = 1 + + use_wb = 0 + use_pc = 0 + if wbDecl is not None: + use_wb = 1 + if pcDecl is not None: + use_pc = 1 + codeBlobsCopy['acc_name'] = Name codeBlobsCopy['wb_decl'] = wbDecl + codeBlobsCopy['pc_decl'] = pcDecl codeBlobsCopy['use_uops'] = 0 + codeBlobsCopy['use_wb'] = 0 + codeBlobsCopy['use_pc'] = 0 iop = InstObjParams(name, Name, base, codeBlobsCopy, instFlagsCopy) @@ -81,11 +95,14 @@ let {{ self.initiateAccTemplate.subst(iop) + \ self.completeAccTemplate.subst(iop) - if wbDecl is not None: + if wbDecl is not None or pcDecl is not None: iop = InstObjParams(name, macroName, base, { "wb_decl" : wbDecl, + "pc_decl" : pcDecl, "acc_name" : Name, - "use_uops" : 1 }, + "use_uops" : use_uops, + "use_pc" : use_pc, + "use_wb" : use_wb }, ['IsMacroop']) header_output += self.declareTemplate.subst(iop) decoder_output += self.constructTemplate.subst(iop) diff --git a/src/arch/arm/isa/insts/misc.isa b/src/arch/arm/isa/insts/misc.isa index be51d927d..cf5c7b47a 100644 --- a/src/arch/arm/isa/insts/misc.isa +++ b/src/arch/arm/isa/insts/misc.isa @@ -491,10 +491,13 @@ let {{ wfeCode = ''' #if FULL_SYSTEM - if (SevMailbox) + if (SevMailbox) { SevMailbox = 0; - else + PseudoInst::quiesceSkip(xc->tcBase()); + } + else { PseudoInst::quiesce(xc->tcBase()); + } #endif ''' wfeIop = InstObjParams("wfe", "WfeInst", "PredOp", \ @@ -502,7 +505,7 @@ let {{ ["IsNonSpeculative", "IsQuiesce", "IsSerializeAfter"]) header_output += BasicDeclare.subst(wfeIop) decoder_output += BasicConstructor.subst(wfeIop) - exec_output += PredOpExecute.subst(wfeIop) + exec_output += QuiescePredOpExecute.subst(wfeIop) wfiCode = ''' #if FULL_SYSTEM @@ -511,22 +514,25 @@ let {{ ''' wfiIop = InstObjParams("wfi", "WfiInst", "PredOp", \ { "code" : wfiCode, "predicate_test" : predicateTest }, - ["IsNonSpeculative", "IsQuiesce"]) + ["IsNonSpeculative", "IsQuiesce", "IsSerializeAfter"]) header_output += BasicDeclare.subst(wfiIop) decoder_output += BasicConstructor.subst(wfiIop) - exec_output += PredOpExecute.subst(wfiIop) + exec_output += QuiescePredOpExecute.subst(wfiIop) sevCode = ''' // Need a way for O3 to not scoreboard these accesses as pipe flushes. + SevMailbox = 1; System *sys = xc->tcBase()->getSystemPtr(); for (int x = 0; x < sys->numContexts(); x++) { ThreadContext *oc = sys->getThreadContext(x); - oc->setMiscReg(MISCREG_SEV_MAILBOX, 1); + if (oc != xc->tcBase()) { + oc->setMiscReg(MISCREG_SEV_MAILBOX, 1); + } } ''' sevIop = InstObjParams("sev", "SevInst", "PredOp", \ { "code" : sevCode, "predicate_test" : predicateTest }, - ["IsNonSpeculative", "IsQuiesce", "IsSerializeAfter"]) + ["IsNonSpeculative", "IsSquashAfter"]) header_output += BasicDeclare.subst(sevIop) decoder_output += BasicConstructor.subst(sevIop) exec_output += PredOpExecute.subst(sevIop) diff --git a/src/arch/arm/isa/insts/str.isa b/src/arch/arm/isa/insts/str.isa index f661961f7..e5d47c28f 100644 --- a/src/arch/arm/isa/insts/str.isa +++ b/src/arch/arm/isa/insts/str.isa @@ -222,7 +222,6 @@ let {{ decConstBase = 'StoreExImm' basePrefix = 'MemoryExImm' nameFunc = staticmethod(storeImmClassName) - instFlags = ['IsStoreConditional'] def __init__(self, *args, **kargs): super(StoreImmEx, self).__init__(*args, **kargs) @@ -302,7 +301,6 @@ let {{ decConstBase = 'StoreExDImm' basePrefix = 'MemoryExDImm' nameFunc = staticmethod(storeDoubleImmClassName) - instFlags = ['IsStoreConditional'] def __init__(self, *args, **kargs): super(StoreDoubleImmEx, self).__init__(*args, **kargs) @@ -370,10 +368,14 @@ let {{ buildDoubleStores("strd") - StoreImmEx("strex", False, True, False, size=4, flavor="exclusive").emit() - StoreImmEx("strexh", False, True, False, size=2, flavor="exclusive").emit() - StoreImmEx("strexb", False, True, False, size=1, flavor="exclusive").emit() - StoreDoubleImmEx("strexd", False, True, False, flavor="exclusive").emit() + StoreImmEx("strex", False, True, False, size=4, flavor="exclusive", + instFlags = ['IsStoreConditional']).emit() + StoreImmEx("strexh", False, True, False, size=2, flavor="exclusive", + instFlags = ['IsStoreConditional']).emit() + StoreImmEx("strexb", False, True, False, size=1, flavor="exclusive", + instFlags = ['IsStoreConditional']).emit() + StoreDoubleImmEx("strexd", False, True, False, flavor="exclusive", + instFlags = ['IsStoreConditional']).emit() StoreImm("vstr", False, True, False, size=4, flavor="fp").emit() StoreImm("vstr", False, False, False, size=4, flavor="fp").emit() diff --git a/src/arch/arm/isa/operands.isa b/src/arch/arm/isa/operands.isa index f403f9372..7b014acd0 100644 --- a/src/arch/arm/isa/operands.isa +++ b/src/arch/arm/isa/operands.isa @@ -228,11 +228,11 @@ def operands {{ 'SevMailbox': cntrlRegNC('MISCREG_SEV_MAILBOX'), #Register fields for microops - 'Ra' : intReg('ura'), + 'URa' : intReg('ura'), 'IWRa' : intRegIWPC('ura'), 'Fa' : floatReg('ura'), - 'Rb' : intReg('urb'), - 'Rc' : intReg('urc'), + 'URb' : intReg('urb'), + 'URc' : intReg('urc'), #Memory Operand 'Mem': ('Mem', 'uw', None, ('IsMemRef', 'IsLoad', 'IsStore'), srtNormal), diff --git a/src/arch/arm/isa/templates/branch.isa b/src/arch/arm/isa/templates/branch.isa index d1f581f51..6abf76963 100644 --- a/src/arch/arm/isa/templates/branch.isa +++ b/src/arch/arm/isa/templates/branch.isa @@ -57,7 +57,11 @@ def template BranchImmConstructor {{ for (int x = 0; x < _numDestRegs; x++) { _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; } + flags[IsCondControl] = true; + } else { + flags[IsUncondControl] = true; } + } }}; @@ -69,6 +73,7 @@ class %(class_name)s : public %(base_class)s %(class_name)s(ExtMachInst machInst, int32_t _imm, ConditionCode _condCode); %(BasicExecDeclare)s + ArmISA::PCState branchTarget(const ArmISA::PCState &branchPC) const; }; }}; @@ -84,6 +89,9 @@ def template BranchImmCondConstructor {{ for (int x = 0; x < _numDestRegs; x++) { _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; } + flags[IsCondControl] = true; + } else { + flags[IsUncondControl] = true; } } }}; @@ -108,6 +116,9 @@ def template BranchRegConstructor {{ for (int x = 0; x < _numDestRegs; x++) { _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; } + flags[IsCondControl] = true; + } else { + flags[IsUncondControl] = true; } } }}; @@ -135,6 +146,9 @@ def template BranchRegCondConstructor {{ for (int x = 0; x < _numDestRegs; x++) { _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; } + flags[IsCondControl] = true; + } else { + flags[IsUncondControl] = true; } } }}; @@ -176,6 +190,9 @@ def template BranchRegRegConstructor {{ for (int x = 0; x < _numDestRegs; x++) { _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; } + flags[IsCondControl] = true; + } else { + flags[IsUncondControl] = true; } } }}; @@ -202,6 +219,26 @@ def template BranchImmRegConstructor {{ for (int x = 0; x < _numDestRegs; x++) { _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; } + flags[IsCondControl] = true; + } else { + flags[IsUncondControl] = true; } } }}; + +def template BranchTarget {{ + + ArmISA::PCState + %(class_name)s::branchTarget(const ArmISA::PCState &branchPC) const + { + %(op_decl)s; + %(op_rd)s; + + ArmISA::PCState pcs = branchPC; + %(brTgtCode)s + pcs.advance(); + return pcs; + } +}}; + + diff --git a/src/arch/arm/isa/templates/macromem.isa b/src/arch/arm/isa/templates/macromem.isa index b7ca7fa48..a7f7f0da8 100644 --- a/src/arch/arm/isa/templates/macromem.isa +++ b/src/arch/arm/isa/templates/macromem.isa @@ -109,6 +109,41 @@ def template MicroNeonMemDeclare {{ //////////////////////////////////////////////////////////////////// // +// PC = Integer(ura) +// CPSR = Integer(urb) +// + +def template MicroSetPCCPSRDeclare {{ + class %(class_name)s : public %(base_class)s + { + public: + %(class_name)s(ExtMachInst machInst, + IntRegIndex _ura, + IntRegIndex _urb, + IntRegIndex _urc); + %(BasicExecDeclare)s + }; +}}; + +def template MicroSetPCCPSRConstructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _ura, + IntRegIndex _urb, + IntRegIndex _urc) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _ura, _urb, _urc) + { + %(constructor)s; + if (!(condCode == COND_AL || condCode == COND_UC)) { + for (int x = 0; x < _numDestRegs; x++) { + _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; + } + } + } +}}; + +//////////////////////////////////////////////////////////////////// +// // Integer = Integer op Integer microops // diff --git a/src/arch/arm/isa/templates/mem.isa b/src/arch/arm/isa/templates/mem.isa index 3d073b322..dcfd47ace 100644 --- a/src/arch/arm/isa/templates/mem.isa +++ b/src/arch/arm/isa/templates/mem.isa @@ -917,9 +917,9 @@ def template CompleteAccDeclare {{ def template RfeConstructor {{ inline %(class_name)s::%(class_name)s(ExtMachInst machInst, - uint32_t _base, int _mode, bool _wb) - : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, - (IntRegIndex)_base, (AddrMode)_mode, _wb) + uint32_t _base, int _mode, bool _wb) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + (IntRegIndex)_base, (AddrMode)_mode, _wb) { %(constructor)s; if (!(condCode == COND_AL || condCode == COND_UC)) { @@ -928,12 +928,18 @@ def template RfeConstructor {{ } } #if %(use_uops)d - assert(numMicroops >= 2); - uops = new StaticInstPtr[numMicroops]; - uops[0] = new %(acc_name)s(machInst, _base, _mode, _wb); - uops[0]->setDelayedCommit(); - uops[1] = new %(wb_decl)s; - uops[1]->setLastMicroop(); + uops = new StaticInstPtr[1 + %(use_wb)d + %(use_pc)d]; + int uopIdx = 0; + uops[uopIdx] = new %(acc_name)s(machInst, _base, _mode, _wb); + uops[uopIdx]->setDelayedCommit(); +#if %(use_wb)d + uops[++uopIdx] = new %(wb_decl)s; + uops[uopIdx]->setDelayedCommit(); +#endif +#if %(use_pc)d + uops[++uopIdx] = new %(pc_decl)s; +#endif + uops[uopIdx]->setLastMicroop(); #endif } }}; diff --git a/src/arch/arm/isa/templates/pred.isa b/src/arch/arm/isa/templates/pred.isa index c9e7b1803..2a4bd9dab 100644 --- a/src/arch/arm/isa/templates/pred.isa +++ b/src/arch/arm/isa/templates/pred.isa @@ -170,6 +170,38 @@ def template PredOpExecute {{ } }}; +def template QuiescePredOpExecute {{ + Fault %(class_name)s::execute(%(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const + { + Fault fault = NoFault; + uint64_t resTemp = 0; + resTemp = resTemp; + %(op_decl)s; + %(op_rd)s; + + if (%(predicate_test)s) + { + %(code)s; + if (fault == NoFault) + { + %(op_wb)s; + } + } else { + xc->setPredicate(false); +#if FULL_SYSTEM + PseudoInst::quiesceSkip(xc->tcBase()); +#endif + } + + if (fault == NoFault && machInst.itstateMask != 0&& + (!isMicroop() || isLastMicroop())) { + xc->setMiscReg(MISCREG_ITSTATE, machInst.newItstate); + } + + return fault; + } +}}; + def template DataDecode {{ if (machInst.opcode4 == 0) { if (machInst.sField == 0) diff --git a/src/arch/arm/linux/process.cc b/src/arch/arm/linux/process.cc index e3455d211..fc48ec12d 100644 --- a/src/arch/arm/linux/process.cc +++ b/src/arch/arm/linux/process.cc @@ -197,7 +197,7 @@ SyscallDesc ArmLinuxProcess::syscallDescs[] = { /* 117 */ SyscallDesc("ipc", unimplementedFunc), /* 118 */ SyscallDesc("fsync", unimplementedFunc), /* 119 */ SyscallDesc("sigreturn", unimplementedFunc), - /* 120 */ SyscallDesc("clone", unimplementedFunc), + /* 120 */ SyscallDesc("clone", cloneFunc), /* 121 */ SyscallDesc("setdomainname", unimplementedFunc), /* 122 */ SyscallDesc("uname", unameFunc), /* 123 */ SyscallDesc("unused#123", unimplementedFunc), @@ -239,7 +239,7 @@ SyscallDesc ArmLinuxProcess::syscallDescs[] = { /* 159 */ SyscallDesc("sched_get_priority_max", unimplementedFunc), /* 160 */ SyscallDesc("sched_get_priority_min", unimplementedFunc), /* 161 */ SyscallDesc("sched_rr_get_interval", unimplementedFunc), - /* 162 */ SyscallDesc("nanosleep", unimplementedFunc), + /* 162 */ SyscallDesc("nanosleep", ignoreWarnOnceFunc), /* 163 */ SyscallDesc("mremap", mremapFunc<ArmLinux>), // ARM-specific /* 164 */ SyscallDesc("setresuid", unimplementedFunc), /* 165 */ SyscallDesc("getresuid", unimplementedFunc), @@ -251,8 +251,8 @@ SyscallDesc ArmLinuxProcess::syscallDescs[] = { /* 171 */ SyscallDesc("getresgid", unimplementedFunc), /* 172 */ SyscallDesc("prctl", unimplementedFunc), /* 173 */ SyscallDesc("rt_sigreturn", unimplementedFunc), - /* 174 */ SyscallDesc("rt_sigaction", ignoreFunc), - /* 175 */ SyscallDesc("rt_sigprocmask", unimplementedFunc), + /* 174 */ SyscallDesc("rt_sigaction", ignoreWarnOnceFunc), + /* 175 */ SyscallDesc("rt_sigprocmask", ignoreWarnOnceFunc), /* 176 */ SyscallDesc("rt_sigpending", unimplementedFunc), /* 177 */ SyscallDesc("rt_sigtimedwait", unimplementedFunc), /* 178 */ SyscallDesc("rt_sigqueueinfo", ignoreFunc), @@ -317,7 +317,7 @@ SyscallDesc ArmLinuxProcess::syscallDescs[] = { /* 237 */ SyscallDesc("fremovexattr", unimplementedFunc), /* 238 */ SyscallDesc("tkill", unimplementedFunc), /* 239 */ SyscallDesc("sendfile64", unimplementedFunc), - /* 240 */ SyscallDesc("futex", unimplementedFunc), + /* 240 */ SyscallDesc("futex", ignoreWarnOnceFunc), /* 241 */ SyscallDesc("sched_setaffinity", unimplementedFunc), /* 242 */ SyscallDesc("sched_getaffinity", unimplementedFunc), /* 243 */ SyscallDesc("io_setup", unimplementedFunc), @@ -456,6 +456,7 @@ setTLSFunc(SyscallDesc *desc, int callnum, LiveProcess *process, tc->getMemPort()->writeBlob(ArmLinuxProcess::commPage + 0x0ff0, (uint8_t *)&tlsPtr, sizeof(tlsPtr)); + tc->setMiscReg(MISCREG_TPIDRURO,tlsPtr); return 0; } @@ -508,7 +509,7 @@ ArmLinuxProcess::startup() ThreadContext *tc = system->getThreadContext(contextIds[0]); uint8_t swiNeg1[] = { - 0xff, 0xff, 0xff, 0xef //swi -1 + 0xff, 0xff, 0xff, 0xef // swi -1 }; // Fill this page with swi -1 so we'll no if we land in it somewhere. @@ -521,7 +522,8 @@ ArmLinuxProcess::startup() // @todo Add a barrrier in this code uint8_t memory_barrier[] = { - 0x0e, 0xf0, 0xa0, 0xe1 //usr_ret lr + 0x5f, 0xf0, 0x7f, 0xf5, // dmb + 0x0e, 0xf0, 0xa0, 0xe1 // return }; tc->getMemPort()->writeBlob(commPage + 0x0fa0, memory_barrier, sizeof(memory_barrier)); @@ -531,18 +533,22 @@ ArmLinuxProcess::startup() // @todo replace this with ldrex/strex and dmb uint8_t cmpxchg[] = { - 0x00, 0x30, 0x92, 0xe5, //ldr r3, [r2] - 0x00, 0x30, 0x53, 0xe0, //subs r3, r3, r0 - 0x00, 0x10, 0x82, 0x05, //streq r1, [r2] - 0x03, 0x00, 0xa0, 0xe1, //mov r0, r3 - 0x0e, 0xf0, 0xa0, 0xe1 //usr_ret lr + 0x9f, 0x3f, 0x92, 0xe1, // ldrex r3, [r2] + 0x00, 0x30, 0x53, 0xe0, // subs r3, r3, r0 + 0x91, 0x3f, 0x82, 0x01, // strexeq r3, r1, [r2] + 0x01, 0x00, 0x33, 0x03, // teqeq r3, #1 + 0xfa, 0xff, 0xff, 0x0a, // beq 1b + 0x00, 0x00, 0x73, 0xe2, // rsbs r0, r3, #0 + 0x5f, 0xf0, 0x7f, 0xf5, // dmb + 0x0e, 0xf0, 0xa0, 0xe1 // return }; tc->getMemPort()->writeBlob(commPage + 0x0fc0, cmpxchg, sizeof(cmpxchg)); uint8_t get_tls[] = { - 0x08, 0x00, 0x9f, 0xe5, //ldr r0, [pc, #(16 - 8)] - 0x0e, 0xf0, 0xa0, 0xe1 //usr_ret lr + // read user read-only thread id register + 0x70, 0x0f, 0x1d, 0xee, // mrc p15, 0, r0, c13, c0, 3 + 0x0e, 0xf0, 0xa0, 0xe1 // return }; tc->getMemPort()->writeBlob(commPage + 0x0fe0, get_tls, sizeof(get_tls)); } diff --git a/src/arch/arm/linux/system.cc b/src/arch/arm/linux/system.cc index 38024c058..7aff2b6ef 100644 --- a/src/arch/arm/linux/system.cc +++ b/src/arch/arm/linux/system.cc @@ -47,9 +47,11 @@ #include "base/loader/object_file.hh" #include "base/loader/symtab.hh" #include "cpu/thread_context.hh" +#include "kern/linux/events.hh" #include "mem/physical.hh" using namespace ArmISA; +using namespace Linux; LinuxArmSystem::LinuxArmSystem(Params *p) : ArmSystem(p) @@ -96,6 +98,24 @@ LinuxArmSystem::LinuxArmSystem(Params *p) if (!kernelPanicEvent) panic("could not find kernel symbol \'panic\'"); #endif + + // With ARM udelay() is #defined to __udelay + Addr addr = 0; + if (kernelSymtab->findAddress("__udelay", addr)) { + uDelaySkipEvent = new UDelayEvent(&pcEventQueue, "__udelay", + fixFuncEventAddr(addr), 1000, 0); + } else { + panic("couldn't find kernel symbol \'udelay\'"); + } + + // constant arguments to udelay() have some precomputation done ahead of + // time. Constant comes from code. + if (kernelSymtab->findAddress("__const_udelay", addr)) { + constUDelaySkipEvent = new UDelayEvent(&pcEventQueue, "__const_udelay", + fixFuncEventAddr(addr), 1000, 107374); + } else { + panic("couldn't find kernel symbol \'udelay\'"); + } } void @@ -115,6 +135,10 @@ LinuxArmSystem::initState() LinuxArmSystem::~LinuxArmSystem() { + if (uDelaySkipEvent) + delete uDelaySkipEvent; + if (constUDelaySkipEvent) + delete constUDelaySkipEvent; } LinuxArmSystem * diff --git a/src/arch/arm/linux/system.hh b/src/arch/arm/linux/system.hh index 4e5ebcd73..2ef65fea2 100644 --- a/src/arch/arm/linux/system.hh +++ b/src/arch/arm/linux/system.hh @@ -74,6 +74,19 @@ class LinuxArmSystem : public ArmSystem /** Event to halt the simulator if the kernel calls panic() */ BreakPCEvent *kernelPanicEvent; #endif + /** + * PC based event to skip udelay(<time>) calls and quiesce the + * processor for the appropriate amount of time. This is not functionally + * required but does speed up simulation. + */ + Linux::UDelayEvent *uDelaySkipEvent; + + /** Another PC based skip event for const_udelay(). Similar to the udelay + * skip, but this function precomputes the first multiply that is done + * in the generic case since the parameter is known at compile time. + * Thus we need to do some division to get back to us. + */ + Linux::UDelayEvent *constUDelaySkipEvent; }; #endif // __ARCH_ARM_LINUX_SYSTEM_HH__ diff --git a/src/arch/arm/miscregs.hh b/src/arch/arm/miscregs.hh index 90b4fd999..47495a4e7 100644 --- a/src/arch/arm/miscregs.hh +++ b/src/arch/arm/miscregs.hh @@ -165,6 +165,12 @@ namespace ArmISA MISCREG_PMUSERENR, MISCREG_PMINTENSET, MISCREG_PMINTENCLR, + MISCREG_ID_ISAR0, + MISCREG_ID_ISAR1, + MISCREG_ID_ISAR2, + MISCREG_ID_ISAR3, + MISCREG_ID_ISAR4, + MISCREG_ID_ISAR5, MISCREG_CP15_UNIMP_START, MISCREG_TCMTR = MISCREG_CP15_UNIMP_START, MISCREG_ID_PFR1, @@ -173,12 +179,6 @@ namespace ArmISA MISCREG_ID_MMFR1, MISCREG_ID_MMFR2, MISCREG_ID_MMFR3, - MISCREG_ID_ISAR0, - MISCREG_ID_ISAR1, - MISCREG_ID_ISAR2, - MISCREG_ID_ISAR3, - MISCREG_ID_ISAR4, - MISCREG_ID_ISAR5, MISCREG_AIDR, MISCREG_ADFSR, MISCREG_AIFSR, @@ -233,13 +233,12 @@ namespace ArmISA "pmswinc", "pmselr", "pmceid0", "pmceid1", "pmc_other", "pmxevcntr", "pmuserenr", "pmintenset", "pmintenclr", + "id_isar0", "id_isar1", "id_isar2", "id_isar3", "id_isar4", "id_isar5", // Unimplemented below "tcmtr", "id_pfr1", "id_dfr0", "id_afr0", "id_mmfr1", "id_mmfr2", "id_mmfr3", - "id_isar0", "id_isar1", "id_isar2", "id_isar3", "id_isar4", "id_isar5", - "aidr", - "adfsr", "aifsr", + "aidr", "adfsr", "aifsr", "dcimvac", "dcisw", "mccsw", "dccmvau", "nsacr", diff --git a/src/arch/arm/predecoder.hh b/src/arch/arm/predecoder.hh index a99e38ce7..08e1676c0 100644 --- a/src/arch/arm/predecoder.hh +++ b/src/arch/arm/predecoder.hh @@ -83,6 +83,12 @@ namespace ArmISA predAddrValid = false; } + void reset(const ExtMachInst &old_emi) + { + reset(); + itstate = old_emi.newItstate; + } + Predecoder(ThreadContext * _tc) : tc(_tc), data(0) { @@ -122,17 +128,17 @@ namespace ArmISA outOfBytes = true; } - bool needMoreBytes() + bool needMoreBytes() const { return outOfBytes; } - bool extMachInstReady() + bool extMachInstReady() const { return emiReady; } - int getInstSize() + int getInstSize() const { return (!emi.thumb || emi.bigThumb) ? 4 : 2; } @@ -145,6 +151,7 @@ namespace ArmISA pc.npc(pc.pc() + getInstSize()); predAddrValid = true; predAddr = pc.pc() + getInstSize(); + pc.size(getInstSize()); emi = 0; emiReady = false; return thisEmi; diff --git a/src/arch/arm/types.hh b/src/arch/arm/types.hh index e6865e280..b4ef07d8a 100644 --- a/src/arch/arm/types.hh +++ b/src/arch/arm/types.hh @@ -203,6 +203,7 @@ namespace ArmISA uint8_t flags; uint8_t nextFlags; uint8_t forcedItStateValue; + uint8_t _size; bool forcedItStateValid; public: PCState() : flags(0), nextFlags(0), forcedItStateValue(0), forcedItStateValid(false) @@ -248,6 +249,16 @@ namespace ArmISA nextFlags &= ~ThumbBit; } + void size(uint8_t s) { _size = s; } + uint8_t size() const { return _size; } + + bool + branching() const + { + return ((this->pc() + this->size()) != this->npc()); + } + + bool jazelle() const { @@ -392,6 +403,7 @@ namespace ArmISA { Base::serialize(os); SERIALIZE_SCALAR(flags); + SERIALIZE_SCALAR(_size); SERIALIZE_SCALAR(nextFlags); SERIALIZE_SCALAR(forcedItStateValue); SERIALIZE_SCALAR(forcedItStateValid); @@ -402,6 +414,7 @@ namespace ArmISA { Base::unserialize(cp, section); UNSERIALIZE_SCALAR(flags); + UNSERIALIZE_SCALAR(_size); UNSERIALIZE_SCALAR(nextFlags); UNSERIALIZE_SCALAR(forcedItStateValue); UNSERIALIZE_SCALAR(forcedItStateValid); diff --git a/src/arch/mips/predecoder.hh b/src/arch/mips/predecoder.hh index 4220b768c..110493cc5 100644 --- a/src/arch/mips/predecoder.hh +++ b/src/arch/mips/predecoder.hh @@ -75,6 +75,12 @@ class Predecoder emiIsReady = false; } + void + reset(const ExtMachInst &old_emi) + { + reset(); + } + //Use this to give data to the predecoder. This should be used //when there is control flow. void diff --git a/src/arch/power/predecoder.hh b/src/arch/power/predecoder.hh index 8b1089095..c10bc51bf 100644 --- a/src/arch/power/predecoder.hh +++ b/src/arch/power/predecoder.hh @@ -82,6 +82,12 @@ class Predecoder emiIsReady = false; } + void + reset(const ExtMachInst &old_emi) + { + reset(); + } + // Use this to give data to the predecoder. This should be used // when there is control flow. void diff --git a/src/arch/sparc/predecoder.hh b/src/arch/sparc/predecoder.hh index 670c547d0..082adeb72 100644 --- a/src/arch/sparc/predecoder.hh +++ b/src/arch/sparc/predecoder.hh @@ -68,12 +68,19 @@ class Predecoder } void process() {} + void reset() { emiIsReady = false; } + void + reset(const ExtMachInst &old_emi) + { + reset(); + } + // Use this to give data to the predecoder. This should be used // when there is control flow. void diff --git a/src/arch/x86/predecoder.hh b/src/arch/x86/predecoder.hh index 5c67e28e1..790453b98 100644 --- a/src/arch/x86/predecoder.hh +++ b/src/arch/x86/predecoder.hh @@ -174,6 +174,12 @@ namespace X86ISA state = ResetState; } + void + reset(const ExtMachInst &old_emi) + { + reset(); + } + ThreadContext * getTC() { return tc; diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh index 897807fdb..840dde9ea 100644 --- a/src/cpu/o3/comm.hh +++ b/src/cpu/o3/comm.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2011 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * @@ -123,7 +135,6 @@ struct TimeBufStruct { bool branchTaken; Addr mispredPC; TheISA::PCState nextPC; - unsigned branchCount; }; @@ -151,29 +162,45 @@ struct TimeBufStruct { iewComm iewInfo[Impl::MaxThreads]; struct commitComm { - bool usedROB; - unsigned freeROBEntries; - bool emptyROB; + /////////////// For Decode, IEW, Rename, Fetch /////////// bool squash; bool robSquashing; - bool branchMispredict; - DynInstPtr mispredictInst; - bool branchTaken; - Addr mispredPC; - TheISA::PCState pc; - + ////////// For Fetch & IEW ///////////// // Represents the instruction that has either been retired or // squashed. Similar to having a single bus that broadcasts the // retired or squashed sequence number. InstSeqNum doneSeqNum; - //Just in case we want to do a commit/squash on a cycle - //(necessary for multiple ROBs?) - bool commitInsts; - InstSeqNum squashSeqNum; + ////////////// For Rename ///////////////// + // Rename should re-read number of free rob entries + bool usedROB; + // Notify Rename that the ROB is empty + bool emptyROB; + // Tell Rename how many free entries it has in the ROB + unsigned freeROBEntries; + + + ///////////// For Fetch ////////////////// + // Provide fetch the instruction that mispredicted, if this + // pointer is not-null a misprediction occured + DynInstPtr mispredictInst; + // Was the branch taken or not + bool branchTaken; + // The pc of the next instruction to execute. This is the next + // instruction for a branch mispredict, but the same instruction for + // order violation and the like + TheISA::PCState pc; + + // Instruction that caused the a non-mispredict squash + DynInstPtr squashInst; + // If an interrupt is pending and fetch should stall + bool interruptPending; + // If the interrupt ended up being cleared before being handled + bool clearInterrupt; + //////////// For IEW ////////////////// // Communication specifically to the IQ to tell the IQ that it can // schedule a non-speculative instruction. InstSeqNum nonSpecSeqNum; @@ -182,8 +209,6 @@ struct TimeBufStruct { bool uncached; DynInstPtr uncachedLoad; - bool interruptPending; - bool clearInterrupt; }; commitComm commitInfo[Impl::MaxThreads]; diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index 047e29f5d..ff7b53440 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -262,7 +262,8 @@ class DefaultCommit * instructions instead of the current instruction and doesn't * clean up various status bits about traps/tc writes pending. */ - void squashAfter(ThreadID tid, uint64_t squash_after_seq_num); + void squashAfter(ThreadID tid, DynInstPtr &head_inst, + uint64_t squash_after_seq_num); #if FULL_SYSTEM /** Handles processing an interrupt. */ diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index 104e7fb58..8c651e203 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -541,8 +541,8 @@ DefaultCommit<Impl>::squashAll(ThreadID tid) // the ROB is in the process of squashing. toIEW->commitInfo[tid].robSquashing = true; - toIEW->commitInfo[tid].branchMispredict = false; toIEW->commitInfo[tid].mispredictInst = NULL; + toIEW->commitInfo[tid].squashInst = NULL; toIEW->commitInfo[tid].pc = pc[tid]; } @@ -584,7 +584,8 @@ DefaultCommit<Impl>::squashFromTC(ThreadID tid) template <class Impl> void -DefaultCommit<Impl>::squashAfter(ThreadID tid, uint64_t squash_after_seq_num) +DefaultCommit<Impl>::squashAfter(ThreadID tid, DynInstPtr &head_inst, + uint64_t squash_after_seq_num) { youngestSeqNum[tid] = squash_after_seq_num; @@ -594,6 +595,7 @@ DefaultCommit<Impl>::squashAfter(ThreadID tid, uint64_t squash_after_seq_num) // Send back the sequence number of the squashed instruction. toIEW->commitInfo[tid].doneSeqNum = squash_after_seq_num; + toIEW->commitInfo[tid].squashInst = head_inst; // Send back the squash signal to tell stages that they should squash. toIEW->commitInfo[tid].squash = true; @@ -601,7 +603,7 @@ DefaultCommit<Impl>::squashAfter(ThreadID tid, uint64_t squash_after_seq_num) // the ROB is in the process of squashing. toIEW->commitInfo[tid].robSquashing = true; - toIEW->commitInfo[tid].branchMispredict = false; + toIEW->commitInfo[tid].mispredictInst = NULL; toIEW->commitInfo[tid].pc = pc[tid]; DPRINTF(Commit, "Executing squash after for [tid:%i] inst [sn:%lli]\n", @@ -801,10 +803,17 @@ DefaultCommit<Impl>::commit() commitStatus[tid] != TrapPending && fromIEW->squashedSeqNum[tid] <= youngestSeqNum[tid]) { - DPRINTF(Commit, "[tid:%i]: Squashing due to PC %#x [sn:%i]\n", + if (fromIEW->mispredictInst[tid]) { + DPRINTF(Commit, + "[tid:%i]: Squashing due to branch mispred PC:%#x [sn:%i]\n", tid, - fromIEW->mispredPC[tid], + fromIEW->mispredictInst[tid]->instAddr(), fromIEW->squashedSeqNum[tid]); + } else { + DPRINTF(Commit, + "[tid:%i]: Squashing due to order violation [sn:%i]\n", + tid, fromIEW->squashedSeqNum[tid]); + } DPRINTF(Commit, "[tid:%i]: Redirecting to PC %#x\n", tid, @@ -835,18 +844,15 @@ DefaultCommit<Impl>::commit() // the ROB is in the process of squashing. toIEW->commitInfo[tid].robSquashing = true; - toIEW->commitInfo[tid].branchMispredict = - fromIEW->branchMispredict[tid]; toIEW->commitInfo[tid].mispredictInst = fromIEW->mispredictInst[tid]; toIEW->commitInfo[tid].branchTaken = fromIEW->branchTaken[tid]; + toIEW->commitInfo[tid].squashInst = NULL; toIEW->commitInfo[tid].pc = fromIEW->pc[tid]; - toIEW->commitInfo[tid].mispredPC = fromIEW->mispredPC[tid]; - - if (toIEW->commitInfo[tid].branchMispredict) { + if (toIEW->commitInfo[tid].mispredictInst) { ++branchMispredicts; } } @@ -988,7 +994,7 @@ DefaultCommit<Impl>::commitInsts() // If this is an instruction that doesn't play nicely with // others squash everything and restart fetch if (head_inst->isSquashAfter()) - squashAfter(tid, head_inst->seqNum); + squashAfter(tid, head_inst, head_inst->seqNum); int count = 0; Addr oldpc; diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 2d3bc3f72..4088f2399 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -808,8 +808,9 @@ FullO3CPU<Impl>::removeThread(ThreadID tid) } // Squash Throughout Pipeline - InstSeqNum squash_seq_num = commit.rob->readHeadInst(tid)->seqNum; - fetch.squash(0, squash_seq_num, tid); + DynInstPtr inst = commit.rob->readHeadInst(tid); + InstSeqNum squash_seq_num = inst->seqNum; + fetch.squash(0, squash_seq_num, inst, tid); decode.squash(tid); rename.squash(squash_seq_num, tid); iew.squash(tid); diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index c51658104..4a4ac0902 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -312,8 +312,8 @@ class DefaultFetch * remove any instructions that are not in the ROB. The source of this * squash should be the commit stage. */ - void squash(const TheISA::PCState &newPC, - const InstSeqNum &seq_num, ThreadID tid); + void squash(const TheISA::PCState &newPC, const InstSeqNum &seq_num, + DynInstPtr &squashInst, ThreadID tid); /** Ticks the fetch stage, processing all inputs signals and fetching * as many instructions as possible. diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index a2f2b4f8a..6c1ac456d 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -112,6 +112,9 @@ DefaultFetch<Impl>::IcachePort::recvTiming(PacketPtr pkt) { DPRINTF(Fetch, "Received timing\n"); if (pkt->isResponse()) { + // We shouldn't ever get a block in ownership state + assert(!(pkt->memInhibitAsserted() && !pkt->sharedAsserted())); + fetch->processCacheCompletion(pkt); } //else Snooped a coherence request, just return @@ -812,11 +815,14 @@ DefaultFetch<Impl>::updateFetchStatus() template <class Impl> void DefaultFetch<Impl>::squash(const TheISA::PCState &newPC, - const InstSeqNum &seq_num, ThreadID tid) + const InstSeqNum &seq_num, DynInstPtr &squashInst, + ThreadID tid) { DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n", tid); doSquash(newPC, tid); + if (squashInst) + predecoder.reset(squashInst->staticInst->machInst); // Tell the CPU to remove any instructions that are not in the ROB. cpu->removeInstsNotInROB(tid); @@ -931,15 +937,12 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(ThreadID tid) // In any case, squash. squash(fromCommit->commitInfo[tid].pc, fromCommit->commitInfo[tid].doneSeqNum, - tid); + fromCommit->commitInfo[tid].squashInst, tid); // If it was a branch mispredict on a control instruction, update the // branch predictor with that instruction, otherwise just kill the // invalid state we generated in after sequence number - assert(!fromCommit->commitInfo[tid].branchMispredict || - fromCommit->commitInfo[tid].mispredictInst); - - if (fromCommit->commitInfo[tid].branchMispredict && + if (fromCommit->commitInfo[tid].mispredictInst && fromCommit->commitInfo[tid].mispredictInst->isControl()) { branchPred.squash(fromCommit->commitInfo[tid].doneSeqNum, fromCommit->commitInfo[tid].pc, diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index dff287ff5..8bf3c56f4 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -456,8 +456,6 @@ DefaultIEW<Impl>::squashDueToBranch(DynInstPtr &inst, ThreadID tid) inst->seqNum < toCommit->squashedSeqNum[tid]) { toCommit->squash[tid] = true; toCommit->squashedSeqNum[tid] = inst->seqNum; - toCommit->mispredPC[tid] = inst->instAddr(); - toCommit->branchMispredict[tid] = true; toCommit->branchTaken[tid] = inst->pcState().branching(); TheISA::PCState pc = inst->pcState(); @@ -486,7 +484,7 @@ DefaultIEW<Impl>::squashDueToMemOrder(DynInstPtr &inst, ThreadID tid) TheISA::PCState pc = inst->pcState(); TheISA::advancePC(pc, inst->staticInst); toCommit->pc[tid] = pc; - toCommit->branchMispredict[tid] = false; + toCommit->mispredictInst[tid] = NULL; toCommit->includeSquashInst[tid] = false; @@ -506,7 +504,7 @@ DefaultIEW<Impl>::squashDueToMemBlocked(DynInstPtr &inst, ThreadID tid) toCommit->squashedSeqNum[tid] = inst->seqNum; toCommit->pc[tid] = inst->pcState(); - toCommit->branchMispredict[tid] = false; + toCommit->mispredictInst[tid] = NULL; // Must include the broadcasted SN in the squash. toCommit->includeSquashInst[tid] = true; diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index b5d337935..1a4e686a3 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -1103,7 +1103,9 @@ LSQUnit<Impl>::recvRetry() dynamic_cast<LSQSenderState *>(retryPkt->senderState); // Don't finish the store unless this is the last packet. - if (!TheISA::HasUnalignedMemAcc || !state->pktToSend) { + if (!TheISA::HasUnalignedMemAcc || !state->pktToSend || + state->pendingPacket == retryPkt) { + state->pktToSend = false; storePostSend(retryPkt); } retryPkt = NULL; diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index 27635d3ce..6aa0eb64e 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -641,6 +641,9 @@ AtomicSimpleCPU::tick() checkForInterrupts(); checkPcEventQueue(); + // We must have just got suspended by a PC event + if (_status == Idle) + return; Fault fault = NoFault; diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index 632e83356..aca48e5d4 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -714,6 +714,10 @@ TimingSimpleCPU::fetch() checkPcEventQueue(); + // We must have just got suspended by a PC event + if (_status == Idle) + return; + TheISA::PCState pcState = thread->pcState(); bool needToFetch = !isRomMicroPC(pcState.microPC()) && !curMacroStaticInst; diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh index 933496864..d07b322df 100644 --- a/src/cpu/static_inst.hh +++ b/src/cpu/static_inst.hh @@ -267,6 +267,7 @@ class StaticInstBase : public RefCounted void setLastMicroop() { flags[IsLastMicroop] = true; } void setDelayedCommit() { flags[IsDelayedCommit] = true; } + void setFlag(Flags f) { flags[f] = true; } /// Operation class. Used to select appropriate function unit in issue. OpClass opClass() const { return _opClass; } diff --git a/src/dev/io_device.cc b/src/dev/io_device.cc index be97bc4ad..ffe8fdf06 100644 --- a/src/dev/io_device.cc +++ b/src/dev/io_device.cc @@ -139,6 +139,9 @@ DmaPort::recvTiming(PacketPtr pkt) assert(pendingCount >= 0); assert(state); + // We shouldn't ever get a block in ownership state + assert(!(pkt->memInhibitAsserted() && !pkt->sharedAsserted())); + state->numBytes += pkt->req->getSize(); assert(state->totBytes >= state->numBytes); if (state->totBytes == state->numBytes) { diff --git a/src/kern/linux/events.cc b/src/kern/linux/events.cc index f619dd11b..75c2b6f7f 100644 --- a/src/kern/linux/events.cc +++ b/src/kern/linux/events.cc @@ -1,4 +1,16 @@ /* + * Copyright (c) 2011 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * @@ -32,11 +44,13 @@ #include <sstream> #include "base/trace.hh" +#include "arch/utility.hh" #include "cpu/thread_context.hh" #include "kern/linux/events.hh" #include "kern/linux/printk.hh" #include "kern/system_events.hh" #include "sim/arguments.hh" +#include "sim/pseudo_inst.hh" #include "sim/system.hh" namespace Linux { @@ -54,4 +68,27 @@ DebugPrintkEvent::process(ThreadContext *tc) SkipFuncEvent::process(tc); } +void +UDelayEvent::process(ThreadContext *tc) +{ + int arg_num = 0; + + // Get the time in native size + uint64_t time = TheISA::getArgument(tc, arg_num, (uint16_t)-1, false); + + // convert parameter to ns + if (argDivToNs) + time /= argDivToNs; + + time *= argMultToNs; + + // Convert ns to ticks + time *= SimClock::Int::ns; + + SkipFuncEvent::process(tc); + + PseudoInst::quiesceNs(tc, time); +} + + } // namespace linux diff --git a/src/kern/linux/events.hh b/src/kern/linux/events.hh index e36a72dde..3f5f2526f 100644 --- a/src/kern/linux/events.hh +++ b/src/kern/linux/events.hh @@ -44,6 +44,33 @@ class DebugPrintkEvent : public SkipFuncEvent virtual void process(ThreadContext *xc); }; +/** A class to skip udelay() and related calls in the kernel. + * This class has two additional parameters that take the argument to udelay and + * manipulated it to come up with ns and eventually ticks to quiesce for. + * See descriptions of argDivToNs and argMultToNs below. + */ +class UDelayEvent : public SkipFuncEvent +{ + private: + /** value to divide arg by to create ns. This is present beacues the linux + * kernel code sometime precomputes the first multiply that is done in + * udelay() if the parameter is a constant. We need to undo it so here is + * how. */ + uint64_t argDivToNs; + + /** value to multiple arg by to create ns. Nominally, this is 1000 to + * convert us to ns, but since linux can do some preprocessing of constant + * values something else might be required. */ + uint64_t argMultToNs; + + public: + UDelayEvent(PCEventQueue *q, const std::string &desc, Addr addr, + uint64_t mult, uint64_t div) + : SkipFuncEvent(q, desc, addr), argDivToNs(div), argMultToNs(mult) {} + virtual void process(ThreadContext *xc); +}; + + } #endif diff --git a/src/mem/cache/BaseCache.py b/src/mem/cache/BaseCache.py index dffac2234..5c7ae5274 100644 --- a/src/mem/cache/BaseCache.py +++ b/src/mem/cache/BaseCache.py @@ -48,6 +48,7 @@ class BaseCache(MemObject): size = Param.MemorySize("capacity in bytes") forward_snoops = Param.Bool(True, "forward snoops from mem side to cpu side") + is_top_level = Param.Bool(False, "Is this cache at the top level (e.g. L1)") subblock_size = Param.Int(0, "Size of subblock in IIC used for compression") tgts_per_mshr = Param.Int("max number of accesses per MSHR") diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc index 9166e1a09..b7e331d54 100644 --- a/src/mem/cache/base.cc +++ b/src/mem/cache/base.cc @@ -58,6 +58,7 @@ BaseCache::BaseCache(const Params *p) hitLatency(p->latency), numTarget(p->tgts_per_mshr), forwardSnoops(p->forward_snoops), + isTopLevel(p->is_top_level), blocked(0), noTargetMSHR(NULL), missCount(p->max_miss_count), diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh index e8a644296..28ddf5054 100644 --- a/src/mem/cache/base.hh +++ b/src/mem/cache/base.hh @@ -194,6 +194,11 @@ class BaseCache : public MemObject /** Do we forward snoops from mem side port through to cpu side port? */ bool forwardSnoops; + /** Is this cache a toplevel cache (e.g. L1, I/O cache). If so we should + * never try to forward ownership and similar optimizations to the cpu + * side */ + bool isTopLevel; + /** * Bit vector of the blocking reasons for the access path. * @sa #BlockedCause diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index e4e4a3c92..0b2b273f9 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -216,7 +216,7 @@ Cache<TagStore>::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk, if (blk->isDirty()) { // special considerations if we're owner: - if (!deferred_response) { + if (!deferred_response && !isTopLevel) { // if we are responding immediately and can // signal that we're transferring ownership // along with exclusivity, do so diff --git a/src/sim/pseudo_inst.cc b/src/sim/pseudo_inst.cc index f3b10f6d2..bcff2f5c1 100644 --- a/src/sim/pseudo_inst.cc +++ b/src/sim/pseudo_inst.cc @@ -1,4 +1,16 @@ /* + * Copyright (c) 2010 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2003-2006 The Regents of The University of Michigan * All rights reserved. * @@ -86,6 +98,28 @@ quiesce(ThreadContext *tc) } void +quiesceSkip(ThreadContext *tc) +{ + BaseCPU *cpu = tc->getCpuPtr(); + + if (!cpu->params()->do_quiesce) + return; + + EndQuiesceEvent *quiesceEvent = tc->getQuiesceEvent(); + + Tick resume = curTick() + 1; + + cpu->reschedule(quiesceEvent, resume, true); + + DPRINTF(Quiesce, "%s: quiesceSkip() until %d\n", + cpu->name(), resume); + + tc->suspend(); + if (tc->getKernelStats()) + tc->getKernelStats()->quiesce(); +} + +void quiesceNs(ThreadContext *tc, uint64_t ns) { BaseCPU *cpu = tc->getCpuPtr(); diff --git a/src/sim/pseudo_inst.hh b/src/sim/pseudo_inst.hh index 296b1556b..aec3b5d8a 100644 --- a/src/sim/pseudo_inst.hh +++ b/src/sim/pseudo_inst.hh @@ -45,6 +45,7 @@ extern bool doQuiesce; #if FULL_SYSTEM void arm(ThreadContext *tc); void quiesce(ThreadContext *tc); +void quiesceSkip(ThreadContext *tc); void quiesceNs(ThreadContext *tc, uint64_t ns); void quiesceCycles(ThreadContext *tc, uint64_t cycles); uint64_t quiesceTime(ThreadContext *tc); diff --git a/src/sim/syscall_emul.cc b/src/sim/syscall_emul.cc index e0469744e..506b22fdf 100644 --- a/src/sim/syscall_emul.cc +++ b/src/sim/syscall_emul.cc @@ -98,6 +98,18 @@ ignoreFunc(SyscallDesc *desc, int callnum, LiveProcess *process, SyscallReturn +ignoreWarnOnceFunc(SyscallDesc *desc, int callnum, LiveProcess *process, + ThreadContext *tc) +{ + int index = 0; + warn_once("ignoring syscall %s(%d, %d, ...)", desc->name, + process->getSyscallArg(tc, index), process->getSyscallArg(tc, index)); + + return 0; +} + + +SyscallReturn exitFunc(SyscallDesc *desc, int callnum, LiveProcess *process, ThreadContext *tc) { @@ -802,6 +814,8 @@ cloneFunc(SyscallDesc *desc, int callnum, LiveProcess *process, for (int y = 8; y < 32; y++) ctc->setIntReg(y, tc->readIntReg(y)); + #elif THE_ISA == ARM_ISA + TheISA::copyRegs(tc, ctc); #else fatal("sys_clone is not implemented for this ISA\n"); #endif diff --git a/src/sim/syscall_emul.hh b/src/sim/syscall_emul.hh index 1dc51ad56..5091c275d 100644 --- a/src/sim/syscall_emul.hh +++ b/src/sim/syscall_emul.hh @@ -187,6 +187,8 @@ SyscallReturn unimplementedFunc(SyscallDesc *desc, int num, /// trace flag is enabled. Return success to the target program. SyscallReturn ignoreFunc(SyscallDesc *desc, int num, LiveProcess *p, ThreadContext *tc); +SyscallReturn ignoreWarnOnceFunc(SyscallDesc *desc, int num, + LiveProcess *p, ThreadContext *tc); /// Target exit() handler: terminate current context. SyscallReturn exitFunc(SyscallDesc *desc, int num, |