diff options
Diffstat (limited to 'src')
32 files changed, 620 insertions, 155 deletions
diff --git a/src/arch/alpha/tlb.cc b/src/arch/alpha/tlb.cc index 214b2579f..f701c423d 100644 --- a/src/arch/alpha/tlb.cc +++ b/src/arch/alpha/tlb.cc @@ -64,6 +64,7 @@ TLB::TLB(const string &name, int s) { table = new PTE[size]; memset(table, 0, sizeof(PTE[size])); + flushCache(); } TLB::~TLB() @@ -74,23 +75,39 @@ TLB::~TLB() // look up an entry in the TLB PTE * -TLB::lookup(Addr vpn, uint8_t asn) const +TLB::lookup(Addr vpn, uint8_t asn) { // assume not found... PTE *retval = NULL; - PageTable::const_iterator i = lookupTable.find(vpn); - if (i != lookupTable.end()) { - while (i->first == vpn) { - int index = i->second; - PTE *pte = &table[index]; - assert(pte->valid); - if (vpn == pte->tag && (pte->asma || pte->asn == asn)) { - retval = pte; - break; - } + if (PTECache[0]) { + if (vpn == PTECache[0]->tag && + (PTECache[0]->asma || PTECache[0]->asn == asn)) + retval = PTECache[0]; + else if (PTECache[1]) { + if (vpn == PTECache[1]->tag && + (PTECache[1]->asma || PTECache[1]->asn == asn)) + retval = PTECache[1]; + else if (PTECache[2] && vpn == PTECache[2]->tag && + (PTECache[2]->asma || PTECache[2]->asn == asn)) + retval = PTECache[2]; + } + } - ++i; + if (retval == NULL) { + PageTable::const_iterator i = lookupTable.find(vpn); + if (i != lookupTable.end()) { + while (i->first == vpn) { + int index = i->second; + PTE *pte = &table[index]; + assert(pte->valid); + if (vpn == pte->tag && (pte->asma || pte->asn == asn)) { + retval = updateCache(pte); + break; + } + + ++i; + } } } @@ -142,6 +159,7 @@ TLB::checkCacheability(RequestPtr &req) void TLB::insert(Addr addr, PTE &pte) { + flushCache(); VAddr vaddr = addr; if (table[nlu].valid) { Addr oldvpn = table[nlu].tag; @@ -178,6 +196,7 @@ TLB::flushAll() { DPRINTF(TLB, "flushAll\n"); memset(table, 0, sizeof(PTE[size])); + flushCache(); lookupTable.clear(); nlu = 0; } @@ -185,6 +204,7 @@ TLB::flushAll() void TLB::flushProcesses() { + flushCache(); PageTable::iterator i = lookupTable.begin(); PageTable::iterator end = lookupTable.end(); while (i != end) { @@ -208,6 +228,7 @@ TLB::flushProcesses() void TLB::flushAddr(Addr addr, uint8_t asn) { + flushCache(); VAddr vaddr = addr; PageTable::iterator i = lookupTable.find(vaddr.vpn()); @@ -291,7 +312,7 @@ ITB::regStats() Fault -ITB::translate(RequestPtr &req, ThreadContext *tc) const +ITB::translate(RequestPtr &req, ThreadContext *tc) { //If this is a pal pc, then set PHYSICAL if(FULL_SYSTEM && PcPAL(req->getPC())) @@ -453,7 +474,7 @@ DTB::regStats() } Fault -DTB::translate(RequestPtr &req, ThreadContext *tc, bool write) const +DTB::translate(RequestPtr &req, ThreadContext *tc, bool write) { Addr pc = tc->readPC(); diff --git a/src/arch/alpha/tlb.hh b/src/arch/alpha/tlb.hh index ea5ba5539..a4255f3c5 100644 --- a/src/arch/alpha/tlb.hh +++ b/src/arch/alpha/tlb.hh @@ -61,7 +61,7 @@ namespace AlphaISA int nlu; // not last used entry (for replacement) void nextnlu() { if (++nlu >= size) nlu = 0; } - PTE *lookup(Addr vpn, uint8_t asn) const; + PTE *lookup(Addr vpn, uint8_t asn); public: TLB(const std::string &name, int size); @@ -88,6 +88,16 @@ namespace AlphaISA // Checkpointing virtual void serialize(std::ostream &os); virtual void unserialize(Checkpoint *cp, const std::string §ion); + + // Most recently used page table entries + PTE *PTECache[3]; + inline void flushCache() { memset(PTECache, 0, 3 * sizeof(PTE*)); } + inline PTE* updateCache(PTE *pte) { + PTECache[2] = PTECache[1]; + PTECache[1] = PTECache[0]; + PTECache[0] = pte; + return pte; + } }; class ITB : public TLB @@ -102,7 +112,7 @@ namespace AlphaISA ITB(const std::string &name, int size); virtual void regStats(); - Fault translate(RequestPtr &req, ThreadContext *tc) const; + Fault translate(RequestPtr &req, ThreadContext *tc); }; class DTB : public TLB @@ -125,7 +135,7 @@ namespace AlphaISA DTB(const std::string &name, int size); virtual void regStats(); - Fault translate(RequestPtr &req, ThreadContext *tc, bool write) const; + Fault translate(RequestPtr &req, ThreadContext *tc, bool write); }; } diff --git a/src/arch/isa_parser.py b/src/arch/isa_parser.py index 64a120c4c..fb398d152 100755 --- a/src/arch/isa_parser.py +++ b/src/arch/isa_parser.py @@ -1464,6 +1464,25 @@ class MemOperand(Operand): def makeAccSize(self): return self.size +class UPCOperand(Operand): + def makeConstructor(self): + return '' + + def makeRead(self): + return '%s = xc->readMicroPC();\n' % self.base_name + + def makeWrite(self): + return 'xc->setMicroPC(%s);\n' % self.base_name + +class NUPCOperand(Operand): + def makeConstructor(self): + return '' + + def makeRead(self): + return '%s = xc->readNextMicroPC();\n' % self.base_name + + def makeWrite(self): + return 'xc->setNextMicroPC(%s);\n' % self.base_name class NPCOperand(Operand): def makeConstructor(self): diff --git a/src/arch/x86/insts/microregop.cc b/src/arch/x86/insts/microregop.cc index e67a82d4f..b6a30d6a7 100644 --- a/src/arch/x86/insts/microregop.cc +++ b/src/arch/x86/insts/microregop.cc @@ -69,19 +69,23 @@ namespace X86ISA DPRINTF(Sparc, "flagMask = %#x\n", flagMask); uint64_t flags = oldFlags & ~flagMask; if(flagMask & CFBit) + { if(findCarry(dataSize*8, _dest, _src1, _src2)) flags |= CFBit; if(subtract) flags ^= CFBit; + } if(flagMask & PFBit && findParity(dataSize*8, _dest)) flags |= PFBit; if(flagMask & ECFBit && findCarry(dataSize*8, _dest, _src1, _src2)) flags |= ECFBit; if(flagMask & AFBit) + { if(findCarry(4, _dest, _src1, _src2)) flags |= AFBit; if(subtract) flags ^= AFBit; + } if(flagMask & EZFBit && findZero(dataSize*8, _dest)) flags |= EZFBit; if(flagMask & ZFBit && findZero(dataSize*8, _dest)) @@ -112,8 +116,9 @@ namespace X86ISA panic("This condition is not implemented!"); case ConditionTests::MSTRC: panic("This condition is not implemented!"); - case ConditionTests::STRZnZF: - panic("This condition is not implemented!"); + case ConditionTests::STRZnEZF: + return !ccflags.EZF & ccflags.ZF; + //And no interrupts or debug traps are waiting case ConditionTests::OF: return ccflags.OF; case ConditionTests::CF: @@ -144,8 +149,9 @@ namespace X86ISA panic("This condition is not implemented!"); case ConditionTests::NotMSTRC: panic("This condition is not implemented!"); - case ConditionTests::NotSTRZnZF: - panic("This condition is not implemented!"); + case ConditionTests::STRnZnEZF: + return !ccflags.EZF & !ccflags.ZF; + //And no interrupts or debug traps are waiting case ConditionTests::NotOF: return !ccflags.OF; case ConditionTests::NotCF: diff --git a/src/arch/x86/insts/microregop.hh b/src/arch/x86/insts/microregop.hh index f465ac651..f6bebb763 100644 --- a/src/arch/x86/insts/microregop.hh +++ b/src/arch/x86/insts/microregop.hh @@ -73,7 +73,7 @@ namespace X86ISA MSTRZ, STRZ, MSTRC, - STRZnZF, + STRZnEZF, OF, CF, ZF, @@ -91,7 +91,7 @@ namespace X86ISA NotMSTRZ, NotSTRZ, NotMSTRC, - NotSTRZnZF, + STRnZnEZF, NotOF, NotCF, NotZF, diff --git a/src/arch/x86/isa/decoder/one_byte_opcodes.isa b/src/arch/x86/isa/decoder/one_byte_opcodes.isa index cce07d6fe..ee7fbc683 100644 --- a/src/arch/x86/isa/decoder/one_byte_opcodes.isa +++ b/src/arch/x86/isa/decoder/one_byte_opcodes.isa @@ -342,8 +342,8 @@ 0x3: stos_Yv_rAX(); 0x4: lods_Al_Xb(); 0x5: lods_rAX_Xv(); - 0x6: scas_Yb_Al(); - 0x7: scas_Yv_rAX(); + 0x6: StringInst::SCAS(Yb); + 0x7: StringInst::SCAS(Yv); } format Inst { 0x16: MOV(Bb,Ib); diff --git a/src/arch/x86/isa/formats/formats.isa b/src/arch/x86/isa/formats/formats.isa index 1e7bb4a74..6906413c0 100644 --- a/src/arch/x86/isa/formats/formats.isa +++ b/src/arch/x86/isa/formats/formats.isa @@ -99,6 +99,10 @@ //thing on a variety of inputs ##include "multi.isa" +//Include a format which implements an extra layer of decoding to handle the +//repe and repne prefixes +##include "string.isa" + //Include a format which makes instructions who's sole purpose is to generate //a syscall. ##include "syscall.isa" diff --git a/src/arch/x86/isa/formats/string.isa b/src/arch/x86/isa/formats/string.isa new file mode 100644 index 000000000..cd182ff62 --- /dev/null +++ b/src/arch/x86/isa/formats/string.isa @@ -0,0 +1,88 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2007 The Hewlett-Packard Development Company +// All rights reserved. +// +// Redistribution and use of this software in source and binary forms, +// with or without modification, are permitted provided that the +// following conditions are met: +// +// The software must be used only for Non-Commercial Use which means any +// use which is NOT directed to receiving any direct monetary +// compensation for, or commercial advantage from such use. Illustrative +// examples of non-commercial use are academic research, personal study, +// teaching, education and corporate research & development. +// Illustrative examples of commercial use are distributing products for +// commercial advantage and providing services using the software for +// commercial advantage. +// +// If you wish to use this software or functionality therein that may be +// covered by patents for commercial use, please contact: +// Director of Intellectual Property Licensing +// Office of Strategy and Technology +// Hewlett-Packard Company +// 1501 Page Mill Road +// Palo Alto, California 94304 +// +// Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. Redistributions +// in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or +// other materials provided with the distribution. Neither the name of +// the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. No right of +// sublicense is granted herewith. Derivatives of the software and +// output created using the software may be prepared, but only for +// Non-Commercial Uses. Derivatives of the software may be shared with +// others provided: (i) the others agree to abide by the list of +// conditions herein which includes the Non-Commercial Use restrictions; +// and (ii) such Derivatives of the software include the above copyright +// notice to acknowledge the contribution from this software where +// applicable, this list of conditions and the disclaimer below. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +////////////////////////////////////////////////////////////////////////// +// +// String Instructions +// +////////////////////////////////////////////////////////////////////////// + +def format StringInst(*opTypeSet) {{ + allBlocks = OutputBlocks() + + regBlocks = specializeInst(Name, list(opTypeSet), EmulEnv()) + eBlocks = specializeInst(Name + "_E", list(opTypeSet), EmulEnv()) + nBlocks = specializeInst(Name + "_N", list(opTypeSet), EmulEnv()) + + for blocks in (regBlocks, eBlocks, nBlocks): + allBlocks.header_output += blocks.header_output + allBlocks.decoder_output += blocks.decoder_output + allBlocks.exec_output += blocks.exec_output + + allBlocks.decode_block = ''' + if (LEGACY_REP) { + %s + } else if (LEGACY_REPNE) { + %s + } else { + %s + } + ''' % (eBlocks.decode_block, nBlocks.decode_block, regBlocks.decode_block) + + (header_output, decoder_output, + decode_block, exec_output) = allBlocks.makeList() +}}; diff --git a/src/arch/x86/isa/insts/string/scan_string.py b/src/arch/x86/isa/insts/string/scan_string.py index cd3d5b549..b038cc00a 100644 --- a/src/arch/x86/isa/insts/string/scan_string.py +++ b/src/arch/x86/isa/insts/string/scan_string.py @@ -53,16 +53,55 @@ # # Authors: Gabe Black -microcode = "" -#let {{ -# class SCAS(Inst): -# "GenFault ${new UnimpInstFault}" -# class SCASB(Inst): -# "GenFault ${new UnimpInstFault}" -# class SCASW(Inst): -# "GenFault ${new UnimpInstFault}" -# class SCASD(Inst): -# "GenFault ${new UnimpInstFault}" -# class SCASQ(Inst): -# "GenFault ${new UnimpInstFault}" -#}}; +microcode = ''' +def macroop SCAS_M { + # Find the constant we need to either add or subtract from rdi + ruflag t0, 10 + movi t2, t2, dsz, flags=(CEZF,), dataSize=asz + subi t3, t0, dsz, dataSize=asz + mov t2, t2, t3, flags=(nCEZF,), dataSize=asz + + ld t1, es, [1, t0, rdi] + sub t0, t1, rax, flags=(OF, SF, ZF, AF, PF, CF) + + add rdi, rdi, t2, dataSize=asz +}; + +# +# Versions which have the rep prefix. These could benefit from some loop +# unrolling. +# + +def macroop SCAS_E_M { + # Find the constant we need to either add or subtract from rdi + ruflag t0, 10 + movi t2, t2, dsz, flags=(CEZF,), dataSize=asz + subi t3, t0, dsz, dataSize=asz + mov t2, t2, t3, flags=(nCEZF,), dataSize=asz + + ld t1, es, [1, t0, rdi] + sub t0, t1, rax, flags=(OF, SF, ZF, AF, PF, CF) + + subi rcx, rcx, 1, flags=(EZF,), dataSize=asz + add rdi, rdi, t2, dataSize=asz + bri t0, 4, flags=(CSTRZnEZF,) + fault "NoFault" +}; + +def macroop SCAS_N_M { + # Find the constant we need to either add or subtract from rdi + ruflag t0, 10 + movi t2, t2, dsz, flags=(CEZF,), dataSize=asz + subi t3, t0, dsz, dataSize=asz + mov t2, t2, t3, flags=(nCEZF,), dataSize=asz + + ld t1, es, [1, t0, rdi] + sub t0, t1, rax, flags=(OF, SF, ZF, AF, PF, CF) + + subi rcx, rcx, 1, flags=(EZF,), dataSize=asz + add rdi, rdi, t2, dataSize=asz + bri t0, 4, flags=(CSTRnZnEZF,) + fault "NoFault" +}; + +''' diff --git a/src/arch/x86/isa/microasm.isa b/src/arch/x86/isa/microasm.isa index 5c567a30c..af3148631 100644 --- a/src/arch/x86/isa/microasm.isa +++ b/src/arch/x86/isa/microasm.isa @@ -89,7 +89,7 @@ let {{ "index" : "env.index", "base" : "env.base", "dsz" : "env.dataSize", - "osz" : "env.operandSize", + "asz" : "env.addressSize", "ssz" : "env.stackSize" } assembler.symbols.update(symbols) @@ -107,11 +107,13 @@ let {{ assembler.symbols[flag] = flag + "Bit" for cond in ('True', 'False', 'ECF', 'EZF', 'SZnZF', - 'MSTRZ', 'STRZ', 'MSTRC', 'STRZnZF', + 'MSTRZ', 'STRZ', 'MSTRC', 'OF', 'CF', 'ZF', 'CvZF', 'SF', 'PF', 'SxOF', 'SxOvZF'): assembler.symbols["C%s" % cond] = "ConditionTests::%s" % cond assembler.symbols["nC%s" % cond] = "ConditionTests::Not%s" % cond + assembler.symbols["CSTRZnEZF"] = "ConditionTests::STRZnEZF" + assembler.symbols["CSTRnZnEZF"] = "ConditionTests::STRnZnEZF" assembler.symbols["CTrue"] = "ConditionTests::True" assembler.symbols["CFalse"] = "ConditionTests::False" diff --git a/src/arch/x86/isa/microops/regop.isa b/src/arch/x86/isa/microops/regop.isa index ac88be657..608b86a70 100644 --- a/src/arch/x86/isa/microops/regop.isa +++ b/src/arch/x86/isa/microops/regop.isa @@ -89,7 +89,7 @@ def template MicroRegOpExecute {{ }}; def template MicroRegOpImmExecute {{ - Fault %(class_name)sImm::execute(%(CPU_exec_context)s *xc, + Fault %(class_name)s::execute(%(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const { Fault fault = NoFault; @@ -140,21 +140,21 @@ def template MicroRegOpDeclare {{ def template MicroRegOpImmDeclare {{ - class %(class_name)sImm : public %(base_class)s + class %(class_name)s : public %(base_class)s { protected: void buildMe(); public: - %(class_name)sImm(ExtMachInst _machInst, + %(class_name)s(ExtMachInst _machInst, const char * instMnem, bool isMicro, bool isDelayed, bool isFirst, bool isLast, - RegIndex _src1, uint8_t _imm8, RegIndex _dest, + RegIndex _src1, uint16_t _imm8, RegIndex _dest, uint8_t _dataSize, uint16_t _ext); - %(class_name)sImm(ExtMachInst _machInst, + %(class_name)s(ExtMachInst _machInst, const char * instMnem, - RegIndex _src1, uint8_t _imm8, RegIndex _dest, + RegIndex _src1, uint16_t _imm8, RegIndex _dest, uint8_t _dataSize, uint16_t _ext); %(BasicExecDeclare)s @@ -196,14 +196,14 @@ def template MicroRegOpConstructor {{ def template MicroRegOpImmConstructor {{ - inline void %(class_name)sImm::buildMe() + inline void %(class_name)s::buildMe() { %(constructor)s; } - inline %(class_name)sImm::%(class_name)sImm( + inline %(class_name)s::%(class_name)s( ExtMachInst machInst, const char * instMnem, - RegIndex _src1, uint8_t _imm8, RegIndex _dest, + RegIndex _src1, uint16_t _imm8, RegIndex _dest, uint8_t _dataSize, uint16_t _ext) : %(base_class)s(machInst, "%(mnemonic)s", instMnem, false, false, false, false, @@ -213,10 +213,10 @@ def template MicroRegOpImmConstructor {{ buildMe(); } - inline %(class_name)sImm::%(class_name)sImm( + inline %(class_name)s::%(class_name)s( ExtMachInst machInst, const char * instMnem, bool isMicro, bool isDelayed, bool isFirst, bool isLast, - RegIndex _src1, uint8_t _imm8, RegIndex _dest, + RegIndex _src1, uint16_t _imm8, RegIndex _dest, uint8_t _dataSize, uint16_t _ext) : %(base_class)s(machInst, "%(mnemonic)s", instMnem, isMicro, isDelayed, isFirst, isLast, @@ -310,7 +310,7 @@ let {{ exec_output = "" # A function which builds the C++ classes that implement the microops - def setUpMicroRegOp(name, Name, base, code, flagCode = "", condCheck = "true", elseCode = ";"): + def setUpMicroRegOp(name, Name, base, code, flagCode = "", condCheck = "true", elseCode = ";", imm=False): global header_output global decoder_output global exec_output @@ -321,9 +321,14 @@ let {{ "flag_code" : flagCode, "cond_check" : condCheck, "else_code" : elseCode}) - header_output += MicroRegOpDeclare.subst(iop) - decoder_output += MicroRegOpConstructor.subst(iop) - exec_output += MicroRegOpExecute.subst(iop) + if imm: + header_output += MicroRegOpImmDeclare.subst(iop) + decoder_output += MicroRegOpImmConstructor.subst(iop) + exec_output += MicroRegOpImmExecute.subst(iop) + else: + header_output += MicroRegOpDeclare.subst(iop) + decoder_output += MicroRegOpConstructor.subst(iop) + exec_output += MicroRegOpExecute.subst(iop) checkCCFlagBits = "checkCondition(ccFlagBits)" @@ -397,10 +402,11 @@ let {{ microopClasses[name + 'i'] = RegOpChildImm - setUpMicroRegOp(name + "i", Name + "Imm", "X86ISA::RegOpImm", immCode); + setUpMicroRegOp(name + "i", Name + "Imm", "X86ISA::RegOpImm", \ + immCode, imm=True); setUpMicroRegOp(name + "i", Name + "ImmFlags", "X86ISA::RegOpImm", immCode, flagCode=immFlagCode, - condCheck=condCode, elseCode=elseCode); + condCheck=condCode, elseCode=elseCode, imm=True); # This has it's own function because Wr ops have implicit destinations def defineMicroRegOpWr(mnemonic, code, elseCode=";"): @@ -434,9 +440,11 @@ let {{ microopClasses[name + 'i'] = RegOpChildImm - setUpMicroRegOp(name + 'i', Name + "Imm", "X86ISA::RegOpImm", immCode); - setUpMicroRegOp(name + 'i', Name + "ImmFlags", "X86ISA::RegOpImm", immCode, - condCheck = checkCCFlagBits, elseCode = elseCode); + setUpMicroRegOp(name + 'i', Name + "Imm", "X86ISA::RegOpImm", \ + immCode, imm=True); + setUpMicroRegOp(name + 'i', Name + "ImmFlags", "X86ISA::RegOpImm", \ + immCode, condCheck = checkCCFlagBits, elseCode = elseCode, \ + imm=True); # This has it's own function because Rd ops don't always have two parameters def defineMicroRegOpRd(mnemonic, code): @@ -444,29 +452,52 @@ let {{ name = mnemonic.lower() class RegOpChild(RegOp): + className = Name + mnemonic = name def __init__(self, dest, src1 = "NUM_INTREGS", dataSize="env.dataSize"): super(RegOpChild, self).__init__(dest, src1, "NUM_INTREGS", None, dataSize) - self.className = Name - self.mnemonic = name microopClasses[name] = RegOpChild setUpMicroRegOp(name, Name, "X86ISA::RegOp", code); - def defineMicroRegOpImm(mnemonic, code): + def defineMicroRegOpImm(mnemonic, code, flagCode=""): Name = mnemonic name = mnemonic.lower() code = immPick + code class RegOpChild(RegOpImm): - def __init__(self, dest, src1, src2, dataSize="env.dataSize"): - super(RegOpChild, self).__init__(dest, src1, src2, None, dataSize) - self.className = Name - self.mnemonic = name + className = Name + mnemonic = name + def __init__(self, dest, src1, src2, \ + flags=None, dataSize="env.dataSize"): + super(RegOpChild, self).__init__(dest, \ + src1, src2, flags, dataSize) microopClasses[name] = RegOpChild - setUpMicroRegOp(name, Name, "X86ISA::RegOpImm", code); + setUpMicroRegOp(name, Name, "X86ISA::RegOpImm", code, imm=True); + setUpMicroRegOp(name, Name + "Flags", "X86ISA::RegOpImm", \ + code, flagCode=flagCode, imm=True); + + def defineMicroRegOpRdImm(mnemonic, code, flagCode=""): + Name = mnemonic + name = mnemonic.lower() + code = immPick + code + + class RegOpChildRdImm(RegOpImm): + className = Name + mnemonic = name + def __init__(self, dest, imm, flags=None, \ + dataSize="env.dataSize"): + super(RegOpChildRdImm, self).__init__(dest, \ + "NUM_INTREGS", imm, flags, dataSize) + + microopClasses[name] = RegOpChildRdImm + + setUpMicroRegOp(name, Name, "X86ISA::RegOpImm", code, imm=True); + setUpMicroRegOp(name, Name + "Flags", "X86ISA::RegOpImm", \ + code, flagCode=flagCode, imm=True); defineMicroRegOp('Add', 'DestReg = merge(DestReg, psrc1 + op2, dataSize)') defineMicroRegOp('Or', 'DestReg = merge(DestReg, psrc1 | op2, dataSize);', @@ -615,12 +646,17 @@ let {{ ''') defineMicroRegOpWr('Wrip', 'RIP = psrc1 + op2', elseCode="RIP = RIP;") + defineMicroRegOpWr('Br', 'nuIP = psrc1 + op2;', elseCode='nuIP = nuIP;') defineMicroRegOpWr('Wruflags', 'ccFlagBits = psrc1 ^ op2') defineMicroRegOpRd('Rdip', 'DestReg = RIP') defineMicroRegOpRd('Ruflags', 'DestReg = ccFlagBits') - defineMicroRegOpImm('Ruflag', 'DestReg = bits(ccFlagBits, imm8);', \ - flagCode = genCCFlagBitsLogic) + defineMicroRegOpRdImm('Ruflag', ''' + int flag = bits(ccFlagBits, (1 << imm8) + 0*psrc1); + DestReg = merge(DestReg, flag, dataSize); + ccFlagBits = ccFlagBits & ~EZFBit; + ccFlagBits = ccFlagBits | ((flag == 0) ? EZFBit : 0); + ''') defineMicroRegOpImm('Sext', ''' IntReg val = psrc1; diff --git a/src/arch/x86/isa/operands.isa b/src/arch/x86/isa/operands.isa index 64179ca98..098a75370 100644 --- a/src/arch/x86/isa/operands.isa +++ b/src/arch/x86/isa/operands.isa @@ -104,7 +104,9 @@ def operands {{ 'Data': ('IntReg', 'uqw', '(((data & 0x1C) == 4 ? foldOBit : 0) | data)', 'IsInteger', 6), 'rax': ('IntReg', 'uqw', '(INTREG_RAX)', 'IsInteger', 7), 'RIP': ('NPC', 'uqw', None, (None, None, 'IsControl'), 10), - 'ccFlagBits': ('IntReg', 'uqw', 'NUM_INTREGS + NumMicroIntRegs', None, 20), + 'uIP': ('UPC', 'uqw', None, (None, None, 'IsControl'), 11), + 'nuIP': ('NUPC', 'uqw', None, (None, None, 'IsControl'), 12), + 'ccFlagBits': ('IntReg', 'uqw', 'NUM_INTREGS + NumMicroIntRegs', None, 20), 'SegBase': ('ControlReg', 'uqw', 'MISCREG_SEG_BASE_BASE + segment', (None, None, ['IsSerializeAfter','IsSerializing','IsNonSpeculative']), 50), 'Mem': ('Mem', 'uqw', None, ('IsMemRef', 'IsLoad', 'IsStore'), 100) }}; diff --git a/src/base/compiler.hh b/src/base/compiler.hh index dc23ed7b3..2c655af60 100644 --- a/src/base/compiler.hh +++ b/src/base/compiler.hh @@ -40,11 +40,13 @@ #define M5_ATTR_NORETURN __attribute__((noreturn)) #define M5_PRAGMA_NORETURN(x) #define M5_DUMMY_RETURN +#define M5_VAR_USED __attribute__((unused)) #elif defined(__SUNPRO_CC) // this doesn't do anything with sun cc, but why not #define M5_ATTR_NORETURN __sun_attr__((__noreturn__)) #define M5_DUMMY_RETURN return (0); #define DO_PRAGMA(x) _Pragma(#x) +#define M5_VAR_USED #define M5_PRAGMA_NORETURN(x) DO_PRAGMA(does_not_return(x)) #else #error "Need to define compiler options in base/compiler.hh" diff --git a/src/base/range_ops.hh b/src/base/range_ops.hh new file mode 100644 index 000000000..f2b11b649 --- /dev/null +++ b/src/base/range_ops.hh @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2007 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Ali Saidi + */ + +#ifndef __BASE_RANGE_OPS_HH__ +#define __BASE_RANGE_OPS_HH__ +#include <list> +#include <vector> + +#include "base/range.hh" + +template <class T> +inline void +FilterRangeList(std::vector<Range<T> > filter_list, std::list<Range<T> > + &range_list) { + typename std::list<Range<T> >::iterator i; + for (int x = 0; x < filter_list.size(); x++) { + for (i = range_list.begin(); i != range_list.end(); ) { + // Is the range within one of our filter ranges? + if (filter_list[x] == i->start || filter_list[x] == i->end) + range_list.erase(i++); + else + i++; + } + } +} + +#endif //__BASE_RANGE_OPS_HH__ + diff --git a/src/cpu/BaseCPU.py b/src/cpu/BaseCPU.py index 8be84392d..7a51650e6 100644 --- a/src/cpu/BaseCPU.py +++ b/src/cpu/BaseCPU.py @@ -93,10 +93,11 @@ class BaseCPU(SimObject): def connectMemPorts(self, bus): for p in self._mem_ports: - exec('self.%s = bus.port' % p) + if p != 'physmem_port': + exec('self.%s = bus.port' % p) def addPrivateSplitL1Caches(self, ic, dc): - assert(len(self._mem_ports) == 2) + assert(len(self._mem_ports) == 2 or len(self._mem_ports) == 3) self.icache = ic self.dcache = dc self.icache_port = ic.cpu_side diff --git a/src/cpu/simple/AtomicSimpleCPU.py b/src/cpu/simple/AtomicSimpleCPU.py index e97f059c1..bfd1825c2 100644 --- a/src/cpu/simple/AtomicSimpleCPU.py +++ b/src/cpu/simple/AtomicSimpleCPU.py @@ -40,4 +40,5 @@ class AtomicSimpleCPU(BaseCPU): profile = Param.Latency('0ns', "trace the kernel stack") icache_port = Port("Instruction Port") dcache_port = Port("Data Port") - _mem_ports = ['icache_port', 'dcache_port'] + physmem_port = Port("Physical Memory Port") + _mem_ports = ['icache_port', 'dcache_port', 'physmem_port'] diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index 704b65f36..e2a7d5938 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -67,6 +67,10 @@ AtomicSimpleCPU::getPort(const std::string &if_name, int idx) return &dcachePort; else if (if_name == "icache_port") return &icachePort; + else if (if_name == "physmem_port") { + hasPhysMemPort = true; + return &physmemPort; + } else panic("No Such Port\n"); } @@ -83,6 +87,12 @@ AtomicSimpleCPU::init() TheISA::initCPU(tc, tc->readCpuId()); } #endif + if (hasPhysMemPort) { + bool snoop = false; + AddrRangeList pmAddrList; + physmemPort.getPeerAddressRanges(pmAddrList, snoop); + physMemAddr = *pmAddrList.begin(); + } } bool @@ -141,7 +151,8 @@ AtomicSimpleCPU::DcachePort::setPeer(Port *port) AtomicSimpleCPU::AtomicSimpleCPU(Params *p) : BaseSimpleCPU(p), tickEvent(this), width(p->width), simulate_stalls(p->simulate_stalls), - icachePort(name() + "-iport", this), dcachePort(name() + "-iport", this) + icachePort(name() + "-iport", this), dcachePort(name() + "-iport", this), + physmemPort(name() + "-iport", this), hasPhysMemPort(false) { _status = Idle; @@ -293,8 +304,12 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags) if (req->isMmapedIpr()) dcache_latency = TheISA::handleIprRead(thread->getTC(), &pkt); - else - dcache_latency = dcachePort.sendAtomic(&pkt); + else { + if (hasPhysMemPort && pkt.getAddr() == physMemAddr) + dcache_latency = physmemPort.sendAtomic(&pkt); + else + dcache_latency = dcachePort.sendAtomic(&pkt); + } dcache_access = true; assert(!pkt.isError()); @@ -402,7 +417,10 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) dcache_latency = TheISA::handleIprWrite(thread->getTC(), &pkt); } else { data = htog(data); - dcache_latency = dcachePort.sendAtomic(&pkt); + if (hasPhysMemPort && pkt.getAddr() == physMemAddr) + dcache_latency = physmemPort.sendAtomic(&pkt); + else + dcache_latency = dcachePort.sendAtomic(&pkt); } dcache_access = true; assert(!pkt.isError()); @@ -513,7 +531,12 @@ AtomicSimpleCPU::tick() Packet::Broadcast); ifetch_pkt.dataStatic(&inst); - icache_latency = icachePort.sendAtomic(&ifetch_pkt); + if (hasPhysMemPort && ifetch_pkt.getAddr() == physMemAddr) + icache_latency = physmemPort.sendAtomic(&ifetch_pkt); + else + icache_latency = icachePort.sendAtomic(&ifetch_pkt); + + // ifetch_req is initialized to read the instruction directly // into the CPU object's inst field. //} diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh index 28e883b24..96429e5b1 100644 --- a/src/cpu/simple/atomic.hh +++ b/src/cpu/simple/atomic.hh @@ -121,6 +121,8 @@ class AtomicSimpleCPU : public BaseSimpleCPU }; DcachePort dcachePort; + CpuPort physmemPort; + bool hasPhysMemPort; Request ifetch_req; Request data_read_req; Request data_write_req; @@ -128,6 +130,8 @@ class AtomicSimpleCPU : public BaseSimpleCPU bool dcache_access; Tick dcache_latency; + Range<Addr> physMemAddr; + public: virtual Port *getPort(const std::string &if_name, int idx = -1); diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh index 0550aa036..22ffff3b9 100644 --- a/src/cpu/simple/base.hh +++ b/src/cpu/simple/base.hh @@ -291,11 +291,15 @@ class BaseSimpleCPU : public BaseCPU } uint64_t readPC() { return thread->readPC(); } + uint64_t readMicroPC() { return thread->readMicroPC(); } uint64_t readNextPC() { return thread->readNextPC(); } + uint64_t readNextMicroPC() { return thread->readNextMicroPC(); } uint64_t readNextNPC() { return thread->readNextNPC(); } void setPC(uint64_t val) { thread->setPC(val); } + void setMicroPC(uint64_t val) { thread->setMicroPC(val); } void setNextPC(uint64_t val) { thread->setNextPC(val); } + void setNextMicroPC(uint64_t val) { thread->setNextMicroPC(val); } void setNextNPC(uint64_t val) { thread->setNextNPC(val); } MiscReg readMiscRegNoEffect(int misc_reg) diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh index f32b61ee5..2e1ebd766 100644 --- a/src/cpu/static_inst.hh +++ b/src/cpu/static_inst.hh @@ -353,9 +353,7 @@ class StaticInst : public StaticInstBase StaticInst(const char *_mnemonic, ExtMachInst _machInst, OpClass __opClass) : StaticInstBase(__opClass), machInst(_machInst), mnemonic(_mnemonic), cachedDisassembly(0) - { - memset(&recentDecodes, 0, 2 * sizeof(cacheElement)); - } + { } public: @@ -459,6 +457,9 @@ class StaticInst : public StaticInstBase struct cacheElement { Addr page_addr; AddrDecodePage *decodePage; + + cacheElement() + :decodePage(NULL) { } } ; /// An array of recently decoded instructions. diff --git a/src/dev/io_device.hh b/src/dev/io_device.hh index e65400ca2..876166adb 100644 --- a/src/dev/io_device.hh +++ b/src/dev/io_device.hh @@ -266,8 +266,7 @@ class DmaDevice : public PioDevice void dmaWrite(Addr addr, int size, Event *event, uint8_t *data) { - dmaPort->dmaAction(MemCmd::WriteInvalidateReq, - addr, size, event, data); + dmaPort->dmaAction(MemCmd::WriteReq, addr, size, event, data); } void dmaRead(Addr addr, int size, Event *event, uint8_t *data) diff --git a/src/mem/Bridge.py b/src/mem/Bridge.py index 8377221cd..b48e1684d 100644 --- a/src/mem/Bridge.py +++ b/src/mem/Bridge.py @@ -40,5 +40,7 @@ class Bridge(MemObject): delay = Param.Latency('0ns', "The latency of this bridge") nack_delay = Param.Latency('0ns', "The latency of this bridge") write_ack = Param.Bool(False, "Should this bridge ack writes") - fix_partial_write_a = Param.Bool(False, "Should this bridge fixup partial block writes") - fix_partial_write_b = Param.Bool(False, "Should this bridge fixup partial block writes") + filter_ranges_a = VectorParam.AddrRange([], + "What addresses shouldn't be passed through the side of the bridge") + filter_ranges_b = VectorParam.AddrRange([], + "What addresses shouldn't be passed through the side of the bridge") diff --git a/src/mem/bridge.cc b/src/mem/bridge.cc index 6cfa5a2ac..c502c5130 100644 --- a/src/mem/bridge.cc +++ b/src/mem/bridge.cc @@ -37,6 +37,7 @@ #include <algorithm> +#include "base/range_ops.hh" #include "base/trace.hh" #include "mem/bridge.hh" #include "params/Bridge.hh" @@ -44,9 +45,10 @@ Bridge::BridgePort::BridgePort(const std::string &_name, Bridge *_bridge, BridgePort *_otherPort, int _delay, int _nack_delay, int _req_limit, - int _resp_limit, bool fix_partial_write) + int _resp_limit, + std::vector<Range<Addr> > filter_ranges) : Port(_name), bridge(_bridge), otherPort(_otherPort), - delay(_delay), nackDelay(_nack_delay), fixPartialWrite(fix_partial_write), + delay(_delay), nackDelay(_nack_delay), filterRanges(filter_ranges), outstandingResponses(0), queuedRequests(0), inRetry(false), reqQueueLimit(_req_limit), respQueueLimit(_resp_limit), sendEvent(this) { @@ -55,9 +57,9 @@ Bridge::BridgePort::BridgePort(const std::string &_name, Bridge::Bridge(Params *p) : MemObject(p->name), portA(p->name + "-portA", this, &portB, p->delay, p->nack_delay, - p->req_size_a, p->resp_size_a, p->fix_partial_write_a), + p->req_size_a, p->resp_size_a, p->filter_ranges_a), portB(p->name + "-portB", this, &portA, p->delay, p->nack_delay, - p->req_size_b, p->resp_size_b, p->fix_partial_write_b), + p->req_size_b, p->resp_size_b, p->filter_ranges_b), ackWrites(p->write_ack), _params(p) { if (ackWrites) @@ -243,17 +245,6 @@ Bridge::BridgePort::trySend() PacketPtr pkt = buf->pkt; - // Ugly! @todo When multilevel coherence works this will be removed - if (pkt->cmd == MemCmd::WriteInvalidateReq && fixPartialWrite && - !pkt->wasNacked()) { - PacketPtr funcPkt = new Packet(pkt->req, MemCmd::WriteReq, - Packet::Broadcast); - funcPkt->dataStatic(pkt->getPtr<uint8_t>()); - sendFunctional(funcPkt); - pkt->cmd = MemCmd::WriteReq; - delete funcPkt; - } - DPRINTF(BusBridge, "trySend: origSrc %d dest %d addr 0x%x\n", buf->origSrc, pkt->getDest(), pkt->getAddr()); @@ -313,17 +304,6 @@ Bridge::BridgePort::recvRetry() Tick Bridge::BridgePort::recvAtomic(PacketPtr pkt) { - // fix partial atomic writes... similar to the timing code that does the - // same... will be removed once our code gets this right - if (pkt->cmd == MemCmd::WriteInvalidateReq && fixPartialWrite) { - - PacketPtr funcPkt = new Packet(pkt->req, MemCmd::WriteReq, - Packet::Broadcast); - funcPkt->dataStatic(pkt->getPtr<uint8_t>()); - otherPort->sendFunctional(funcPkt); - delete funcPkt; - pkt->cmd = MemCmd::WriteReq; - } return delay + otherPort->sendAtomic(pkt); } @@ -355,6 +335,7 @@ Bridge::BridgePort::getDeviceAddressRanges(AddrRangeList &resp, bool &snoop) { otherPort->getPeerAddressRanges(resp, snoop); + FilterRangeList(filterRanges, resp); // we don't allow snooping across bridges snoop = false; } diff --git a/src/mem/bridge.hh b/src/mem/bridge.hh index d3bbf2ddf..82001948e 100644 --- a/src/mem/bridge.hh +++ b/src/mem/bridge.hh @@ -70,7 +70,8 @@ class Bridge : public MemObject /** Min delay to respond to a nack. */ Tick nackDelay; - bool fixPartialWrite; + /** Pass ranges from one side of the bridge to the other? */ + std::vector<Range<Addr> > filterRanges; class PacketBuffer : public Packet::SenderState { @@ -156,7 +157,8 @@ class Bridge : public MemObject /** Constructor for the BusPort.*/ BridgePort(const std::string &_name, Bridge *_bridge, BridgePort *_otherPort, int _delay, int _nack_delay, - int _req_limit, int _resp_limit, bool fix_partial_write); + int _req_limit, int _resp_limit, + std::vector<Range<Addr> > filter_ranges); protected: diff --git a/src/mem/bus.cc b/src/mem/bus.cc index cb359734b..42c4431bb 100644 --- a/src/mem/bus.cc +++ b/src/mem/bus.cc @@ -84,6 +84,7 @@ Bus::deletePortRefs(Port *p) if (funcPort == bp) return; interfaces.erase(bp->getId()); + clearBusCache(); delete bp; } @@ -176,7 +177,16 @@ Bus::recvTiming(PacketPtr pkt) DPRINTF(Bus, "recvTiming: packet src %d dest %d addr 0x%x cmd %s\n", src, pkt->getDest(), pkt->getAddr(), pkt->cmdString()); - BusPort *src_port = (src == defaultId) ? defaultPort : interfaces[src]; + BusPort *src_port; + if (src == defaultId) + src_port = defaultPort; + else { + src_port = checkBusCache(src); + if (src_port == NULL) { + src_port = interfaces[src]; + updateBusCache(src, src_port); + } + } // If the bus is busy, or other devices are in line ahead of the current // one, put this device on the retry list. @@ -201,25 +211,28 @@ Bus::recvTiming(PacketPtr pkt) dest_port_id = findPort(pkt->getAddr()); dest_port = (dest_port_id == defaultId) ? defaultPort : interfaces[dest_port_id]; - for (SnoopIter s_iter = snoopPorts.begin(); - s_iter != snoopPorts.end(); - s_iter++) { + SnoopIter s_end = snoopPorts.end(); + for (SnoopIter s_iter = snoopPorts.begin(); s_iter != s_end; s_iter++) { BusPort *p = *s_iter; if (p != dest_port && p != src_port) { -#ifndef NDEBUG // cache is not allowed to refuse snoop - bool success = p->sendTiming(pkt); + bool success M5_VAR_USED = p->sendTiming(pkt); assert(success); -#else - // avoid unused variable warning - p->sendTiming(pkt); -#endif } } } else { assert(dest >= 0 && dest < maxId); assert(dest != src); // catch infinite loops dest_port_id = dest; + if (dest_port_id == defaultId) + dest_port = defaultPort; + else { + dest_port = checkBusCache(dest); + if (dest_port == NULL) { + dest_port = interfaces[dest_port_id]; + // updateBusCache(dest_port_id, dest_port); + } + } dest_port = (dest_port_id == defaultId) ? defaultPort : interfaces[dest_port_id]; } @@ -291,15 +304,19 @@ Bus::findPort(Addr addr) /* An interval tree would be a better way to do this. --ali. */ int dest_id = -1; - PortIter i = portMap.find(RangeSize(addr,1)); - if (i != portMap.end()) - dest_id = i->second; + dest_id = checkPortCache(addr); + if (dest_id == -1) { + PortIter i = portMap.find(RangeSize(addr,1)); + if (i != portMap.end()) + dest_id = i->second; + updatePortCache(dest_id, i->first.start, i->first.end); + } // Check if this matches the default range if (dest_id == -1) { - for (AddrRangeIter iter = defaultRange.begin(); - iter != defaultRange.end(); iter++) { - if (*iter == addr) { + AddrRangeIter a_end = defaultRange.end(); + for (AddrRangeIter i = defaultRange.begin(); i != a_end; i++) { + if (*i == addr) { DPRINTF(Bus, " found addr %#llx on default\n", addr); return defaultId; } @@ -340,8 +357,16 @@ Bus::recvAtomic(PacketPtr pkt) int orig_src = pkt->getSrc(); int target_port_id = findPort(pkt->getAddr()); - Port *target_port = (target_port_id == defaultId) ? - defaultPort : interfaces[target_port_id]; + BusPort *target_port; + if (target_port_id == defaultId) + target_port = defaultPort; + else { + target_port = checkBusCache(target_port_id); + if (target_port == NULL) { + target_port = interfaces[target_port_id]; + updateBusCache(target_port_id, target_port); + } + } SnoopIter s_end = snoopPorts.end(); for (SnoopIter s_iter = snoopPorts.begin(); s_iter != s_end; s_iter++) { @@ -406,9 +431,8 @@ Bus::recvFunctional(PacketPtr pkt) assert(pkt->isRequest()); // hasn't already been satisfied - for (SnoopIter s_iter = snoopPorts.begin(); - s_iter != snoopPorts.end(); - s_iter++) { + SnoopIter s_end = snoopPorts.end(); + for (SnoopIter s_iter = snoopPorts.begin(); s_iter != s_end; s_iter++) { BusPort *p = *s_iter; if (p != port && p->getId() != src_id) { p->sendFunctional(pkt); @@ -433,11 +457,16 @@ Bus::recvStatusChange(Port::Status status, int id) bool snoops; AddrRangeIter iter; + if (inRecvStatusChange.count(id)) + return; + inRecvStatusChange.insert(id); + assert(status == Port::RangeChange && "The other statuses need to be implemented."); DPRINTF(BusAddrRanges, "received RangeChange from device id %d\n", id); + clearPortCache(); if (id == defaultId) { defaultRange.clear(); // Only try to update these ranges if the user set a default responder. @@ -499,6 +528,7 @@ Bus::recvStatusChange(Port::Status status, int id) if (id != defaultId && defaultPort) defaultPort->sendStatusChange(Port::RangeChange); + inRecvStatusChange.erase(id); } void @@ -557,14 +587,14 @@ Bus::findBlockSize(int id) int max_bs = -1; - for (PortIter portIter = portMap.begin(); - portIter != portMap.end(); portIter++) { - int tmp_bs = interfaces[portIter->second]->peerBlockSize(); + PortIter p_end = portMap.end(); + for (PortIter p_iter = portMap.begin(); p_iter != p_end; p_iter++) { + int tmp_bs = interfaces[p_iter->second]->peerBlockSize(); if (tmp_bs > max_bs) max_bs = tmp_bs; } - for (SnoopIter s_iter = snoopPorts.begin(); - s_iter != snoopPorts.end(); s_iter++) { + SnoopIter s_end = snoopPorts.end(); + for (SnoopIter s_iter = snoopPorts.begin(); s_iter != s_end; s_iter++) { int tmp_bs = (*s_iter)->peerBlockSize(); if (tmp_bs > max_bs) max_bs = tmp_bs; diff --git a/src/mem/bus.hh b/src/mem/bus.hh index 06ccd4ac0..0c594c463 100644 --- a/src/mem/bus.hh +++ b/src/mem/bus.hh @@ -38,6 +38,7 @@ #define __MEM_BUS_HH__ #include <string> +#include <set> #include <list> #include <inttypes.h> @@ -180,6 +181,60 @@ class Bus : public MemObject */ int findPort(Addr addr); + // Cache for the findPort function storing recently used ports from portMap + struct PortCache { + bool valid; + int id; + Addr start; + Addr end; + }; + + PortCache portCache[3]; + + // Checks the cache and returns the id of the port that has the requested + // address within its range + inline int checkPortCache(Addr addr) { + if (portCache[0].valid && addr >= portCache[0].start && + addr < portCache[0].end) { + return portCache[0].id; + } + if (portCache[1].valid && addr >= portCache[1].start && + addr < portCache[1].end) { + return portCache[1].id; + } + if (portCache[2].valid && addr >= portCache[2].start && + addr < portCache[2].end) { + return portCache[2].id; + } + + return -1; + } + + // Clears the earliest entry of the cache and inserts a new port entry + inline void updatePortCache(short id, Addr start, Addr end) { + portCache[2].valid = portCache[1].valid; + portCache[2].id = portCache[1].id; + portCache[2].start = portCache[1].start; + portCache[2].end = portCache[1].end; + + portCache[1].valid = portCache[0].valid; + portCache[1].id = portCache[0].id; + portCache[1].start = portCache[0].start; + portCache[1].end = portCache[0].end; + + portCache[0].valid = true; + portCache[0].id = id; + portCache[0].start = start; + portCache[0].end = end; + } + + // Clears the cache. Needs to be called in constructor. + inline void clearPortCache() { + portCache[2].valid = false; + portCache[1].valid = false; + portCache[0].valid = false; + } + /** Process address range request. * @param resp addresses that we can respond to * @param snoop addresses that we would like to snoop @@ -199,6 +254,7 @@ class Bus : public MemObject BusFreeEvent busIdle; bool inRetry; + std::set<int> inRecvStatusChange; /** max number of bus ids we've handed out so far */ short maxId; @@ -246,6 +302,54 @@ class Bus : public MemObject int cachedBlockSize; bool cachedBlockSizeValid; + // Cache for the peer port interfaces + struct BusCache { + bool valid; + short id; + BusPort *port; + }; + + BusCache busCache[3]; + + // Checks the peer port interfaces cache for the port id and returns + // a pointer to the matching port + inline BusPort* checkBusCache(short id) { + if (busCache[0].valid && id == busCache[0].id) { + return busCache[0].port; + } + if (busCache[1].valid && id == busCache[1].id) { + return busCache[1].port; + } + if (busCache[2].valid && id == busCache[2].id) { + return busCache[2].port; + } + + return NULL; + } + + // Replaces the earliest entry in the cache with a new entry + inline void updateBusCache(short id, BusPort *port) { + busCache[2].valid = busCache[1].valid; + busCache[2].id = busCache[1].id; + busCache[2].port = busCache[1].port; + + busCache[1].valid = busCache[0].valid; + busCache[1].id = busCache[0].id; + busCache[1].port = busCache[0].port; + + busCache[0].valid = true; + busCache[0].id = id; + busCache[0].port = port; + } + + // Invalidates the cache. Needs to be called in constructor. + inline void clearBusCache() { + busCache[2].valid = false; + busCache[1].valid = false; + busCache[0].valid = false; + } + + public: /** A function used to return the port associated with this bus object. */ @@ -270,6 +374,8 @@ class Bus : public MemObject fatal("Bus width must be positive\n"); if (clock <= 0) fatal("Bus clock period must be positive\n"); + clearBusCache(); + clearPortCache(); } }; diff --git a/src/mem/cache/BaseCache.py b/src/mem/cache/BaseCache.py index 2bf44cdf9..f6d42b1ef 100644 --- a/src/mem/cache/BaseCache.py +++ b/src/mem/cache/BaseCache.py @@ -81,4 +81,8 @@ class BaseCache(MemObject): "Only prefetch on data not on instruction accesses") cpu_side = Port("Port on side closer to CPU") mem_side = Port("Port on side closer to MEM") + cpu_side_filter_ranges = VectorParam.AddrRange([], + "What addresses shouldn't be passed through the side of the bridge") + mem_side_filter_ranges = VectorParam.AddrRange([], + "What addresses shouldn't be passed through the side of the bridge") addr_range = VectorParam.AddrRange(AllMemory, "The address range in bytes") diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc index b44468486..0c8b02cb3 100644 --- a/src/mem/cache/base_cache.cc +++ b/src/mem/cache/base_cache.cc @@ -40,9 +40,10 @@ using namespace std; -BaseCache::CachePort::CachePort(const std::string &_name, BaseCache *_cache) +BaseCache::CachePort::CachePort(const std::string &_name, BaseCache *_cache, + std::vector<Range<Addr> > filter_ranges) : SimpleTimingPort(_name, _cache), cache(_cache), otherPort(NULL), - blocked(false), mustSendRetry(false) + blocked(false), mustSendRetry(false), filterRanges(filter_ranges) { } diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh index 719ab0245..6a4eec43e 100644 --- a/src/mem/cache/base_cache.hh +++ b/src/mem/cache/base_cache.hh @@ -98,7 +98,8 @@ class BaseCache : public MemObject BaseCache *cache; protected: - CachePort(const std::string &_name, BaseCache *_cache); + CachePort(const std::string &_name, BaseCache *_cache, + std::vector<Range<Addr> > filter_ranges); virtual void recvStatusChange(Status status); @@ -124,6 +125,9 @@ class BaseCache : public MemObject bool mustSendRetry; + /** filter ranges */ + std::vector<Range<Addr> > filterRanges; + void requestBus(RequestCause cause, Tick time) { DPRINTF(CachePort, "Asserting bus request for cause %d\n", cause); @@ -367,15 +371,21 @@ class BaseCache : public MemObject */ Counter maxMisses; + std::vector<Range<Addr> > cpuSideFilterRanges; + std::vector<Range<Addr> > memSideFilterRanges; /** * Construct an instance of this parameter class. */ Params(int _hitLatency, int _blkSize, int _numMSHRs, int _numTargets, int _numWriteBuffers, - Counter _maxMisses) + Counter _maxMisses, + std::vector<Range<Addr> > cpu_side_filter_ranges, + std::vector<Range<Addr> > mem_side_filter_ranges) : hitLatency(_hitLatency), blkSize(_blkSize), numMSHRs(_numMSHRs), numTargets(_numTargets), - numWriteBuffers(_numWriteBuffers), maxMisses(_maxMisses) + numWriteBuffers(_numWriteBuffers), maxMisses(_maxMisses), + cpuSideFilterRanges(cpu_side_filter_ranges), + memSideFilterRanges(mem_side_filter_ranges) { } }; diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh index 57028a05e..821fa9702 100644 --- a/src/mem/cache/cache.hh +++ b/src/mem/cache/cache.hh @@ -72,7 +72,8 @@ class Cache : public BaseCache { public: CpuSidePort(const std::string &_name, - Cache<TagStore> *_cache); + Cache<TagStore> *_cache, + std::vector<Range<Addr> > filterRanges); // BaseCache::CachePort just has a BaseCache *; this function // lets us get back the type info we lost when we stored the @@ -95,7 +96,8 @@ class Cache : public BaseCache { public: MemSidePort(const std::string &_name, - Cache<TagStore> *_cache); + Cache<TagStore> *_cache, + std::vector<Range<Addr> > filterRanges); // BaseCache::CachePort just has a BaseCache *; this function // lets us get back the type info we lost when we stored the diff --git a/src/mem/cache/cache_builder.cc b/src/mem/cache/cache_builder.cc index 4c9592a1b..0f8b52af2 100644 --- a/src/mem/cache/cache_builder.cc +++ b/src/mem/cache/cache_builder.cc @@ -241,7 +241,8 @@ BaseCacheParams::create() // Build BaseCache param object BaseCache::Params base_params(latency, block_size, mshrs, tgts_per_mshr, write_buffers, - max_miss_count); + max_miss_count, cpu_side_filter_ranges, + mem_side_filter_ranges); //Warnings about prefetcher policy if (prefetch_policy == Enums::none) { diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index d144266ed..402e34db2 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -39,6 +39,7 @@ #include "sim/host.hh" #include "base/misc.hh" +#include "base/range_ops.hh" #include "mem/cache/cache.hh" #include "mem/cache/cache_blk.hh" @@ -61,8 +62,10 @@ Cache<TagStore>::Cache(const std::string &_name, tempBlock = new BlkType(); tempBlock->data = new uint8_t[blkSize]; - cpuSidePort = new CpuSidePort(_name + "-cpu_side_port", this); - memSidePort = new MemSidePort(_name + "-mem_side_port", this); + cpuSidePort = new CpuSidePort(_name + "-cpu_side_port", this, + params.baseParams.cpuSideFilterRanges); + memSidePort = new MemSidePort(_name + "-mem_side_port", this, + params.baseParams.memSideFilterRanges); cpuSidePort->setOtherPort(memSidePort); memSidePort->setOtherPort(cpuSidePort); @@ -88,7 +91,8 @@ Cache<TagStore>::getPort(const std::string &if_name, int idx) } else if (if_name == "mem_side") { return memSidePort; } else if (if_name == "functional") { - return new CpuSidePort(name() + "-cpu_side_funcport", this); + return new CpuSidePort(name() + "-cpu_side_funcport", this, + std::vector<Range<Addr> >()); } else { panic("Port name %s unrecognized\n", if_name); } @@ -1221,6 +1225,7 @@ getDeviceAddressRanges(AddrRangeList &resp, bool &snoop) // CPU side port doesn't snoop; it's a target only. bool dummy; otherPort->getPeerAddressRanges(resp, dummy); + FilterRangeList(filterRanges, resp); snoop = false; } @@ -1262,8 +1267,9 @@ Cache<TagStore>::CpuSidePort::recvFunctional(PacketPtr pkt) template<class TagStore> Cache<TagStore>:: CpuSidePort::CpuSidePort(const std::string &_name, - Cache<TagStore> *_cache) - : BaseCache::CachePort(_name, _cache) + Cache<TagStore> *_cache, std::vector<Range<Addr> > + filterRanges) + : BaseCache::CachePort(_name, _cache, filterRanges) { } @@ -1279,6 +1285,8 @@ Cache<TagStore>::MemSidePort:: getDeviceAddressRanges(AddrRangeList &resp, bool &snoop) { otherPort->getPeerAddressRanges(resp, snoop); + FilterRangeList(filterRanges, resp); + // Memory-side port always snoops, so unconditionally set flag for // caller. snoop = true; @@ -1416,8 +1424,9 @@ Cache<TagStore>::MemSidePort::processSendEvent() template<class TagStore> Cache<TagStore>:: -MemSidePort::MemSidePort(const std::string &_name, Cache<TagStore> *_cache) - : BaseCache::CachePort(_name, _cache) +MemSidePort::MemSidePort(const std::string &_name, Cache<TagStore> *_cache, + std::vector<Range<Addr> > filterRanges) + : BaseCache::CachePort(_name, _cache, filterRanges) { // override default send event from SimpleTimingPort delete sendEvent; |