Diffstat (limited to 'src')
29 files changed, 869 insertions, 391 deletions
diff --git a/src/SConscript b/src/SConscript index cad0736c5..0ee144747 100755 --- a/src/SConscript +++ b/src/SConscript @@ -446,7 +446,7 @@ def makeInfoPyFile(target, source, env): # Generate a file that wraps the basic top level files env.Command('python/m5/info.py', - [ '#/AUTHORS', '#/LICENSE', '#/README', '#/RELEASE_NOTES' ], + [ '#/AUTHORS', '#/LICENSE', '#/README', ], MakeAction(makeInfoPyFile, Transform("INFO"))) PySource('m5', 'python/m5/info.py') diff --git a/src/arch/generic/debugfaults.hh b/src/arch/generic/debugfaults.hh new file mode 100644 index 000000000..acffadc34 --- /dev/null +++ b/src/arch/generic/debugfaults.hh @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2010 Advanced Micro Devices + * All rights reserved. + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Gabe Black + */ + +#ifndef __ARCH_GENERIC_DEBUGFAULTS_HH__ +#define __ARCH_GENERIC_DEBUGFAULTS_HH__ + +#include "base/misc.hh" +#include "sim/faults.hh" + +#include <string> + +namespace GenericISA +{ +class M5DebugFault : public FaultBase +{ + public: + enum DebugFunc + { + PanicFunc, + FatalFunc, + WarnFunc, + WarnOnceFunc + }; + + protected: + std::string message; + DebugFunc func; + + public: + M5DebugFault(DebugFunc _func, std::string _message) : + message(_message), func(_func) + {} + + FaultName + name() const + { + switch (func) { + case PanicFunc: + return "panic fault"; + case FatalFunc: + return "fatal fault"; + case WarnFunc: + return "warn fault"; + case WarnOnceFunc: + return "warn_once fault"; + default: + panic("unrecognized debug function number\n"); + } + } + + void + invoke(ThreadContext *tc, + StaticInstPtr inst = StaticInst::nullStaticInstPtr) + { + switch (func) { + case PanicFunc: + panic(message); + break; + case FatalFunc: + fatal(message); + break; + case WarnFunc: + warn(message); + break; + case WarnOnceFunc: + warn_once(message); + break; + default: + panic("unrecognized debug function number\n"); + } + } +}; +} // namespace GenericISA + +#endif // __ARCH_GENERIC_DEBUGFAULTS_HH__ diff --git a/src/arch/x86/SConscript b/src/arch/x86/SConscript index 27de9da11..9cb774647 100644 --- a/src/arch/x86/SConscript +++ b/src/arch/x86/SConscript @@ -46,6 +46,7 @@ if env['TARGET_ISA'] == 'x86': Source('cpuid.cc') Source('emulenv.cc') Source('faults.cc') + Source('insts/badmicroop.cc') Source('insts/microfpop.cc') Source('insts/microldstop.cc') Source('insts/micromediaop.cc') diff --git a/src/arch/x86/insts/badmicroop.cc b/src/arch/x86/insts/badmicroop.cc new file mode 100644 index 000000000..ef493f250 --- /dev/null +++ b/src/arch/x86/insts/badmicroop.cc @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2011 Advanced Micro Devices + * All rights reserved. + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ + +#include "arch/x86/insts/badmicroop.hh" +#include "arch/x86/isa_traits.hh" +#include "arch/x86/decoder.hh" + +namespace X86ISA +{ + +// This microop needs to be allocated on the heap even though it could +// theoretically be statically allocated. The reference counted pointer would +// try to delete the static memory when it was destructed. +const StaticInstPtr badMicroop = + new X86ISAInst::MicroPanic(NoopMachInst, "BAD", + StaticInst::IsMicroop | StaticInst::IsLastMicroop, + "Invalid microop!", 0); + +} // namespace X86ISA diff --git a/src/arch/x86/insts/badmicroop.hh b/src/arch/x86/insts/badmicroop.hh new file mode 100644 index 000000000..57fe242c4 --- /dev/null +++ b/src/arch/x86/insts/badmicroop.hh @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2011 Advanced Micro Devices + * All rights reserved. + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
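The heap-allocation comment in badmicroop.cc above is worth unpacking: StaticInstPtr is a reference-counted pointer, and when the last reference goes away it deletes the object it manages, so that object must live on the heap. A toy illustration of the failure mode being avoided (RefCountingPtr and MicroPanic here are simplified stand-ins, not gem5's real classes):

    // Toy refcounting scheme standing in for gem5's RefCounted/StaticInstPtr.
    struct RefCounted {
        mutable int count = 0;
        virtual ~RefCounted() {}
    };

    template <class T>
    class RefCountingPtr {
        T *obj;
      public:
        explicit RefCountingPtr(T *o) : obj(o) { if (obj) obj->count++; }
        RefCountingPtr(const RefCountingPtr &p) : obj(p.obj) {
            if (obj) obj->count++;
        }
        ~RefCountingPtr() {
            if (obj && --obj->count == 0)
                delete obj;                  // assumes a heap object
        }
    };

    struct MicroPanic : RefCounted {};
    static MicroPanic staticOop;             // statically allocated

    void whyHeapMatters()
    {
        RefCountingPtr<MicroPanic> ok(new MicroPanic);   // delete is legal
        // RefCountingPtr<MicroPanic> bad(&staticOop);   // final release
        //                                               // would delete
        //                                               // static memory
    }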
+ * + * Authors: Gabe Black + */ + +#ifndef __ARCH_X86_INSTS_BADMICROOP_HH__ +#define __ARCH_X86_INSTS_BADMICROOP_HH__ + +class StaticInstPtr; + +namespace X86ISA +{ + +extern const StaticInstPtr badMicroop; + +} // namespace X86ISA + +#endif //__ARCH_X86_INSTS_BADMICROOP_HH__ diff --git a/src/arch/x86/insts/macroop.hh b/src/arch/x86/insts/macroop.hh index fcf051a37..4f4176b77 100644 --- a/src/arch/x86/insts/macroop.hh +++ b/src/arch/x86/insts/macroop.hh @@ -41,6 +41,7 @@ #define __ARCH_X86_INSTS_MACROOP_HH__ #include "arch/x86/emulenv.hh" +#include "arch/x86/insts/badmicroop.hh" #include "arch/x86/types.hh" #include "arch/x86/insts/static_inst.hh" @@ -76,8 +77,10 @@ class MacroopBase : public X86StaticInst StaticInstPtr fetchMicroop(MicroPC microPC) const { - assert(microPC < numMicroops); - return microops[microPC]; + if (microPC >= numMicroops) + return badMicroop; + else + return microops[microPC]; } std::string diff --git a/src/arch/x86/insts/microregop.cc b/src/arch/x86/insts/microregop.cc index 6aee87449..dedea0f3d 100644 --- a/src/arch/x86/insts/microregop.cc +++ b/src/arch/x86/insts/microregop.cc @@ -50,9 +50,6 @@ namespace X86ISA bool subtract) const { DPRINTF(X86, "flagMask = %#x\n", flagMask); - if (_destRegIdx[0] & IntFoldBit) { - _dest >>= 8; - } uint64_t flags = oldFlags & ~flagMask; if(flagMask & (ECFBit | CFBit)) { diff --git a/src/arch/x86/isa/includes.isa b/src/arch/x86/isa/includes.isa index 58b1fbc62..674e69e98 100644 --- a/src/arch/x86/isa/includes.isa +++ b/src/arch/x86/isa/includes.isa @@ -53,6 +53,7 @@ output header {{ #include <sstream> #include <iostream> +#include "arch/generic/debugfaults.hh" #include "arch/x86/emulenv.hh" #include "arch/x86/insts/macroop.hh" #include "arch/x86/insts/microfpop.hh" @@ -113,6 +114,7 @@ output exec {{ #include "arch/x86/regs/misc.hh" #include "arch/x86/tlb.hh" #include "base/bigint.hh" +#include "base/compiler.hh" #include "base/condcodes.hh" #include "cpu/base.hh" #include "cpu/exetrace.hh" diff --git a/src/arch/x86/isa/microops/debug.isa b/src/arch/x86/isa/microops/debug.isa index 4b2ecdd5a..220c1af97 100644 --- a/src/arch/x86/isa/microops/debug.isa +++ b/src/arch/x86/isa/microops/debug.isa @@ -45,16 +45,29 @@ output header {{ class MicroDebugBase : public X86ISA::X86MicroopBase { protected: + typedef GenericISA::M5DebugFault::DebugFunc DebugFunc; + DebugFunc func; std::string message; uint8_t cc; public: - MicroDebugBase(ExtMachInst _machInst, const char * mnem, + MicroDebugBase(ExtMachInst machInst, const char * mnem, const char * instMnem, uint64_t setFlags, - std::string _message, uint8_t _cc); + DebugFunc _func, std::string _message, uint8_t _cc) : + X86MicroopBase(machInst, mnem, instMnem, setFlags, No_OpClass), + func(_func), message(_message), cc(_cc) + {} - std::string generateDisassembly(Addr pc, - const SymbolTable *symtab) const; + std::string + generateDisassembly(Addr pc, const SymbolTable *symtab) const + { + std::stringstream response; + + printMnemonic(response, instMnem, mnemonic); + response << "\"" << message << "\""; + + return response.str(); + } }; }}; @@ -70,53 +83,31 @@ def template MicroDebugDeclare {{ }}; def template MicroDebugExecute {{ - Fault %(class_name)s::execute(%(CPU_exec_context)s *xc, + Fault + %(class_name)s::execute(%(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const { %(op_decl)s %(op_rd)s if (%(cond_test)s) { - %(func)s("%s\n", message); + return new GenericISA::M5DebugFault(func, message); + } else { + return NoFault; } - return NoFault; } }}; -output decoder {{ - inline 
MicroDebugBase::MicroDebugBase( - ExtMachInst machInst, const char * mnem, const char * instMnem, - uint64_t setFlags, std::string _message, uint8_t _cc) : - X86MicroopBase(machInst, mnem, instMnem, - setFlags, No_OpClass), - message(_message), cc(_cc) - { - } -}}; - def template MicroDebugConstructor {{ - inline %(class_name)s::%(class_name)s( + %(class_name)s::%(class_name)s( ExtMachInst machInst, const char * instMnem, uint64_t setFlags, std::string _message, uint8_t _cc) : %(base_class)s(machInst, "%(func)s", instMnem, - setFlags, _message, _cc) + setFlags, %(func_num)s, _message, _cc) { %(constructor)s; } }}; -output decoder {{ - std::string MicroDebugBase::generateDisassembly(Addr pc, - const SymbolTable *symtab) const - { - std::stringstream response; - - printMnemonic(response, instMnem, mnemonic); - response << "\"" << message << "\""; - - return response.str(); - } -}}; - let {{ class MicroDebug(X86Microop): def __init__(self, message, flags=None): @@ -142,13 +133,14 @@ let {{ header_output = "" decoder_output = "" - def buildDebugMicro(func): + def buildDebugMicro(func, func_num): global exec_output, header_output, decoder_output iop = InstObjParams(func, "Micro%sFlags" % func.capitalize(), "MicroDebugBase", {"code": "", "func": func, + "func_num": "GenericISA::M5DebugFault::%s" % func_num, "cond_test": "checkCondition(ccFlagBits, cc)"}) exec_output += MicroDebugExecute.subst(iop) header_output += MicroDebugDeclare.subst(iop) @@ -158,6 +150,7 @@ let {{ "MicroDebugBase", {"code": "", "func": func, + "func_num": "GenericISA::M5DebugFault::%s" % func_num, "cond_test": "true"}) exec_output += MicroDebugExecute.subst(iop) header_output += MicroDebugDeclare.subst(iop) @@ -169,8 +162,8 @@ let {{ global microopClasses microopClasses[func] = MicroDebugChild - buildDebugMicro("panic") - buildDebugMicro("fatal") - buildDebugMicro("warn") - buildDebugMicro("warn_once") + buildDebugMicro("panic", "PanicFunc") + buildDebugMicro("fatal", "FatalFunc") + buildDebugMicro("warn", "WarnFunc") + buildDebugMicro("warn_once", "WarnOnceFunc") }}; diff --git a/src/arch/x86/isa/microops/ldstop.isa b/src/arch/x86/isa/microops/ldstop.isa index 216a74c6c..cd649d644 100644 --- a/src/arch/x86/isa/microops/ldstop.isa +++ b/src/arch/x86/isa/microops/ldstop.isa @@ -301,6 +301,46 @@ let {{ "dataSize" : self.dataSize, "addressSize" : self.addressSize, "memFlags" : self.memFlags} return allocator + + class BigLdStOp(X86Microop): + def __init__(self, data, segment, addr, disp, + dataSize, addressSize, baseFlags, atCPL0, prefetch): + self.data = data + [self.scale, self.index, self.base] = addr + self.disp = disp + self.segment = segment + self.dataSize = dataSize + self.addressSize = addressSize + self.memFlags = baseFlags + if atCPL0: + self.memFlags += " | (CPL0FlagBit << FlagShift)" + if prefetch: + self.memFlags += " | Request::PREFETCH" + self.memFlags += " | (machInst.legacy.addr ? " + \ + "(AddrSizeFlagBit << FlagShift) : 0)" + + def getAllocator(self, microFlags): + allocString = ''' + (%(dataSize)s >= 4) ? 
+ (StaticInstPtr)(new %(class_name)sBig(machInst, + macrocodeBlock, %(flags)s, %(scale)s, %(index)s, + %(base)s, %(disp)s, %(segment)s, %(data)s, + %(dataSize)s, %(addressSize)s, %(memFlags)s)) : + (StaticInstPtr)(new %(class_name)s(machInst, + macrocodeBlock, %(flags)s, %(scale)s, %(index)s, + %(base)s, %(disp)s, %(segment)s, %(data)s, + %(dataSize)s, %(addressSize)s, %(memFlags)s)) + ''' + allocator = allocString % { + "class_name" : self.className, + "flags" : self.microFlagsText(microFlags), + "scale" : self.scale, "index" : self.index, + "base" : self.base, + "disp" : self.disp, + "segment" : self.segment, "data" : self.data, + "dataSize" : self.dataSize, "addressSize" : self.addressSize, + "memFlags" : self.memFlags} + return allocator }}; let {{ @@ -315,7 +355,8 @@ let {{ EA = bits(SegBase + scale * Index + Base + disp, addressSize * 8 - 1, 0); ''' - def defineMicroLoadOp(mnemonic, code, mem_flags="0"): + def defineMicroLoadOp(mnemonic, code, bigCode='', + mem_flags="0", big=True): global header_output global decoder_output global exec_output @@ -324,16 +365,22 @@ let {{ name = mnemonic.lower() # Build up the all register version of this micro op - iop = InstObjParams(name, Name, 'X86ISA::LdStOp', - {"code": code, - "ea_code": calculateEA}) - header_output += MicroLdStOpDeclare.subst(iop) - decoder_output += MicroLdStOpConstructor.subst(iop) - exec_output += MicroLoadExecute.subst(iop) - exec_output += MicroLoadInitiateAcc.subst(iop) - exec_output += MicroLoadCompleteAcc.subst(iop) - - class LoadOp(LdStOp): + iops = [InstObjParams(name, Name, 'X86ISA::LdStOp', + {"code": code, "ea_code": calculateEA})] + if big: + iops += [InstObjParams(name, Name + "Big", 'X86ISA::LdStOp', + {"code": bigCode, "ea_code": calculateEA})] + for iop in iops: + header_output += MicroLdStOpDeclare.subst(iop) + decoder_output += MicroLdStOpConstructor.subst(iop) + exec_output += MicroLoadExecute.subst(iop) + exec_output += MicroLoadInitiateAcc.subst(iop) + exec_output += MicroLoadCompleteAcc.subst(iop) + + base = LdStOp + if big: + base = BigLdStOp + class LoadOp(base): def __init__(self, data, segment, addr, disp = 0, dataSize="env.dataSize", addressSize="env.addressSize", @@ -346,12 +393,15 @@ let {{ microopClasses[name] = LoadOp - defineMicroLoadOp('Ld', 'Data = merge(Data, Mem, dataSize);') + defineMicroLoadOp('Ld', 'Data = merge(Data, Mem, dataSize);', + 'Data = Mem & mask(dataSize * 8);') defineMicroLoadOp('Ldst', 'Data = merge(Data, Mem, dataSize);', - '(StoreCheck << FlagShift)') + 'Data = Mem & mask(dataSize * 8);', + '(StoreCheck << FlagShift)') defineMicroLoadOp('Ldstl', 'Data = merge(Data, Mem, dataSize);', - '(StoreCheck << FlagShift) | Request::LOCKED') - defineMicroLoadOp('Ldfp', 'FpData.uqw = Mem;') + 'Data = Mem & mask(dataSize * 8);', + '(StoreCheck << FlagShift) | Request::LOCKED') + defineMicroLoadOp('Ldfp', 'FpData.uqw = Mem;', big = False) def defineMicroStoreOp(mnemonic, code, \ postCode="", completeCode="", mem_flags="0"): diff --git a/src/arch/x86/isa/microops/limmop.isa b/src/arch/x86/isa/microops/limmop.isa index 2871d5a89..ac78b090d 100644 --- a/src/arch/x86/isa/microops/limmop.isa +++ b/src/arch/x86/isa/microops/limmop.isa @@ -114,8 +114,16 @@ let {{ self.dataSize = dataSize def getAllocator(self, microFlags): - allocator = '''new %(class_name)s(machInst, macrocodeBlock, - %(flags)s, %(dest)s, %(imm)s, %(dataSize)s)''' % { + allocString = ''' + (%(dataSize)s >= 4) ? 
+ (StaticInstPtr)(new %(class_name)sBig(machInst, + macrocodeBlock, %(flags)s, %(dest)s, %(imm)s, + %(dataSize)s)) : + (StaticInstPtr)(new %(class_name)s(machInst, + macrocodeBlock, %(flags)s, %(dest)s, %(imm)s, + %(dataSize)s)) + ''' + allocator = allocString % { "class_name" : self.className, "mnemonic" : self.mnemonic, "flags" : self.microFlagsText(microFlags), @@ -152,12 +160,15 @@ let {{ let {{ # Build up the all register version of this micro op - iop = InstObjParams("limm", "Limm", 'X86MicroopBase', - {"code" : "DestReg = merge(DestReg, imm, dataSize);"}) - header_output += MicroLimmOpDeclare.subst(iop) - decoder_output += MicroLimmOpConstructor.subst(iop) - decoder_output += MicroLimmOpDisassembly.subst(iop) - exec_output += MicroLimmOpExecute.subst(iop) + iops = [InstObjParams("limm", "Limm", 'X86MicroopBase', + {"code" : "DestReg = merge(DestReg, imm, dataSize);"}), + InstObjParams("limm", "LimmBig", 'X86MicroopBase', + {"code" : "DestReg = imm & mask(dataSize * 8);"})] + for iop in iops: + header_output += MicroLimmOpDeclare.subst(iop) + decoder_output += MicroLimmOpConstructor.subst(iop) + decoder_output += MicroLimmOpDisassembly.subst(iop) + exec_output += MicroLimmOpExecute.subst(iop) iop = InstObjParams("lfpimm", "Lfpimm", 'X86MicroopBase', {"code" : "FpDestReg.uqw = imm"}) diff --git a/src/arch/x86/isa/microops/regop.isa b/src/arch/x86/isa/microops/regop.isa index ccfcb3a69..e2a51c127 100644 --- a/src/arch/x86/isa/microops/regop.isa +++ b/src/arch/x86/isa/microops/regop.isa @@ -51,6 +51,8 @@ def template MicroRegOpExecute {{ %(op_decl)s; %(op_rd)s; + IntReg result M5_VAR_USED; + if(%(cond_check)s) { %(code)s; @@ -79,6 +81,8 @@ def template MicroRegOpImmExecute {{ %(op_decl)s; %(op_rd)s; + IntReg result M5_VAR_USED; + if(%(cond_check)s) { %(code)s; @@ -224,8 +228,8 @@ let {{ MicroRegOpExecute) class RegOpMeta(type): - def buildCppClasses(self, name, Name, suffix, \ - code, flag_code, cond_check, else_code, cond_control_flag_init): + def buildCppClasses(self, name, Name, suffix, code, big_code, \ + flag_code, cond_check, else_code, cond_control_flag_init): # Globals to stick the output in global header_output @@ -235,11 +239,13 @@ let {{ # Stick all the code together so it can be searched at once allCode = "|".join((code, flag_code, cond_check, else_code, cond_control_flag_init)) + allBigCode = "|".join((big_code, flag_code, cond_check, else_code, + cond_control_flag_init)) # If op2 is used anywhere, make register and immediate versions # of this code. matcher = re.compile("(?<!\\w)(?P<prefix>s?)op2(?P<typeQual>\\.\\w+)?") - match = matcher.search(allCode) + match = matcher.search(allCode + allBigCode) if match: typeQual = "" if match.group("typeQual"): @@ -247,6 +253,7 @@ let {{ src2_name = "%spsrc2%s" % (match.group("prefix"), typeQual) self.buildCppClasses(name, Name, suffix, matcher.sub(src2_name, code), + matcher.sub(src2_name, big_code), matcher.sub(src2_name, flag_code), matcher.sub(src2_name, cond_check), matcher.sub(src2_name, else_code), @@ -254,6 +261,7 @@ let {{ imm_name = "%simm8" % match.group("prefix") self.buildCppClasses(name + "i", Name, suffix + "Imm", matcher.sub(imm_name, code), + matcher.sub(imm_name, big_code), matcher.sub(imm_name, flag_code), matcher.sub(imm_name, cond_check), matcher.sub(imm_name, else_code), @@ -264,27 +272,32 @@ let {{ # a version without it and fix up this version to use it. 
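All of these getAllocator() strings expand to the same shape of C++: a ternary on dataSize that news either the generated "Big" class or the plain one, with both branches cast to a common StaticInstPtr. Since dataSize is often env.dataSize and only known at decode time, the choice has to happen at allocation rather than in the template system. A minimal sketch of the emitted shape (class names are placeholders, and the bare pointer stands in for the real refcounted StaticInstPtr):

    struct StaticInst { virtual ~StaticInst() {} };
    typedef StaticInst *StaticInstPtr;      // refcounted in real gem5

    struct Limm    : StaticInst { /* merge-based execute */ };
    struct LimmBig : StaticInst { /* overwrite-based execute */ };

    StaticInstPtr
    allocateLimm(int dataSize)
    {
        // 32- and 64-bit writes replace the whole register, so the
        // cheaper "Big" variant is safe for dataSize >= 4; smaller
        // operands must preserve the register's untouched bytes.
        return (dataSize >= 4)
            ? (StaticInstPtr) new LimmBig
            : (StaticInstPtr) new Limm;
    }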
if flag_code != "" or cond_check != "true": self.buildCppClasses(name, Name, suffix, - code, "", "true", else_code, "") + code, big_code, "", "true", else_code, "") suffix = "Flags" + suffix # If psrc1 or psrc2 is used, we need to actually insert code to # compute it. - matcher = re.compile("(?<!\w)psrc1(?!\w)") - if matcher.search(allCode): - code = "uint64_t psrc1 = pick(SrcReg1, 0, dataSize);" + code - matcher = re.compile("(?<!\w)psrc2(?!\w)") - if matcher.search(allCode): - code = "uint64_t psrc2 = pick(SrcReg2, 1, dataSize);" + code - # Also make available versions which do sign extension - matcher = re.compile("(?<!\w)spsrc1(?!\w)") - if matcher.search(allCode): - code = "int64_t spsrc1 = signedPick(SrcReg1, 0, dataSize);" + code - matcher = re.compile("(?<!\w)spsrc2(?!\w)") - if matcher.search(allCode): - code = "int64_t spsrc2 = signedPick(SrcReg2, 1, dataSize);" + code - matcher = re.compile("(?<!\w)simm8(?!\w)") - if matcher.search(allCode): - code = "int8_t simm8 = imm8;" + code + for (big, all) in ((False, allCode), (True, allBigCode)): + prefix = "" + for (rex, decl) in ( + ("(?<!\w)psrc1(?!\w)", + "uint64_t psrc1 = pick(SrcReg1, 0, dataSize);"), + ("(?<!\w)psrc2(?!\w)", + "uint64_t psrc2 = pick(SrcReg2, 1, dataSize);"), + ("(?<!\w)spsrc1(?!\w)", + "int64_t spsrc1 = signedPick(SrcReg1, 0, dataSize);"), + ("(?<!\w)spsrc2(?!\w)", + "int64_t spsrc2 = signedPick(SrcReg2, 1, dataSize);"), + ("(?<!\w)simm8(?!\w)", + "int8_t simm8 = imm8;")): + matcher = re.compile(rex) + if matcher.search(all): + prefix += decl + "\n" + if big: + if big_code != "": + big_code = prefix + big_code + else: + code = prefix + code base = "X86ISA::RegOp" @@ -297,17 +310,26 @@ let {{ templates = immTemplates # Get everything ready for the substitution - iop = InstObjParams(name, Name + suffix, base, + iops = [InstObjParams(name, Name + suffix, base, {"code" : code, "flag_code" : flag_code, "cond_check" : cond_check, "else_code" : else_code, - "cond_control_flag_init": cond_control_flag_init}) + "cond_control_flag_init" : cond_control_flag_init})] + if big_code != "": + iops += [InstObjParams(name, Name + suffix + "Big", base, + {"code" : big_code, + "flag_code" : flag_code, + "cond_check" : cond_check, + "else_code" : else_code, + "cond_control_flag_init" : + cond_control_flag_init})] # Generate the actual code (finally!) 
- header_output += templates[0].subst(iop) - decoder_output += templates[1].subst(iop) - exec_output += templates[2].subst(iop) + for iop in iops: + header_output += templates[0].subst(iop) + decoder_output += templates[1].subst(iop) + exec_output += templates[2].subst(iop) def __new__(mcls, Name, bases, dict): @@ -322,14 +344,16 @@ let {{ cls.className = Name cls.base_mnemonic = name code = cls.code + big_code = cls.big_code flag_code = cls.flag_code cond_check = cls.cond_check else_code = cls.else_code cond_control_flag_init = cls.cond_control_flag_init # Set up the C++ classes - mcls.buildCppClasses(cls, name, Name, "", code, flag_code, - cond_check, else_code, cond_control_flag_init) + mcls.buildCppClasses(cls, name, Name, "", code, big_code, + flag_code, cond_check, else_code, + cond_control_flag_init) # Hook into the microassembler dict global microopClasses @@ -352,6 +376,7 @@ let {{ abstract = True # Default template parameter values + big_code = "" flag_code = "" cond_check = "true" else_code = ";" @@ -372,26 +397,48 @@ let {{ self.className += "Flags" def getAllocator(self, microFlags): - className = self.className - if self.mnemonic == self.base_mnemonic + 'i': - className += "Imm" - allocator = '''new %(class_name)s(machInst, macrocodeBlock, - %(flags)s, %(src1)s, %(op2)s, %(dest)s, - %(dataSize)s, %(ext)s)''' % { - "class_name" : className, - "flags" : self.microFlagsText(microFlags), - "src1" : self.src1, "op2" : self.op2, - "dest" : self.dest, - "dataSize" : self.dataSize, - "ext" : self.ext} - return allocator + if self.big_code != "": + className = self.className + if self.mnemonic == self.base_mnemonic + 'i': + className += "Imm" + allocString = ''' + (%(dataSize)s >= 4) ? + (StaticInstPtr)(new %(class_name)sBig(machInst, + macrocodeBlock, %(flags)s, %(src1)s, %(op2)s, + %(dest)s, %(dataSize)s, %(ext)s)) : + (StaticInstPtr)(new %(class_name)s(machInst, + macrocodeBlock, %(flags)s, %(src1)s, %(op2)s, + %(dest)s, %(dataSize)s, %(ext)s)) + ''' + allocator = allocString % { + "class_name" : className, + "flags" : self.microFlagsText(microFlags), + "src1" : self.src1, "op2" : self.op2, + "dest" : self.dest, + "dataSize" : self.dataSize, + "ext" : self.ext} + return allocator + else: + className = self.className + if self.mnemonic == self.base_mnemonic + 'i': + className += "Imm" + allocator = '''new %(class_name)s(machInst, macrocodeBlock, + %(flags)s, %(src1)s, %(op2)s, %(dest)s, + %(dataSize)s, %(ext)s)''' % { + "class_name" : className, + "flags" : self.microFlagsText(microFlags), + "src1" : self.src1, "op2" : self.op2, + "dest" : self.dest, + "dataSize" : self.dataSize, + "ext" : self.ext} + return allocator class LogicRegOp(RegOp): abstract = True flag_code = ''' //Don't have genFlags handle the OF or CF bits uint64_t mask = CFBit | ECFBit | OFBit; - ccFlagBits = genFlags(ccFlagBits, ext & ~mask, DestReg, psrc1, op2); + ccFlagBits = genFlags(ccFlagBits, ext & ~mask, result, psrc1, op2); //If a logic microop wants to set these, it wants to set them to 0. 
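The new IntReg result variable in the execute templates is what lets a single flag_code string serve both variants: code assigns result inside the merge() call, big_code assigns the masked value directly, and genFlags() always receives the value the microop computed rather than whatever landed in DestReg. A compressed, self-contained sketch of the pattern (mask/merge follow the semantics used in the ISA code; the toy genFlags computes only a zero flag):

    #include <cstdint>

    typedef uint64_t IntReg;
    const IntReg ZFBit = 0x40;              // illustrative encoding only

    static inline IntReg mask(int nbits) {
        return nbits >= 64 ? ~0ULL : (1ULL << nbits) - 1;
    }

    // Sub-word writes keep the destination's untouched high bytes.
    static inline IntReg merge(IntReg old, IntReg val, int dataSize) {
        IntReg m = mask(dataSize * 8);
        return (old & ~m) | (val & m);
    }

    // Toy genFlags(): note it reads `result`, never DestReg.
    static inline IntReg genFlags(IntReg cc, IntReg result, int dataSize) {
        return (result & mask(dataSize * 8)) == 0 ? (cc | ZFBit)
                                                  : (cc & ~ZFBit);
    }

    IntReg addMicroop(IntReg &DestReg, IntReg psrc1, IntReg op2,
                      IntReg ccFlagBits, int dataSize)
    {
        IntReg result;
        if (dataSize >= 4)      // "Big": overwrite, zero-extending
            DestReg = result = (psrc1 + op2) & mask(dataSize * 8);
        else                    // small operand: merge into old value
            DestReg = merge(DestReg, result = psrc1 + op2, dataSize);
        return genFlags(ccFlagBits, result, dataSize);   // one flag path
    }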
ccFlagBits &= ~(CFBit & ext); ccFlagBits &= ~(ECFBit & ext); @@ -401,12 +448,12 @@ let {{ class FlagRegOp(RegOp): abstract = True flag_code = \ - "ccFlagBits = genFlags(ccFlagBits, ext, DestReg, psrc1, op2);" + "ccFlagBits = genFlags(ccFlagBits, ext, result, psrc1, op2);" class SubRegOp(RegOp): abstract = True flag_code = \ - "ccFlagBits = genFlags(ccFlagBits, ext, DestReg, psrc1, ~op2, true);" + "ccFlagBits = genFlags(ccFlagBits, ext, result, psrc1, ~op2, true);" class CondRegOp(RegOp): abstract = True @@ -428,31 +475,44 @@ let {{ src1, src2, flags, dataSize) class Add(FlagRegOp): - code = 'DestReg = merge(DestReg, psrc1 + op2, dataSize);' + code = 'DestReg = merge(DestReg, result = (psrc1 + op2), dataSize);' + big_code = 'DestReg = result = (psrc1 + op2) & mask(dataSize * 8);' class Or(LogicRegOp): - code = 'DestReg = merge(DestReg, psrc1 | op2, dataSize);' + code = 'DestReg = merge(DestReg, result = (psrc1 | op2), dataSize);' + big_code = 'DestReg = result = (psrc1 | op2) & mask(dataSize * 8);' class Adc(FlagRegOp): code = ''' CCFlagBits flags = ccFlagBits; - DestReg = merge(DestReg, psrc1 + op2 + flags.cf, dataSize); + DestReg = merge(DestReg, result = (psrc1 + op2 + flags.cf), dataSize); + ''' + big_code = ''' + CCFlagBits flags = ccFlagBits; + DestReg = result = (psrc1 + op2 + flags.cf) & mask(dataSize * 8); ''' class Sbb(SubRegOp): code = ''' CCFlagBits flags = ccFlagBits; - DestReg = merge(DestReg, psrc1 - op2 - flags.cf, dataSize); + DestReg = merge(DestReg, result = (psrc1 - op2 - flags.cf), dataSize); + ''' + big_code = ''' + CCFlagBits flags = ccFlagBits; + DestReg = result = (psrc1 - op2 - flags.cf) & mask(dataSize * 8); ''' class And(LogicRegOp): - code = 'DestReg = merge(DestReg, psrc1 & op2, dataSize)' + code = 'DestReg = merge(DestReg, result = (psrc1 & op2), dataSize)' + big_code = 'DestReg = result = (psrc1 & op2) & mask(dataSize * 8)' class Sub(SubRegOp): - code = 'DestReg = merge(DestReg, psrc1 - op2, dataSize)' + code = 'DestReg = merge(DestReg, result = (psrc1 - op2), dataSize)' + big_code = 'DestReg = result = (psrc1 - op2) & mask(dataSize * 8)' class Xor(LogicRegOp): - code = 'DestReg = merge(DestReg, psrc1 ^ op2, dataSize)' + code = 'DestReg = merge(DestReg, result = (psrc1 ^ op2), dataSize)' + big_code = 'DestReg = result = (psrc1 ^ op2) & mask(dataSize * 8)' class Mul1s(WrRegOp): code = ''' @@ -505,6 +565,7 @@ let {{ class Mulel(RdRegOp): code = 'DestReg = merge(SrcReg1, ProdLow, dataSize);' + big_code = 'DestReg = ProdLow & mask(dataSize * 8);' class Muleh(RdRegOp): def __init__(self, dest, src1=None, flags=None, dataSize="env.dataSize"): @@ -513,6 +574,7 @@ let {{ super(RdRegOp, self).__init__(dest, src1, \ "InstRegIndex(NUM_INTREGS)", flags, dataSize) code = 'DestReg = merge(SrcReg1, ProdHi, dataSize);' + big_code = 'DestReg = ProdHi & mask(dataSize * 8);' # One or two bit divide class Div1(WrRegOp): @@ -540,7 +602,7 @@ let {{ # Step divide class Div2(RegOp): - code = ''' + divCode = ''' uint64_t dividend = Remainder; uint64_t divisor = Divisor; uint64_t quotient = Quotient; @@ -587,11 +649,13 @@ let {{ } } //Keep track of how many bits there are still to pull in. 
- DestReg = merge(DestReg, remaining, dataSize); + %s //Record the final results Remainder = remainder; Quotient = quotient; ''' + code = divCode % "DestReg = merge(DestReg, remaining, dataSize);" + big_code = divCode % "DestReg = remaining & mask(dataSize * 8);" flag_code = ''' if (remaining == 0) ccFlagBits = ccFlagBits | (ext & EZFBit); @@ -601,9 +665,11 @@ let {{ class Divq(RdRegOp): code = 'DestReg = merge(SrcReg1, Quotient, dataSize);' + big_code = 'DestReg = Quotient & mask(dataSize * 8);' class Divr(RdRegOp): code = 'DestReg = merge(SrcReg1, Remainder, dataSize);' + big_code = 'DestReg = Remainder & mask(dataSize * 8);' class Mov(CondRegOp): code = 'DestReg = merge(SrcReg1, op2, dataSize)' @@ -616,6 +682,10 @@ let {{ uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); DestReg = merge(DestReg, psrc1 << shiftAmt, dataSize); ''' + big_code = ''' + uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); + DestReg = (psrc1 << shiftAmt) & mask(dataSize * 8); + ''' flag_code = ''' // If the shift amount is zero, no flags should be modified. if (shiftAmt) { @@ -641,14 +711,19 @@ let {{ ''' class Srl(RegOp): + # Because what happens to the bits shift -in- on a right shift + # is not defined in the C/C++ standard, we have to mask them out + # to be sure they're zero. code = ''' uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); - // Because what happens to the bits shift -in- on a right shift - // is not defined in the C/C++ standard, we have to mask them out - // to be sure they're zero. uint64_t logicalMask = mask(dataSize * 8 - shiftAmt); DestReg = merge(DestReg, (psrc1 >> shiftAmt) & logicalMask, dataSize); ''' + big_code = ''' + uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); + uint64_t logicalMask = mask(dataSize * 8 - shiftAmt); + DestReg = (psrc1 >> shiftAmt) & logicalMask; + ''' flag_code = ''' // If the shift amount is zero, no flags should be modified. if (shiftAmt) { @@ -671,15 +746,21 @@ let {{ ''' class Sra(RegOp): + # Because what happens to the bits shift -in- on a right shift + # is not defined in the C/C++ standard, we have to sign extend + # them manually to be sure. code = ''' uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); - // Because what happens to the bits shift -in- on a right shift - // is not defined in the C/C++ standard, we have to sign extend - // them manually to be sure. uint64_t arithMask = (shiftAmt == 0) ? 0 : -bits(psrc1, dataSize * 8 - 1) << (dataSize * 8 - shiftAmt); DestReg = merge(DestReg, (psrc1 >> shiftAmt) | arithMask, dataSize); ''' + big_code = ''' + uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); + uint64_t arithMask = (shiftAmt == 0) ? 0 : + -bits(psrc1, dataSize * 8 - 1) << (dataSize * 8 - shiftAmt); + DestReg = ((psrc1 >> shiftAmt) | arithMask) & mask(dataSize * 8); + ''' flag_code = ''' // If the shift amount is zero, no flags should be modified. if (shiftAmt) { @@ -704,13 +785,11 @@ let {{ uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); uint8_t realShiftAmt = shiftAmt % (dataSize * 8); - if(realShiftAmt) - { + if (realShiftAmt) { uint64_t top = psrc1 << (dataSize * 8 - realShiftAmt); uint64_t bottom = bits(psrc1, dataSize * 8, realShiftAmt); DestReg = merge(DestReg, top | bottom, dataSize); - } - else + } else DestReg = merge(DestReg, DestReg, dataSize); ''' flag_code = ''' @@ -739,16 +818,14 @@ let {{ uint8_t shiftAmt = (op2 & ((dataSize == 8) ? 
mask(6) : mask(5))); uint8_t realShiftAmt = shiftAmt % (dataSize * 8 + 1); - if(realShiftAmt) - { + if (realShiftAmt) { CCFlagBits flags = ccFlagBits; uint64_t top = flags.cf << (dataSize * 8 - realShiftAmt); if (realShiftAmt > 1) top |= psrc1 << (dataSize * 8 - realShiftAmt + 1); uint64_t bottom = bits(psrc1, dataSize * 8 - 1, realShiftAmt); DestReg = merge(DestReg, top | bottom, dataSize); - } - else + } else DestReg = merge(DestReg, DestReg, dataSize); ''' flag_code = ''' @@ -780,14 +857,12 @@ let {{ uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); uint8_t realShiftAmt = shiftAmt % (dataSize * 8); - if(realShiftAmt) - { + if (realShiftAmt) { uint64_t top = psrc1 << realShiftAmt; uint64_t bottom = bits(psrc1, dataSize * 8 - 1, dataSize * 8 - realShiftAmt); DestReg = merge(DestReg, top | bottom, dataSize); - } - else + } else DestReg = merge(DestReg, DestReg, dataSize); ''' flag_code = ''' @@ -816,8 +891,7 @@ let {{ uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); uint8_t realShiftAmt = shiftAmt % (dataSize * 8 + 1); - if(realShiftAmt) - { + if (realShiftAmt) { CCFlagBits flags = ccFlagBits; uint64_t top = psrc1 << realShiftAmt; uint64_t bottom = flags.cf << (realShiftAmt - 1); @@ -826,8 +900,7 @@ let {{ bits(psrc1, dataSize * 8 - 1, dataSize * 8 - realShiftAmt + 1); DestReg = merge(DestReg, top | bottom, dataSize); - } - else + } else DestReg = merge(DestReg, DestReg, dataSize); ''' flag_code = ''' @@ -853,10 +926,10 @@ let {{ ''' class Sld(RegOp): - code = ''' + sldCode = ''' uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); uint8_t dataBits = dataSize * 8; - uint8_t realShiftAmt = shiftAmt % (2 * dataBits); + uint8_t realShiftAmt = shiftAmt %% (2 * dataBits); uint64_t result; if (realShiftAmt == 0) { result = psrc1; @@ -867,8 +940,10 @@ let {{ result = (DoubleBits << (realShiftAmt - dataBits)) | (psrc1 >> (2 * dataBits - realShiftAmt)); } - DestReg = merge(DestReg, result, dataSize); + %s ''' + code = sldCode % "DestReg = merge(DestReg, result, dataSize);" + big_code = sldCode % "DestReg = result & mask(dataSize * 8);" flag_code = ''' // If the shift amount is zero, no flags should be modified. if (shiftAmt) { @@ -899,10 +974,10 @@ let {{ ''' class Srd(RegOp): - code = ''' + srdCode = ''' uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); uint8_t dataBits = dataSize * 8; - uint8_t realShiftAmt = shiftAmt % (2 * dataBits); + uint8_t realShiftAmt = shiftAmt %% (2 * dataBits); uint64_t result; if (realShiftAmt == 0) { result = psrc1; @@ -919,8 +994,10 @@ let {{ logicalMask) | (psrc1 << (2 * dataBits - realShiftAmt)); } - DestReg = merge(DestReg, result, dataSize); + %s ''' + code = srdCode % "DestReg = merge(DestReg, result, dataSize);" + big_code = srdCode % "DestReg = result & mask(dataSize * 8);" flag_code = ''' // If the shift amount is zero, no flags should be modified. if (shiftAmt) { @@ -986,6 +1063,12 @@ let {{ ccFlagBits = (flag == 0) ? (ccFlagBits | EZFBit) : (ccFlagBits & ~EZFBit); ''' + big_code = ''' + int flag = bits(ccFlagBits, imm8); + DestReg = flag & mask(dataSize * 8); + ccFlagBits = (flag == 0) ? (ccFlagBits | EZFBit) : + (ccFlagBits & ~EZFBit); + ''' def __init__(self, dest, imm, flags=None, \ dataSize="env.dataSize"): super(Ruflag, self).__init__(dest, \ @@ -1000,6 +1083,14 @@ let {{ ccFlagBits = (flag == 0) ? 
(ccFlagBits | EZFBit) : (ccFlagBits & ~EZFBit); ''' + big_code = ''' + MiscReg flagMask = 0x3F7FDD5; + MiscReg flags = (nccFlagBits | ccFlagBits) & flagMask; + int flag = bits(flags, imm8); + DestReg = flag & mask(dataSize * 8); + ccFlagBits = (flag == 0) ? (ccFlagBits | EZFBit) : + (ccFlagBits & ~EZFBit); + ''' def __init__(self, dest, imm, flags=None, \ dataSize="env.dataSize"): super(Rflag, self).__init__(dest, \ @@ -1015,6 +1106,15 @@ let {{ val = sign_bit ? (val | ~maskVal) : (val & maskVal); DestReg = merge(DestReg, val, dataSize); ''' + big_code = ''' + IntReg val = psrc1; + // Mask the bit position so that it wraps. + int bitPos = op2 & (dataSize * 8 - 1); + int sign_bit = bits(val, bitPos, bitPos); + uint64_t maskVal = mask(bitPos+1); + val = sign_bit ? (val | ~maskVal) : (val & maskVal); + DestReg = val & mask(dataSize * 8); + ''' flag_code = ''' if (!sign_bit) ccFlagBits = ccFlagBits & @@ -1026,12 +1126,13 @@ let {{ class Zext(RegOp): code = 'DestReg = merge(DestReg, bits(psrc1, op2, 0), dataSize);' + big_code = 'DestReg = bits(psrc1, op2, 0) & mask(dataSize * 8);' class Rddr(RegOp): def __init__(self, dest, src1, flags=None, dataSize="env.dataSize"): super(Rddr, self).__init__(dest, \ src1, "InstRegIndex(NUM_INTREGS)", flags, dataSize) - code = ''' + rdrCode = ''' CR4 cr4 = CR4Op; DR7 dr7 = DR7Op; if ((cr4.de == 1 && (src1 == 4 || src1 == 5)) || src1 >= 8) { @@ -1039,9 +1140,11 @@ let {{ } else if (dr7.gd) { fault = new DebugException(); } else { - DestReg = merge(DestReg, DebugSrc1, dataSize); + %s } ''' + code = rdrCode % "DestReg = merge(DestReg, DebugSrc1, dataSize);" + big_code = rdrCode % "DestReg = DebugSrc1 & mask(dataSize * 8);" class Wrdr(RegOp): def __init__(self, dest, src1, flags=None, dataSize="env.dataSize"): @@ -1066,13 +1169,15 @@ let {{ def __init__(self, dest, src1, flags=None, dataSize="env.dataSize"): super(Rdcr, self).__init__(dest, \ src1, "InstRegIndex(NUM_INTREGS)", flags, dataSize) - code = ''' + rdcrCode = ''' if (src1 == 1 || (src1 > 4 && src1 < 8) || (src1 > 8)) { fault = new InvalidOpcode(); } else { - DestReg = merge(DestReg, ControlSrc1, dataSize); + %s } ''' + code = rdcrCode % "DestReg = merge(DestReg, ControlSrc1, dataSize);" + big_code = rdcrCode % "DestReg = ControlSrc1 & mask(dataSize * 8);" class Wrcr(RegOp): def __init__(self, dest, src1, flags=None, dataSize="env.dataSize"): @@ -1154,24 +1259,20 @@ let {{ ''' class Rdbase(SegOp): - code = ''' - DestReg = merge(DestReg, SegBaseSrc1, dataSize); - ''' + code = 'DestReg = merge(DestReg, SegBaseSrc1, dataSize);' + big_code = 'DestReg = SegBaseSrc1 & mask(dataSize * 8);' class Rdlimit(SegOp): - code = ''' - DestReg = merge(DestReg, SegLimitSrc1, dataSize); - ''' + code = 'DestReg = merge(DestReg, SegLimitSrc1, dataSize);' + big_code = 'DestReg = SegLimitSrc1 & mask(dataSize * 8);' class RdAttr(SegOp): - code = ''' - DestReg = merge(DestReg, SegAttrSrc1, dataSize); - ''' + code = 'DestReg = merge(DestReg, SegAttrSrc1, dataSize);' + big_code = 'DestReg = SegAttrSrc1 & mask(dataSize * 8);' class Rdsel(SegOp): - code = ''' - DestReg = merge(DestReg, SegSelSrc1, dataSize); - ''' + code = 'DestReg = merge(DestReg, SegSelSrc1, dataSize);' + big_code = 'DestReg = SegSelSrc1 & mask(dataSize * 8);' class Rdval(RegOp): def __init__(self, dest, src1, flags=None, dataSize="env.dataSize"): diff --git a/src/arch/x86/microcode_rom.hh b/src/arch/x86/microcode_rom.hh index f8ad410ce..84c503bb9 100644 --- a/src/arch/x86/microcode_rom.hh +++ b/src/arch/x86/microcode_rom.hh @@ -32,6 +32,7 @@ #define 
__ARCH_X86_MICROCODE_ROM_HH__ #include "arch/x86/emulenv.hh" +#include "arch/x86/insts/badmicroop.hh" #include "cpu/static_inst.hh" namespace X86ISAInst @@ -60,8 +61,10 @@ namespace X86ISAInst fetchMicroop(MicroPC microPC, StaticInstPtr curMacroop) { microPC = normalMicroPC(microPC); - assert(microPC < numMicroops); - return genFuncs[microPC](curMacroop); + if (microPC >= numMicroops) + return X86ISA::badMicroop; + else + return genFuncs[microPC](curMacroop); } }; } diff --git a/src/arch/x86/predecoder.hh b/src/arch/x86/predecoder.hh index c06ec18bc..5c67e28e1 100644 --- a/src/arch/x86/predecoder.hh +++ b/src/arch/x86/predecoder.hh @@ -225,7 +225,11 @@ namespace X86ISA { assert(emiIsReady); emiIsReady = false; - nextPC.npc(nextPC.pc() + getInstSize()); + if (!nextPC.size()) { + Addr size = getInstSize(); + nextPC.size(size); + nextPC.npc(nextPC.pc() + size); + } return emi; } }; diff --git a/src/arch/x86/types.hh b/src/arch/x86/types.hh index 5a208446a..4641141d3 100644 --- a/src/arch/x86/types.hh +++ b/src/arch/x86/types.hh @@ -222,7 +222,61 @@ namespace X86ISA return true; } - typedef GenericISA::UPCState<MachInst> PCState; + class PCState : public GenericISA::UPCState<MachInst> + { + protected: + typedef GenericISA::UPCState<MachInst> Base; + + uint8_t _size; + + public: + void + set(Addr val) + { + Base::set(val); + _size = 0; + } + + PCState() {} + PCState(Addr val) { set(val); } + + uint8_t size() const { return _size; } + void size(uint8_t newSize) { _size = newSize; } + + bool + branching() const + { + return this->npc() != this->pc() + size(); + } + + void + advance() + { + Base::advance(); + _size = 0; + } + + void + uEnd() + { + Base::uEnd(); + _size = 0; + } + + void + serialize(std::ostream &os) + { + Base::serialize(os); + SERIALIZE_SCALAR(_size); + } + + void + unserialize(Checkpoint *cp, const std::string §ion) + { + Base::unserialize(cp, section); + UNSERIALIZE_SCALAR(_size); + } + }; struct CoreSpecific { int core_type; diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 2e4e4819e..d2cde496e 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -1070,6 +1070,8 @@ DefaultFetch<Impl>::fetch(bool &status_change) Addr pcOffset = fetchOffset[tid]; Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; + bool inRom = isRomMicroPC(thisPC.microPC()); + // If returning from the delay of a cache miss, then update the status // to running, otherwise do the cache access. Possibly move this up // to tick() function. @@ -1083,7 +1085,7 @@ DefaultFetch<Impl>::fetch(bool &status_change) Addr block_PC = icacheBlockAlignPC(fetchAddr); // Unless buffer already got the block, fetch it from icache. - if (!cacheDataValid[tid] || block_PC != cacheDataPC[tid]) { + if (!(cacheDataValid[tid] && block_PC == cacheDataPC[tid]) && !inRom) { DPRINTF(Fetch, "[tid:%i]: Attempting to translate and read " "instruction, starting at PC %s.\n", tid, thisPC); @@ -1155,7 +1157,7 @@ DefaultFetch<Impl>::fetch(bool &status_change) !predictedBranch) { // If we need to process more memory, do it now. - if (!curMacroop && !predecoder.extMachInstReady()) { + if (!(curMacroop || inRom) && !predecoder.extMachInstReady()) { if (ISA_HAS_DELAY_SLOT && pcOffset == 0) { // Walk past any annulled delay slot instructions. Addr pcAddr = thisPC.instAddr() & BaseCPU::PCMask; @@ -1181,7 +1183,7 @@ DefaultFetch<Impl>::fetch(bool &status_change) // Extract as many instructions and/or microops as we can from // the memory we've processed so far. 
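The PCState subclass added in types.hh threads the decoded instruction length through the pipeline so that branching() can be answered without re-consulting the predecoder, which itself now fills the size in exactly once per instruction. A condensed, self-contained version of the idea (the real class layers on GenericISA::UPCState and also resets _size in advance()/uEnd() and on serialization):

    #include <cstdint>
    typedef uint64_t Addr;

    class PCState {
        Addr _pc = 0, _npc = 0;
        uint8_t _size = 0;
      public:
        Addr pc() const { return _pc; }
        Addr npc() const { return _npc; }
        void npc(Addr a) { _npc = a; }
        uint8_t size() const { return _size; }
        void size(uint8_t s) { _size = s; }

        // A fresh target has no length yet; the predecoder supplies
        // it later via size() and npc(pc() + size), as in the
        // predecoder.hh hunk above.
        void set(Addr val) { _pc = val; _npc = val; _size = 0; }

        // An instruction branches iff its next PC is not simply this
        // PC advanced by its own length.
        bool branching() const { return npc() != pc() + size(); }
    };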
do { - if (!curMacroop) { + if (!(curMacroop || inRom)) { if (predecoder.extMachInstReady()) { ExtMachInst extMachInst; @@ -1202,8 +1204,13 @@ DefaultFetch<Impl>::fetch(bool &status_change) break; } } - if (curMacroop) { - staticInst = curMacroop->fetchMicroop(thisPC.microPC()); + if (curMacroop || inRom) { + if (inRom) { + staticInst = cpu->microcodeRom.fetchMicroop( + thisPC.microPC(), curMacroop); + } else { + staticInst = curMacroop->fetchMicroop(thisPC.microPC()); + } if (staticInst->isLastMicroop()) { curMacroop = NULL; pcOffset = 0; diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index d6da4b818..aa21a0edc 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -749,7 +749,7 @@ InstructionQueue<Impl>::scheduleReadyInsts() DynInstPtr deferred_mem_inst; int total_deferred_mem_issued = 0; while (total_deferred_mem_issued < totalWidth && - (deferred_mem_inst = getDeferredMemInstToExecute()) != NULL) { + (deferred_mem_inst = getDeferredMemInstToExecute()) != 0) { issueToExecuteQueue->access(0)->size++; instsToExecute.push_back(deferred_mem_inst); total_deferred_mem_issued++; diff --git a/src/mem/protocol/MESI_CMP_directory-L1cache.sm b/src/mem/protocol/MESI_CMP_directory-L1cache.sm index 8744a7122..4442cee41 100644 --- a/src/mem/protocol/MESI_CMP_directory-L1cache.sm +++ b/src/mem/protocol/MESI_CMP_directory-L1cache.sm @@ -287,20 +287,21 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") if (in_msg.Type == CacheRequestType:IFETCH) { // ** INSTRUCTION ACCESS *** - // Check to see if it is in the OTHER L1 - Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); - if (is_valid(L1Dcache_entry)) { - // The block is in the wrong L1, put the request on the queue to the shared L2 - trigger(Event:L1_Replacement, in_msg.LineAddress, - L1Dcache_entry, L1_TBEs[in_msg.LineAddress]); - } - Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); if (is_valid(L1Icache_entry)) { // The tag matches for the L1, so the L1 asks the L2 for it. 
trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress, L1Icache_entry, L1_TBEs[in_msg.LineAddress]); } else { + + // Check to see if it is in the OTHER L1 + Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); + if (is_valid(L1Dcache_entry)) { + // The block is in the wrong L1, put the request on the queue to the shared L2 + trigger(Event:L1_Replacement, in_msg.LineAddress, + L1Dcache_entry, L1_TBEs[in_msg.LineAddress]); + } + if (L1IcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 so let's see if the L2 has it trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress, @@ -313,21 +314,23 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") } } } else { - // *** DATA ACCESS *** - // Check to see if it is in the OTHER L1 - Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); - if (is_valid(L1Icache_entry)) { - // The block is in the wrong L1, put the request on the queue to the shared L2 - trigger(Event:L1_Replacement, in_msg.LineAddress, - L1Icache_entry, L1_TBEs[in_msg.LineAddress]); - } + // *** DATA ACCESS *** Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); if (is_valid(L1Dcache_entry)) { // The tag matches for the L1, so the L1 ask the L2 for it trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress, L1Dcache_entry, L1_TBEs[in_msg.LineAddress]); } else { + + // Check to see if it is in the OTHER L1 + Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); + if (is_valid(L1Icache_entry)) { + // The block is in the wrong L1, put the request on the queue to the shared L2 + trigger(Event:L1_Replacement, in_msg.LineAddress, + L1Icache_entry, L1_TBEs[in_msg.LineAddress]); + } + if (L1DcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 let's see if the L2 has it trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress, diff --git a/src/mem/protocol/MOESI_CMP_directory-L1cache.sm b/src/mem/protocol/MOESI_CMP_directory-L1cache.sm index 4082f23c9..7f0ab62a8 100644 --- a/src/mem/protocol/MOESI_CMP_directory-L1cache.sm +++ b/src/mem/protocol/MOESI_CMP_directory-L1cache.sm @@ -338,14 +338,6 @@ machine(L1Cache, "Directory protocol") if (in_msg.Type == CacheRequestType:IFETCH) { // ** INSTRUCTION ACCESS *** - Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); - // Check to see if it is in the OTHER L1 - if (is_valid(L1Dcache_entry)) { - // The block is in the wrong L1, put the request on the queue to the shared L2 - trigger(Event:L1_Replacement, in_msg.LineAddress, L1Dcache_entry, - TBEs[in_msg.LineAddress]); - } - Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); if (is_valid(L1Icache_entry)) { // The tag matches for the L1, so the L1 asks the L2 for it. 
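All four protocol files in this patch get the same restructuring, shown above for MESI_CMP_directory and continuing below for the others: the check for the block sitting in the other L1 moves inside the miss branch of the correct L1's lookup, so it only runs once the correct cache has actually missed. The control flow, reduced to a C++ sketch with stand-ins for the SLICC entities (the data-access path is symmetric):

    struct Entry { bool valid = false; };

    static void triggerMandatoryRequest() { /* ask the L2 */ }
    static void triggerL1Replacement()    { /* evict toward L2 */ }
    static void triggerL1IReplacement()   { /* make room in L1I */ }
    static bool l1iHasSpace()             { return true; }

    void handleIfetch(const Entry &l1i, const Entry &l1d)
    {
        if (l1i.valid) {
            // Hit in the correct L1: issue the request immediately.
            triggerMandatoryRequest();
        } else {
            if (l1d.valid)
                triggerL1Replacement();      // block is in the other L1
            if (l1iHasSpace())
                triggerMandatoryRequest();   // miss, room available
            else
                triggerL1IReplacement();     // free a line first
        }
    }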
@@ -353,6 +345,14 @@ machine(L1Cache, "Directory protocol") in_msg.LineAddress, L1Icache_entry, TBEs[in_msg.LineAddress]); } else { + + Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); + // Check to see if it is in the OTHER L1 + if (is_valid(L1Dcache_entry)) { + // The block is in the wrong L1, put the request on the queue to the shared L2 + trigger(Event:L1_Replacement, in_msg.LineAddress, L1Dcache_entry, + TBEs[in_msg.LineAddress]); + } if (L1IcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 so let's see if the L2 has it trigger(mandatory_request_type_to_event(in_msg.Type), @@ -369,14 +369,6 @@ machine(L1Cache, "Directory protocol") } else { // *** DATA ACCESS *** - Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); - // Check to see if it is in the OTHER L1 - if (is_valid(L1Icache_entry)) { - // The block is in the wrong L1, put the request on the queue to the shared L2 - trigger(Event:L1_Replacement, in_msg.LineAddress, - L1Icache_entry, TBEs[in_msg.LineAddress]); - } - Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); if (is_valid(L1Dcache_entry)) { // The tag matches for the L1, so the L1 ask the L2 for it @@ -384,6 +376,14 @@ machine(L1Cache, "Directory protocol") in_msg.LineAddress, L1Dcache_entry, TBEs[in_msg.LineAddress]); } else { + + Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); + // Check to see if it is in the OTHER L1 + if (is_valid(L1Icache_entry)) { + // The block is in the wrong L1, put the request on the queue to the shared L2 + trigger(Event:L1_Replacement, in_msg.LineAddress, + L1Icache_entry, TBEs[in_msg.LineAddress]); + } if (L1DcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 let's see if the L2 has it trigger(mandatory_request_type_to_event(in_msg.Type), diff --git a/src/mem/protocol/MOESI_CMP_token-L1cache.sm b/src/mem/protocol/MOESI_CMP_token-L1cache.sm index 00e9404c9..226f21374 100644 --- a/src/mem/protocol/MOESI_CMP_token-L1cache.sm +++ b/src/mem/protocol/MOESI_CMP_token-L1cache.sm @@ -647,20 +647,21 @@ machine(L1Cache, "Token protocol") if (in_msg.Type == CacheRequestType:IFETCH) { // ** INSTRUCTION ACCESS *** - // Check to see if it is in the OTHER L1 - Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); - if (is_valid(L1Dcache_entry)) { - // The block is in the wrong L1, try to write it to the L2 - trigger(Event:L1_Replacement, in_msg.LineAddress, - L1Dcache_entry, tbe); - } - Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); if (is_valid(L1Icache_entry)) { // The tag matches for the L1, so the L1 fetches the line. 
We know it can't be in the L2 due to exclusion trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress, L1Icache_entry, tbe); } else { + + // Check to see if it is in the OTHER L1 + Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); + if (is_valid(L1Dcache_entry)) { + // The block is in the wrong L1, try to write it to the L2 + trigger(Event:L1_Replacement, in_msg.LineAddress, + L1Dcache_entry, tbe); + } + if (L1IcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 trigger(mandatory_request_type_to_event(in_msg.Type), @@ -676,21 +677,21 @@ machine(L1Cache, "Token protocol") } else { // *** DATA ACCESS *** - // Check to see if it is in the OTHER L1 - Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); - - if (is_valid(L1Icache_entry)) { - // The block is in the wrong L1, try to write it to the L2 - trigger(Event:L1_Replacement, in_msg.LineAddress, - L1Icache_entry, tbe); - } - Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); if (is_valid(L1Dcache_entry)) { // The tag matches for the L1, so the L1 fetches the line. We know it can't be in the L2 due to exclusion trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress, L1Dcache_entry, tbe); } else { + + // Check to see if it is in the OTHER L1 + Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); + if (is_valid(L1Icache_entry)) { + // The block is in the wrong L1, try to write it to the L2 + trigger(Event:L1_Replacement, in_msg.LineAddress, + L1Icache_entry, tbe); + } + if (L1DcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 trigger(mandatory_request_type_to_event(in_msg.Type), diff --git a/src/mem/protocol/MOESI_hammer-cache.sm b/src/mem/protocol/MOESI_hammer-cache.sm index 78bc9e3e7..ab2a6acf4 100644 --- a/src/mem/protocol/MOESI_hammer-cache.sm +++ b/src/mem/protocol/MOESI_hammer-cache.sm @@ -377,26 +377,26 @@ machine(L1Cache, "AMD Hammer-like protocol") if (in_msg.Type == CacheRequestType:IFETCH) { // ** INSTRUCTION ACCESS *** - // Check to see if it is in the OTHER L1 - Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); - if (is_valid(L1Dcache_entry)) { - // The block is in the wrong L1, try to write it to the L2 - if (L2cacheMemory.cacheAvail(in_msg.LineAddress)) { - trigger(Event:L1_to_L2, in_msg.LineAddress, L1Dcache_entry, tbe); - } else { - trigger(Event:L2_Replacement, - L2cacheMemory.cacheProbe(in_msg.LineAddress), - getL2CacheEntry(L2cacheMemory.cacheProbe(in_msg.LineAddress)), - TBEs[L2cacheMemory.cacheProbe(in_msg.LineAddress)]); - } - } - Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); if (is_valid(L1Icache_entry)) { // The tag matches for the L1, so the L1 fetches the line. 
We know it can't be in the L2 due to exclusion trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress, L1Icache_entry, tbe); } else { + // Check to see if it is in the OTHER L1 + Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); + if (is_valid(L1Dcache_entry)) { + // The block is in the wrong L1, try to write it to the L2 + if (L2cacheMemory.cacheAvail(in_msg.LineAddress)) { + trigger(Event:L1_to_L2, in_msg.LineAddress, L1Dcache_entry, tbe); + } else { + trigger(Event:L2_Replacement, + L2cacheMemory.cacheProbe(in_msg.LineAddress), + getL2CacheEntry(L2cacheMemory.cacheProbe(in_msg.LineAddress)), + TBEs[L2cacheMemory.cacheProbe(in_msg.LineAddress)]); + } + } + if (L1IcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 @@ -430,26 +430,27 @@ machine(L1Cache, "AMD Hammer-like protocol") } else { // *** DATA ACCESS *** - // Check to see if it is in the OTHER L1 - Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); - if (is_valid(L1Icache_entry)) { - // The block is in the wrong L1, try to write it to the L2 - if (L2cacheMemory.cacheAvail(in_msg.LineAddress)) { - trigger(Event:L1_to_L2, in_msg.LineAddress, L1Icache_entry, tbe); - } else { - trigger(Event:L2_Replacement, - L2cacheMemory.cacheProbe(in_msg.LineAddress), - getL2CacheEntry(L2cacheMemory.cacheProbe(in_msg.LineAddress)), - TBEs[L2cacheMemory.cacheProbe(in_msg.LineAddress)]); - } - } - Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); if (is_valid(L1Dcache_entry)) { // The tag matches for the L1, so the L1 fetches the line. We know it can't be in the L2 due to exclusion trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress, L1Dcache_entry, tbe); } else { + + // Check to see if it is in the OTHER L1 + Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); + if (is_valid(L1Icache_entry)) { + // The block is in the wrong L1, try to write it to the L2 + if (L2cacheMemory.cacheAvail(in_msg.LineAddress)) { + trigger(Event:L1_to_L2, in_msg.LineAddress, L1Icache_entry, tbe); + } else { + trigger(Event:L2_Replacement, + L2cacheMemory.cacheProbe(in_msg.LineAddress), + getL2CacheEntry(L2cacheMemory.cacheProbe(in_msg.LineAddress)), + TBEs[L2cacheMemory.cacheProbe(in_msg.LineAddress)]); + } + } + if (L1DcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 Entry L2cache_entry := getL2CacheEntry(in_msg.LineAddress); diff --git a/src/mem/ruby/buffers/MessageBuffer.cc b/src/mem/ruby/buffers/MessageBuffer.cc index f6b79c580..225595005 100644 --- a/src/mem/ruby/buffers/MessageBuffer.cc +++ b/src/mem/ruby/buffers/MessageBuffer.cc @@ -58,6 +58,8 @@ MessageBuffer::MessageBuffer(const string &name) m_name = name; m_stall_msg_map.clear(); + m_input_link_id = 0; + m_vnet_id = 0; } int @@ -228,6 +230,7 @@ MessageBuffer::enqueue(MsgPtr message, Time delta) // Schedule the wakeup if (m_consumer_ptr != NULL) { g_eventQueue_ptr->scheduleEventAbsolute(m_consumer_ptr, arrival_time); + m_consumer_ptr->storeEventInfo(m_vnet_id); } else { panic("No consumer: %s name: %s\n", *this, m_name); } diff --git a/src/mem/ruby/buffers/MessageBuffer.hh b/src/mem/ruby/buffers/MessageBuffer.hh index 62cc65670..88df5b788 100644 --- a/src/mem/ruby/buffers/MessageBuffer.hh +++ b/src/mem/ruby/buffers/MessageBuffer.hh @@ -142,6 +142,9 @@ class MessageBuffer void printStats(std::ostream& out); void clearStats() { m_not_avail_count = 0; m_msg_counter = 0; } + void setIncomingLink(int link_id) { 
diff --git a/src/mem/ruby/common/Consumer.hh b/src/mem/ruby/common/Consumer.hh
index c1f8bc42e..a119abb39 100644
--- a/src/mem/ruby/common/Consumer.hh
+++ b/src/mem/ruby/common/Consumer.hh
@@ -67,6 +67,7 @@ class Consumer

     virtual void wakeup() = 0;
     virtual void print(std::ostream& out) const = 0;
+    virtual void storeEventInfo(int info) {}

     const Time&
     getLastScheduledWakeup() const
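Note that storeEventInfo() is a virtual with an empty default body rather than a pure virtual: every existing Consumer keeps compiling unchanged, and only subclasses that care about per-vnet arrival counts override it. A compact, compilable sketch of the pattern (ToyConsumer and CountingSwitch are made-up names):

    #include <vector>

    struct ToyConsumer {
        virtual ~ToyConsumer() {}
        virtual void storeEventInfo(int) {}        // no-op by default
    };

    struct CountingSwitch : ToyConsumer {
        std::vector<int> pending;                  // one counter per vnet
        explicit CountingSwitch(int vnets) : pending(vnets, 0) {}
        void storeEventInfo(int vnet) { ++pending[vnet]; }
    };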
diff --git a/src/mem/ruby/network/simple/PerfectSwitch.cc b/src/mem/ruby/network/simple/PerfectSwitch.cc
index 7229c724f..5c461c63f 100644
--- a/src/mem/ruby/network/simple/PerfectSwitch.cc
+++ b/src/mem/ruby/network/simple/PerfectSwitch.cc
@@ -54,6 +54,11 @@ PerfectSwitch::PerfectSwitch(SwitchID sid, SimpleNetwork* network_ptr)
     m_round_robin_start = 0;
     m_network_ptr = network_ptr;
     m_wakeups_wo_switch = 0;
+
+    for (int i = 0; i < m_virtual_networks; ++i)
+    {
+        m_pending_message_count.push_back(0);
+    }
 }

 void
@@ -62,12 +67,15 @@ PerfectSwitch::addInPort(const vector<MessageBuffer*>& in)
     assert(in.size() == m_virtual_networks);
     NodeID port = m_in.size();
     m_in.push_back(in);
+
     for (int j = 0; j < m_virtual_networks; j++) {
         m_in[port][j]->setConsumer(this);
         string desc = csprintf("[Queue from port %s %s %s to PerfectSwitch]",
             NodeIDToString(m_switch_id), NodeIDToString(port),
             NodeIDToString(j));
         m_in[port][j]->setDescription(desc);
+        m_in[port][j]->setIncomingLink(port);
+        m_in[port][j]->setVnet(j);
     }
 }
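In addInPort() every incoming buffer is now stamped with coordinates the switch already knows, its physical link (port) and its virtual network (j), so later notifications need no reverse lookup. A self-contained sketch of the same wiring under assumed toy types; the wakeup changes that consume these counters follow in the next hunk:

    #include <cstddef>
    #include <vector>

    struct ToyBuffer {
        int link, vnet;
        ToyBuffer() : link(-1), vnet(-1) {}
        void setIncomingLink(int l) { link = l; }
        void setVnet(int v) { vnet = v; }
    };

    // Attach one port's worth of per-vnet buffers and label each one.
    void addInPort(std::vector<std::vector<ToyBuffer*> > &in,
                   const std::vector<ToyBuffer*> &port)
    {
        int portId = static_cast<int>(in.size());
        in.push_back(port);
        for (std::size_t j = 0; j < port.size(); ++j) {
            in[portId][j]->setIncomingLink(portId);       // which link
            in[portId][j]->setVnet(static_cast<int>(j));  // which vnet
        }
    }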
@@ -154,161 +162,170 @@ PerfectSwitch::wakeup()
         m_round_robin_start = 0;
     }

-    // for all input ports, use round robin scheduling
-    for (int counter = 0; counter < m_in.size(); counter++) {
-        // Round robin scheduling
-        incoming++;
-        if (incoming >= m_in.size()) {
-            incoming = 0;
-        }
+    if (m_pending_message_count[vnet] > 0) {
+        // for all input ports, use round robin scheduling
+        for (int counter = 0; counter < m_in.size(); counter++) {
+            // Round robin scheduling
+            incoming++;
+            if (incoming >= m_in.size()) {
+                incoming = 0;
+            }

-        // temporary vectors to store the routing results
-        vector<LinkID> output_links;
-        vector<NetDest> output_link_destinations;
-
-        // Is there a message waiting?
-        while (m_in[incoming][vnet]->isReady()) {
-            DPRINTF(RubyNetwork, "incoming: %d\n", incoming);
-
-            // Peek at message
-            msg_ptr = m_in[incoming][vnet]->peekMsgPtr();
-            net_msg_ptr = safe_cast<NetworkMessage*>(msg_ptr.get());
-            DPRINTF(RubyNetwork, "Message: %s\n", (*net_msg_ptr));
-
-            output_links.clear();
-            output_link_destinations.clear();
-            NetDest msg_dsts =
-                net_msg_ptr->getInternalDestination();
-
-            // Unfortunately, the token-protocol sends some
-            // zero-destination messages, so this assert isn't valid
-            // assert(msg_dsts.count() > 0);
-
-            assert(m_link_order.size() == m_routing_table.size());
-            assert(m_link_order.size() == m_out.size());
-
-            if (m_network_ptr->getAdaptiveRouting()) {
-                if (m_network_ptr->isVNetOrdered(vnet)) {
-                    // Don't adaptively route
-                    for (int out = 0; out < m_out.size(); out++) {
-                        m_link_order[out].m_link = out;
-                        m_link_order[out].m_value = 0;
-                    }
-                } else {
-                    // Find how clogged each link is
-                    for (int out = 0; out < m_out.size(); out++) {
-                        int out_queue_length = 0;
-                        for (int v = 0; v < m_virtual_networks; v++) {
-                            out_queue_length += m_out[out][v]->getSize();
-                        }
-                        int value =
-                            (out_queue_length << 8) | (random() & 0xff);
-                        m_link_order[out].m_link = out;
-                        m_link_order[out].m_value = value;
-                    }
-
-                    // Look at the most empty link first
-                    sort(m_link_order.begin(), m_link_order.end());
-                }
-            }
+            // temporary vectors to store the routing results
+            vector<LinkID> output_links;
+            vector<NetDest> output_link_destinations;
+
+            // Is there a message waiting?
+            while (m_in[incoming][vnet]->isReady()) {
+                DPRINTF(RubyNetwork, "incoming: %d\n", incoming);
+
+                // Peek at message
+                msg_ptr = m_in[incoming][vnet]->peekMsgPtr();
+                net_msg_ptr = safe_cast<NetworkMessage*>(msg_ptr.get());
+                DPRINTF(RubyNetwork, "Message: %s\n", (*net_msg_ptr));
+
+                output_links.clear();
+                output_link_destinations.clear();
+                NetDest msg_dsts =
+                    net_msg_ptr->getInternalDestination();
+
+                // Unfortunately, the token-protocol sends some
+                // zero-destination messages, so this assert isn't valid
+                // assert(msg_dsts.count() > 0);
+
+                assert(m_link_order.size() == m_routing_table.size());
+                assert(m_link_order.size() == m_out.size());
+
+                if (m_network_ptr->getAdaptiveRouting()) {
+                    if (m_network_ptr->isVNetOrdered(vnet)) {
+                        // Don't adaptively route
+                        for (int out = 0; out < m_out.size(); out++) {
+                            m_link_order[out].m_link = out;
+                            m_link_order[out].m_value = 0;
+                        }
+                    } else {
+                        // Find how clogged each link is
+                        for (int out = 0; out < m_out.size(); out++) {
+                            int out_queue_length = 0;
+                            for (int v = 0; v < m_virtual_networks; v++) {
+                                out_queue_length += m_out[out][v]->getSize();
+                            }
+                            int value =
+                                (out_queue_length << 8) | (random() & 0xff);
+                            m_link_order[out].m_link = out;
+                            m_link_order[out].m_value = value;
+                        }
+
+                        // Look at the most empty link first
+                        sort(m_link_order.begin(), m_link_order.end());
+                    }
+                }

-            for (int i = 0; i < m_routing_table.size(); i++) {
-                // pick the next link to look at
-                int link = m_link_order[i].m_link;
-                NetDest dst = m_routing_table[link];
-                DPRINTF(RubyNetwork, "dst: %s\n", dst);
-
-                if (!msg_dsts.intersectionIsNotEmpty(dst))
-                    continue;
-
-                // Remember what link we're using
-                output_links.push_back(link);
-
-                // Need to remember which destinations need this
-                // message in another vector. This Set is the
-                // intersection of the routing_table entry and the
-                // current destination set. The intersection must
-                // not be empty, since we are inside "if"
-                output_link_destinations.push_back(msg_dsts.AND(dst));
-
-                // Next, we update the msg_destination not to
-                // include those nodes that were already handled
-                // by this link
-                msg_dsts.removeNetDest(dst);
-            }
+                for (int i = 0; i < m_routing_table.size(); i++) {
+                    // pick the next link to look at
+                    int link = m_link_order[i].m_link;
+                    NetDest dst = m_routing_table[link];
+                    DPRINTF(RubyNetwork, "dst: %s\n", dst);
+
+                    if (!msg_dsts.intersectionIsNotEmpty(dst))
+                        continue;
+
+                    // Remember what link we're using
+                    output_links.push_back(link);
+
+                    // Need to remember which destinations need this
+                    // message in another vector. This Set is the
+                    // intersection of the routing_table entry and the
+                    // current destination set. The intersection must
+                    // not be empty, since we are inside "if"
+                    output_link_destinations.push_back(msg_dsts.AND(dst));
+
+                    // Next, we update the msg_destination not to
+                    // include those nodes that were already handled
+                    // by this link
+                    msg_dsts.removeNetDest(dst);
+                }

-            assert(msg_dsts.count() == 0);
-            //assert(output_links.size() > 0);
-
-            // Check for resources - for all outgoing queues
-            bool enough = true;
-            for (int i = 0; i < output_links.size(); i++) {
-                int outgoing = output_links[i];
-                if (!m_out[outgoing][vnet]->areNSlotsAvailable(1))
-                    enough = false;
-                DPRINTF(RubyNetwork, "Checking if node is blocked\n"
-                        "outgoing: %d, vnet: %d, enough: %d\n",
-                        outgoing, vnet, enough);
-            }
+                assert(msg_dsts.count() == 0);
+                //assert(output_links.size() > 0);
+
+                // Check for resources - for all outgoing queues
+                bool enough = true;
+                for (int i = 0; i < output_links.size(); i++) {
+                    int outgoing = output_links[i];
+                    if (!m_out[outgoing][vnet]->areNSlotsAvailable(1))
+                        enough = false;
+                    DPRINTF(RubyNetwork, "Checking if node is blocked\n"
+                            "outgoing: %d, vnet: %d, enough: %d\n",
+                            outgoing, vnet, enough);
+                }

-            // There were not enough resources
-            if (!enough) {
-                g_eventQueue_ptr->scheduleEvent(this, 1);
-                DPRINTF(RubyNetwork, "Can't deliver message since a node "
-                        "is blocked\n"
-                        "Message: %s\n", (*net_msg_ptr));
-                break; // go to next incoming port
-            }
+                // There were not enough resources
+                if (!enough) {
+                    g_eventQueue_ptr->scheduleEvent(this, 1);
+                    DPRINTF(RubyNetwork, "Can't deliver message since a node "
+                            "is blocked\n"
+                            "Message: %s\n", (*net_msg_ptr));
+                    break; // go to next incoming port
+                }

-            MsgPtr unmodified_msg_ptr;
+                MsgPtr unmodified_msg_ptr;

-            if (output_links.size() > 1) {
-                // If we are sending this message down more than
-                // one link (size>1), we need to make a copy of
-                // the message so each branch can have a different
-                // internal destination we need to create an
-                // unmodified MsgPtr because the MessageBuffer
-                // enqueue func will modify the message
-
-                // This magic line creates a private copy of the
-                // message
-                unmodified_msg_ptr = msg_ptr->clone();
-            }
+                if (output_links.size() > 1) {
+                    // If we are sending this message down more than
+                    // one link (size>1), we need to make a copy of
+                    // the message so each branch can have a different
+                    // internal destination we need to create an
+                    // unmodified MsgPtr because the MessageBuffer
+                    // enqueue func will modify the message
+
+                    // This magic line creates a private copy of the
+                    // message
+                    unmodified_msg_ptr = msg_ptr->clone();
+                }

-            // Enqueue it - for all outgoing queues
-            for (int i=0; i<output_links.size(); i++) {
-                int outgoing = output_links[i];
+                // Enqueue it - for all outgoing queues
+                for (int i=0; i<output_links.size(); i++) {
+                    int outgoing = output_links[i];

-                if (i > 0) {
-                    // create a private copy of the unmodified
-                    // message
-                    msg_ptr = unmodified_msg_ptr->clone();
-                }
+                    if (i > 0) {
+                        // create a private copy of the unmodified
+                        // message
+                        msg_ptr = unmodified_msg_ptr->clone();
+                    }

-                // Change the internal destination set of the
-                // message so it knows which destinations this
-                // link is responsible for.
-                net_msg_ptr = safe_cast<NetworkMessage*>(msg_ptr.get());
-                net_msg_ptr->getInternalDestination() =
-                    output_link_destinations[i];
+                    // Change the internal destination set of the
+                    // message so it knows which destinations this
+                    // link is responsible for.
+                    net_msg_ptr = safe_cast<NetworkMessage*>(msg_ptr.get());
+                    net_msg_ptr->getInternalDestination() =
+                        output_link_destinations[i];

-                // Enqueue msg
-                DPRINTF(RubyNetwork, "Switch: %d enqueuing net msg from "
-                        "inport[%d][%d] to outport [%d][%d] time: %lld.\n",
-                        m_switch_id, incoming, vnet, outgoing, vnet,
-                        g_eventQueue_ptr->getTime());
+                    // Enqueue msg
+                    DPRINTF(RubyNetwork, "Switch: %d enqueuing net msg from "
+                            "inport[%d][%d] to outport [%d][%d] time: %lld.\n",
+                            m_switch_id, incoming, vnet, outgoing, vnet,
+                            g_eventQueue_ptr->getTime());

-                m_out[outgoing][vnet]->enqueue(msg_ptr);
+                    m_out[outgoing][vnet]->enqueue(msg_ptr);
+                }

-            }
-
-            // Dequeue msg
-            m_in[incoming][vnet]->pop();
+                // Dequeue msg
+                m_in[incoming][vnet]->pop();
+                m_pending_message_count[vnet]--;
+            }
         }
     }
 }

 void
+PerfectSwitch::storeEventInfo(int info)
+{
+    m_pending_message_count[info]++;
+}
+
+void
 PerfectSwitch::printStats(std::ostream& out) const
 {
     out << "PerfectSwitch printStats" << endl;
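The wakeup() rewrite wraps the whole round-robin scan in if (m_pending_message_count[vnet] > 0): the counter is bumped by storeEventInfo() on every enqueue and decremented on every pop(), so a vnet with nothing pending now costs one integer compare instead of a walk over every input port. A runnable toy model of that bookkeeping (ToySwitch is illustrative, not the PerfectSwitch API):

    #include <cstddef>
    #include <deque>
    #include <vector>

    struct ToySwitch {
        std::vector<std::deque<int> > queues;  // one toy queue per vnet
        std::vector<int> pending;              // pending messages per vnet

        explicit ToySwitch(int vnets) : queues(vnets), pending(vnets, 0) {}

        // called from the buffer on enqueue, like storeEventInfo() above
        void storeEventInfo(int vnet) { ++pending[vnet]; }

        void wakeup()
        {
            for (std::size_t v = 0; v < queues.size(); ++v) {
                if (pending[v] == 0)
                    continue;              // fast path: skip the empty vnet
                while (!queues[v].empty()) {
                    // routing and delivery would happen here
                    queues[v].pop_front();
                    --pending[v];
                }
            }
        }
    };

The trade-off is that every enqueue and dequeue must keep the counter exact, which is why the pop() path in the hunk above decrements it in lock step.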
diff --git a/src/mem/ruby/network/simple/PerfectSwitch.hh b/src/mem/ruby/network/simple/PerfectSwitch.hh
index a7e577df0..cd0219fd9 100644
--- a/src/mem/ruby/network/simple/PerfectSwitch.hh
+++ b/src/mem/ruby/network/simple/PerfectSwitch.hh
@@ -69,6 +69,7 @@ class PerfectSwitch : public Consumer
     int getOutLinks() const { return m_out.size(); }

     void wakeup();
+    void storeEventInfo(int info);

     void printStats(std::ostream& out) const;
     void clearStats();
@@ -92,6 +93,7 @@ class PerfectSwitch : public Consumer
     int m_round_robin_start;
     int m_wakeups_wo_switch;
     SimpleNetwork* m_network_ptr;
+    std::vector<int> m_pending_message_count;
 };

 inline std::ostream&
diff --git a/src/mem/ruby/slicc_interface/Message.hh b/src/mem/ruby/slicc_interface/Message.hh
index ff94fdd40..7fcfabe9c 100644
--- a/src/mem/ruby/slicc_interface/Message.hh
+++ b/src/mem/ruby/slicc_interface/Message.hh
@@ -57,6 +57,8 @@ class Message : public RefCounted
     virtual Message* clone() const = 0;
     virtual void print(std::ostream& out) const = 0;
+    virtual void setIncomingLink(int) {}
+    virtual void setVnet(int) {}

     void setDelayedCycles(const int& cycles) { m_DelayedCycles = cycles; }
     const int& getDelayedCycles() const {return m_DelayedCycles;}
diff --git a/src/mem/ruby/slicc_interface/NetworkMessage.hh b/src/mem/ruby/slicc_interface/NetworkMessage.hh
index 082481e05..a8f9c625b 100644
--- a/src/mem/ruby/slicc_interface/NetworkMessage.hh
+++ b/src/mem/ruby/slicc_interface/NetworkMessage.hh
@@ -82,9 +82,16 @@ class NetworkMessage : public Message

     virtual void print(std::ostream& out) const = 0;

+    int getIncomingLink() const { return incoming_link; }
+    void setIncomingLink(int link) { incoming_link = link; }
+    int getVnet() const { return vnet; }
+    void setVnet(int net) { vnet = net; }
+
   private:
     NetDest m_internal_dest;
     bool m_internal_dest_valid;
+    int incoming_link;
+    int vnet;
 };

 inline std::ostream&
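Message gains no-op virtual setters so generic code can annotate any message it holds by base pointer, while NetworkMessage actually stores the incoming link and vnet. One subtlety worth noting: the new fields are plain ints with no initializer, so they are only meaningful once the buffer has called the setters. A condensed, hypothetical view of the same split (not the full gem5 classes):

    struct ToyMessage {
        virtual ~ToyMessage() {}
        virtual void setIncomingLink(int) {}   // ignored by plain messages
        virtual void setVnet(int) {}
    };

    struct ToyNetworkMessage : ToyMessage {
        int incoming_link;   // meaningful only after setIncomingLink()
        int vnet;            // meaningful only after setVnet()
        void setIncomingLink(int link) { incoming_link = link; }
        void setVnet(int net) { vnet = net; }
        int getIncomingLink() const { return incoming_link; }
        int getVnet() const { return vnet; }
    };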
diff --git a/src/python/m5/main.py b/src/python/m5/main.py
index cd139ccb3..23a012166 100644
--- a/src/python/m5/main.py
+++ b/src/python/m5/main.py
@@ -61,8 +61,6 @@ add_option('-C', "--copyright", action="store_true", default=False,
     help="Show full copyright information")
 add_option('-R', "--readme", action="store_true", default=False,
     help="Show the readme")
-add_option('-N', "--release-notes", action="store_true", default=False,
-    help="Show the release notes")

 # Options for configuring the base simulator
 add_option('-d', "--outdir", metavar="DIR", default="m5out",
@@ -207,13 +205,6 @@ def main():
         print info.README
         print

-    if options.release_notes:
-        done = True
-        print 'Release Notes:'
-        print
-        print info.RELEASE_NOTES
-        print
-
     if options.trace_help:
         done = True
         check_tracing()