diff options
29 files changed, 2197 insertions, 310 deletions
diff --git a/build_opts/SPARC_SE b/build_opts/SPARC_SE index 62b6841ad..b288d3908 100644 --- a/build_opts/SPARC_SE +++ b/build_opts/SPARC_SE @@ -1,3 +1,3 @@ TARGET_ISA = 'sparc' -CPU_MODELS = 'AtomicSimpleCPU,TimingSimpleCPU' +CPU_MODELS = 'AtomicSimpleCPU,TimingSimpleCPU,O3CPU' FULL_SYSTEM = 0 diff --git a/src/arch/sparc/isa/formats/mem/swap.isa b/src/arch/sparc/isa/formats/mem/swap.isa index 818597a84..b71542a2b 100644 --- a/src/arch/sparc/isa/formats/mem/swap.isa +++ b/src/arch/sparc/isa/formats/mem/swap.isa @@ -137,7 +137,7 @@ def format Swap(code, postacc_code, mem_flags, *opt_flags) {{ decoder_output, exec_output, decode_block) = doMemFormat(code, SwapFuncs, '', name, Name, flags, - opt_flags, postacc_code) + ["IsStoreConditional"], postacc_code) }}; def format SwapAlt(code, postacc_code, asi, mem_flags, *opt_flags) {{ @@ -148,7 +148,7 @@ def format SwapAlt(code, postacc_code, asi, mem_flags, *opt_flags) {{ decoder_output, exec_output, decode_block) = doMemFormat(code, SwapFuncs, AlternateASIPrivFaultCheck, - name, Name, flags, opt_flags, postacc_code) + name, Name, flags, ["IsStoreConditional"], postacc_code) }}; @@ -163,8 +163,8 @@ let {{ decode_block = BasicDecode.subst(iop) microParams = {"code": code, "postacc_code" : postacc_code, "ea_code" : addrCalcReg, "fault_check" : faultCode} - exec_output = doSplitExecute(execute, name, Name, asi, opt_flags, - microParams); + exec_output = doSplitExecute(execute, name, Name, asi, + ["IsStoreConditional"], microParams); return (header_output, decoder_output, exec_output, decode_block) }}; @@ -177,7 +177,7 @@ def format CasAlt(code, postacc_code, asi, mem_flags, *opt_flags) {{ decoder_output, exec_output, decode_block) = doCasFormat(code, SwapFuncs, AlternateASIPrivFaultCheck, - name, Name, flags, opt_flags, postacc_code) + name, Name, flags, ["IsStoreConditional"], postacc_code) }}; diff --git a/src/arch/x86/isa/base.isa b/src/arch/x86/isa/base.isa index 4776f7a7e..cd166b306 100644 --- a/src/arch/x86/isa/base.isa 
+++ b/src/arch/x86/isa/base.isa @@ -79,6 +79,13 @@ output header {{ void printReg(std::ostream &os, int reg) const; void printSrcReg(std::ostream &os, int reg) const; void printDestReg(std::ostream &os, int reg) const; + + inline uint64_t merge(uint64_t into, uint64_t val, int size) const + { + //FIXME This needs to be significantly more sophisticated + return val; + } + }; }}; diff --git a/src/arch/x86/isa/formats/formats.isa b/src/arch/x86/isa/formats/formats.isa index f4e5c402f..d763c05bc 100644 --- a/src/arch/x86/isa/formats/formats.isa +++ b/src/arch/x86/isa/formats/formats.isa @@ -95,9 +95,6 @@ //malfunction of the decode mechanism. ##include "error.isa" -//Include code to build up macro op instructions -##include "macroop.isa" - //Include a format which implements a batch of instructions which do the same //thing on a variety of inputs ##include "multi.isa" diff --git a/src/arch/x86/isa/formats/macroop.isa b/src/arch/x86/isa/macroop.isa index 717103df1..7d41a2dea 100644 --- a/src/arch/x86/isa/formats/macroop.isa +++ b/src/arch/x86/isa/macroop.isa @@ -55,16 +55,20 @@ // // Authors: Gabe Black -//////////////////////////////////////////////////////////////////// -// -// Instructions that do the same thing to multiple sets of arguments. -// +// Execute method for macroops. +def template MacroExecPanic {{ + Fault execute(%(CPU_exec_context)s *, Trace::InstRecord *) const + { + panic("Tried to execute macroop directly!"); + M5_DUMMY_RETURN + } +}}; output header {{ // Base class for most macroops, except ones that need to commit as // they go. - class X86MacroInst : public X86StaticInst + class X86MacroInst : public StaticInst { protected: const uint32_t numMicroOps; @@ -72,7 +76,7 @@ output header {{ //Constructor. 
X86MacroInst(const char *mnem, ExtMachInst _machInst, uint32_t _numMicroOps) - : X86StaticInst(mnem, _machInst, No_OpClass), + : StaticInst(mnem, _machInst, No_OpClass), numMicroOps(_numMicroOps) { assert(numMicroOps); @@ -85,9 +89,6 @@ output header {{ delete [] microOps; } - std::string generateDisassembly(Addr pc, - const SymbolTable *symtab) const; - StaticInstPtr * microOps; StaticInstPtr fetchMicroOp(MicroPC microPC) @@ -96,21 +97,7 @@ output header {{ return microOps[microPC]; } - %(BasicExecPanic)s - }; - - // Base class for macroops which commit as they go. This is for - // instructions which can be partially completed like those with the - // rep prefix. This prevents those instructions from overflowing - // buffers with uncommitted microops. - class X86RollingMacroInst : public X86MacroInst - { - protected: - //Constructor. - X86RollingMacroInst(const char *mnem, ExtMachInst _machInst, - uint32_t _numMicroOps) - : X86MacroInst(mnem, _machInst, numMicroOps) - {} + %(MacroExecPanic)s }; }}; @@ -121,34 +108,24 @@ def template MacroConstructor {{ { %(constructor)s; //alloc_micro_ops is the code that sets up the microOps - //array in the parent class. This hook will hopefully - //allow all that to be automated. + //array in the parent class. 
%(alloc_micro_ops)s; - setMicroFlags(); } }}; let {{ - def genMacroOp(name, Name, ops, rolling = False): + def genMacroOp(name, Name, opSeq): baseClass = 'X86MacroInst' - if rolling: - baseClass = 'X86RollingMacroInst' - numMicroOps = len(ops) + numMicroOps = len(opSeq.ops) allocMicroOps = '' micropc = 0 - allocMicroOps += \ - "microOps[0] = %s;\n" % \ - op.getAllocator(True, not rolling, True, False) - micropc += 1 - if numMicroOps > 2: - for op in ops[1:-1]: - allocMicroOps += \ - "microOps[%d] = %s;\n" % \ - (micropc, op.getAllocator(True, not rolling, False, False)) - micropc += 1 - allocMicroOps += \ - "microOps[%d] = %s;\n" % \ - op.getAllocator(True, not rolling, False, True) + for op in opSeq.ops: + allocMicroOps += \ + "microOps[%d] = %s;\n" % \ + (micropc, op.getAllocator(True, op.delayed, + micropc == 0, + micropc == numMicroOps - 1)) + micropc += 1 iop = InstObjParams(name, Name, baseClass, {'code' : '', 'num_micro_ops' : numMicroOps, 'alloc_micro_ops' : allocMicroOps}) diff --git a/src/arch/x86/isa/main.isa b/src/arch/x86/isa/main.isa index cc3a9bee4..063d7125d 100644 --- a/src/arch/x86/isa/main.isa +++ b/src/arch/x86/isa/main.isa @@ -72,26 +72,55 @@ namespace X86ISA; -//Include the simple microcode assembler -##include "microasm.isa" +//////////////////////////////////////////////////////////////////// +// +// General infrastructure code. These files provide infrastructure +// which was developed to support x86 but isn't specific to it. +// -//Include the bitfield definitions -##include "bitfields.isa" +//Include code to build macroops. +##include "macroop.isa" -//Include the operand_types and operand definitions -##include "operands.isa" +//Include the simple microcode assembler. This will hopefully stay +//unspecialized for x86 and can later be made available to other ISAs. +##include "microasm.isa" + +//////////////////////////////////////////////////////////////////// +// +// X86 only infrastructure code. 
+// -//Include the base class for x86 instructions, and some support code +//Include the base class for x86 instructions, and some support code. ##include "base.isa" -//Include the instruction definitions -##include "insts/insts.isa" +//Include code to specialize an instruction template to operate on +//a particular set of operands. This is specific to x86 and the x86 +//microcode ISA. +##include "specialize.isa" + +//////////////////////////////////////////////////////////////////// +// +// Code which directly specifies isa components like instructions +// microops, and the decoder. +// //Include the definitions for the instruction formats ##include "formats/formats.isa" -//Include the definitions of the micro ops +//Include the operand_types and operand definitions. These are needed by +//the microop definitions. +##include "operands.isa" + +//Include the definitions of the micro ops. +//These are StaticInst classes which stand on their own and make up an +//internal instruction set. ##include "microops/microops.isa" +//Include the instruction definitions which are microop assembler programs. +##include "insts/insts.isa" + +//Include the bitfield definitions +##include "bitfields.isa" + //Include the decoder definition ##include "decoder/decoder.isa" diff --git a/src/arch/x86/isa/microasm.isa b/src/arch/x86/isa/microasm.isa index b94b55aab..23567aae9 100644 --- a/src/arch/x86/isa/microasm.isa +++ b/src/arch/x86/isa/microasm.isa @@ -57,152 +57,17 @@ //////////////////////////////////////////////////////////////////// // -// Code to "specialize" a microcode sequence to use a particular -// variety of operands +// The microcode assembler // let {{ - # This builds either a regular or macro op to implement the sequence of - # ops we give it. - def genInst(name, Name, ops): - # If we can implement this instruction with exactly one microop, just - # use that directly. 
- newStmnt = '' - if len(ops) == 1: - decode_block = "return (X86StaticInst *)(%s);" % \ - ops[0].getAllocator() - return ('', '', decode_block, '') - else: - # Build a macroop to contain the sequence of microops we've - # been given. - return genMacroOp(name, Name, ops) -}}; - -let {{ - # This code builds up a decode block which decodes based on switchval. - # vals is a dict which matches case values with what should be decoded to. - # builder is called on the exploded contents of "vals" values to generate - # whatever code should be used. - def doSplitDecode(name, Name, builder, switchVal, vals, default = None): - header_output = '' - decoder_output = '' - decode_block = 'switch(%s) {\n' % switchVal - exec_output = '' - for (val, todo) in vals.items(): - (new_header_output, - new_decoder_output, - new_decode_block, - new_exec_output) = builder(name, Name, *todo) - header_output += new_header_output - decoder_output += new_decoder_output - decode_block += '\tcase %s: %s\n' % (val, new_decode_block) - exec_output += new_exec_output - if default: - (new_header_output, - new_decoder_output, - new_decode_block, - new_exec_output) = builder(name, Name, *default) - header_output += new_header_output - decoder_output += new_decoder_output - decode_block += '\tdefault: %s\n' % new_decode_block - exec_output += new_exec_output - decode_block += '}\n' - return (header_output, decoder_output, decode_block, exec_output) -}}; - -let {{ - class OpType(object): - parser = re.compile(r"(?P<tag>[A-Z][A-Z]*)(?P<size>[a-z][a-z]*)|(r(?P<reg>[A-Za-z0-9][A-Za-z0-9]*))") - def __init__(self, opTypeString): - match = OpType.parser.search(opTypeString) - if match == None: - raise Exception, "Problem parsing operand type %s" % opTypeString - self.reg = match.group("reg") - self.tag = match.group("tag") - self.size = match.group("size") + # These are used when setting up microops so that they can specialize their + # base class template properly. 
+ RegOpType = "RegisterOperand" + ImmOpType = "ImmediateOperand" }}; let {{ - - # This function specializes the given piece of code to use a particular - # set of argument types described by "opTypes". These are "implemented" - # in reverse order. - def specializeInst(name, Name, code, opTypes): - opNum = len(opTypes) - 1 - while len(opTypes): - # print "Building a composite op with tags", opTypes - # print "And code", code - opNum = len(opTypes) - 1 - # A regular expression to find the operand placeholders we're - # interested in. - opRe = re.compile("\\^(?P<operandNum>%d)(?=[^0-9]|$)" % opNum) - - # Parse the operand type strign we're working with - opType = OpType(opTypes[opNum]) - - if opType.reg: - #Figure out what to do with fixed register operands - if opType.reg in ("Ax", "Bx", "Cx", "Dx"): - code = opRe.sub("%%{INTREG_R%s}" % opType.reg.upper(), code) - elif opType.reg == "Al": - # We need a way to specify register width - code = opRe.sub("%{INTREG_RAX}", code) - else: - print "Didn't know how to encode fixed register %s!" % opType.reg - elif opType.tag == None or opType.size == None: - raise Exception, "Problem parsing operand tag: %s" % opType.tag - elif opType.tag in ("C", "D", "G", "P", "S", "T", "V"): - # Use the "reg" field of the ModRM byte to select the register - code = opRe.sub("%{(uint8_t)MODRM_REG}", code) - elif opType.tag in ("E", "Q", "W"): - # This might refer to memory or to a register. We need to - # divide it up farther. - regCode = opRe.sub("%{(uint8_t)MODRM_RM}", code) - regTypes = copy.copy(opTypes) - regTypes.pop(-1) - # This needs to refer to memory, but we'll fill in the details - # later. It needs to take into account unaligned memory - # addresses. 
- memCode = opRe.sub("%0", code) - memTypes = copy.copy(opTypes) - memTypes.pop(-1) - return doSplitDecode(name, Name, specializeInst, "MODRM_MOD", - {"3" : (regCode, regTypes)}, (memCode, memTypes)) - elif opType.tag in ("I", "J"): - # Immediates are already in the instruction, so don't leave in - # those parameters - code = opRe.sub("${IMMEDIATE}", code) - elif opType.tag == "M": - # This needs to refer to memory, but we'll fill in the details - # later. It needs to take into account unaligned memory - # addresses. - code = opRe.sub("%0", code) - elif opType.tag in ("PR", "R", "VR"): - # There should probably be a check here to verify that mod - # is equal to 11b - code = opRe.sub("%{(uint8_t)MODRM_RM}", code) - else: - raise Exception, "Unrecognized tag %s." % opType.tag - opTypes.pop(-1) - - # At this point, we've built up "code" to have all the necessary extra - # instructions needed to implement whatever types of operands were - # specified. Now we'll assemble it it into a microOp sequence. - ops = assembleMicro(code) - - # Build a macroop to contain the sequence of microops we've - # constructed. The decode block will be used to fill in our - # inner decode structure, and the rest will be concatenated and - # passed back. - return genInst(name, Name, ops) -}}; - -//////////////////////////////////////////////////////////////////// -// -// The microcode assembler -// - -let {{ class MicroOpStatement(object): def __init__(self): self.className = '' @@ -242,19 +107,9 @@ let {{ return 'new %s%s(machInst%s%s)' % (self.className, signature, self.microFlagsText(microFlags), args) }}; -let {{ - def buildLabelDict(ops): - labels = {} - micropc = 0 - for op in ops: - if op.label: - labels[op.label] = count - micropc += 1 - return labels -}}; - let{{ - def assembleMicro(code): + def assembleMicro(name, Name, code): + # This function takes in a block of microcode assembly and returns # a python list of objects which describe it. 
@@ -341,7 +196,13 @@ let{{ lineMatch = lineRe.search(code) # Decode the labels into displacements - labels = buildLabelDict(statements) + + labels = {} + micropc = 0 + for statement in statements: + if statement.label: + labels[statement.label] = micropc + micropc += 1 micropc = 0 for statement in statements: for arg in statement.args: @@ -353,5 +214,15 @@ let{{ # micropc + 1 + displacement. arg["operandImm"] = labels[arg["operandLabel"]] - micropc - 1 micropc += 1 - return statements + + # If we can implement this instruction with exactly one microop, just + # use that directly. + if len(statements) == 1: + decode_block = "return %s;" % \ + statements[0].getAllocator() + return ('', '', decode_block, '') + else: + # Build a macroop to contain the sequence of microops we've + # been given. + return genMacroOp(name, Name, statements) }}; diff --git a/src/arch/x86/isa/microops/base.isa b/src/arch/x86/isa/microops/base.isa index b1351d999..4254994f3 100644 --- a/src/arch/x86/isa/microops/base.isa +++ b/src/arch/x86/isa/microops/base.isa @@ -63,12 +63,15 @@ output header {{ }; }}; -//A class which is the base of all x86 micro ops it provides a function to +//A class which is the base of all x86 micro ops. It provides a function to //set necessary flags appropriately. 
output header {{ class X86MicroOpBase : public X86StaticInst { protected: + uint8_t opSize; + uint8_t addrSize; + X86MicroOpBase(bool isMicro, bool isDelayed, bool isFirst, bool isLast, const char *mnem, ExtMachInst _machInst, @@ -94,6 +97,7 @@ def template BaseMicroOpTemplateDeclare {{ let {{ def buildBaseMicroOpTemplate(Name, numParams): + assert(numParams > 0) signature = "<" signature += "int SignatureOperandTypeSpecifier0" for count in xrange(1,numParams): @@ -102,10 +106,9 @@ let {{ signature += ">" subs = {"signature" : signature, "class_name" : Name} return BaseMicroOpTemplateDeclare.subst(subs) +}}; - RegOpType = "RegisterOperand" - ImmOpType = "ImmediateOperand" - +let {{ def buildMicroOpTemplateDict(*params): signature = "<" if len(params): diff --git a/src/arch/x86/isa/specialize.isa b/src/arch/x86/isa/specialize.isa new file mode 100644 index 000000000..9cac09770 --- /dev/null +++ b/src/arch/x86/isa/specialize.isa @@ -0,0 +1,172 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2007 The Hewlett-Packard Development Company +// All rights reserved. +// +// Redistribution and use of this software in source and binary forms, +// with or without modification, are permitted provided that the +// following conditions are met: +// +// The software must be used only for Non-Commercial Use which means any +// use which is NOT directed to receiving any direct monetary +// compensation for, or commercial advantage from such use. Illustrative +// examples of non-commercial use are academic research, personal study, +// teaching, education and corporate research & development. +// Illustrative examples of commercial use are distributing products for +// commercial advantage and providing services using the software for +// commercial advantage. 
+// +// If you wish to use this software or functionality therein that may be +// covered by patents for commercial use, please contact: +// Director of Intellectual Property Licensing +// Office of Strategy and Technology +// Hewlett-Packard Company +// 1501 Page Mill Road +// Palo Alto, California 94304 +// +// Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. Redistributions +// in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or +// other materials provided with the distribution. Neither the name of +// the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. No right of +// sublicense is granted herewith. Derivatives of the software and +// output created using the software may be prepared, but only for +// Non-Commercial Uses. Derivatives of the software may be shared with +// others provided: (i) the others agree to abide by the list of +// conditions herein which includes the Non-Commercial Use restrictions; +// and (ii) such Derivatives of the software include the above copyright +// notice to acknowledge the contribution from this software where +// applicable, this list of conditions and the disclaimer below. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +//////////////////////////////////////////////////////////////////// +// +// Code to "specialize" a microcode sequence to use a particular +// variety of operands +// + +let {{ + # This code builds up a decode block which decodes based on switchval. + # vals is a dict which matches case values with what should be decoded to. + # builder is called on the exploded contents of "vals" values to generate + # whatever code should be used. 
+ def doSplitDecode(name, Name, builder, switchVal, vals, default = None): + header_output = '' + decoder_output = '' + decode_block = 'switch(%s) {\n' % switchVal + exec_output = '' + for (val, todo) in vals.items(): + (new_header_output, + new_decoder_output, + new_decode_block, + new_exec_output) = builder(name, Name, *todo) + header_output += new_header_output + decoder_output += new_decoder_output + decode_block += '\tcase %s: %s\n' % (val, new_decode_block) + exec_output += new_exec_output + if default: + (new_header_output, + new_decoder_output, + new_decode_block, + new_exec_output) = builder(name, Name, *default) + header_output += new_header_output + decoder_output += new_decoder_output + decode_block += '\tdefault: %s\n' % new_decode_block + exec_output += new_exec_output + decode_block += '}\n' + return (header_output, decoder_output, decode_block, exec_output) +}}; + +let {{ + class OpType(object): + parser = re.compile(r"(?P<tag>[A-Z][A-Z]*)(?P<size>[a-z][a-z]*)|(r(?P<reg>[A-Za-z0-9][A-Za-z0-9]*))") + def __init__(self, opTypeString): + match = OpType.parser.search(opTypeString) + if match == None: + raise Exception, "Problem parsing operand type %s" % opTypeString + self.reg = match.group("reg") + self.tag = match.group("tag") + self.size = match.group("size") + + # This function specializes the given piece of code to use a particular + # set of argument types described by "opTypes". These are "implemented" + # in reverse order. + def specializeInst(name, Name, code, opTypes): + opNum = len(opTypes) - 1 + while len(opTypes): + # print "Building a composite op with tags", opTypes + # print "And code", code + opNum = len(opTypes) - 1 + # A regular expression to find the operand placeholders we're + # interested in. 
+ opRe = re.compile("\\^(?P<operandNum>%d)(?=[^0-9]|$)" % opNum) + + # Parse the operand type string we're working with + opType = OpType(opTypes[opNum]) + + if opType.reg: + #Figure out what to do with fixed register operands + if opType.reg in ("Ax", "Bx", "Cx", "Dx"): + code = opRe.sub("%%{INTREG_R%s}" % opType.reg.upper(), code) + elif opType.reg == "Al": + # We need a way to specify register width + code = opRe.sub("%{INTREG_RAX}", code) + else: + print "Didn't know how to encode fixed register %s!" % opType.reg + elif opType.tag == None or opType.size == None: + raise Exception, "Problem parsing operand tag: %s" % opType.tag + elif opType.tag in ("C", "D", "G", "P", "S", "T", "V"): + # Use the "reg" field of the ModRM byte to select the register + code = opRe.sub("%{(uint8_t)MODRM_REG}", code) + elif opType.tag in ("E", "Q", "W"): + # This might refer to memory or to a register. We need to + # divide it up further. + regCode = opRe.sub("%{(uint8_t)MODRM_RM}", code) + regTypes = copy.copy(opTypes) + regTypes.pop(-1) + # This needs to refer to memory, but we'll fill in the details + # later. It needs to take into account unaligned memory + # addresses. + memCode = opRe.sub("%0", code) + memTypes = copy.copy(opTypes) + memTypes.pop(-1) + return doSplitDecode(name, Name, specializeInst, "MODRM_MOD", + {"3" : (regCode, regTypes)}, (memCode, memTypes)) + elif opType.tag in ("I", "J"): + # Immediates are already in the instruction, so don't leave in + # those parameters + code = opRe.sub("${IMMEDIATE}", code) + elif opType.tag == "M": + # This needs to refer to memory, but we'll fill in the details + # later. It needs to take into account unaligned memory + # addresses. + code = opRe.sub("%0", code) + elif opType.tag in ("PR", "R", "VR"): + # There should probably be a check here to verify that mod + # is equal to 11b + code = opRe.sub("%{(uint8_t)MODRM_RM}", code) + else: + raise Exception, "Unrecognized tag %s." 
% opType.tag + opTypes.pop(-1) + + # At this point, we've built up "code" to have all the necessary extra + # instructions needed to implement whatever types of operands were + # specified. Now we'll assemble it into a StaticInst. + return assembleMicro(name, Name, code) +}}; diff --git a/src/arch/x86/predecoder.cc b/src/arch/x86/predecoder.cc index 80971e7cf..573012ee6 100644 --- a/src/arch/x86/predecoder.cc +++ b/src/arch/x86/predecoder.cc @@ -117,37 +117,33 @@ namespace X86ISA //Operand size override prefixes case OperandSizeOverride: DPRINTF(Predecoder, "Found operand size override prefix.\n"); + emi.legacy.op = true; break; case AddressSizeOverride: DPRINTF(Predecoder, "Found address size override prefix.\n"); + emi.legacy.addr = true; break; //Segment override prefixes case CSOverride: - DPRINTF(Predecoder, "Found cs segment override.\n"); - break; case DSOverride: - DPRINTF(Predecoder, "Found ds segment override.\n"); - break; case ESOverride: - DPRINTF(Predecoder, "Found es segment override.\n"); - break; case FSOverride: - DPRINTF(Predecoder, "Found fs segment override.\n"); - break; case GSOverride: - DPRINTF(Predecoder, "Found gs segment override.\n"); - break; case SSOverride: - DPRINTF(Predecoder, "Found ss segment override.\n"); + DPRINTF(Predecoder, "Found segment override.\n"); + emi.legacy.seg = prefix; break; case Lock: DPRINTF(Predecoder, "Found lock prefix.\n"); + emi.legacy.lock = true; break; case Rep: DPRINTF(Predecoder, "Found rep prefix.\n"); + emi.legacy.rep = true; break; case Repne: DPRINTF(Predecoder, "Found repne prefix.\n"); + emi.legacy.repne = true; break; case RexPrefix: DPRINTF(Predecoder, "Found Rex prefix %#x.\n", nextByte); @@ -198,16 +194,36 @@ namespace X86ISA displacementCollected = 0; emi.displacement = 0; + //Figure out the effective operand size. This can be overridden to + //a fixed value at the decoder level. 
+ if(/*FIXME long mode*/1) + { + if(emi.rex && emi.rex.w) + emi.opSize = 3; // 64 bit operand size + else if(emi.legacy.op) + emi.opSize = 1; // 16 bit operand size + else + emi.opSize = 2; // 32 bit operand size + } + else if(/*FIXME default 32*/1) + { + if(emi.legacy.op) + emi.opSize = 1; // 16 bit operand size + else + emi.opSize = 2; // 32 bit operand size + } + else // 16 bit default operand size + { + if(emi.legacy.op) + emi.opSize = 2; // 32 bit operand size + else + emi.opSize = 1; // 16 bit operand size + } + //Figure out how big of an immediate we'll retreive based //on the opcode. - int immType = ImmediateType[ - emi.opcode.num - 1][nextByte]; - if(0) //16 bit mode - immediateSize = ImmediateTypeToSize[0][immType]; - else if(!(emi.rex & 0x4)) //32 bit mode - immediateSize = ImmediateTypeToSize[1][immType]; - else //64 bit mode - immediateSize = ImmediateTypeToSize[2][immType]; + int immType = ImmediateType[emi.opcode.num - 1][nextByte]; + immediateSize = SizeTypeToSize[emi.opSize - 1][immType]; //Determine what to expect next if (UsesModRM[emi.opcode.num - 1][nextByte]) { @@ -351,6 +367,16 @@ namespace X86ISA if(immediateSize == immediateCollected) { + //XXX Warning! The following is an observed pattern and might + //not always be true! + + //Instructions which use 64 bit operands but 32 bit immediates + //need to have the immediate sign extended to 64 bits. + //Instructions which use true 64 bit immediates won't be + //affected, and instructions that use true 32 bit immediates + //won't notice. 
+ if(immediateSize == 4) + emi.immediate = sext<32>(emi.immediate); DPRINTF(Predecoder, "Collected immediate %#x.\n", emi.immediate); emiIsReady = true; diff --git a/src/arch/x86/predecoder.hh b/src/arch/x86/predecoder.hh index 1df17d6d2..6562ab9f5 100644 --- a/src/arch/x86/predecoder.hh +++ b/src/arch/x86/predecoder.hh @@ -73,7 +73,7 @@ namespace X86ISA static const uint8_t Prefixes[256]; static const uint8_t UsesModRM[2][256]; static const uint8_t ImmediateType[2][256]; - static const uint8_t ImmediateTypeToSize[3][10]; + static const uint8_t SizeTypeToSize[3][10]; protected: ThreadContext * tc; diff --git a/src/arch/x86/predecoder_tables.cc b/src/arch/x86/predecoder_tables.cc index f233ad234..38b9c57a3 100644 --- a/src/arch/x86/predecoder_tables.cc +++ b/src/arch/x86/predecoder_tables.cc @@ -141,7 +141,7 @@ namespace X86ISA } }; - enum ImmediateTypes { + enum SizeType { NoImm, NI = NoImm, ByteImm, @@ -158,19 +158,19 @@ namespace X86ISA VW = VWordImm, ZWordImm, ZW = ZWordImm, - Pointer, - PO = Pointer, //The enter instruction takes -2- immediates for a total of 3 bytes Enter, - EN = Enter + EN = Enter, + Pointer, + PO = Pointer }; - const uint8_t Predecoder::ImmediateTypeToSize[3][10] = + const uint8_t Predecoder::SizeTypeToSize[3][10] = { -// noimm byte word dword qword oword vword zword enter - {0, 1, 2, 4, 8, 16, 2, 2, 3, 4}, //16 bit - {0, 1, 2, 4, 8, 16, 4, 4, 3, 6}, //32 bit - {0, 1, 2, 4, 8, 16, 4, 8, 3, 0} //64 bit +// noimm byte word dword qword oword vword zword enter pointer + {0, 1, 2, 4, 8, 16, 2, 2, 3, 4 }, //16 bit + {0, 1, 2, 4, 8, 16, 4, 4, 3, 6 }, //32 bit + {0, 1, 2, 4, 8, 16, 4, 8, 3, 0 } //64 bit }; //This table determines the immediate type. 
The first index is the diff --git a/src/arch/x86/types.hh b/src/arch/x86/types.hh index cdac3c00e..022f20ee5 100644 --- a/src/arch/x86/types.hh +++ b/src/arch/x86/types.hh @@ -70,25 +70,31 @@ namespace X86ISA typedef uint64_t MachInst; enum Prefixes { - NoOverride = 0, - CSOverride = 1, - DSOverride = 2, - ESOverride = 3, - FSOverride = 4, - GSOverride = 5, - SSOverride = 6, - //The Rex prefix obviously doesn't fit in with the above, but putting - //it here lets us save double the space the enums take up. - RexPrefix = 7, + NoOverride, + CSOverride, + DSOverride, + ESOverride, + FSOverride, + GSOverride, + SSOverride, + RexPrefix, + OperandSizeOverride, + AddressSizeOverride, + Lock, + Rep, + Repne + }; + + BitUnion8(LegacyPrefixVector) + Bitfield<7> repne; + Bitfield<6> rep; + Bitfield<5> lock; + Bitfield<4> addr; + Bitfield<3> op; //There can be only one segment override, so they share the //first 3 bits in the legacyPrefixes bitfield. - SegmentOverride = 0x7, - OperandSizeOverride = 8, - AddressSizeOverride = 16, - Lock = 32, - Rep = 64, - Repne = 128 - }; + Bitfield<2,0> seg; + EndBitUnion(LegacyPrefixVector) BitUnion8(ModRM) Bitfield<7,6> mod; @@ -118,7 +124,7 @@ namespace X86ISA struct ExtMachInst { //Prefixes - uint8_t legacy; + LegacyPrefixVector legacy; Rex rex; //This holds all of the bytes of the opcode struct @@ -140,6 +146,10 @@ namespace X86ISA //Immediate fields uint64_t immediate; uint64_t displacement; + + //The effective operand size. 
+ uint8_t opSize; + //The }; inline static std::ostream & diff --git a/src/arch/x86/utility.hh b/src/arch/x86/utility.hh index e0bd09515..1c98e7fbc 100644 --- a/src/arch/x86/utility.hh +++ b/src/arch/x86/utility.hh @@ -78,7 +78,8 @@ namespace __hash_namespace { ((uint64_t)emi.opcode.prefixA << 16) | ((uint64_t)emi.opcode.prefixB << 8) | ((uint64_t)emi.opcode.op)) ^ - emi.immediate ^ emi.displacement; + emi.immediate ^ emi.displacement ^ + emi.opSize; }; }; } diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index 6c6d90076..eed05c2f1 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -877,6 +877,11 @@ BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res) effAddrValid = true; physEffAddr = req->getPaddr(); memReqFlags = req->getFlags(); + + if (req->isCondSwap()) { + assert(res); + req->setExtraData(*res); + } #if 0 if (cpu->system->memctrl->badaddr(physEffAddr)) { fault = TheISA::genMachineCheckFault(); diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index e1b27048d..f24de20d9 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -289,15 +289,19 @@ class LSQUnit { struct SQEntry { /** Constructs an empty store queue entry. */ SQEntry() - : inst(NULL), req(NULL), size(0), data(0), + : inst(NULL), req(NULL), size(0), canWB(0), committed(0), completed(0) - { } + { + bzero(data, sizeof(data)); + } /** Constructs a store queue entry for a given instruction. */ SQEntry(DynInstPtr &_inst) - : inst(_inst), req(NULL), size(0), data(0), + : inst(_inst), req(NULL), size(0), canWB(0), committed(0), completed(0) - { } + { + bzero(data, sizeof(data)); + } /** The store instruction. */ DynInstPtr inst; @@ -306,7 +310,7 @@ class LSQUnit { /** The size of the store. */ int size; /** The store data. */ - IntReg data; + char data[sizeof(IntReg)]; /** Whether or not the store can writeback. */ bool canWB; /** Whether or not the store is committed. 
*/ @@ -554,22 +558,14 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) if ((store_has_lower_limit && store_has_upper_limit)) { // Get shift amount for offset into the store's data. int shift_amt = req->getVaddr() & (store_size - 1); - // @todo: Magic number, assumes byte addressing - shift_amt = shift_amt << 3; - - // Cast this to type T? - data = storeQueue[store_idx].data >> shift_amt; - // When the data comes from the store queue entry, it's in host - // order. When it gets sent to the load, it needs to be in guest - // order so when the load converts it again, it ends up back - // in host order like the inst expects. - data = TheISA::htog(data); + memcpy(&data, storeQueue[store_idx].data + shift_amt, sizeof(T)); assert(!load_inst->memData); load_inst->memData = new uint8_t[64]; - memcpy(load_inst->memData, &data, req->getSize()); + memcpy(load_inst->memData, + storeQueue[store_idx].data + shift_amt, req->getSize()); DPRINTF(LSQUnit, "Forwarding from store idx %i to load to " "addr %#x, data %#x\n", @@ -716,7 +712,10 @@ LSQUnit<Impl>::write(Request *req, T &data, int store_idx) storeQueue[store_idx].req = req; storeQueue[store_idx].size = sizeof(T); - storeQueue[store_idx].data = data; + assert(sizeof(T) <= sizeof(storeQueue[store_idx].data)); + + T gData = htog(data); + memcpy(storeQueue[store_idx].data, &gData, sizeof(T)); // This function only writes the data to the store queue, so no fault // can happen here. diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index 2aa0d6b6a..44e2cea76 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -645,22 +645,10 @@ LSQUnit<Impl>::writebackStores() assert(!inst->memData); inst->memData = new uint8_t[64]; - TheISA::IntReg convertedData = - TheISA::htog(storeQueue[storeWBIdx].data); - - //FIXME This is a hack to get SPARC working. It, along with endianness - //in the memory system in general, need to be straightened out more - //formally. 
The problem is that the data's endianness is swapped when - //it's in the 64 bit data field in the store queue. The data that you - //want won't start at the beginning of the field anymore unless it was - //a 64 bit access. - memcpy(inst->memData, - (uint8_t *)&convertedData + - (TheISA::ByteOrderDiffers ? - (sizeof(TheISA::IntReg) - req->getSize()) : 0), - req->getSize()); - - PacketPtr data_pkt = new Packet(req, MemCmd::WriteReq, + memcpy(inst->memData, storeQueue[storeWBIdx].data, req->getSize()); + + MemCmd command = req->isSwap() ? MemCmd::SwapReq : MemCmd::WriteReq; + PacketPtr data_pkt = new Packet(req, command, Packet::Broadcast); data_pkt->dataStatic(inst->memData); @@ -677,7 +665,7 @@ LSQUnit<Impl>::writebackStores() inst->seqNum); // @todo: Remove this SC hack once the memory system handles it. - if (req->isLocked()) { + if (inst->isStoreConditional()) { // Disable recording the result temporarily. Writing to // misc regs normally updates the result, but this is not // the desired behavior when handling store conditionals. diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh index bbc69fc96..b5b1cd021 100644 --- a/src/cpu/o3/regfile.hh +++ b/src/cpu/o3/regfile.hh @@ -174,7 +174,7 @@ class PhysRegFile // Remove the base Float reg dependency. reg_idx = reg_idx - numPhysicalIntRegs; - assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + assert(reg_idx < numPhysicalFloatRegs); DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n", int(reg_idx), (uint64_t)val); @@ -189,7 +189,7 @@ class PhysRegFile // Remove the base Float reg dependency. reg_idx = reg_idx - numPhysicalIntRegs; - assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + assert(reg_idx < numPhysicalFloatRegs); DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n", int(reg_idx), (uint64_t)val); @@ -204,7 +204,7 @@ class PhysRegFile // Remove the base Float reg dependency. 
reg_idx = reg_idx - numPhysicalIntRegs; - assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + assert(reg_idx < numPhysicalFloatRegs); DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n", int(reg_idx), (uint64_t)val); @@ -217,7 +217,7 @@ class PhysRegFile // Remove the base Float reg dependency. reg_idx = reg_idx - numPhysicalIntRegs; - assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + assert(reg_idx < numPhysicalFloatRegs); DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n", int(reg_idx), (uint64_t)val); @@ -232,11 +232,11 @@ class PhysRegFile MiscReg readMiscReg(int misc_reg, unsigned thread_id) { - return miscRegs[thread_id].readReg(misc_reg, - cpu->tcBase(thread_id)); + return miscRegs[thread_id].readReg(misc_reg, cpu->tcBase(thread_id)); } - void setMiscRegNoEffect(int misc_reg, const MiscReg &val, unsigned thread_id) + void setMiscRegNoEffect(int misc_reg, + const MiscReg &val, unsigned thread_id) { miscRegs[thread_id].setRegNoEffect(misc_reg, val); } diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index ec630b31e..431705e19 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -996,7 +996,12 @@ DefaultRename<Impl>::renameSrcRegs(DynInstPtr &inst,unsigned tid) if (src_reg < TheISA::FP_Base_DepTag) { flat_src_reg = TheISA::flattenIntIndex(inst->tcBase(), src_reg); DPRINTF(Rename, "Flattening index %d to %d.\n", (int)src_reg, (int)flat_src_reg); + } else { + // Floating point and Miscellaneous registers need their indexes + // adjusted to account for the expanded number of flattened int regs. + flat_src_reg = src_reg - TheISA::FP_Base_DepTag + TheISA::NumIntRegs; } + inst->flattenSrcReg(src_idx, flat_src_reg); // Look up the source registers to get the phys. 
register they've @@ -1033,8 +1038,13 @@ DefaultRename<Impl>::renameDestRegs(DynInstPtr &inst,unsigned tid) RegIndex dest_reg = inst->destRegIdx(dest_idx); RegIndex flat_dest_reg = dest_reg; if (dest_reg < TheISA::FP_Base_DepTag) { + // Integer registers are flattened. flat_dest_reg = TheISA::flattenIntIndex(inst->tcBase(), dest_reg); DPRINTF(Rename, "Flattening index %d to %d.\n", (int)dest_reg, (int)flat_dest_reg); + } else { + // Floating point and Miscellaneous registers need their indexes + // adjusted to account for the expanded number of flattened int regs. + flat_dest_reg = dest_reg - TheISA::FP_Base_DepTag + TheISA::NumIntRegs; } inst->flattenDestReg(dest_idx, flat_dest_reg); diff --git a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.ini b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.ini new file mode 100644 index 000000000..f804a40fe --- /dev/null +++ b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.ini @@ -0,0 +1,379 @@ +[root] +type=Root +children=system +dummy=0 + +[system] +type=System +children=cpu membus physmem +mem_mode=atomic +physmem=system.physmem + +[system.cpu] +type=DerivO3CPU +children=dcache fuPool icache l2cache toL2Bus workload +BTBEntries=4096 +BTBTagSize=16 +LFSTSize=1024 +LQEntries=32 +RASSize=16 +SQEntries=32 +SSITSize=1024 +activity=0 +backComSize=5 +choiceCtrBits=2 +choicePredictorSize=8192 +clock=1 +commitToDecodeDelay=1 +commitToFetchDelay=1 +commitToIEWDelay=1 +commitToRenameDelay=1 +commitWidth=8 +cpu_id=0 +decodeToFetchDelay=1 +decodeToRenameDelay=1 +decodeWidth=8 +defer_registration=false +dispatchWidth=8 +fetchToDecodeDelay=1 +fetchTrapLatency=1 +fetchWidth=8 +forwardComSize=5 +fuPool=system.cpu.fuPool +function_trace=false +function_trace_start=0 +globalCtrBits=2 +globalHistoryBits=13 +globalPredictorSize=8192 +iewToCommitDelay=1 +iewToDecodeDelay=1 +iewToFetchDelay=1 +iewToRenameDelay=1 +instShiftAmt=2 +issueToExecuteDelay=1 +issueWidth=8 +localCtrBits=2 +localHistoryBits=11 
+localHistoryTableSize=2048 +localPredictorSize=2048 +max_insts_all_threads=0 +max_insts_any_thread=0 +max_loads_all_threads=0 +max_loads_any_thread=0 +numIQEntries=64 +numPhysFloatRegs=256 +numPhysIntRegs=256 +numROBEntries=192 +numRobs=1 +numThreads=1 +phase=0 +predType=tournament +progress_interval=0 +renameToDecodeDelay=1 +renameToFetchDelay=1 +renameToIEWDelay=2 +renameToROBDelay=1 +renameWidth=8 +squashWidth=8 +system=system +trapLatency=13 +wbDepth=1 +wbWidth=8 +workload=system.cpu.workload +dcache_port=system.cpu.dcache.cpu_side +icache_port=system.cpu.icache.cpu_side + +[system.cpu.dcache] +type=BaseCache +adaptive_compression=false +assoc=2 +block_size=64 +compressed_bus=false +compression_latency=0 +hash_delay=1 +hit_latency=1 +latency=1 +lifo=false +max_miss_count=0 +mshrs=10 +prefetch_access=false +prefetch_cache_check_push=true +prefetch_data_accesses_only=false +prefetch_degree=1 +prefetch_latency=10 +prefetch_miss=false +prefetch_past_page=false +prefetch_policy=none +prefetch_serial_squash=false +prefetch_use_cpu_id=true +prefetcher_size=100 +prioritizeRequests=false +protocol=Null +repl=Null +size=262144 +split=false +split_size=0 +store_compressed=false +subblock_size=0 +tgts_per_mshr=20 +trace_addr=0 +two_queue=false +write_buffers=8 +cpu_side=system.cpu.dcache_port +mem_side=system.cpu.toL2Bus.port[1] + +[system.cpu.fuPool] +type=FUPool +children=FUList0 FUList1 FUList2 FUList3 FUList4 FUList5 FUList6 FUList7 +FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUList2 system.cpu.fuPool.FUList3 system.cpu.fuPool.FUList4 system.cpu.fuPool.FUList5 system.cpu.fuPool.FUList6 system.cpu.fuPool.FUList7 + +[system.cpu.fuPool.FUList0] +type=FUDesc +children=opList0 +count=6 +opList=system.cpu.fuPool.FUList0.opList0 + +[system.cpu.fuPool.FUList0.opList0] +type=OpDesc +issueLat=1 +opClass=IntAlu +opLat=1 + +[system.cpu.fuPool.FUList1] +type=FUDesc +children=opList0 opList1 +count=2 +opList=system.cpu.fuPool.FUList1.opList0 
system.cpu.fuPool.FUList1.opList1 + +[system.cpu.fuPool.FUList1.opList0] +type=OpDesc +issueLat=1 +opClass=IntMult +opLat=3 + +[system.cpu.fuPool.FUList1.opList1] +type=OpDesc +issueLat=19 +opClass=IntDiv +opLat=20 + +[system.cpu.fuPool.FUList2] +type=FUDesc +children=opList0 opList1 opList2 +count=4 +opList=system.cpu.fuPool.FUList2.opList0 system.cpu.fuPool.FUList2.opList1 system.cpu.fuPool.FUList2.opList2 + +[system.cpu.fuPool.FUList2.opList0] +type=OpDesc +issueLat=1 +opClass=FloatAdd +opLat=2 + +[system.cpu.fuPool.FUList2.opList1] +type=OpDesc +issueLat=1 +opClass=FloatCmp +opLat=2 + +[system.cpu.fuPool.FUList2.opList2] +type=OpDesc +issueLat=1 +opClass=FloatCvt +opLat=2 + +[system.cpu.fuPool.FUList3] +type=FUDesc +children=opList0 opList1 opList2 +count=2 +opList=system.cpu.fuPool.FUList3.opList0 system.cpu.fuPool.FUList3.opList1 system.cpu.fuPool.FUList3.opList2 + +[system.cpu.fuPool.FUList3.opList0] +type=OpDesc +issueLat=1 +opClass=FloatMult +opLat=4 + +[system.cpu.fuPool.FUList3.opList1] +type=OpDesc +issueLat=12 +opClass=FloatDiv +opLat=12 + +[system.cpu.fuPool.FUList3.opList2] +type=OpDesc +issueLat=24 +opClass=FloatSqrt +opLat=24 + +[system.cpu.fuPool.FUList4] +type=FUDesc +children=opList0 +count=0 +opList=system.cpu.fuPool.FUList4.opList0 + +[system.cpu.fuPool.FUList4.opList0] +type=OpDesc +issueLat=1 +opClass=MemRead +opLat=1 + +[system.cpu.fuPool.FUList5] +type=FUDesc +children=opList0 +count=0 +opList=system.cpu.fuPool.FUList5.opList0 + +[system.cpu.fuPool.FUList5.opList0] +type=OpDesc +issueLat=1 +opClass=MemWrite +opLat=1 + +[system.cpu.fuPool.FUList6] +type=FUDesc +children=opList0 opList1 +count=4 +opList=system.cpu.fuPool.FUList6.opList0 system.cpu.fuPool.FUList6.opList1 + +[system.cpu.fuPool.FUList6.opList0] +type=OpDesc +issueLat=1 +opClass=MemRead +opLat=1 + +[system.cpu.fuPool.FUList6.opList1] +type=OpDesc +issueLat=1 +opClass=MemWrite +opLat=1 + +[system.cpu.fuPool.FUList7] +type=FUDesc +children=opList0 +count=1 
+opList=system.cpu.fuPool.FUList7.opList0 + +[system.cpu.fuPool.FUList7.opList0] +type=OpDesc +issueLat=3 +opClass=IprAccess +opLat=3 + +[system.cpu.icache] +type=BaseCache +adaptive_compression=false +assoc=2 +block_size=64 +compressed_bus=false +compression_latency=0 +hash_delay=1 +hit_latency=1 +latency=1 +lifo=false +max_miss_count=0 +mshrs=10 +prefetch_access=false +prefetch_cache_check_push=true +prefetch_data_accesses_only=false +prefetch_degree=1 +prefetch_latency=10 +prefetch_miss=false +prefetch_past_page=false +prefetch_policy=none +prefetch_serial_squash=false +prefetch_use_cpu_id=true +prefetcher_size=100 +prioritizeRequests=false +protocol=Null +repl=Null +size=131072 +split=false +split_size=0 +store_compressed=false +subblock_size=0 +tgts_per_mshr=20 +trace_addr=0 +two_queue=false +write_buffers=8 +cpu_side=system.cpu.icache_port +mem_side=system.cpu.toL2Bus.port[0] + +[system.cpu.l2cache] +type=BaseCache +adaptive_compression=false +assoc=2 +block_size=64 +compressed_bus=false +compression_latency=0 +hash_delay=1 +hit_latency=1 +latency=1 +lifo=false +max_miss_count=0 +mshrs=10 +prefetch_access=false +prefetch_cache_check_push=true +prefetch_data_accesses_only=false +prefetch_degree=1 +prefetch_latency=10 +prefetch_miss=false +prefetch_past_page=false +prefetch_policy=none +prefetch_serial_squash=false +prefetch_use_cpu_id=true +prefetcher_size=100 +prioritizeRequests=false +protocol=Null +repl=Null +size=2097152 +split=false +split_size=0 +store_compressed=false +subblock_size=0 +tgts_per_mshr=5 +trace_addr=0 +two_queue=false +write_buffers=8 +cpu_side=system.cpu.toL2Bus.port[2] +mem_side=system.membus.port[1] + +[system.cpu.toL2Bus] +type=Bus +bus_id=0 +clock=1000 +responder_set=false +width=64 +port=system.cpu.icache.mem_side system.cpu.dcache.mem_side system.cpu.l2cache.cpu_side + +[system.cpu.workload] +type=LiveProcess +cmd=insttest +cwd= +egid=100 +env= +euid=100 +executable=tests/test-progs/insttest/bin/sparc/linux/insttest +gid=100 
+input=cin +output=cout +pid=100 +ppid=99 +system=system +uid=100 + +[system.membus] +type=Bus +bus_id=0 +clock=1000 +responder_set=false +width=64 +port=system.physmem.port system.cpu.l2cache.mem_side + +[system.physmem] +type=PhysicalMemory +file= +latency=1 +range=0:134217727 +zero=false +port=system.membus.port[0] + diff --git a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.out b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.out new file mode 100644 index 000000000..d248f77bf --- /dev/null +++ b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.out @@ -0,0 +1,367 @@ +[root] +type=Root +dummy=0 + +[system.physmem] +type=PhysicalMemory +file= +range=[0,134217727] +latency=1 +zero=false + +[system] +type=System +physmem=system.physmem +mem_mode=atomic + +[system.membus] +type=Bus +bus_id=0 +clock=1000 +width=64 +responder_set=false + +[system.cpu.workload] +type=LiveProcess +cmd=insttest +executable=tests/test-progs/insttest/bin/sparc/linux/insttest +input=cin +output=cout +env= +cwd= +system=system +uid=100 +euid=100 +gid=100 +egid=100 +pid=100 +ppid=99 + +[system.cpu.fuPool.FUList0.opList0] +type=OpDesc +opClass=IntAlu +opLat=1 +issueLat=1 + +[system.cpu.fuPool.FUList0] +type=FUDesc +opList=system.cpu.fuPool.FUList0.opList0 +count=6 + +[system.cpu.fuPool.FUList1.opList0] +type=OpDesc +opClass=IntMult +opLat=3 +issueLat=1 + +[system.cpu.fuPool.FUList1.opList1] +type=OpDesc +opClass=IntDiv +opLat=20 +issueLat=19 + +[system.cpu.fuPool.FUList1] +type=FUDesc +opList=system.cpu.fuPool.FUList1.opList0 system.cpu.fuPool.FUList1.opList1 +count=2 + +[system.cpu.fuPool.FUList2.opList0] +type=OpDesc +opClass=FloatAdd +opLat=2 +issueLat=1 + +[system.cpu.fuPool.FUList2.opList1] +type=OpDesc +opClass=FloatCmp +opLat=2 +issueLat=1 + +[system.cpu.fuPool.FUList2.opList2] +type=OpDesc +opClass=FloatCvt +opLat=2 +issueLat=1 + +[system.cpu.fuPool.FUList2] +type=FUDesc +opList=system.cpu.fuPool.FUList2.opList0 system.cpu.fuPool.FUList2.opList1 
system.cpu.fuPool.FUList2.opList2 +count=4 + +[system.cpu.fuPool.FUList3.opList0] +type=OpDesc +opClass=FloatMult +opLat=4 +issueLat=1 + +[system.cpu.fuPool.FUList3.opList1] +type=OpDesc +opClass=FloatDiv +opLat=12 +issueLat=12 + +[system.cpu.fuPool.FUList3.opList2] +type=OpDesc +opClass=FloatSqrt +opLat=24 +issueLat=24 + +[system.cpu.fuPool.FUList3] +type=FUDesc +opList=system.cpu.fuPool.FUList3.opList0 system.cpu.fuPool.FUList3.opList1 system.cpu.fuPool.FUList3.opList2 +count=2 + +[system.cpu.fuPool.FUList4.opList0] +type=OpDesc +opClass=MemRead +opLat=1 +issueLat=1 + +[system.cpu.fuPool.FUList4] +type=FUDesc +opList=system.cpu.fuPool.FUList4.opList0 +count=0 + +[system.cpu.fuPool.FUList5.opList0] +type=OpDesc +opClass=MemWrite +opLat=1 +issueLat=1 + +[system.cpu.fuPool.FUList5] +type=FUDesc +opList=system.cpu.fuPool.FUList5.opList0 +count=0 + +[system.cpu.fuPool.FUList6.opList0] +type=OpDesc +opClass=MemRead +opLat=1 +issueLat=1 + +[system.cpu.fuPool.FUList6.opList1] +type=OpDesc +opClass=MemWrite +opLat=1 +issueLat=1 + +[system.cpu.fuPool.FUList6] +type=FUDesc +opList=system.cpu.fuPool.FUList6.opList0 system.cpu.fuPool.FUList6.opList1 +count=4 + +[system.cpu.fuPool.FUList7.opList0] +type=OpDesc +opClass=IprAccess +opLat=3 +issueLat=3 + +[system.cpu.fuPool.FUList7] +type=FUDesc +opList=system.cpu.fuPool.FUList7.opList0 +count=1 + +[system.cpu.fuPool] +type=FUPool +FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUList2 system.cpu.fuPool.FUList3 system.cpu.fuPool.FUList4 system.cpu.fuPool.FUList5 system.cpu.fuPool.FUList6 system.cpu.fuPool.FUList7 + +[system.cpu] +type=DerivO3CPU +clock=1 +phase=0 +numThreads=1 +cpu_id=0 +activity=0 +workload=system.cpu.workload +checker=null +max_insts_any_thread=0 +max_insts_all_threads=0 +max_loads_any_thread=0 +max_loads_all_threads=0 +progress_interval=0 +cachePorts=200 +decodeToFetchDelay=1 +renameToFetchDelay=1 +iewToFetchDelay=1 +commitToFetchDelay=1 +fetchWidth=8 +renameToDecodeDelay=1 
+iewToDecodeDelay=1 +commitToDecodeDelay=1 +fetchToDecodeDelay=1 +decodeWidth=8 +iewToRenameDelay=1 +commitToRenameDelay=1 +decodeToRenameDelay=1 +renameWidth=8 +commitToIEWDelay=1 +renameToIEWDelay=2 +issueToExecuteDelay=1 +dispatchWidth=8 +issueWidth=8 +wbWidth=8 +wbDepth=1 +fuPool=system.cpu.fuPool +iewToCommitDelay=1 +renameToROBDelay=1 +commitWidth=8 +squashWidth=8 +trapLatency=13 +backComSize=5 +forwardComSize=5 +predType=tournament +localPredictorSize=2048 +localCtrBits=2 +localHistoryTableSize=2048 +localHistoryBits=11 +globalPredictorSize=8192 +globalCtrBits=2 +globalHistoryBits=13 +choicePredictorSize=8192 +choiceCtrBits=2 +BTBEntries=4096 +BTBTagSize=16 +RASSize=16 +LQEntries=32 +SQEntries=32 +LFSTSize=1024 +SSITSize=1024 +numPhysIntRegs=256 +numPhysFloatRegs=256 +numIQEntries=64 +numROBEntries=192 +smtNumFetchingThreads=1 +smtFetchPolicy=SingleThread +smtLSQPolicy=Partitioned +smtLSQThreshold=100 +smtIQPolicy=Partitioned +smtIQThreshold=100 +smtROBPolicy=Partitioned +smtROBThreshold=100 +smtCommitPolicy=RoundRobin +instShiftAmt=2 +defer_registration=false +function_trace=false +function_trace_start=0 + +[system.cpu.icache] +type=BaseCache +size=131072 +assoc=2 +block_size=64 +latency=1 +mshrs=10 +tgts_per_mshr=20 +write_buffers=8 +prioritizeRequests=false +protocol=null +trace_addr=0 +hash_delay=1 +repl=null +compressed_bus=false +store_compressed=false +adaptive_compression=false +compression_latency=0 +block_size=64 +max_miss_count=0 +addr_range=[0,18446744073709551615] +split=false +split_size=0 +lifo=false +two_queue=false +prefetch_miss=false +prefetch_access=false +prefetcher_size=100 +prefetch_past_page=false +prefetch_serial_squash=false +prefetch_latency=10 +prefetch_degree=1 +prefetch_policy=none +prefetch_cache_check_push=true +prefetch_use_cpu_id=true +prefetch_data_accesses_only=false +hit_latency=1 + +[system.cpu.dcache] +type=BaseCache +size=262144 +assoc=2 +block_size=64 +latency=1 +mshrs=10 +tgts_per_mshr=20 +write_buffers=8 
+prioritizeRequests=false +protocol=null +trace_addr=0 +hash_delay=1 +repl=null +compressed_bus=false +store_compressed=false +adaptive_compression=false +compression_latency=0 +block_size=64 +max_miss_count=0 +addr_range=[0,18446744073709551615] +split=false +split_size=0 +lifo=false +two_queue=false +prefetch_miss=false +prefetch_access=false +prefetcher_size=100 +prefetch_past_page=false +prefetch_serial_squash=false +prefetch_latency=10 +prefetch_degree=1 +prefetch_policy=none +prefetch_cache_check_push=true +prefetch_use_cpu_id=true +prefetch_data_accesses_only=false +hit_latency=1 + +[system.cpu.l2cache] +type=BaseCache +size=2097152 +assoc=2 +block_size=64 +latency=1 +mshrs=10 +tgts_per_mshr=5 +write_buffers=8 +prioritizeRequests=false +protocol=null +trace_addr=0 +hash_delay=1 +repl=null +compressed_bus=false +store_compressed=false +adaptive_compression=false +compression_latency=0 +block_size=64 +max_miss_count=0 +addr_range=[0,18446744073709551615] +split=false +split_size=0 +lifo=false +two_queue=false +prefetch_miss=false +prefetch_access=false +prefetcher_size=100 +prefetch_past_page=false +prefetch_serial_squash=false +prefetch_latency=10 +prefetch_degree=1 +prefetch_policy=none +prefetch_cache_check_push=true +prefetch_use_cpu_id=true +prefetch_data_accesses_only=false +hit_latency=1 + +[system.cpu.toL2Bus] +type=Bus +bus_id=0 +clock=1000 +width=64 +responder_set=false + diff --git a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/m5stats.txt b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/m5stats.txt new file mode 100644 index 000000000..7c0d31494 --- /dev/null +++ b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/m5stats.txt @@ -0,0 +1,410 @@ + +---------- Begin Simulation Statistics ---------- +global.BPredUnit.BTBCorrect 0 # Number of correct BTB predictions (this stat may not work properly. 
+global.BPredUnit.BTBHits 2990 # Number of BTB hits +global.BPredUnit.BTBLookups 7055 # Number of BTB lookups +global.BPredUnit.RASInCorrect 0 # Number of incorrect RAS predictions. +global.BPredUnit.condIncorrect 2077 # Number of conditional branches incorrect +global.BPredUnit.condPredicted 7846 # Number of conditional branches predicted +global.BPredUnit.lookups 7846 # Number of BP lookups +global.BPredUnit.usedRAS 0 # Number of times the RAS was used to get a target. +host_inst_rate 15119 # Simulator instruction rate (inst/s) +host_mem_usage 154868 # Number of bytes of host memory used +host_seconds 0.73 # Real time elapsed on the host +host_tick_rate 1956796 # Simulator tick rate (ticks/s) +memdepunit.memDep.conflictingLoads 12 # Number of conflicting loads. +memdepunit.memDep.conflictingStores 0 # Number of conflicting stores. +memdepunit.memDep.insertedLoads 3250 # Number of loads inserted to the mem dependence unit. +memdepunit.memDep.insertedStores 2817 # Number of stores inserted to the mem dependence unit. 
+sim_freq 1000000000000 # Frequency of simulated ticks +sim_insts 10976 # Number of instructions simulated +sim_seconds 0.000001 # Number of seconds simulated +sim_ticks 1421211 # Number of ticks simulated +system.cpu.commit.COM:branches 2152 # Number of branches committed +system.cpu.commit.COM:bw_lim_events 172 # number cycles where commit BW limit reached +system.cpu.commit.COM:bw_limited 0 # number of insts not committed due to BW limits +system.cpu.commit.COM:committed_per_cycle.start_dist # Number of insts commited each cycle +system.cpu.commit.COM:committed_per_cycle.samples 221349 +system.cpu.commit.COM:committed_per_cycle.min_value 0 + 0 215844 9751.30% + 1 2970 134.18% + 2 1290 58.28% + 3 631 28.51% + 4 208 9.40% + 5 90 4.07% + 6 133 6.01% + 7 11 0.50% + 8 172 7.77% +system.cpu.commit.COM:committed_per_cycle.max_value 8 +system.cpu.commit.COM:committed_per_cycle.end_dist + +system.cpu.commit.COM:count 10976 # Number of instructions committed +system.cpu.commit.COM:loads 1462 # Number of loads committed +system.cpu.commit.COM:membars 0 # Number of memory barriers committed +system.cpu.commit.COM:refs 2760 # Number of memory references committed +system.cpu.commit.COM:swp_count 0 # Number of s/w prefetches committed +system.cpu.commit.branchMispredicts 2077 # The number of times a branch was mispredicted +system.cpu.commit.commitCommittedInsts 10976 # The number of committed instructions +system.cpu.commit.commitNonSpecStalls 327 # The number of times commit has been forced to stall to communicate backwards +system.cpu.commit.commitSquashedInsts 14263 # The number of squashed insts skipped by commit +system.cpu.committedInsts 10976 # Number of Instructions Simulated +system.cpu.committedInsts_total 10976 # Number of Instructions Simulated +system.cpu.cpi 129.483509 # CPI: Cycles Per Instruction +system.cpu.cpi_total 129.483509 # CPI: Total CPI of All Threads +system.cpu.dcache.ReadReq_accesses 2737 # number of ReadReq accesses(hits+misses) 
+system.cpu.dcache.ReadReq_avg_miss_latency 6585.044776 # average ReadReq miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency 6511.939394 # average ReadReq mshr miss latency +system.cpu.dcache.ReadReq_hits 2603 # number of ReadReq hits +system.cpu.dcache.ReadReq_miss_latency 882396 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_miss_rate 0.048959 # miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_misses 134 # number of ReadReq misses +system.cpu.dcache.ReadReq_mshr_hits 68 # number of ReadReq MSHR hits +system.cpu.dcache.ReadReq_mshr_miss_latency 429788 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_mshr_miss_rate 0.024114 # mshr miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_mshr_misses 66 # number of ReadReq MSHR misses +system.cpu.dcache.SwapReq_accesses 6 # number of SwapReq accesses(hits+misses) +system.cpu.dcache.SwapReq_hits 6 # number of SwapReq hits +system.cpu.dcache.WriteReq_accesses 1292 # number of WriteReq accesses(hits+misses) +system.cpu.dcache.WriteReq_avg_miss_latency 7960.583924 # average WriteReq miss latency +system.cpu.dcache.WriteReq_avg_mshr_miss_latency 7136.918605 # average WriteReq mshr miss latency +system.cpu.dcache.WriteReq_hits 869 # number of WriteReq hits +system.cpu.dcache.WriteReq_miss_latency 3367327 # number of WriteReq miss cycles +system.cpu.dcache.WriteReq_miss_rate 0.327399 # miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_misses 423 # number of WriteReq misses +system.cpu.dcache.WriteReq_mshr_hits 337 # number of WriteReq MSHR hits +system.cpu.dcache.WriteReq_mshr_miss_latency 613775 # number of WriteReq MSHR miss cycles +system.cpu.dcache.WriteReq_mshr_miss_rate 0.066563 # mshr miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_mshr_misses 86 # number of WriteReq MSHR misses +system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked +system.cpu.dcache.avg_blocked_cycles_no_targets <err: div-0> 
# average number of cycles each access was blocked +system.cpu.dcache.avg_refs 22.881579 # Average number of references to valid blocks. +system.cpu.dcache.blocked_no_mshrs 0 # number of cycles access was blocked +system.cpu.dcache.blocked_no_targets 0 # number of cycles access was blocked +system.cpu.dcache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked +system.cpu.dcache.blocked_cycles_no_targets 0 # number of cycles access was blocked +system.cpu.dcache.cache_copies 0 # number of cache copies performed +system.cpu.dcache.demand_accesses 4029 # number of demand (read+write) accesses +system.cpu.dcache.demand_avg_miss_latency 7629.664273 # average overall miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency 6865.546053 # average overall mshr miss latency +system.cpu.dcache.demand_hits 3472 # number of demand (read+write) hits +system.cpu.dcache.demand_miss_latency 4249723 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_rate 0.138248 # miss rate for demand accesses +system.cpu.dcache.demand_misses 557 # number of demand (read+write) misses +system.cpu.dcache.demand_mshr_hits 405 # number of demand (read+write) MSHR hits +system.cpu.dcache.demand_mshr_miss_latency 1043563 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_rate 0.037726 # mshr miss rate for demand accesses +system.cpu.dcache.demand_mshr_misses 152 # number of demand (read+write) MSHR misses +system.cpu.dcache.fast_writes 0 # number of fast writes performed +system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated +system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate +system.cpu.dcache.overall_accesses 4029 # number of overall (read+write) accesses +system.cpu.dcache.overall_avg_miss_latency 7629.664273 # average overall miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency 6865.546053 # average overall mshr miss latency 
+system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency +system.cpu.dcache.overall_hits 3472 # number of overall hits +system.cpu.dcache.overall_miss_latency 4249723 # number of overall miss cycles +system.cpu.dcache.overall_miss_rate 0.138248 # miss rate for overall accesses +system.cpu.dcache.overall_misses 557 # number of overall misses +system.cpu.dcache.overall_mshr_hits 405 # number of overall MSHR hits +system.cpu.dcache.overall_mshr_miss_latency 1043563 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_rate 0.037726 # mshr miss rate for overall accesses +system.cpu.dcache.overall_mshr_misses 152 # number of overall MSHR misses +system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles +system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses +system.cpu.dcache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache +system.cpu.dcache.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr +system.cpu.dcache.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue +system.cpu.dcache.prefetcher.num_hwpf_evicted 0 # number of hwpf removed due to no buffer left +system.cpu.dcache.prefetcher.num_hwpf_identified 0 # number of hwpf identified +system.cpu.dcache.prefetcher.num_hwpf_issued 0 # number of hwpf issued +system.cpu.dcache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated +system.cpu.dcache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page +system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time +system.cpu.dcache.replacements 0 # number of replacements +system.cpu.dcache.sampled_refs 152 # Sample count of references to valid blocks. 
+system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions +system.cpu.dcache.tagsinuse 90.938737 # Cycle average of tags in use +system.cpu.dcache.total_refs 3478 # Total number of references to valid blocks. +system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit. +system.cpu.dcache.writebacks 0 # number of writebacks +system.cpu.decode.DECODE:BlockedCycles 192719 # Number of cycles decode is blocked +system.cpu.decode.DECODE:DecodedInsts 39774 # Number of instructions handled by decode +system.cpu.decode.DECODE:IdleCycles 20128 # Number of cycles decode is idle +system.cpu.decode.DECODE:RunCycles 8238 # Number of cycles decode is running +system.cpu.decode.DECODE:SquashCycles 3162 # Number of cycles decode is squashing +system.cpu.decode.DECODE:UnblockCycles 264 # Number of cycles decode is unblocking +system.cpu.fetch.Branches 7846 # Number of branches that fetch encountered +system.cpu.fetch.CacheLines 5085 # Number of cache lines fetched +system.cpu.fetch.Cycles 14399 # Number of cycles fetch has run and was not squashing or blocked +system.cpu.fetch.IcacheSquashes 745 # Number of outstanding Icache misses that were squashed +system.cpu.fetch.Insts 43304 # Number of instructions fetch has processed +system.cpu.fetch.SquashCycles 2134 # Number of cycles fetch has spent squashing +system.cpu.fetch.branchRate 0.034947 # Number of branch fetches per cycle +system.cpu.fetch.icacheStallCycles 5085 # Number of cycles fetch is stalled on an Icache miss +system.cpu.fetch.predictedBranches 2990 # Number of branches that fetch has predicted taken +system.cpu.fetch.rate 0.192881 # Number of inst fetches per cycle +system.cpu.fetch.rateDist.start_dist # Number of instructions fetched each cycle (Total) +system.cpu.fetch.rateDist.samples 224511 +system.cpu.fetch.rateDist.min_value 0 + 0 215198 9585.19% + 1 2258 100.57% + 2 627 27.93% + 3 958 42.67% + 4 553 24.63% + 5 816 36.35% + 6 951 42.36% + 7 280 12.47% 
+ 8 2870 127.83% +system.cpu.fetch.rateDist.max_value 8 +system.cpu.fetch.rateDist.end_dist + +system.cpu.icache.ReadReq_accesses 5085 # number of ReadReq accesses(hits+misses) +system.cpu.icache.ReadReq_avg_miss_latency 5148.266776 # average ReadReq miss latency +system.cpu.icache.ReadReq_avg_mshr_miss_latency 4502.972752 # average ReadReq mshr miss latency +system.cpu.icache.ReadReq_hits 4474 # number of ReadReq hits +system.cpu.icache.ReadReq_miss_latency 3145591 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_miss_rate 0.120157 # miss rate for ReadReq accesses +system.cpu.icache.ReadReq_misses 611 # number of ReadReq misses +system.cpu.icache.ReadReq_mshr_hits 244 # number of ReadReq MSHR hits +system.cpu.icache.ReadReq_mshr_miss_latency 1652591 # number of ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_rate 0.072173 # mshr miss rate for ReadReq accesses +system.cpu.icache.ReadReq_mshr_misses 367 # number of ReadReq MSHR misses +system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked +system.cpu.icache.avg_blocked_cycles_no_targets <err: div-0> # average number of cycles each access was blocked +system.cpu.icache.avg_refs 12.325069 # Average number of references to valid blocks. 
+system.cpu.icache.blocked_no_mshrs 0 # number of cycles access was blocked +system.cpu.icache.blocked_no_targets 0 # number of cycles access was blocked +system.cpu.icache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked +system.cpu.icache.blocked_cycles_no_targets 0 # number of cycles access was blocked +system.cpu.icache.cache_copies 0 # number of cache copies performed +system.cpu.icache.demand_accesses 5085 # number of demand (read+write) accesses +system.cpu.icache.demand_avg_miss_latency 5148.266776 # average overall miss latency +system.cpu.icache.demand_avg_mshr_miss_latency 4502.972752 # average overall mshr miss latency +system.cpu.icache.demand_hits 4474 # number of demand (read+write) hits +system.cpu.icache.demand_miss_latency 3145591 # number of demand (read+write) miss cycles +system.cpu.icache.demand_miss_rate 0.120157 # miss rate for demand accesses +system.cpu.icache.demand_misses 611 # number of demand (read+write) misses +system.cpu.icache.demand_mshr_hits 244 # number of demand (read+write) MSHR hits +system.cpu.icache.demand_mshr_miss_latency 1652591 # number of demand (read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_rate 0.072173 # mshr miss rate for demand accesses +system.cpu.icache.demand_mshr_misses 367 # number of demand (read+write) MSHR misses +system.cpu.icache.fast_writes 0 # number of fast writes performed +system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated +system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate +system.cpu.icache.overall_accesses 5085 # number of overall (read+write) accesses +system.cpu.icache.overall_avg_miss_latency 5148.266776 # average overall miss latency +system.cpu.icache.overall_avg_mshr_miss_latency 4502.972752 # average overall mshr miss latency +system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency +system.cpu.icache.overall_hits 4474 # number of overall hits 
+system.cpu.icache.overall_miss_latency 3145591 # number of overall miss cycles +system.cpu.icache.overall_miss_rate 0.120157 # miss rate for overall accesses +system.cpu.icache.overall_misses 611 # number of overall misses +system.cpu.icache.overall_mshr_hits 244 # number of overall MSHR hits +system.cpu.icache.overall_mshr_miss_latency 1652591 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_rate 0.072173 # mshr miss rate for overall accesses +system.cpu.icache.overall_mshr_misses 367 # number of overall MSHR misses +system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles +system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses +system.cpu.icache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache +system.cpu.icache.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr +system.cpu.icache.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue +system.cpu.icache.prefetcher.num_hwpf_evicted 0 # number of hwpf removed due to no buffer left +system.cpu.icache.prefetcher.num_hwpf_identified 0 # number of hwpf identified +system.cpu.icache.prefetcher.num_hwpf_issued 0 # number of hwpf issued +system.cpu.icache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated +system.cpu.icache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page +system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time +system.cpu.icache.replacements 1 # number of replacements +system.cpu.icache.sampled_refs 363 # Sample count of references to valid blocks. 
+system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions +system.cpu.icache.tagsinuse 172.869174 # Cycle average of tags in use +system.cpu.icache.total_refs 4474 # Total number of references to valid blocks. +system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. +system.cpu.icache.writebacks 0 # number of writebacks +system.cpu.idleCycles 1196701 # Total number of cycles that the CPU has spent unscheduled due to idling +system.cpu.iew.EXEC:branches 3576 # Number of branches executed +system.cpu.iew.EXEC:nop 0 # number of nop insts executed +system.cpu.iew.EXEC:rate 0.092548 # Inst execution rate +system.cpu.iew.EXEC:refs 5257 # number of memory reference insts executed +system.cpu.iew.EXEC:stores 2386 # Number of stores executed +system.cpu.iew.EXEC:swp 0 # number of swp insts executed +system.cpu.iew.WB:consumers 9737 # num instructions consuming a value +system.cpu.iew.WB:count 19769 # cumulative count of insts written-back +system.cpu.iew.WB:fanout 0.790901 # average fanout of values written-back +system.cpu.iew.WB:penalized 0 # number of instrctions required to write to 'other' IQ +system.cpu.iew.WB:penalized_rate 0 # fraction of instructions written-back that wrote to 'other' IQ +system.cpu.iew.WB:producers 7701 # num instructions producing a value +system.cpu.iew.WB:rate 0.088054 # insts written-back per cycle +system.cpu.iew.WB:sent 20061 # cumulative count of insts sent to commit +system.cpu.iew.branchMispredicts 2593 # Number of branch mispredicts detected at execute +system.cpu.iew.iewBlockCycles 476 # Number of cycles IEW is blocking +system.cpu.iew.iewDispLoadInsts 3250 # Number of dispatched load instructions +system.cpu.iew.iewDispNonSpecInsts 617 # Number of dispatched non-speculative instructions +system.cpu.iew.iewDispSquashedInsts 2705 # Number of squashed instructions skipped by dispatch +system.cpu.iew.iewDispStoreInsts 2817 # Number of dispatched store instructions 
+system.cpu.iew.iewDispatchedInsts 25240 # Number of instructions dispatched to IQ +system.cpu.iew.iewExecLoadInsts 2871 # Number of load instructions executed +system.cpu.iew.iewExecSquashedInsts 1780 # Number of squashed instructions skipped in execute +system.cpu.iew.iewExecutedInsts 20778 # Number of executed instructions +system.cpu.iew.iewIQFullEvents 7 # Number of times the IQ has become full, causing a stall +system.cpu.iew.iewIdleCycles 0 # Number of cycles IEW is idle +system.cpu.iew.iewLSQFullEvents 0 # Number of times the LSQ has become full, causing a stall +system.cpu.iew.iewSquashCycles 3162 # Number of cycles IEW is squashing +system.cpu.iew.iewUnblockCycles 35 # Number of cycles IEW is unblocking +system.cpu.iew.lsq.thread.0.blockedLoads 0 # Number of blocked loads due to partial load-store forwarding +system.cpu.iew.lsq.thread.0.cacheBlocked 0 # Number of times an access to memory failed due to the cache being blocked +system.cpu.iew.lsq.thread.0.forwLoads 39 # Number of loads that had data forwarded from stores +system.cpu.iew.lsq.thread.0.ignoredResponses 5 # Number of memory responses ignored because the instruction is squashed +system.cpu.iew.lsq.thread.0.invAddrLoads 0 # Number of loads ignored due to an invalid address +system.cpu.iew.lsq.thread.0.invAddrSwpfs 0 # Number of software prefetches ignored due to an invalid address +system.cpu.iew.lsq.thread.0.memOrderViolation 54 # Number of memory ordering violations +system.cpu.iew.lsq.thread.0.rescheduledLoads 0 # Number of loads that were rescheduled +system.cpu.iew.lsq.thread.0.squashedLoads 1788 # Number of loads squashed +system.cpu.iew.lsq.thread.0.squashedStores 1519 # Number of stores squashed +system.cpu.iew.memOrderViolationEvents 54 # Number of memory order violations +system.cpu.iew.predictedNotTakenIncorrect 962 # Number of branches that were predicted not taken incorrectly +system.cpu.iew.predictedTakenIncorrect 1631 # Number of branches that were predicted taken incorrectly 
+system.cpu.ipc 0.007723 # IPC: Instructions Per Cycle +system.cpu.ipc_total 0.007723 # IPC: Total IPC of All Threads +system.cpu.iq.ISSUE:FU_type_0 22558 # Type of FU issued +system.cpu.iq.ISSUE:FU_type_0.start_dist + (null) 1831 8.12% # Type of FU issued + IntAlu 15054 66.73% # Type of FU issued + IntMult 0 0.00% # Type of FU issued + IntDiv 0 0.00% # Type of FU issued + FloatAdd 0 0.00% # Type of FU issued + FloatCmp 0 0.00% # Type of FU issued + FloatCvt 0 0.00% # Type of FU issued + FloatMult 0 0.00% # Type of FU issued + FloatDiv 0 0.00% # Type of FU issued + FloatSqrt 0 0.00% # Type of FU issued + MemRead 3091 13.70% # Type of FU issued + MemWrite 2582 11.45% # Type of FU issued + IprAccess 0 0.00% # Type of FU issued + InstPrefetch 0 0.00% # Type of FU issued +system.cpu.iq.ISSUE:FU_type_0.end_dist +system.cpu.iq.ISSUE:fu_busy_cnt 162 # FU busy when requested +system.cpu.iq.ISSUE:fu_busy_rate 0.007181 # FU busy rate (busy events/executed inst) +system.cpu.iq.ISSUE:fu_full.start_dist + (null) 0 0.00% # attempts to use FU when none available + IntAlu 42 25.93% # attempts to use FU when none available + IntMult 0 0.00% # attempts to use FU when none available + IntDiv 0 0.00% # attempts to use FU when none available + FloatAdd 0 0.00% # attempts to use FU when none available + FloatCmp 0 0.00% # attempts to use FU when none available + FloatCvt 0 0.00% # attempts to use FU when none available + FloatMult 0 0.00% # attempts to use FU when none available + FloatDiv 0 0.00% # attempts to use FU when none available + FloatSqrt 0 0.00% # attempts to use FU when none available + MemRead 14 8.64% # attempts to use FU when none available + MemWrite 106 65.43% # attempts to use FU when none available + IprAccess 0 0.00% # attempts to use FU when none available + InstPrefetch 0 0.00% # attempts to use FU when none available +system.cpu.iq.ISSUE:fu_full.end_dist +system.cpu.iq.ISSUE:issued_per_cycle.start_dist # Number of insts issued each cycle 
+system.cpu.iq.ISSUE:issued_per_cycle.samples 224511 +system.cpu.iq.ISSUE:issued_per_cycle.min_value 0 + 0 215315 9590.40% + 1 4124 183.69% + 2 1297 57.77% + 3 1306 58.17% + 4 1190 53.00% + 5 707 31.49% + 6 433 19.29% + 7 83 3.70% + 8 56 2.49% +system.cpu.iq.ISSUE:issued_per_cycle.max_value 8 +system.cpu.iq.ISSUE:issued_per_cycle.end_dist + +system.cpu.iq.ISSUE:rate 0.100476 # Inst issue rate +system.cpu.iq.iqInstsAdded 24623 # Number of instructions added to the IQ (excludes non-spec) +system.cpu.iq.iqInstsIssued 22558 # Number of instructions issued +system.cpu.iq.iqNonSpecInstsAdded 617 # Number of non-speculative instructions added to the IQ +system.cpu.iq.iqSquashedInstsExamined 11469 # Number of squashed instructions iterated over during squash; mainly for profiling +system.cpu.iq.iqSquashedInstsIssued 174 # Number of squashed instructions issued +system.cpu.iq.iqSquashedNonSpecRemoved 290 # Number of squashed non-spec instructions that were removed +system.cpu.iq.iqSquashedOperandsExamined 5834 # Number of squashed operands that are examined and possibly removed from graph +system.cpu.l2cache.ReadReq_accesses 513 # number of ReadReq accesses(hits+misses) +system.cpu.l2cache.ReadReq_avg_miss_latency 4754.779727 # average ReadReq miss latency +system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 2343.506823 # average ReadReq mshr miss latency +system.cpu.l2cache.ReadReq_miss_latency 2439202 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_miss_rate 1 # miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_misses 513 # number of ReadReq misses +system.cpu.l2cache.ReadReq_mshr_miss_latency 1202219 # number of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_mshr_miss_rate 1 # mshr miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_mshr_misses 513 # number of ReadReq MSHR misses +system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked +system.cpu.l2cache.avg_blocked_cycles_no_targets 
<err: div-0> # average number of cycles each access was blocked +system.cpu.l2cache.avg_refs 0 # Average number of references to valid blocks. +system.cpu.l2cache.blocked_no_mshrs 0 # number of cycles access was blocked +system.cpu.l2cache.blocked_no_targets 0 # number of cycles access was blocked +system.cpu.l2cache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked +system.cpu.l2cache.blocked_cycles_no_targets 0 # number of cycles access was blocked +system.cpu.l2cache.cache_copies 0 # number of cache copies performed +system.cpu.l2cache.demand_accesses 513 # number of demand (read+write) accesses +system.cpu.l2cache.demand_avg_miss_latency 4754.779727 # average overall miss latency +system.cpu.l2cache.demand_avg_mshr_miss_latency 2343.506823 # average overall mshr miss latency +system.cpu.l2cache.demand_hits 0 # number of demand (read+write) hits +system.cpu.l2cache.demand_miss_latency 2439202 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_rate 1 # miss rate for demand accesses +system.cpu.l2cache.demand_misses 513 # number of demand (read+write) misses +system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits +system.cpu.l2cache.demand_mshr_miss_latency 1202219 # number of demand (read+write) MSHR miss cycles +system.cpu.l2cache.demand_mshr_miss_rate 1 # mshr miss rate for demand accesses +system.cpu.l2cache.demand_mshr_misses 513 # number of demand (read+write) MSHR misses +system.cpu.l2cache.fast_writes 0 # number of fast writes performed +system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated +system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate +system.cpu.l2cache.overall_accesses 513 # number of overall (read+write) accesses +system.cpu.l2cache.overall_avg_miss_latency 4754.779727 # average overall miss latency +system.cpu.l2cache.overall_avg_mshr_miss_latency 2343.506823 # average overall mshr miss latency 
+system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency +system.cpu.l2cache.overall_hits 0 # number of overall hits +system.cpu.l2cache.overall_miss_latency 2439202 # number of overall miss cycles +system.cpu.l2cache.overall_miss_rate 1 # miss rate for overall accesses +system.cpu.l2cache.overall_misses 513 # number of overall misses +system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits +system.cpu.l2cache.overall_mshr_miss_latency 1202219 # number of overall MSHR miss cycles +system.cpu.l2cache.overall_mshr_miss_rate 1 # mshr miss rate for overall accesses +system.cpu.l2cache.overall_mshr_misses 513 # number of overall MSHR misses +system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles +system.cpu.l2cache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses +system.cpu.l2cache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache +system.cpu.l2cache.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr +system.cpu.l2cache.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue +system.cpu.l2cache.prefetcher.num_hwpf_evicted 0 # number of hwpf removed due to no buffer left +system.cpu.l2cache.prefetcher.num_hwpf_identified 0 # number of hwpf identified +system.cpu.l2cache.prefetcher.num_hwpf_issued 0 # number of hwpf issued +system.cpu.l2cache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated +system.cpu.l2cache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page +system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time +system.cpu.l2cache.replacements 0 # number of replacements +system.cpu.l2cache.sampled_refs 512 # Sample count of references to valid blocks. 
+system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions +system.cpu.l2cache.tagsinuse 262.946375 # Cycle average of tags in use +system.cpu.l2cache.total_refs 0 # Total number of references to valid blocks. +system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. +system.cpu.l2cache.writebacks 0 # number of writebacks +system.cpu.numCycles 224511 # number of cpu cycles simulated +system.cpu.rename.RENAME:BlockCycles 960 # Number of cycles rename is blocking +system.cpu.rename.RENAME:CommittedMaps 9868 # Number of HB maps that are committed +system.cpu.rename.RENAME:IQFullEvents 2 # Number of times rename has blocked due to IQ full +system.cpu.rename.RENAME:IdleCycles 20098 # Number of cycles rename is idle +system.cpu.rename.RENAME:LSQFullEvents 481 # Number of times rename has blocked due to LSQ full +system.cpu.rename.RENAME:ROBFullEvents 4 # Number of times rename has blocked due to ROB full +system.cpu.rename.RENAME:RenameLookups 46931 # Number of register rename lookups that rename has made +system.cpu.rename.RENAME:RenamedInsts 31260 # Number of instructions processed by rename +system.cpu.rename.RENAME:RenamedOperands 25831 # Number of destination operands rename has renamed +system.cpu.rename.RENAME:RunCycles 7921 # Number of cycles rename is running +system.cpu.rename.RENAME:SquashCycles 3162 # Number of cycles rename is squashing +system.cpu.rename.RENAME:SquashedInsts 8042 # Number of squashed instructions processed by rename +system.cpu.rename.RENAME:UnblockCycles 1212 # Number of cycles rename is unblocking +system.cpu.rename.RENAME:UndoneMaps 15963 # Number of HB maps that are undone due to squashing +system.cpu.rename.RENAME:serializeStallCycles 190573 # count of cycles rename stalled for serializing inst +system.cpu.rename.RENAME:serializingInsts 638 # count of serializing insts renamed +system.cpu.rename.RENAME:skidInsts 5594 # count of insts added to the skid buffer 
+system.cpu.rename.RENAME:tempSerializingInsts 629 # count of temporary serializing insts renamed +system.cpu.timesIdled 289 # Number of times that the entire CPU went into an idle state and unscheduled itself +system.cpu.workload.PROG:num_syscalls 8 # Number of system calls + +---------- End Simulation Statistics ---------- diff --git a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stderr b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stderr new file mode 100644 index 000000000..48affb0e2 --- /dev/null +++ b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stderr @@ -0,0 +1,4 @@ +warn: More than two loadable segments in ELF object. +warn: Ignoring segment @ 0x0 length 0x0. +0: system.remote_gdb.listener: listening for remote gdb on port 7003 +warn: Entering event queue @ 0. Starting simulation... diff --git a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stdout b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stdout new file mode 100644 index 000000000..6cba2ba7e --- /dev/null +++ b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stdout @@ -0,0 +1,24 @@ +Begining test of difficult SPARC instructions... 
+LDSTUB: Passed +SWAP: Passed +CAS FAIL: Passed +CAS WORK: Passed +CASX FAIL: Passed +CASX WORK: Passed +LDTX: Passed +LDTW: Passed +STTW: Passed +Done +M5 Simulator System + +Copyright (c) 2001-2006 +The Regents of The University of Michigan +All Rights Reserved + + +M5 compiled Apr 9 2007 03:06:26 +M5 started Mon Apr 9 03:06:54 2007 +M5 executing on zizzer.eecs.umich.edu +command line: build/SPARC_SE/m5.fast -d build/SPARC_SE/tests/fast/quick/02.insttest/sparc/linux/o3-timing tests/run.py quick/02.insttest/sparc/linux/o3-timing +Global frequency set at 1000000000000 ticks per second +Exiting @ tick 1421211 because target called exit() diff --git a/tests/quick/02.insttest/ref/sparc/linux/simple-timing/config.ini b/tests/quick/02.insttest/ref/sparc/linux/simple-timing/config.ini new file mode 100644 index 000000000..85d14933a --- /dev/null +++ b/tests/quick/02.insttest/ref/sparc/linux/simple-timing/config.ini @@ -0,0 +1,187 @@ +[root] +type=Root +children=system +dummy=0 + +[system] +type=System +children=cpu membus physmem +mem_mode=atomic +physmem=system.physmem + +[system.cpu] +type=TimingSimpleCPU +children=dcache icache l2cache toL2Bus workload +clock=1 +cpu_id=0 +defer_registration=false +function_trace=false +function_trace_start=0 +max_insts_all_threads=0 +max_insts_any_thread=0 +max_loads_all_threads=0 +max_loads_any_thread=0 +phase=0 +progress_interval=0 +system=system +workload=system.cpu.workload +dcache_port=system.cpu.dcache.cpu_side +icache_port=system.cpu.icache.cpu_side + +[system.cpu.dcache] +type=BaseCache +adaptive_compression=false +assoc=2 +block_size=64 +compressed_bus=false +compression_latency=0 +hash_delay=1 +hit_latency=1 +latency=1 +lifo=false +max_miss_count=0 +mshrs=10 +prefetch_access=false +prefetch_cache_check_push=true +prefetch_data_accesses_only=false +prefetch_degree=1 +prefetch_latency=10 +prefetch_miss=false +prefetch_past_page=false +prefetch_policy=none +prefetch_serial_squash=false +prefetch_use_cpu_id=true 
+prefetcher_size=100 +prioritizeRequests=false +protocol=Null +repl=Null +size=262144 +split=false +split_size=0 +store_compressed=false +subblock_size=0 +tgts_per_mshr=5 +trace_addr=0 +two_queue=false +write_buffers=8 +cpu_side=system.cpu.dcache_port +mem_side=system.cpu.toL2Bus.port[1] + +[system.cpu.icache] +type=BaseCache +adaptive_compression=false +assoc=2 +block_size=64 +compressed_bus=false +compression_latency=0 +hash_delay=1 +hit_latency=1 +latency=1 +lifo=false +max_miss_count=0 +mshrs=10 +prefetch_access=false +prefetch_cache_check_push=true +prefetch_data_accesses_only=false +prefetch_degree=1 +prefetch_latency=10 +prefetch_miss=false +prefetch_past_page=false +prefetch_policy=none +prefetch_serial_squash=false +prefetch_use_cpu_id=true +prefetcher_size=100 +prioritizeRequests=false +protocol=Null +repl=Null +size=131072 +split=false +split_size=0 +store_compressed=false +subblock_size=0 +tgts_per_mshr=5 +trace_addr=0 +two_queue=false +write_buffers=8 +cpu_side=system.cpu.icache_port +mem_side=system.cpu.toL2Bus.port[0] + +[system.cpu.l2cache] +type=BaseCache +adaptive_compression=false +assoc=2 +block_size=64 +compressed_bus=false +compression_latency=0 +hash_delay=1 +hit_latency=1 +latency=1 +lifo=false +max_miss_count=0 +mshrs=10 +prefetch_access=false +prefetch_cache_check_push=true +prefetch_data_accesses_only=false +prefetch_degree=1 +prefetch_latency=10 +prefetch_miss=false +prefetch_past_page=false +prefetch_policy=none +prefetch_serial_squash=false +prefetch_use_cpu_id=true +prefetcher_size=100 +prioritizeRequests=false +protocol=Null +repl=Null +size=2097152 +split=false +split_size=0 +store_compressed=false +subblock_size=0 +tgts_per_mshr=5 +trace_addr=0 +two_queue=false +write_buffers=8 +cpu_side=system.cpu.toL2Bus.port[2] +mem_side=system.membus.port[1] + +[system.cpu.toL2Bus] +type=Bus +bus_id=0 +clock=1000 +responder_set=false +width=64 +port=system.cpu.icache.mem_side system.cpu.dcache.mem_side system.cpu.l2cache.cpu_side + 
+[system.cpu.workload] +type=LiveProcess +cmd=insttest +cwd= +egid=100 +env= +euid=100 +executable=tests/test-progs/insttest/bin/sparc/linux/insttest +gid=100 +input=cin +output=cout +pid=100 +ppid=99 +system=system +uid=100 + +[system.membus] +type=Bus +bus_id=0 +clock=1000 +responder_set=false +width=64 +port=system.physmem.port system.cpu.l2cache.mem_side + +[system.physmem] +type=PhysicalMemory +file= +latency=1 +range=0:134217727 +zero=false +port=system.membus.port[0] + diff --git a/tests/quick/02.insttest/ref/sparc/linux/simple-timing/config.out b/tests/quick/02.insttest/ref/sparc/linux/simple-timing/config.out new file mode 100644 index 000000000..ec2d1886a --- /dev/null +++ b/tests/quick/02.insttest/ref/sparc/linux/simple-timing/config.out @@ -0,0 +1,178 @@ +[root] +type=Root +dummy=0 + +[system.physmem] +type=PhysicalMemory +file= +range=[0,134217727] +latency=1 +zero=false + +[system] +type=System +physmem=system.physmem +mem_mode=atomic + +[system.membus] +type=Bus +bus_id=0 +clock=1000 +width=64 +responder_set=false + +[system.cpu.workload] +type=LiveProcess +cmd=insttest +executable=tests/test-progs/insttest/bin/sparc/linux/insttest +input=cin +output=cout +env= +cwd= +system=system +uid=100 +euid=100 +gid=100 +egid=100 +pid=100 +ppid=99 + +[system.cpu] +type=TimingSimpleCPU +max_insts_any_thread=0 +max_insts_all_threads=0 +max_loads_any_thread=0 +max_loads_all_threads=0 +progress_interval=0 +system=system +cpu_id=0 +workload=system.cpu.workload +clock=1 +phase=0 +defer_registration=false +// width not specified +function_trace=false +function_trace_start=0 +// simulate_stalls not specified + +[system.cpu.toL2Bus] +type=Bus +bus_id=0 +clock=1000 +width=64 +responder_set=false + +[system.cpu.icache] +type=BaseCache +size=131072 +assoc=2 +block_size=64 +latency=1 +mshrs=10 +tgts_per_mshr=5 +write_buffers=8 +prioritizeRequests=false +protocol=null +trace_addr=0 +hash_delay=1 +repl=null +compressed_bus=false +store_compressed=false 
+adaptive_compression=false +compression_latency=0 +block_size=64 +max_miss_count=0 +addr_range=[0,18446744073709551615] +split=false +split_size=0 +lifo=false +two_queue=false +prefetch_miss=false +prefetch_access=false +prefetcher_size=100 +prefetch_past_page=false +prefetch_serial_squash=false +prefetch_latency=10 +prefetch_degree=1 +prefetch_policy=none +prefetch_cache_check_push=true +prefetch_use_cpu_id=true +prefetch_data_accesses_only=false +hit_latency=1 + +[system.cpu.dcache] +type=BaseCache +size=262144 +assoc=2 +block_size=64 +latency=1 +mshrs=10 +tgts_per_mshr=5 +write_buffers=8 +prioritizeRequests=false +protocol=null +trace_addr=0 +hash_delay=1 +repl=null +compressed_bus=false +store_compressed=false +adaptive_compression=false +compression_latency=0 +block_size=64 +max_miss_count=0 +addr_range=[0,18446744073709551615] +split=false +split_size=0 +lifo=false +two_queue=false +prefetch_miss=false +prefetch_access=false +prefetcher_size=100 +prefetch_past_page=false +prefetch_serial_squash=false +prefetch_latency=10 +prefetch_degree=1 +prefetch_policy=none +prefetch_cache_check_push=true +prefetch_use_cpu_id=true +prefetch_data_accesses_only=false +hit_latency=1 + +[system.cpu.l2cache] +type=BaseCache +size=2097152 +assoc=2 +block_size=64 +latency=1 +mshrs=10 +tgts_per_mshr=5 +write_buffers=8 +prioritizeRequests=false +protocol=null +trace_addr=0 +hash_delay=1 +repl=null +compressed_bus=false +store_compressed=false +adaptive_compression=false +compression_latency=0 +block_size=64 +max_miss_count=0 +addr_range=[0,18446744073709551615] +split=false +split_size=0 +lifo=false +two_queue=false +prefetch_miss=false +prefetch_access=false +prefetcher_size=100 +prefetch_past_page=false +prefetch_serial_squash=false +prefetch_latency=10 +prefetch_degree=1 +prefetch_policy=none +prefetch_cache_check_push=true +prefetch_use_cpu_id=true +prefetch_data_accesses_only=false +hit_latency=1 + diff --git 
a/tests/quick/02.insttest/ref/sparc/linux/simple-timing/m5stats.txt b/tests/quick/02.insttest/ref/sparc/linux/simple-timing/m5stats.txt new file mode 100644 index 000000000..a4396b3da --- /dev/null +++ b/tests/quick/02.insttest/ref/sparc/linux/simple-timing/m5stats.txt @@ -0,0 +1,215 @@ + +---------- Begin Simulation Statistics ---------- +host_inst_rate 39129 # Simulator instruction rate (inst/s) +host_mem_usage 153232 # Number of bytes of host memory used +host_seconds 0.28 # Real time elapsed on the host +host_tick_rate 6030675 # Simulator tick rate (ticks/s) +sim_freq 1000000000000 # Frequency of simulated ticks +sim_insts 11001 # Number of instructions simulated +sim_seconds 0.000002 # Number of seconds simulated +sim_ticks 1698003 # Number of ticks simulated +system.cpu.dcache.ReadReq_accesses 1462 # number of ReadReq accesses(hits+misses) +system.cpu.dcache.ReadReq_avg_miss_latency 3977.759259 # average ReadReq miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency 2977.759259 # average ReadReq mshr miss latency +system.cpu.dcache.ReadReq_hits 1408 # number of ReadReq hits +system.cpu.dcache.ReadReq_miss_latency 214799 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_miss_rate 0.036936 # miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_misses 54 # number of ReadReq misses +system.cpu.dcache.ReadReq_mshr_miss_latency 160799 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_mshr_miss_rate 0.036936 # mshr miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_mshr_misses 54 # number of ReadReq MSHR misses +system.cpu.dcache.SwapReq_accesses 6 # number of SwapReq accesses(hits+misses) +system.cpu.dcache.SwapReq_hits 6 # number of SwapReq hits +system.cpu.dcache.WriteReq_accesses 1292 # number of WriteReq accesses(hits+misses) +system.cpu.dcache.WriteReq_avg_miss_latency 3963.647727 # average WriteReq miss latency +system.cpu.dcache.WriteReq_avg_mshr_miss_latency 2963.647727 # average WriteReq mshr miss latency 
+system.cpu.dcache.WriteReq_hits 1204 # number of WriteReq hits +system.cpu.dcache.WriteReq_miss_latency 348801 # number of WriteReq miss cycles +system.cpu.dcache.WriteReq_miss_rate 0.068111 # miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_misses 88 # number of WriteReq misses +system.cpu.dcache.WriteReq_mshr_miss_latency 260801 # number of WriteReq MSHR miss cycles +system.cpu.dcache.WriteReq_mshr_miss_rate 0.068111 # mshr miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_mshr_misses 88 # number of WriteReq MSHR misses +system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked +system.cpu.dcache.avg_blocked_cycles_no_targets <err: div-0> # average number of cycles each access was blocked +system.cpu.dcache.avg_refs 18.436620 # Average number of references to valid blocks. +system.cpu.dcache.blocked_no_mshrs 0 # number of cycles access was blocked +system.cpu.dcache.blocked_no_targets 0 # number of cycles access was blocked +system.cpu.dcache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked +system.cpu.dcache.blocked_cycles_no_targets 0 # number of cycles access was blocked +system.cpu.dcache.cache_copies 0 # number of cache copies performed +system.cpu.dcache.demand_accesses 2754 # number of demand (read+write) accesses +system.cpu.dcache.demand_avg_miss_latency 3969.014085 # average overall miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency 2969.014085 # average overall mshr miss latency +system.cpu.dcache.demand_hits 2612 # number of demand (read+write) hits +system.cpu.dcache.demand_miss_latency 563600 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_rate 0.051561 # miss rate for demand accesses +system.cpu.dcache.demand_misses 142 # number of demand (read+write) misses +system.cpu.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits +system.cpu.dcache.demand_mshr_miss_latency 421600 # number of demand (read+write) MSHR 
miss cycles +system.cpu.dcache.demand_mshr_miss_rate 0.051561 # mshr miss rate for demand accesses +system.cpu.dcache.demand_mshr_misses 142 # number of demand (read+write) MSHR misses +system.cpu.dcache.fast_writes 0 # number of fast writes performed +system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated +system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate +system.cpu.dcache.overall_accesses 2754 # number of overall (read+write) accesses +system.cpu.dcache.overall_avg_miss_latency 3969.014085 # average overall miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency 2969.014085 # average overall mshr miss latency +system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency +system.cpu.dcache.overall_hits 2612 # number of overall hits +system.cpu.dcache.overall_miss_latency 563600 # number of overall miss cycles +system.cpu.dcache.overall_miss_rate 0.051561 # miss rate for overall accesses +system.cpu.dcache.overall_misses 142 # number of overall misses +system.cpu.dcache.overall_mshr_hits 0 # number of overall MSHR hits +system.cpu.dcache.overall_mshr_miss_latency 421600 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_rate 0.051561 # mshr miss rate for overall accesses +system.cpu.dcache.overall_mshr_misses 142 # number of overall MSHR misses +system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles +system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses +system.cpu.dcache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache +system.cpu.dcache.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr +system.cpu.dcache.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue +system.cpu.dcache.prefetcher.num_hwpf_evicted 0 # number of hwpf removed due to 
no buffer left +system.cpu.dcache.prefetcher.num_hwpf_identified 0 # number of hwpf identified +system.cpu.dcache.prefetcher.num_hwpf_issued 0 # number of hwpf issued +system.cpu.dcache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated +system.cpu.dcache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page +system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time +system.cpu.dcache.replacements 0 # number of replacements +system.cpu.dcache.sampled_refs 142 # Sample count of references to valid blocks. +system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions +system.cpu.dcache.tagsinuse 86.872921 # Cycle average of tags in use +system.cpu.dcache.total_refs 2618 # Total number of references to valid blocks. +system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit. +system.cpu.dcache.writebacks 0 # number of writebacks +system.cpu.icache.ReadReq_accesses 11002 # number of ReadReq accesses(hits+misses) +system.cpu.icache.ReadReq_avg_miss_latency 3961.367491 # average ReadReq miss latency +system.cpu.icache.ReadReq_avg_mshr_miss_latency 2961.367491 # average ReadReq mshr miss latency +system.cpu.icache.ReadReq_hits 10719 # number of ReadReq hits +system.cpu.icache.ReadReq_miss_latency 1121067 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_miss_rate 0.025723 # miss rate for ReadReq accesses +system.cpu.icache.ReadReq_misses 283 # number of ReadReq misses +system.cpu.icache.ReadReq_mshr_miss_latency 838067 # number of ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_rate 0.025723 # mshr miss rate for ReadReq accesses +system.cpu.icache.ReadReq_mshr_misses 283 # number of ReadReq MSHR misses +system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked +system.cpu.icache.avg_blocked_cycles_no_targets 
<err: div-0> # average number of cycles each access was blocked +system.cpu.icache.avg_refs 37.876325 # Average number of references to valid blocks. +system.cpu.icache.blocked_no_mshrs 0 # number of cycles access was blocked +system.cpu.icache.blocked_no_targets 0 # number of cycles access was blocked +system.cpu.icache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked +system.cpu.icache.blocked_cycles_no_targets 0 # number of cycles access was blocked +system.cpu.icache.cache_copies 0 # number of cache copies performed +system.cpu.icache.demand_accesses 11002 # number of demand (read+write) accesses +system.cpu.icache.demand_avg_miss_latency 3961.367491 # average overall miss latency +system.cpu.icache.demand_avg_mshr_miss_latency 2961.367491 # average overall mshr miss latency +system.cpu.icache.demand_hits 10719 # number of demand (read+write) hits +system.cpu.icache.demand_miss_latency 1121067 # number of demand (read+write) miss cycles +system.cpu.icache.demand_miss_rate 0.025723 # miss rate for demand accesses +system.cpu.icache.demand_misses 283 # number of demand (read+write) misses +system.cpu.icache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits +system.cpu.icache.demand_mshr_miss_latency 838067 # number of demand (read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_rate 0.025723 # mshr miss rate for demand accesses +system.cpu.icache.demand_mshr_misses 283 # number of demand (read+write) MSHR misses +system.cpu.icache.fast_writes 0 # number of fast writes performed +system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated +system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate +system.cpu.icache.overall_accesses 11002 # number of overall (read+write) accesses +system.cpu.icache.overall_avg_miss_latency 3961.367491 # average overall miss latency +system.cpu.icache.overall_avg_mshr_miss_latency 2961.367491 # average overall mshr miss latency 
+system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency +system.cpu.icache.overall_hits 10719 # number of overall hits +system.cpu.icache.overall_miss_latency 1121067 # number of overall miss cycles +system.cpu.icache.overall_miss_rate 0.025723 # miss rate for overall accesses +system.cpu.icache.overall_misses 283 # number of overall misses +system.cpu.icache.overall_mshr_hits 0 # number of overall MSHR hits +system.cpu.icache.overall_mshr_miss_latency 838067 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_rate 0.025723 # mshr miss rate for overall accesses +system.cpu.icache.overall_mshr_misses 283 # number of overall MSHR misses +system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles +system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses +system.cpu.icache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache +system.cpu.icache.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr +system.cpu.icache.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue +system.cpu.icache.prefetcher.num_hwpf_evicted 0 # number of hwpf removed due to no buffer left +system.cpu.icache.prefetcher.num_hwpf_identified 0 # number of hwpf identified +system.cpu.icache.prefetcher.num_hwpf_issued 0 # number of hwpf issued +system.cpu.icache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated +system.cpu.icache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page +system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time +system.cpu.icache.replacements 0 # number of replacements +system.cpu.icache.sampled_refs 283 # Sample count of references to valid blocks. 
+system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions +system.cpu.icache.tagsinuse 125.297191 # Cycle average of tags in use +system.cpu.icache.total_refs 10719 # Total number of references to valid blocks. +system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. +system.cpu.icache.writebacks 0 # number of writebacks +system.cpu.idle_fraction 0 # Percentage of idle cycles +system.cpu.l2cache.ReadReq_accesses 423 # number of ReadReq accesses(hits+misses) +system.cpu.l2cache.ReadReq_avg_miss_latency 2968.515366 # average ReadReq miss latency +system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 1967.515366 # average ReadReq mshr miss latency +system.cpu.l2cache.ReadReq_miss_latency 1255682 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_miss_rate 1 # miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_misses 423 # number of ReadReq misses +system.cpu.l2cache.ReadReq_mshr_miss_latency 832259 # number of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_mshr_miss_rate 1 # mshr miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_mshr_misses 423 # number of ReadReq MSHR misses +system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked +system.cpu.l2cache.avg_blocked_cycles_no_targets <err: div-0> # average number of cycles each access was blocked +system.cpu.l2cache.avg_refs 0 # Average number of references to valid blocks. 
+system.cpu.l2cache.blocked_no_mshrs 0 # number of cycles access was blocked +system.cpu.l2cache.blocked_no_targets 0 # number of cycles access was blocked +system.cpu.l2cache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked +system.cpu.l2cache.blocked_cycles_no_targets 0 # number of cycles access was blocked +system.cpu.l2cache.cache_copies 0 # number of cache copies performed +system.cpu.l2cache.demand_accesses 423 # number of demand (read+write) accesses +system.cpu.l2cache.demand_avg_miss_latency 2968.515366 # average overall miss latency +system.cpu.l2cache.demand_avg_mshr_miss_latency 1967.515366 # average overall mshr miss latency +system.cpu.l2cache.demand_hits 0 # number of demand (read+write) hits +system.cpu.l2cache.demand_miss_latency 1255682 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_rate 1 # miss rate for demand accesses +system.cpu.l2cache.demand_misses 423 # number of demand (read+write) misses +system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits +system.cpu.l2cache.demand_mshr_miss_latency 832259 # number of demand (read+write) MSHR miss cycles +system.cpu.l2cache.demand_mshr_miss_rate 1 # mshr miss rate for demand accesses +system.cpu.l2cache.demand_mshr_misses 423 # number of demand (read+write) MSHR misses +system.cpu.l2cache.fast_writes 0 # number of fast writes performed +system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated +system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate +system.cpu.l2cache.overall_accesses 423 # number of overall (read+write) accesses +system.cpu.l2cache.overall_avg_miss_latency 2968.515366 # average overall miss latency +system.cpu.l2cache.overall_avg_mshr_miss_latency 1967.515366 # average overall mshr miss latency +system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency +system.cpu.l2cache.overall_hits 0 # number of overall hits 
+system.cpu.l2cache.overall_miss_latency 1255682 # number of overall miss cycles +system.cpu.l2cache.overall_miss_rate 1 # miss rate for overall accesses +system.cpu.l2cache.overall_misses 423 # number of overall misses +system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits +system.cpu.l2cache.overall_mshr_miss_latency 832259 # number of overall MSHR miss cycles +system.cpu.l2cache.overall_mshr_miss_rate 1 # mshr miss rate for overall accesses +system.cpu.l2cache.overall_mshr_misses 423 # number of overall MSHR misses +system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles +system.cpu.l2cache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses +system.cpu.l2cache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache +system.cpu.l2cache.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr +system.cpu.l2cache.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue +system.cpu.l2cache.prefetcher.num_hwpf_evicted 0 # number of hwpf removed due to no buffer left +system.cpu.l2cache.prefetcher.num_hwpf_identified 0 # number of hwpf identified +system.cpu.l2cache.prefetcher.num_hwpf_issued 0 # number of hwpf issued +system.cpu.l2cache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated +system.cpu.l2cache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page +system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time +system.cpu.l2cache.replacements 0 # number of replacements +system.cpu.l2cache.sampled_refs 423 # Sample count of references to valid blocks. 
+system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions +system.cpu.l2cache.tagsinuse 211.742547 # Cycle average of tags in use +system.cpu.l2cache.total_refs 0 # Total number of references to valid blocks. +system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. +system.cpu.l2cache.writebacks 0 # number of writebacks +system.cpu.not_idle_fraction 1 # Percentage of non-idle cycles +system.cpu.numCycles 1698003 # number of cpu cycles simulated +system.cpu.num_insts 11001 # Number of instructions executed +system.cpu.num_refs 2760 # Number of memory references +system.cpu.workload.PROG:num_syscalls 8 # Number of system calls + +---------- End Simulation Statistics ---------- diff --git a/tests/quick/02.insttest/ref/sparc/linux/simple-timing/stderr b/tests/quick/02.insttest/ref/sparc/linux/simple-timing/stderr new file mode 100644 index 000000000..fce46c90e --- /dev/null +++ b/tests/quick/02.insttest/ref/sparc/linux/simple-timing/stderr @@ -0,0 +1,4 @@ +warn: More than two loadable segments in ELF object. +warn: Ignoring segment @ 0x0 length 0x0. +0: system.remote_gdb.listener: listening for remote gdb on port 7000 +warn: Entering event queue @ 0. Starting simulation... diff --git a/tests/quick/02.insttest/ref/sparc/linux/simple-timing/stdout b/tests/quick/02.insttest/ref/sparc/linux/simple-timing/stdout new file mode 100644 index 000000000..100a1ebce --- /dev/null +++ b/tests/quick/02.insttest/ref/sparc/linux/simple-timing/stdout @@ -0,0 +1,24 @@ +Begining test of difficult SPARC instructions... 
+LDSTUB: Passed +SWAP: Passed +CAS FAIL: Passed +CAS WORK: Passed +CASX FAIL: Passed +CASX WORK: Passed +LDTX: Passed +LDTW: Passed +STTW: Passed +Done +M5 Simulator System + +Copyright (c) 2001-2006 +The Regents of The University of Michigan +All Rights Reserved + + +M5 compiled Apr 8 2007 05:25:15 +M5 started Sun Apr 8 22:54:12 2007 +M5 executing on zizzer.eecs.umich.edu +command line: build/SPARC_SE/m5.fast -d build/SPARC_SE/tests/fast/quick/02.insttest/sparc/linux/simple-timing tests/run.py quick/02.insttest/sparc/linux/simple-timing +Global frequency set at 1000000000000 ticks per second +Exiting @ tick 1698003 because target called exit() |