summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--build_opts/SPARC_SE2
-rw-r--r--src/arch/sparc/isa/formats/mem/swap.isa10
-rw-r--r--src/arch/x86/isa/base.isa7
-rw-r--r--src/arch/x86/isa/formats/formats.isa3
-rw-r--r--src/arch/x86/isa/macroop.isa (renamed from src/arch/x86/isa/formats/macroop.isa)65
-rw-r--r--src/arch/x86/isa/main.isa49
-rw-r--r--src/arch/x86/isa/microasm.isa179
-rw-r--r--src/arch/x86/isa/microops/base.isa11
-rw-r--r--src/arch/x86/isa/specialize.isa172
-rw-r--r--src/arch/x86/predecoder.cc64
-rw-r--r--src/arch/x86/predecoder.hh2
-rw-r--r--src/arch/x86/predecoder_tables.cc18
-rw-r--r--src/arch/x86/types.hh46
-rw-r--r--src/arch/x86/utility.hh3
-rw-r--r--src/cpu/base_dyn_inst.hh5
-rw-r--r--src/cpu/o3/lsq_unit.hh33
-rw-r--r--src/cpu/o3/lsq_unit_impl.hh22
-rw-r--r--src/cpu/o3/regfile.hh14
-rw-r--r--src/cpu/o3/rename_impl.hh10
-rw-r--r--tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.ini379
-rw-r--r--tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.out367
-rw-r--r--tests/quick/02.insttest/ref/sparc/linux/o3-timing/m5stats.txt410
-rw-r--r--tests/quick/02.insttest/ref/sparc/linux/o3-timing/stderr4
-rw-r--r--tests/quick/02.insttest/ref/sparc/linux/o3-timing/stdout24
-rw-r--r--tests/quick/02.insttest/ref/sparc/linux/simple-timing/config.ini187
-rw-r--r--tests/quick/02.insttest/ref/sparc/linux/simple-timing/config.out178
-rw-r--r--tests/quick/02.insttest/ref/sparc/linux/simple-timing/m5stats.txt215
-rw-r--r--tests/quick/02.insttest/ref/sparc/linux/simple-timing/stderr4
-rw-r--r--tests/quick/02.insttest/ref/sparc/linux/simple-timing/stdout24
29 files changed, 2197 insertions, 310 deletions
diff --git a/build_opts/SPARC_SE b/build_opts/SPARC_SE
index 62b6841ad..b288d3908 100644
--- a/build_opts/SPARC_SE
+++ b/build_opts/SPARC_SE
@@ -1,3 +1,3 @@
TARGET_ISA = 'sparc'
-CPU_MODELS = 'AtomicSimpleCPU,TimingSimpleCPU'
+CPU_MODELS = 'AtomicSimpleCPU,TimingSimpleCPU,O3CPU'
FULL_SYSTEM = 0
diff --git a/src/arch/sparc/isa/formats/mem/swap.isa b/src/arch/sparc/isa/formats/mem/swap.isa
index 818597a84..b71542a2b 100644
--- a/src/arch/sparc/isa/formats/mem/swap.isa
+++ b/src/arch/sparc/isa/formats/mem/swap.isa
@@ -137,7 +137,7 @@ def format Swap(code, postacc_code, mem_flags, *opt_flags) {{
decoder_output,
exec_output,
decode_block) = doMemFormat(code, SwapFuncs, '', name, Name, flags,
- opt_flags, postacc_code)
+ ["IsStoreConditional"], postacc_code)
}};
def format SwapAlt(code, postacc_code, asi, mem_flags, *opt_flags) {{
@@ -148,7 +148,7 @@ def format SwapAlt(code, postacc_code, asi, mem_flags, *opt_flags) {{
decoder_output,
exec_output,
decode_block) = doMemFormat(code, SwapFuncs, AlternateASIPrivFaultCheck,
- name, Name, flags, opt_flags, postacc_code)
+ name, Name, flags, ["IsStoreConditional"], postacc_code)
}};
@@ -163,8 +163,8 @@ let {{
decode_block = BasicDecode.subst(iop)
microParams = {"code": code, "postacc_code" : postacc_code,
"ea_code" : addrCalcReg, "fault_check" : faultCode}
- exec_output = doSplitExecute(execute, name, Name, asi, opt_flags,
- microParams);
+ exec_output = doSplitExecute(execute, name, Name, asi,
+ ["IsStoreConditional"], microParams);
return (header_output, decoder_output, exec_output, decode_block)
}};
@@ -177,7 +177,7 @@ def format CasAlt(code, postacc_code, asi, mem_flags, *opt_flags) {{
decoder_output,
exec_output,
decode_block) = doCasFormat(code, SwapFuncs, AlternateASIPrivFaultCheck,
- name, Name, flags, opt_flags, postacc_code)
+ name, Name, flags, ["IsStoreConditional"], postacc_code)
}};
diff --git a/src/arch/x86/isa/base.isa b/src/arch/x86/isa/base.isa
index 4776f7a7e..cd166b306 100644
--- a/src/arch/x86/isa/base.isa
+++ b/src/arch/x86/isa/base.isa
@@ -79,6 +79,13 @@ output header {{
void printReg(std::ostream &os, int reg) const;
void printSrcReg(std::ostream &os, int reg) const;
void printDestReg(std::ostream &os, int reg) const;
+
+ inline uint64_t merge(uint64_t into, uint64_t val, int size) const
+ {
+ //FIXME This needs to be significantly more sophisticated
+ return val;
+ }
+
};
}};
diff --git a/src/arch/x86/isa/formats/formats.isa b/src/arch/x86/isa/formats/formats.isa
index f4e5c402f..d763c05bc 100644
--- a/src/arch/x86/isa/formats/formats.isa
+++ b/src/arch/x86/isa/formats/formats.isa
@@ -95,9 +95,6 @@
//malfunction of the decode mechanism.
##include "error.isa"
-//Include code to build up macro op instructions
-##include "macroop.isa"
-
//Include a format which implements a batch of instructions which do the same
//thing on a variety of inputs
##include "multi.isa"
diff --git a/src/arch/x86/isa/formats/macroop.isa b/src/arch/x86/isa/macroop.isa
index 717103df1..7d41a2dea 100644
--- a/src/arch/x86/isa/formats/macroop.isa
+++ b/src/arch/x86/isa/macroop.isa
@@ -55,16 +55,20 @@
//
// Authors: Gabe Black
-////////////////////////////////////////////////////////////////////
-//
-// Instructions that do the same thing to multiple sets of arguments.
-//
+// Execute method for macroops.
+def template MacroExecPanic {{
+ Fault execute(%(CPU_exec_context)s *, Trace::InstRecord *) const
+ {
+ panic("Tried to execute macroop directly!");
+ M5_DUMMY_RETURN
+ }
+}};
output header {{
// Base class for most macroops, except ones that need to commit as
// they go.
- class X86MacroInst : public X86StaticInst
+ class X86MacroInst : public StaticInst
{
protected:
const uint32_t numMicroOps;
@@ -72,7 +76,7 @@ output header {{
//Constructor.
X86MacroInst(const char *mnem, ExtMachInst _machInst,
uint32_t _numMicroOps)
- : X86StaticInst(mnem, _machInst, No_OpClass),
+ : StaticInst(mnem, _machInst, No_OpClass),
numMicroOps(_numMicroOps)
{
assert(numMicroOps);
@@ -85,9 +89,6 @@ output header {{
delete [] microOps;
}
- std::string generateDisassembly(Addr pc,
- const SymbolTable *symtab) const;
-
StaticInstPtr * microOps;
StaticInstPtr fetchMicroOp(MicroPC microPC)
@@ -96,21 +97,7 @@ output header {{
return microOps[microPC];
}
- %(BasicExecPanic)s
- };
-
- // Base class for macroops which commit as they go. This is for
- // instructions which can be partially completed like those with the
- // rep prefix. This prevents those instructions from overflowing
- // buffers with uncommitted microops.
- class X86RollingMacroInst : public X86MacroInst
- {
- protected:
- //Constructor.
- X86RollingMacroInst(const char *mnem, ExtMachInst _machInst,
- uint32_t _numMicroOps)
- : X86MacroInst(mnem, _machInst, numMicroOps)
- {}
+ %(MacroExecPanic)s
};
}};
@@ -121,34 +108,24 @@ def template MacroConstructor {{
{
%(constructor)s;
//alloc_micro_ops is the code that sets up the microOps
- //array in the parent class. This hook will hopefully
- //allow all that to be automated.
+ //array in the parent class.
%(alloc_micro_ops)s;
- setMicroFlags();
}
}};
let {{
- def genMacroOp(name, Name, ops, rolling = False):
+ def genMacroOp(name, Name, opSeq):
baseClass = 'X86MacroInst'
- if rolling:
- baseClass = 'X86RollingMacroInst'
- numMicroOps = len(ops)
+ numMicroOps = len(opSeq.ops)
allocMicroOps = ''
micropc = 0
- allocMicroOps += \
- "microOps[0] = %s;\n" % \
- op.getAllocator(True, not rolling, True, False)
- micropc += 1
- if numMicroOps > 2:
- for op in ops[1:-1]:
- allocMicroOps += \
- "microOps[%d] = %s;\n" % \
- (micropc, op.getAllocator(True, not rolling, False, False))
- micropc += 1
- allocMicroOps += \
- "microOps[%d] = %s;\n" % \
- op.getAllocator(True, not rolling, False, True)
+ for op in opSeq.ops:
+ allocMicroOps += \
+ "microOps[%d] = %s;\n" % \
+ (micropc, op.getAllocator(True, op.delayed,
+ micropc == 0,
+ micropc == numMicroOps - 1))
+ micropc += 1
iop = InstObjParams(name, Name, baseClass,
{'code' : '', 'num_micro_ops' : numMicroOps,
'alloc_micro_ops' : allocMicroOps})
diff --git a/src/arch/x86/isa/main.isa b/src/arch/x86/isa/main.isa
index cc3a9bee4..063d7125d 100644
--- a/src/arch/x86/isa/main.isa
+++ b/src/arch/x86/isa/main.isa
@@ -72,26 +72,55 @@
namespace X86ISA;
-//Include the simple microcode assembler
-##include "microasm.isa"
+////////////////////////////////////////////////////////////////////
+//
+// General infrastructure code. These files provide infrastructure
+// which was developed to support x86 but isn't specific to it.
+//
-//Include the bitfield definitions
-##include "bitfields.isa"
+//Include code to build macroops.
+##include "macroop.isa"
-//Include the operand_types and operand definitions
-##include "operands.isa"
+//Include the simple microcode assembler. This will hopefully stay
+//unspecialized for x86 and can later be made available to other ISAs.
+##include "microasm.isa"
+
+////////////////////////////////////////////////////////////////////
+//
+// X86 only infrastructure code.
+//
-//Include the base class for x86 instructions, and some support code
+//Include the base class for x86 instructions, and some support code.
##include "base.isa"
-//Include the instruction definitions
-##include "insts/insts.isa"
+//Include code to specialize an instruction template to operate on
+//a particular set of operands. This is specific to x86 and the x86
+//microcode ISA.
+##include "specialize.isa"
+
+////////////////////////////////////////////////////////////////////
+//
+// Code which directly specifies isa components like instructions
+// microops, and the decoder.
+//
//Include the definitions for the instruction formats
##include "formats/formats.isa"
-//Include the definitions of the micro ops
+//Include the operand_types and operand definitions. These are needed by
+//the microop definitions.
+##include "operands.isa"
+
+//Include the definitions of the micro ops.
+//These are StaticInst classes which stand on their own and make up an
+//internal instruction set.
##include "microops/microops.isa"
+//Include the instruction definitions which are microop assembler programs.
+##include "insts/insts.isa"
+
+//Include the bitfield definitions
+##include "bitfields.isa"
+
//Include the decoder definition
##include "decoder/decoder.isa"
diff --git a/src/arch/x86/isa/microasm.isa b/src/arch/x86/isa/microasm.isa
index b94b55aab..23567aae9 100644
--- a/src/arch/x86/isa/microasm.isa
+++ b/src/arch/x86/isa/microasm.isa
@@ -57,152 +57,17 @@
////////////////////////////////////////////////////////////////////
//
-// Code to "specialize" a microcode sequence to use a particular
-// variety of operands
+// The microcode assembler
//
let {{
- # This builds either a regular or macro op to implement the sequence of
- # ops we give it.
- def genInst(name, Name, ops):
- # If we can implement this instruction with exactly one microop, just
- # use that directly.
- newStmnt = ''
- if len(ops) == 1:
- decode_block = "return (X86StaticInst *)(%s);" % \
- ops[0].getAllocator()
- return ('', '', decode_block, '')
- else:
- # Build a macroop to contain the sequence of microops we've
- # been given.
- return genMacroOp(name, Name, ops)
-}};
-
-let {{
- # This code builds up a decode block which decodes based on switchval.
- # vals is a dict which matches case values with what should be decoded to.
- # builder is called on the exploded contents of "vals" values to generate
- # whatever code should be used.
- def doSplitDecode(name, Name, builder, switchVal, vals, default = None):
- header_output = ''
- decoder_output = ''
- decode_block = 'switch(%s) {\n' % switchVal
- exec_output = ''
- for (val, todo) in vals.items():
- (new_header_output,
- new_decoder_output,
- new_decode_block,
- new_exec_output) = builder(name, Name, *todo)
- header_output += new_header_output
- decoder_output += new_decoder_output
- decode_block += '\tcase %s: %s\n' % (val, new_decode_block)
- exec_output += new_exec_output
- if default:
- (new_header_output,
- new_decoder_output,
- new_decode_block,
- new_exec_output) = builder(name, Name, *default)
- header_output += new_header_output
- decoder_output += new_decoder_output
- decode_block += '\tdefault: %s\n' % new_decode_block
- exec_output += new_exec_output
- decode_block += '}\n'
- return (header_output, decoder_output, decode_block, exec_output)
-}};
-
-let {{
- class OpType(object):
- parser = re.compile(r"(?P<tag>[A-Z][A-Z]*)(?P<size>[a-z][a-z]*)|(r(?P<reg>[A-Za-z0-9][A-Za-z0-9]*))")
- def __init__(self, opTypeString):
- match = OpType.parser.search(opTypeString)
- if match == None:
- raise Exception, "Problem parsing operand type %s" % opTypeString
- self.reg = match.group("reg")
- self.tag = match.group("tag")
- self.size = match.group("size")
+ # These are used when setting up microops so that they can specialize their
+ # base class template properly.
+ RegOpType = "RegisterOperand"
+ ImmOpType = "ImmediateOperand"
}};
let {{
-
- # This function specializes the given piece of code to use a particular
- # set of argument types described by "opTypes". These are "implemented"
- # in reverse order.
- def specializeInst(name, Name, code, opTypes):
- opNum = len(opTypes) - 1
- while len(opTypes):
- # print "Building a composite op with tags", opTypes
- # print "And code", code
- opNum = len(opTypes) - 1
- # A regular expression to find the operand placeholders we're
- # interested in.
- opRe = re.compile("\\^(?P<operandNum>%d)(?=[^0-9]|$)" % opNum)
-
- # Parse the operand type strign we're working with
- opType = OpType(opTypes[opNum])
-
- if opType.reg:
- #Figure out what to do with fixed register operands
- if opType.reg in ("Ax", "Bx", "Cx", "Dx"):
- code = opRe.sub("%%{INTREG_R%s}" % opType.reg.upper(), code)
- elif opType.reg == "Al":
- # We need a way to specify register width
- code = opRe.sub("%{INTREG_RAX}", code)
- else:
- print "Didn't know how to encode fixed register %s!" % opType.reg
- elif opType.tag == None or opType.size == None:
- raise Exception, "Problem parsing operand tag: %s" % opType.tag
- elif opType.tag in ("C", "D", "G", "P", "S", "T", "V"):
- # Use the "reg" field of the ModRM byte to select the register
- code = opRe.sub("%{(uint8_t)MODRM_REG}", code)
- elif opType.tag in ("E", "Q", "W"):
- # This might refer to memory or to a register. We need to
- # divide it up farther.
- regCode = opRe.sub("%{(uint8_t)MODRM_RM}", code)
- regTypes = copy.copy(opTypes)
- regTypes.pop(-1)
- # This needs to refer to memory, but we'll fill in the details
- # later. It needs to take into account unaligned memory
- # addresses.
- memCode = opRe.sub("%0", code)
- memTypes = copy.copy(opTypes)
- memTypes.pop(-1)
- return doSplitDecode(name, Name, specializeInst, "MODRM_MOD",
- {"3" : (regCode, regTypes)}, (memCode, memTypes))
- elif opType.tag in ("I", "J"):
- # Immediates are already in the instruction, so don't leave in
- # those parameters
- code = opRe.sub("${IMMEDIATE}", code)
- elif opType.tag == "M":
- # This needs to refer to memory, but we'll fill in the details
- # later. It needs to take into account unaligned memory
- # addresses.
- code = opRe.sub("%0", code)
- elif opType.tag in ("PR", "R", "VR"):
- # There should probably be a check here to verify that mod
- # is equal to 11b
- code = opRe.sub("%{(uint8_t)MODRM_RM}", code)
- else:
- raise Exception, "Unrecognized tag %s." % opType.tag
- opTypes.pop(-1)
-
- # At this point, we've built up "code" to have all the necessary extra
- # instructions needed to implement whatever types of operands were
- # specified. Now we'll assemble it it into a microOp sequence.
- ops = assembleMicro(code)
-
- # Build a macroop to contain the sequence of microops we've
- # constructed. The decode block will be used to fill in our
- # inner decode structure, and the rest will be concatenated and
- # passed back.
- return genInst(name, Name, ops)
-}};
-
-////////////////////////////////////////////////////////////////////
-//
-// The microcode assembler
-//
-
-let {{
class MicroOpStatement(object):
def __init__(self):
self.className = ''
@@ -242,19 +107,9 @@ let {{
return 'new %s%s(machInst%s%s)' % (self.className, signature, self.microFlagsText(microFlags), args)
}};
-let {{
- def buildLabelDict(ops):
- labels = {}
- micropc = 0
- for op in ops:
- if op.label:
- labels[op.label] = count
- micropc += 1
- return labels
-}};
-
let{{
- def assembleMicro(code):
+ def assembleMicro(name, Name, code):
+
# This function takes in a block of microcode assembly and returns
# a python list of objects which describe it.
@@ -341,7 +196,13 @@ let{{
lineMatch = lineRe.search(code)
# Decode the labels into displacements
- labels = buildLabelDict(statements)
+
+ labels = {}
+ micropc = 0
+ for statement in statements:
+ if statement.label:
+ labels[statement.label] = count
+ micropc += 1
micropc = 0
for statement in statements:
for arg in statement.args:
@@ -353,5 +214,15 @@ let{{
# micropc + 1 + displacement.
arg["operandImm"] = labels[arg["operandLabel"]] - micropc - 1
micropc += 1
- return statements
+
+ # If we can implement this instruction with exactly one microop, just
+ # use that directly.
+ if len(statements) == 1:
+ decode_block = "return %s;" % \
+ statements[0].getAllocator()
+ return ('', '', decode_block, '')
+ else:
+ # Build a macroop to contain the sequence of microops we've
+ # been given.
+ return genMacroOp(name, Name, statements)
}};
diff --git a/src/arch/x86/isa/microops/base.isa b/src/arch/x86/isa/microops/base.isa
index b1351d999..4254994f3 100644
--- a/src/arch/x86/isa/microops/base.isa
+++ b/src/arch/x86/isa/microops/base.isa
@@ -63,12 +63,15 @@ output header {{
};
}};
-//A class which is the base of all x86 micro ops it provides a function to
+//A class which is the base of all x86 micro ops. It provides a function to
//set necessary flags appropriately.
output header {{
class X86MicroOpBase : public X86StaticInst
{
protected:
+ uint8_t opSize;
+ uint8_t addrSize;
+
X86MicroOpBase(bool isMicro, bool isDelayed,
bool isFirst, bool isLast,
const char *mnem, ExtMachInst _machInst,
@@ -94,6 +97,7 @@ def template BaseMicroOpTemplateDeclare {{
let {{
def buildBaseMicroOpTemplate(Name, numParams):
+ assert(numParams > 0)
signature = "<"
signature += "int SignatureOperandTypeSpecifier0"
for count in xrange(1,numParams):
@@ -102,10 +106,9 @@ let {{
signature += ">"
subs = {"signature" : signature, "class_name" : Name}
return BaseMicroOpTemplateDeclare.subst(subs)
+}};
- RegOpType = "RegisterOperand"
- ImmOpType = "ImmediateOperand"
-
+let {{
def buildMicroOpTemplateDict(*params):
signature = "<"
if len(params):
diff --git a/src/arch/x86/isa/specialize.isa b/src/arch/x86/isa/specialize.isa
new file mode 100644
index 000000000..9cac09770
--- /dev/null
+++ b/src/arch/x86/isa/specialize.isa
@@ -0,0 +1,172 @@
+// -*- mode:c++ -*-
+
+// Copyright (c) 2007 The Hewlett-Packard Development Company
+// All rights reserved.
+//
+// Redistribution and use of this software in source and binary forms,
+// with or without modification, are permitted provided that the
+// following conditions are met:
+//
+// The software must be used only for Non-Commercial Use which means any
+// use which is NOT directed to receiving any direct monetary
+// compensation for, or commercial advantage from such use. Illustrative
+// examples of non-commercial use are academic research, personal study,
+// teaching, education and corporate research & development.
+// Illustrative examples of commercial use are distributing products for
+// commercial advantage and providing services using the software for
+// commercial advantage.
+//
+// If you wish to use this software or functionality therein that may be
+// covered by patents for commercial use, please contact:
+// Director of Intellectual Property Licensing
+// Office of Strategy and Technology
+// Hewlett-Packard Company
+// 1501 Page Mill Road
+// Palo Alto, California 94304
+//
+// Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer. Redistributions
+// in binary form must reproduce the above copyright notice, this list of
+// conditions and the following disclaimer in the documentation and/or
+// other materials provided with the distribution. Neither the name of
+// the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission. No right of
+// sublicense is granted herewith. Derivatives of the software and
+// output created using the software may be prepared, but only for
+// Non-Commercial Uses. Derivatives of the software may be shared with
+// others provided: (i) the others agree to abide by the list of
+// conditions herein which includes the Non-Commercial Use restrictions;
+// and (ii) such Derivatives of the software include the above copyright
+// notice to acknowledge the contribution from this software where
+// applicable, this list of conditions and the disclaimer below.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Gabe Black
+
+////////////////////////////////////////////////////////////////////
+//
+// Code to "specialize" a microcode sequence to use a particular
+// variety of operands
+//
+
+let {{
+ # This code builds up a decode block which decodes based on switchval.
+ # vals is a dict which matches case values with what should be decoded to.
+ # builder is called on the exploded contents of "vals" values to generate
+ # whatever code should be used.
+ def doSplitDecode(name, Name, builder, switchVal, vals, default = None):
+ header_output = ''
+ decoder_output = ''
+ decode_block = 'switch(%s) {\n' % switchVal
+ exec_output = ''
+ for (val, todo) in vals.items():
+ (new_header_output,
+ new_decoder_output,
+ new_decode_block,
+ new_exec_output) = builder(name, Name, *todo)
+ header_output += new_header_output
+ decoder_output += new_decoder_output
+ decode_block += '\tcase %s: %s\n' % (val, new_decode_block)
+ exec_output += new_exec_output
+ if default:
+ (new_header_output,
+ new_decoder_output,
+ new_decode_block,
+ new_exec_output) = builder(name, Name, *default)
+ header_output += new_header_output
+ decoder_output += new_decoder_output
+ decode_block += '\tdefault: %s\n' % new_decode_block
+ exec_output += new_exec_output
+ decode_block += '}\n'
+ return (header_output, decoder_output, decode_block, exec_output)
+}};
+
+let {{
+ class OpType(object):
+ parser = re.compile(r"(?P<tag>[A-Z][A-Z]*)(?P<size>[a-z][a-z]*)|(r(?P<reg>[A-Za-z0-9][A-Za-z0-9]*))")
+ def __init__(self, opTypeString):
+ match = OpType.parser.search(opTypeString)
+ if match == None:
+ raise Exception, "Problem parsing operand type %s" % opTypeString
+ self.reg = match.group("reg")
+ self.tag = match.group("tag")
+ self.size = match.group("size")
+
+ # This function specializes the given piece of code to use a particular
+ # set of argument types described by "opTypes". These are "implemented"
+ # in reverse order.
+ def specializeInst(name, Name, code, opTypes):
+ opNum = len(opTypes) - 1
+ while len(opTypes):
+ # print "Building a composite op with tags", opTypes
+ # print "And code", code
+ opNum = len(opTypes) - 1
+ # A regular expression to find the operand placeholders we're
+ # interested in.
+ opRe = re.compile("\\^(?P<operandNum>%d)(?=[^0-9]|$)" % opNum)
+
+ # Parse the operand type strign we're working with
+ opType = OpType(opTypes[opNum])
+
+ if opType.reg:
+ #Figure out what to do with fixed register operands
+ if opType.reg in ("Ax", "Bx", "Cx", "Dx"):
+ code = opRe.sub("%%{INTREG_R%s}" % opType.reg.upper(), code)
+ elif opType.reg == "Al":
+ # We need a way to specify register width
+ code = opRe.sub("%{INTREG_RAX}", code)
+ else:
+ print "Didn't know how to encode fixed register %s!" % opType.reg
+ elif opType.tag == None or opType.size == None:
+ raise Exception, "Problem parsing operand tag: %s" % opType.tag
+ elif opType.tag in ("C", "D", "G", "P", "S", "T", "V"):
+ # Use the "reg" field of the ModRM byte to select the register
+ code = opRe.sub("%{(uint8_t)MODRM_REG}", code)
+ elif opType.tag in ("E", "Q", "W"):
+ # This might refer to memory or to a register. We need to
+ # divide it up farther.
+ regCode = opRe.sub("%{(uint8_t)MODRM_RM}", code)
+ regTypes = copy.copy(opTypes)
+ regTypes.pop(-1)
+ # This needs to refer to memory, but we'll fill in the details
+ # later. It needs to take into account unaligned memory
+ # addresses.
+ memCode = opRe.sub("%0", code)
+ memTypes = copy.copy(opTypes)
+ memTypes.pop(-1)
+ return doSplitDecode(name, Name, specializeInst, "MODRM_MOD",
+ {"3" : (regCode, regTypes)}, (memCode, memTypes))
+ elif opType.tag in ("I", "J"):
+ # Immediates are already in the instruction, so don't leave in
+ # those parameters
+ code = opRe.sub("${IMMEDIATE}", code)
+ elif opType.tag == "M":
+ # This needs to refer to memory, but we'll fill in the details
+ # later. It needs to take into account unaligned memory
+ # addresses.
+ code = opRe.sub("%0", code)
+ elif opType.tag in ("PR", "R", "VR"):
+ # There should probably be a check here to verify that mod
+ # is equal to 11b
+ code = opRe.sub("%{(uint8_t)MODRM_RM}", code)
+ else:
+ raise Exception, "Unrecognized tag %s." % opType.tag
+ opTypes.pop(-1)
+
+ # At this point, we've built up "code" to have all the necessary extra
+ # instructions needed to implement whatever types of operands were
+ # specified. Now we'll assemble it it into a StaticInst.
+ return assembleMicro(name, Name, code)
+}};
diff --git a/src/arch/x86/predecoder.cc b/src/arch/x86/predecoder.cc
index 80971e7cf..573012ee6 100644
--- a/src/arch/x86/predecoder.cc
+++ b/src/arch/x86/predecoder.cc
@@ -117,37 +117,33 @@ namespace X86ISA
//Operand size override prefixes
case OperandSizeOverride:
DPRINTF(Predecoder, "Found operand size override prefix.\n");
+ emi.legacy.op = true;
break;
case AddressSizeOverride:
DPRINTF(Predecoder, "Found address size override prefix.\n");
+ emi.legacy.addr = true;
break;
//Segment override prefixes
case CSOverride:
- DPRINTF(Predecoder, "Found cs segment override.\n");
- break;
case DSOverride:
- DPRINTF(Predecoder, "Found ds segment override.\n");
- break;
case ESOverride:
- DPRINTF(Predecoder, "Found es segment override.\n");
- break;
case FSOverride:
- DPRINTF(Predecoder, "Found fs segment override.\n");
- break;
case GSOverride:
- DPRINTF(Predecoder, "Found gs segment override.\n");
- break;
case SSOverride:
- DPRINTF(Predecoder, "Found ss segment override.\n");
+ DPRINTF(Predecoder, "Found segment override.\n");
+ emi.legacy.seg = prefix;
break;
case Lock:
DPRINTF(Predecoder, "Found lock prefix.\n");
+ emi.legacy.lock = true;
break;
case Rep:
DPRINTF(Predecoder, "Found rep prefix.\n");
+ emi.legacy.rep = true;
break;
case Repne:
DPRINTF(Predecoder, "Found repne prefix.\n");
+ emi.legacy.repne = true;
break;
case RexPrefix:
DPRINTF(Predecoder, "Found Rex prefix %#x.\n", nextByte);
@@ -198,16 +194,36 @@ namespace X86ISA
displacementCollected = 0;
emi.displacement = 0;
+ //Figure out the effective operand size. This can be overriden to
+ //a fixed value at the decoder level.
+ if(/*FIXME long mode*/1)
+ {
+ if(emi.rex && emi.rex.w)
+ emi.opSize = 3; // 64 bit operand size
+ else if(emi.legacy.op)
+ emi.opSize = 1; // 16 bit operand size
+ else
+ emi.opSize = 2; // 32 bit operand size
+ }
+ else if(/*FIXME default 32*/1)
+ {
+ if(emi.legacy.op)
+ emi.opSize = 1; // 16 bit operand size
+ else
+ emi.opSize = 2; // 32 bit operand size
+ }
+ else // 16 bit default operand size
+ {
+ if(emi.legacy.op)
+ emi.opSize = 2; // 32 bit operand size
+ else
+ emi.opSize = 1; // 16 bit operand size
+ }
+
//Figure out how big of an immediate we'll retreive based
//on the opcode.
- int immType = ImmediateType[
- emi.opcode.num - 1][nextByte];
- if(0) //16 bit mode
- immediateSize = ImmediateTypeToSize[0][immType];
- else if(!(emi.rex & 0x4)) //32 bit mode
- immediateSize = ImmediateTypeToSize[1][immType];
- else //64 bit mode
- immediateSize = ImmediateTypeToSize[2][immType];
+ int immType = ImmediateType[emi.opcode.num - 1][nextByte];
+ immediateSize = SizeTypeToSize[emi.opSize - 1][immType];
//Determine what to expect next
if (UsesModRM[emi.opcode.num - 1][nextByte]) {
@@ -351,6 +367,16 @@ namespace X86ISA
if(immediateSize == immediateCollected)
{
+ //XXX Warning! The following is an observed pattern and might
+ //not always be true!
+
+ //Instructions which use 64 bit operands but 32 bit immediates
+ //need to have the immediate sign extended to 64 bits.
+ //Instructions which use true 64 bit immediates won't be
+ //affected, and instructions that use true 32 bit immediates
+ //won't notice.
+ if(immediateSize == 4)
+ emi.immediate = sext<32>(emi.immediate);
DPRINTF(Predecoder, "Collected immediate %#x.\n",
emi.immediate);
emiIsReady = true;
diff --git a/src/arch/x86/predecoder.hh b/src/arch/x86/predecoder.hh
index 1df17d6d2..6562ab9f5 100644
--- a/src/arch/x86/predecoder.hh
+++ b/src/arch/x86/predecoder.hh
@@ -73,7 +73,7 @@ namespace X86ISA
static const uint8_t Prefixes[256];
static const uint8_t UsesModRM[2][256];
static const uint8_t ImmediateType[2][256];
- static const uint8_t ImmediateTypeToSize[3][10];
+ static const uint8_t SizeTypeToSize[3][10];
protected:
ThreadContext * tc;
diff --git a/src/arch/x86/predecoder_tables.cc b/src/arch/x86/predecoder_tables.cc
index f233ad234..38b9c57a3 100644
--- a/src/arch/x86/predecoder_tables.cc
+++ b/src/arch/x86/predecoder_tables.cc
@@ -141,7 +141,7 @@ namespace X86ISA
}
};
- enum ImmediateTypes {
+ enum SizeType {
NoImm,
NI = NoImm,
ByteImm,
@@ -158,19 +158,19 @@ namespace X86ISA
VW = VWordImm,
ZWordImm,
ZW = ZWordImm,
- Pointer,
- PO = Pointer,
//The enter instruction takes -2- immediates for a total of 3 bytes
Enter,
- EN = Enter
+ EN = Enter,
+ Pointer,
+ PO = Pointer
};
- const uint8_t Predecoder::ImmediateTypeToSize[3][10] =
+ const uint8_t Predecoder::SizeTypeToSize[3][10] =
{
-// noimm byte word dword qword oword vword zword enter
- {0, 1, 2, 4, 8, 16, 2, 2, 3, 4}, //16 bit
- {0, 1, 2, 4, 8, 16, 4, 4, 3, 6}, //32 bit
- {0, 1, 2, 4, 8, 16, 4, 8, 3, 0} //64 bit
+// noimm byte word dword qword oword vword zword enter pointer
+ {0, 1, 2, 4, 8, 16, 2, 2, 3, 4 }, //16 bit
+ {0, 1, 2, 4, 8, 16, 4, 4, 3, 6 }, //32 bit
+ {0, 1, 2, 4, 8, 16, 4, 8, 3, 0 } //64 bit
};
//This table determines the immediate type. The first index is the
diff --git a/src/arch/x86/types.hh b/src/arch/x86/types.hh
index cdac3c00e..022f20ee5 100644
--- a/src/arch/x86/types.hh
+++ b/src/arch/x86/types.hh
@@ -70,25 +70,31 @@ namespace X86ISA
typedef uint64_t MachInst;
enum Prefixes {
- NoOverride = 0,
- CSOverride = 1,
- DSOverride = 2,
- ESOverride = 3,
- FSOverride = 4,
- GSOverride = 5,
- SSOverride = 6,
- //The Rex prefix obviously doesn't fit in with the above, but putting
- //it here lets us save double the space the enums take up.
- RexPrefix = 7,
+ NoOverride,
+ CSOverride,
+ DSOverride,
+ ESOverride,
+ FSOverride,
+ GSOverride,
+ SSOverride,
+ RexPrefix,
+ OperandSizeOverride,
+ AddressSizeOverride,
+ Lock,
+ Rep,
+ Repne
+ };
+
+ BitUnion8(LegacyPrefixVector)
+ Bitfield<7> repne;
+ Bitfield<6> rep;
+ Bitfield<5> lock;
+ Bitfield<4> addr;
+ Bitfield<3> op;
//There can be only one segment override, so they share the
//first 3 bits in the legacyPrefixes bitfield.
- SegmentOverride = 0x7,
- OperandSizeOverride = 8,
- AddressSizeOverride = 16,
- Lock = 32,
- Rep = 64,
- Repne = 128
- };
+ Bitfield<2,0> seg;
+ EndBitUnion(LegacyPrefixVector)
BitUnion8(ModRM)
Bitfield<7,6> mod;
@@ -118,7 +124,7 @@ namespace X86ISA
struct ExtMachInst
{
//Prefixes
- uint8_t legacy;
+ LegacyPrefixVector legacy;
Rex rex;
//This holds all of the bytes of the opcode
struct
@@ -140,6 +146,10 @@ namespace X86ISA
//Immediate fields
uint64_t immediate;
uint64_t displacement;
+
+ //The effective operand size.
+ uint8_t opSize;
+ //The
};
inline static std::ostream &
diff --git a/src/arch/x86/utility.hh b/src/arch/x86/utility.hh
index e0bd09515..1c98e7fbc 100644
--- a/src/arch/x86/utility.hh
+++ b/src/arch/x86/utility.hh
@@ -78,7 +78,8 @@ namespace __hash_namespace {
((uint64_t)emi.opcode.prefixA << 16) |
((uint64_t)emi.opcode.prefixB << 8) |
((uint64_t)emi.opcode.op)) ^
- emi.immediate ^ emi.displacement;
+ emi.immediate ^ emi.displacement ^
+ emi.opSize;
};
};
}
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index 6c6d90076..eed05c2f1 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -877,6 +877,11 @@ BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
effAddrValid = true;
physEffAddr = req->getPaddr();
memReqFlags = req->getFlags();
+
+ if (req->isCondSwap()) {
+ assert(res);
+ req->setExtraData(*res);
+ }
#if 0
if (cpu->system->memctrl->badaddr(physEffAddr)) {
fault = TheISA::genMachineCheckFault();
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index e1b27048d..f24de20d9 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -289,15 +289,19 @@ class LSQUnit {
struct SQEntry {
/** Constructs an empty store queue entry. */
SQEntry()
- : inst(NULL), req(NULL), size(0), data(0),
+ : inst(NULL), req(NULL), size(0),
canWB(0), committed(0), completed(0)
- { }
+ {
+ bzero(data, sizeof(data));
+ }
/** Constructs a store queue entry for a given instruction. */
SQEntry(DynInstPtr &_inst)
- : inst(_inst), req(NULL), size(0), data(0),
+ : inst(_inst), req(NULL), size(0),
canWB(0), committed(0), completed(0)
- { }
+ {
+ bzero(data, sizeof(data));
+ }
/** The store instruction. */
DynInstPtr inst;
@@ -306,7 +310,7 @@ class LSQUnit {
/** The size of the store. */
int size;
/** The store data. */
- IntReg data;
+ char data[sizeof(IntReg)];
/** Whether or not the store can writeback. */
bool canWB;
/** Whether or not the store is committed. */
@@ -554,22 +558,14 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
if ((store_has_lower_limit && store_has_upper_limit)) {
// Get shift amount for offset into the store's data.
int shift_amt = req->getVaddr() & (store_size - 1);
- // @todo: Magic number, assumes byte addressing
- shift_amt = shift_amt << 3;
-
- // Cast this to type T?
- data = storeQueue[store_idx].data >> shift_amt;
- // When the data comes from the store queue entry, it's in host
- // order. When it gets sent to the load, it needs to be in guest
- // order so when the load converts it again, it ends up back
- // in host order like the inst expects.
- data = TheISA::htog(data);
+ memcpy(&data, storeQueue[store_idx].data + shift_amt, sizeof(T));
assert(!load_inst->memData);
load_inst->memData = new uint8_t[64];
- memcpy(load_inst->memData, &data, req->getSize());
+ memcpy(load_inst->memData,
+ storeQueue[store_idx].data + shift_amt, req->getSize());
DPRINTF(LSQUnit, "Forwarding from store idx %i to load to "
"addr %#x, data %#x\n",
@@ -716,7 +712,10 @@ LSQUnit<Impl>::write(Request *req, T &data, int store_idx)
storeQueue[store_idx].req = req;
storeQueue[store_idx].size = sizeof(T);
- storeQueue[store_idx].data = data;
+ assert(sizeof(T) <= sizeof(storeQueue[store_idx].data));
+
+ T gData = htog(data);
+ memcpy(storeQueue[store_idx].data, &gData, sizeof(T));
// This function only writes the data to the store queue, so no fault
// can happen here.
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 2aa0d6b6a..44e2cea76 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -645,22 +645,10 @@ LSQUnit<Impl>::writebackStores()
assert(!inst->memData);
inst->memData = new uint8_t[64];
- TheISA::IntReg convertedData =
- TheISA::htog(storeQueue[storeWBIdx].data);
-
- //FIXME This is a hack to get SPARC working. It, along with endianness
- //in the memory system in general, need to be straightened out more
- //formally. The problem is that the data's endianness is swapped when
- //it's in the 64 bit data field in the store queue. The data that you
- //want won't start at the beginning of the field anymore unless it was
- //a 64 bit access.
- memcpy(inst->memData,
- (uint8_t *)&convertedData +
- (TheISA::ByteOrderDiffers ?
- (sizeof(TheISA::IntReg) - req->getSize()) : 0),
- req->getSize());
-
- PacketPtr data_pkt = new Packet(req, MemCmd::WriteReq,
+ memcpy(inst->memData, storeQueue[storeWBIdx].data, req->getSize());
+
+ MemCmd command = req->isSwap() ? MemCmd::SwapReq : MemCmd::WriteReq;
+ PacketPtr data_pkt = new Packet(req, command,
Packet::Broadcast);
data_pkt->dataStatic(inst->memData);
@@ -677,7 +665,7 @@ LSQUnit<Impl>::writebackStores()
inst->seqNum);
// @todo: Remove this SC hack once the memory system handles it.
- if (req->isLocked()) {
+ if (inst->isStoreConditional()) {
// Disable recording the result temporarily. Writing to
// misc regs normally updates the result, but this is not
// the desired behavior when handling store conditionals.
diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh
index bbc69fc96..b5b1cd021 100644
--- a/src/cpu/o3/regfile.hh
+++ b/src/cpu/o3/regfile.hh
@@ -174,7 +174,7 @@ class PhysRegFile
// Remove the base Float reg dependency.
reg_idx = reg_idx - numPhysicalIntRegs;
- assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
+ assert(reg_idx < numPhysicalFloatRegs);
DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n",
int(reg_idx), (uint64_t)val);
@@ -189,7 +189,7 @@ class PhysRegFile
// Remove the base Float reg dependency.
reg_idx = reg_idx - numPhysicalIntRegs;
- assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
+ assert(reg_idx < numPhysicalFloatRegs);
DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n",
int(reg_idx), (uint64_t)val);
@@ -204,7 +204,7 @@ class PhysRegFile
// Remove the base Float reg dependency.
reg_idx = reg_idx - numPhysicalIntRegs;
- assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
+ assert(reg_idx < numPhysicalFloatRegs);
DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n",
int(reg_idx), (uint64_t)val);
@@ -217,7 +217,7 @@ class PhysRegFile
// Remove the base Float reg dependency.
reg_idx = reg_idx - numPhysicalIntRegs;
- assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
+ assert(reg_idx < numPhysicalFloatRegs);
DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n",
int(reg_idx), (uint64_t)val);
@@ -232,11 +232,11 @@ class PhysRegFile
MiscReg readMiscReg(int misc_reg, unsigned thread_id)
{
- return miscRegs[thread_id].readReg(misc_reg,
- cpu->tcBase(thread_id));
+ return miscRegs[thread_id].readReg(misc_reg, cpu->tcBase(thread_id));
}
- void setMiscRegNoEffect(int misc_reg, const MiscReg &val, unsigned thread_id)
+ void setMiscRegNoEffect(int misc_reg,
+ const MiscReg &val, unsigned thread_id)
{
miscRegs[thread_id].setRegNoEffect(misc_reg, val);
}
diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh
index ec630b31e..431705e19 100644
--- a/src/cpu/o3/rename_impl.hh
+++ b/src/cpu/o3/rename_impl.hh
@@ -996,7 +996,12 @@ DefaultRename<Impl>::renameSrcRegs(DynInstPtr &inst,unsigned tid)
if (src_reg < TheISA::FP_Base_DepTag) {
flat_src_reg = TheISA::flattenIntIndex(inst->tcBase(), src_reg);
DPRINTF(Rename, "Flattening index %d to %d.\n", (int)src_reg, (int)flat_src_reg);
+ } else {
+ // Floating point and Miscellaneous registers need their indexes
+ // adjusted to account for the expanded number of flattened int regs.
+ flat_src_reg = src_reg - TheISA::FP_Base_DepTag + TheISA::NumIntRegs;
}
+
inst->flattenSrcReg(src_idx, flat_src_reg);
// Look up the source registers to get the phys. register they've
@@ -1033,8 +1038,13 @@ DefaultRename<Impl>::renameDestRegs(DynInstPtr &inst,unsigned tid)
RegIndex dest_reg = inst->destRegIdx(dest_idx);
RegIndex flat_dest_reg = dest_reg;
if (dest_reg < TheISA::FP_Base_DepTag) {
+ // Integer registers are flattened.
flat_dest_reg = TheISA::flattenIntIndex(inst->tcBase(), dest_reg);
DPRINTF(Rename, "Flattening index %d to %d.\n", (int)dest_reg, (int)flat_dest_reg);
+ } else {
+ // Floating point and Miscellaneous registers need their indexes
+ // adjusted to account for the expanded number of flattened int regs.
+ flat_dest_reg = dest_reg - TheISA::FP_Base_DepTag + TheISA::NumIntRegs;
}
inst->flattenDestReg(dest_idx, flat_dest_reg);
diff --git a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.ini b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.ini
new file mode 100644
index 000000000..f804a40fe
--- /dev/null
+++ b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.ini
@@ -0,0 +1,379 @@
+[root]
+type=Root
+children=system
+dummy=0
+
+[system]
+type=System
+children=cpu membus physmem
+mem_mode=atomic
+physmem=system.physmem
+
+[system.cpu]
+type=DerivO3CPU
+children=dcache fuPool icache l2cache toL2Bus workload
+BTBEntries=4096
+BTBTagSize=16
+LFSTSize=1024
+LQEntries=32
+RASSize=16
+SQEntries=32
+SSITSize=1024
+activity=0
+backComSize=5
+choiceCtrBits=2
+choicePredictorSize=8192
+clock=1
+commitToDecodeDelay=1
+commitToFetchDelay=1
+commitToIEWDelay=1
+commitToRenameDelay=1
+commitWidth=8
+cpu_id=0
+decodeToFetchDelay=1
+decodeToRenameDelay=1
+decodeWidth=8
+defer_registration=false
+dispatchWidth=8
+fetchToDecodeDelay=1
+fetchTrapLatency=1
+fetchWidth=8
+forwardComSize=5
+fuPool=system.cpu.fuPool
+function_trace=false
+function_trace_start=0
+globalCtrBits=2
+globalHistoryBits=13
+globalPredictorSize=8192
+iewToCommitDelay=1
+iewToDecodeDelay=1
+iewToFetchDelay=1
+iewToRenameDelay=1
+instShiftAmt=2
+issueToExecuteDelay=1
+issueWidth=8
+localCtrBits=2
+localHistoryBits=11
+localHistoryTableSize=2048
+localPredictorSize=2048
+max_insts_all_threads=0
+max_insts_any_thread=0
+max_loads_all_threads=0
+max_loads_any_thread=0
+numIQEntries=64
+numPhysFloatRegs=256
+numPhysIntRegs=256
+numROBEntries=192
+numRobs=1
+numThreads=1
+phase=0
+predType=tournament
+progress_interval=0
+renameToDecodeDelay=1
+renameToFetchDelay=1
+renameToIEWDelay=2
+renameToROBDelay=1
+renameWidth=8
+squashWidth=8
+system=system
+trapLatency=13
+wbDepth=1
+wbWidth=8
+workload=system.cpu.workload
+dcache_port=system.cpu.dcache.cpu_side
+icache_port=system.cpu.icache.cpu_side
+
+[system.cpu.dcache]
+type=BaseCache
+adaptive_compression=false
+assoc=2
+block_size=64
+compressed_bus=false
+compression_latency=0
+hash_delay=1
+hit_latency=1
+latency=1
+lifo=false
+max_miss_count=0
+mshrs=10
+prefetch_access=false
+prefetch_cache_check_push=true
+prefetch_data_accesses_only=false
+prefetch_degree=1
+prefetch_latency=10
+prefetch_miss=false
+prefetch_past_page=false
+prefetch_policy=none
+prefetch_serial_squash=false
+prefetch_use_cpu_id=true
+prefetcher_size=100
+prioritizeRequests=false
+protocol=Null
+repl=Null
+size=262144
+split=false
+split_size=0
+store_compressed=false
+subblock_size=0
+tgts_per_mshr=20
+trace_addr=0
+two_queue=false
+write_buffers=8
+cpu_side=system.cpu.dcache_port
+mem_side=system.cpu.toL2Bus.port[1]
+
+[system.cpu.fuPool]
+type=FUPool
+children=FUList0 FUList1 FUList2 FUList3 FUList4 FUList5 FUList6 FUList7
+FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUList2 system.cpu.fuPool.FUList3 system.cpu.fuPool.FUList4 system.cpu.fuPool.FUList5 system.cpu.fuPool.FUList6 system.cpu.fuPool.FUList7
+
+[system.cpu.fuPool.FUList0]
+type=FUDesc
+children=opList0
+count=6
+opList=system.cpu.fuPool.FUList0.opList0
+
+[system.cpu.fuPool.FUList0.opList0]
+type=OpDesc
+issueLat=1
+opClass=IntAlu
+opLat=1
+
+[system.cpu.fuPool.FUList1]
+type=FUDesc
+children=opList0 opList1
+count=2
+opList=system.cpu.fuPool.FUList1.opList0 system.cpu.fuPool.FUList1.opList1
+
+[system.cpu.fuPool.FUList1.opList0]
+type=OpDesc
+issueLat=1
+opClass=IntMult
+opLat=3
+
+[system.cpu.fuPool.FUList1.opList1]
+type=OpDesc
+issueLat=19
+opClass=IntDiv
+opLat=20
+
+[system.cpu.fuPool.FUList2]
+type=FUDesc
+children=opList0 opList1 opList2
+count=4
+opList=system.cpu.fuPool.FUList2.opList0 system.cpu.fuPool.FUList2.opList1 system.cpu.fuPool.FUList2.opList2
+
+[system.cpu.fuPool.FUList2.opList0]
+type=OpDesc
+issueLat=1
+opClass=FloatAdd
+opLat=2
+
+[system.cpu.fuPool.FUList2.opList1]
+type=OpDesc
+issueLat=1
+opClass=FloatCmp
+opLat=2
+
+[system.cpu.fuPool.FUList2.opList2]
+type=OpDesc
+issueLat=1
+opClass=FloatCvt
+opLat=2
+
+[system.cpu.fuPool.FUList3]
+type=FUDesc
+children=opList0 opList1 opList2
+count=2
+opList=system.cpu.fuPool.FUList3.opList0 system.cpu.fuPool.FUList3.opList1 system.cpu.fuPool.FUList3.opList2
+
+[system.cpu.fuPool.FUList3.opList0]
+type=OpDesc
+issueLat=1
+opClass=FloatMult
+opLat=4
+
+[system.cpu.fuPool.FUList3.opList1]
+type=OpDesc
+issueLat=12
+opClass=FloatDiv
+opLat=12
+
+[system.cpu.fuPool.FUList3.opList2]
+type=OpDesc
+issueLat=24
+opClass=FloatSqrt
+opLat=24
+
+[system.cpu.fuPool.FUList4]
+type=FUDesc
+children=opList0
+count=0
+opList=system.cpu.fuPool.FUList4.opList0
+
+[system.cpu.fuPool.FUList4.opList0]
+type=OpDesc
+issueLat=1
+opClass=MemRead
+opLat=1
+
+[system.cpu.fuPool.FUList5]
+type=FUDesc
+children=opList0
+count=0
+opList=system.cpu.fuPool.FUList5.opList0
+
+[system.cpu.fuPool.FUList5.opList0]
+type=OpDesc
+issueLat=1
+opClass=MemWrite
+opLat=1
+
+[system.cpu.fuPool.FUList6]
+type=FUDesc
+children=opList0 opList1
+count=4
+opList=system.cpu.fuPool.FUList6.opList0 system.cpu.fuPool.FUList6.opList1
+
+[system.cpu.fuPool.FUList6.opList0]
+type=OpDesc
+issueLat=1
+opClass=MemRead
+opLat=1
+
+[system.cpu.fuPool.FUList6.opList1]
+type=OpDesc
+issueLat=1
+opClass=MemWrite
+opLat=1
+
+[system.cpu.fuPool.FUList7]
+type=FUDesc
+children=opList0
+count=1
+opList=system.cpu.fuPool.FUList7.opList0
+
+[system.cpu.fuPool.FUList7.opList0]
+type=OpDesc
+issueLat=3
+opClass=IprAccess
+opLat=3
+
+[system.cpu.icache]
+type=BaseCache
+adaptive_compression=false
+assoc=2
+block_size=64
+compressed_bus=false
+compression_latency=0
+hash_delay=1
+hit_latency=1
+latency=1
+lifo=false
+max_miss_count=0
+mshrs=10
+prefetch_access=false
+prefetch_cache_check_push=true
+prefetch_data_accesses_only=false
+prefetch_degree=1
+prefetch_latency=10
+prefetch_miss=false
+prefetch_past_page=false
+prefetch_policy=none
+prefetch_serial_squash=false
+prefetch_use_cpu_id=true
+prefetcher_size=100
+prioritizeRequests=false
+protocol=Null
+repl=Null
+size=131072
+split=false
+split_size=0
+store_compressed=false
+subblock_size=0
+tgts_per_mshr=20
+trace_addr=0
+two_queue=false
+write_buffers=8
+cpu_side=system.cpu.icache_port
+mem_side=system.cpu.toL2Bus.port[0]
+
+[system.cpu.l2cache]
+type=BaseCache
+adaptive_compression=false
+assoc=2
+block_size=64
+compressed_bus=false
+compression_latency=0
+hash_delay=1
+hit_latency=1
+latency=1
+lifo=false
+max_miss_count=0
+mshrs=10
+prefetch_access=false
+prefetch_cache_check_push=true
+prefetch_data_accesses_only=false
+prefetch_degree=1
+prefetch_latency=10
+prefetch_miss=false
+prefetch_past_page=false
+prefetch_policy=none
+prefetch_serial_squash=false
+prefetch_use_cpu_id=true
+prefetcher_size=100
+prioritizeRequests=false
+protocol=Null
+repl=Null
+size=2097152
+split=false
+split_size=0
+store_compressed=false
+subblock_size=0
+tgts_per_mshr=5
+trace_addr=0
+two_queue=false
+write_buffers=8
+cpu_side=system.cpu.toL2Bus.port[2]
+mem_side=system.membus.port[1]
+
+[system.cpu.toL2Bus]
+type=Bus
+bus_id=0
+clock=1000
+responder_set=false
+width=64
+port=system.cpu.icache.mem_side system.cpu.dcache.mem_side system.cpu.l2cache.cpu_side
+
+[system.cpu.workload]
+type=LiveProcess
+cmd=insttest
+cwd=
+egid=100
+env=
+euid=100
+executable=tests/test-progs/insttest/bin/sparc/linux/insttest
+gid=100
+input=cin
+output=cout
+pid=100
+ppid=99
+system=system
+uid=100
+
+[system.membus]
+type=Bus
+bus_id=0
+clock=1000
+responder_set=false
+width=64
+port=system.physmem.port system.cpu.l2cache.mem_side
+
+[system.physmem]
+type=PhysicalMemory
+file=
+latency=1
+range=0:134217727
+zero=false
+port=system.membus.port[0]
+
diff --git a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.out b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.out
new file mode 100644
index 000000000..d248f77bf
--- /dev/null
+++ b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.out
@@ -0,0 +1,367 @@
+[root]
+type=Root
+dummy=0
+
+[system.physmem]
+type=PhysicalMemory
+file=
+range=[0,134217727]
+latency=1
+zero=false
+
+[system]
+type=System
+physmem=system.physmem
+mem_mode=atomic
+
+[system.membus]
+type=Bus
+bus_id=0
+clock=1000
+width=64
+responder_set=false
+
+[system.cpu.workload]
+type=LiveProcess
+cmd=insttest
+executable=tests/test-progs/insttest/bin/sparc/linux/insttest
+input=cin
+output=cout
+env=
+cwd=
+system=system
+uid=100
+euid=100
+gid=100
+egid=100
+pid=100
+ppid=99
+
+[system.cpu.fuPool.FUList0.opList0]
+type=OpDesc
+opClass=IntAlu
+opLat=1
+issueLat=1
+
+[system.cpu.fuPool.FUList0]
+type=FUDesc
+opList=system.cpu.fuPool.FUList0.opList0
+count=6
+
+[system.cpu.fuPool.FUList1.opList0]
+type=OpDesc
+opClass=IntMult
+opLat=3
+issueLat=1
+
+[system.cpu.fuPool.FUList1.opList1]
+type=OpDesc
+opClass=IntDiv
+opLat=20
+issueLat=19
+
+[system.cpu.fuPool.FUList1]
+type=FUDesc
+opList=system.cpu.fuPool.FUList1.opList0 system.cpu.fuPool.FUList1.opList1
+count=2
+
+[system.cpu.fuPool.FUList2.opList0]
+type=OpDesc
+opClass=FloatAdd
+opLat=2
+issueLat=1
+
+[system.cpu.fuPool.FUList2.opList1]
+type=OpDesc
+opClass=FloatCmp
+opLat=2
+issueLat=1
+
+[system.cpu.fuPool.FUList2.opList2]
+type=OpDesc
+opClass=FloatCvt
+opLat=2
+issueLat=1
+
+[system.cpu.fuPool.FUList2]
+type=FUDesc
+opList=system.cpu.fuPool.FUList2.opList0 system.cpu.fuPool.FUList2.opList1 system.cpu.fuPool.FUList2.opList2
+count=4
+
+[system.cpu.fuPool.FUList3.opList0]
+type=OpDesc
+opClass=FloatMult
+opLat=4
+issueLat=1
+
+[system.cpu.fuPool.FUList3.opList1]
+type=OpDesc
+opClass=FloatDiv
+opLat=12
+issueLat=12
+
+[system.cpu.fuPool.FUList3.opList2]
+type=OpDesc
+opClass=FloatSqrt
+opLat=24
+issueLat=24
+
+[system.cpu.fuPool.FUList3]
+type=FUDesc
+opList=system.cpu.fuPool.FUList3.opList0 system.cpu.fuPool.FUList3.opList1 system.cpu.fuPool.FUList3.opList2
+count=2
+
+[system.cpu.fuPool.FUList4.opList0]
+type=OpDesc
+opClass=MemRead
+opLat=1
+issueLat=1
+
+[system.cpu.fuPool.FUList4]
+type=FUDesc
+opList=system.cpu.fuPool.FUList4.opList0
+count=0
+
+[system.cpu.fuPool.FUList5.opList0]
+type=OpDesc
+opClass=MemWrite
+opLat=1
+issueLat=1
+
+[system.cpu.fuPool.FUList5]
+type=FUDesc
+opList=system.cpu.fuPool.FUList5.opList0
+count=0
+
+[system.cpu.fuPool.FUList6.opList0]
+type=OpDesc
+opClass=MemRead
+opLat=1
+issueLat=1
+
+[system.cpu.fuPool.FUList6.opList1]
+type=OpDesc
+opClass=MemWrite
+opLat=1
+issueLat=1
+
+[system.cpu.fuPool.FUList6]
+type=FUDesc
+opList=system.cpu.fuPool.FUList6.opList0 system.cpu.fuPool.FUList6.opList1
+count=4
+
+[system.cpu.fuPool.FUList7.opList0]
+type=OpDesc
+opClass=IprAccess
+opLat=3
+issueLat=3
+
+[system.cpu.fuPool.FUList7]
+type=FUDesc
+opList=system.cpu.fuPool.FUList7.opList0
+count=1
+
+[system.cpu.fuPool]
+type=FUPool
+FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUList2 system.cpu.fuPool.FUList3 system.cpu.fuPool.FUList4 system.cpu.fuPool.FUList5 system.cpu.fuPool.FUList6 system.cpu.fuPool.FUList7
+
+[system.cpu]
+type=DerivO3CPU
+clock=1
+phase=0
+numThreads=1
+cpu_id=0
+activity=0
+workload=system.cpu.workload
+checker=null
+max_insts_any_thread=0
+max_insts_all_threads=0
+max_loads_any_thread=0
+max_loads_all_threads=0
+progress_interval=0
+cachePorts=200
+decodeToFetchDelay=1
+renameToFetchDelay=1
+iewToFetchDelay=1
+commitToFetchDelay=1
+fetchWidth=8
+renameToDecodeDelay=1
+iewToDecodeDelay=1
+commitToDecodeDelay=1
+fetchToDecodeDelay=1
+decodeWidth=8
+iewToRenameDelay=1
+commitToRenameDelay=1
+decodeToRenameDelay=1
+renameWidth=8
+commitToIEWDelay=1
+renameToIEWDelay=2
+issueToExecuteDelay=1
+dispatchWidth=8
+issueWidth=8
+wbWidth=8
+wbDepth=1
+fuPool=system.cpu.fuPool
+iewToCommitDelay=1
+renameToROBDelay=1
+commitWidth=8
+squashWidth=8
+trapLatency=13
+backComSize=5
+forwardComSize=5
+predType=tournament
+localPredictorSize=2048
+localCtrBits=2
+localHistoryTableSize=2048
+localHistoryBits=11
+globalPredictorSize=8192
+globalCtrBits=2
+globalHistoryBits=13
+choicePredictorSize=8192
+choiceCtrBits=2
+BTBEntries=4096
+BTBTagSize=16
+RASSize=16
+LQEntries=32
+SQEntries=32
+LFSTSize=1024
+SSITSize=1024
+numPhysIntRegs=256
+numPhysFloatRegs=256
+numIQEntries=64
+numROBEntries=192
+smtNumFetchingThreads=1
+smtFetchPolicy=SingleThread
+smtLSQPolicy=Partitioned
+smtLSQThreshold=100
+smtIQPolicy=Partitioned
+smtIQThreshold=100
+smtROBPolicy=Partitioned
+smtROBThreshold=100
+smtCommitPolicy=RoundRobin
+instShiftAmt=2
+defer_registration=false
+function_trace=false
+function_trace_start=0
+
+[system.cpu.icache]
+type=BaseCache
+size=131072
+assoc=2
+block_size=64
+latency=1
+mshrs=10
+tgts_per_mshr=20
+write_buffers=8
+prioritizeRequests=false
+protocol=null
+trace_addr=0
+hash_delay=1
+repl=null
+compressed_bus=false
+store_compressed=false
+adaptive_compression=false
+compression_latency=0
+block_size=64
+max_miss_count=0
+addr_range=[0,18446744073709551615]
+split=false
+split_size=0
+lifo=false
+two_queue=false
+prefetch_miss=false
+prefetch_access=false
+prefetcher_size=100
+prefetch_past_page=false
+prefetch_serial_squash=false
+prefetch_latency=10
+prefetch_degree=1
+prefetch_policy=none
+prefetch_cache_check_push=true
+prefetch_use_cpu_id=true
+prefetch_data_accesses_only=false
+hit_latency=1
+
+[system.cpu.dcache]
+type=BaseCache
+size=262144
+assoc=2
+block_size=64
+latency=1
+mshrs=10
+tgts_per_mshr=20
+write_buffers=8
+prioritizeRequests=false
+protocol=null
+trace_addr=0
+hash_delay=1
+repl=null
+compressed_bus=false
+store_compressed=false
+adaptive_compression=false
+compression_latency=0
+block_size=64
+max_miss_count=0
+addr_range=[0,18446744073709551615]
+split=false
+split_size=0
+lifo=false
+two_queue=false
+prefetch_miss=false
+prefetch_access=false
+prefetcher_size=100
+prefetch_past_page=false
+prefetch_serial_squash=false
+prefetch_latency=10
+prefetch_degree=1
+prefetch_policy=none
+prefetch_cache_check_push=true
+prefetch_use_cpu_id=true
+prefetch_data_accesses_only=false
+hit_latency=1
+
+[system.cpu.l2cache]
+type=BaseCache
+size=2097152
+assoc=2
+block_size=64
+latency=1
+mshrs=10
+tgts_per_mshr=5
+write_buffers=8
+prioritizeRequests=false
+protocol=null
+trace_addr=0
+hash_delay=1
+repl=null
+compressed_bus=false
+store_compressed=false
+adaptive_compression=false
+compression_latency=0
+block_size=64
+max_miss_count=0
+addr_range=[0,18446744073709551615]
+split=false
+split_size=0
+lifo=false
+two_queue=false
+prefetch_miss=false
+prefetch_access=false
+prefetcher_size=100
+prefetch_past_page=false
+prefetch_serial_squash=false
+prefetch_latency=10
+prefetch_degree=1
+prefetch_policy=none
+prefetch_cache_check_push=true
+prefetch_use_cpu_id=true
+prefetch_data_accesses_only=false
+hit_latency=1
+
+[system.cpu.toL2Bus]
+type=Bus
+bus_id=0
+clock=1000
+width=64
+responder_set=false
+
diff --git a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/m5stats.txt b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/m5stats.txt
new file mode 100644
index 000000000..7c0d31494
--- /dev/null
+++ b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/m5stats.txt
@@ -0,0 +1,410 @@
+
+---------- Begin Simulation Statistics ----------
+global.BPredUnit.BTBCorrect 0 # Number of correct BTB predictions (this stat may not work properly.
+global.BPredUnit.BTBHits 2990 # Number of BTB hits
+global.BPredUnit.BTBLookups 7055 # Number of BTB lookups
+global.BPredUnit.RASInCorrect 0 # Number of incorrect RAS predictions.
+global.BPredUnit.condIncorrect 2077 # Number of conditional branches incorrect
+global.BPredUnit.condPredicted 7846 # Number of conditional branches predicted
+global.BPredUnit.lookups 7846 # Number of BP lookups
+global.BPredUnit.usedRAS 0 # Number of times the RAS was used to get a target.
+host_inst_rate 15119 # Simulator instruction rate (inst/s)
+host_mem_usage 154868 # Number of bytes of host memory used
+host_seconds 0.73 # Real time elapsed on the host
+host_tick_rate 1956796 # Simulator tick rate (ticks/s)
+memdepunit.memDep.conflictingLoads 12 # Number of conflicting loads.
+memdepunit.memDep.conflictingStores 0 # Number of conflicting stores.
+memdepunit.memDep.insertedLoads 3250 # Number of loads inserted to the mem dependence unit.
+memdepunit.memDep.insertedStores 2817 # Number of stores inserted to the mem dependence unit.
+sim_freq 1000000000000 # Frequency of simulated ticks
+sim_insts 10976 # Number of instructions simulated
+sim_seconds 0.000001 # Number of seconds simulated
+sim_ticks 1421211 # Number of ticks simulated
+system.cpu.commit.COM:branches 2152 # Number of branches committed
+system.cpu.commit.COM:bw_lim_events 172 # number cycles where commit BW limit reached
+system.cpu.commit.COM:bw_limited 0 # number of insts not committed due to BW limits
+system.cpu.commit.COM:committed_per_cycle.start_dist # Number of insts commited each cycle
+system.cpu.commit.COM:committed_per_cycle.samples 221349
+system.cpu.commit.COM:committed_per_cycle.min_value 0
+ 0 215844 9751.30%
+ 1 2970 134.18%
+ 2 1290 58.28%
+ 3 631 28.51%
+ 4 208 9.40%
+ 5 90 4.07%
+ 6 133 6.01%
+ 7 11 0.50%
+ 8 172 7.77%
+system.cpu.commit.COM:committed_per_cycle.max_value 8
+system.cpu.commit.COM:committed_per_cycle.end_dist
+
+system.cpu.commit.COM:count 10976 # Number of instructions committed
+system.cpu.commit.COM:loads 1462 # Number of loads committed
+system.cpu.commit.COM:membars 0 # Number of memory barriers committed
+system.cpu.commit.COM:refs 2760 # Number of memory references committed
+system.cpu.commit.COM:swp_count 0 # Number of s/w prefetches committed
+system.cpu.commit.branchMispredicts 2077 # The number of times a branch was mispredicted
+system.cpu.commit.commitCommittedInsts 10976 # The number of committed instructions
+system.cpu.commit.commitNonSpecStalls 327 # The number of times commit has been forced to stall to communicate backwards
+system.cpu.commit.commitSquashedInsts 14263 # The number of squashed insts skipped by commit
+system.cpu.committedInsts 10976 # Number of Instructions Simulated
+system.cpu.committedInsts_total 10976 # Number of Instructions Simulated
+system.cpu.cpi 129.483509 # CPI: Cycles Per Instruction
+system.cpu.cpi_total 129.483509 # CPI: Total CPI of All Threads
+system.cpu.dcache.ReadReq_accesses 2737 # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_avg_miss_latency 6585.044776 # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency 6511.939394 # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_hits 2603 # number of ReadReq hits
+system.cpu.dcache.ReadReq_miss_latency 882396 # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_rate 0.048959 # miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_misses 134 # number of ReadReq misses
+system.cpu.dcache.ReadReq_mshr_hits 68 # number of ReadReq MSHR hits
+system.cpu.dcache.ReadReq_mshr_miss_latency 429788 # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_rate 0.024114 # mshr miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_mshr_misses 66 # number of ReadReq MSHR misses
+system.cpu.dcache.SwapReq_accesses 6 # number of SwapReq accesses(hits+misses)
+system.cpu.dcache.SwapReq_hits 6 # number of SwapReq hits
+system.cpu.dcache.WriteReq_accesses 1292 # number of WriteReq accesses(hits+misses)
+system.cpu.dcache.WriteReq_avg_miss_latency 7960.583924 # average WriteReq miss latency
+system.cpu.dcache.WriteReq_avg_mshr_miss_latency 7136.918605 # average WriteReq mshr miss latency
+system.cpu.dcache.WriteReq_hits 869 # number of WriteReq hits
+system.cpu.dcache.WriteReq_miss_latency 3367327 # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_rate 0.327399 # miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_misses 423 # number of WriteReq misses
+system.cpu.dcache.WriteReq_mshr_hits 337 # number of WriteReq MSHR hits
+system.cpu.dcache.WriteReq_mshr_miss_latency 613775 # number of WriteReq MSHR miss cycles
+system.cpu.dcache.WriteReq_mshr_miss_rate 0.066563 # mshr miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_mshr_misses 86 # number of WriteReq MSHR misses
+system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked
+system.cpu.dcache.avg_blocked_cycles_no_targets <err: div-0> # average number of cycles each access was blocked
+system.cpu.dcache.avg_refs 22.881579 # Average number of references to valid blocks.
+system.cpu.dcache.blocked_no_mshrs 0 # number of cycles access was blocked
+system.cpu.dcache.blocked_no_targets 0 # number of cycles access was blocked
+system.cpu.dcache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked
+system.cpu.dcache.blocked_cycles_no_targets 0 # number of cycles access was blocked
+system.cpu.dcache.cache_copies 0 # number of cache copies performed
+system.cpu.dcache.demand_accesses 4029 # number of demand (read+write) accesses
+system.cpu.dcache.demand_avg_miss_latency 7629.664273 # average overall miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency 6865.546053 # average overall mshr miss latency
+system.cpu.dcache.demand_hits 3472 # number of demand (read+write) hits
+system.cpu.dcache.demand_miss_latency 4249723 # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_rate 0.138248 # miss rate for demand accesses
+system.cpu.dcache.demand_misses 557 # number of demand (read+write) misses
+system.cpu.dcache.demand_mshr_hits 405 # number of demand (read+write) MSHR hits
+system.cpu.dcache.demand_mshr_miss_latency 1043563 # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_rate 0.037726 # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_misses 152 # number of demand (read+write) MSHR misses
+system.cpu.dcache.fast_writes 0 # number of fast writes performed
+system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated
+system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate
+system.cpu.dcache.overall_accesses 4029 # number of overall (read+write) accesses
+system.cpu.dcache.overall_avg_miss_latency 7629.664273 # average overall miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency 6865.546053 # average overall mshr miss latency
+system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency
+system.cpu.dcache.overall_hits 3472 # number of overall hits
+system.cpu.dcache.overall_miss_latency 4249723 # number of overall miss cycles
+system.cpu.dcache.overall_miss_rate 0.138248 # miss rate for overall accesses
+system.cpu.dcache.overall_misses 557 # number of overall misses
+system.cpu.dcache.overall_mshr_hits 405 # number of overall MSHR hits
+system.cpu.dcache.overall_mshr_miss_latency 1043563 # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_rate 0.037726 # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_misses 152 # number of overall MSHR misses
+system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
+system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
+system.cpu.dcache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache
+system.cpu.dcache.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr
+system.cpu.dcache.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue
+system.cpu.dcache.prefetcher.num_hwpf_evicted 0 # number of hwpf removed due to no buffer left
+system.cpu.dcache.prefetcher.num_hwpf_identified 0 # number of hwpf identified
+system.cpu.dcache.prefetcher.num_hwpf_issued 0 # number of hwpf issued
+system.cpu.dcache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated
+system.cpu.dcache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page
+system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time
+system.cpu.dcache.replacements 0 # number of replacements
+system.cpu.dcache.sampled_refs 152 # Sample count of references to valid blocks.
+system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
+system.cpu.dcache.tagsinuse 90.938737 # Cycle average of tags in use
+system.cpu.dcache.total_refs 3478 # Total number of references to valid blocks.
+system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
+system.cpu.dcache.writebacks 0 # number of writebacks
+system.cpu.decode.DECODE:BlockedCycles 192719 # Number of cycles decode is blocked
+system.cpu.decode.DECODE:DecodedInsts 39774 # Number of instructions handled by decode
+system.cpu.decode.DECODE:IdleCycles 20128 # Number of cycles decode is idle
+system.cpu.decode.DECODE:RunCycles 8238 # Number of cycles decode is running
+system.cpu.decode.DECODE:SquashCycles 3162 # Number of cycles decode is squashing
+system.cpu.decode.DECODE:UnblockCycles 264 # Number of cycles decode is unblocking
+system.cpu.fetch.Branches 7846 # Number of branches that fetch encountered
+system.cpu.fetch.CacheLines 5085 # Number of cache lines fetched
+system.cpu.fetch.Cycles 14399 # Number of cycles fetch has run and was not squashing or blocked
+system.cpu.fetch.IcacheSquashes 745 # Number of outstanding Icache misses that were squashed
+system.cpu.fetch.Insts 43304 # Number of instructions fetch has processed
+system.cpu.fetch.SquashCycles 2134 # Number of cycles fetch has spent squashing
+system.cpu.fetch.branchRate 0.034947 # Number of branch fetches per cycle
+system.cpu.fetch.icacheStallCycles 5085 # Number of cycles fetch is stalled on an Icache miss
+system.cpu.fetch.predictedBranches 2990 # Number of branches that fetch has predicted taken
+system.cpu.fetch.rate 0.192881 # Number of inst fetches per cycle
+system.cpu.fetch.rateDist.start_dist # Number of instructions fetched each cycle (Total)
+system.cpu.fetch.rateDist.samples 224511
+system.cpu.fetch.rateDist.min_value 0
+ 0 215198 9585.19%
+ 1 2258 100.57%
+ 2 627 27.93%
+ 3 958 42.67%
+ 4 553 24.63%
+ 5 816 36.35%
+ 6 951 42.36%
+ 7 280 12.47%
+ 8 2870 127.83%
+system.cpu.fetch.rateDist.max_value 8
+system.cpu.fetch.rateDist.end_dist
+
+system.cpu.icache.ReadReq_accesses 5085 # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_avg_miss_latency 5148.266776 # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency 4502.972752 # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_hits 4474 # number of ReadReq hits
+system.cpu.icache.ReadReq_miss_latency 3145591 # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_rate 0.120157 # miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_misses 611 # number of ReadReq misses
+system.cpu.icache.ReadReq_mshr_hits 244 # number of ReadReq MSHR hits
+system.cpu.icache.ReadReq_mshr_miss_latency 1652591 # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_rate 0.072173 # mshr miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_mshr_misses 367 # number of ReadReq MSHR misses
+system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked
+system.cpu.icache.avg_blocked_cycles_no_targets <err: div-0> # average number of cycles each access was blocked
+system.cpu.icache.avg_refs 12.325069 # Average number of references to valid blocks.
+system.cpu.icache.blocked_no_mshrs 0 # number of cycles access was blocked
+system.cpu.icache.blocked_no_targets 0 # number of cycles access was blocked
+system.cpu.icache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked
+system.cpu.icache.blocked_cycles_no_targets 0 # number of cycles access was blocked
+system.cpu.icache.cache_copies 0 # number of cache copies performed
+system.cpu.icache.demand_accesses 5085 # number of demand (read+write) accesses
+system.cpu.icache.demand_avg_miss_latency 5148.266776 # average overall miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency 4502.972752 # average overall mshr miss latency
+system.cpu.icache.demand_hits 4474 # number of demand (read+write) hits
+system.cpu.icache.demand_miss_latency 3145591 # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_rate 0.120157 # miss rate for demand accesses
+system.cpu.icache.demand_misses 611 # number of demand (read+write) misses
+system.cpu.icache.demand_mshr_hits 244 # number of demand (read+write) MSHR hits
+system.cpu.icache.demand_mshr_miss_latency 1652591 # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_rate 0.072173 # mshr miss rate for demand accesses
+system.cpu.icache.demand_mshr_misses 367 # number of demand (read+write) MSHR misses
+system.cpu.icache.fast_writes 0 # number of fast writes performed
+system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated
+system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate
+system.cpu.icache.overall_accesses 5085 # number of overall (read+write) accesses
+system.cpu.icache.overall_avg_miss_latency 5148.266776 # average overall miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency 4502.972752 # average overall mshr miss latency
+system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency
+system.cpu.icache.overall_hits 4474 # number of overall hits
+system.cpu.icache.overall_miss_latency 3145591 # number of overall miss cycles
+system.cpu.icache.overall_miss_rate 0.120157 # miss rate for overall accesses
+system.cpu.icache.overall_misses 611 # number of overall misses
+system.cpu.icache.overall_mshr_hits 244 # number of overall MSHR hits
+system.cpu.icache.overall_mshr_miss_latency 1652591 # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_rate 0.072173 # mshr miss rate for overall accesses
+system.cpu.icache.overall_mshr_misses 367 # number of overall MSHR misses
+system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
+system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
+system.cpu.icache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache
+system.cpu.icache.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr
+system.cpu.icache.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue
+system.cpu.icache.prefetcher.num_hwpf_evicted 0 # number of hwpf removed due to no buffer left
+system.cpu.icache.prefetcher.num_hwpf_identified 0 # number of hwpf identified
+system.cpu.icache.prefetcher.num_hwpf_issued 0 # number of hwpf issued
+system.cpu.icache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated
+system.cpu.icache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page
+system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time
+system.cpu.icache.replacements 1 # number of replacements
+system.cpu.icache.sampled_refs 363 # Sample count of references to valid blocks.
+system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
+system.cpu.icache.tagsinuse 172.869174 # Cycle average of tags in use
+system.cpu.icache.total_refs 4474 # Total number of references to valid blocks.
+system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
+system.cpu.icache.writebacks 0 # number of writebacks
+system.cpu.idleCycles 1196701 # Total number of cycles that the CPU has spent unscheduled due to idling
+system.cpu.iew.EXEC:branches 3576 # Number of branches executed
+system.cpu.iew.EXEC:nop 0 # number of nop insts executed
+system.cpu.iew.EXEC:rate 0.092548 # Inst execution rate
+system.cpu.iew.EXEC:refs 5257 # number of memory reference insts executed
+system.cpu.iew.EXEC:stores 2386 # Number of stores executed
+system.cpu.iew.EXEC:swp 0 # number of swp insts executed
+system.cpu.iew.WB:consumers 9737 # num instructions consuming a value
+system.cpu.iew.WB:count 19769 # cumulative count of insts written-back
+system.cpu.iew.WB:fanout 0.790901 # average fanout of values written-back
+system.cpu.iew.WB:penalized 0 # number of instrctions required to write to 'other' IQ
+system.cpu.iew.WB:penalized_rate 0 # fraction of instructions written-back that wrote to 'other' IQ
+system.cpu.iew.WB:producers 7701 # num instructions producing a value
+system.cpu.iew.WB:rate 0.088054 # insts written-back per cycle
+system.cpu.iew.WB:sent 20061 # cumulative count of insts sent to commit
+system.cpu.iew.branchMispredicts 2593 # Number of branch mispredicts detected at execute
+system.cpu.iew.iewBlockCycles 476 # Number of cycles IEW is blocking
+system.cpu.iew.iewDispLoadInsts 3250 # Number of dispatched load instructions
+system.cpu.iew.iewDispNonSpecInsts 617 # Number of dispatched non-speculative instructions
+system.cpu.iew.iewDispSquashedInsts 2705 # Number of squashed instructions skipped by dispatch
+system.cpu.iew.iewDispStoreInsts 2817 # Number of dispatched store instructions
+system.cpu.iew.iewDispatchedInsts 25240 # Number of instructions dispatched to IQ
+system.cpu.iew.iewExecLoadInsts 2871 # Number of load instructions executed
+system.cpu.iew.iewExecSquashedInsts 1780 # Number of squashed instructions skipped in execute
+system.cpu.iew.iewExecutedInsts 20778 # Number of executed instructions
+system.cpu.iew.iewIQFullEvents 7 # Number of times the IQ has become full, causing a stall
+system.cpu.iew.iewIdleCycles 0 # Number of cycles IEW is idle
+system.cpu.iew.iewLSQFullEvents 0 # Number of times the LSQ has become full, causing a stall
+system.cpu.iew.iewSquashCycles 3162 # Number of cycles IEW is squashing
+system.cpu.iew.iewUnblockCycles 35 # Number of cycles IEW is unblocking
+system.cpu.iew.lsq.thread.0.blockedLoads 0 # Number of blocked loads due to partial load-store forwarding
+system.cpu.iew.lsq.thread.0.cacheBlocked 0 # Number of times an access to memory failed due to the cache being blocked
+system.cpu.iew.lsq.thread.0.forwLoads 39 # Number of loads that had data forwarded from stores
+system.cpu.iew.lsq.thread.0.ignoredResponses 5 # Number of memory responses ignored because the instruction is squashed
+system.cpu.iew.lsq.thread.0.invAddrLoads 0 # Number of loads ignored due to an invalid address
+system.cpu.iew.lsq.thread.0.invAddrSwpfs 0 # Number of software prefetches ignored due to an invalid address
+system.cpu.iew.lsq.thread.0.memOrderViolation 54 # Number of memory ordering violations
+system.cpu.iew.lsq.thread.0.rescheduledLoads 0 # Number of loads that were rescheduled
+system.cpu.iew.lsq.thread.0.squashedLoads 1788 # Number of loads squashed
+system.cpu.iew.lsq.thread.0.squashedStores 1519 # Number of stores squashed
+system.cpu.iew.memOrderViolationEvents 54 # Number of memory order violations
+system.cpu.iew.predictedNotTakenIncorrect 962 # Number of branches that were predicted not taken incorrectly
+system.cpu.iew.predictedTakenIncorrect 1631 # Number of branches that were predicted taken incorrectly
+system.cpu.ipc 0.007723 # IPC: Instructions Per Cycle
+system.cpu.ipc_total 0.007723 # IPC: Total IPC of All Threads
+system.cpu.iq.ISSUE:FU_type_0 22558 # Type of FU issued
+system.cpu.iq.ISSUE:FU_type_0.start_dist
+ (null) 1831 8.12% # Type of FU issued
+ IntAlu 15054 66.73% # Type of FU issued
+ IntMult 0 0.00% # Type of FU issued
+ IntDiv 0 0.00% # Type of FU issued
+ FloatAdd 0 0.00% # Type of FU issued
+ FloatCmp 0 0.00% # Type of FU issued
+ FloatCvt 0 0.00% # Type of FU issued
+ FloatMult 0 0.00% # Type of FU issued
+ FloatDiv 0 0.00% # Type of FU issued
+ FloatSqrt 0 0.00% # Type of FU issued
+ MemRead 3091 13.70% # Type of FU issued
+ MemWrite 2582 11.45% # Type of FU issued
+ IprAccess 0 0.00% # Type of FU issued
+ InstPrefetch 0 0.00% # Type of FU issued
+system.cpu.iq.ISSUE:FU_type_0.end_dist
+system.cpu.iq.ISSUE:fu_busy_cnt 162 # FU busy when requested
+system.cpu.iq.ISSUE:fu_busy_rate 0.007181 # FU busy rate (busy events/executed inst)
+system.cpu.iq.ISSUE:fu_full.start_dist
+ (null) 0 0.00% # attempts to use FU when none available
+ IntAlu 42 25.93% # attempts to use FU when none available
+ IntMult 0 0.00% # attempts to use FU when none available
+ IntDiv 0 0.00% # attempts to use FU when none available
+ FloatAdd 0 0.00% # attempts to use FU when none available
+ FloatCmp 0 0.00% # attempts to use FU when none available
+ FloatCvt 0 0.00% # attempts to use FU when none available
+ FloatMult 0 0.00% # attempts to use FU when none available
+ FloatDiv 0 0.00% # attempts to use FU when none available
+ FloatSqrt 0 0.00% # attempts to use FU when none available
+ MemRead 14 8.64% # attempts to use FU when none available
+ MemWrite 106 65.43% # attempts to use FU when none available
+ IprAccess 0 0.00% # attempts to use FU when none available
+ InstPrefetch 0 0.00% # attempts to use FU when none available
+system.cpu.iq.ISSUE:fu_full.end_dist
+system.cpu.iq.ISSUE:issued_per_cycle.start_dist # Number of insts issued each cycle
+system.cpu.iq.ISSUE:issued_per_cycle.samples 224511
+system.cpu.iq.ISSUE:issued_per_cycle.min_value 0
+ 0 215315 9590.40%
+ 1 4124 183.69%
+ 2 1297 57.77%
+ 3 1306 58.17%
+ 4 1190 53.00%
+ 5 707 31.49%
+ 6 433 19.29%
+ 7 83 3.70%
+ 8 56 2.49%
+system.cpu.iq.ISSUE:issued_per_cycle.max_value 8
+system.cpu.iq.ISSUE:issued_per_cycle.end_dist
+
+system.cpu.iq.ISSUE:rate 0.100476 # Inst issue rate
+system.cpu.iq.iqInstsAdded 24623 # Number of instructions added to the IQ (excludes non-spec)
+system.cpu.iq.iqInstsIssued 22558 # Number of instructions issued
+system.cpu.iq.iqNonSpecInstsAdded 617 # Number of non-speculative instructions added to the IQ
+system.cpu.iq.iqSquashedInstsExamined 11469 # Number of squashed instructions iterated over during squash; mainly for profiling
+system.cpu.iq.iqSquashedInstsIssued 174 # Number of squashed instructions issued
+system.cpu.iq.iqSquashedNonSpecRemoved 290 # Number of squashed non-spec instructions that were removed
+system.cpu.iq.iqSquashedOperandsExamined 5834 # Number of squashed operands that are examined and possibly removed from graph
+system.cpu.l2cache.ReadReq_accesses 513 # number of ReadReq accesses(hits+misses)
+system.cpu.l2cache.ReadReq_avg_miss_latency 4754.779727 # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 2343.506823 # average ReadReq mshr miss latency
+system.cpu.l2cache.ReadReq_miss_latency 2439202 # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_miss_rate 1 # miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_misses 513 # number of ReadReq misses
+system.cpu.l2cache.ReadReq_mshr_miss_latency 1202219 # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_rate 1 # mshr miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_mshr_misses 513 # number of ReadReq MSHR misses
+system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked
+system.cpu.l2cache.avg_blocked_cycles_no_targets <err: div-0> # average number of cycles each access was blocked
+system.cpu.l2cache.avg_refs 0 # Average number of references to valid blocks.
+system.cpu.l2cache.blocked_no_mshrs 0 # number of cycles access was blocked
+system.cpu.l2cache.blocked_no_targets 0 # number of cycles access was blocked
+system.cpu.l2cache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked
+system.cpu.l2cache.blocked_cycles_no_targets 0 # number of cycles access was blocked
+system.cpu.l2cache.cache_copies 0 # number of cache copies performed
+system.cpu.l2cache.demand_accesses 513 # number of demand (read+write) accesses
+system.cpu.l2cache.demand_avg_miss_latency 4754.779727 # average overall miss latency
+system.cpu.l2cache.demand_avg_mshr_miss_latency 2343.506823 # average overall mshr miss latency
+system.cpu.l2cache.demand_hits 0 # number of demand (read+write) hits
+system.cpu.l2cache.demand_miss_latency 2439202 # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_rate 1 # miss rate for demand accesses
+system.cpu.l2cache.demand_misses 513 # number of demand (read+write) misses
+system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
+system.cpu.l2cache.demand_mshr_miss_latency 1202219 # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_rate 1 # mshr miss rate for demand accesses
+system.cpu.l2cache.demand_mshr_misses 513 # number of demand (read+write) MSHR misses
+system.cpu.l2cache.fast_writes 0 # number of fast writes performed
+system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated
+system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate
+system.cpu.l2cache.overall_accesses 513 # number of overall (read+write) accesses
+system.cpu.l2cache.overall_avg_miss_latency 4754.779727 # average overall miss latency
+system.cpu.l2cache.overall_avg_mshr_miss_latency 2343.506823 # average overall mshr miss latency
+system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency
+system.cpu.l2cache.overall_hits 0 # number of overall hits
+system.cpu.l2cache.overall_miss_latency 2439202 # number of overall miss cycles
+system.cpu.l2cache.overall_miss_rate 1 # miss rate for overall accesses
+system.cpu.l2cache.overall_misses 513 # number of overall misses
+system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits
+system.cpu.l2cache.overall_mshr_miss_latency 1202219 # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_rate 1 # mshr miss rate for overall accesses
+system.cpu.l2cache.overall_mshr_misses 513 # number of overall MSHR misses
+system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
+system.cpu.l2cache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
+system.cpu.l2cache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache
+system.cpu.l2cache.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr
+system.cpu.l2cache.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue
+system.cpu.l2cache.prefetcher.num_hwpf_evicted 0 # number of hwpf removed due to no buffer left
+system.cpu.l2cache.prefetcher.num_hwpf_identified 0 # number of hwpf identified
+system.cpu.l2cache.prefetcher.num_hwpf_issued 0 # number of hwpf issued
+system.cpu.l2cache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated
+system.cpu.l2cache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page
+system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time
+system.cpu.l2cache.replacements 0 # number of replacements
+system.cpu.l2cache.sampled_refs 512 # Sample count of references to valid blocks.
+system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
+system.cpu.l2cache.tagsinuse 262.946375 # Cycle average of tags in use
+system.cpu.l2cache.total_refs 0 # Total number of references to valid blocks.
+system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
+system.cpu.l2cache.writebacks 0 # number of writebacks
+system.cpu.numCycles 224511 # number of cpu cycles simulated
+system.cpu.rename.RENAME:BlockCycles 960 # Number of cycles rename is blocking
+system.cpu.rename.RENAME:CommittedMaps 9868 # Number of HB maps that are committed
+system.cpu.rename.RENAME:IQFullEvents 2 # Number of times rename has blocked due to IQ full
+system.cpu.rename.RENAME:IdleCycles 20098 # Number of cycles rename is idle
+system.cpu.rename.RENAME:LSQFullEvents 481 # Number of times rename has blocked due to LSQ full
+system.cpu.rename.RENAME:ROBFullEvents 4 # Number of times rename has blocked due to ROB full
+system.cpu.rename.RENAME:RenameLookups 46931 # Number of register rename lookups that rename has made
+system.cpu.rename.RENAME:RenamedInsts 31260 # Number of instructions processed by rename
+system.cpu.rename.RENAME:RenamedOperands 25831 # Number of destination operands rename has renamed
+system.cpu.rename.RENAME:RunCycles 7921 # Number of cycles rename is running
+system.cpu.rename.RENAME:SquashCycles 3162 # Number of cycles rename is squashing
+system.cpu.rename.RENAME:SquashedInsts 8042 # Number of squashed instructions processed by rename
+system.cpu.rename.RENAME:UnblockCycles 1212 # Number of cycles rename is unblocking
+system.cpu.rename.RENAME:UndoneMaps 15963 # Number of HB maps that are undone due to squashing
+system.cpu.rename.RENAME:serializeStallCycles 190573 # count of cycles rename stalled for serializing inst
+system.cpu.rename.RENAME:serializingInsts 638 # count of serializing insts renamed
+system.cpu.rename.RENAME:skidInsts 5594 # count of insts added to the skid buffer
+system.cpu.rename.RENAME:tempSerializingInsts 629 # count of temporary serializing insts renamed
+system.cpu.timesIdled 289 # Number of times that the entire CPU went into an idle state and unscheduled itself
+system.cpu.workload.PROG:num_syscalls 8 # Number of system calls
+
+---------- End Simulation Statistics ----------
diff --git a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stderr b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stderr
new file mode 100644
index 000000000..48affb0e2
--- /dev/null
+++ b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stderr
@@ -0,0 +1,4 @@
+warn: More than two loadable segments in ELF object.
+warn: Ignoring segment @ 0x0 length 0x0.
+0: system.remote_gdb.listener: listening for remote gdb on port 7003
+warn: Entering event queue @ 0. Starting simulation...
diff --git a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stdout b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stdout
new file mode 100644
index 000000000..6cba2ba7e
--- /dev/null
+++ b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stdout
@@ -0,0 +1,24 @@
+Begining test of difficult SPARC instructions...
+LDSTUB: Passed
+SWAP: Passed
+CAS FAIL: Passed
+CAS WORK: Passed
+CASX FAIL: Passed
+CASX WORK: Passed
+LDTX: Passed
+LDTW: Passed
+STTW: Passed
+Done
+M5 Simulator System
+
+Copyright (c) 2001-2006
+The Regents of The University of Michigan
+All Rights Reserved
+
+
+M5 compiled Apr 9 2007 03:06:26
+M5 started Mon Apr 9 03:06:54 2007
+M5 executing on zizzer.eecs.umich.edu
+command line: build/SPARC_SE/m5.fast -d build/SPARC_SE/tests/fast/quick/02.insttest/sparc/linux/o3-timing tests/run.py quick/02.insttest/sparc/linux/o3-timing
+Global frequency set at 1000000000000 ticks per second
+Exiting @ tick 1421211 because target called exit()
diff --git a/tests/quick/02.insttest/ref/sparc/linux/simple-timing/config.ini b/tests/quick/02.insttest/ref/sparc/linux/simple-timing/config.ini
new file mode 100644
index 000000000..85d14933a
--- /dev/null
+++ b/tests/quick/02.insttest/ref/sparc/linux/simple-timing/config.ini
@@ -0,0 +1,187 @@
+[root]
+type=Root
+children=system
+dummy=0
+
+[system]
+type=System
+children=cpu membus physmem
+mem_mode=atomic
+physmem=system.physmem
+
+[system.cpu]
+type=TimingSimpleCPU
+children=dcache icache l2cache toL2Bus workload
+clock=1
+cpu_id=0
+defer_registration=false
+function_trace=false
+function_trace_start=0
+max_insts_all_threads=0
+max_insts_any_thread=0
+max_loads_all_threads=0
+max_loads_any_thread=0
+phase=0
+progress_interval=0
+system=system
+workload=system.cpu.workload
+dcache_port=system.cpu.dcache.cpu_side
+icache_port=system.cpu.icache.cpu_side
+
+[system.cpu.dcache]
+type=BaseCache
+adaptive_compression=false
+assoc=2
+block_size=64
+compressed_bus=false
+compression_latency=0
+hash_delay=1
+hit_latency=1
+latency=1
+lifo=false
+max_miss_count=0
+mshrs=10
+prefetch_access=false
+prefetch_cache_check_push=true
+prefetch_data_accesses_only=false
+prefetch_degree=1
+prefetch_latency=10
+prefetch_miss=false
+prefetch_past_page=false
+prefetch_policy=none
+prefetch_serial_squash=false
+prefetch_use_cpu_id=true
+prefetcher_size=100
+prioritizeRequests=false
+protocol=Null
+repl=Null
+size=262144
+split=false
+split_size=0
+store_compressed=false
+subblock_size=0
+tgts_per_mshr=5
+trace_addr=0
+two_queue=false
+write_buffers=8
+cpu_side=system.cpu.dcache_port
+mem_side=system.cpu.toL2Bus.port[1]
+
+[system.cpu.icache]
+type=BaseCache
+adaptive_compression=false
+assoc=2
+block_size=64
+compressed_bus=false
+compression_latency=0
+hash_delay=1
+hit_latency=1
+latency=1
+lifo=false
+max_miss_count=0
+mshrs=10
+prefetch_access=false
+prefetch_cache_check_push=true
+prefetch_data_accesses_only=false
+prefetch_degree=1
+prefetch_latency=10
+prefetch_miss=false
+prefetch_past_page=false
+prefetch_policy=none
+prefetch_serial_squash=false
+prefetch_use_cpu_id=true
+prefetcher_size=100
+prioritizeRequests=false
+protocol=Null
+repl=Null
+size=131072
+split=false
+split_size=0
+store_compressed=false
+subblock_size=0
+tgts_per_mshr=5
+trace_addr=0
+two_queue=false
+write_buffers=8
+cpu_side=system.cpu.icache_port
+mem_side=system.cpu.toL2Bus.port[0]
+
+[system.cpu.l2cache]
+type=BaseCache
+adaptive_compression=false
+assoc=2
+block_size=64
+compressed_bus=false
+compression_latency=0
+hash_delay=1
+hit_latency=1
+latency=1
+lifo=false
+max_miss_count=0
+mshrs=10
+prefetch_access=false
+prefetch_cache_check_push=true
+prefetch_data_accesses_only=false
+prefetch_degree=1
+prefetch_latency=10
+prefetch_miss=false
+prefetch_past_page=false
+prefetch_policy=none
+prefetch_serial_squash=false
+prefetch_use_cpu_id=true
+prefetcher_size=100
+prioritizeRequests=false
+protocol=Null
+repl=Null
+size=2097152
+split=false
+split_size=0
+store_compressed=false
+subblock_size=0
+tgts_per_mshr=5
+trace_addr=0
+two_queue=false
+write_buffers=8
+cpu_side=system.cpu.toL2Bus.port[2]
+mem_side=system.membus.port[1]
+
+[system.cpu.toL2Bus]
+type=Bus
+bus_id=0
+clock=1000
+responder_set=false
+width=64
+port=system.cpu.icache.mem_side system.cpu.dcache.mem_side system.cpu.l2cache.cpu_side
+
+[system.cpu.workload]
+type=LiveProcess
+cmd=insttest
+cwd=
+egid=100
+env=
+euid=100
+executable=tests/test-progs/insttest/bin/sparc/linux/insttest
+gid=100
+input=cin
+output=cout
+pid=100
+ppid=99
+system=system
+uid=100
+
+[system.membus]
+type=Bus
+bus_id=0
+clock=1000
+responder_set=false
+width=64
+port=system.physmem.port system.cpu.l2cache.mem_side
+
+[system.physmem]
+type=PhysicalMemory
+file=
+latency=1
+range=0:134217727
+zero=false
+port=system.membus.port[0]
+
diff --git a/tests/quick/02.insttest/ref/sparc/linux/simple-timing/config.out b/tests/quick/02.insttest/ref/sparc/linux/simple-timing/config.out
new file mode 100644
index 000000000..ec2d1886a
--- /dev/null
+++ b/tests/quick/02.insttest/ref/sparc/linux/simple-timing/config.out
@@ -0,0 +1,178 @@
+[root]
+type=Root
+dummy=0
+
+[system.physmem]
+type=PhysicalMemory
+file=
+range=[0,134217727]
+latency=1
+zero=false
+
+[system]
+type=System
+physmem=system.physmem
+mem_mode=atomic
+
+[system.membus]
+type=Bus
+bus_id=0
+clock=1000
+width=64
+responder_set=false
+
+[system.cpu.workload]
+type=LiveProcess
+cmd=insttest
+executable=tests/test-progs/insttest/bin/sparc/linux/insttest
+input=cin
+output=cout
+env=
+cwd=
+system=system
+uid=100
+euid=100
+gid=100
+egid=100
+pid=100
+ppid=99
+
+[system.cpu]
+type=TimingSimpleCPU
+max_insts_any_thread=0
+max_insts_all_threads=0
+max_loads_any_thread=0
+max_loads_all_threads=0
+progress_interval=0
+system=system
+cpu_id=0
+workload=system.cpu.workload
+clock=1
+phase=0
+defer_registration=false
+// width not specified
+function_trace=false
+function_trace_start=0
+// simulate_stalls not specified
+
+[system.cpu.toL2Bus]
+type=Bus
+bus_id=0
+clock=1000
+width=64
+responder_set=false
+
+[system.cpu.icache]
+type=BaseCache
+size=131072
+assoc=2
+block_size=64
+latency=1
+mshrs=10
+tgts_per_mshr=5
+write_buffers=8
+prioritizeRequests=false
+protocol=null
+trace_addr=0
+hash_delay=1
+repl=null
+compressed_bus=false
+store_compressed=false
+adaptive_compression=false
+compression_latency=0
+block_size=64
+max_miss_count=0
+addr_range=[0,18446744073709551615]
+split=false
+split_size=0
+lifo=false
+two_queue=false
+prefetch_miss=false
+prefetch_access=false
+prefetcher_size=100
+prefetch_past_page=false
+prefetch_serial_squash=false
+prefetch_latency=10
+prefetch_degree=1
+prefetch_policy=none
+prefetch_cache_check_push=true
+prefetch_use_cpu_id=true
+prefetch_data_accesses_only=false
+hit_latency=1
+
+[system.cpu.dcache]
+type=BaseCache
+size=262144
+assoc=2
+block_size=64
+latency=1
+mshrs=10
+tgts_per_mshr=5
+write_buffers=8
+prioritizeRequests=false
+protocol=null
+trace_addr=0
+hash_delay=1
+repl=null
+compressed_bus=false
+store_compressed=false
+adaptive_compression=false
+compression_latency=0
+block_size=64
+max_miss_count=0
+addr_range=[0,18446744073709551615]
+split=false
+split_size=0
+lifo=false
+two_queue=false
+prefetch_miss=false
+prefetch_access=false
+prefetcher_size=100
+prefetch_past_page=false
+prefetch_serial_squash=false
+prefetch_latency=10
+prefetch_degree=1
+prefetch_policy=none
+prefetch_cache_check_push=true
+prefetch_use_cpu_id=true
+prefetch_data_accesses_only=false
+hit_latency=1
+
+[system.cpu.l2cache]
+type=BaseCache
+size=2097152
+assoc=2
+block_size=64
+latency=1
+mshrs=10
+tgts_per_mshr=5
+write_buffers=8
+prioritizeRequests=false
+protocol=null
+trace_addr=0
+hash_delay=1
+repl=null
+compressed_bus=false
+store_compressed=false
+adaptive_compression=false
+compression_latency=0
+block_size=64
+max_miss_count=0
+addr_range=[0,18446744073709551615]
+split=false
+split_size=0
+lifo=false
+two_queue=false
+prefetch_miss=false
+prefetch_access=false
+prefetcher_size=100
+prefetch_past_page=false
+prefetch_serial_squash=false
+prefetch_latency=10
+prefetch_degree=1
+prefetch_policy=none
+prefetch_cache_check_push=true
+prefetch_use_cpu_id=true
+prefetch_data_accesses_only=false
+hit_latency=1
+
diff --git a/tests/quick/02.insttest/ref/sparc/linux/simple-timing/m5stats.txt b/tests/quick/02.insttest/ref/sparc/linux/simple-timing/m5stats.txt
new file mode 100644
index 000000000..a4396b3da
--- /dev/null
+++ b/tests/quick/02.insttest/ref/sparc/linux/simple-timing/m5stats.txt
@@ -0,0 +1,215 @@
+
+---------- Begin Simulation Statistics ----------
+host_inst_rate 39129 # Simulator instruction rate (inst/s)
+host_mem_usage 153232 # Number of bytes of host memory used
+host_seconds 0.28 # Real time elapsed on the host
+host_tick_rate 6030675 # Simulator tick rate (ticks/s)
+sim_freq 1000000000000 # Frequency of simulated ticks
+sim_insts 11001 # Number of instructions simulated
+sim_seconds 0.000002 # Number of seconds simulated
+sim_ticks 1698003 # Number of ticks simulated
+system.cpu.dcache.ReadReq_accesses 1462 # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_avg_miss_latency 3977.759259 # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency 2977.759259 # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_hits 1408 # number of ReadReq hits
+system.cpu.dcache.ReadReq_miss_latency 214799 # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_rate 0.036936 # miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_misses 54 # number of ReadReq misses
+system.cpu.dcache.ReadReq_mshr_miss_latency 160799 # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_rate 0.036936 # mshr miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_mshr_misses 54 # number of ReadReq MSHR misses
+system.cpu.dcache.SwapReq_accesses 6 # number of SwapReq accesses(hits+misses)
+system.cpu.dcache.SwapReq_hits 6 # number of SwapReq hits
+system.cpu.dcache.WriteReq_accesses 1292 # number of WriteReq accesses(hits+misses)
+system.cpu.dcache.WriteReq_avg_miss_latency 3963.647727 # average WriteReq miss latency
+system.cpu.dcache.WriteReq_avg_mshr_miss_latency 2963.647727 # average WriteReq mshr miss latency
+system.cpu.dcache.WriteReq_hits 1204 # number of WriteReq hits
+system.cpu.dcache.WriteReq_miss_latency 348801 # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_rate 0.068111 # miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_misses 88 # number of WriteReq misses
+system.cpu.dcache.WriteReq_mshr_miss_latency 260801 # number of WriteReq MSHR miss cycles
+system.cpu.dcache.WriteReq_mshr_miss_rate 0.068111 # mshr miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_mshr_misses 88 # number of WriteReq MSHR misses
+system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked
+system.cpu.dcache.avg_blocked_cycles_no_targets <err: div-0> # average number of cycles each access was blocked
+system.cpu.dcache.avg_refs 18.436620 # Average number of references to valid blocks.
+system.cpu.dcache.blocked_no_mshrs 0 # number of cycles access was blocked
+system.cpu.dcache.blocked_no_targets 0 # number of cycles access was blocked
+system.cpu.dcache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked
+system.cpu.dcache.blocked_cycles_no_targets 0 # number of cycles access was blocked
+system.cpu.dcache.cache_copies 0 # number of cache copies performed
+system.cpu.dcache.demand_accesses 2754 # number of demand (read+write) accesses
+system.cpu.dcache.demand_avg_miss_latency 3969.014085 # average overall miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency 2969.014085 # average overall mshr miss latency
+system.cpu.dcache.demand_hits 2612 # number of demand (read+write) hits
+system.cpu.dcache.demand_miss_latency 563600 # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_rate 0.051561 # miss rate for demand accesses
+system.cpu.dcache.demand_misses 142 # number of demand (read+write) misses
+system.cpu.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
+system.cpu.dcache.demand_mshr_miss_latency 421600 # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_rate 0.051561 # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_misses 142 # number of demand (read+write) MSHR misses
+system.cpu.dcache.fast_writes 0 # number of fast writes performed
+system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated
+system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate
+system.cpu.dcache.overall_accesses 2754 # number of overall (read+write) accesses
+system.cpu.dcache.overall_avg_miss_latency 3969.014085 # average overall miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency 2969.014085 # average overall mshr miss latency
+system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency
+system.cpu.dcache.overall_hits 2612 # number of overall hits
+system.cpu.dcache.overall_miss_latency 563600 # number of overall miss cycles
+system.cpu.dcache.overall_miss_rate 0.051561 # miss rate for overall accesses
+system.cpu.dcache.overall_misses 142 # number of overall misses
+system.cpu.dcache.overall_mshr_hits 0 # number of overall MSHR hits
+system.cpu.dcache.overall_mshr_miss_latency 421600 # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_rate 0.051561 # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_misses 142 # number of overall MSHR misses
+system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
+system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
+system.cpu.dcache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache
+system.cpu.dcache.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr
+system.cpu.dcache.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue
+system.cpu.dcache.prefetcher.num_hwpf_evicted 0 # number of hwpf removed due to no buffer left
+system.cpu.dcache.prefetcher.num_hwpf_identified 0 # number of hwpf identified
+system.cpu.dcache.prefetcher.num_hwpf_issued 0 # number of hwpf issued
+system.cpu.dcache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated
+system.cpu.dcache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page
+system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time
+system.cpu.dcache.replacements 0 # number of replacements
+system.cpu.dcache.sampled_refs 142 # Sample count of references to valid blocks.
+system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
+system.cpu.dcache.tagsinuse 86.872921 # Cycle average of tags in use
+system.cpu.dcache.total_refs 2618 # Total number of references to valid blocks.
+system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
+system.cpu.dcache.writebacks 0 # number of writebacks
+system.cpu.icache.ReadReq_accesses 11002 # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_avg_miss_latency 3961.367491 # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency 2961.367491 # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_hits 10719 # number of ReadReq hits
+system.cpu.icache.ReadReq_miss_latency 1121067 # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_rate 0.025723 # miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_misses 283 # number of ReadReq misses
+system.cpu.icache.ReadReq_mshr_miss_latency 838067 # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_rate 0.025723 # mshr miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_mshr_misses 283 # number of ReadReq MSHR misses
+system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked
+system.cpu.icache.avg_blocked_cycles_no_targets <err: div-0> # average number of cycles each access was blocked
+system.cpu.icache.avg_refs 37.876325 # Average number of references to valid blocks.
+system.cpu.icache.blocked_no_mshrs 0 # number of cycles access was blocked
+system.cpu.icache.blocked_no_targets 0 # number of cycles access was blocked
+system.cpu.icache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked
+system.cpu.icache.blocked_cycles_no_targets 0 # number of cycles access was blocked
+system.cpu.icache.cache_copies 0 # number of cache copies performed
+system.cpu.icache.demand_accesses 11002 # number of demand (read+write) accesses
+system.cpu.icache.demand_avg_miss_latency 3961.367491 # average overall miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency 2961.367491 # average overall mshr miss latency
+system.cpu.icache.demand_hits 10719 # number of demand (read+write) hits
+system.cpu.icache.demand_miss_latency 1121067 # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_rate 0.025723 # miss rate for demand accesses
+system.cpu.icache.demand_misses 283 # number of demand (read+write) misses
+system.cpu.icache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
+system.cpu.icache.demand_mshr_miss_latency 838067 # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_rate 0.025723 # mshr miss rate for demand accesses
+system.cpu.icache.demand_mshr_misses 283 # number of demand (read+write) MSHR misses
+system.cpu.icache.fast_writes 0 # number of fast writes performed
+system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated
+system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate
+system.cpu.icache.overall_accesses 11002 # number of overall (read+write) accesses
+system.cpu.icache.overall_avg_miss_latency 3961.367491 # average overall miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency 2961.367491 # average overall mshr miss latency
+system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency
+system.cpu.icache.overall_hits 10719 # number of overall hits
+system.cpu.icache.overall_miss_latency 1121067 # number of overall miss cycles
+system.cpu.icache.overall_miss_rate 0.025723 # miss rate for overall accesses
+system.cpu.icache.overall_misses 283 # number of overall misses
+system.cpu.icache.overall_mshr_hits 0 # number of overall MSHR hits
+system.cpu.icache.overall_mshr_miss_latency 838067 # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_rate 0.025723 # mshr miss rate for overall accesses
+system.cpu.icache.overall_mshr_misses 283 # number of overall MSHR misses
+system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
+system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
+system.cpu.icache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache
+system.cpu.icache.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr
+system.cpu.icache.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue
+system.cpu.icache.prefetcher.num_hwpf_evicted 0 # number of hwpf removed due to no buffer left
+system.cpu.icache.prefetcher.num_hwpf_identified 0 # number of hwpf identified
+system.cpu.icache.prefetcher.num_hwpf_issued 0 # number of hwpf issued
+system.cpu.icache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated
+system.cpu.icache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page
+system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time
+system.cpu.icache.replacements 0 # number of replacements
+system.cpu.icache.sampled_refs 283 # Sample count of references to valid blocks.
+system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
+system.cpu.icache.tagsinuse 125.297191 # Cycle average of tags in use
+system.cpu.icache.total_refs 10719 # Total number of references to valid blocks.
+system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
+system.cpu.icache.writebacks 0 # number of writebacks
+system.cpu.idle_fraction 0 # Percentage of idle cycles
+system.cpu.l2cache.ReadReq_accesses 423 # number of ReadReq accesses(hits+misses)
+system.cpu.l2cache.ReadReq_avg_miss_latency 2968.515366 # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 1967.515366 # average ReadReq mshr miss latency
+system.cpu.l2cache.ReadReq_miss_latency 1255682 # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_miss_rate 1 # miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_misses 423 # number of ReadReq misses
+system.cpu.l2cache.ReadReq_mshr_miss_latency 832259 # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_rate 1 # mshr miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_mshr_misses 423 # number of ReadReq MSHR misses
+system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked
+system.cpu.l2cache.avg_blocked_cycles_no_targets <err: div-0> # average number of cycles each access was blocked
+system.cpu.l2cache.avg_refs 0 # Average number of references to valid blocks.
+system.cpu.l2cache.blocked_no_mshrs 0 # number of cycles access was blocked
+system.cpu.l2cache.blocked_no_targets 0 # number of cycles access was blocked
+system.cpu.l2cache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked
+system.cpu.l2cache.blocked_cycles_no_targets 0 # number of cycles access was blocked
+system.cpu.l2cache.cache_copies 0 # number of cache copies performed
+system.cpu.l2cache.demand_accesses 423 # number of demand (read+write) accesses
+system.cpu.l2cache.demand_avg_miss_latency 2968.515366 # average overall miss latency
+system.cpu.l2cache.demand_avg_mshr_miss_latency 1967.515366 # average overall mshr miss latency
+system.cpu.l2cache.demand_hits 0 # number of demand (read+write) hits
+system.cpu.l2cache.demand_miss_latency 1255682 # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_rate 1 # miss rate for demand accesses
+system.cpu.l2cache.demand_misses 423 # number of demand (read+write) misses
+system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
+system.cpu.l2cache.demand_mshr_miss_latency 832259 # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_rate 1 # mshr miss rate for demand accesses
+system.cpu.l2cache.demand_mshr_misses 423 # number of demand (read+write) MSHR misses
+system.cpu.l2cache.fast_writes 0 # number of fast writes performed
+system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated
+system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate
+system.cpu.l2cache.overall_accesses 423 # number of overall (read+write) accesses
+system.cpu.l2cache.overall_avg_miss_latency 2968.515366 # average overall miss latency
+system.cpu.l2cache.overall_avg_mshr_miss_latency 1967.515366 # average overall mshr miss latency
+system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency
+system.cpu.l2cache.overall_hits 0 # number of overall hits
+system.cpu.l2cache.overall_miss_latency 1255682 # number of overall miss cycles
+system.cpu.l2cache.overall_miss_rate 1 # miss rate for overall accesses
+system.cpu.l2cache.overall_misses 423 # number of overall misses
+system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits
+system.cpu.l2cache.overall_mshr_miss_latency 832259 # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_rate 1 # mshr miss rate for overall accesses
+system.cpu.l2cache.overall_mshr_misses 423 # number of overall MSHR misses
+system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
+system.cpu.l2cache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
+system.cpu.l2cache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache
+system.cpu.l2cache.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr
+system.cpu.l2cache.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue
+system.cpu.l2cache.prefetcher.num_hwpf_evicted 0 # number of hwpf removed due to no buffer left
+system.cpu.l2cache.prefetcher.num_hwpf_identified 0 # number of hwpf identified
+system.cpu.l2cache.prefetcher.num_hwpf_issued 0 # number of hwpf issued
+system.cpu.l2cache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated
+system.cpu.l2cache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page
+system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time
+system.cpu.l2cache.replacements 0 # number of replacements
+system.cpu.l2cache.sampled_refs 423 # Sample count of references to valid blocks.
+system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
+system.cpu.l2cache.tagsinuse 211.742547 # Cycle average of tags in use
+system.cpu.l2cache.total_refs 0 # Total number of references to valid blocks.
+system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
+system.cpu.l2cache.writebacks 0 # number of writebacks
+system.cpu.not_idle_fraction 1 # Percentage of non-idle cycles
+system.cpu.numCycles 1698003 # number of cpu cycles simulated
+system.cpu.num_insts 11001 # Number of instructions executed
+system.cpu.num_refs 2760 # Number of memory references
+system.cpu.workload.PROG:num_syscalls 8 # Number of system calls
+
+---------- End Simulation Statistics ----------
diff --git a/tests/quick/02.insttest/ref/sparc/linux/simple-timing/stderr b/tests/quick/02.insttest/ref/sparc/linux/simple-timing/stderr
new file mode 100644
index 000000000..fce46c90e
--- /dev/null
+++ b/tests/quick/02.insttest/ref/sparc/linux/simple-timing/stderr
@@ -0,0 +1,4 @@
+warn: More than two loadable segments in ELF object.
+warn: Ignoring segment @ 0x0 length 0x0.
+0: system.remote_gdb.listener: listening for remote gdb on port 7000
+warn: Entering event queue @ 0. Starting simulation...
diff --git a/tests/quick/02.insttest/ref/sparc/linux/simple-timing/stdout b/tests/quick/02.insttest/ref/sparc/linux/simple-timing/stdout
new file mode 100644
index 000000000..100a1ebce
--- /dev/null
+++ b/tests/quick/02.insttest/ref/sparc/linux/simple-timing/stdout
@@ -0,0 +1,24 @@
+Begining test of difficult SPARC instructions...
+LDSTUB: Passed
+SWAP: Passed
+CAS FAIL: Passed
+CAS WORK: Passed
+CASX FAIL: Passed
+CASX WORK: Passed
+LDTX: Passed
+LDTW: Passed
+STTW: Passed
+Done
+M5 Simulator System
+
+Copyright (c) 2001-2006
+The Regents of The University of Michigan
+All Rights Reserved
+
+
+M5 compiled Apr 8 2007 05:25:15
+M5 started Sun Apr 8 22:54:12 2007
+M5 executing on zizzer.eecs.umich.edu
+command line: build/SPARC_SE/m5.fast -d build/SPARC_SE/tests/fast/quick/02.insttest/sparc/linux/simple-timing tests/run.py quick/02.insttest/sparc/linux/simple-timing
+Global frequency set at 1000000000000 ticks per second
+Exiting @ tick 1698003 because target called exit()