diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/arch/sparc/isa/formats/mem/swap.isa | 10 | ||||
-rw-r--r-- | src/arch/x86/isa/base.isa | 7 | ||||
-rw-r--r-- | src/arch/x86/isa/formats/formats.isa | 3 | ||||
-rw-r--r-- | src/arch/x86/isa/macroop.isa (renamed from src/arch/x86/isa/formats/macroop.isa) | 65 | ||||
-rw-r--r-- | src/arch/x86/isa/main.isa | 49 | ||||
-rw-r--r-- | src/arch/x86/isa/microasm.isa | 179 | ||||
-rw-r--r-- | src/arch/x86/isa/microops/base.isa | 11 | ||||
-rw-r--r-- | src/arch/x86/isa/specialize.isa | 172 | ||||
-rw-r--r-- | src/arch/x86/predecoder.cc | 64 | ||||
-rw-r--r-- | src/arch/x86/predecoder.hh | 2 | ||||
-rw-r--r-- | src/arch/x86/predecoder_tables.cc | 18 | ||||
-rw-r--r-- | src/arch/x86/types.hh | 46 | ||||
-rw-r--r-- | src/arch/x86/utility.hh | 3 | ||||
-rw-r--r-- | src/cpu/base_dyn_inst.hh | 5 | ||||
-rw-r--r-- | src/cpu/o3/lsq_unit.hh | 33 | ||||
-rw-r--r-- | src/cpu/o3/lsq_unit_impl.hh | 22 | ||||
-rw-r--r-- | src/cpu/o3/regfile.hh | 14 | ||||
-rw-r--r-- | src/cpu/o3/rename_impl.hh | 10 |
18 files changed, 404 insertions, 309 deletions
diff --git a/src/arch/sparc/isa/formats/mem/swap.isa b/src/arch/sparc/isa/formats/mem/swap.isa index 818597a84..b71542a2b 100644 --- a/src/arch/sparc/isa/formats/mem/swap.isa +++ b/src/arch/sparc/isa/formats/mem/swap.isa @@ -137,7 +137,7 @@ def format Swap(code, postacc_code, mem_flags, *opt_flags) {{ decoder_output, exec_output, decode_block) = doMemFormat(code, SwapFuncs, '', name, Name, flags, - opt_flags, postacc_code) + ["IsStoreConditional"], postacc_code) }}; def format SwapAlt(code, postacc_code, asi, mem_flags, *opt_flags) {{ @@ -148,7 +148,7 @@ def format SwapAlt(code, postacc_code, asi, mem_flags, *opt_flags) {{ decoder_output, exec_output, decode_block) = doMemFormat(code, SwapFuncs, AlternateASIPrivFaultCheck, - name, Name, flags, opt_flags, postacc_code) + name, Name, flags, ["IsStoreConditional"], postacc_code) }}; @@ -163,8 +163,8 @@ let {{ decode_block = BasicDecode.subst(iop) microParams = {"code": code, "postacc_code" : postacc_code, "ea_code" : addrCalcReg, "fault_check" : faultCode} - exec_output = doSplitExecute(execute, name, Name, asi, opt_flags, - microParams); + exec_output = doSplitExecute(execute, name, Name, asi, + ["IsStoreConditional"], microParams); return (header_output, decoder_output, exec_output, decode_block) }}; @@ -177,7 +177,7 @@ def format CasAlt(code, postacc_code, asi, mem_flags, *opt_flags) {{ decoder_output, exec_output, decode_block) = doCasFormat(code, SwapFuncs, AlternateASIPrivFaultCheck, - name, Name, flags, opt_flags, postacc_code) + name, Name, flags, ["IsStoreConditional"], postacc_code) }}; diff --git a/src/arch/x86/isa/base.isa b/src/arch/x86/isa/base.isa index 4776f7a7e..cd166b306 100644 --- a/src/arch/x86/isa/base.isa +++ b/src/arch/x86/isa/base.isa @@ -79,6 +79,13 @@ output header {{ void printReg(std::ostream &os, int reg) const; void printSrcReg(std::ostream &os, int reg) const; void printDestReg(std::ostream &os, int reg) const; + + inline uint64_t merge(uint64_t into, uint64_t val, int size) const + { + //FIXME This needs to be significantly more sophisticated + return val; + } + }; }}; diff --git a/src/arch/x86/isa/formats/formats.isa b/src/arch/x86/isa/formats/formats.isa index f4e5c402f..d763c05bc 100644 --- a/src/arch/x86/isa/formats/formats.isa +++ b/src/arch/x86/isa/formats/formats.isa @@ -95,9 +95,6 @@ //malfunction of the decode mechanism. ##include "error.isa" -//Include code to build up macro op instructions -##include "macroop.isa" - //Include a format which implements a batch of instructions which do the same //thing on a variety of inputs ##include "multi.isa" diff --git a/src/arch/x86/isa/formats/macroop.isa b/src/arch/x86/isa/macroop.isa index 717103df1..7d41a2dea 100644 --- a/src/arch/x86/isa/formats/macroop.isa +++ b/src/arch/x86/isa/macroop.isa @@ -55,16 +55,20 @@ // // Authors: Gabe Black -//////////////////////////////////////////////////////////////////// -// -// Instructions that do the same thing to multiple sets of arguments. -// +// Execute method for macroops. +def template MacroExecPanic {{ + Fault execute(%(CPU_exec_context)s *, Trace::InstRecord *) const + { + panic("Tried to execute macroop directly!"); + M5_DUMMY_RETURN + } +}}; output header {{ // Base class for most macroops, except ones that need to commit as // they go. - class X86MacroInst : public X86StaticInst + class X86MacroInst : public StaticInst { protected: const uint32_t numMicroOps; @@ -72,7 +76,7 @@ output header {{ //Constructor. X86MacroInst(const char *mnem, ExtMachInst _machInst, uint32_t _numMicroOps) - : X86StaticInst(mnem, _machInst, No_OpClass), + : StaticInst(mnem, _machInst, No_OpClass), numMicroOps(_numMicroOps) { assert(numMicroOps); @@ -85,9 +89,6 @@ output header {{ delete [] microOps; } - std::string generateDisassembly(Addr pc, - const SymbolTable *symtab) const; - StaticInstPtr * microOps; StaticInstPtr fetchMicroOp(MicroPC microPC) @@ -96,21 +97,7 @@ output header {{ return microOps[microPC]; } - %(BasicExecPanic)s - }; - - // Base class for macroops which commit as they go. This is for - // instructions which can be partially completed like those with the - // rep prefix. This prevents those instructions from overflowing - // buffers with uncommitted microops. - class X86RollingMacroInst : public X86MacroInst - { - protected: - //Constructor. - X86RollingMacroInst(const char *mnem, ExtMachInst _machInst, - uint32_t _numMicroOps) - : X86MacroInst(mnem, _machInst, numMicroOps) - {} + %(MacroExecPanic)s }; }}; @@ -121,34 +108,24 @@ def template MacroConstructor {{ { %(constructor)s; //alloc_micro_ops is the code that sets up the microOps - //array in the parent class. This hook will hopefully - //allow all that to be automated. + //array in the parent class. %(alloc_micro_ops)s; - setMicroFlags(); } }}; let {{ - def genMacroOp(name, Name, ops, rolling = False): + def genMacroOp(name, Name, opSeq): baseClass = 'X86MacroInst' - if rolling: - baseClass = 'X86RollingMacroInst' - numMicroOps = len(ops) + numMicroOps = len(opSeq.ops) allocMicroOps = '' micropc = 0 - allocMicroOps += \ - "microOps[0] = %s;\n" % \ - op.getAllocator(True, not rolling, True, False) - micropc += 1 - if numMicroOps > 2: - for op in ops[1:-1]: - allocMicroOps += \ - "microOps[%d] = %s;\n" % \ - (micropc, op.getAllocator(True, not rolling, False, False)) - micropc += 1 - allocMicroOps += \ - "microOps[%d] = %s;\n" % \ - op.getAllocator(True, not rolling, False, True) + for op in opSeq.ops: + allocMicroOps += \ + "microOps[%d] = %s;\n" % \ + (micropc, op.getAllocator(True, op.delayed, + micropc == 0, + micropc == numMicroOps - 1)) + micropc += 1 iop = InstObjParams(name, Name, baseClass, {'code' : '', 'num_micro_ops' : numMicroOps, 'alloc_micro_ops' : allocMicroOps}) diff --git a/src/arch/x86/isa/main.isa b/src/arch/x86/isa/main.isa index cc3a9bee4..063d7125d 100644 --- a/src/arch/x86/isa/main.isa +++ b/src/arch/x86/isa/main.isa @@ -72,26 +72,55 @@ namespace X86ISA; -//Include the simple microcode assembler -##include "microasm.isa" +//////////////////////////////////////////////////////////////////// +// +// General infrastructure code. These files provide infrastructure +// which was developed to support x86 but isn't specific to it. +// -//Include the bitfield definitions -##include "bitfields.isa" +//Include code to build macroops. +##include "macroop.isa" -//Include the operand_types and operand definitions -##include "operands.isa" +//Include the simple microcode assembler. This will hopefully stay +//unspecialized for x86 and can later be made available to other ISAs. +##include "microasm.isa" + +//////////////////////////////////////////////////////////////////// +// +// X86 only infrastructure code. +// -//Include the base class for x86 instructions, and some support code +//Include the base class for x86 instructions, and some support code. ##include "base.isa" -//Include the instruction definitions -##include "insts/insts.isa" +//Include code to specialize an instruction template to operate on +//a particular set of operands. This is specific to x86 and the x86 +//microcode ISA. +##include "specialize.isa" + +//////////////////////////////////////////////////////////////////// +// +// Code which directly specifies isa components like instructions +// microops, and the decoder. +// //Include the definitions for the instruction formats ##include "formats/formats.isa" -//Include the definitions of the micro ops +//Include the operand_types and operand definitions. These are needed by +//the microop definitions. +##include "operands.isa" + +//Include the definitions of the micro ops. +//These are StaticInst classes which stand on their own and make up an +//internal instruction set. ##include "microops/microops.isa" +//Include the instruction definitions which are microop assembler programs. +##include "insts/insts.isa" + +//Include the bitfield definitions +##include "bitfields.isa" + //Include the decoder definition ##include "decoder/decoder.isa" diff --git a/src/arch/x86/isa/microasm.isa b/src/arch/x86/isa/microasm.isa index b94b55aab..23567aae9 100644 --- a/src/arch/x86/isa/microasm.isa +++ b/src/arch/x86/isa/microasm.isa @@ -57,152 +57,17 @@ //////////////////////////////////////////////////////////////////// // -// Code to "specialize" a microcode sequence to use a particular -// variety of operands +// The microcode assembler // let {{ - # This builds either a regular or macro op to implement the sequence of - # ops we give it. - def genInst(name, Name, ops): - # If we can implement this instruction with exactly one microop, just - # use that directly. - newStmnt = '' - if len(ops) == 1: - decode_block = "return (X86StaticInst *)(%s);" % \ - ops[0].getAllocator() - return ('', '', decode_block, '') - else: - # Build a macroop to contain the sequence of microops we've - # been given. - return genMacroOp(name, Name, ops) -}}; - -let {{ - # This code builds up a decode block which decodes based on switchval. - # vals is a dict which matches case values with what should be decoded to. - # builder is called on the exploded contents of "vals" values to generate - # whatever code should be used. - def doSplitDecode(name, Name, builder, switchVal, vals, default = None): - header_output = '' - decoder_output = '' - decode_block = 'switch(%s) {\n' % switchVal - exec_output = '' - for (val, todo) in vals.items(): - (new_header_output, - new_decoder_output, - new_decode_block, - new_exec_output) = builder(name, Name, *todo) - header_output += new_header_output - decoder_output += new_decoder_output - decode_block += '\tcase %s: %s\n' % (val, new_decode_block) - exec_output += new_exec_output - if default: - (new_header_output, - new_decoder_output, - new_decode_block, - new_exec_output) = builder(name, Name, *default) - header_output += new_header_output - decoder_output += new_decoder_output - decode_block += '\tdefault: %s\n' % new_decode_block - exec_output += new_exec_output - decode_block += '}\n' - return (header_output, decoder_output, decode_block, exec_output) -}}; - -let {{ - class OpType(object): - parser = re.compile(r"(?P<tag>[A-Z][A-Z]*)(?P<size>[a-z][a-z]*)|(r(?P<reg>[A-Za-z0-9][A-Za-z0-9]*))") - def __init__(self, opTypeString): - match = OpType.parser.search(opTypeString) - if match == None: - raise Exception, "Problem parsing operand type %s" % opTypeString - self.reg = match.group("reg") - self.tag = match.group("tag") - self.size = match.group("size") + # These are used when setting up microops so that they can specialize their + # base class template properly. + RegOpType = "RegisterOperand" + ImmOpType = "ImmediateOperand" }}; let {{ - - # This function specializes the given piece of code to use a particular - # set of argument types described by "opTypes". These are "implemented" - # in reverse order. - def specializeInst(name, Name, code, opTypes): - opNum = len(opTypes) - 1 - while len(opTypes): - # print "Building a composite op with tags", opTypes - # print "And code", code - opNum = len(opTypes) - 1 - # A regular expression to find the operand placeholders we're - # interested in. - opRe = re.compile("\\^(?P<operandNum>%d)(?=[^0-9]|$)" % opNum) - - # Parse the operand type strign we're working with - opType = OpType(opTypes[opNum]) - - if opType.reg: - #Figure out what to do with fixed register operands - if opType.reg in ("Ax", "Bx", "Cx", "Dx"): - code = opRe.sub("%%{INTREG_R%s}" % opType.reg.upper(), code) - elif opType.reg == "Al": - # We need a way to specify register width - code = opRe.sub("%{INTREG_RAX}", code) - else: - print "Didn't know how to encode fixed register %s!" % opType.reg - elif opType.tag == None or opType.size == None: - raise Exception, "Problem parsing operand tag: %s" % opType.tag - elif opType.tag in ("C", "D", "G", "P", "S", "T", "V"): - # Use the "reg" field of the ModRM byte to select the register - code = opRe.sub("%{(uint8_t)MODRM_REG}", code) - elif opType.tag in ("E", "Q", "W"): - # This might refer to memory or to a register. We need to - # divide it up farther. - regCode = opRe.sub("%{(uint8_t)MODRM_RM}", code) - regTypes = copy.copy(opTypes) - regTypes.pop(-1) - # This needs to refer to memory, but we'll fill in the details - # later. It needs to take into account unaligned memory - # addresses. - memCode = opRe.sub("%0", code) - memTypes = copy.copy(opTypes) - memTypes.pop(-1) - return doSplitDecode(name, Name, specializeInst, "MODRM_MOD", - {"3" : (regCode, regTypes)}, (memCode, memTypes)) - elif opType.tag in ("I", "J"): - # Immediates are already in the instruction, so don't leave in - # those parameters - code = opRe.sub("${IMMEDIATE}", code) - elif opType.tag == "M": - # This needs to refer to memory, but we'll fill in the details - # later. It needs to take into account unaligned memory - # addresses. - code = opRe.sub("%0", code) - elif opType.tag in ("PR", "R", "VR"): - # There should probably be a check here to verify that mod - # is equal to 11b - code = opRe.sub("%{(uint8_t)MODRM_RM}", code) - else: - raise Exception, "Unrecognized tag %s." % opType.tag - opTypes.pop(-1) - - # At this point, we've built up "code" to have all the necessary extra - # instructions needed to implement whatever types of operands were - # specified. Now we'll assemble it it into a microOp sequence. - ops = assembleMicro(code) - - # Build a macroop to contain the sequence of microops we've - # constructed. The decode block will be used to fill in our - # inner decode structure, and the rest will be concatenated and - # passed back. - return genInst(name, Name, ops) -}}; - -//////////////////////////////////////////////////////////////////// -// -// The microcode assembler -// - -let {{ class MicroOpStatement(object): def __init__(self): self.className = '' @@ -242,19 +107,9 @@ let {{ return 'new %s%s(machInst%s%s)' % (self.className, signature, self.microFlagsText(microFlags), args) }}; -let {{ - def buildLabelDict(ops): - labels = {} - micropc = 0 - for op in ops: - if op.label: - labels[op.label] = count - micropc += 1 - return labels -}}; - let{{ - def assembleMicro(code): + def assembleMicro(name, Name, code): + # This function takes in a block of microcode assembly and returns # a python list of objects which describe it. @@ -341,7 +196,13 @@ let{{ lineMatch = lineRe.search(code) # Decode the labels into displacements - labels = buildLabelDict(statements) + + labels = {} + micropc = 0 + for statement in statements: + if statement.label: + labels[statement.label] = count + micropc += 1 micropc = 0 for statement in statements: for arg in statement.args: @@ -353,5 +214,15 @@ let{{ # micropc + 1 + displacement. arg["operandImm"] = labels[arg["operandLabel"]] - micropc - 1 micropc += 1 - return statements + + # If we can implement this instruction with exactly one microop, just + # use that directly. + if len(statements) == 1: + decode_block = "return %s;" % \ + statements[0].getAllocator() + return ('', '', decode_block, '') + else: + # Build a macroop to contain the sequence of microops we've + # been given. + return genMacroOp(name, Name, statements) }}; diff --git a/src/arch/x86/isa/microops/base.isa b/src/arch/x86/isa/microops/base.isa index b1351d999..4254994f3 100644 --- a/src/arch/x86/isa/microops/base.isa +++ b/src/arch/x86/isa/microops/base.isa @@ -63,12 +63,15 @@ output header {{ }; }}; -//A class which is the base of all x86 micro ops it provides a function to +//A class which is the base of all x86 micro ops. It provides a function to //set necessary flags appropriately. output header {{ class X86MicroOpBase : public X86StaticInst { protected: + uint8_t opSize; + uint8_t addrSize; + X86MicroOpBase(bool isMicro, bool isDelayed, bool isFirst, bool isLast, const char *mnem, ExtMachInst _machInst, @@ -94,6 +97,7 @@ def template BaseMicroOpTemplateDeclare {{ let {{ def buildBaseMicroOpTemplate(Name, numParams): + assert(numParams > 0) signature = "<" signature += "int SignatureOperandTypeSpecifier0" for count in xrange(1,numParams): @@ -102,10 +106,9 @@ let {{ signature += ">" subs = {"signature" : signature, "class_name" : Name} return BaseMicroOpTemplateDeclare.subst(subs) +}}; - RegOpType = "RegisterOperand" - ImmOpType = "ImmediateOperand" - +let {{ def buildMicroOpTemplateDict(*params): signature = "<" if len(params): diff --git a/src/arch/x86/isa/specialize.isa b/src/arch/x86/isa/specialize.isa new file mode 100644 index 000000000..9cac09770 --- /dev/null +++ b/src/arch/x86/isa/specialize.isa @@ -0,0 +1,172 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2007 The Hewlett-Packard Development Company +// All rights reserved. +// +// Redistribution and use of this software in source and binary forms, +// with or without modification, are permitted provided that the +// following conditions are met: +// +// The software must be used only for Non-Commercial Use which means any +// use which is NOT directed to receiving any direct monetary +// compensation for, or commercial advantage from such use. Illustrative +// examples of non-commercial use are academic research, personal study, +// teaching, education and corporate research & development. +// Illustrative examples of commercial use are distributing products for +// commercial advantage and providing services using the software for +// commercial advantage. +// +// If you wish to use this software or functionality therein that may be +// covered by patents for commercial use, please contact: +// Director of Intellectual Property Licensing +// Office of Strategy and Technology +// Hewlett-Packard Company +// 1501 Page Mill Road +// Palo Alto, California 94304 +// +// Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. Redistributions +// in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or +// other materials provided with the distribution. Neither the name of +// the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. No right of +// sublicense is granted herewith. Derivatives of the software and +// output created using the software may be prepared, but only for +// Non-Commercial Uses. Derivatives of the software may be shared with +// others provided: (i) the others agree to abide by the list of +// conditions herein which includes the Non-Commercial Use restrictions; +// and (ii) such Derivatives of the software include the above copyright +// notice to acknowledge the contribution from this software where +// applicable, this list of conditions and the disclaimer below. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +//////////////////////////////////////////////////////////////////// +// +// Code to "specialize" a microcode sequence to use a particular +// variety of operands +// + +let {{ + # This code builds up a decode block which decodes based on switchval. + # vals is a dict which matches case values with what should be decoded to. + # builder is called on the exploded contents of "vals" values to generate + # whatever code should be used. + def doSplitDecode(name, Name, builder, switchVal, vals, default = None): + header_output = '' + decoder_output = '' + decode_block = 'switch(%s) {\n' % switchVal + exec_output = '' + for (val, todo) in vals.items(): + (new_header_output, + new_decoder_output, + new_decode_block, + new_exec_output) = builder(name, Name, *todo) + header_output += new_header_output + decoder_output += new_decoder_output + decode_block += '\tcase %s: %s\n' % (val, new_decode_block) + exec_output += new_exec_output + if default: + (new_header_output, + new_decoder_output, + new_decode_block, + new_exec_output) = builder(name, Name, *default) + header_output += new_header_output + decoder_output += new_decoder_output + decode_block += '\tdefault: %s\n' % new_decode_block + exec_output += new_exec_output + decode_block += '}\n' + return (header_output, decoder_output, decode_block, exec_output) +}}; + +let {{ + class OpType(object): + parser = re.compile(r"(?P<tag>[A-Z][A-Z]*)(?P<size>[a-z][a-z]*)|(r(?P<reg>[A-Za-z0-9][A-Za-z0-9]*))") + def __init__(self, opTypeString): + match = OpType.parser.search(opTypeString) + if match == None: + raise Exception, "Problem parsing operand type %s" % opTypeString + self.reg = match.group("reg") + self.tag = match.group("tag") + self.size = match.group("size") + + # This function specializes the given piece of code to use a particular + # set of argument types described by "opTypes". These are "implemented" + # in reverse order. + def specializeInst(name, Name, code, opTypes): + opNum = len(opTypes) - 1 + while len(opTypes): + # print "Building a composite op with tags", opTypes + # print "And code", code + opNum = len(opTypes) - 1 + # A regular expression to find the operand placeholders we're + # interested in. + opRe = re.compile("\\^(?P<operandNum>%d)(?=[^0-9]|$)" % opNum) + + # Parse the operand type strign we're working with + opType = OpType(opTypes[opNum]) + + if opType.reg: + #Figure out what to do with fixed register operands + if opType.reg in ("Ax", "Bx", "Cx", "Dx"): + code = opRe.sub("%%{INTREG_R%s}" % opType.reg.upper(), code) + elif opType.reg == "Al": + # We need a way to specify register width + code = opRe.sub("%{INTREG_RAX}", code) + else: + print "Didn't know how to encode fixed register %s!" % opType.reg + elif opType.tag == None or opType.size == None: + raise Exception, "Problem parsing operand tag: %s" % opType.tag + elif opType.tag in ("C", "D", "G", "P", "S", "T", "V"): + # Use the "reg" field of the ModRM byte to select the register + code = opRe.sub("%{(uint8_t)MODRM_REG}", code) + elif opType.tag in ("E", "Q", "W"): + # This might refer to memory or to a register. We need to + # divide it up farther. + regCode = opRe.sub("%{(uint8_t)MODRM_RM}", code) + regTypes = copy.copy(opTypes) + regTypes.pop(-1) + # This needs to refer to memory, but we'll fill in the details + # later. It needs to take into account unaligned memory + # addresses. + memCode = opRe.sub("%0", code) + memTypes = copy.copy(opTypes) + memTypes.pop(-1) + return doSplitDecode(name, Name, specializeInst, "MODRM_MOD", + {"3" : (regCode, regTypes)}, (memCode, memTypes)) + elif opType.tag in ("I", "J"): + # Immediates are already in the instruction, so don't leave in + # those parameters + code = opRe.sub("${IMMEDIATE}", code) + elif opType.tag == "M": + # This needs to refer to memory, but we'll fill in the details + # later. It needs to take into account unaligned memory + # addresses. + code = opRe.sub("%0", code) + elif opType.tag in ("PR", "R", "VR"): + # There should probably be a check here to verify that mod + # is equal to 11b + code = opRe.sub("%{(uint8_t)MODRM_RM}", code) + else: + raise Exception, "Unrecognized tag %s." % opType.tag + opTypes.pop(-1) + + # At this point, we've built up "code" to have all the necessary extra + # instructions needed to implement whatever types of operands were + # specified. Now we'll assemble it it into a StaticInst. + return assembleMicro(name, Name, code) +}}; diff --git a/src/arch/x86/predecoder.cc b/src/arch/x86/predecoder.cc index 80971e7cf..573012ee6 100644 --- a/src/arch/x86/predecoder.cc +++ b/src/arch/x86/predecoder.cc @@ -117,37 +117,33 @@ namespace X86ISA //Operand size override prefixes case OperandSizeOverride: DPRINTF(Predecoder, "Found operand size override prefix.\n"); + emi.legacy.op = true; break; case AddressSizeOverride: DPRINTF(Predecoder, "Found address size override prefix.\n"); + emi.legacy.addr = true; break; //Segment override prefixes case CSOverride: - DPRINTF(Predecoder, "Found cs segment override.\n"); - break; case DSOverride: - DPRINTF(Predecoder, "Found ds segment override.\n"); - break; case ESOverride: - DPRINTF(Predecoder, "Found es segment override.\n"); - break; case FSOverride: - DPRINTF(Predecoder, "Found fs segment override.\n"); - break; case GSOverride: - DPRINTF(Predecoder, "Found gs segment override.\n"); - break; case SSOverride: - DPRINTF(Predecoder, "Found ss segment override.\n"); + DPRINTF(Predecoder, "Found segment override.\n"); + emi.legacy.seg = prefix; break; case Lock: DPRINTF(Predecoder, "Found lock prefix.\n"); + emi.legacy.lock = true; break; case Rep: DPRINTF(Predecoder, "Found rep prefix.\n"); + emi.legacy.rep = true; break; case Repne: DPRINTF(Predecoder, "Found repne prefix.\n"); + emi.legacy.repne = true; break; case RexPrefix: DPRINTF(Predecoder, "Found Rex prefix %#x.\n", nextByte); @@ -198,16 +194,36 @@ namespace X86ISA displacementCollected = 0; emi.displacement = 0; + //Figure out the effective operand size. This can be overriden to + //a fixed value at the decoder level. + if(/*FIXME long mode*/1) + { + if(emi.rex && emi.rex.w) + emi.opSize = 3; // 64 bit operand size + else if(emi.legacy.op) + emi.opSize = 1; // 16 bit operand size + else + emi.opSize = 2; // 32 bit operand size + } + else if(/*FIXME default 32*/1) + { + if(emi.legacy.op) + emi.opSize = 1; // 16 bit operand size + else + emi.opSize = 2; // 32 bit operand size + } + else // 16 bit default operand size + { + if(emi.legacy.op) + emi.opSize = 2; // 32 bit operand size + else + emi.opSize = 1; // 16 bit operand size + } + //Figure out how big of an immediate we'll retreive based //on the opcode. - int immType = ImmediateType[ - emi.opcode.num - 1][nextByte]; - if(0) //16 bit mode - immediateSize = ImmediateTypeToSize[0][immType]; - else if(!(emi.rex & 0x4)) //32 bit mode - immediateSize = ImmediateTypeToSize[1][immType]; - else //64 bit mode - immediateSize = ImmediateTypeToSize[2][immType]; + int immType = ImmediateType[emi.opcode.num - 1][nextByte]; + immediateSize = SizeTypeToSize[emi.opSize - 1][immType]; //Determine what to expect next if (UsesModRM[emi.opcode.num - 1][nextByte]) { @@ -351,6 +367,16 @@ namespace X86ISA if(immediateSize == immediateCollected) { + //XXX Warning! The following is an observed pattern and might + //not always be true! + + //Instructions which use 64 bit operands but 32 bit immediates + //need to have the immediate sign extended to 64 bits. + //Instructions which use true 64 bit immediates won't be + //affected, and instructions that use true 32 bit immediates + //won't notice. + if(immediateSize == 4) + emi.immediate = sext<32>(emi.immediate); DPRINTF(Predecoder, "Collected immediate %#x.\n", emi.immediate); emiIsReady = true; diff --git a/src/arch/x86/predecoder.hh b/src/arch/x86/predecoder.hh index 1df17d6d2..6562ab9f5 100644 --- a/src/arch/x86/predecoder.hh +++ b/src/arch/x86/predecoder.hh @@ -73,7 +73,7 @@ namespace X86ISA static const uint8_t Prefixes[256]; static const uint8_t UsesModRM[2][256]; static const uint8_t ImmediateType[2][256]; - static const uint8_t ImmediateTypeToSize[3][10]; + static const uint8_t SizeTypeToSize[3][10]; protected: ThreadContext * tc; diff --git a/src/arch/x86/predecoder_tables.cc b/src/arch/x86/predecoder_tables.cc index f233ad234..38b9c57a3 100644 --- a/src/arch/x86/predecoder_tables.cc +++ b/src/arch/x86/predecoder_tables.cc @@ -141,7 +141,7 @@ namespace X86ISA } }; - enum ImmediateTypes { + enum SizeType { NoImm, NI = NoImm, ByteImm, @@ -158,19 +158,19 @@ namespace X86ISA VW = VWordImm, ZWordImm, ZW = ZWordImm, - Pointer, - PO = Pointer, //The enter instruction takes -2- immediates for a total of 3 bytes Enter, - EN = Enter + EN = Enter, + Pointer, + PO = Pointer }; - const uint8_t Predecoder::ImmediateTypeToSize[3][10] = + const uint8_t Predecoder::SizeTypeToSize[3][10] = { -// noimm byte word dword qword oword vword zword enter - {0, 1, 2, 4, 8, 16, 2, 2, 3, 4}, //16 bit - {0, 1, 2, 4, 8, 16, 4, 4, 3, 6}, //32 bit - {0, 1, 2, 4, 8, 16, 4, 8, 3, 0} //64 bit +// noimm byte word dword qword oword vword zword enter pointer + {0, 1, 2, 4, 8, 16, 2, 2, 3, 4 }, //16 bit + {0, 1, 2, 4, 8, 16, 4, 4, 3, 6 }, //32 bit + {0, 1, 2, 4, 8, 16, 4, 8, 3, 0 } //64 bit }; //This table determines the immediate type. The first index is the diff --git a/src/arch/x86/types.hh b/src/arch/x86/types.hh index cdac3c00e..022f20ee5 100644 --- a/src/arch/x86/types.hh +++ b/src/arch/x86/types.hh @@ -70,25 +70,31 @@ namespace X86ISA typedef uint64_t MachInst; enum Prefixes { - NoOverride = 0, - CSOverride = 1, - DSOverride = 2, - ESOverride = 3, - FSOverride = 4, - GSOverride = 5, - SSOverride = 6, - //The Rex prefix obviously doesn't fit in with the above, but putting - //it here lets us save double the space the enums take up. - RexPrefix = 7, + NoOverride, + CSOverride, + DSOverride, + ESOverride, + FSOverride, + GSOverride, + SSOverride, + RexPrefix, + OperandSizeOverride, + AddressSizeOverride, + Lock, + Rep, + Repne + }; + + BitUnion8(LegacyPrefixVector) + Bitfield<7> repne; + Bitfield<6> rep; + Bitfield<5> lock; + Bitfield<4> addr; + Bitfield<3> op; //There can be only one segment override, so they share the //first 3 bits in the legacyPrefixes bitfield. - SegmentOverride = 0x7, - OperandSizeOverride = 8, - AddressSizeOverride = 16, - Lock = 32, - Rep = 64, - Repne = 128 - }; + Bitfield<2,0> seg; + EndBitUnion(LegacyPrefixVector) BitUnion8(ModRM) Bitfield<7,6> mod; @@ -118,7 +124,7 @@ namespace X86ISA struct ExtMachInst { //Prefixes - uint8_t legacy; + LegacyPrefixVector legacy; Rex rex; //This holds all of the bytes of the opcode struct @@ -140,6 +146,10 @@ namespace X86ISA //Immediate fields uint64_t immediate; uint64_t displacement; + + //The effective operand size. + uint8_t opSize; + //The }; inline static std::ostream & diff --git a/src/arch/x86/utility.hh b/src/arch/x86/utility.hh index e0bd09515..1c98e7fbc 100644 --- a/src/arch/x86/utility.hh +++ b/src/arch/x86/utility.hh @@ -78,7 +78,8 @@ namespace __hash_namespace { ((uint64_t)emi.opcode.prefixA << 16) | ((uint64_t)emi.opcode.prefixB << 8) | ((uint64_t)emi.opcode.op)) ^ - emi.immediate ^ emi.displacement; + emi.immediate ^ emi.displacement ^ + emi.opSize; }; }; } diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index 6c6d90076..eed05c2f1 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -877,6 +877,11 @@ BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res) effAddrValid = true; physEffAddr = req->getPaddr(); memReqFlags = req->getFlags(); + + if (req->isCondSwap()) { + assert(res); + req->setExtraData(*res); + } #if 0 if (cpu->system->memctrl->badaddr(physEffAddr)) { fault = TheISA::genMachineCheckFault(); diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index e1b27048d..f24de20d9 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -289,15 +289,19 @@ class LSQUnit { struct SQEntry { /** Constructs an empty store queue entry. */ SQEntry() - : inst(NULL), req(NULL), size(0), data(0), + : inst(NULL), req(NULL), size(0), canWB(0), committed(0), completed(0) - { } + { + bzero(data, sizeof(data)); + } /** Constructs a store queue entry for a given instruction. */ SQEntry(DynInstPtr &_inst) - : inst(_inst), req(NULL), size(0), data(0), + : inst(_inst), req(NULL), size(0), canWB(0), committed(0), completed(0) - { } + { + bzero(data, sizeof(data)); + } /** The store instruction. */ DynInstPtr inst; @@ -306,7 +310,7 @@ class LSQUnit { /** The size of the store. */ int size; /** The store data. */ - IntReg data; + char data[sizeof(IntReg)]; /** Whether or not the store can writeback. */ bool canWB; /** Whether or not the store is committed. */ @@ -554,22 +558,14 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) if ((store_has_lower_limit && store_has_upper_limit)) { // Get shift amount for offset into the store's data. int shift_amt = req->getVaddr() & (store_size - 1); - // @todo: Magic number, assumes byte addressing - shift_amt = shift_amt << 3; - - // Cast this to type T? - data = storeQueue[store_idx].data >> shift_amt; - // When the data comes from the store queue entry, it's in host - // order. When it gets sent to the load, it needs to be in guest - // order so when the load converts it again, it ends up back - // in host order like the inst expects. - data = TheISA::htog(data); + memcpy(&data, storeQueue[store_idx].data + shift_amt, sizeof(T)); assert(!load_inst->memData); load_inst->memData = new uint8_t[64]; - memcpy(load_inst->memData, &data, req->getSize()); + memcpy(load_inst->memData, + storeQueue[store_idx].data + shift_amt, req->getSize()); DPRINTF(LSQUnit, "Forwarding from store idx %i to load to " "addr %#x, data %#x\n", @@ -716,7 +712,10 @@ LSQUnit<Impl>::write(Request *req, T &data, int store_idx) storeQueue[store_idx].req = req; storeQueue[store_idx].size = sizeof(T); - storeQueue[store_idx].data = data; + assert(sizeof(T) <= sizeof(storeQueue[store_idx].data)); + + T gData = htog(data); + memcpy(storeQueue[store_idx].data, &gData, sizeof(T)); // This function only writes the data to the store queue, so no fault // can happen here. diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index 2aa0d6b6a..44e2cea76 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -645,22 +645,10 @@ LSQUnit<Impl>::writebackStores() assert(!inst->memData); inst->memData = new uint8_t[64]; - TheISA::IntReg convertedData = - TheISA::htog(storeQueue[storeWBIdx].data); - - //FIXME This is a hack to get SPARC working. It, along with endianness - //in the memory system in general, need to be straightened out more - //formally. The problem is that the data's endianness is swapped when - //it's in the 64 bit data field in the store queue. The data that you - //want won't start at the beginning of the field anymore unless it was - //a 64 bit access. - memcpy(inst->memData, - (uint8_t *)&convertedData + - (TheISA::ByteOrderDiffers ? - (sizeof(TheISA::IntReg) - req->getSize()) : 0), - req->getSize()); - - PacketPtr data_pkt = new Packet(req, MemCmd::WriteReq, + memcpy(inst->memData, storeQueue[storeWBIdx].data, req->getSize()); + + MemCmd command = req->isSwap() ? MemCmd::SwapReq : MemCmd::WriteReq; + PacketPtr data_pkt = new Packet(req, command, Packet::Broadcast); data_pkt->dataStatic(inst->memData); @@ -677,7 +665,7 @@ LSQUnit<Impl>::writebackStores() inst->seqNum); // @todo: Remove this SC hack once the memory system handles it. - if (req->isLocked()) { + if (inst->isStoreConditional()) { // Disable recording the result temporarily. Writing to // misc regs normally updates the result, but this is not // the desired behavior when handling store conditionals. diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh index bbc69fc96..b5b1cd021 100644 --- a/src/cpu/o3/regfile.hh +++ b/src/cpu/o3/regfile.hh @@ -174,7 +174,7 @@ class PhysRegFile // Remove the base Float reg dependency. reg_idx = reg_idx - numPhysicalIntRegs; - assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + assert(reg_idx < numPhysicalFloatRegs); DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n", int(reg_idx), (uint64_t)val); @@ -189,7 +189,7 @@ class PhysRegFile // Remove the base Float reg dependency. reg_idx = reg_idx - numPhysicalIntRegs; - assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + assert(reg_idx < numPhysicalFloatRegs); DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n", int(reg_idx), (uint64_t)val); @@ -204,7 +204,7 @@ class PhysRegFile // Remove the base Float reg dependency. reg_idx = reg_idx - numPhysicalIntRegs; - assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + assert(reg_idx < numPhysicalFloatRegs); DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n", int(reg_idx), (uint64_t)val); @@ -217,7 +217,7 @@ class PhysRegFile // Remove the base Float reg dependency. reg_idx = reg_idx - numPhysicalIntRegs; - assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + assert(reg_idx < numPhysicalFloatRegs); DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n", int(reg_idx), (uint64_t)val); @@ -232,11 +232,11 @@ class PhysRegFile MiscReg readMiscReg(int misc_reg, unsigned thread_id) { - return miscRegs[thread_id].readReg(misc_reg, - cpu->tcBase(thread_id)); + return miscRegs[thread_id].readReg(misc_reg, cpu->tcBase(thread_id)); } - void setMiscRegNoEffect(int misc_reg, const MiscReg &val, unsigned thread_id) + void setMiscRegNoEffect(int misc_reg, + const MiscReg &val, unsigned thread_id) { miscRegs[thread_id].setRegNoEffect(misc_reg, val); } diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index ec630b31e..431705e19 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -996,7 +996,12 @@ DefaultRename<Impl>::renameSrcRegs(DynInstPtr &inst,unsigned tid) if (src_reg < TheISA::FP_Base_DepTag) { flat_src_reg = TheISA::flattenIntIndex(inst->tcBase(), src_reg); DPRINTF(Rename, "Flattening index %d to %d.\n", (int)src_reg, (int)flat_src_reg); + } else { + // Floating point and Miscellaneous registers need their indexes + // adjusted to account for the expanded number of flattened int regs. + flat_src_reg = src_reg - TheISA::FP_Base_DepTag + TheISA::NumIntRegs; } + inst->flattenSrcReg(src_idx, flat_src_reg); // Look up the source registers to get the phys. register they've @@ -1033,8 +1038,13 @@ DefaultRename<Impl>::renameDestRegs(DynInstPtr &inst,unsigned tid) RegIndex dest_reg = inst->destRegIdx(dest_idx); RegIndex flat_dest_reg = dest_reg; if (dest_reg < TheISA::FP_Base_DepTag) { + // Integer registers are flattened. flat_dest_reg = TheISA::flattenIntIndex(inst->tcBase(), dest_reg); DPRINTF(Rename, "Flattening index %d to %d.\n", (int)dest_reg, (int)flat_dest_reg); + } else { + // Floating point and Miscellaneous registers need their indexes + // adjusted to account for the expanded number of flattened int regs. + flat_dest_reg = dest_reg - TheISA::FP_Base_DepTag + TheISA::NumIntRegs; } inst->flattenDestReg(dest_idx, flat_dest_reg); |