18 files changed, 404 insertions, 309 deletions
diff --git a/src/arch/sparc/isa/formats/mem/swap.isa b/src/arch/sparc/isa/formats/mem/swap.isa
index 818597a84..b71542a2b 100644
--- a/src/arch/sparc/isa/formats/mem/swap.isa
+++ b/src/arch/sparc/isa/formats/mem/swap.isa
@@ -137,7 +137,7 @@ def format Swap(code, postacc_code, mem_flags, *opt_flags) {{
      decoder_output,
      exec_output,
      decode_block) = doMemFormat(code, SwapFuncs, '', name, Name, flags,
-         opt_flags, postacc_code)
+         ["IsStoreConditional"], postacc_code)
 }};
 
 def format SwapAlt(code, postacc_code, asi, mem_flags, *opt_flags) {{
@@ -148,7 +148,7 @@ def format SwapAlt(code, postacc_code, asi, mem_flags, *opt_flags) {{
      decoder_output,
      exec_output,
      decode_block) = doMemFormat(code, SwapFuncs, AlternateASIPrivFaultCheck,
-         name, Name, flags, opt_flags, postacc_code)
+         name, Name, flags, ["IsStoreConditional"], postacc_code)
 }};
 
 
@@ -163,8 +163,8 @@ let {{
         decode_block = BasicDecode.subst(iop)
         microParams = {"code": code, "postacc_code" : postacc_code,
             "ea_code" : addrCalcReg, "fault_check" : faultCode}
-        exec_output = doSplitExecute(execute, name, Name, asi, opt_flags,
-                microParams);
+        exec_output = doSplitExecute(execute, name, Name, asi,
+                ["IsStoreConditional"], microParams);
         return (header_output, decoder_output, exec_output, decode_block)
 }};
 
@@ -177,7 +177,7 @@ def format CasAlt(code, postacc_code, asi, mem_flags, *opt_flags) {{
      decoder_output,
      exec_output,
      decode_block) = doCasFormat(code, SwapFuncs, AlternateASIPrivFaultCheck,
-         name, Name, flags, opt_flags, postacc_code)
+         name, Name, flags, ["IsStoreConditional"], postacc_code)
 }};
 
 
diff --git a/src/arch/x86/isa/base.isa b/src/arch/x86/isa/base.isa
index 4776f7a7e..cd166b306 100644
--- a/src/arch/x86/isa/base.isa
+++ b/src/arch/x86/isa/base.isa
@@ -79,6 +79,13 @@ output header {{
             void printReg(std::ostream &os, int reg) const;
             void printSrcReg(std::ostream &os, int reg) const;
             void printDestReg(std::ostream &os, int reg) const;
+
+            inline uint64_t merge(uint64_t into, uint64_t val, int size) const
+            {
+                //FIXME This needs to be significantly more sophisticated
+                return val;
+            }
+
         };
 }};
 
diff --git a/src/arch/x86/isa/formats/formats.isa b/src/arch/x86/isa/formats/formats.isa
index f4e5c402f..d763c05bc 100644
--- a/src/arch/x86/isa/formats/formats.isa
+++ b/src/arch/x86/isa/formats/formats.isa
@@ -95,9 +95,6 @@
 //malfunction of the decode mechanism.
 ##include "error.isa"
 
-//Include code to build up macro op instructions
-##include "macroop.isa"
-
 //Include a format which implements a batch of instructions which do the same
 //thing on a variety of inputs
 ##include "multi.isa"
diff --git a/src/arch/x86/isa/formats/macroop.isa b/src/arch/x86/isa/macroop.isa
index 717103df1..7d41a2dea 100644
--- a/src/arch/x86/isa/formats/macroop.isa
+++ b/src/arch/x86/isa/macroop.isa
@@ -55,16 +55,20 @@
 //
 // Authors: Gabe Black
 
-////////////////////////////////////////////////////////////////////
-//
-// Instructions that do the same thing to multiple sets of arguments.
-//
+// Execute method for macroops.
+def template MacroExecPanic {{
+        Fault execute(%(CPU_exec_context)s *, Trace::InstRecord *) const
+        {
+            panic("Tried to execute macroop directly!");
+            M5_DUMMY_RETURN
+        }
+}};
 
 output header {{
 
         // Base class for most macroops, except ones that need to commit as
         // they go.
-        class X86MacroInst : public X86StaticInst
+        class X86MacroInst : public StaticInst
         {
           protected:
             const uint32_t numMicroOps;
@@ -72,7 +76,7 @@ output header {{
             //Constructor.
             X86MacroInst(const char *mnem, ExtMachInst _machInst,
                     uint32_t _numMicroOps)
-                        : X86StaticInst(mnem, _machInst, No_OpClass),
+                        : StaticInst(mnem, _machInst, No_OpClass),
                         numMicroOps(_numMicroOps)
             {
                 assert(numMicroOps);
@@ -85,9 +89,6 @@ output header {{
                 delete [] microOps;
             }
 
-            std::string generateDisassembly(Addr pc,
-                const SymbolTable *symtab) const;
-
             StaticInstPtr * microOps;
 
             StaticInstPtr fetchMicroOp(MicroPC microPC)
@@ -96,21 +97,7 @@ output header {{
                 return microOps[microPC];
             }
 
-            %(BasicExecPanic)s
-        };
-
-        // Base class for macroops which commit as they go. This is for
-        // instructions which can be partially completed like those with the
-        // rep prefix. This prevents those instructions from overflowing
-        // buffers with uncommitted microops.
-        class X86RollingMacroInst : public X86MacroInst
-        {
-          protected:
-            //Constructor.
-            X86RollingMacroInst(const char *mnem, ExtMachInst _machInst,
-                    uint32_t _numMicroOps)
-                        : X86MacroInst(mnem, _machInst, numMicroOps)
-            {}
+            %(MacroExecPanic)s
         };
 }};
 
@@ -121,34 +108,24 @@ def template MacroConstructor {{
         {
                 %(constructor)s;
                 //alloc_micro_ops is the code that sets up the microOps
-                //array in the parent class. This hook will hopefully
-                //allow all that to be automated.
+                //array in the parent class.
                 %(alloc_micro_ops)s;
-                setMicroFlags();
         }
 }};
 
 let {{
-    def genMacroOp(name, Name, ops, rolling = False):
+    def genMacroOp(name, Name, opSeq):
         baseClass = 'X86MacroInst'
-        if rolling:
-            baseClass = 'X86RollingMacroInst'
-        numMicroOps = len(ops)
+        numMicroOps = len(opSeq.ops)
         allocMicroOps = ''
         micropc = 0
-        allocMicroOps += \
-            "microOps[0] = %s;\n" % \
-            op.getAllocator(True, not rolling, True, False)
-        micropc += 1
-        if numMicroOps > 2:
-            for op in ops[1:-1]:
-                allocMicroOps += \
-                    "microOps[%d] = %s;\n" % \
-                    (micropc, op.getAllocator(True, not rolling, False, False))
-                micropc += 1
-        allocMicroOps += \
-            "microOps[%d] = %s;\n" % \
-            op.getAllocator(True, not rolling, False, True)
+        for op in opSeq.ops:
+            allocMicroOps += \
+                "microOps[%d] = %s;\n" % \
+                (micropc, op.getAllocator(True, op.delayed,
+                                          micropc == 0,
+                                          micropc == numMicroOps - 1))
+            micropc += 1
         iop = InstObjParams(name, Name, baseClass,
                 {'code' : '', 'num_micro_ops' : numMicroOps,
                 'alloc_micro_ops' : allocMicroOps})
diff --git a/src/arch/x86/isa/main.isa b/src/arch/x86/isa/main.isa
index cc3a9bee4..063d7125d 100644
--- a/src/arch/x86/isa/main.isa
+++ b/src/arch/x86/isa/main.isa
@@ -72,26 +72,55 @@
 
 namespace X86ISA;
 
-//Include the simple microcode assembler
-##include "microasm.isa"
+////////////////////////////////////////////////////////////////////
+//
+// General infrastructure code. These files provide infrastructure
+// which was developed to support x86 but isn't specific to it.
+//
 
-//Include the bitfield definitions
-##include "bitfields.isa"
+//Include code to build macroops.
+##include "macroop.isa"
 
-//Include the operand_types and operand definitions
-##include "operands.isa"
+//Include the simple microcode assembler. This will hopefully stay
+//unspecialized for x86 and can later be made available to other ISAs.
+##include "microasm.isa"
+
+////////////////////////////////////////////////////////////////////
+//
+// X86 only infrastructure code.
+//
 
-//Include the base class for x86 instructions, and some support code
+//Include the base class for x86 instructions, and some support code.
 ##include "base.isa"
 
-//Include the instruction definitions
-##include "insts/insts.isa"
+//Include code to specialize an instruction template to operate on
+//a particular set of operands. This is specific to x86 and the x86
+//microcode ISA.
+##include "specialize.isa"
+
+////////////////////////////////////////////////////////////////////
+//
+// Code which directly specifies isa components like instructions,
+// microops, and the decoder.
+//
 
 //Include the definitions for the instruction formats
 ##include "formats/formats.isa"
 
-//Include the definitions of the micro ops
+//Include the operand_types and operand definitions. These are needed by
+//the microop definitions.
+##include "operands.isa"
+
+//Include the definitions of the micro ops.
+//These are StaticInst classes which stand on their own and make up an
+//internal instruction set.
 ##include "microops/microops.isa"
 
+//Include the instruction definitions which are microop assembler programs.
+##include "insts/insts.isa"
+
+//Include the bitfield definitions
+##include "bitfields.isa"
+
 //Include the decoder definition
 ##include "decoder/decoder.isa"
diff --git a/src/arch/x86/isa/microasm.isa b/src/arch/x86/isa/microasm.isa
index b94b55aab..23567aae9 100644
--- a/src/arch/x86/isa/microasm.isa
+++ b/src/arch/x86/isa/microasm.isa
@@ -57,152 +57,17 @@
 
 ////////////////////////////////////////////////////////////////////
 //
-//  Code to "specialize" a microcode sequence to use a particular
-//  variety of operands
+//  The microcode assembler
 //
 
 let {{
-    # This builds either a regular or macro op to implement the sequence of
-    # ops we give it.
-    def genInst(name, Name, ops):
-        # If we can implement this instruction with exactly one microop, just
-        # use that directly.
-        newStmnt = ''
-        if len(ops) == 1:
-            decode_block = "return (X86StaticInst *)(%s);" % \
-                            ops[0].getAllocator()
-            return ('', '', decode_block, '')
-        else:
-            # Build a macroop to contain the sequence of microops we've
-            # been given.
-            return genMacroOp(name, Name, ops)
-}};
-
-let {{
-    # This code builds up a decode block which decodes based on switchval.
-    # vals is a dict which matches case values with what should be decoded to.
-    # builder is called on the exploded contents of "vals" values to generate
-    # whatever code should be used.
-    def doSplitDecode(name, Name, builder, switchVal, vals, default = None):
-        header_output = ''
-        decoder_output = ''
-        decode_block = 'switch(%s) {\n' % switchVal
-        exec_output = ''
-        for (val, todo) in vals.items():
-            (new_header_output,
-             new_decoder_output,
-             new_decode_block,
-             new_exec_output) = builder(name, Name, *todo)
-            header_output += new_header_output
-            decoder_output += new_decoder_output
-            decode_block += '\tcase %s: %s\n' % (val, new_decode_block)
-            exec_output += new_exec_output
-        if default:
-            (new_header_output,
-             new_decoder_output,
-             new_decode_block,
-             new_exec_output) = builder(name, Name, *default)
-            header_output += new_header_output
-            decoder_output += new_decoder_output
-            decode_block += '\tdefault: %s\n' % new_decode_block
-            exec_output += new_exec_output
-        decode_block += '}\n'
-        return (header_output, decoder_output, decode_block, exec_output)
-}};
-
-let {{
-    class OpType(object):
-        parser = re.compile(r"(?P<tag>[A-Z][A-Z]*)(?P<size>[a-z][a-z]*)|(r(?P<reg>[A-Za-z0-9][A-Za-z0-9]*))")
-        def __init__(self, opTypeString):
-            match = OpType.parser.search(opTypeString)
-            if match == None:
-                raise Exception, "Problem parsing operand type %s" % opTypeString
-            self.reg = match.group("reg")
-            self.tag = match.group("tag")
-            self.size = match.group("size")
+    # These are used when setting up microops so that they can specialize their
+    # base class template properly.
+    RegOpType = "RegisterOperand"
+    ImmOpType = "ImmediateOperand"
 }};
 
 let {{
-
-    # This function specializes the given piece of code to use a particular
-    # set of argument types described by "opTypes". These are "implemented"
-    # in reverse order.
-    def specializeInst(name, Name, code, opTypes):
-        opNum = len(opTypes) - 1
-        while len(opTypes):
-            # print "Building a composite op with tags", opTypes
-            # print "And code", code
-            opNum = len(opTypes) - 1
-            # A regular expression to find the operand placeholders we're
-            # interested in.
-            opRe = re.compile("\\^(?P<operandNum>%d)(?=[^0-9]|$)" % opNum)
-
-            # Parse the operand type strign we're working with
-            opType = OpType(opTypes[opNum])
-
-            if opType.reg:
-                #Figure out what to do with fixed register operands
-                if opType.reg in ("Ax", "Bx", "Cx", "Dx"):
-                    code = opRe.sub("%%{INTREG_R%s}" % opType.reg.upper(), code)
-                elif opType.reg == "Al":
-                    # We need a way to specify register width
-                    code = opRe.sub("%{INTREG_RAX}", code)
-                else:
-                    print "Didn't know how to encode fixed register %s!" % opType.reg
-            elif opType.tag == None or opType.size == None:
-                raise Exception, "Problem parsing operand tag: %s" % opType.tag
-            elif opType.tag in ("C", "D", "G", "P", "S", "T", "V"):
-                # Use the "reg" field of the ModRM byte to select the register
-                code = opRe.sub("%{(uint8_t)MODRM_REG}", code)
-            elif opType.tag in ("E", "Q", "W"):
-                # This might refer to memory or to a register. We need to
-                # divide it up farther.
-                regCode = opRe.sub("%{(uint8_t)MODRM_RM}", code)
-                regTypes = copy.copy(opTypes)
-                regTypes.pop(-1)
-                # This needs to refer to memory, but we'll fill in the details
-                # later. It needs to take into account unaligned memory
-                # addresses.
-                memCode = opRe.sub("%0", code)
-                memTypes = copy.copy(opTypes)
-                memTypes.pop(-1)
-                return doSplitDecode(name, Name, specializeInst, "MODRM_MOD",
-                    {"3" : (regCode, regTypes)}, (memCode, memTypes))
-            elif opType.tag in ("I", "J"):
-                # Immediates are already in the instruction, so don't leave in
-                # those parameters
-                code = opRe.sub("${IMMEDIATE}", code)
-            elif opType.tag == "M":
-                # This needs to refer to memory, but we'll fill in the details
-                # later. It needs to take into account unaligned memory
-                # addresses.
-                code = opRe.sub("%0", code)
-            elif opType.tag in ("PR", "R", "VR"):
-                # There should probably be a check here to verify that mod
-                # is equal to 11b
-                code = opRe.sub("%{(uint8_t)MODRM_RM}", code)
-            else:
-                raise Exception, "Unrecognized tag %s." % opType.tag
-            opTypes.pop(-1)
-
-        # At this point, we've built up "code" to have all the necessary extra
-        # instructions needed to implement whatever types of operands were
-        # specified. Now we'll assemble it it into a microOp sequence.
-        ops = assembleMicro(code)
-
-        # Build a macroop to contain the sequence of microops we've
-        # constructed. The decode block will be used to fill in our
-        # inner decode structure, and the rest will be concatenated and
-        # passed back.
-        return genInst(name, Name, ops)
-}};
-
-////////////////////////////////////////////////////////////////////
-//
-//  The microcode assembler
-//
-
-let {{
     class MicroOpStatement(object):
         def __init__(self):
             self.className = ''
@@ -242,19 +107,9 @@ let {{
             return 'new %s%s(machInst%s%s)' % (self.className, signature, self.microFlagsText(microFlags), args)
 }};
 
-let {{
-    def buildLabelDict(ops):
-        labels = {}
-        micropc = 0
-        for op in ops:
-            if op.label:
-                labels[op.label] = count
-            micropc += 1
-        return labels
-}};
-
 let{{
-    def assembleMicro(code):
+    def assembleMicro(name, Name, code):
+
         # This function takes in a block of microcode assembly and returns
         # a python list of objects which describe it.
 
@@ -341,7 +196,13 @@ let{{
             lineMatch = lineRe.search(code)
 
         # Decode the labels into displacements
-        labels = buildLabelDict(statements)
+
+        labels = {}  # maps each label to the micropc of its statement
+        micropc = 0
+        for statement in statements:
+            if statement.label:
+                labels[statement.label] = micropc
+            micropc += 1
         micropc = 0
         for statement in statements:
             for arg in statement.args:
@@ -353,5 +214,15 @@ let{{
                     # micropc + 1 + displacement.
                     arg["operandImm"] = labels[arg["operandLabel"]] - micropc - 1
             micropc += 1
-        return statements
+
+        # If we can implement this instruction with exactly one microop, just
+        # use that directly.
+        if len(statements) == 1:
+            decode_block = "return %s;" % \
+                            statements[0].getAllocator()
+            return ('', '', decode_block, '')
+        else:
+            # Build a macroop to contain the sequence of microops we've
+            # been given.
+            return genMacroOp(name, Name, statements)
 }};
diff --git a/src/arch/x86/isa/microops/base.isa b/src/arch/x86/isa/microops/base.isa
index b1351d999..4254994f3 100644
--- a/src/arch/x86/isa/microops/base.isa
+++ b/src/arch/x86/isa/microops/base.isa
@@ -63,12 +63,15 @@ output header {{
     };
 }};
 
-//A class which is the base of all x86 micro ops it provides a function to
+//A class which is the base of all x86 micro ops. It provides a function to
 //set necessary flags appropriately.
 output header {{
     class X86MicroOpBase : public X86StaticInst
     {
       protected:
+        uint8_t opSize;
+        uint8_t addrSize;
+
         X86MicroOpBase(bool isMicro, bool isDelayed,
                 bool isFirst, bool isLast,
                 const char *mnem, ExtMachInst _machInst,
@@ -94,6 +97,7 @@ def template BaseMicroOpTemplateDeclare {{
 
 let {{
     def buildBaseMicroOpTemplate(Name, numParams):
+        assert(numParams > 0)
         signature = "<"
         signature += "int SignatureOperandTypeSpecifier0"
         for count in xrange(1,numParams):
@@ -102,10 +106,9 @@ let {{
         signature += ">"
         subs = {"signature" : signature, "class_name" : Name}
         return BaseMicroOpTemplateDeclare.subst(subs)
+}};
 
-    RegOpType = "RegisterOperand"
-    ImmOpType = "ImmediateOperand"
-
+let {{
     def buildMicroOpTemplateDict(*params):
         signature = "<"
         if len(params):
diff --git a/src/arch/x86/isa/specialize.isa b/src/arch/x86/isa/specialize.isa
new file mode 100644
index 000000000..9cac09770
--- /dev/null
+++ b/src/arch/x86/isa/specialize.isa
@@ -0,0 +1,172 @@
+// -*- mode:c++ -*-
+
+// Copyright (c) 2007 The Hewlett-Packard Development Company
+// All rights reserved.
+//
+// Redistribution and use of this software in source and binary forms,
+// with or without modification, are permitted provided that the
+// following conditions are met:
+//
+// The software must be used only for Non-Commercial Use which means any
+// use which is NOT directed to receiving any direct monetary
+// compensation for, or commercial advantage from such use.  Illustrative
+// examples of non-commercial use are academic research, personal study,
+// teaching, education and corporate research & development.
+// Illustrative examples of commercial use are distributing products for
+// commercial advantage and providing services using the software for
+// commercial advantage.
+//
+// If you wish to use this software or functionality therein that may be
+// covered by patents for commercial use, please contact:
+//     Director of Intellectual Property Licensing
+//     Office of Strategy and Technology
+//     Hewlett-Packard Company
+//     1501 Page Mill Road
+//     Palo Alto, California  94304
+//
+// Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.  Redistributions
+// in binary form must reproduce the above copyright notice, this list of
+// conditions and the following disclaimer in the documentation and/or
+// other materials provided with the distribution.  Neither the name of
+// the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.  No right of
+// sublicense is granted herewith.  Derivatives of the software and
+// output created using the software may be prepared, but only for
+// Non-Commercial Uses.  Derivatives of the software may be shared with
+// others provided: (i) the others agree to abide by the list of
+// conditions herein which includes the Non-Commercial Use restrictions;
+// and (ii) such Derivatives of the software include the above copyright
+// notice to acknowledge the contribution from this software where
+// applicable, this list of conditions and the disclaimer below.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Gabe Black
+
+////////////////////////////////////////////////////////////////////
+//
+//  Code to "specialize" a microcode sequence to use a particular
+//  variety of operands
+//
+
+let {{
+    # This code builds up a decode block which decodes based on switchval.
+    # vals is a dict which matches case values with what should be decoded to.
+    # builder is called on the exploded contents of "vals" values to generate
+    # whatever code should be used.
+    def doSplitDecode(name, Name, builder, switchVal, vals, default = None):
+        header_output = ''
+        decoder_output = ''
+        decode_block = 'switch(%s) {\n' % switchVal
+        exec_output = ''
+        for (val, todo) in vals.items():
+            (new_header_output,
+             new_decoder_output,
+             new_decode_block,
+             new_exec_output) = builder(name, Name, *todo)
+            header_output += new_header_output
+            decoder_output += new_decoder_output
+            decode_block += '\tcase %s: %s\n' % (val, new_decode_block)
+            exec_output += new_exec_output
+        if default:
+            (new_header_output,
+             new_decoder_output,
+             new_decode_block,
+             new_exec_output) = builder(name, Name, *default)
+            header_output += new_header_output
+            decoder_output += new_decoder_output
+            decode_block += '\tdefault: %s\n' % new_decode_block
+            exec_output += new_exec_output
+        decode_block += '}\n'
+        return (header_output, decoder_output, decode_block, exec_output)
+}};
+
+let {{
+    class OpType(object):
+        parser = re.compile(r"(?P<tag>[A-Z][A-Z]*)(?P<size>[a-z][a-z]*)|(r(?P<reg>[A-Za-z0-9][A-Za-z0-9]*))")
+        def __init__(self, opTypeString):
+            match = OpType.parser.search(opTypeString)
+            if match == None:
+                raise Exception, "Problem parsing operand type %s" % opTypeString
+            self.reg = match.group("reg")
+            self.tag = match.group("tag")
+            self.size = match.group("size")
+
+    # This function specializes the given piece of code to use a particular
+    # set of argument types described by "opTypes". These are "implemented"
+    # in reverse order.
+    def specializeInst(name, Name, code, opTypes):
+        opNum = len(opTypes) - 1
+        while len(opTypes):
+            # print "Building a composite op with tags", opTypes
+            # print "And code", code
+            opNum = len(opTypes) - 1
+            # A regular expression to find the operand placeholders we're
+            # interested in.
+            opRe = re.compile("\\^(?P<operandNum>%d)(?=[^0-9]|$)" % opNum)
+
+            # Parse the operand type string we're working with
+            opType = OpType(opTypes[opNum])
+
+            if opType.reg:
+                #Figure out what to do with fixed register operands
+                if opType.reg in ("Ax", "Bx", "Cx", "Dx"):
+                    code = opRe.sub("%%{INTREG_R%s}" % opType.reg.upper(), code)
+                elif opType.reg == "Al":
+                    # We need a way to specify register width
+                    code = opRe.sub("%{INTREG_RAX}", code)
+                else:
+                    print "Didn't know how to encode fixed register %s!" % opType.reg
+            elif opType.tag == None or opType.size == None:
+                raise Exception, "Problem parsing operand tag: %s" % opType.tag
+            elif opType.tag in ("C", "D", "G", "P", "S", "T", "V"):
+                # Use the "reg" field of the ModRM byte to select the register
+                code = opRe.sub("%{(uint8_t)MODRM_REG}", code)
+            elif opType.tag in ("E", "Q", "W"):
+                # This might refer to memory or to a register. We need to
+                # divide it up farther.
+                regCode = opRe.sub("%{(uint8_t)MODRM_RM}", code)
+                regTypes = copy.copy(opTypes)
+                regTypes.pop(-1)
+                # This needs to refer to memory, but we'll fill in the details
+                # later. It needs to take into account unaligned memory
+                # addresses.
+                memCode = opRe.sub("%0", code)
+                memTypes = copy.copy(opTypes)
+                memTypes.pop(-1)
+                return doSplitDecode(name, Name, specializeInst, "MODRM_MOD",
+                    {"3" : (regCode, regTypes)}, (memCode, memTypes))
+            elif opType.tag in ("I", "J"):
+                # Immediates are already in the instruction, so don't leave in
+                # those parameters
+                code = opRe.sub("${IMMEDIATE}", code)
+            elif opType.tag == "M":
+                # This needs to refer to memory, but we'll fill in the details
+                # later. It needs to take into account unaligned memory
+                # addresses.
+                code = opRe.sub("%0", code)
+            elif opType.tag in ("PR", "R", "VR"):
+                # There should probably be a check here to verify that mod
+                # is equal to 11b
+                code = opRe.sub("%{(uint8_t)MODRM_RM}", code)
+            else:
+                raise Exception, "Unrecognized tag %s." % opType.tag
+            opTypes.pop(-1)
+
+        # At this point, we've built up "code" to have all the necessary extra
+        # instructions needed to implement whatever types of operands were
+        # specified. Now we'll assemble it into a StaticInst.
+        return assembleMicro(name, Name, code)
+}};
diff --git a/src/arch/x86/predecoder.cc b/src/arch/x86/predecoder.cc
index 80971e7cf..573012ee6 100644
--- a/src/arch/x86/predecoder.cc
+++ b/src/arch/x86/predecoder.cc
@@ -117,37 +117,33 @@ namespace X86ISA
             //Operand size override prefixes
           case OperandSizeOverride:
             DPRINTF(Predecoder, "Found operand size override prefix.\n");
+            emi.legacy.op = true;
             break;
           case AddressSizeOverride:
             DPRINTF(Predecoder, "Found address size override prefix.\n");
+            emi.legacy.addr = true;
             break;
             //Segment override prefixes
           case CSOverride:
-            DPRINTF(Predecoder, "Found cs segment override.\n");
-            break;
           case DSOverride:
-            DPRINTF(Predecoder, "Found ds segment override.\n");
-            break;
           case ESOverride:
-            DPRINTF(Predecoder, "Found es segment override.\n");
-            break;
           case FSOverride:
-            DPRINTF(Predecoder, "Found fs segment override.\n");
-            break;
           case GSOverride:
-            DPRINTF(Predecoder, "Found gs segment override.\n");
-            break;
           case SSOverride:
-            DPRINTF(Predecoder, "Found ss segment override.\n");
+            DPRINTF(Predecoder, "Found segment override.\n");
+            emi.legacy.seg = prefix;
             break;
           case Lock:
             DPRINTF(Predecoder, "Found lock prefix.\n");
+            emi.legacy.lock = true;
             break;
           case Rep:
             DPRINTF(Predecoder, "Found rep prefix.\n");
+            emi.legacy.rep = true;
             break;
           case Repne:
             DPRINTF(Predecoder, "Found repne prefix.\n");
+            emi.legacy.repne = true;
             break;
           case RexPrefix:
             DPRINTF(Predecoder, "Found Rex prefix %#x.\n", nextByte);
@@ -198,16 +194,36 @@ namespace X86ISA
             displacementCollected = 0;
             emi.displacement = 0;
 
+            //Figure out the effective operand size. This can be overridden to
+            //a fixed value at the decoder level.
+            if(/*FIXME long mode*/1)
+            {
+                if(emi.rex && emi.rex.w)
+                    emi.opSize = 3; // 64 bit operand size
+                else if(emi.legacy.op)
+                    emi.opSize = 1; // 16 bit operand size
+                else
+                    emi.opSize = 2; // 32 bit operand size
+            }
+            else if(/*FIXME default 32*/1)
+            {
+                if(emi.legacy.op)
+                    emi.opSize = 1; // 16 bit operand size
+                else
+                    emi.opSize = 2; // 32 bit operand size
+            }
+            else // 16 bit default operand size
+            {
+                if(emi.legacy.op)
+                    emi.opSize = 2; // 32 bit operand size
+                else
+                    emi.opSize = 1; // 16 bit operand size
+            }
+
             //Figure out how big of an immediate we'll retreive based
             //on the opcode.
-            int immType = ImmediateType[
-                emi.opcode.num - 1][nextByte];
-            if(0) //16 bit mode
-                immediateSize = ImmediateTypeToSize[0][immType];
-            else if(!(emi.rex & 0x4)) //32 bit mode
-                immediateSize = ImmediateTypeToSize[1][immType];
-            else //64 bit mode
-                immediateSize = ImmediateTypeToSize[2][immType];
+            int immType = ImmediateType[emi.opcode.num - 1][nextByte];
+            immediateSize = SizeTypeToSize[emi.opSize - 1][immType];
 
             //Determine what to expect next
             if (UsesModRM[emi.opcode.num - 1][nextByte]) {
@@ -351,6 +367,16 @@ namespace X86ISA
 
         if(immediateSize == immediateCollected)
         {
+            //XXX Warning! The following is an observed pattern and might
+            //not always be true!
+
+            //Instructions which use 64 bit operands but 32 bit immediates
+            //need to have the immediate sign extended to 64 bits.
+            //Instructions which use true 64 bit immediates won't be
+            //affected, and instructions that use true 32 bit immediates
+            //won't notice.
+            if(immediateSize == 4)
+                emi.immediate = sext<32>(emi.immediate);
             DPRINTF(Predecoder, "Collected immediate %#x.\n",
                     emi.immediate);
             emiIsReady = true;
diff --git a/src/arch/x86/predecoder.hh b/src/arch/x86/predecoder.hh
index 1df17d6d2..6562ab9f5 100644
--- a/src/arch/x86/predecoder.hh
+++ b/src/arch/x86/predecoder.hh
@@ -73,7 +73,7 @@ namespace X86ISA
         static const uint8_t Prefixes[256];
         static const uint8_t UsesModRM[2][256];
         static const uint8_t ImmediateType[2][256];
-        static const uint8_t ImmediateTypeToSize[3][10];
+        static const uint8_t SizeTypeToSize[3][10];
 
       protected:
         ThreadContext * tc;
diff --git a/src/arch/x86/predecoder_tables.cc b/src/arch/x86/predecoder_tables.cc
index f233ad234..38b9c57a3 100644
--- a/src/arch/x86/predecoder_tables.cc
+++ b/src/arch/x86/predecoder_tables.cc
@@ -141,7 +141,7 @@ namespace X86ISA
         }
     };
 
-    enum ImmediateTypes {
+    enum SizeType {
         NoImm,
         NI = NoImm,
         ByteImm,
@@ -158,19 +158,19 @@ namespace X86ISA
         VW = VWordImm,
         ZWordImm,
         ZW = ZWordImm,
-        Pointer,
-        PO = Pointer,
         //The enter instruction takes -2- immediates for a total of 3 bytes
         Enter,
-        EN = Enter
+        EN = Enter,
+        Pointer,
+        PO = Pointer
     };
 
-    const uint8_t Predecoder::ImmediateTypeToSize[3][10] =
+    const uint8_t Predecoder::SizeTypeToSize[3][10] =
     {
-//       noimm byte word dword qword oword vword zword enter
-        {0,    1,   2,   4,    8,    16,   2,    2,    3,    4}, //16 bit
-        {0,    1,   2,   4,    8,    16,   4,    4,    3,    6}, //32 bit
-        {0,    1,   2,   4,    8,    16,   4,    8,    3,    0}  //64 bit
+//       noimm byte word dword qword oword vword zword enter pointer
+        {0,    1,   2,   4,    8,    16,   2,    2,    3,    4      }, //16 bit
+        {0,    1,   2,   4,    8,    16,   4,    4,    3,    6      }, //32 bit
+        {0,    1,   2,   4,    8,    16,   4,    8,    3,    0      }  //64 bit
     };
 
     //This table determines the immediate type. The first index is the
diff --git a/src/arch/x86/types.hh b/src/arch/x86/types.hh
index cdac3c00e..022f20ee5 100644
--- a/src/arch/x86/types.hh
+++ b/src/arch/x86/types.hh
@@ -70,25 +70,31 @@ namespace X86ISA
     typedef uint64_t MachInst;
 
     enum Prefixes {
-        NoOverride = 0,
-        CSOverride = 1,
-        DSOverride = 2,
-        ESOverride = 3,
-        FSOverride = 4,
-        GSOverride = 5,
-        SSOverride = 6,
-        //The Rex prefix obviously doesn't fit in with the above, but putting
-        //it here lets us save double the space the enums take up.
-        RexPrefix = 7,
+        NoOverride,
+        CSOverride,
+        DSOverride,
+        ESOverride,
+        FSOverride,
+        GSOverride,
+        SSOverride,
+        RexPrefix,
+        OperandSizeOverride,
+        AddressSizeOverride,
+        Lock,
+        Rep,
+        Repne
+    };
+
+    BitUnion8(LegacyPrefixVector)
+        Bitfield<7> repne;
+        Bitfield<6> rep;
+        Bitfield<5> lock;
+        Bitfield<4> addr;
+        Bitfield<3> op;
         //There can be only one segment override, so they share the
         //first 3 bits in the legacyPrefixes bitfield.
-        SegmentOverride = 0x7,
-        OperandSizeOverride = 8,
-        AddressSizeOverride = 16,
-        Lock = 32,
-        Rep = 64,
-        Repne = 128
-    };
+        Bitfield<2,0> seg;
+    EndBitUnion(LegacyPrefixVector)
 
     BitUnion8(ModRM)
         Bitfield<7,6> mod;
@@ -118,7 +124,7 @@ namespace X86ISA
     struct ExtMachInst
     {
         //Prefixes
-        uint8_t legacy;
+        LegacyPrefixVector legacy;
         Rex rex;
         //This holds all of the bytes of the opcode
         struct
@@ -140,6 +146,10 @@ namespace X86ISA
         //Immediate fields
         uint64_t immediate;
         uint64_t displacement;
+
+        //The effective operand size.
+        uint8_t opSize;
+        //The
     };
 
     inline static std::ostream &
diff --git a/src/arch/x86/utility.hh b/src/arch/x86/utility.hh
index e0bd09515..1c98e7fbc 100644
--- a/src/arch/x86/utility.hh
+++ b/src/arch/x86/utility.hh
@@ -78,7 +78,8 @@ namespace __hash_namespace {
                     ((uint64_t)emi.opcode.prefixA << 16) |
                     ((uint64_t)emi.opcode.prefixB << 8) |
                     ((uint64_t)emi.opcode.op)) ^
-                    emi.immediate ^ emi.displacement;
+                    emi.immediate ^ emi.displacement ^
+                    emi.opSize;
         };
     };
 }
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index 6c6d90076..eed05c2f1 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -877,6 +877,11 @@ BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
         effAddrValid = true;
         physEffAddr = req->getPaddr();
         memReqFlags = req->getFlags();
+
+        if (req->isCondSwap()) {
+            assert(res);
+            req->setExtraData(*res);
+        }
 #if 0
         if (cpu->system->memctrl->badaddr(physEffAddr)) {
             fault = TheISA::genMachineCheckFault();
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index e1b27048d..f24de20d9 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -289,15 +289,19 @@ class LSQUnit {
     struct SQEntry {
         /** Constructs an empty store queue entry. */
         SQEntry()
-            : inst(NULL), req(NULL), size(0), data(0),
+            : inst(NULL), req(NULL), size(0),
               canWB(0), committed(0), completed(0)
-        { }
+        {
+            bzero(data, sizeof(data));
+        }
 
         /** Constructs a store queue entry for a given instruction. */
         SQEntry(DynInstPtr &_inst)
-            : inst(_inst), req(NULL), size(0), data(0),
+            : inst(_inst), req(NULL), size(0),
               canWB(0), committed(0), completed(0)
-        { }
+        {
+            bzero(data, sizeof(data));
+        }
 
         /** The store instruction. */
         DynInstPtr inst;
@@ -306,7 +310,7 @@ class LSQUnit {
         /** The size of the store. */
         int size;
         /** The store data. */
-        IntReg data;
+        char data[sizeof(IntReg)];
         /** Whether or not the store can writeback. */
         bool canWB;
         /** Whether or not the store is committed. */
@@ -554,22 +558,14 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
         if ((store_has_lower_limit && store_has_upper_limit)) {
             // Get shift amount for offset into the store's data.
             int shift_amt = req->getVaddr() & (store_size - 1);
-            // @todo: Magic number, assumes byte addressing
-            shift_amt = shift_amt << 3;
-
-            // Cast this to type T?
-            data = storeQueue[store_idx].data >> shift_amt;
 
-            // When the data comes from the store queue entry, it's in host
-            // order. When it gets sent to the load, it needs to be in guest
-            // order so when the load converts it again, it ends up back
-            // in host order like the inst expects.
-            data = TheISA::htog(data);
+            memcpy(&data, storeQueue[store_idx].data + shift_amt, sizeof(T));
 
             assert(!load_inst->memData);
             load_inst->memData = new uint8_t[64];
 
-            memcpy(load_inst->memData, &data, req->getSize());
+            memcpy(load_inst->memData,
+                    storeQueue[store_idx].data + shift_amt, req->getSize());
 
             DPRINTF(LSQUnit, "Forwarding from store idx %i to load to "
                     "addr %#x, data %#x\n",
@@ -716,7 +712,10 @@ LSQUnit<Impl>::write(Request *req, T &data, int store_idx)
 
     storeQueue[store_idx].req = req;
     storeQueue[store_idx].size = sizeof(T);
-    storeQueue[store_idx].data = data;
+    assert(sizeof(T) <= sizeof(storeQueue[store_idx].data));
+
+    T gData = htog(data);
+    memcpy(storeQueue[store_idx].data, &gData, sizeof(T));
 
     // This function only writes the data to the store queue, so no fault
     // can happen here.
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 2aa0d6b6a..44e2cea76 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -645,22 +645,10 @@ LSQUnit<Impl>::writebackStores()
         assert(!inst->memData);
         inst->memData = new uint8_t[64];
 
-        TheISA::IntReg convertedData =
-            TheISA::htog(storeQueue[storeWBIdx].data);
-
-        //FIXME This is a hack to get SPARC working. It, along with endianness
-        //in the memory system in general, need to be straightened out more
-        //formally. The problem is that the data's endianness is swapped when
-        //it's in the 64 bit data field in the store queue. The data that you
-        //want won't start at the beginning of the field anymore unless it was
-        //a 64 bit access.
-        memcpy(inst->memData,
-                (uint8_t *)&convertedData +
-                (TheISA::ByteOrderDiffers ?
-                 (sizeof(TheISA::IntReg) - req->getSize()) : 0),
-                req->getSize());
-
-        PacketPtr data_pkt = new Packet(req, MemCmd::WriteReq,
+        memcpy(inst->memData, storeQueue[storeWBIdx].data, req->getSize());
+
+        MemCmd command = req->isSwap() ? MemCmd::SwapReq : MemCmd::WriteReq;
+        PacketPtr data_pkt = new Packet(req, command,
                                         Packet::Broadcast);
         data_pkt->dataStatic(inst->memData);
 
@@ -677,7 +665,7 @@ LSQUnit<Impl>::writebackStores()
                 inst->seqNum);
 
         // @todo: Remove this SC hack once the memory system handles it.
-        if (req->isLocked()) {
+        if (inst->isStoreConditional()) {
             // Disable recording the result temporarily.  Writing to
             // misc regs normally updates the result, but this is not
             // the desired behavior when handling store conditionals.
diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh
index bbc69fc96..b5b1cd021 100644
--- a/src/cpu/o3/regfile.hh
+++ b/src/cpu/o3/regfile.hh
@@ -174,7 +174,7 @@ class PhysRegFile
         // Remove the base Float reg dependency.
         reg_idx = reg_idx - numPhysicalIntRegs;
 
-        assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
+        assert(reg_idx < numPhysicalFloatRegs);
 
         DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n",
                 int(reg_idx), (uint64_t)val);
@@ -189,7 +189,7 @@ class PhysRegFile
         // Remove the base Float reg dependency.
         reg_idx = reg_idx - numPhysicalIntRegs;
 
-        assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
+        assert(reg_idx < numPhysicalFloatRegs);
 
         DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n",
                 int(reg_idx), (uint64_t)val);
@@ -204,7 +204,7 @@ class PhysRegFile
         // Remove the base Float reg dependency.
         reg_idx = reg_idx - numPhysicalIntRegs;
 
-        assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
+        assert(reg_idx < numPhysicalFloatRegs);
 
         DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n",
                 int(reg_idx), (uint64_t)val);
@@ -217,7 +217,7 @@ class PhysRegFile
         // Remove the base Float reg dependency.
         reg_idx = reg_idx - numPhysicalIntRegs;
 
-        assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
+        assert(reg_idx < numPhysicalFloatRegs);
 
         DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n",
                 int(reg_idx), (uint64_t)val);
@@ -232,11 +232,11 @@ class PhysRegFile
 
     MiscReg readMiscReg(int misc_reg, unsigned thread_id)
     {
-        return miscRegs[thread_id].readReg(misc_reg,
-                                                     cpu->tcBase(thread_id));
+        return miscRegs[thread_id].readReg(misc_reg, cpu->tcBase(thread_id));
     }
 
-    void setMiscRegNoEffect(int misc_reg, const MiscReg &val, unsigned thread_id)
+    void setMiscRegNoEffect(int misc_reg,
+            const MiscReg &val, unsigned thread_id)
     {
         miscRegs[thread_id].setRegNoEffect(misc_reg, val);
     }
diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh
index ec630b31e..431705e19 100644
--- a/src/cpu/o3/rename_impl.hh
+++ b/src/cpu/o3/rename_impl.hh
@@ -996,7 +996,12 @@ DefaultRename<Impl>::renameSrcRegs(DynInstPtr &inst,unsigned tid)
         if (src_reg < TheISA::FP_Base_DepTag) {
             flat_src_reg = TheISA::flattenIntIndex(inst->tcBase(), src_reg);
             DPRINTF(Rename, "Flattening index %d to %d.\n", (int)src_reg, (int)flat_src_reg);
+        } else {
+            // Floating point and Miscellaneous registers need their indexes
+            // adjusted to account for the expanded number of flattened int regs.
+            flat_src_reg = src_reg - TheISA::FP_Base_DepTag + TheISA::NumIntRegs;
         }
+
         inst->flattenSrcReg(src_idx, flat_src_reg);
 
         // Look up the source registers to get the phys. register they've
@@ -1033,8 +1038,13 @@ DefaultRename<Impl>::renameDestRegs(DynInstPtr &inst,unsigned tid)
         RegIndex dest_reg = inst->destRegIdx(dest_idx);
         RegIndex flat_dest_reg = dest_reg;
         if (dest_reg < TheISA::FP_Base_DepTag) {
+            // Integer registers are flattened.
             flat_dest_reg = TheISA::flattenIntIndex(inst->tcBase(), dest_reg);
             DPRINTF(Rename, "Flattening index %d to %d.\n", (int)dest_reg, (int)flat_dest_reg);
+        } else {
+            // Floating point and Miscellaneous registers need their indexes
+            // adjusted to account for the expanded number of flattened int regs.
+            flat_dest_reg = dest_reg - TheISA::FP_Base_DepTag + TheISA::NumIntRegs;
         }
 
         inst->flattenDestReg(dest_idx, flat_dest_reg);