65 files changed, 1651 insertions, 751 deletions
diff --git a/src/SConscript b/src/SConscript
index 5efd2f794..34c5453b7 100644
--- a/src/SConscript
+++ b/src/SConscript
@@ -30,28 +30,71 @@
 
 import os
 import sys
+import zipfile
 
+from os.path import basename
 from os.path import join as joinpath
 
+import SCons
+
 # This file defines how to build a particular configuration of M5
 # based on variable settings in the 'env' build environment.
 
 Import('*')
 
+# Children need to see the environment
+Export('env')
+
+########################################################################
+# Code for adding source files
+#
 sources = []
-def Source(*args):
-    for arg in args:
-        if isinstance(arg, (list, tuple)):
-            # Recurse to load a list
-            Source(*arg)
-        elif isinstance(arg, str):
-            sources.extend([ File(f) for f in Split(arg) ])
-        else:
-            sources.append(File(arg))
+def Source(source):
+    # Record one source file, wrapping anything that is not a File node.
+    if not isinstance(source, SCons.Node.FS.File):
+        source = File(source)
+    sources.append(source)
 
-Export('env')
+# Children should have access
 Export('Source')
 
+########################################################################
+# Code for adding python objects
+#
+py_sources = []
+py_source_packages = {}
+def PySource(package, source):
+    if not isinstance(source, SCons.Node.FS.File):
+        source = File(source)
+    py_source_packages[source] = package
+    py_sources.append(source)
+
+sim_objects = []
+def SimObject(source):
+    if not isinstance(source, SCons.Node.FS.File):
+        source = File(source)
+    PySource('m5.objects', source)
+    modname = basename(str(source))
+    sim_objects.append(modname)
+
+swig_sources = []
+swig_source_packages = {}
+def SwigSource(package, source):
+    if not isinstance(source, SCons.Node.FS.File):
+        source = File(source)
+    swig_source_packages[source] = package
+    swig_sources.append(source)
+
+# Children should have access
+Export('PySource')
+Export('SimObject')
+Export('SwigSource')
+
+########################################################################
+#
+# Set some compiler variables
+#
+
 # Include file paths are rooted in this directory.  SCons will
 # automatically expand '.' to refer to both the source directory and
 # the corresponding build directory to pick up generated include
@@ -61,7 +104,9 @@ env.Append(CPPPATH=Dir('.'))
 # Add a flag defining what THE_ISA should be for all compilation
 env.Append(CPPDEFINES=[('THE_ISA','%s_ISA' % env['TARGET_ISA'].upper())])
 
+########################################################################
 # Walk the tree and execute all SConscripts
+#
 scripts = []
 srcdir = env['SRCDIR']
 for root, dirs, files in os.walk(srcdir, topdown=True):
@@ -78,6 +123,132 @@ for root, dirs, files in os.walk(srcdir, topdown=True):
 for opt in env.ExportOptions:
     env.ConfigFile(opt)
 
+########################################################################
+#
+# Deal with python/swig object code.  Collect .py files and
+# generate a zip archive that is appended to the m5 binary.
+#
+
+# Generate Python file that contains a dict specifying the current
+# build_env flags.
+def MakeDefinesPyFile(target, source, env):
+    f = file(str(target[0]), 'w')
+    print >>f, "m5_build_env = ", source[0]
+    f.close()
+
+optionDict = dict([(opt, env[opt]) for opt in env.ExportOptions])
+env.Command('python/m5/defines.py', Value(optionDict), MakeDefinesPyFile)
+PySource('m5', 'python/m5/defines.py')
+
+def MakeInfoPyFile(target, source, env):
+    f = file(str(target[0]), 'w')
+    for src in source:
+        data = ''.join(file(src.srcnode().abspath, 'r').xreadlines())
+        print >>f, "%s = %s" % (src, repr(data))
+    f.close()
+
+env.Command('python/m5/info.py',
+            [ '#/AUTHORS', '#/LICENSE', '#/README', '#/RELEASE_NOTES' ],
+            MakeInfoPyFile)
+PySource('m5', 'python/m5/info.py')
+
+def MakeObjectsInitFile(target, source, env):
+    f = file(str(target[0]), 'w')
+    print >>f, 'from m5.SimObject import *'
+    for src_path in source:
+        src_file = basename(src_path.get_contents())
+        assert(src_file.endswith('.py'))
+        src_module = src_file[:-3]
+        print >>f, 'from %s import *' % src_module
+    f.close()
+
+env.Command('python/m5/objects/__init__.py',
+            [ Value(o) for o in sim_objects],
+            MakeObjectsInitFile)
+PySource('m5.objects', 'python/m5/objects/__init__.py')
+
+swig_modules = []
+for source in swig_sources:
+    source.rfile() # Hack to cause the symlink to the .i file to be created
+    package = swig_source_packages[source]
+    filename = str(source)
+    module = basename(filename)
+
+    assert(module.endswith('.i'))
+    module = module[:-2]
+    cc_file = 'swig/%s_wrap.cc' % module
+    py_file = 'm5/internal/%s.py' % module
+
+    env.Command([cc_file, py_file], source,
+                '$SWIG $SWIGFLAGS -outdir ${TARGETS[1].dir} '
+                '-o ${TARGETS[0]} $SOURCES')
+    env.Depends(py_file, source)
+    env.Depends(cc_file, source)
+                
+    swig_modules.append(Value(module))
+    Source(cc_file)
+    PySource(package, py_file)
+
+def MakeSwigInit(target, source, env):
+    f = file(str(target[0]), 'w')
+    print >>f, 'extern "C" {'
+    for module in source:
+        print >>f, '    void init_%s();' % module.get_contents()
+    print >>f, '}'
+    print >>f, 'void init_swig() {'
+    for module in source:
+        print >>f, '    init_%s();' % module.get_contents()
+    print >>f, '}'
+    f.close()
+env.Command('python/swig/init.cc', swig_modules, MakeSwigInit)
+
+def CompilePyFile(target, source, env):
+    import py_compile
+    py_compile.compile(str(source[0]), str(target[0]))
+
+py_compiled = []
+py_arcname = {}
+py_zip_depends = []
+for source in py_sources:
+    filename = str(source)
+    package = py_source_packages[source]
+    arc_path = package.split('.') + [ basename(filename) + 'c' ]
+    zip_path = [ 'zip' ] + arc_path
+    arcname = joinpath(*arc_path)
+    zipname = joinpath(*zip_path)
+    f = File(zipname)
+
+    env.Command(f, source, CompilePyFile)
+    py_compiled.append(f)
+    py_arcname[f] = arcname
+
+    # make the zipfile depend on the archive name so that the archive
+    # is rebuilt if the name changes
+    py_zip_depends.append(Value(arcname))
+
+# Action function to build the zip archive.  Uses the ZipFile class
+# from the zipfile module in the standard Python library.
+def buildPyZip(target, source, env):
+    zf = zipfile.ZipFile(str(target[0]), 'w')
+    for s in source:
+        arcname = py_arcname[s]
+        zipname = str(s)
+        zf.write(zipname, arcname)
+    zf.close()
+
+# Add the zip file target to the environment.
+env.Command('m5py.zip', py_compiled, buildPyZip)
+env.Depends('m5py.zip', py_zip_depends)
+
+########################################################################
+#
+# Define binaries.  Each different build type (debug, opt, etc.) gets
+# a slightly different build environment.
+#
+
+# List of constructed environments to pass back to SConstruct
+envList = []
+
 # This function adds the specified sources to the given build
 # environment, and returns a list of all the corresponding SCons
 # Object nodes (including an extra one for date.cc).  We explicitly
@@ -92,16 +263,6 @@ def make_objs(sources, env):
     objs.append(date_obj)
     return objs
 
-###################################################
-#
-# Define binaries.  Each different build type (debug, opt, etc.) gets
-# a slightly different build environment.
-#
-###################################################
-
-# List of constructed environments to pass back to SConstruct
-envList = []
-
 # Function to create a new build environment as clone of current
 # environment 'env' with modified object suffix and optional stripped
 # binary.  Additional keyword arguments are appended to corresponding
@@ -120,7 +281,7 @@ def makeEnv(label, objsfx, strip = False, **kwargs):
         else:
             newEnv.Command(stripped_bin, bin, 'strip $SOURCE -o $TARGET')
         bin = stripped_bin
-    targets = newEnv.Concat(exe, [bin, 'python/m5py.zip'])
+    targets = newEnv.Concat(exe, [bin, 'm5py.zip'])
     newEnv.M5Binary = targets[0]
     envList.append(newEnv)
 
diff --git a/src/arch/isa_parser.py b/src/arch/isa_parser.py
index a0d671da1..4c8d0706d 100755
--- a/src/arch/isa_parser.py
+++ b/src/arch/isa_parser.py
@@ -866,7 +866,11 @@ class Format:
         context = {}
         updateExportContext()
         context.update(exportContext)
-        context.update({ 'name': name, 'Name': string.capitalize(name) })
+        # Uppercase only the first character and keep the rest as written.
+        # Slicing rather than indexing keeps this safe when name is empty,
+        # so Name is always bound before the context update below.
+        Name = name[:1].upper() + name[1:]
+        context.update({ 'name': name, 'Name': Name })
         try:
             vars = self.func(self.user_code, context, *args[0], **args[1])
         except Exception, exc:
@@ -1028,7 +1032,7 @@ def substBitOps(code):
 # Template objects are format strings that allow substitution from
 # the attribute spaces of other objects (e.g. InstObjParams instances).
 
-labelRE = re.compile(r'[^%]%\(([^\)]+)\)[sd]')
+labelRE = re.compile(r'(?<!%)%\(([^\)]+)\)[sd]')
 
 class Template:
     def __init__(self, t):
diff --git a/src/arch/sparc/isa/base.isa b/src/arch/sparc/isa/base.isa
index bba63f407..4339003e0 100644
--- a/src/arch/sparc/isa/base.isa
+++ b/src/arch/sparc/isa/base.isa
@@ -154,6 +154,76 @@ def template ROrImmDecode {{
     }
 }};
 
+output header {{
+    union DoubleSingle
+    {
+        double d;
+        uint64_t ui;
+        uint32_t s[2];
+        DoubleSingle(double _d) : d(_d)
+        {}
+        DoubleSingle(uint64_t _ui) : ui(_ui)
+        {}
+        DoubleSingle(uint32_t _s0, uint32_t _s1)
+        {
+            s[0] = _s0;
+            s[1] = _s1;
+        }
+    };
+}};
+
+let {{
+    def filterDoubles(code):
+        assignRE = re.compile(r'\s*=(?!=)', re.MULTILINE)
+        for opName in ("Frd", "Frs1", "Frs2", "Frd_N"):
+            next_pos = 0
+            operandsREString = (r'''
+            (?<![\w\.])             # neg. lookbehind assertion: prevent partial matches
+            ((%s)(?:\.(\w+))?)   # match: operand with optional '.' then suffix
+            (?![\w\.])             # neg. lookahead assertion: prevent partial matches
+            ''' % opName)
+            operandsRE = re.compile(operandsREString, re.MULTILINE|re.VERBOSE)
+            is_src = False
+            is_dest = False
+            extension = None
+            foundOne = False
+            while 1:
+                match = operandsRE.search(code, next_pos)
+                if not match:
+                    break
+                foundOne = True
+                op = match.groups()
+                (op_full, op_base, op_ext) = op
+                is_dest_local = (assignRE.match(code, match.end()) != None)
+                is_dest = is_dest or is_dest_local
+                is_src = is_src or not is_dest_local
+                if extension and extension != op_ext:
+                    raise Exception, "Inconsistent extensions in double filter."
+                extension = op_ext
+                next_pos = match.end()
+            if foundOne:
+                # Get rid of any unwanted extension
+                code = operandsRE.sub(op_base, code)
+                is_int = False
+                member = "d"
+                if extension in ("sb", "ub", "shw", "uhw", "sw", "uw", "sdw", "udw"):
+                    is_int = True
+                    member = "ui"
+                if is_src:
+                    code = ("%s = DoubleSingle(%s_high, %s_low).%s;" % \
+                        (opName, opName, opName, member)) + code
+                if is_dest:
+                    code += '''
+                        %s_low = DoubleSingle(%s).s[1];
+                        %s_high = DoubleSingle(%s).s[0];''' % \
+                             (opName, opName, opName, opName)
+                if is_int:
+                    code = ("uint64_t %s;" % opName) + code
+                else:
+                    code = ("double %s;" % opName) + code
+        return code
+}};
+
 let {{
     def splitOutImm(code):
         matcher = re.compile(r'Rs(?P<rNum>\d)_or_imm(?P<iNum>\d+)(?P<typeQual>\.\w+)?')
diff --git a/src/arch/sparc/isa/formats/basic.isa b/src/arch/sparc/isa/formats/basic.isa
index 017f43780..7665d2d4f 100644
--- a/src/arch/sparc/isa/formats/basic.isa
+++ b/src/arch/sparc/isa/formats/basic.isa
@@ -97,6 +97,7 @@ def template BasicDecodeWithMnemonic {{
 
 // The most basic instruction format... used only for a few misc. insts
 def format BasicOperate(code, *flags) {{
+        code = filterDoubles(code)
         iop = InstObjParams(name, Name, 'SparcStaticInst', code, flags)
         header_output = BasicDeclare.subst(iop)
         decoder_output = BasicConstructor.subst(iop)
@@ -140,6 +141,7 @@ def format FpBasic(code, *flags) {{
     fesetround(oldrnd);
 #endif
 """
+        fp_code = filterDoubles(fp_code)
         iop = InstObjParams(name, Name, 'SparcStaticInst', fp_code, flags)
         header_output = BasicDeclare.subst(iop)
         decoder_output = BasicConstructor.subst(iop)
diff --git a/src/arch/sparc/isa/formats/mem/basicmem.isa b/src/arch/sparc/isa/formats/mem/basicmem.isa
index 751262811..2f62c7bef 100644
--- a/src/arch/sparc/isa/formats/mem/basicmem.isa
+++ b/src/arch/sparc/isa/formats/mem/basicmem.isa
@@ -71,6 +71,7 @@ let {{
 }};
 
 def format LoadAlt(code, asi, *opt_flags) {{
+        code = filterDoubles(code)
         (header_output,
          decoder_output,
          exec_output,
@@ -79,6 +80,7 @@ def format LoadAlt(code, asi, *opt_flags) {{
 }};
 
 def format StoreAlt(code, asi, *opt_flags) {{
+        code = filterDoubles(code)
         (header_output,
          decoder_output,
          exec_output,
@@ -87,6 +89,7 @@ def format StoreAlt(code, asi, *opt_flags) {{
 }};
 
 def format Load(code, *opt_flags) {{
+        code = filterDoubles(code)
         (header_output,
          decoder_output,
          exec_output,
@@ -95,6 +98,7 @@ def format Load(code, *opt_flags) {{
 }};
 
 def format Store(code, *opt_flags) {{
+        code = filterDoubles(code)
         (header_output,
          decoder_output,
          exec_output,
diff --git a/src/arch/sparc/isa/formats/mem/blockmem.isa b/src/arch/sparc/isa/formats/mem/blockmem.isa
index 499685a5c..e19016bd0 100644
--- a/src/arch/sparc/isa/formats/mem/blockmem.isa
+++ b/src/arch/sparc/isa/formats/mem/blockmem.isa
@@ -317,6 +317,7 @@ let {{
 }};
 
 def format BlockLoad(code, asi, *opt_flags) {{
+        code = filterDoubles(code)
         # We need to make sure to check the highest priority fault last.
         # That way, if other faults have been detected, they'll be overwritten
         # rather than the other way around.
@@ -329,6 +330,7 @@ def format BlockLoad(code, asi, *opt_flags) {{
 }};
 
 def format BlockStore(code, asi, *opt_flags) {{
+        code = filterDoubles(code)
         # We need to make sure to check the highest priority fault last.
         # That way, if other faults have been detected, they'll be overwritten
         # rather than the other way around.
diff --git a/src/arch/sparc/isa/formats/mem/swap.isa b/src/arch/sparc/isa/formats/mem/swap.isa
index 818597a84..b71542a2b 100644
--- a/src/arch/sparc/isa/formats/mem/swap.isa
+++ b/src/arch/sparc/isa/formats/mem/swap.isa
@@ -137,7 +137,7 @@ def format Swap(code, postacc_code, mem_flags, *opt_flags) {{
      decoder_output,
      exec_output,
      decode_block) = doMemFormat(code, SwapFuncs, '', name, Name, flags,
-         opt_flags, postacc_code)
+         ["IsStoreConditional"], postacc_code)
 }};
 
 def format SwapAlt(code, postacc_code, asi, mem_flags, *opt_flags) {{
@@ -148,7 +148,7 @@ def format SwapAlt(code, postacc_code, asi, mem_flags, *opt_flags) {{
      decoder_output,
      exec_output,
      decode_block) = doMemFormat(code, SwapFuncs, AlternateASIPrivFaultCheck,
-         name, Name, flags, opt_flags, postacc_code)
+         name, Name, flags, ["IsStoreConditional"], postacc_code)
 }};
 
 
@@ -163,8 +163,8 @@ let {{
         decode_block = BasicDecode.subst(iop)
         microParams = {"code": code, "postacc_code" : postacc_code,
             "ea_code" : addrCalcReg, "fault_check" : faultCode}
-        exec_output = doSplitExecute(execute, name, Name, asi, opt_flags,
-                microParams);
+        exec_output = doSplitExecute(execute, name, Name, asi,
+                ["IsStoreConditional"], microParams);
         return (header_output, decoder_output, exec_output, decode_block)
 }};
 
@@ -177,7 +177,7 @@ def format CasAlt(code, postacc_code, asi, mem_flags, *opt_flags) {{
      decoder_output,
      exec_output,
      decode_block) = doCasFormat(code, SwapFuncs, AlternateASIPrivFaultCheck,
-         name, Name, flags, opt_flags, postacc_code)
+         name, Name, flags, ["IsStoreConditional"], postacc_code)
 }};
 
 
diff --git a/src/arch/sparc/isa/formats/micro.isa b/src/arch/sparc/isa/formats/micro.isa
index 82d7fb4cb..da0f97d1b 100644
--- a/src/arch/sparc/isa/formats/micro.isa
+++ b/src/arch/sparc/isa/formats/micro.isa
@@ -1,4 +1,4 @@
-// Copyright (c) 2006 The Regents of The University of Michigan
+// Copyright (c) 2006-2007 The Regents of The University of Michigan
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -26,6 +26,33 @@
 //
 // Authors: Gabe Black
 
+//This declares the initiateAcc function in memory operations
+def template MacroInitiateAcc {{
+    Fault initiateAcc(%(CPU_exec_context)s *, Trace::InstRecord *) const
+    {
+        panic("Tried to execute a macroop directly!\n");
+        return NoFault;
+    }
+}};
+
+def template MacroCompleteAcc {{
+    Fault completeAcc(PacketPtr, %(CPU_exec_context)s *,
+            Trace::InstRecord *) const
+    {
+        panic("Tried to execute a macroop directly!\n");
+        return NoFault;
+    }
+}};
+
+//This template provides the execute function for a macroop
+def template MacroExecute {{
+    Fault execute(%(CPU_exec_context)s *, Trace::InstRecord *) const
+    {
+        panic("Tried to execute a macroop directly!\n");
+        return NoFault;
+    }
+}};
+
 output header {{
 
         class SparcMacroInst : public SparcStaticInst
@@ -60,7 +87,9 @@ output header {{
                 return microOps[microPC];
             }
 
-            %(BasicExecPanic)s
+            %(MacroExecute)s
+            %(MacroInitiateAcc)s
+            %(MacroCompleteAcc)s
         };
 
         class SparcMicroInst : public SparcStaticInst
diff --git a/src/arch/sparc/isa/operands.isa b/src/arch/sparc/isa/operands.isa
index 038919bd1..58d616a7a 100644
--- a/src/arch/sparc/isa/operands.isa
+++ b/src/arch/sparc/isa/operands.isa
@@ -52,6 +52,16 @@ output header {{
     {
         return (regNum & (~1)) | ((regNum & 1) << 5);
     }
+
+    static inline unsigned int dfprl(unsigned int regNum)
+    {
+        return dfpr(regNum) & (~0x1);
+    }
+
+    static inline unsigned int dfprh(unsigned int regNum)
+    {
+        return dfpr(regNum) | 0x1;
+    }
 }};
 
 def operands {{
@@ -79,21 +89,43 @@ def operands {{
     # differently, they get different operands. The single precision versions
     # have an s post pended to their name.
     'Frds':		('FloatReg', 'sf', 'RD', 'IsFloating', 10),
-    'Frd':		('FloatReg', 'df', 'dfpr(RD)', 'IsFloating', 10),
+    #'Frd':		('FloatReg', 'df', 'dfpr(RD)', 'IsFloating', 10),
+    'Frd_low':		('FloatReg', 'uw', 'dfprl(RD)', 'IsFloating', 10),
+    'Frd_high':		('FloatReg', 'uw', 'dfprh(RD)', 'IsFloating', 10),
     # Each Frd_N refers to the Nth double precision register from Frd.
     # Note that this adds twice N to the register number.
-    'Frd_0':		('FloatReg', 'df', 'dfpr(RD)', 'IsFloating', 10),
-    'Frd_1':		('FloatReg', 'df', 'dfpr(RD) + 2', 'IsFloating', 10),
-    'Frd_2':		('FloatReg', 'df', 'dfpr(RD) + 4', 'IsFloating', 10),
-    'Frd_3':		('FloatReg', 'df', 'dfpr(RD) + 6', 'IsFloating', 10),
-    'Frd_4':		('FloatReg', 'df', 'dfpr(RD) + 8', 'IsFloating', 10),
-    'Frd_5':		('FloatReg', 'df', 'dfpr(RD) + 10', 'IsFloating', 10),
-    'Frd_6':		('FloatReg', 'df', 'dfpr(RD) + 12', 'IsFloating', 10),
-    'Frd_7':		('FloatReg', 'df', 'dfpr(RD) + 14', 'IsFloating', 10),
+    #'Frd_0':		('FloatReg', 'df', 'dfpr(RD)', 'IsFloating', 10),
+    'Frd_0_low':	('FloatReg', 'uw', 'dfprl(RD)', 'IsFloating', 10),
+    'Frd_0_high':	('FloatReg', 'uw', 'dfprh(RD)', 'IsFloating', 10),
+    #'Frd_1':		('FloatReg', 'df', 'dfpr(RD) + 2', 'IsFloating', 10),
+    'Frd_1_low':	('FloatReg', 'uw', 'dfprl(RD) + 2', 'IsFloating', 10),
+    'Frd_1_high':	('FloatReg', 'uw', 'dfprh(RD) + 2', 'IsFloating', 10),
+    #'Frd_2':		('FloatReg', 'df', 'dfpr(RD) + 4', 'IsFloating', 10),
+    'Frd_2_low':	('FloatReg', 'uw', 'dfprl(RD) + 4', 'IsFloating', 10),
+    'Frd_2_high':	('FloatReg', 'uw', 'dfprh(RD) + 4', 'IsFloating', 10),
+    #'Frd_3':		('FloatReg', 'df', 'dfpr(RD) + 6', 'IsFloating', 10),
+    'Frd_3_low':	('FloatReg', 'uw', 'dfprl(RD) + 6', 'IsFloating', 10),
+    'Frd_3_high':	('FloatReg', 'uw', 'dfprh(RD) + 6', 'IsFloating', 10),
+    #'Frd_4':		('FloatReg', 'df', 'dfpr(RD) + 8', 'IsFloating', 10),
+    'Frd_4_low':	('FloatReg', 'uw', 'dfprl(RD) + 8', 'IsFloating', 10),
+    'Frd_4_high':	('FloatReg', 'uw', 'dfprh(RD) + 8', 'IsFloating', 10),
+    #'Frd_5':		('FloatReg', 'df', 'dfpr(RD) + 10', 'IsFloating', 10),
+    'Frd_5_low':	('FloatReg', 'uw', 'dfprl(RD) + 10', 'IsFloating', 10),
+    'Frd_5_high':	('FloatReg', 'uw', 'dfprh(RD) + 10', 'IsFloating', 10),
+    #'Frd_6':		('FloatReg', 'df', 'dfpr(RD) + 12', 'IsFloating', 10),
+    'Frd_6_low':	('FloatReg', 'uw', 'dfprl(RD) + 12', 'IsFloating', 10),
+    'Frd_6_high':	('FloatReg', 'uw', 'dfprh(RD) + 12', 'IsFloating', 10),
+    #'Frd_7':		('FloatReg', 'df', 'dfpr(RD) + 14', 'IsFloating', 10),
+    'Frd_7_low':	('FloatReg', 'uw', 'dfprl(RD) + 14', 'IsFloating', 10),
+    'Frd_7_high':	('FloatReg', 'uw', 'dfprh(RD) + 14', 'IsFloating', 10),
     'Frs1s':		('FloatReg', 'sf', 'RS1', 'IsFloating', 11),
-    'Frs1':		('FloatReg', 'df', 'dfpr(RS1)', 'IsFloating', 11),
+    #'Frs1':		('FloatReg', 'df', 'dfpr(RS1)', 'IsFloating', 11),
+    'Frs1_low':		('FloatReg', 'uw', 'dfprl(RS1)', 'IsFloating', 11),
+    'Frs1_high':	('FloatReg', 'uw', 'dfprh(RS1)', 'IsFloating', 11),
     'Frs2s':		('FloatReg', 'sf', 'RS2', 'IsFloating', 12),
-    'Frs2':		('FloatReg', 'df', 'dfpr(RS2)', 'IsFloating', 12),
+    #'Frs2':		('FloatReg', 'df', 'dfpr(RS2)', 'IsFloating', 12),
+    'Frs2_low':		('FloatReg', 'uw', 'dfprl(RS2)', 'IsFloating', 12),
+    'Frs2_high':	('FloatReg', 'uw', 'dfprh(RS2)', 'IsFloating', 12),
     'NPC': 		('NPC', 'udw', None, ( None, None, 'IsControl' ), 31),
     'NNPC':		('NNPC', 'udw', None, (None, None, 'IsControl' ), 32),
     # Registers which are used explicitly in instructions
diff --git a/src/arch/x86/faults.hh b/src/arch/x86/faults.hh
index 6d89c273a..8c9afcdb5 100644
--- a/src/arch/x86/faults.hh
+++ b/src/arch/x86/faults.hh
@@ -65,12 +65,32 @@ namespace X86ISA
 {
     class X86Fault : public FaultBase
     {
+      protected:
+        const char * name()
+        {
+            return "generic_x86_fault";
+        }
+
         void invoke(ThreadContext * tc)
         {
             panic("X86 faults are not implemented!");
         }
     };
 
+    class UnimpInstFault : public FaultBase
+    {
+      public:
+        const char * name()
+        {
+            return "unimplemented_micro";
+        }
+
+        void invoke(ThreadContext * tc)
+        {
+            panic("Unimplemented instruction!");
+        }
+    };
+
     static inline Fault genPageTableFault(Addr va)
     {
         panic("Page table fault not implemented in x86!\n");
diff --git a/src/arch/x86/intregs.hh b/src/arch/x86/intregs.hh
index ed801cc48..562539de9 100644
--- a/src/arch/x86/intregs.hh
+++ b/src/arch/x86/intregs.hh
@@ -63,21 +63,89 @@ namespace X86ISA
     enum IntRegIndex
     {
         INTREG_RAX,
+        INTREG_EAX = INTREG_RAX,
+        INTREG_AX = INTREG_RAX,
+        INTREG_AL = INTREG_RAX,
+        INTREG_AH = INTREG_RAX,
+
         INTREG_RCX,
+        INTREG_ECX = INTREG_RCX,
+        INTREG_CX = INTREG_RCX,
+        INTREG_CL = INTREG_RCX,
+        INTREG_CH = INTREG_RCX,
+
         INTREG_RDX,
+        INTREG_EDX = INTREG_RDX,
+        INTREG_DX = INTREG_RDX,
+        INTREG_DL = INTREG_RDX,
+        INTREG_DH = INTREG_RDX,
+
         INTREG_RBX,
+        INTREG_EBX = INTREG_RBX,
+        INTREG_BX = INTREG_RBX,
+        INTREG_BL = INTREG_RBX,
+        INTREG_BH = INTREG_RBX,
+
         INTREG_RSP,
+        INTREG_ESP = INTREG_RSP,
+        INTREG_SP = INTREG_RSP,
+        INTREG_SPL = INTREG_RSP,
+
         INTREG_RBP,
+        INTREG_EBP = INTREG_RBP,
+        INTREG_BP = INTREG_RBP,
+        INTREG_BPL = INTREG_RBP,
+
         INTREG_RSI,
+        INTREG_ESI = INTREG_RSI,
+        INTREG_SI = INTREG_RSI,
+        INTREG_SIL = INTREG_RSI,
+
         INTREG_RDI,
-        INTREG_R8W,
-        INTREG_R9W,
-        INTREG_R10W,
-        INTREG_R11W,
-        INTREG_R12W,
-        INTREG_R13W,
-        INTREG_R14W,
-        INTREG_R15W,
+        INTREG_EDI = INTREG_RDI,
+        INTREG_DI = INTREG_RDI,
+        INTREG_DIL = INTREG_RDI,
+
+        INTREG_R8,
+        INTREG_R8D = INTREG_R8,
+        INTREG_R8W = INTREG_R8,
+        INTREG_R8B = INTREG_R8,
+
+        INTREG_R9,
+        INTREG_R9D = INTREG_R9,
+        INTREG_R9W = INTREG_R9,
+        INTREG_R9B = INTREG_R9,
+
+        INTREG_R10,
+        INTREG_R10D = INTREG_R10,
+        INTREG_R10W = INTREG_R10,
+        INTREG_R10B = INTREG_R10,
+
+        INTREG_R11,
+        INTREG_R11D = INTREG_R11,
+        INTREG_R11W = INTREG_R11,
+        INTREG_R11B = INTREG_R11,
+
+        INTREG_R12,
+        INTREG_R12D = INTREG_R12,
+        INTREG_R12W = INTREG_R12,
+        INTREG_R12B = INTREG_R12,
+
+        INTREG_R13,
+        INTREG_R13D = INTREG_R13,
+        INTREG_R13W = INTREG_R13,
+        INTREG_R13B = INTREG_R13,
+
+        INTREG_R14,
+        INTREG_R14D = INTREG_R14,
+        INTREG_R14W = INTREG_R14,
+        INTREG_R14B = INTREG_R14,
+
+        INTREG_R15,
+        INTREG_R15D = INTREG_R15,
+        INTREG_R15W = INTREG_R15,
+        INTREG_R15B = INTREG_R15,
+
         NUM_INTREGS
     };
 };
diff --git a/src/arch/x86/isa/base.isa b/src/arch/x86/isa/base.isa
index 4776f7a7e..eba24f709 100644
--- a/src/arch/x86/isa/base.isa
+++ b/src/arch/x86/isa/base.isa
@@ -58,6 +58,38 @@
 // Base class for sparc instructions, and some support functions
 //
 
+let {{
+    # This class will help make dealing with output a little less verbose
+    class OutputBlocks(object):
+        # Accumulates the four parser output strings, always ordered as
+        # (header_output, decoder_output, decode_block, exec_output).
+        def __init__(self, header_output="", decoder_output="",
+                           decode_block="", exec_output=""):
+            self.header_output = header_output
+            self.decoder_output = decoder_output
+            self.decode_block = decode_block
+            self.exec_output = exec_output
+
+        # Concatenate another set of blocks onto this one. 'blocks' is
+        # either a 4-element list/tuple in the order above, or any
+        # object exposing the same four attributes.
+        def append(self, blocks):
+            if not isinstance(blocks, (list, tuple)):
+                blocks = (blocks.header_output, blocks.decoder_output,
+                          blocks.decode_block, blocks.exec_output)
+            assert(len(blocks) == 4)
+            header, decoder, decode, execute = blocks
+            self.header_output += header
+            self.decoder_output += decoder
+            self.decode_block += decode
+            self.exec_output += execute
+
+        # Return the four blocks as a tuple (despite the name).
+        def makeList(self):
+            return (self.header_output, self.decoder_output,
+                    self.decode_block, self.exec_output)
+}};
+
 output header {{
 
         /**
@@ -79,6 +111,13 @@ output header {{
             void printReg(std::ostream &os, int reg) const;
             void printSrcReg(std::ostream &os, int reg) const;
             void printDestReg(std::ostream &os, int reg) const;
+
+            inline uint64_t merge(uint64_t into, uint64_t val, int size) const
+            {
+                //FIXME This needs to be significantly more sophisticated
+                return val;
+            }
+
         };
 }};
 
diff --git a/src/arch/x86/isa/decoder/one_byte_opcodes.isa b/src/arch/x86/isa/decoder/one_byte_opcodes.isa
index b4aeece07..4e044363b 100644
--- a/src/arch/x86/isa/decoder/one_byte_opcodes.isa
+++ b/src/arch/x86/isa/decoder/one_byte_opcodes.isa
@@ -61,14 +61,12 @@
 0x1: decode OPCODE_OP_TOP5 {
     format WarnUnimpl {
         0x00: decode OPCODE_OP_BOTTOM3 {
+            0x4: Inst::ADD(rAl,Ib);
+            0x5: Inst::ADD(rAx,Iz);
             0x6: push_ES();
             0x7: pop_ES();
-            default: MultiOp::add(
-                {{Add %0 %0 %1}},
-                OPCODE_OP_BOTTOM3,
-                [[Eb,Gb],[Ev,Gv],
-                 [Gb,Eb],[Gv,Ev],
-                 [Al,Ib],[rAx,Iz]]);
+            default: MultiInst::ADD(OPCODE_OP_BOTTOM3,
+                             [Eb,Gb],[Ev,Gv],[Gb,Eb],[Gv,Ev]);
         }
         0x01: decode OPCODE_OP_BOTTOM3 {
             0x0: or_Eb_Gb();
@@ -125,15 +123,13 @@
             0x7: das();
         }
         0x06: decode OPCODE_OP_BOTTOM3 {
-            0x0: xor_Eb_Gb();
-            0x1: xor_Ev_Gv();
-            0x2: xor_Gb_Eb();
-            0x3: xor_Gv_Ev();
-            0x4: xor_Al_Ib();
-            0x5: xor_rAX_Iz();
+            0x4: Inst::XOR(rAl,Ib);
+            0x5: Inst::XOR(rAx,Iz);
             0x6: M5InternalError::error(
                 {{"Tried to execute the SS segment override prefix!"}});
             0x7: aaa();
+            default: MultiInst::XOR(OPCODE_OP_BOTTOM3,
+                                    [Eb,Gb],[Ev,Gv],[Gb,Eb],[Gv,Ev]);
         }
         0x07: decode OPCODE_OP_BOTTOM3 {
             0x0: cmp_Eb_Gb();
@@ -241,11 +237,11 @@
             0x7: xchg_Ev_Gv();
         }
         0x11: decode OPCODE_OP_BOTTOM3 {
-            0x0: mov_Eb_Gb();
-            0x1: mov_Ev_Gv();
-            0x2: mov_Gb_Eb();
-            0x3: mov_Gv_Ev();
-            0x4: mov_MwRv_Sw();
+            0x0: Inst::MOV(); //mov_Eb_Gb();
+            0x1: Inst::MOV(); //mov_Ev_Gv();
+            0x2: Inst::MOV(); //mov_Gb_Eb();
+            0x3: Inst::MOV(); //mov_Gv_Ev();
+            0x4: Inst::MOV(); //mov_MwRv_Sw();
             0x5: lea_Gv_M();
             0x6: mov_Sw_MwRv();
             0x7: group10_Ev(); //Make sure this is Ev
diff --git a/src/arch/x86/isa/formats/multi.isa b/src/arch/x86/isa/formats/multi.isa
index c14e80095..8f91c249c 100644
--- a/src/arch/x86/isa/formats/multi.isa
+++ b/src/arch/x86/isa/formats/multi.isa
@@ -60,95 +60,27 @@
 // Instructions that do the same thing to multiple sets of arguments.
 //
 
-output header {{
-}};
-
-output decoder {{
-}};
-
-output exec {{
-}};
-
 let {{
-    multiops = {}
+    def doInst(name, Name, opTypeSet):
+        if not instDict.has_key(Name):
+            raise Exception, "Unrecognized instruction: %s" % Name
+        inst = instDict[Name]()
+        return inst.emit(opTypeSet)
 }};
 
-def format MultiOp(code, switchVal, opTags, *opt_flags) {{
-    # These are C++ statements to create each type of static int. Since we
-    # don't know what will be microcoded and what won't, we can't assume a
-    # particular set of arguments for the constructor.
-    instNew = []
-    orig_code = code
-    opRe = re.compile(r"%(?P<operandNum>[0-9]*)")
-    # Get all the labels out of the code and make a dict for them. We'll do
-    # this once since the position of labels shouldn't need to change at all.
-    ops = assembleMicro(code)
-    labels = buildLabelDict(ops)
-    for tagSet in opTags:
-        # A list of strings which either have the register number to use, or
-        # a piece of code for calculating it.
-        regNums = []
-        code = orig_code
-        # Build up a name for this instructions class using the argument
-        # types. Each variation will get its own name this way.
-        postfix = ''
-        for tag in tagSet:
-            postfix += '_' + tag
-
-        # Figure out what register indexes to use for each operand. This
-        # is where loads/stores could be set up. I need to distinguish
-        # between inputs and outputs.
-        # For right now, the indexes are just an increasing sequence
-        counter = 0
-        for tag in tagSet:
-            regNums.append("%d" % counter)
-            counter += 1
-
-        # Replace the placeholders %0, %1, etc., with the right register
-        # indexes.
-        opMatch = opRe.search(code)
-        while opMatch:
-            opNum = opMatch.group("operandNum")
-            opNum = int(opNum)
-            if opNum > len(regNums):
-                print "No operand type specified for operand %d!" % opNum
-                print "I should bail out here too!"
-            regNum = regNums[opNum]
-            code = opRe.sub(regNum, code, 1)
-            opMatch = opRe.search(code)
-
-        # All the loads which feed this instruction
-        loads = []
-        # All the ops that make up the instruction proper.
-        ops = assembleMicro(code)
-        # Get all the labels out and make a dict for them
-        # All the stores for this instruction's results
-        stores = []
-
-        # Various counts
-        numLoads = len(loads)
-        numOps = len(ops)
-        numStores = len(stores)
-        totalOps = numLoads + numOps + numStores
-        print "There are %d total ops" % totalOps
-
-        # If we can implement this instruction with exactly one microop, just
-        # use that directly.
-        newStmnt = ''
-        if totalOps == 1:
-            newStmnt = ops[0].getAllocator(labels)
-        else:
-            # Build up a macro op. We'll punt on this for now
-            pass
-
-        instNew.append(newStmnt)
+def format Inst(*opTypeSet) {{
+    (header_output,  # same four outputs the MultiInst format unpacks
+     decoder_output,
+     decode_block,
+     exec_output) = doInst(name, Name, list(opTypeSet)).makeList()
+}};
 
-    decodeBlob = 'switch(%s) {\n' % switchVal
-    counter = 0
-    for newStmnt in instNew:
-        decodeBlob += 'case %d: return (X86StaticInst *)(%s);\n' % \
-                      (counter, newStmnt)
-        counter += 1
-    decodeBlob += '}\n'
-    decode_block = decodeBlob
+def format MultiInst(switchVal, *opTypeSets) {{
+    switcher = {}
+    for (count, opTypeSet) in zip(xrange(len(opTypeSets)), opTypeSets):
+        switcher[count] = (opTypeSet,)
+    (header_output,
+     decoder_output,
+     decode_block,
+     exec_output) = doSplitDecode(name, Name, doInst, switchVal, switcher).makeList()
 }};
diff --git a/src/arch/x86/isa/includes.isa b/src/arch/x86/isa/includes.isa
index 65e735b03..3440ec5da 100644
--- a/src/arch/x86/isa/includes.isa
+++ b/src/arch/x86/isa/includes.isa
@@ -83,9 +83,14 @@
 
 ////////////////////////////////////////////////////////////////////
 //
-// Output include file directives.
+// Output include file directives. Also import the python modules we
+// need for all the x86 custom decoder stuff
 //
 
+let {{
+    import copy
+}};
+
 output header {{
 #include <cstring>
 #include <sstream>
diff --git a/src/arch/x86/isa/macroop.isa b/src/arch/x86/isa/macroop.isa
new file mode 100644
index 000000000..663ec7aee
--- /dev/null
+++ b/src/arch/x86/isa/macroop.isa
@@ -0,0 +1,164 @@
+// -*- mode:c++ -*-
+
+// Copyright (c) 2007 The Hewlett-Packard Development Company
+// All rights reserved.
+//
+// Redistribution and use of this software in source and binary forms,
+// with or without modification, are permitted provided that the
+// following conditions are met:
+//
+// The software must be used only for Non-Commercial Use which means any
+// use which is NOT directed to receiving any direct monetary
+// compensation for, or commercial advantage from such use.  Illustrative
+// examples of non-commercial use are academic research, personal study,
+// teaching, education and corporate research & development.
+// Illustrative examples of commercial use are distributing products for
+// commercial advantage and providing services using the software for
+// commercial advantage.
+//
+// If you wish to use this software or functionality therein that may be
+// covered by patents for commercial use, please contact:
+//     Director of Intellectual Property Licensing
+//     Office of Strategy and Technology
+//     Hewlett-Packard Company
+//     1501 Page Mill Road
+//     Palo Alto, California  94304
+//
+// Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.  Redistributions
+// in binary form must reproduce the above copyright notice, this list of
+// conditions and the following disclaimer in the documentation and/or
+// other materials provided with the distribution.  Neither the name of
+// the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.  No right of
+// sublicense is granted herewith.  Derivatives of the software and
+// output created using the software may be prepared, but only for
+// Non-Commercial Uses.  Derivatives of the software may be shared with
+// others provided: (i) the others agree to abide by the list of
+// conditions herein which includes the Non-Commercial Use restrictions;
+// and (ii) such Derivatives of the software include the above copyright
+// notice to acknowledge the contribution from this software where
+// applicable, this list of conditions and the disclaimer below.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Gabe Black
+
+//////////////////////////////////////////////////////////////////////////////
+//
+//  Architecture independent
+//
+
+// Execute method for macroops.
+def template MacroExecPanic {{
+        Fault execute(%(CPU_exec_context)s *, Trace::InstRecord *) const
+        {
+            panic("Tried to execute macroop directly!");
+            return NoFault;
+        }
+}};
+
+output header {{
+
+        // Base class for macroops
+        class MacroOp : public StaticInst
+        {
+          protected:
+            const uint32_t numMicroOps;
+
+            //Constructor.
+            MacroOp(const char *mnem, ExtMachInst _machInst,
+                    uint32_t _numMicroOps)
+                        : StaticInst(mnem, _machInst, No_OpClass),
+                        numMicroOps(_numMicroOps)
+            {
+                assert(numMicroOps);
+                microOps = new StaticInstPtr[numMicroOps];
+                flags[IsMacroOp] = true;
+            }
+
+            ~MacroOp()
+            {
+                delete [] microOps;
+            }
+
+            StaticInstPtr * microOps;
+
+            StaticInstPtr fetchMicroOp(MicroPC microPC)
+            {
+                assert(microPC < numMicroOps);
+                return microOps[microPC];
+            }
+
+            std::string generateDisassembly(Addr pc,
+                    const SymbolTable *symtab) const
+            {
+                return mnemonic;
+            }
+
+            %(MacroExecPanic)s
+        };
+}};
+
+// Basic instruction class declaration template.
+def template MacroDeclare {{
+        /**
+         * Static instruction class for "%(mnemonic)s".
+         */
+        class %(class_name)s : public %(base_class)s
+        {
+          public:
+            // Constructor.
+            %(class_name)s(ExtMachInst machInst);
+        };
+}};
+
+// Basic instruction class constructor template.
+def template MacroConstructor {{
+        inline %(class_name)s::%(class_name)s(ExtMachInst machInst)
+            : %(base_class)s("%(mnemonic)s", machInst, %(num_micro_ops)s)
+        {
+                %(constructor)s;
+                //alloc_micro_ops is the code that sets up the microOps
+                //array in the parent class.
+                %(alloc_micro_ops)s;
+        }
+}};
+
+//////////////////////////////////////////////////////////////////////////////
+//
+//  X86 specific
+//
+
+let {{
+    def genMacroOp(name, Name, opSeq):
+        numMicroOps = len(opSeq)
+        allocMicroOps = ''
+        micropc = 0
+        for op in opSeq:
+            allocMicroOps += \
+                "microOps[%d] = %s;\n" % \
+                (micropc, op.getAllocator('"' + name + '"', True, False, #op.delayed,
+                                          micropc == 0,
+                                          micropc == numMicroOps - 1))
+            micropc += 1
+        iop = InstObjParams(name, Name, 'MacroOp',
+                {'code' : '', 'num_micro_ops' : numMicroOps,
+                'alloc_micro_ops' : allocMicroOps})
+        header_output = MacroDeclare.subst(iop)
+        decoder_output = MacroConstructor.subst(iop)
+        decode_block = BasicDecode.subst(iop)
+        exec_output = ''
+        return (header_output, decoder_output, decode_block, exec_output)
+}};
diff --git a/src/arch/x86/isa/main.isa b/src/arch/x86/isa/main.isa
index fe1d4e515..063d7125d 100644
--- a/src/arch/x86/isa/main.isa
+++ b/src/arch/x86/isa/main.isa
@@ -72,23 +72,55 @@
 
 namespace X86ISA;
 
-//Include the simple microcode assembler
-##include "microasm.isa"
+////////////////////////////////////////////////////////////////////
+//
+// General infrastructure code. These files provide infrastructure
+// which was developed to support x86 but isn't specific to it.
+//
 
-//Include the bitfield definitions
-##include "bitfields.isa"
+//Include code to build macroops.
+##include "macroop.isa"
 
-//Include the operand_types and operand definitions
-##include "operands.isa"
+//Include the simple microcode assembler. This will hopefully stay
+//unspecialized for x86 and can later be made available to other ISAs.
+##include "microasm.isa"
+
+////////////////////////////////////////////////////////////////////
+//
+// X86 only infrastructure code.
+//
 
-//Include the base class for x86 instructions, and some support code
+//Include the base class for x86 instructions, and some support code.
 ##include "base.isa"
 
+//Include code to specialize an instruction template to operate on
+//a particular set of operands. This is specific to x86 and the x86
+//microcode ISA.
+##include "specialize.isa"
+
+////////////////////////////////////////////////////////////////////
+//
+// Code which directly specifies isa components like instructions
+// microops, and the decoder.
+//
+
 //Include the definitions for the instruction formats
 ##include "formats/formats.isa"
 
-//Include the definitions of the micro ops
+//Include the operand_types and operand definitions. These are needed by
+//the microop definitions.
+##include "operands.isa"
+
+//Include the definitions of the micro ops.
+//These are StaticInst classes which stand on their own and make up an
+//internal instruction set.
 ##include "microops/microops.isa"
 
+//Include the instruction definitions which are microop assembler programs.
+##include "insts/insts.isa"
+
+//Include the bitfield definitions
+##include "bitfields.isa"
+
 //Include the decoder definition
 ##include "decoder/decoder.isa"
diff --git a/src/arch/x86/isa/microasm.isa b/src/arch/x86/isa/microasm.isa
index 2abce6e7f..9d21b6bcc 100644
--- a/src/arch/x86/isa/microasm.isa
+++ b/src/arch/x86/isa/microasm.isa
@@ -57,33 +57,59 @@
 
 ////////////////////////////////////////////////////////////////////
 //
-//  Code to "assemble" microcode sequences
+//  The microcode assembler
 //
 
 let {{
-    class MicroOpStatement:
+    # These are used when setting up microops so that they can specialize their
+    # base class template properly.
+    RegOpType = "RegisterOperand"
+    ImmOpType = "ImmediateOperand"
+}};
+
+let {{
+    class MicroOpStatement(object):
         def __init__(self):
             self.className = ''
             self.label = ''
             self.args = []
 
-        def getAllocator(self, labelDict = {}):
+        # This converts a list of python bools into
+        # a comma separated list of C++ bools.
+        def microFlagsText(self, vals):
+            text = ""
+            for val in vals:
+                if val:
+                    text += ", true"
+                else:
+                    text += ", false"
+            return text
+
+        def getAllocator(self, mnemonic, *microFlags):
             args = ''
+            signature = "<"
+            emptySig = True
             for arg in self.args:
-                if arg.has_key("operandConst"):
-                    args += ", %s" % arg["operandConst"]
-                elif arg.has_key("operandCode"):
-                    args += ", %s" % arg["operandCode"]
+                if not emptySig:
+                    signature += ", "
+                emptySig = False
+                if arg.has_key("operandImm"):
+                    args += ", %s" % arg["operandImm"]
+                    signature += ImmOpType
+                elif arg.has_key("operandReg"):
+                    args += ", %s" % arg["operandReg"]
+                    signature += RegOpType
                 elif arg.has_key("operandLabel"):
-                    if not labelDict.has_key(arg["operandLabel"]):
-                        print "Unrecognized label %s!" % arg["operandLabel"]
-                    args += ", %s" % labelDict[arg["operandLabel"]]
+                    raise Exception, "Found a label while creating allocator string."
                 else:
-                    print "Unrecognized operand type!"
-            return 'new %s(machInst %s)' % (self.className, args)
+                    raise Exception, "Unrecognized operand type."
+            signature += ">"
+            return 'new %s%s(machInst, %s%s%s)' % (self.className, signature, mnemonic, self.microFlagsText(microFlags), args)
+}};
 
+let{{
+    def assembleMicro(name, Name, code):
 
-    def assembleMicro(code):
         # This function takes in a block of microcode assembly and returns
         # a python list of objects which describe it.
 
@@ -96,8 +122,8 @@ let {{
         # time. Each expression expects the thing it's looking for to be at
         # the beginning of the line, so the previous component is stripped
         # before continuing.
-        labelRe = re.compile(r'^[ \t]*(?P<label>[a-zA-Z_]\w*)[ \t]:')
-        lineRe = re.compile(r'^(?P<line>[^\n][^\n]*)$')
+        labelRe = re.compile(r'^[ \t]*(?P<label>\w\w*)[ \t]:')
+        lineRe = re.compile(r'^(?P<line>..*)(\n|$)')
         classRe = re.compile(r'^[ \t]*(?P<className>[a-zA-Z_]\w*)')
         # This recognizes three different flavors of operands:
         # 1. Raw decimal numbers composed of digits between 0 and 9
@@ -107,31 +133,37 @@ let {{
         #    underscore, which is optionally followed by a sequence of
         #    capital or small letters, underscores, or digts between 0 and 9
         opRe = re.compile( \
-            r'^[ \t]*((?P<operandLabel>[a-zA-Z_]\w*)|(?P<operandConst>[0-9][0-9]*)|(\{(?P<operandCode>[^}]*)\}))')
+            r'^[ \t]*((\@(?P<operandLabel0>\w\w*))|' +
+                    r'(\@\{(?P<operandLabel1>[^}]*)\})|' +
+                    r'(\%(?P<operandReg0>\w\w*))|' +
+                    r'(\%\{(?P<operandReg1>[^}]*)\})|' +
+                    r'(\$(?P<operandImm0>\w\w*))|' +
+                    r'(\$\{(?P<operandImm1>[^}]*)\}))')
         lineMatch = lineRe.search(code)
         while lineMatch != None:
             statement = MicroOpStatement()
             # Get a line and seperate it from the rest of the code
             line = lineMatch.group("line")
-            print "Parsing line %s" % line
+            orig_line = line
+            #print "Parsing line %s" % line
             code = lineRe.sub('', code, 1)
 
             # Find the label, if any
             labelMatch = labelRe.search(line)
             if labelMatch != None:
                 statement.label = labelMatch.group("label")
-                print "Found label %s." % statement.label
+                #print "Found label %s." % statement.label
             # Clear the label from the statement
             line = labelRe.sub('', line, 1)
 
             # Find the class name which is roughly equivalent to the op name
             classMatch = classRe.search(line)
             if classMatch == None:
-                print "Oh no! I can't find what instruction you want!"
-                print "I should really bail out here, but I don't know how!"
+                raise Exception, "Couldn't find class name in statement: %s" \
+                        % orig_line
             else:
                 statement.className = classMatch.group("className")
-                print "Found class name %s." % statement.className
+                #print "Found class name %s." % statement.className
 
             # Clear the class name from the statement
             line = classRe.sub('', line, 1)
@@ -145,28 +177,55 @@ let {{
                 # representations of operand values. Different forms might be
                 # needed in different places, for instance to replace a label
                 # with an offset.
-                for opType in ("operandLabel", "operandConst", "operandCode"):
+                for opType in ("operandLabel0", "operandReg0", "operandImm0",
+                               "operandLabel1", "operandReg1", "operandImm1"):
                     if opMatch.group(opType):
-                        statement.args[-1][opType] = opMatch.group(opType)
+                        statement.args[-1][opType[:-1]] = opMatch.group(opType)
                 if len(statement.args[-1]) == 0:
-                    print "I had a problem parsing an operand!"
+                    print "Problem parsing operand in statement: %s" \
+                            % orig_line
                 line = opRe.sub('', line, 1)
-                print "Found operand %s." % statement.args[-1]
+                #print "Found operand %s." % statement.args[-1]
                 opMatch = opRe.search(line)
-            print "Found operands", statement.args
+            #print "Found operands", statement.args
 
             # Add this statement to our collection
             statements.append(statement)
 
             # Get the next line
             lineMatch = lineRe.search(code)
-        return statements
 
-    def buildLabelDict(ops):
+        # Decode the labels into displacements.
+        # Pass 1: record the micropc at which each label is defined.
         labels = {}
+        micropc = 0
+        for statement in statements:
+            if statement.label:
+                labels[statement.label] = micropc
+            micropc += 1
+        micropc = 0
+        for statement in statements:
+            for arg in statement.args:
+                if arg.has_key("operandLabel"):
+                    if not labels.has_key(arg["operandLabel"]):
+                        raise Exception, "Unrecognized label: %s." % arg["operandLabel"]
+                    # Pass 2: this is assuming that intra microcode branches
+                    # go to the next micropc + displacement, or
+                    # micropc + 1 + displacement.
+                    arg["operandImm"] = labels[arg["operandLabel"]] - micropc - 1
+            micropc += 1
+
+        if len(statements) == 0:
+            raise Exception, "Didn't find any microops in microcode: \n%s" % orig_code
+
+        # If we can implement this instruction with exactly one microop, just
+        # use that directly.
+        if len(statements) == 1:
+            decode_block = "return %s;" % \
+                            statements[0].getAllocator('"' + name + '"')
+            return ('', '', decode_block, '')
+        else:
+            # Build a macroop to contain the sequence of microops we've
+            # been given.
+            return genMacroOp(name, Name, statements)
 }};
diff --git a/src/arch/x86/isa/microops/base.isa b/src/arch/x86/isa/microops/base.isa
new file mode 100644
index 000000000..f0aab7872
--- /dev/null
+++ b/src/arch/x86/isa/microops/base.isa
@@ -0,0 +1,194 @@
+// -*- mode:c++ -*-
+
+// Copyright (c) 2007 The Hewlett-Packard Development Company
+// All rights reserved.
+//
+// Redistribution and use of this software in source and binary forms,
+// with or without modification, are permitted provided that the
+// following conditions are met:
+//
+// The software must be used only for Non-Commercial Use which means any
+// use which is NOT directed to receiving any direct monetary
+// compensation for, or commercial advantage from such use.  Illustrative
+// examples of non-commercial use are academic research, personal study,
+// teaching, education and corporate research & development.
+// Illustrative examples of commercial use are distributing products for
+// commercial advantage and providing services using the software for
+// commercial advantage.
+//
+// If you wish to use this software or functionality therein that may be
+// covered by patents for commercial use, please contact:
+//     Director of Intellectual Property Licensing
+//     Office of Strategy and Technology
+//     Hewlett-Packard Company
+//     1501 Page Mill Road
+//     Palo Alto, California  94304
+//
+// Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.  Redistributions
+// in binary form must reproduce the above copyright notice, this list of
+// conditions and the following disclaimer in the documentation and/or
+// other materials provided with the distribution.  Neither the name of
+// the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.  No right of
+// sublicense is granted herewith.  Derivatives of the software and
+// output created using the software may be prepared, but only for
+// Non-Commercial Uses.  Derivatives of the software may be shared with
+// others provided: (i) the others agree to abide by the list of
+// conditions herein which includes the Non-Commercial Use restrictions;
+// and (ii) such Derivatives of the software include the above copyright
+// notice to acknowledge the contribution from this software where
+// applicable, this list of conditions and the disclaimer below.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Gabe Black
+
+//The operand types a microop template can be specialized with
+output header {{
+    enum OperandType {
+        RegisterOperand,
+        ImmediateOperand
+    };
+}};
+
+//A class which is the base of all x86 micro ops. It provides a function to
+//set necessary flags appropriately.
+output header {{
+    class X86MicroOpBase : public X86StaticInst
+    {
+      protected:
+        const char * instMnem;
+        uint8_t opSize;
+        uint8_t addrSize;
+
+        X86MicroOpBase(ExtMachInst _machInst,
+                const char *mnem, const char *_instMnem,
+                bool isMicro, bool isDelayed,
+                bool isFirst, bool isLast,
+                OpClass __opClass) :
+            X86StaticInst(mnem, _machInst, __opClass),
+            instMnem(_instMnem)
+        {
+            flags[IsMicroOp] = isMicro;
+            flags[IsDelayedCommit] = isDelayed;
+            flags[IsFirstMicroOp] = isFirst;
+            flags[IsLastMicroOp] = isLast;
+        }
+
+        std::string generateDisassembly(Addr pc,
+                const SymbolTable *symtab) const
+        {
+            std::stringstream ss;
+
+            ccprintf(ss, "\t%s.%s", instMnem, mnemonic);
+
+            return ss.str();
+        }
+    };
+}};
+
+// This sets up a class which is templated on the type of
+// arguments a particular flavor of a microcode instruction
+// can accept. Its parameters are specialized to create polymorphic
+// behavior in microops.
+def template BaseMicroOpTemplateDeclare {{
+    template%(signature)s
+    class %(class_name)s;
+}};
+
+let {{
+    def buildBaseMicroOpTemplate(Name, numParams):
+        assert(numParams > 0)
+        signature = "<"
+        signature += "int SignatureOperandTypeSpecifier0"
+        for count in xrange(1,numParams):
+            signature += \
+                ", int SignatureOperandTypeSpecifier%d" % count
+        signature += ">"
+        subs = {"signature" : signature, "class_name" : Name}
+        return BaseMicroOpTemplateDeclare.subst(subs)  # "template<...> class Name;"
+}};
+
+let {{
+    def buildMicroOpTemplateDict(*params):
+        signature = "<"
+        if len(params):
+            signature += params[0]
+            if len(params) > 1:
+                for param in params[1:]:
+                    signature += ", %s" % param
+        signature += ">"
+        subs = {"param_dec" : "", "param_arg_dec" : "",
+                "param_init" : "", "signature" : signature}
+        for count in xrange(len(params)):
+            subs["param_dec"] += "uint64_t param%d;\n" % count
+            subs["param_arg_dec"] += ", uint64_t _param%d" % count
+            subs["param_init"] += ", param%d(_param%d)" % (count, count)
+        return subs
+}};
+
+// A template for building a specialized version of the microcode
+// instruction which specifies which arguments it wants
+def template MicroOpDeclare {{
+    template<>
+    class %(class_name)s%(signature)s : public X86MicroOpBase
+    {
+      protected:
+        %(param_dec)s
+        void buildMe();
+
+      public:
+        %(class_name)s(ExtMachInst _machInst,
+                const char * instMnem,
+                bool isMicro, bool isDelayed,
+                bool isFirst, bool isLast
+                %(param_arg_dec)s);
+
+        %(class_name)s(ExtMachInst _machInst,
+                const char * instMnem
+                %(param_arg_dec)s);
+
+        %(BasicExecDeclare)s
+    };
+}};
+
+def template MicroOpConstructor {{
+
+    inline void %(class_name)s%(signature)s::buildMe()
+    {
+        %(constructor)s;
+    }
+
+    inline %(class_name)s%(signature)s::%(class_name)s(
+            ExtMachInst machInst, const char * instMnem
+            %(param_arg_dec)s) :
+        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
+                false, false, false, false, %(op_class)s)
+                %(param_init)s
+    {
+        buildMe();
+    }
+
+    inline %(class_name)s%(signature)s::%(class_name)s(
+            ExtMachInst machInst, const char * instMnem,
+            bool isMicro, bool isDelayed, bool isFirst, bool isLast
+            %(param_arg_dec)s)
+        : %(base_class)s(machInst, "%(mnemonic)s", instMnem,
+                isMicro, isDelayed, isFirst, isLast, %(op_class)s)
+                %(param_init)s
+    {
+        buildMe();
+    }
+}};
diff --git a/src/arch/x86/isa/microops/microops.isa b/src/arch/x86/isa/microops/microops.isa
index bbf26f605..d877152eb 100644
--- a/src/arch/x86/isa/microops/microops.isa
+++ b/src/arch/x86/isa/microops/microops.isa
@@ -53,5 +53,11 @@
 //
 // Authors: Gabe Black
 
-//Micro ops
+//Common microop stuff
+##include "base.isa"
+
+//A microop that generates a specified fault
+##include "fault.isa"
+
+//Integer microop definitions
 ##include "int.isa"
diff --git a/src/arch/x86/isa/operands.isa b/src/arch/x86/isa/operands.isa
index 36b0ee4df..af469ab3d 100644
--- a/src/arch/x86/isa/operands.isa
+++ b/src/arch/x86/isa/operands.isa
@@ -96,7 +96,7 @@ def operand_types {{
 }};
 
 def operands {{
-        'IntRegOp0':     ('IntReg', 'udw', 'regIndex0', 'IsInteger', 1),
-        'IntRegOp1':     ('IntReg', 'udw', 'regIndex1', 'IsInteger', 2),
-        'IntRegOp2':     ('IntReg', 'udw', 'regIndex2', 'IsInteger', 2),
+        'IntRegOp0':     ('IntReg', 'udw', 'param0', 'IsInteger', 1),
+        'IntRegOp1':     ('IntReg', 'udw', 'param1', 'IsInteger', 2),
+        'IntRegOp2':     ('IntReg', 'udw', 'param2', 'IsInteger', 2),
 }};
diff --git a/src/arch/x86/isa/specialize.isa b/src/arch/x86/isa/specialize.isa
new file mode 100644
index 000000000..ff92c3551
--- /dev/null
+++ b/src/arch/x86/isa/specialize.isa
@@ -0,0 +1,164 @@
+// -*- mode:c++ -*-
+
+// Copyright (c) 2007 The Hewlett-Packard Development Company
+// All rights reserved.
+//
+// Redistribution and use of this software in source and binary forms,
+// with or without modification, are permitted provided that the
+// following conditions are met:
+//
+// The software must be used only for Non-Commercial Use which means any
+// use which is NOT directed to receiving any direct monetary
+// compensation for, or commercial advantage from such use.  Illustrative
+// examples of non-commercial use are academic research, personal study,
+// teaching, education and corporate research & development.
+// Illustrative examples of commercial use are distributing products for
+// commercial advantage and providing services using the software for
+// commercial advantage.
+//
+// If you wish to use this software or functionality therein that may be
+// covered by patents for commercial use, please contact:
+//     Director of Intellectual Property Licensing
+//     Office of Strategy and Technology
+//     Hewlett-Packard Company
+//     1501 Page Mill Road
+//     Palo Alto, California  94304
+//
+// Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.  Redistributions
+// in binary form must reproduce the above copyright notice, this list of
+// conditions and the following disclaimer in the documentation and/or
+// other materials provided with the distribution.  Neither the name of
+// the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.  No right of
+// sublicense is granted herewith.  Derivatives of the software and
+// output created using the software may be prepared, but only for
+// Non-Commercial Uses.  Derivatives of the software may be shared with
+// others provided: (i) the others agree to abide by the list of
+// conditions herein which includes the Non-Commercial Use restrictions;
+// and (ii) such Derivatives of the software include the above copyright
+// notice to acknowledge the contribution from this software where
+// applicable, this list of conditions and the disclaimer below.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Gabe Black
+
+////////////////////////////////////////////////////////////////////
+//
+//  Code to "specialize" a microcode sequence to use a particular
+//  variety of operands
+//
+
+let {{
+    # Build a decode block that is a switch statement on "switchVal".
+    # "vals" maps each case value to a tuple of extra arguments; "builder" is
+    # called as builder(name, Name, *args) to produce the code for that case.
+    # If "default" is given, it is expanded the same way for the default case.
+    def doSplitDecode(name, Name, builder, switchVal, vals, default = None):
+        blocks = OutputBlocks()
+        blocks.decode_block += 'switch(%s) {\n' % switchVal
+        for (val, todo) in vals.items():
+            built = builder(name, Name, *todo)
+            built.decode_block = '\tcase %s: %s\n' % (val, built.decode_block)
+            blocks.append(built)
+        if default:
+            built = builder(name, Name, *default)
+            built.decode_block = '\tdefault: %s\n' % built.decode_block
+            blocks.append(built)
+        blocks.decode_block += '}\n'
+        return blocks
+}};
+
+let {{
+    class OpType(object):
+        # Accepts "<TAG><size>" (upper case tag, lower case size, e.g. "Eb")
+        # or a fixed register spelled "r<Reg>" (e.g. "rAx").
+        parser = re.compile(r"(?P<tag>[A-Z][A-Z]*)(?P<size>[a-z][a-z]*)|(r(?P<reg>[A-Za-z0-9][A-Za-z0-9]*))")
+        def __init__(self, opTypeString):
+            match = OpType.parser.search(opTypeString)
+            if match is None:
+                raise Exception("Problem parsing operand type %s" % opTypeString)
+            self.reg, self.tag, self.size = match.group("reg", "tag", "size")
+
+    # This function specializes the given piece of code to use a particular
+    # set of argument types described by "opTypes". These are "implemented"
+    # in reverse order.
+    def specializeInst(name, Name, code, opTypes):
+        opTypes = list(opTypes)  # private copy: the loop below pops from it
+        while opTypes:
+            # Operand types are handled last to first; opNum is the number
+            # in the "^N" placeholder that this pass substitutes.
+            opNum = len(opTypes) - 1
+            # A regular expression to find the operand placeholders we're
+            # interested in.
+            opRe = re.compile("\\^(?P<operandNum>%d)(?=[^0-9]|$)" % opNum)
+
+            # Parse the operand type string we're working with
+            opType = OpType(opTypes[opNum])
+
+            if opType.reg:
+                #Figure out what to do with fixed register operands
+                if opType.reg in ("Ax", "Bx", "Cx", "Dx"):
+                    code = opRe.sub("%%{INTREG_R%s}" % opType.reg.upper(), code)
+                elif opType.reg == "Al":
+                    # We need a way to specify register width
+                    code = opRe.sub("%{INTREG_RAX}", code)
+                else:
+                    print("Didn't know how to encode fixed register %s!" % opType.reg)
+            elif opType.tag is None or opType.size is None:
+                raise Exception("Problem parsing operand tag: %s" % opType.tag)
+            elif opType.tag in ("C", "D", "G", "P", "S", "T", "V"):
+                # Use the "reg" field of the ModRM byte to select the register
+                code = opRe.sub("%{(uint8_t)MODRM_REG}", code)
+            elif opType.tag in ("E", "Q", "W"):
+                # This might refer to memory or to a register. We need to
+                # divide it up further.
+                regCode = opRe.sub("%{(uint8_t)MODRM_RM}", code)
+                regTypes = copy.copy(opTypes)
+                regTypes.pop(-1)
+                # This needs to refer to memory, but we'll fill in the details
+                # later. It needs to take into account unaligned memory
+                # addresses.
+                code = "GenFault ${new UnimpInstFault}\n" + code
+                memCode = opRe.sub("%0", code)
+                memTypes = copy.copy(opTypes)
+                memTypes.pop(-1)
+                return doSplitDecode(name, Name, specializeInst, "MODRM_MOD",
+                    {"3" : (regCode, regTypes)}, (memCode, memTypes))
+            elif opType.tag in ("I", "J"):
+                # Immediates are already in the instruction, so don't leave in
+                # those parameters
+                code = opRe.sub("${IMMEDIATE}", code)
+            elif opType.tag == "M":
+                # This needs to refer to memory, but we'll fill in the details
+                # later. It needs to take into account unaligned memory
+                # addresses.
+                code = "GenFault ${new UnimpInstFault}\n" + code
+                code = opRe.sub("%0", code)
+            elif opType.tag in ("PR", "R", "VR"):
+                # There should probably be a check here to verify that mod
+                # is equal to 11b
+                code = opRe.sub("%{(uint8_t)MODRM_RM}", code)
+            else:
+                raise Exception("Unrecognized tag %s." % opType.tag)
+            opTypes.pop(-1)
+
+        # At this point, we've built up "code" to have all the necessary extra
+        # instructions needed to implement whatever types of operands were
+        # specified. Now we'll assemble it into a StaticInst.
+        blocks = OutputBlocks()
+        blocks.append(assembleMicro(name, Name, code))
+        return blocks
+}};
diff --git a/src/arch/x86/predecoder.cc b/src/arch/x86/predecoder.cc
index fbed4fe41..573012ee6 100644
--- a/src/arch/x86/predecoder.cc
+++ b/src/arch/x86/predecoder.cc
@@ -117,37 +117,33 @@ namespace X86ISA
             //Operand size override prefixes
           case OperandSizeOverride:
             DPRINTF(Predecoder, "Found operand size override prefix.\n");
+            emi.legacy.op = true;
             break;
           case AddressSizeOverride:
             DPRINTF(Predecoder, "Found address size override prefix.\n");
+            emi.legacy.addr = true;
             break;
             //Segment override prefixes
           case CSOverride:
-            DPRINTF(Predecoder, "Found cs segment override.\n");
-            break;
           case DSOverride:
-            DPRINTF(Predecoder, "Found ds segment override.\n");
-            break;
           case ESOverride:
-            DPRINTF(Predecoder, "Found es segment override.\n");
-            break;
           case FSOverride:
-            DPRINTF(Predecoder, "Found fs segment override.\n");
-            break;
           case GSOverride:
-            DPRINTF(Predecoder, "Found gs segment override.\n");
-            break;
           case SSOverride:
-            DPRINTF(Predecoder, "Found ss segment override.\n");
+            DPRINTF(Predecoder, "Found segment override.\n");
+            emi.legacy.seg = prefix;
             break;
           case Lock:
             DPRINTF(Predecoder, "Found lock prefix.\n");
+            emi.legacy.lock = true;
             break;
           case Rep:
             DPRINTF(Predecoder, "Found rep prefix.\n");
+            emi.legacy.rep = true;
             break;
           case Repne:
             DPRINTF(Predecoder, "Found repne prefix.\n");
+            emi.legacy.repne = true;
             break;
           case RexPrefix:
             DPRINTF(Predecoder, "Found Rex prefix %#x.\n", nextByte);
@@ -198,25 +194,50 @@ namespace X86ISA
             displacementCollected = 0;
             emi.displacement = 0;
 
+            //Figure out the effective operand size. This can be overridden to
+            //a fixed value at the decoder level.
+            if(/*FIXME long mode*/1)
+            {
+                if(emi.rex && emi.rex.w)
+                    emi.opSize = 3; // 64 bit operand size
+                else if(emi.legacy.op)
+                    emi.opSize = 1; // 16 bit operand size
+                else
+                    emi.opSize = 2; // 32 bit operand size
+            }
+            else if(/*FIXME default 32*/1)
+            {
+                if(emi.legacy.op)
+                    emi.opSize = 1; // 16 bit operand size
+                else
+                    emi.opSize = 2; // 32 bit operand size
+            }
+            else // 16 bit default operand size
+            {
+                if(emi.legacy.op)
+                    emi.opSize = 2; // 32 bit operand size
+                else
+                    emi.opSize = 1; // 16 bit operand size
+            }
+
             //Figure out how big of an immediate we'll retreive based
             //on the opcode.
-            int immType = ImmediateType[
-                emi.opcode.num - 1][nextByte];
-            if(0) //16 bit mode
-                immediateSize = ImmediateTypeToSize[0][immType];
-            else if(!(emi.rex & 0x4)) //32 bit mode
-                immediateSize = ImmediateTypeToSize[1][immType];
-            else //64 bit mode
-                immediateSize = ImmediateTypeToSize[2][immType];
+            int immType = ImmediateType[emi.opcode.num - 1][nextByte];
+            immediateSize = SizeTypeToSize[emi.opSize - 1][immType];
 
             //Determine what to expect next
             if (UsesModRM[emi.opcode.num - 1][nextByte]) {
                 nextState = ModRMState;
-            } else if(immediateSize) {
-                nextState = ImmediateState;
             } else {
-                emiIsReady = true;
-                nextState = PrefixState;
+                //If there's no modRM byte, set it to 0 so we can detect
+                //that later.
+                emi.modRM = 0;
+                if(immediateSize) {
+                    nextState = ImmediateState;
+                } else {
+                    emiIsReady = true;
+                    nextState = PrefixState;
+                }
             }
         }
         return nextState;
@@ -241,11 +262,11 @@ namespace X86ISA
                 displacementSize = 0;
         } else {
             //figure out 32/64 bit displacement size
-            if(nextByte & 0xC7 == 0x05 ||
+            if(nextByte & 0xC6 == 0x04 ||
                     nextByte & 0xC0 == 0x80)
                 displacementSize = 4;
             else if(nextByte & 0xC0 == 0x40)
-                displacementSize = 2;
+                displacementSize = 1;
             else
                 displacementSize = 0;
         }
@@ -346,6 +367,16 @@ namespace X86ISA
 
         if(immediateSize == immediateCollected)
         {
+            //XXX Warning! The following is an observed pattern and might
+            //not always be true!
+
+            //Instructions which use 64 bit operands but 32 bit immediates
+            //need to have the immediate sign extended to 64 bits.
+            //Instructions which use true 64 bit immediates won't be
+            //affected, and instructions that use true 32 bit immediates
+            //won't notice.
+            if(immediateSize == 4)
+                emi.immediate = sext<32>(emi.immediate);
             DPRINTF(Predecoder, "Collected immediate %#x.\n",
                     emi.immediate);
             emiIsReady = true;
diff --git a/src/arch/x86/predecoder.hh b/src/arch/x86/predecoder.hh
index 1df17d6d2..6562ab9f5 100644
--- a/src/arch/x86/predecoder.hh
+++ b/src/arch/x86/predecoder.hh
@@ -73,7 +73,7 @@ namespace X86ISA
         static const uint8_t Prefixes[256];
         static const uint8_t UsesModRM[2][256];
         static const uint8_t ImmediateType[2][256];
-        static const uint8_t ImmediateTypeToSize[3][10];
+        static const uint8_t SizeTypeToSize[3][10];
 
       protected:
         ThreadContext * tc;
diff --git a/src/arch/x86/predecoder_tables.cc b/src/arch/x86/predecoder_tables.cc
index f233ad234..38b9c57a3 100644
--- a/src/arch/x86/predecoder_tables.cc
+++ b/src/arch/x86/predecoder_tables.cc
@@ -141,7 +141,7 @@ namespace X86ISA
         }
     };
 
-    enum ImmediateTypes {
+    enum SizeType {
         NoImm,
         NI = NoImm,
         ByteImm,
@@ -158,19 +158,19 @@ namespace X86ISA
         VW = VWordImm,
         ZWordImm,
         ZW = ZWordImm,
-        Pointer,
-        PO = Pointer,
         //The enter instruction takes -2- immediates for a total of 3 bytes
         Enter,
-        EN = Enter
+        EN = Enter,
+        Pointer,
+        PO = Pointer
     };
 
-    const uint8_t Predecoder::ImmediateTypeToSize[3][10] =
+    const uint8_t Predecoder::SizeTypeToSize[3][10] =
     {
-//       noimm byte word dword qword oword vword zword enter
-        {0,    1,   2,   4,    8,    16,   2,    2,    3,    4}, //16 bit
-        {0,    1,   2,   4,    8,    16,   4,    4,    3,    6}, //32 bit
-        {0,    1,   2,   4,    8,    16,   4,    8,    3,    0}  //64 bit
+//       noimm byte word dword qword oword vword zword enter pointer
+        {0,    1,   2,   4,    8,    16,   2,    2,    3,    4      }, //16 bit
+        {0,    1,   2,   4,    8,    16,   4,    4,    3,    6      }, //32 bit
+        {0,    1,   2,   4,    8,    16,   4,    8,    3,    0      }  //64 bit
     };
 
     //This table determines the immediate type. The first index is the
diff --git a/src/arch/x86/types.hh b/src/arch/x86/types.hh
index ca4a15d24..022f20ee5 100644
--- a/src/arch/x86/types.hh
+++ b/src/arch/x86/types.hh
@@ -70,25 +70,31 @@ namespace X86ISA
     typedef uint64_t MachInst;
 
     enum Prefixes {
-        NoOverride = 0,
-        CSOverride = 1,
-        DSOverride = 2,
-        ESOverride = 3,
-        FSOverride = 4,
-        GSOverride = 5,
-        SSOverride = 6,
-        //The Rex prefix obviously doesn't fit in with the above, but putting
-        //it here lets us save double the space the enums take up.
-        RexPrefix = 7,
+        NoOverride,
+        CSOverride,
+        DSOverride,
+        ESOverride,
+        FSOverride,
+        GSOverride,
+        SSOverride,
+        RexPrefix,
+        OperandSizeOverride,
+        AddressSizeOverride,
+        Lock,
+        Rep,
+        Repne
+    };
+
+    BitUnion8(LegacyPrefixVector)
+        Bitfield<7> repne;
+        Bitfield<6> rep;
+        Bitfield<5> lock;
+        Bitfield<4> addr;
+        Bitfield<3> op;
         //There can be only one segment override, so they share the
         //first 3 bits in the legacyPrefixes bitfield.
-        SegmentOverride = 0x7,
-        OperandSizeOverride = 8,
-        AddressSizeOverride = 16,
-        Lock = 32,
-        Rep = 64,
-        Repne = 128
-    };
+        Bitfield<2,0> seg;
+    EndBitUnion(LegacyPrefixVector)
 
     BitUnion8(ModRM)
         Bitfield<7,6> mod;
@@ -118,7 +124,7 @@ namespace X86ISA
     struct ExtMachInst
     {
         //Prefixes
-        uint8_t legacy;
+        LegacyPrefixVector legacy;
         Rex rex;
         //This holds all of the bytes of the opcode
         struct
@@ -140,6 +146,10 @@ namespace X86ISA
         //Immediate fields
         uint64_t immediate;
         uint64_t displacement;
+
+        //The effective operand size.
+        uint8_t opSize;
+        //The
     };
 
     inline static std::ostream &
@@ -161,7 +171,30 @@ namespace X86ISA
     inline static bool
         operator == (const ExtMachInst &emi1, const ExtMachInst &emi2)
     {
-        //Since this is empty, it's always equal
+        //Field by field comparison. Every field that feeds
+        //hash<ExtMachInst> must be compared here as well.
+        if(emi1.legacy != emi2.legacy)
+            return false;
+        if(emi1.rex != emi2.rex)
+            return false;
+        if(emi1.opcode.num != emi2.opcode.num)
+            return false;
+        if(emi1.opcode.op != emi2.opcode.op)
+            return false;
+        if(emi1.opcode.prefixA != emi2.opcode.prefixA)
+            return false;
+        if(emi1.opcode.prefixB != emi2.opcode.prefixB)
+            return false;
+        if(emi1.modRM != emi2.modRM)
+            return false;
+        if(emi1.sib != emi2.sib)
+            return false;
+        if(emi1.immediate != emi2.immediate)
+            return false;
+        if(emi1.displacement != emi2.displacement)
+            return false;
+        if(emi1.opSize != emi2.opSize)
+            return false;
         return true;
     }
 
diff --git a/src/arch/x86/utility.hh b/src/arch/x86/utility.hh
index d89e223de..1c98e7fbc 100644
--- a/src/arch/x86/utility.hh
+++ b/src/arch/x86/utility.hh
@@ -70,8 +70,16 @@ namespace __hash_namespace {
     template<>
     struct hash<X86ISA::ExtMachInst> {
         size_t operator()(const X86ISA::ExtMachInst &emi) const {
-            //Because these are all the same, return 0
-            return 0;
+            return (((uint64_t)emi.legacy << 56) |
+                    ((uint64_t)emi.rex  << 48) |
+                    ((uint64_t)emi.modRM << 40) |
+                    ((uint64_t)emi.sib << 32) |
+                    ((uint64_t)emi.opcode.num << 24) |
+                    ((uint64_t)emi.opcode.prefixA << 16) |
+                    ((uint64_t)emi.opcode.prefixB << 8) |
+                    ((uint64_t)emi.opcode.op)) ^
+                    emi.immediate ^ emi.displacement ^
+                    emi.opSize;
         };
     };
 }
diff --git a/src/base/SConscript b/src/base/SConscript
index 6fc140145..5e4aaafc2 100644
--- a/src/base/SConscript
+++ b/src/base/SConscript
@@ -97,3 +97,5 @@ Source('stats/visit.cc')
 if env['USE_MYSQL']:
     Source('mysql.cc')
     Source('stats/mysql.cc')
+
+PySource('m5', 'traceflags.py')
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index 6c6d90076..eed05c2f1 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -877,6 +877,11 @@ BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
         effAddrValid = true;
         physEffAddr = req->getPaddr();
         memReqFlags = req->getFlags();
+
+        if (req->isCondSwap()) {
+            assert(res);
+            req->setExtraData(*res);
+        }
 #if 0
         if (cpu->system->memctrl->badaddr(physEffAddr)) {
             fault = TheISA::genMachineCheckFault();
diff --git a/src/cpu/exetrace.cc b/src/cpu/exetrace.cc
index c568b1439..3e2b0f03e 100644
--- a/src/cpu/exetrace.cc
+++ b/src/cpu/exetrace.cc
@@ -652,7 +652,7 @@ Trace::InstRecord::dump()
                         predecoder.setTC(thread);
                         predecoder.moreBytes(m5Pc, 0, shared_data->instruction);
 
-                        assert(predecoder.extMachInstRead());
+                        assert(predecoder.extMachInstReady());
 
                         StaticInstPtr legionInst =
                             StaticInst::decode(predecoder.getExtMachInst());
diff --git a/src/cpu/o3/alpha/cpu_impl.hh b/src/cpu/o3/alpha/cpu_impl.hh
index 304ee6c38..1754300d2 100644
--- a/src/cpu/o3/alpha/cpu_impl.hh
+++ b/src/cpu/o3/alpha/cpu_impl.hh
@@ -55,7 +55,7 @@
 #endif
 
 template <class Impl>
-AlphaO3CPU<Impl>::AlphaO3CPU(Params *params) : FullO3CPU<Impl>(params)
+AlphaO3CPU<Impl>::AlphaO3CPU(Params *params) : FullO3CPU<Impl>(this, params)
 {
     DPRINTF(O3CPU, "Creating AlphaO3CPU object.\n");
 
@@ -124,17 +124,6 @@ AlphaO3CPU<Impl>::AlphaO3CPU(Params *params) : FullO3CPU<Impl>(params)
         this->thread[i]->setFuncExeInst(0);
     }
 
-    // Sets CPU pointers. These must be set at this level because the CPU
-    // pointers are defined to be the highest level of CPU class.
-    this->fetch.setCPU(this);
-    this->decode.setCPU(this);
-    this->rename.setCPU(this);
-    this->iew.setCPU(this);
-    this->commit.setCPU(this);
-
-    this->rob.setCPU(this);
-    this->regFile.setCPU(this);
-
     lockAddr = 0;
     lockFlag = false;
 }
diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh
index e2ad23954..fba618c14 100644
--- a/src/cpu/o3/commit.hh
+++ b/src/cpu/o3/commit.hh
@@ -136,7 +136,7 @@ class DefaultCommit
 
   public:
     /** Construct a DefaultCommit with the given parameters. */
-    DefaultCommit(Params *params);
+    DefaultCommit(O3CPU *_cpu, Params *params);
 
     /** Returns the name of the DefaultCommit. */
     std::string name() const;
@@ -144,9 +144,6 @@ class DefaultCommit
     /** Registers statistics. */
     void regStats();
 
-    /** Sets the CPU pointer. */
-    void setCPU(O3CPU *cpu_ptr);
-
     /** Sets the list of threads. */
     void setThreads(std::vector<Thread *> &threads);
 
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index 3fd85595f..65625065d 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -71,8 +71,9 @@ DefaultCommit<Impl>::TrapEvent::description()
 }
 
 template <class Impl>
-DefaultCommit<Impl>::DefaultCommit(Params *params)
-    : squashCounter(0),
+DefaultCommit<Impl>::DefaultCommit(O3CPU *_cpu, Params *params)
+    : cpu(_cpu),
+      squashCounter(0),
       iewToCommitDelay(params->iewToCommitDelay),
       commitToIEWDelay(params->commitToIEWDelay),
       renameToROBDelay(params->renameToROBDelay),
@@ -227,20 +228,6 @@ DefaultCommit<Impl>::regStats()
 
 template <class Impl>
 void
-DefaultCommit<Impl>::setCPU(O3CPU *cpu_ptr)
-{
-    DPRINTF(Commit, "Commit: Setting CPU pointer.\n");
-    cpu = cpu_ptr;
-
-    // Commit must broadcast the number of free entries it has at the start of
-    // the simulation, so it starts as active.
-    cpu->activateStage(O3CPU::CommitIdx);
-
-    trapLatency = cpu->cycles(trapLatency);
-}
-
-template <class Impl>
-void
 DefaultCommit<Impl>::setThreads(std::vector<Thread *> &threads)
 {
     thread = threads;
@@ -250,7 +237,6 @@ template <class Impl>
 void
 DefaultCommit<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
 {
-    DPRINTF(Commit, "Commit: Setting time buffer pointer.\n");
     timeBuffer = tb_ptr;
 
     // Setup wire to send information back to IEW.
@@ -264,7 +250,6 @@ template <class Impl>
 void
 DefaultCommit<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
 {
-    DPRINTF(Commit, "Commit: Setting fetch queue pointer.\n");
     fetchQueue = fq_ptr;
 
     // Setup wire to get instructions from rename (for the ROB).
@@ -275,7 +260,6 @@ template <class Impl>
 void
 DefaultCommit<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
 {
-    DPRINTF(Commit, "Commit: Setting rename queue pointer.\n");
     renameQueue = rq_ptr;
 
     // Setup wire to get instructions from rename (for the ROB).
@@ -286,7 +270,6 @@ template <class Impl>
 void
 DefaultCommit<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr)
 {
-    DPRINTF(Commit, "Commit: Setting IEW queue pointer.\n");
     iewQueue = iq_ptr;
 
     // Setup wire to get instructions from IEW.
@@ -304,7 +287,6 @@ template<class Impl>
 void
 DefaultCommit<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
 {
-    DPRINTF(Commit, "Commit: Setting active threads list pointer.\n");
     activeThreads = at_ptr;
 }
 
@@ -312,8 +294,6 @@ template <class Impl>
 void
 DefaultCommit<Impl>::setRenameMap(RenameMap rm_ptr[])
 {
-    DPRINTF(Commit, "Setting rename map pointers.\n");
-
     for (int i=0; i < numThreads; i++) {
         renameMap[i] = &rm_ptr[i];
     }
@@ -323,7 +303,6 @@ template <class Impl>
 void
 DefaultCommit<Impl>::setROB(ROB *rob_ptr)
 {
-    DPRINTF(Commit, "Commit: Setting ROB pointer.\n");
     rob = rob_ptr;
 }
 
@@ -341,7 +320,12 @@ DefaultCommit<Impl>::initStage()
         toIEW->commitInfo[i].emptyROB = true;
     }
 
+    // Commit must broadcast the number of free entries it has at the
+    // start of the simulation, so it starts as active.
+    cpu->activateStage(O3CPU::CommitIdx);
+
     cpu->activityThisCycle();
+    trapLatency = cpu->cycles(trapLatency);
 }
 
 template <class Impl>
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 354e3c490..2e6a43f9c 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -148,7 +148,7 @@ FullO3CPU<Impl>::DeallocateContextEvent::description()
 }
 
 template <class Impl>
-FullO3CPU<Impl>::FullO3CPU(Params *params)
+FullO3CPU<Impl>::FullO3CPU(O3CPU *o3_cpu, Params *params)
     : BaseO3CPU(params),
 #if FULL_SYSTEM
       itb(params->itb),
@@ -156,19 +156,21 @@ FullO3CPU<Impl>::FullO3CPU(Params *params)
 #endif
       tickEvent(this),
       removeInstsThisCycle(false),
-      fetch(params),
-      decode(params),
-      rename(params),
-      iew(params),
-      commit(params),
+      fetch(o3_cpu, params),
+      decode(o3_cpu, params),
+      rename(o3_cpu, params),
+      iew(o3_cpu, params),
+      commit(o3_cpu, params),
 
-      regFile(params->numPhysIntRegs, params->numPhysFloatRegs),
+      regFile(o3_cpu, params->numPhysIntRegs,
+              params->numPhysFloatRegs),
 
       freeList(params->numberOfThreads,
                TheISA::NumIntRegs, params->numPhysIntRegs,
                TheISA::NumFloatRegs, params->numPhysFloatRegs),
 
-      rob(params->numROBEntries, params->squashWidth,
+      rob(o3_cpu,
+          params->numROBEntries, params->squashWidth,
           params->smtROBPolicy, params->smtROBThreshold,
           params->numberOfThreads),
 
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index 0ab20ba2a..e71d05c8e 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -95,6 +95,7 @@ class FullO3CPU : public BaseO3CPU
     typedef typename Impl::CPUPol CPUPolicy;
     typedef typename Impl::Params Params;
     typedef typename Impl::DynInstPtr DynInstPtr;
+    typedef typename Impl::O3CPU O3CPU;
 
     typedef O3ThreadState<Impl> Thread;
 
@@ -256,7 +257,7 @@ class FullO3CPU : public BaseO3CPU
 
   public:
     /** Constructs a CPU with the given parameters. */
-    FullO3CPU(Params *params);
+    FullO3CPU(O3CPU *o3_cpu, Params *params);
     /** Destructor. */
     ~FullO3CPU();
 
diff --git a/src/cpu/o3/decode.hh b/src/cpu/o3/decode.hh
index 4a845e670..3e82033ca 100644
--- a/src/cpu/o3/decode.hh
+++ b/src/cpu/o3/decode.hh
@@ -86,7 +86,7 @@ class DefaultDecode
 
   public:
     /** DefaultDecode constructor. */
-    DefaultDecode(Params *params);
+    DefaultDecode(O3CPU *_cpu, Params *params);
 
     /** Returns the name of decode. */
     std::string name() const;
@@ -94,9 +94,6 @@ class DefaultDecode
     /** Registers statistics. */
     void regStats();
 
-    /** Sets CPU pointer. */
-    void setCPU(O3CPU *cpu_ptr);
-
     /** Sets the main backwards communication time buffer pointer. */
     void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
 
diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh
index 79a0bfdbf..314864f94 100644
--- a/src/cpu/o3/decode_impl.hh
+++ b/src/cpu/o3/decode_impl.hh
@@ -31,8 +31,9 @@
 #include "cpu/o3/decode.hh"
 
 template<class Impl>
-DefaultDecode<Impl>::DefaultDecode(Params *params)
-    : renameToDecodeDelay(params->renameToDecodeDelay),
+DefaultDecode<Impl>::DefaultDecode(O3CPU *_cpu, Params *params)
+    : cpu(_cpu),
+      renameToDecodeDelay(params->renameToDecodeDelay),
       iewToDecodeDelay(params->iewToDecodeDelay),
       commitToDecodeDelay(params->commitToDecodeDelay),
       fetchToDecodeDelay(params->fetchToDecodeDelay),
@@ -112,17 +113,8 @@ DefaultDecode<Impl>::regStats()
 
 template<class Impl>
 void
-DefaultDecode<Impl>::setCPU(O3CPU *cpu_ptr)
-{
-    DPRINTF(Decode, "Setting CPU pointer.\n");
-    cpu = cpu_ptr;
-}
-
-template<class Impl>
-void
 DefaultDecode<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
 {
-    DPRINTF(Decode, "Setting time buffer pointer.\n");
     timeBuffer = tb_ptr;
 
     // Setup wire to write information back to fetch.
@@ -138,7 +130,6 @@ template<class Impl>
 void
 DefaultDecode<Impl>::setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr)
 {
-    DPRINTF(Decode, "Setting decode queue pointer.\n");
     decodeQueue = dq_ptr;
 
     // Setup wire to write information to proper place in decode queue.
@@ -149,7 +140,6 @@ template<class Impl>
 void
 DefaultDecode<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
 {
-    DPRINTF(Decode, "Setting fetch queue pointer.\n");
     fetchQueue = fq_ptr;
 
     // Setup wire to read information from fetch queue.
@@ -160,7 +150,6 @@ template<class Impl>
 void
 DefaultDecode<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
 {
-    DPRINTF(Decode, "Setting active threads list pointer.\n");
     activeThreads = at_ptr;
 }
 
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index 811f4d2bc..241935416 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -160,7 +160,7 @@ class DefaultFetch
 
   public:
     /** DefaultFetch constructor. */
-    DefaultFetch(Params *params);
+    DefaultFetch(O3CPU *_cpu, Params *params);
 
     /** Returns the name of fetch. */
     std::string name() const;
@@ -171,9 +171,6 @@ class DefaultFetch
     /** Returns the icache port. */
     Port *getIcachePort() { return icachePort; }
 
-    /** Sets CPU pointer. */
-    void setCPU(O3CPU *cpu_ptr);
-
     /** Sets the main backwards communication time buffer pointer. */
     void setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer);
 
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index 34b06420d..e16f97558 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -110,8 +110,9 @@ DefaultFetch<Impl>::IcachePort::recvRetry()
 }
 
 template<class Impl>
-DefaultFetch<Impl>::DefaultFetch(Params *params)
-    : branchPred(params),
+DefaultFetch<Impl>::DefaultFetch(O3CPU *_cpu, Params *params)
+    : cpu(_cpu),
+      branchPred(params),
       predecoder(NULL),
       decodeToFetchDelay(params->decodeToFetchDelay),
       renameToFetchDelay(params->renameToFetchDelay),
@@ -163,6 +164,17 @@ DefaultFetch<Impl>::DefaultFetch(Params *params)
 
     // Get the size of an instruction.
     instSize = sizeof(TheISA::MachInst);
+
+    // Name is finally available, so create the port.
+    icachePort = new IcachePort(this);
+
+    icachePort->snoopRangeSent = false;
+
+#if USE_CHECKER
+    if (cpu->checker) {
+        cpu->checker->setIcachePort(icachePort);
+    }
+#endif
 }
 
 template <class Impl>
@@ -264,35 +276,8 @@ DefaultFetch<Impl>::regStats()
 
 template<class Impl>
 void
-DefaultFetch<Impl>::setCPU(O3CPU *cpu_ptr)
-{
-    DPRINTF(Fetch, "Setting the CPU pointer.\n");
-    cpu = cpu_ptr;
-
-    // Name is finally available, so create the port.
-    icachePort = new IcachePort(this);
-
-    icachePort->snoopRangeSent = false;
-
-#if USE_CHECKER
-    if (cpu->checker) {
-        cpu->checker->setIcachePort(icachePort);
-    }
-#endif
-
-    // Schedule fetch to get the correct PC from the CPU
-    // scheduleFetchStartupEvent(1);
-
-    // Fetch needs to start fetching instructions at the very beginning,
-    // so it must start up in active state.
-    switchToActive();
-}
-
-template<class Impl>
-void
 DefaultFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
 {
-    DPRINTF(Fetch, "Setting the time buffer pointer.\n");
     timeBuffer = time_buffer;
 
     // Create wires to get information from proper places in time buffer.
@@ -306,7 +291,6 @@ template<class Impl>
 void
 DefaultFetch<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
 {
-    DPRINTF(Fetch, "Setting active threads list pointer.\n");
     activeThreads = at_ptr;
 }
 
@@ -314,7 +298,6 @@ template<class Impl>
 void
 DefaultFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
 {
-    DPRINTF(Fetch, "Setting the fetch queue pointer.\n");
     fetchQueue = fq_ptr;
 
     // Create wire to write information to proper place in fetch queue.
@@ -345,6 +328,13 @@ DefaultFetch<Impl>::initStage()
         stalls[tid].iew = false;
         stalls[tid].commit = false;
     }
+
+    // Schedule fetch to get the correct PC from the CPU
+    // scheduleFetchStartupEvent(1);
+
+    // Fetch needs to start fetching instructions at the very beginning,
+    // so it must start up in active state.
+    switchToActive();
 }
 
 template<class Impl>
diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh
index a400c9fa8..ce2991cfb 100644
--- a/src/cpu/o3/iew.hh
+++ b/src/cpu/o3/iew.hh
@@ -115,7 +115,7 @@ class DefaultIEW
 
   public:
     /** Constructs a DefaultIEW with the given parameters. */
-    DefaultIEW(Params *params);
+    DefaultIEW(O3CPU *_cpu, Params *params);
 
     /** Returns the name of the DefaultIEW stage. */
     std::string name() const;
@@ -129,9 +129,6 @@ class DefaultIEW
     /** Returns the dcache port. */
     Port *getDcachePort() { return ldstQueue.getDcachePort(); }
 
-    /** Sets CPU pointer for IEW, IQ, and LSQ. */
-    void setCPU(O3CPU *cpu_ptr);
-
     /** Sets main time buffer used for backwards communication. */
     void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
 
@@ -367,16 +364,6 @@ class DefaultIEW
     /** Scoreboard pointer. */
     Scoreboard* scoreboard;
 
-  public:
-    /** Instruction queue. */
-    IQ instQueue;
-
-    /** Load / store queue. */
-    LSQ ldstQueue;
-
-    /** Pointer to the functional unit pool. */
-    FUPool *fuPool;
-
   private:
     /** CPU pointer. */
     O3CPU *cpu;
@@ -398,6 +385,14 @@ class DefaultIEW
     void printAvailableInsts();
 
   public:
+    /** Instruction queue. */
+    IQ instQueue;
+
+    /** Load / store queue. */
+    LSQ ldstQueue;
+
+    /** Pointer to the functional unit pool. */
+    FUPool *fuPool;
     /** Records if the LSQ needs to be updated on the next cycle, so that
      * IEW knows if there will be activity on the next cycle.
      */
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index 4883e5a5c..62e656e93 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -39,10 +39,11 @@
 #include "cpu/o3/iew.hh"
 
 template<class Impl>
-DefaultIEW<Impl>::DefaultIEW(Params *params)
+DefaultIEW<Impl>::DefaultIEW(O3CPU *_cpu, Params *params)
     : issueToExecQueue(params->backComSize, params->forwardComSize),
-      instQueue(params),
-      ldstQueue(params),
+      cpu(_cpu),
+      instQueue(_cpu, this, params),
+      ldstQueue(_cpu, this, params),
       fuPool(params->fuPool),
       commitToIEWDelay(params->commitToIEWDelay),
       renameToIEWDelay(params->renameToIEWDelay),
@@ -64,9 +65,6 @@ DefaultIEW<Impl>::DefaultIEW(Params *params)
     // Instruction queue needs the queue between issue and execute.
     instQueue.setIssueToExecuteQueue(&issueToExecQueue);
 
-    instQueue.setIEW(this);
-    ldstQueue.setIEW(this);
-
     for (int i=0; i < numThreads; i++) {
         dispatchStatus[i] = Running;
         stalls[i].commit = false;
@@ -276,17 +274,6 @@ DefaultIEW<Impl>::initStage()
         toRename->iewInfo[tid].freeLSQEntries =
             ldstQueue.numFreeEntries(tid);
     }
-}
-
-template<class Impl>
-void
-DefaultIEW<Impl>::setCPU(O3CPU *cpu_ptr)
-{
-    DPRINTF(IEW, "Setting CPU pointer.\n");
-    cpu = cpu_ptr;
-
-    instQueue.setCPU(cpu_ptr);
-    ldstQueue.setCPU(cpu_ptr);
 
     cpu->activateStage(O3CPU::IEWIdx);
 }
@@ -295,7 +282,6 @@ template<class Impl>
 void
 DefaultIEW<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
 {
-    DPRINTF(IEW, "Setting time buffer pointer.\n");
     timeBuffer = tb_ptr;
 
     // Setup wire to read information from time buffer, from commit.
@@ -314,7 +300,6 @@ template<class Impl>
 void
 DefaultIEW<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
 {
-    DPRINTF(IEW, "Setting rename queue pointer.\n");
     renameQueue = rq_ptr;
 
     // Setup wire to read information from rename queue.
@@ -325,7 +310,6 @@ template<class Impl>
 void
 DefaultIEW<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr)
 {
-    DPRINTF(IEW, "Setting IEW queue pointer.\n");
     iewQueue = iq_ptr;
 
     // Setup wire to write instructions to commit.
@@ -336,7 +320,6 @@ template<class Impl>
 void
 DefaultIEW<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
 {
-    DPRINTF(IEW, "Setting active threads list pointer.\n");
     activeThreads = at_ptr;
 
     ldstQueue.setActiveThreads(at_ptr);
@@ -347,7 +330,6 @@ template<class Impl>
 void
 DefaultIEW<Impl>::setScoreboard(Scoreboard *sb_ptr)
 {
-    DPRINTF(IEW, "Setting scoreboard pointer.\n");
     scoreboard = sb_ptr;
 }
 
diff --git a/src/cpu/o3/inst_queue.hh b/src/cpu/o3/inst_queue.hh
index 3dd4dc658..9d7c457ca 100644
--- a/src/cpu/o3/inst_queue.hh
+++ b/src/cpu/o3/inst_queue.hh
@@ -110,7 +110,7 @@ class InstructionQueue
     };
 
     /** Constructs an IQ. */
-    InstructionQueue(Params *params);
+    InstructionQueue(O3CPU *cpu_ptr, IEW *iew_ptr, Params *params);
 
     /** Destructs the IQ. */
     ~InstructionQueue();
@@ -124,15 +124,9 @@ class InstructionQueue
     /** Resets all instruction queue state. */
     void resetState();
 
-    /** Sets CPU pointer. */
-    void setCPU(O3CPU *_cpu) { cpu = _cpu; }
-
     /** Sets active threads list. */
     void setActiveThreads(std::list<unsigned> *at_ptr);
 
-    /** Sets the IEW pointer. */
-    void setIEW(IEW *iew_ptr) { iewStage = iew_ptr; }
-
     /** Sets the timer buffer between issue and execute. */
     void setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2eQueue);
 
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh
index 79e03d4bf..10c3287f2 100644
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -64,8 +64,11 @@ InstructionQueue<Impl>::FUCompletion::description()
 }
 
 template <class Impl>
-InstructionQueue<Impl>::InstructionQueue(Params *params)
-    : fuPool(params->fuPool),
+InstructionQueue<Impl>::InstructionQueue(O3CPU *cpu_ptr, IEW *iew_ptr,
+                                         Params *params)
+    : cpu(cpu_ptr),
+      iewStage(iew_ptr),
+      fuPool(params->fuPool),
       numEntries(params->numIQEntries),
       totalWidth(params->issueWidth),
       numPhysIntRegs(params->numPhysIntRegs),
@@ -81,8 +84,6 @@ InstructionQueue<Impl>::InstructionQueue(Params *params)
     // Set the number of physical registers as the number of int + float
     numPhysRegs = numPhysIntRegs + numPhysFloatRegs;
 
-    DPRINTF(IQ, "There are %i physical registers.\n", numPhysRegs);
-
     //Create an entry for each physical register within the
     //dependency graph.
     dependGraph.resize(numPhysRegs);
@@ -126,7 +127,6 @@ InstructionQueue<Impl>::InstructionQueue(Params *params)
 
         DPRINTF(IQ, "IQ sharing policy set to Partitioned:"
                 "%i entries per thread.\n",part_amt);
-
     } else if (policy == "threshold") {
         iqPolicy = Threshold;
 
@@ -360,7 +360,6 @@ template <class Impl>
 void
 InstructionQueue<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
 {
-    DPRINTF(IQ, "Setting active threads list pointer.\n");
     activeThreads = at_ptr;
 }
 
@@ -368,15 +367,13 @@ template <class Impl>
 void
 InstructionQueue<Impl>::setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2e_ptr)
 {
-    DPRINTF(IQ, "Set the issue to execute queue.\n");
     issueToExecuteQueue = i2e_ptr;
 }
 
 template <class Impl>
 void
 InstructionQueue<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
 {
-    DPRINTF(IQ, "Set the time buffer.\n");
     timeBuffer = tb_ptr;
 
     fromCommit = timeBuffer->getWire(-commitToIEWDelay);
diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh
index 80f53a726..fd8f878a7 100644
--- a/src/cpu/o3/lsq.hh
+++ b/src/cpu/o3/lsq.hh
@@ -57,7 +57,7 @@ class LSQ {
     };
 
     /** Constructs an LSQ with the given parameters. */
-    LSQ(Params *params);
+    LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, Params *params);
 
     /** Returns the name of the LSQ. */
     std::string name() const;
@@ -74,10 +74,6 @@ class LSQ {
 
     /** Sets the pointer to the list of active threads. */
     void setActiveThreads(std::list<unsigned> *at_ptr);
-    /** Sets the CPU pointer. */
-    void setCPU(O3CPU *cpu_ptr);
-    /** Sets the IEW stage pointer. */
-    void setIEW(IEW *iew_ptr);
     /** Switches out the LSQ. */
     void switchOut();
     /** Takes over execution from another CPU's thread. */
@@ -283,6 +279,12 @@ class LSQ {
     template <class T>
     Fault write(RequestPtr req, T &data, int store_idx);
 
+    /** The CPU pointer. */
+    O3CPU *cpu;
+
+    /** The IEW stage pointer. */
+    IEW *iewStage;
+
     /** DcachePort class for this LSQ.  Handles doing the
      * communication with the cache/memory.
      */
@@ -295,7 +297,7 @@ class LSQ {
       public:
         /** Default constructor. */
         DcachePort(LSQ *_lsq)
-            : lsq(_lsq)
+            : Port(_lsq->name() + "-dport"), lsq(_lsq)
         { }
 
         bool snoopRangeSent;
@@ -341,12 +343,6 @@ class LSQ {
     /** The LSQ units for individual threads. */
     LSQUnit thread[Impl::MaxThreads];
 
-    /** The CPU pointer. */
-    O3CPU *cpu;
-
-    /** The IEW stage pointer. */
-    IEW *iewStage;
-
     /** List of Active Threads in System. */
     std::list<unsigned> *activeThreads;
 
diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh
index d4994fcb7..b4a6a02da 100644
--- a/src/cpu/o3/lsq_impl.hh
+++ b/src/cpu/o3/lsq_impl.hh
@@ -107,13 +107,13 @@ LSQ<Impl>::DcachePort::recvRetry()
 }
 
 template <class Impl>
-LSQ<Impl>::LSQ(Params *params)
-    : dcachePort(this), LQEntries(params->LQEntries),
-      SQEntries(params->SQEntries), numThreads(params->numberOfThreads),
+LSQ<Impl>::LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, Params *params)
+    : cpu(cpu_ptr), iewStage(iew_ptr), dcachePort(this),
+      LQEntries(params->LQEntries),
+      SQEntries(params->SQEntries),
+      numThreads(params->numberOfThreads),
       retryTid(-1)
 {
-    DPRINTF(LSQ, "Creating LSQ object.\n");
-
     dcachePort.snoopRangeSent = false;
 
     //**********************************************/
@@ -133,7 +133,6 @@ LSQ<Impl>::LSQ(Params *params)
         maxSQEntries = SQEntries;
 
         DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n");
-
     } else if (policy == "partitioned") {
         lsqPolicy = Partitioned;
 
@@ -144,7 +143,6 @@ LSQ<Impl>::LSQ(Params *params)
         DPRINTF(Fetch, "LSQ sharing policy set to Partitioned: "
                 "%i entries per LQ | %i entries per SQ",
                 maxLQEntries,maxSQEntries);
-
     } else if (policy == "threshold") {
         lsqPolicy = Threshold;
 
@@ -160,7 +158,6 @@ LSQ<Impl>::LSQ(Params *params)
         DPRINTF(LSQ, "LSQ sharing policy set to Threshold: "
                 "%i entries per LQ | %i entries per SQ",
                 maxLQEntries,maxSQEntries);
-
     } else {
         assert(0 && "Invalid LSQ Sharing Policy.Options Are:{Dynamic,"
                     "Partitioned, Threshold}");
@@ -168,7 +165,8 @@ LSQ<Impl>::LSQ(Params *params)
 
     //Initialize LSQs
     for (int tid=0; tid < numThreads; tid++) {
-        thread[tid].init(params, this, maxLQEntries, maxSQEntries, tid);
+        thread[tid].init(cpu, iew_ptr, params, this,
+                         maxLQEntries, maxSQEntries, tid);
         thread[tid].setDcachePort(&dcachePort);
     }
 }
@@ -199,30 +197,6 @@ LSQ<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
     assert(activeThreads != 0);
 }
 
-template<class Impl>
-void
-LSQ<Impl>::setCPU(O3CPU *cpu_ptr)
-{
-    cpu = cpu_ptr;
-
-    dcachePort.setName(name());
-
-    for (int tid=0; tid < numThreads; tid++) {
-        thread[tid].setCPU(cpu_ptr);
-    }
-}
-
-template<class Impl>
-void
-LSQ<Impl>::setIEW(IEW *iew_ptr)
-{
-    iewStage = iew_ptr;
-
-    for (int tid=0; tid < numThreads; tid++) {
-        thread[tid].setIEW(iew_ptr);
-    }
-}
-
 template <class Impl>
 void
 LSQ<Impl>::switchOut()
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index 1b10843f5..f24de20d9 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -73,8 +73,8 @@ class LSQUnit {
     LSQUnit();
 
     /** Initializes the LSQ unit with the specified number of entries. */
-    void init(Params *params, LSQ *lsq_ptr, unsigned maxLQEntries,
-              unsigned maxSQEntries, unsigned id);
+    void init(O3CPU *cpu_ptr, IEW *iew_ptr, Params *params, LSQ *lsq_ptr,
+              unsigned maxLQEntries, unsigned maxSQEntries, unsigned id);
 
     /** Returns the name of the LSQ unit. */
     std::string name() const;
@@ -82,16 +82,8 @@ class LSQUnit {
     /** Registers statistics. */
     void regStats();
 
-    /** Sets the CPU pointer. */
-    void setCPU(O3CPU *cpu_ptr);
-
-    /** Sets the IEW stage pointer. */
-    void setIEW(IEW *iew_ptr)
-    { iewStage = iew_ptr; }
-
     /** Sets the pointer to the dcache port. */
-    void setDcachePort(Port *dcache_port)
-    { dcachePort = dcache_port; }
+    void setDcachePort(Port *dcache_port);
 
     /** Switches out LSQ unit. */
     void switchOut();
@@ -297,15 +289,19 @@ class LSQUnit {
     struct SQEntry {
         /** Constructs an empty store queue entry. */
         SQEntry()
-            : inst(NULL), req(NULL), size(0), data(0),
+            : inst(NULL), req(NULL), size(0),
               canWB(0), committed(0), completed(0)
-        { }
+        {
+            memset(data, 0, sizeof(data));
+        }
 
         /** Constructs a store queue entry for a given instruction. */
         SQEntry(DynInstPtr &_inst)
-            : inst(_inst), req(NULL), size(0), data(0),
+            : inst(_inst), req(NULL), size(0),
               canWB(0), committed(0), completed(0)
-        { }
+        {
+            memset(data, 0, sizeof(data));
+        }
 
         /** The store instruction. */
         DynInstPtr inst;
@@ -314,7 +310,7 @@ class LSQUnit {
         /** The size of the store. */
         int size;
         /** The store data. */
-        IntReg data;
+        char data[sizeof(IntReg)];
         /** Whether or not the store can writeback. */
         bool canWB;
         /** Whether or not the store is committed. */
@@ -562,22 +558,14 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
         if ((store_has_lower_limit && store_has_upper_limit)) {
             // Get shift amount for offset into the store's data.
             int shift_amt = req->getVaddr() & (store_size - 1);
-            // @todo: Magic number, assumes byte addressing
-            shift_amt = shift_amt << 3;
 
-            // Cast this to type T?
-            data = storeQueue[store_idx].data >> shift_amt;
-
-            // When the data comes from the store queue entry, it's in host
-            // order. When it gets sent to the load, it needs to be in guest
-            // order so when the load converts it again, it ends up back
-            // in host order like the inst expects.
-            data = TheISA::htog(data);
+            memcpy(&data, storeQueue[store_idx].data + shift_amt, sizeof(T));
 
             assert(!load_inst->memData);
             load_inst->memData = new uint8_t[64];
 
-            memcpy(load_inst->memData, &data, req->getSize());
+            memcpy(load_inst->memData,
+                    storeQueue[store_idx].data + shift_amt, req->getSize());
 
             DPRINTF(LSQUnit, "Forwarding from store idx %i to load to "
                     "addr %#x, data %#x\n",
@@ -724,7 +712,10 @@ LSQUnit<Impl>::write(Request *req, T &data, int store_idx)
 
     storeQueue[store_idx].req = req;
     storeQueue[store_idx].size = sizeof(T);
-    storeQueue[store_idx].data = data;
+    assert(sizeof(T) <= sizeof(storeQueue[store_idx].data));
+
+    T gData = htog(data);
+    memcpy(storeQueue[store_idx].data, &gData, sizeof(T));
 
     // This function only writes the data to the store queue, so no fault
     // can happen here.
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index e70c960b3..44e2cea76 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -57,6 +57,11 @@ LSQUnit<Impl>::WritebackEvent::process()
     if (!lsqPtr->isSwitchedOut()) {
         lsqPtr->writeback(inst, pkt);
     }
+
+    if (pkt->senderState)
+        delete pkt->senderState;
+
+    delete pkt->req;
     delete pkt;
 }
 
@@ -80,10 +85,6 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
 
     if (isSwitchedOut() || inst->isSquashed()) {
         iewStage->decrWb(inst->seqNum);
-        delete state;
-        delete pkt->req;
-        delete pkt;
-        return;
     } else {
         if (!state->noWB) {
             writeback(inst, pkt);
@@ -109,9 +110,12 @@ LSQUnit<Impl>::LSQUnit()
 
 template<class Impl>
 void
-LSQUnit<Impl>::init(Params *params, LSQ *lsq_ptr, unsigned maxLQEntries,
-                    unsigned maxSQEntries, unsigned id)
+LSQUnit<Impl>::init(O3CPU *cpu_ptr, IEW *iew_ptr, Params *params, LSQ *lsq_ptr,
+                    unsigned maxLQEntries, unsigned maxSQEntries, unsigned id)
 {
+    cpu = cpu_ptr;
+    iewStage = iew_ptr;
+
     DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id);
 
     switchedOut = false;
@@ -141,19 +145,6 @@ LSQUnit<Impl>::init(Params *params, LSQ *lsq_ptr, unsigned maxLQEntries,
 }
 
 template<class Impl>
-void
-LSQUnit<Impl>::setCPU(O3CPU *cpu_ptr)
-{
-    cpu = cpu_ptr;
-
-#if USE_CHECKER
-    if (cpu->checker) {
-        cpu->checker->setDcachePort(dcachePort);
-    }
-#endif
-}
-
-template<class Impl>
 std::string
 LSQUnit<Impl>::name() const
 {
@@ -211,6 +202,19 @@ LSQUnit<Impl>::regStats()
 
 template<class Impl>
 void
+LSQUnit<Impl>::setDcachePort(Port *dcache_port)
+{
+    dcachePort = dcache_port;
+
+#if USE_CHECKER
+    if (cpu->checker) {
+        cpu->checker->setDcachePort(dcachePort);
+    }
+#endif
+}
+
+template<class Impl>
+void
 LSQUnit<Impl>::clearLQ()
 {
     loadQueue.clear();
@@ -641,22 +645,10 @@ LSQUnit<Impl>::writebackStores()
         assert(!inst->memData);
         inst->memData = new uint8_t[64];
 
-        TheISA::IntReg convertedData =
-            TheISA::htog(storeQueue[storeWBIdx].data);
-
-        //FIXME This is a hack to get SPARC working. It, along with endianness
-        //in the memory system in general, need to be straightened out more
-        //formally. The problem is that the data's endianness is swapped when
-        //it's in the 64 bit data field in the store queue. The data that you
-        //want won't start at the beginning of the field anymore unless it was
-        //a 64 bit access.
-        memcpy(inst->memData,
-                (uint8_t *)&convertedData +
-                (TheISA::ByteOrderDiffers ?
-                 (sizeof(TheISA::IntReg) - req->getSize()) : 0),
-                req->getSize());
-
-        PacketPtr data_pkt = new Packet(req, MemCmd::WriteReq,
+        memcpy(inst->memData, storeQueue[storeWBIdx].data, req->getSize());
+
+        MemCmd command = req->isSwap() ? MemCmd::SwapReq : MemCmd::WriteReq;
+        PacketPtr data_pkt = new Packet(req, command,
                                         Packet::Broadcast);
         data_pkt->dataStatic(inst->memData);
 
@@ -673,7 +665,7 @@ LSQUnit<Impl>::writebackStores()
                 inst->seqNum);
 
         // @todo: Remove this SC hack once the memory system handles it.
-        if (req->isLocked()) {
+        if (inst->isStoreConditional()) {
             // Disable recording the result temporarily.  Writing to
             // misc regs normally updates the result, but this is not
             // the desired behavior when handling store conditionals.
diff --git a/src/cpu/o3/mips/cpu_builder.cc b/src/cpu/o3/mips/cpu_builder.cc
index 66741aee9..c6acc0bfb 100644
--- a/src/cpu/o3/mips/cpu_builder.cc
+++ b/src/cpu/o3/mips/cpu_builder.cc
@@ -51,6 +51,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU)
 Param<int> clock;
 Param<int> phase;
 Param<int> numThreads;
+Param<int> cpu_id;
 Param<int> activity;
 
 SimObjectVectorParam<Process *> workload;
@@ -149,6 +150,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU)
     INIT_PARAM(clock, "clock speed"),
     INIT_PARAM_DFLT(phase, "clock phase", 0),
     INIT_PARAM(numThreads, "number of HW thread contexts"),
+    INIT_PARAM(cpu_id, "processor ID"),
     INIT_PARAM_DFLT(activity, "Initial activity count", 0),
 
     INIT_PARAM(workload, "Processes to run"),
@@ -275,9 +277,11 @@ CREATE_SIM_OBJECT(DerivO3CPU)
     MipsSimpleParams *params = new MipsSimpleParams;
 
     params->clock = clock;
+    params->phase = phase;
 
     params->name = getInstanceName();
     params->numberOfThreads = actual_num_threads;
+    params->cpu_id = cpu_id;
     params->activity = activity;
 
     params->workload = workload;
diff --git a/src/cpu/o3/mips/cpu_impl.hh b/src/cpu/o3/mips/cpu_impl.hh
index 317fd748e..d1135f048 100644
--- a/src/cpu/o3/mips/cpu_impl.hh
+++ b/src/cpu/o3/mips/cpu_impl.hh
@@ -47,7 +47,7 @@
 
 template <class Impl>
 MipsO3CPU<Impl>::MipsO3CPU(Params *params)
-    : FullO3CPU<Impl>(params)
+    : FullO3CPU<Impl>(this, params)
 {
     DPRINTF(O3CPU, "Creating MipsO3CPU object.\n");
 
@@ -95,6 +95,7 @@ MipsO3CPU<Impl>::MipsO3CPU(Params *params)
 
         // Give the thread the TC.
         this->thread[i]->tc = tc;
+        this->thread[i]->setCpuId(params->cpu_id);
 
         // Add the TC to the CPU's list of TC's.
         this->threadContexts.push_back(tc);
@@ -104,17 +105,6 @@ MipsO3CPU<Impl>::MipsO3CPU(Params *params)
         this->thread[i]->setFuncExeInst(0);
     }
 
-    // Sets CPU pointers. These must be set at this level because the CPU
-    // pointers are defined to be the highest level of CPU class.
-    this->fetch.setCPU(this);
-    this->decode.setCPU(this);
-    this->rename.setCPU(this);
-    this->iew.setCPU(this);
-    this->commit.setCPU(this);
-
-    this->rob.setCPU(this);
-    this->regFile.setCPU(this);
-
     lockAddr = 0;
     lockFlag = false;
 }
diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh
index c4f8f3a9f..b5b1cd021 100644
--- a/src/cpu/o3/regfile.hh
+++ b/src/cpu/o3/regfile.hh
@@ -76,7 +76,7 @@ class PhysRegFile
      * Constructs a physical register file with the specified amount of
      * integer and floating point registers.
      */
-    PhysRegFile(unsigned _numPhysicalIntRegs,
+    PhysRegFile(O3CPU *_cpu, unsigned _numPhysicalIntRegs,
                 unsigned _numPhysicalFloatRegs);
 
     //Everything below should be pretty well identical to the normal
@@ -174,7 +174,7 @@ class PhysRegFile
         // Remove the base Float reg dependency.
         reg_idx = reg_idx - numPhysicalIntRegs;
 
-        assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
+        assert(reg_idx < numPhysicalFloatRegs);
 
         DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n",
                 int(reg_idx), (uint64_t)val);
@@ -189,7 +189,7 @@ class PhysRegFile
         // Remove the base Float reg dependency.
         reg_idx = reg_idx - numPhysicalIntRegs;
 
-        assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
+        assert(reg_idx < numPhysicalFloatRegs);
 
         DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n",
                 int(reg_idx), (uint64_t)val);
@@ -204,7 +204,7 @@ class PhysRegFile
         // Remove the base Float reg dependency.
         reg_idx = reg_idx - numPhysicalIntRegs;
 
-        assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
+        assert(reg_idx < numPhysicalFloatRegs);
 
         DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n",
                 int(reg_idx), (uint64_t)val);
@@ -217,7 +217,7 @@ class PhysRegFile
         // Remove the base Float reg dependency.
         reg_idx = reg_idx - numPhysicalIntRegs;
 
-        assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
+        assert(reg_idx < numPhysicalFloatRegs);
 
         DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n",
                 int(reg_idx), (uint64_t)val);
@@ -232,11 +232,11 @@ class PhysRegFile
 
     MiscReg readMiscReg(int misc_reg, unsigned thread_id)
     {
-        return miscRegs[thread_id].readReg(misc_reg,
-                                                     cpu->tcBase(thread_id));
+        return miscRegs[thread_id].readReg(misc_reg, cpu->tcBase(thread_id));
     }
 
-    void setMiscRegNoEffect(int misc_reg, const MiscReg &val, unsigned thread_id)
+    void setMiscRegNoEffect(int misc_reg,
+            const MiscReg &val, unsigned thread_id)
     {
         miscRegs[thread_id].setRegNoEffect(misc_reg, val);
     }
@@ -268,9 +268,6 @@ class PhysRegFile
     O3CPU *cpu;
 
   public:
-    /** Sets the CPU pointer. */
-    void setCPU(O3CPU *cpu_ptr) { cpu = cpu_ptr; }
-
     /** Number of physical integer registers. */
     unsigned numPhysicalIntRegs;
     /** Number of physical floating point registers. */
@@ -278,9 +275,9 @@ class PhysRegFile
 };
 
 template <class Impl>
-PhysRegFile<Impl>::PhysRegFile(unsigned _numPhysicalIntRegs,
+PhysRegFile<Impl>::PhysRegFile(O3CPU *_cpu, unsigned _numPhysicalIntRegs,
                                unsigned _numPhysicalFloatRegs)
-    : numPhysicalIntRegs(_numPhysicalIntRegs),
+    : cpu(_cpu), numPhysicalIntRegs(_numPhysicalIntRegs),
       numPhysicalFloatRegs(_numPhysicalFloatRegs)
 {
     intRegFile = new IntReg[numPhysicalIntRegs];
diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh
index 6b4628f92..b2faffe43 100644
--- a/src/cpu/o3/rename.hh
+++ b/src/cpu/o3/rename.hh
@@ -107,7 +107,7 @@ class DefaultRename
 
   public:
     /** DefaultRename constructor. */
-    DefaultRename(Params *params);
+    DefaultRename(O3CPU *_cpu, Params *params);
 
     /** Returns the name of rename. */
     std::string name() const;
@@ -115,9 +115,6 @@ class DefaultRename
     /** Registers statistics. */
     void regStats();
 
-    /** Sets CPU pointer. */
-    void setCPU(O3CPU *cpu_ptr);
-
     /** Sets the main backwards communication time buffer pointer. */
     void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
 
diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh
index e303f1cee..431705e19 100644
--- a/src/cpu/o3/rename_impl.hh
+++ b/src/cpu/o3/rename_impl.hh
@@ -37,8 +37,9 @@
 #include "cpu/o3/rename.hh"
 
 template <class Impl>
-DefaultRename<Impl>::DefaultRename(Params *params)
-    : iewToRenameDelay(params->iewToRenameDelay),
+DefaultRename<Impl>::DefaultRename(O3CPU *_cpu, Params *params)
+    : cpu(_cpu),
+      iewToRenameDelay(params->iewToRenameDelay),
       decodeToRenameDelay(params->decodeToRenameDelay),
       commitToRenameDelay(params->commitToRenameDelay),
       renameWidth(params->renameWidth),
@@ -166,17 +167,8 @@ DefaultRename<Impl>::regStats()
 
 template <class Impl>
 void
-DefaultRename<Impl>::setCPU(O3CPU *cpu_ptr)
-{
-    DPRINTF(Rename, "Setting CPU pointer.\n");
-    cpu = cpu_ptr;
-}
-
-template <class Impl>
-void
 DefaultRename<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
 {
-    DPRINTF(Rename, "Setting time buffer pointer.\n");
     timeBuffer = tb_ptr;
 
     // Setup wire to read information from time buffer, from IEW stage.
@@ -193,7 +185,6 @@ template <class Impl>
 void
 DefaultRename<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
 {
-    DPRINTF(Rename, "Setting rename queue pointer.\n");
     renameQueue = rq_ptr;
 
     // Setup wire to write information to future stages.
@@ -204,7 +195,6 @@ template <class Impl>
 void
 DefaultRename<Impl>::setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr)
 {
-    DPRINTF(Rename, "Setting decode queue pointer.\n");
     decodeQueue = dq_ptr;
 
     // Setup wire to get information from decode.
@@ -228,7 +218,6 @@ template<class Impl>
 void
 DefaultRename<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
 {
-    DPRINTF(Rename, "Setting active threads list pointer.\n");
     activeThreads = at_ptr;
 }
 
@@ -237,8 +226,6 @@ template <class Impl>
 void
 DefaultRename<Impl>::setRenameMap(RenameMap rm_ptr[])
 {
-    DPRINTF(Rename, "Setting rename map pointers.\n");
-
     for (int i=0; i<numThreads; i++) {
         renameMap[i] = &rm_ptr[i];
     }
@@ -248,7 +235,6 @@ template <class Impl>
 void
 DefaultRename<Impl>::setFreeList(FreeList *fl_ptr)
 {
-    DPRINTF(Rename, "Setting free list pointer.\n");
     freeList = fl_ptr;
 }
 
@@ -256,7 +242,6 @@ template<class Impl>
 void
 DefaultRename<Impl>::setScoreboard(Scoreboard *_scoreboard)
 {
-    DPRINTF(Rename, "Setting scoreboard pointer.\n");
     scoreboard = _scoreboard;
 }
 
@@ -707,7 +692,7 @@ DefaultRename<Impl>::renameInsts(unsigned tid)
             DPRINTF(Rename, "Blocking due to lack of free "
                     "physical registers to rename to.\n");
             blockThisCycle = true;
-
+            insts_to_rename.push_front(inst);
             ++renameFullRegistersEvents;
 
             break;
@@ -1011,7 +996,12 @@ DefaultRename<Impl>::renameSrcRegs(DynInstPtr &inst,unsigned tid)
         if (src_reg < TheISA::FP_Base_DepTag) {
             flat_src_reg = TheISA::flattenIntIndex(inst->tcBase(), src_reg);
             DPRINTF(Rename, "Flattening index %d to %d.\n", (int)src_reg, (int)flat_src_reg);
+        } else {
+            // Floating point and misc register indexes are rebased from
+            // FP_Base_DepTag to NumIntRegs, past the flattened int registers.
+            flat_src_reg = src_reg - TheISA::FP_Base_DepTag + TheISA::NumIntRegs;
         }
+
         inst->flattenSrcReg(src_idx, flat_src_reg);
 
         // Look up the source registers to get the phys. register they've
@@ -1048,8 +1038,13 @@ DefaultRename<Impl>::renameDestRegs(DynInstPtr &inst,unsigned tid)
         RegIndex dest_reg = inst->destRegIdx(dest_idx);
         RegIndex flat_dest_reg = dest_reg;
         if (dest_reg < TheISA::FP_Base_DepTag) {
+            // Integer registers are flattened.
             flat_dest_reg = TheISA::flattenIntIndex(inst->tcBase(), dest_reg);
             DPRINTF(Rename, "Flattening index %d to %d.\n", (int)dest_reg, (int)flat_dest_reg);
+        } else {
+            // Floating point and misc register indexes are rebased from
+            // FP_Base_DepTag to NumIntRegs, past the flattened int registers.
+            flat_dest_reg = dest_reg - TheISA::FP_Base_DepTag + TheISA::NumIntRegs;
         }
 
         inst->flattenDestReg(dest_idx, flat_dest_reg);
diff --git a/src/cpu/o3/rob.hh b/src/cpu/o3/rob.hh
index 7cd5a5143..00329abb0 100644
--- a/src/cpu/o3/rob.hh
+++ b/src/cpu/o3/rob.hh
@@ -82,17 +82,12 @@ class ROB
      *  @param _smtROBThreshold Max Resources(by %) a thread can have in the ROB.
      *  @param _numThreads      The number of active threads.
      */
-    ROB(unsigned _numEntries, unsigned _squashWidth, std::string smtROBPolicy,
-        unsigned _smtROBThreshold, unsigned _numThreads);
+    ROB(O3CPU *_cpu, unsigned _numEntries, unsigned _squashWidth,
+        std::string smtROBPolicy, unsigned _smtROBThreshold,
+        unsigned _numThreads);
 
     std::string name() const;
 
-    /** Function to set the CPU pointer, necessary due to which object the ROB
-     *  is created within.
-     *  @param cpu_ptr Pointer to the implementation specific full CPU object.
-     */
-    void setCPU(O3CPU *cpu_ptr);
-
     /** Sets pointer to the list of active threads.
      *  @param at_ptr Pointer to the list of active threads.
      */
diff --git a/src/cpu/o3/rob_impl.hh b/src/cpu/o3/rob_impl.hh
index fde636754..7ff3aa274 100644
--- a/src/cpu/o3/rob_impl.hh
+++ b/src/cpu/o3/rob_impl.hh
@@ -35,10 +35,11 @@
 #include <list>
 
 template <class Impl>
-ROB<Impl>::ROB(unsigned _numEntries, unsigned _squashWidth,
+ROB<Impl>::ROB(O3CPU *_cpu, unsigned _numEntries, unsigned _squashWidth,
                std::string _smtROBPolicy, unsigned _smtROBThreshold,
                unsigned _numThreads)
-    : numEntries(_numEntries),
+    : cpu(_cpu),
+      numEntries(_numEntries),
       squashWidth(_squashWidth),
       numInstsInROB(0),
       numThreads(_numThreads)
@@ -90,20 +91,6 @@ ROB<Impl>::ROB(unsigned _numEntries, unsigned _squashWidth,
         assert(0 && "Invalid ROB Sharing Policy.Options Are:{Dynamic,"
                     "Partitioned, Threshold}");
     }
-}
-
-template <class Impl>
-std::string
-ROB<Impl>::name() const
-{
-    return cpu->name() + ".rob";
-}
-
-template <class Impl>
-void
-ROB<Impl>::setCPU(O3CPU *cpu_ptr)
-{
-    cpu = cpu_ptr;
 
     // Set the per-thread iterators to the end of the instruction list.
     for (int i=0; i < numThreads;i++) {
@@ -117,6 +104,13 @@ ROB<Impl>::setCPU(O3CPU *cpu_ptr)
 }
 
 template <class Impl>
+std::string
+ROB<Impl>::name() const
+{
+    return cpu->name() + ".rob";
+}
+
+template <class Impl>
 void
 ROB<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
 {
diff --git a/src/cpu/o3/sparc/cpu_builder.cc b/src/cpu/o3/sparc/cpu_builder.cc
index 3cac89bad..35badce2c 100644
--- a/src/cpu/o3/sparc/cpu_builder.cc
+++ b/src/cpu/o3/sparc/cpu_builder.cc
@@ -50,11 +50,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU)
     Param<int> clock;
     Param<int> phase;
     Param<int> numThreads;
+    Param<int> cpu_id;
     Param<int> activity;
 
 #if FULL_SYSTEM
     SimObjectParam<System *> system;
-    Param<int> cpu_id;
     SimObjectParam<SparcISA::ITB *> itb;
     SimObjectParam<SparcISA::DTB *> dtb;
     Param<Tick> profile;
@@ -161,11 +161,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU)
     INIT_PARAM(clock, "clock speed"),
     INIT_PARAM_DFLT(phase, "clock phase", 0),
     INIT_PARAM(numThreads, "number of HW thread contexts"),
+    INIT_PARAM(cpu_id, "processor ID"),
     INIT_PARAM_DFLT(activity, "Initial activity count", 0),
 
 #if FULL_SYSTEM
     INIT_PARAM(system, "System object"),
-    INIT_PARAM(cpu_id, "processor ID"),
     INIT_PARAM(itb, "Instruction translation buffer"),
     INIT_PARAM(dtb, "Data translation buffer"),
     INIT_PARAM(profile, ""),
@@ -305,14 +305,15 @@ CREATE_SIM_OBJECT(DerivO3CPU)
     SparcSimpleParams *params = new SparcSimpleParams;
 
     params->clock = clock;
+    params->phase = phase;
 
     params->name = getInstanceName();
     params->numberOfThreads = actual_num_threads;
+    params->cpu_id = cpu_id;
     params->activity = activity;
 
 #if FULL_SYSTEM
     params->system = system;
-    params->cpu_id = cpu_id;
     params->itb = itb;
     params->dtb = dtb;
     params->profile = profile;
diff --git a/src/cpu/o3/sparc/cpu_impl.hh b/src/cpu/o3/sparc/cpu_impl.hh
index a425a8a56..50d980f55 100644
--- a/src/cpu/o3/sparc/cpu_impl.hh
+++ b/src/cpu/o3/sparc/cpu_impl.hh
@@ -55,7 +55,7 @@
 #endif
 
 template <class Impl>
-SparcO3CPU<Impl>::SparcO3CPU(Params *params) : FullO3CPU<Impl>(params)
+SparcO3CPU<Impl>::SparcO3CPU(Params *params) : FullO3CPU<Impl>(this, params)
 {
     DPRINTF(O3CPU, "Creating SparcO3CPU object.\n");
 
@@ -113,6 +113,7 @@ SparcO3CPU<Impl>::SparcO3CPU(Params *params) : FullO3CPU<Impl>(params)
 #endif
         // Give the thread the TC.
         this->thread[i]->tc = tc;
+        this->thread[i]->setCpuId(params->cpu_id);
 
         // Add the TC to the CPU's list of TC's.
         this->threadContexts.push_back(tc);
@@ -122,17 +123,6 @@ SparcO3CPU<Impl>::SparcO3CPU(Params *params) : FullO3CPU<Impl>(params)
         this->thread[i]->setFuncExeInst(0);
     }
 
-    // Sets CPU pointers. These must be set at this level because the CPU
-    // pointers are defined to be the highest level of CPU class.
-    this->fetch.setCPU(this);
-    this->decode.setCPU(this);
-    this->rename.setCPU(this);
-    this->iew.setCPU(this);
-    this->commit.setCPU(this);
-
-    this->rob.setCPU(this);
-    this->regFile.setCPU(this);
-
     lockAddr = 0;
     lockFlag = false;
 }
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index 6f69b5ac4..b0a01c3a3 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -557,7 +557,7 @@ AtomicSimpleCPU::tick()
             }
 
         }
-        if(predecoder.needMoreBytes())
+        if(predecoder.needMoreBytes() || fault != NoFault)
             advancePC(fault);
     }
 
diff --git a/src/mem/bus.cc b/src/mem/bus.cc
index 6e6ba2380..b0636ecc2 100644
--- a/src/mem/bus.cc
+++ b/src/mem/bus.cc
@@ -52,9 +52,19 @@ Bus::getPort(const std::string &if_name, int idx)
         } else
             fatal("Default port already set\n");
     }
+    int id;
+    if (if_name == "functional") {
+        if (!funcPort) {
+            id = maxId++;
+            funcPort = new BusPort(csprintf("%s-p%d-func", name(), id), this, id);
+            funcPortId = id;
+            interfaces[id] = funcPort;
+        }
+        return funcPort;
+    }
 
     // if_name ignored?  forced to be empty?
-    int id = maxId++;
+    id = maxId++;
     assert(maxId < std::numeric_limits<typeof(maxId)>::max());
     BusPort *bp = new BusPort(csprintf("%s-p%d", name(), id), this, id);
     interfaces[id] = bp;
@@ -64,10 +74,15 @@ Bus::getPort(const std::string &if_name, int idx)
 void
 Bus::deletePortRefs(Port *p)
 {
+
     BusPort *bp =  dynamic_cast<BusPort*>(p);
     if (bp == NULL)
         panic("Couldn't convert Port* to BusPort*\n");
+    // The single functional port is shared and reused by getPort(); keep it.
+    if (funcPort == bp)
+        return;
     interfaces.erase(bp->getId());
+    delete bp;
 }
 
 /** Get the ranges of anyone other buses that we are connected to. */
@@ -520,7 +535,7 @@ Bus::recvStatusChange(Port::Status status, int id)
     m5::hash_map<short,BusPort*>::iterator intIter;
 
     for (intIter = interfaces.begin(); intIter != interfaces.end(); intIter++)
-        if (intIter->first != id)
+        if (intIter->first != id && intIter->first != funcPortId)
             intIter->second->sendStatusChange(Port::RangeChange);
 
     if (id != defaultId && defaultPort)
diff --git a/src/mem/bus.hh b/src/mem/bus.hh
index 6706b6c77..0dd7547c5 100644
--- a/src/mem/bus.hh
+++ b/src/mem/bus.hh
@@ -63,6 +63,7 @@ class Bus : public MemObject
 
     Event * drainEvent;
 
+
     static const int defaultId = -3; //Make it unique from Broadcast
 
     struct DevMap {
@@ -249,6 +250,9 @@ class Bus : public MemObject
     /** Port that handles requests that don't match any of the interfaces.*/
     BusPort *defaultPort;
 
+    BusPort *funcPort;
+    int funcPortId;
+
     /** Has the user specified their own default responder? */
     bool responderSet;
 
@@ -266,7 +270,8 @@ class Bus : public MemObject
         bool responder_set)
         : MemObject(n), busId(bus_id), clock(_clock), width(_width),
           tickNextIdle(0), drainEvent(NULL), busIdle(this), inRetry(false),
-          maxId(0), defaultPort(NULL), responderSet(responder_set)
+          maxId(0), defaultPort(NULL), funcPort(NULL), funcPortId(-4),
+          responderSet(responder_set)
     {
         //Both the width and clock period must be positive
         if (width <= 0)
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index ec0ef1be4..0a528aa5d 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -1183,7 +1183,8 @@ Cache<TagStore,Coherence>::deletePortRefs(Port *p)
 {
     if (cpuSidePort == p || memSidePort == p)
         panic("Can only delete functional ports\n");
-    // nothing else to do
+
+    delete p;
 }
 
 
diff --git a/src/mem/port.cc b/src/mem/port.cc
index e75e50e4d..e6ea773f2 100644
--- a/src/mem/port.cc
+++ b/src/mem/port.cc
@@ -51,7 +51,6 @@ Port::removeConn()
 {
     if (peer->getOwner())
         peer->getOwner()->deletePortRefs(peer);
-    delete peer;
     peer = NULL;
 }
 
diff --git a/src/python/SConscript b/src/python/SConscript
index 6662c8a45..e1095eabe 100644
--- a/src/python/SConscript
+++ b/src/python/SConscript
@@ -30,129 +30,91 @@
 #          Nathan Binkert
 
 import os
-import zipfile
-
-# handy function for path joins
-def join(*args):
-    return os.path.normpath(os.path.join(*args))
-
 Import('*')
 
-# This SConscript is in charge of collecting .py files and generating
-# a zip archive that is appended to the m5 binary.
-
-# List of files & directories to include in the zip file.  To include
-# a package, list only the root directory of the package, not any
-# internal .py files (else they will get the path stripped off when
-# they are imported into the zip file).
-pyzip_files = []
-
-# List of additional files on which the zip archive depends, but which
-# are not included in pyzip_files... i.e. individual .py files within
-# a package.
-pyzip_dep_files = []
-
-# Add the specified package to the zip archive.  Adds the directory to
-# pyzip_files and all included .py files to pyzip_dep_files.
-def addPkg(pkgdir):
-    pyzip_files.append(pkgdir)
-    origdir = os.getcwd()
-    srcdir = join(Dir('.').srcnode().abspath, pkgdir)
-    os.chdir(srcdir)
-    for path, dirs, files in os.walk('.'):
-        for i,dir in enumerate(dirs):
-            if dir == 'SCCS':
-                del dirs[i]
-                break
-
-        for f in files:
-            if f.endswith('.py'):
-                pyzip_dep_files.append(join(pkgdir, path, f))
-
-    os.chdir(origdir)
-
-# Generate Python file that contains a dict specifying the current
-# build_env flags.
-def MakeDefinesPyFile(target, source, env):
-    f = file(str(target[0]), 'w')
-    print >>f, "m5_build_env = ", source[0]
-    f.close()
-
-optionDict = dict([(opt, env[opt]) for opt in env.ExportOptions])
-env.Command('m5/defines.py', Value(optionDict), MakeDefinesPyFile)
-
-def MakeInfoPyFile(target, source, env):
-    f = file(str(target[0]), 'w')
-    for src in source:
-        data = ''.join(file(src.srcnode().abspath, 'r').xreadlines())
-        print >>f, "%s = %s" % (src, repr(data))
-    f.close()
-
-env.Command('m5/info.py',
-            [ '#/AUTHORS', '#/LICENSE', '#/README', '#/RELEASE_NOTES' ],
-            MakeInfoPyFile)
-
-# Now specify the packages & files for the zip archive.
-addPkg('m5')
-pyzip_files.append('m5/defines.py')
-pyzip_files.append('m5/info.py')
-pyzip_files.append(join(env['ROOT'], 'util/pbs/jobfile.py'))
-pyzip_files.append(join(env['ROOT'], 'src/base/traceflags.py'))
-
-swig_modules = []
-def swig_it(module):
-    env.Command(['swig/%s_wrap.cc' % module, 'm5/internal/%s.py' % module],
-                'swig/%s.i' % module,
-                '$SWIG $SWIGFLAGS -outdir ${TARGETS[1].dir} '
-                '-o ${TARGETS[0]} $SOURCES')
-    swig_modules.append(module)
-    Source('swig/%s_wrap.cc' % module)
-
 Source('swig/init.cc')
 Source('swig/pyevent.cc')
 Source('swig/pyobject.cc')
 
-swig_it('core')
-swig_it('debug')
-swig_it('event')
-swig_it('random')
-swig_it('sim_object')
-swig_it('stats')
-swig_it('trace')
-
-# Automatically generate m5/internals/__init__.py
-def MakeInternalsInit(target, source, env):
-    f = file(str(target[0]), 'w')
-    for m in swig_modules:
-        print >>f, 'import %s' % m
-    f.close()
-
-swig_py_files = [ 'm5/internal/%s.py' % m for m in swig_modules ]
-env.Command('m5/internal/__init__.py', swig_py_files, MakeInternalsInit)
-pyzip_dep_files.append('m5/internal/__init__.py')
-
-def MakeSwigInit(target, source, env):
-    f = file(str(target[0]), 'w')
-    print >>f, 'extern "C" {'
-    for m in swig_modules:
-        print >>f, '    void init_%s();' % m
-    print >>f, '}'
-    print >>f, 'void init_swig() {'
-    for m in swig_modules:
-        print >>f, '    init_%s();' % m
-    print >>f, '}'
-    f.close()
-
-swig_cc_files = [ 'swig/%s_wrap.cc' % m for m in swig_modules ]
-env.Command('swig/init.cc', swig_cc_files, MakeSwigInit)
-
-# Action function to build the zip archive.  Uses the PyZipFile module
-# included in the standard Python library.
-def buildPyZip(target, source, env):
-    pzf = zipfile.PyZipFile(str(target[0]), 'w')
-    for s in source:
-        pzf.writepy(str(s))
-
-# Add the zip file target to the environment.
-env.Command('m5py.zip', pyzip_files, buildPyZip)
-env.Depends('m5py.zip', pyzip_dep_files)
+PySource('m5', 'm5/__init__.py')
+PySource('m5', 'm5/SimObject.py')
+PySource('m5', 'm5/attrdict.py')
+PySource('m5', 'm5/convert.py')
+PySource('m5', 'm5/event.py')
+PySource('m5', 'm5/main.py')
+PySource('m5', 'm5/multidict.py')
+PySource('m5', 'm5/params.py')
+PySource('m5', 'm5/proxy.py')
+PySource('m5', 'm5/smartdict.py')
+PySource('m5', 'm5/stats.py')
+PySource('m5', 'm5/ticks.py')
+PySource('m5', 'm5/util.py')
+
+PySource('m5', os.path.join(env['ROOT'], 'util/pbs/jobfile.py'))
+
+SwigSource('m5.internal', 'swig/core.i')
+SwigSource('m5.internal', 'swig/debug.i')
+SwigSource('m5.internal', 'swig/event.i')
+SwigSource('m5.internal', 'swig/random.i')
+SwigSource('m5.internal', 'swig/sim_object.i')
+SwigSource('m5.internal', 'swig/stats.i')
+SwigSource('m5.internal', 'swig/trace.i')
+PySource('m5.internal', 'm5/internal/__init__.py')
+
+SimObject('m5/objects/AlphaConsole.py')
+SimObject('m5/objects/AlphaTLB.py')
+SimObject('m5/objects/BadDevice.py')
+SimObject('m5/objects/BaseCPU.py')
+SimObject('m5/objects/BaseCache.py')
+SimObject('m5/objects/BaseHier.py')
+SimObject('m5/objects/BaseMem.py')
+SimObject('m5/objects/BaseMemory.py')
+SimObject('m5/objects/BranchPred.py')
+SimObject('m5/objects/Bridge.py')
+SimObject('m5/objects/Bus.py')
+SimObject('m5/objects/Checker.py')
+SimObject('m5/objects/CoherenceProtocol.py')
+SimObject('m5/objects/DRAMMemory.py')
+SimObject('m5/objects/Device.py')
+SimObject('m5/objects/DiskImage.py')
+SimObject('m5/objects/Ethernet.py')
+SimObject('m5/objects/FUPool.py')
+SimObject('m5/objects/FastCPU.py')
+#SimObject('m5/objects/FreebsdSystem.py')
+SimObject('m5/objects/FullCPU.py')
+SimObject('m5/objects/FuncUnit.py')
+SimObject('m5/objects/FuncUnitConfig.py')
+SimObject('m5/objects/FunctionalMemory.py')
+SimObject('m5/objects/HierParams.py')
+SimObject('m5/objects/Ide.py')
+SimObject('m5/objects/IntrControl.py')
+SimObject('m5/objects/LinuxSystem.py')
+SimObject('m5/objects/MainMemory.py')
+SimObject('m5/objects/MemObject.py')
+SimObject('m5/objects/MemTest.py')
+SimObject('m5/objects/MemoryController.py')
+SimObject('m5/objects/O3CPU.py')
+SimObject('m5/objects/OzoneCPU.py')
+SimObject('m5/objects/Pci.py')
+SimObject('m5/objects/PhysicalMemory.py')
+SimObject('m5/objects/PipeTrace.py')
+SimObject('m5/objects/Platform.py')
+SimObject('m5/objects/Process.py')
+SimObject('m5/objects/Repl.py')
+SimObject('m5/objects/Root.py')
+SimObject('m5/objects/Sampler.py')
+SimObject('m5/objects/Scsi.py')
+SimObject('m5/objects/SimConsole.py')
+SimObject('m5/objects/SimpleCPU.py')
+SimObject('m5/objects/SimpleDisk.py')
+#SimObject('m5/objects/SimpleOzoneCPU.py')
+SimObject('m5/objects/SparcTLB.py')
+SimObject('m5/objects/System.py')
+SimObject('m5/objects/T1000.py')
+#SimObject('m5/objects/Tru64System.py')
+SimObject('m5/objects/Tsunami.py')
+SimObject('m5/objects/Uart.py')
+
+if env['ALPHA_TLASER']:
+    SimObject('m5/objects/DmaEngine.py')
+    SimObject('m5/objects/Turbolaser.py')
diff --git a/src/python/m5/internal/__init__.py b/src/python/m5/internal/__init__.py
new file mode 100644
index 000000000..6b7859cd7
--- /dev/null
+++ b/src/python/m5/internal/__init__.py
@@ -0,0 +1,35 @@
+# Copyright (c) 2006 The Regents of The University of Michigan
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Nathan Binkert
+
+import core
+import debug
+import event
+import random
+import sim_object
+import stats
+import trace
diff --git a/src/python/m5/params.py b/src/python/m5/params.py
index 9892df97c..da7ddd65e 100644
--- a/src/python/m5/params.py
+++ b/src/python/m5/params.py
@@ -51,6 +51,7 @@ import sys
 import time
 
 import convert
+import proxy
 import ticks
 from util import *
 
@@ -477,12 +478,13 @@ def IncEthernetAddr(addr, val = 1):
     assert(bytes[0] <= 255)
     return ':'.join(map(lambda x: '%02x' % x, bytes))
 
-class NextEthernetAddr(object):
-    addr = "00:90:00:00:00:01"
+_NextEthernetAddr = "00:90:00:00:00:01"
+def NextEthernetAddr():
+    global _NextEthernetAddr
 
-    def __init__(self, inc = 1):
-        self.value = NextEthernetAddr.addr
-        NextEthernetAddr.addr = IncEthernetAddr(NextEthernetAddr.addr, inc)
+    value = _NextEthernetAddr
+    _NextEthernetAddr = IncEthernetAddr(_NextEthernetAddr, 1)
+    return value
 
 class EthernetAddr(ParamValue):
     cxx_type = 'Net::EthAddr'
@@ -508,17 +510,11 @@ class EthernetAddr(ParamValue):
 
     def unproxy(self, base):
         if self.value == NextEthernetAddr:
-            self.addr = self.value().value
+            return EthernetAddr(self.value())
         return self
 
-    def __str__(self):
-        if self.value == NextEthernetAddr:
-            if hasattr(self, 'addr'):
-                return self.addr
-            else:
-                return "NextEthernetAddr (unresolved)"
-        else:
-            return self.value
+    def ini_str(self):
+        return self.value
 
 time_formats = [ "%a %b %d %H:%M:%S %Z %Y",
                  "%a %b %d %H:%M:%S %Z %Y",
@@ -1028,6 +1024,5 @@ __all__ = ['Param', 'VectorParam',
 
 # see comment on imports at end of __init__.py.
 from SimObject import isSimObject, isSimObjectSequence, isSimObjectClass
-import proxy
 import objects
 import internal
diff --git a/src/sim/process.cc b/src/sim/process.cc
index 2b283c9d1..68239fa52 100644
--- a/src/sim/process.cc
+++ b/src/sim/process.cc
@@ -157,12 +157,12 @@ Process::registerThreadContext(ThreadContext *tc)
     int myIndex = threadContexts.size();
     threadContexts.push_back(tc);
 
-    RemoteGDB *rgdb = new RemoteGDB(system, tc);
-    GDBListener *gdbl = new GDBListener(rgdb, 7000 + myIndex);
-    gdbl->listen();
+//    RemoteGDB *rgdb = new RemoteGDB(system, tc);
+//    GDBListener *gdbl = new GDBListener(rgdb, 7000 + myIndex);
+//    gdbl->listen();
     //gdbl->accept();
 
-    remoteGDB.push_back(rgdb);
+//    remoteGDB.push_back(rgdb);
 
     // return CPU number to caller
     return myIndex;