83 files changed, 3672 insertions, 925 deletions
diff --git a/SConstruct b/SConstruct
index 50089700a..dac4d137c 100644
--- a/SConstruct
+++ b/SConstruct
@@ -347,7 +347,10 @@ sticky_opts.AddOptions(
     ('CC', 'C compiler', os.environ.get('CC', env['CC'])),
     ('CXX', 'C++ compiler', os.environ.get('CXX', env['CXX'])),
     BoolOption('BATCH', 'Use batch pool for build and tests', False),
-    ('BATCH_CMD', 'Batch pool submission command name', 'qdo')
+    ('BATCH_CMD', 'Batch pool submission command name', 'qdo'),
+    ('PYTHONHOME',
+     'Override the default PYTHONHOME for this system (use with caution)',
+     '%s:%s' % (sys.prefix, sys.exec_prefix))
     )
 
 # Non-sticky options only apply to the current build.
@@ -359,7 +362,7 @@ nonsticky_opts.AddOptions(
 # These options get exported to #defines in config/*.hh (see src/SConscript).
 env.ExportOptions = ['FULL_SYSTEM', 'ALPHA_TLASER', 'USE_FENV', \
                      'USE_MYSQL', 'NO_FAST_ALLOC', 'SS_COMPATIBLE_FP', \
-                     'USE_CHECKER']
+                     'USE_CHECKER', 'PYTHONHOME']
 
 # Define a handy 'no-op' action
 def no_action(target, source, env):
@@ -399,8 +402,13 @@ def config_emitter(target, source, env):
     option = str(target[0])
     # True target is config header file
     target = os.path.join('config', option.lower() + '.hh')
-    # Force value to 0/1 even if it's a Python bool
-    val = int(eval(str(env[option])))
+    val = env[option]
+    if isinstance(val, bool):
+        # Force bools to 0/1; other non-string values pass through unchanged
+        val = int(val)
+    elif isinstance(val, str):
+        val = '"' + val + '"'  # wrap strings in literal double quotes
+        
     # Sources are option name & value (packaged in SCons Value nodes)
     return ([target], [Value(option), Value(val)])
 
diff --git a/configs/boot/mutex-test.rcS b/configs/boot/mutex-test.rcS
new file mode 100644
index 000000000..acf875368
--- /dev/null
+++ b/configs/boot/mutex-test.rcS
@@ -0,0 +1,6 @@
+#!/bin/sh
+
+cd /benchmarks/tests
+/sbin/m5 resetstats 
+./pthread_mutex_test 4 10000
+/sbin/m5 exit
diff --git a/configs/common/Benchmarks.py b/configs/common/Benchmarks.py
index 1f272517a..eda0e80f9 100644
--- a/configs/common/Benchmarks.py
+++ b/configs/common/Benchmarks.py
@@ -97,6 +97,8 @@ Benchmarks = {
     'ValStreamScale':	[SysConfig('micro_streamscale.rcS', '512MB')],
     'ValStreamCopy':	[SysConfig('micro_streamcopy.rcS', '512MB')],
 
+    'MutexTest':        [SysConfig('mutex-test.rcS', '128MB')],
+
     'bnAn': [SysConfig('/z/saidi/work/m5.newmem.head/configs/boot/bn-app.rcS',
                        '128MB', '/z/saidi/work/bottleneck/bnimg.img')]
 }
diff --git a/configs/example/fs.py b/configs/example/fs.py
index a5b8772af..a9daf63be 100644
--- a/configs/example/fs.py
+++ b/configs/example/fs.py
@@ -40,26 +40,49 @@ if not m5.build_env['FULL_SYSTEM']:
 
 parser = optparse.OptionParser()
 
-parser.add_option("-d", "--detailed", action="store_true")
-parser.add_option("-t", "--timing", action="store_true")
-parser.add_option("-n", "--num_cpus", type="int", default=1)
-parser.add_option("--caches", action="store_true")
-parser.add_option("-m", "--maxtick", type="int")
-parser.add_option("--maxtime", type="float")
+# Benchmark options
 parser.add_option("--dual", action="store_true",
                   help="Simulate two systems attached with an ethernet link")
 parser.add_option("-b", "--benchmark", action="store", type="string",
                   dest="benchmark",
                   help="Specify the benchmark to run. Available benchmarks: %s"\
                   % DefinedBenchmarks)
+
+# system options
+parser.add_option("-d", "--detailed", action="store_true")
+parser.add_option("-t", "--timing", action="store_true")
+parser.add_option("-n", "--num_cpus", type="int", default=1)
+parser.add_option("--caches", action="store_true")
+
+# Run duration options
+parser.add_option("-m", "--maxtick", type="int")
+parser.add_option("--maxtime", type="float")
+
+# Metafile options
 parser.add_option("--etherdump", action="store", type="string", dest="etherdump",
                   help="Specify the filename to dump a pcap capture of the" \
                   "ethernet traffic")
+
+# Checkpointing options
+###Note that performing checkpointing via python script files will override
+###checkpoint instructions built into binaries.
+parser.add_option("--take_checkpoints", action="store", type="string",
+                  help="<M,N> will take checkpoint at cycle M and every N cycles "
+                  "thereafter")  # adjacent literals: no indentation leaks into the help text
+parser.add_option("--max_checkpoints", action="store", type="int",
+                  help="the maximum number of checkpoints to drop",
+                  default=5)
 parser.add_option("--checkpoint_dir", action="store", type="string",
                   help="Place all checkpoints in this absolute directory")
-parser.add_option("-c", "--checkpoint", action="store", type="int",
+parser.add_option("-r", "--checkpoint_restore", action="store", type="int",
                   help="restore from checkpoint <N>")
 
+# CPU Switching - default switch model goes from a checkpoint
+# to a timing simple CPU with caches to warm up, then to detailed CPU for
+# data measurement
+parser.add_option("-s", "--standard_switch", action="store_true",
+                  help="switch from one cpu mode to another")
+
 (options, args) = parser.parse_args()
 
 if args:
@@ -74,23 +97,24 @@ class MyCache(BaseCache):
     tgts_per_mshr = 5
     protocol = CoherenceProtocol(protocol='moesi')
 
-# client system CPU is always simple... note this is an assignment of
+# driver system CPU is always simple... note this is an assignment of
 # a class, not an instance.
-ClientCPUClass = AtomicSimpleCPU
-client_mem_mode = 'atomic'
+DriveCPUClass = AtomicSimpleCPU
+drive_mem_mode = 'atomic'
 
+# system under test can be any of these CPUs
 if options.detailed:
-    ServerCPUClass = DerivO3CPU
-    server_mem_mode = 'timing'
+    TestCPUClass = DerivO3CPU
+    test_mem_mode = 'timing'
 elif options.timing:
-    ServerCPUClass = TimingSimpleCPU
-    server_mem_mode = 'timing'
+    TestCPUClass = TimingSimpleCPU
+    test_mem_mode = 'timing'
 else:
-    ServerCPUClass = AtomicSimpleCPU
-    server_mem_mode = 'atomic'
+    TestCPUClass = AtomicSimpleCPU
+    test_mem_mode = 'atomic'
 
-ServerCPUClass.clock = '2GHz'
-ClientCPUClass.clock = '2GHz'
+TestCPUClass.clock = '2GHz'
+DriveCPUClass.clock = '2GHz'
 
 if options.benchmark:
     try:
@@ -105,38 +129,59 @@ else:
     else:
         bm = [SysConfig()]
 
-server_sys = makeLinuxAlphaSystem(server_mem_mode, bm[0])
+test_sys = makeLinuxAlphaSystem(test_mem_mode, bm[0])
 np = options.num_cpus
-server_sys.cpu = [ServerCPUClass(cpu_id=i) for i in xrange(np)]
+test_sys.cpu = [TestCPUClass(cpu_id=i) for i in xrange(np)]
 for i in xrange(np):
-    if options.caches:
-        server_sys.cpu[i].addPrivateSplitL1Caches(MyCache(size = '32kB'),
+    if options.caches and not options.standard_switch:
+        test_sys.cpu[i].addPrivateSplitL1Caches(MyCache(size = '32kB'),
                                                   MyCache(size = '64kB'))
-    server_sys.cpu[i].connectMemPorts(server_sys.membus)
-    server_sys.cpu[i].mem = server_sys.physmem
+    test_sys.cpu[i].connectMemPorts(test_sys.membus)
+    test_sys.cpu[i].mem = test_sys.physmem
 
 if len(bm) == 2:
-    client_sys = makeLinuxAlphaSystem(client_mem_mode, bm[1])
-    client_sys.cpu = ClientCPUClass(cpu_id=0)
-    client_sys.cpu.connectMemPorts(client_sys.membus)
-    client_sys.cpu.mem = client_sys.physmem
-    root = makeDualRoot(server_sys, client_sys, options.etherdump)
+    drive_sys = makeLinuxAlphaSystem(drive_mem_mode, bm[1])
+    drive_sys.cpu = DriveCPUClass(cpu_id=0)
+    drive_sys.cpu.connectMemPorts(drive_sys.membus)
+    drive_sys.cpu.mem = drive_sys.physmem
+    root = makeDualRoot(test_sys, drive_sys, options.etherdump)
 elif len(bm) == 1:
-    root = Root(clock = '1THz', system = server_sys)
+    root = Root(clock = '1THz', system = test_sys)
 else:
     print "Error I don't know how to create more than 2 systems."
     sys.exit(1)
 
+if options.standard_switch:  # build the CPU sets later handed to m5.switchCpus
+    switch_cpus = [TimingSimpleCPU(defer_registration=True, cpu_id=(np+i)) for i in xrange(np)]
+    switch_cpus1 = [DerivO3CPU(defer_registration=True, cpu_id=(2*np+i)) for i in xrange(np)]
+    for i in xrange(np):
+        switch_cpus[i].system =  test_sys
+        switch_cpus1[i].system =  test_sys
+        switch_cpus[i].clock = TestCPUClass.clock
+        switch_cpus1[i].clock = TestCPUClass.clock
+        if options.caches:
+            switch_cpus[i].addPrivateSplitL1Caches(MyCache(size = '32kB'),
+                                                    MyCache(size = '64kB'))
+        # only the timing CPUs get L1 caches and membus ports in this loop
+        switch_cpus[i].mem = test_sys.physmem
+        switch_cpus1[i].mem = test_sys.physmem
+        switch_cpus[i].connectMemPorts(test_sys.membus)
+    root.switch_cpus = switch_cpus  # attach once, after every CPU is configured
+    root.switch_cpus1 = switch_cpus1
+    switch_cpu_list = [(test_sys.cpu[i], switch_cpus[i]) for i in xrange(np)]
+    switch_cpu_list1 = [(switch_cpus[i], switch_cpus1[i]) for i in xrange(np)]
+
 m5.instantiate(root)
 
-if options.checkpoint:
+# getcwd must be imported here; the import further down only runs on restore.
+from os import getcwd
+# Checkpoints go to --checkpoint_dir if given, else the current directory.
+cptdir = options.checkpoint_dir or getcwd()
+
+if options.checkpoint_restore:
     from os.path import isdir
     from os import listdir, getcwd
     import re
-    if options.checkpoint_dir:
-        cptdir = options.checkpoint_dir
-    else:
-        cptdir = getcwd()
 
     if not isdir(cptdir):
         m5.panic("checkpoint dir %s does not exist!" % cptdir)
@@ -149,10 +194,26 @@ if options.checkpoint:
         if match:
             cpts.append(match.group(1))
 
-    if options.checkpoint > len(cpts):
-        m5.panic('Checkpoint %d not found' % options.checkpoint)
+    cpts.sort(lambda a,b: cmp(long(a), long(b)))
 
-    m5.restoreCheckpoint(root, "/".join([cptdir, "cpt.%s" % cpts[options.checkpoint - 1]]))
+    if options.checkpoint_restore > len(cpts):
+        m5.panic('Checkpoint %d not found' % options.checkpoint_restore)
+
+    m5.restoreCheckpoint(root, "/".join([cptdir, "cpt.%s" % cpts[options.checkpoint_restore - 1]]))
+
+if options.standard_switch:
+    exit_event = m5.simulate(1000)
+    ## when you change to Timing (or Atomic), you halt the system given
+    ## as argument.  When you are finished with the system changes
+    ## (including switchCpus), you must resume the system manually.
+    ## You DON'T need to resume after just switching CPUs if you haven't
+    ## changed anything on the system level.
+    m5.changeToTiming(test_sys)
+    m5.switchCpus(switch_cpu_list)
+    m5.resume(test_sys)
+
+    exit_event = m5.simulate(500000000000)
+    m5.switchCpus(switch_cpu_list1)
 
 if options.maxtick:
     maxtick = options.maxtick
@@ -163,17 +224,56 @@ elif options.maxtime:
 else:
     maxtick = -1
 
-exit_event = m5.simulate(maxtick)
+num_checkpoints = 0
 
-while exit_event.getCause() == "checkpoint":
-    if options.checkpoint_dir:
-        m5.checkpoint(root, "/".join([options.checkpoint_dir, "cpt.%d"]))
-    else:
-        m5.checkpoint(root, "cpt.%d")
+exit_cause = ''
 
-    if maxtick == -1:
-        exit_event = m5.simulate(maxtick)
-    else:
-        exit_event = m5.simulate(maxtick - m5.curTick())
+if options.take_checkpoints:
+    [when, period] = options.take_checkpoints.split(",", 1)
+    when = int(when)
+    period = int(period)
+    # "checkpoint" exits are not honoured here; simulation just resumes toward `when`.
+    exit_event = m5.simulate(when)
+    while exit_event.getCause() == "checkpoint":
+        exit_event = m5.simulate(when - m5.curTick())
+
+    if exit_event.getCause() == "simulate() limit reached":
+        m5.checkpoint(root, "/".join([cptdir, "cpt.%d"]))
+        num_checkpoints += 1
+    # sim_ticks tracks where the current checkpoint period is due to end.
+    sim_ticks = when
+    exit_cause = "maximum %d checkpoints dropped" % options.max_checkpoints
+    while num_checkpoints < options.max_checkpoints:
+        if (sim_ticks + period) > maxtick and maxtick != -1:
+            exit_event = m5.simulate(maxtick - sim_ticks)
+            exit_cause = exit_event.getCause()
+            break
+        else:
+            exit_event = m5.simulate(period)
+            sim_ticks += period
+            while exit_event.getCause() == "checkpoint":
+                exit_event = m5.simulate(sim_ticks - m5.curTick())  # finish the interrupted period
+            if exit_event.getCause() == "simulate() limit reached":
+                m5.checkpoint(root, "/".join([cptdir, "cpt.%d"]))
+                num_checkpoints += 1
+
+else: #no checkpoints being taken via this script
+    exit_event = m5.simulate(maxtick)
+
+    while exit_event.getCause() == "checkpoint":
+        m5.checkpoint(root, "/".join([cptdir, "cpt.%d"]))  # same path form as the restore code
+        num_checkpoints += 1
+        if num_checkpoints == options.max_checkpoints:
+            exit_cause =  "maximum %d checkpoints dropped" % options.max_checkpoints
+            break
+
+        if maxtick == -1:
+            exit_event = m5.simulate(maxtick)
+        else:
+            exit_event = m5.simulate(maxtick - m5.curTick())
+
+        exit_cause = exit_event.getCause()
 
-print 'Exiting @ cycle', m5.curTick(), 'because', exit_event.getCause()
+if exit_cause == '':
+    exit_cause = exit_event.getCause()
+print 'Exiting @ cycle', m5.curTick(), 'because ', exit_cause
diff --git a/configs/example/memtest.py b/configs/example/memtest.py
new file mode 100644
index 000000000..141ecfd8e
--- /dev/null
+++ b/configs/example/memtest.py
@@ -0,0 +1,138 @@
+# Copyright (c) 2006 The Regents of The University of Michigan
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Ron Dreslinski
+
+import m5
+from m5.objects import *
+import os, optparse, sys
+m5.AddToPath('../common')
+
+parser = optparse.OptionParser()
+
+parser.add_option("--caches", action="store_true")
+parser.add_option("-t", "--timing", action="store_true")
+parser.add_option("-m", "--maxtick", type="int")
+parser.add_option("-l", "--maxloads", default = "1000000000000", type="int")
+parser.add_option("-n", "--numtesters", default = "8", type="int")
+parser.add_option("-p", "--protocol",
+                  default="moesi",
+                  help="The coherence protocol to use for the L1'a (i.e. MOESI, MOSI)")
+
+(options, args) = parser.parse_args()
+
+if args:
+     print "Error: script doesn't take any positional arguments"
+     sys.exit(1)
+
+# --------------------
+# Base L1 Cache
+# ====================
+
+class L1(BaseCache):
+    latency = 1
+    block_size = 64
+    mshrs = 12
+    tgts_per_mshr = 8
+    protocol = CoherenceProtocol(protocol=options.protocol)
+
+# ----------------------
+# Base L2 Cache
+# ----------------------
+
+class L2(BaseCache):
+    block_size = 64
+    latency = 10
+    mshrs = 92
+    tgts_per_mshr = 16
+    write_buffers = 8
+
+# The false sharing method supports at most 8 testers; refuse anything larger.
+if options.numtesters > 8:
+     print "Error: NUmber of testers limited to 8 because of false sharing"
+     sys.exit(1)
+
+if options.timing:
+     cpus = [ MemTest(atomic=False, max_loads=options.maxloads, percent_functional=50,
+                      percent_uncacheable=10, progress_interval=1000)
+              for i in xrange(options.numtesters) ]
+else:
+     cpus = [ MemTest(atomic=True, max_loads=options.maxloads, percent_functional=50,
+                      percent_uncacheable=10, progress_interval=1000)
+              for i in xrange(options.numtesters) ]
+# system simulated
+system = System(cpu = cpus, funcmem = PhysicalMemory(),
+                physmem = PhysicalMemory(latency = "50ps"), membus = Bus(clock="500GHz", width=16))
+
+# l2cache & bus
+if options.caches:
+    system.toL2Bus = Bus(clock="500GHz", width=16)
+    system.l2c = L2(size='64kB', assoc=8)
+    system.l2c.cpu_side = system.toL2Bus.port
+
+    # connect l2c to membus
+    system.l2c.mem_side = system.membus.port
+
+which_port = 0
+# add L1 caches
+for cpu in cpus:
+    if options.caches:
+         cpu.l1c = L1(size = '32kB', assoc = 4)
+         cpu.test = cpu.l1c.cpu_side
+         cpu.l1c.mem_side = system.toL2Bus.port
+    else:
+         cpu.test = system.membus.port
+    if  which_port == 0:
+         system.funcmem.port = cpu.functional
+         which_port = 1
+    else:
+         system.funcmem.functional = cpu.functional
+
+
+# connect memory to membus
+system.physmem.port = system.membus.port
+
+
+# -----------------------
+# run simulation
+# -----------------------
+
+root = Root( system = system )
+if options.timing:
+    root.system.mem_mode = 'timing'
+else:
+    root.system.mem_mode = 'atomic'
+
+# instantiate configuration
+m5.instantiate(root)
+
+# simulate until program terminates
+if options.maxtick:
+    exit_event = m5.simulate(options.maxtick)
+else:
+    exit_event = m5.simulate()
+
+print 'Exiting @ tick', m5.curTick(), 'because', exit_event.getCause()
diff --git a/configs/example/se.py b/configs/example/se.py
index 6a941b9da..2e63e27da 100644
--- a/configs/example/se.py
+++ b/configs/example/se.py
@@ -37,6 +37,7 @@ m5.AddToPath('../common')
 
 parser = optparse.OptionParser()
 
+# Benchmark options
 parser.add_option("-c", "--cmd",
                   default="../../tests/test-progs/hello/bin/alpha/linux/hello",
                   help="The binary to run in syscall emulation mode.")
@@ -45,9 +46,35 @@ parser.add_option("-o", "--options", default="",
                         string.")
 parser.add_option("-i", "--input", default="",
                   help="A file of input to give to the binary.")
+
+# System options
 parser.add_option("-d", "--detailed", action="store_true")
 parser.add_option("-t", "--timing", action="store_true")
+parser.add_option("--caches", action="store_true")
+
+# Run duration options
 parser.add_option("-m", "--maxtick", type="int")
+parser.add_option("--maxtime", type="float")
+
+#Checkpointing options
+###Note that performing checkpointing via python script files will override
+###checkpoint instructions built into binaries.
+parser.add_option("--take_checkpoints", action="store", type="string",
+                  help="<M,N> will take checkpoint at cycle M and every N cycles "
+                  "thereafter")  # adjacent literals: no indentation leaks into the help text
+parser.add_option("--max_checkpoints", action="store", type="int",
+                  help="the maximum number of checkpoints to drop",
+                  default=5)
+parser.add_option("--checkpoint_dir", action="store", type="string",
+                  help="Place all checkpoints in this absolute directory")
+parser.add_option("-r", "--checkpoint_restore", action="store", type="int",
+                  help="restore from checkpoint <N>")
+
+#CPU Switching - default switch model generally goes from a checkpoint
+#to a timing simple CPU with caches to warm up, then to detailed CPU for
+#data measurement
+parser.add_option("-s", "--standard_switch", action="store_true",
+                  help="switch from one cpu mode to another")
 
 (options, args) = parser.parse_args()
 
@@ -55,6 +82,13 @@ if args:
     print "Error: script doesn't take any positional arguments"
     sys.exit(1)
 
+class MyCache(BaseCache):
+    assoc = 2
+    block_size = 64
+    latency = 1
+    mshrs = 10
+    tgts_per_mshr = 5
+
 process = LiveProcess()
 process.executable = options.cmd
 process.cmd = options.cmd + " " + options.options
@@ -93,25 +127,149 @@ cpu.workload = process
 cpu.cpu_id = 0
 
 system = System(cpu = cpu,
-                physmem = PhysicalMemory(),
+                physmem = PhysicalMemory(range=AddrRange("512MB")),
                 membus = Bus())
 system.physmem.port = system.membus.port
 system.cpu.connectMemPorts(system.membus)
 system.cpu.mem = system.physmem
+system.cpu.clock = '2GHz'
+if options.caches and not options.standard_switch:
+    system.cpu.addPrivateSplitL1Caches(MyCache(size = '32kB'),
+                                       MyCache(size = '64kB'))
 
 root = Root(system = system)
 
 if options.timing or options.detailed:
     root.system.mem_mode = 'timing'
 
+if options.standard_switch:
+    switch_cpu = TimingSimpleCPU(defer_registration=True, cpu_id=1)
+    switch_cpu1 = DerivO3CPU(defer_registration=True, cpu_id=2)
+    switch_cpu.system =  system
+    switch_cpu1.system =  system
+    switch_cpu.clock = cpu.clock
+    switch_cpu1.clock = cpu.clock
+    if options.caches:
+        switch_cpu.addPrivateSplitL1Caches(MyCache(size = '32kB'),
+                                           MyCache(size = '64kB'))
+
+    switch_cpu.workload = process
+    switch_cpu1.workload = process
+    switch_cpu.mem = system.physmem
+    switch_cpu1.mem = system.physmem
+    switch_cpu.connectMemPorts(system.membus)
+    root.switch_cpu = switch_cpu
+    root.switch_cpu1 = switch_cpu1
+    switch_cpu_list = [(system.cpu, switch_cpu)]
+    switch_cpu_list1 = [(switch_cpu, switch_cpu1)]
+
 # instantiate configuration
 m5.instantiate(root)
 
-# simulate until program terminates
+# getcwd must be imported here; the import further down only runs on restore.
+from os import getcwd
+# Checkpoints go to --checkpoint_dir if given, else the current directory.
+cptdir = options.checkpoint_dir or getcwd()
+
+if options.checkpoint_restore:
+    from os.path import isdir
+    from os import listdir, getcwd
+    import re
+
+    if not isdir(cptdir):
+        m5.panic("checkpoint dir %s does not exist!" % cptdir)
+
+    dirs = listdir(cptdir)
+    expr = re.compile('cpt.([0-9]*)')
+    cpts = []
+    for dir in dirs:
+        match = expr.match(dir)
+        if match:
+            cpts.append(match.group(1))
+
+    cpts.sort(lambda a,b: cmp(long(a), long(b)))
+
+    if options.checkpoint_restore > len(cpts):
+        m5.panic('Checkpoint %d not found' % options.checkpoint_restore)
+
+    print "restoring checkpoint from ","/".join([cptdir, "cpt.%s" % cpts[options.checkpoint_restore - 1]])
+    m5.restoreCheckpoint(root, "/".join([cptdir, "cpt.%s" % cpts[options.checkpoint_restore - 1]]))
+
+if options.standard_switch:
+    exit_event = m5.simulate(10000)
+    ## when you change to Timing (or Atomic), you halt the system given
+    ## as argument.  When you are finished with the system changes
+    ## (including switchCpus), you must resume the system manually.
+    ## You DON'T need to resume after just switching CPUs if you haven't
+    ## changed anything on the system level.
+    m5.changeToTiming(system)
+    m5.switchCpus(switch_cpu_list)
+    m5.resume(system)
+
+    exit_event = m5.simulate(500000000000)
+    m5.switchCpus(switch_cpu_list1)
+
 if options.maxtick:
-    exit_event = m5.simulate(options.maxtick)
+    maxtick = options.maxtick
+elif options.maxtime:
+    simtime = int(options.maxtime * root.clock.value)
+    print "simulating for: ", simtime
+    maxtick = simtime
 else:
-    exit_event = m5.simulate()
+    maxtick = -1
+
+num_checkpoints = 0
+
+exit_cause = ''
+
+if options.take_checkpoints:
+    [when, period] = options.take_checkpoints.split(",", 1)
+    when = int(when)
+    period = int(period)
+    # "checkpoint" exits are not honoured here; simulation just resumes toward `when`.
+    exit_event = m5.simulate(when)
+    while exit_event.getCause() == "checkpoint":
+        exit_event = m5.simulate(when - m5.curTick())
+
+    if exit_event.getCause() == "simulate() limit reached":
+        m5.checkpoint(root, "/".join([cptdir, "cpt.%d"]))
+        num_checkpoints += 1
+    # sim_ticks tracks where the current checkpoint period is due to end.
+    sim_ticks = when
+    exit_cause = "maximum %d checkpoints dropped" % options.max_checkpoints
+    while num_checkpoints < options.max_checkpoints:
+        if (sim_ticks + period) > maxtick and maxtick != -1:
+            exit_event = m5.simulate(maxtick - sim_ticks)
+            exit_cause = exit_event.getCause()
+            break
+        else:
+            exit_event = m5.simulate(period)
+            sim_ticks += period
+            while exit_event.getCause() == "checkpoint":
+                exit_event = m5.simulate(sim_ticks - m5.curTick())  # finish the interrupted period
+            if exit_event.getCause() == "simulate() limit reached":
+                m5.checkpoint(root, "/".join([cptdir, "cpt.%d"]))
+                num_checkpoints += 1
+
+else: #no checkpoints being taken via this script
+    exit_event = m5.simulate(maxtick)
+
+    while exit_event.getCause() == "checkpoint":
+        m5.checkpoint(root, "/".join([cptdir, "cpt.%d"]))  # same path form as the restore code
+        num_checkpoints += 1
+        if num_checkpoints == options.max_checkpoints:
+            exit_cause =  "maximum %d checkpoints dropped" % options.max_checkpoints
+            break
+
+        if maxtick == -1:
+            exit_event = m5.simulate(maxtick)
+        else:
+            exit_event = m5.simulate(maxtick - m5.curTick())
+
+        exit_cause = exit_event.getCause()
+
+if exit_cause == '':
+    exit_cause = exit_event.getCause()
+print 'Exiting @ cycle', m5.curTick(), 'because ', exit_cause
 
-print 'Exiting @ tick', m5.curTick(), 'because', exit_event.getCause()
 
diff --git a/configs/splash2/run.py b/configs/splash2/run.py
index ebbe14939..93b166d77 100644
--- a/configs/splash2/run.py
+++ b/configs/splash2/run.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2005 The Regents of The University of Michigan
+# Copyright (c) 2005-2006 The Regents of The University of Michigan
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -26,54 +26,243 @@
 #
 # Authors: Ron Dreslinski
 
-from m5 import *
-import Splash2
-
-if 'SYSTEM' not in env:
-    panic("The SYSTEM environment variable must be set!\ne.g -ESYSTEM=Detailed\n")
-
-if env['SYSTEM'] == 'Simple':
-    from SimpleConfig import *
-    BaseCPU.workload = Parent.workload
-    SimpleStandAlone.cpu = [ CPU() for i in xrange(int(env['NP'])) ]
-    root = SimpleStandAlone()
-elif env['SYSTEM'] == 'Detailed':
-    from DetailedConfig import *
-    BaseCPU.workload = Parent.workload
-    DetailedStandAlone.cpu = [ DetailedCPU() for i in xrange(int(env['NP'])) ]
-    root = DetailedStandAlone()
+# Splash2 Run Script
+#
+
+import m5
+from m5.objects import *
+import os, optparse, sys
+m5.AddToPath('../common')
+
+# --------------------
+# Define Command Line Options
+# ====================
+
+parser = optparse.OptionParser()
+
+parser.add_option("-d", "--detailed", action="store_true")
+parser.add_option("-t", "--timing", action="store_true")
+parser.add_option("-m", "--maxtick", type="int")
+parser.add_option("-n", "--numcpus",
+                  help="Number of cpus in total", type="int")
+parser.add_option("-f", "--frequency",
+                  default = "1GHz",
+                  help="Frequency of each CPU")
+parser.add_option("-p", "--protocol",
+                  default="moesi",
+                  help="The coherence protocol to use for the L1'a (i.e. MOESI, MOSI)")
+parser.add_option("--l1size",
+                  default = "32kB")
+parser.add_option("--l1latency",
+                  default = 1)
+parser.add_option("--l2size",
+                  default = "256kB")
+parser.add_option("--l2latency",
+                  default = 10)
+parser.add_option("--rootdir",
+                  help="ROot directory of Splash2",
+                  default="/dist/splash2/codes")
+parser.add_option("-b", "--benchmark",
+                  help="Splash 2 benchmark to run")
+
+(options, args) = parser.parse_args()
+
+if args:
+    print "Error: script doesn't take any positional arguments"
+    sys.exit(1)
+
+if not options.numcpus:
+    print "Specify the number of cpus with -n"
+    sys.exit(1)
+
+# --------------------
+# Define Splash2 Benchmarks
+# ====================
+class Cholesky(LiveProcess):
+        executable = options.rootdir + '/kernels/cholesky/CHOLESKY'
+        cmd = 'CHOLESKY -p' + str(options.numcpus) + ' '\
+             + options.rootdir + '/kernels/cholesky/inputs/tk23.O'
+
+class FFT(LiveProcess):
+        executable = options.rootdir + '/kernels/fft/FFT'
+        cmd = 'FFT -p' + str(options.numcpus) + ' -m18'
+
+class LU_contig(LiveProcess):
+        executable = options.rootdir + '/kernels/lu/contiguous_blocks/LU'
+        cmd = 'LU -p' + str(options.numcpus)
+
+class LU_noncontig(LiveProcess):
+        executable = options.rootdir + '/kernels/lu/non_contiguous_blocks/LU'
+        cmd = 'LU -p' + str(options.numcpus)
+
+class Radix(LiveProcess):
+        executable = options.rootdir + '/kernels/radix/RADIX'
+        cmd = 'RADIX -n524288 -p' + str(options.numcpus)
+
+class Barnes(LiveProcess):
+        executable = options.rootdir + '/apps/barnes/BARNES'
+        cmd = 'BARNES'
+        input = options.rootdir + '/apps/barnes/input.p' + str(options.numcpus)
+
+class FMM(LiveProcess):
+        executable = options.rootdir + '/apps/fmm/FMM'
+        cmd = 'FMM'
+        input = options.rootdir + '/apps/fmm/inputs/input.2048.p' + str(options.numcpus)
+
+class Ocean_contig(LiveProcess):
+        executable = options.rootdir + '/apps/ocean/contiguous_partitions/OCEAN'
+        cmd = 'OCEAN -p' + str(options.numcpus)
+
+class Ocean_noncontig(LiveProcess):
+        executable = options.rootdir + '/apps/ocean/non_contiguous_partitions/OCEAN'
+        cmd = 'OCEAN -p' + str(options.numcpus)
+
+class Raytrace(LiveProcess):  # Splash2 raytrace app, run on the teapot input
+        executable = options.rootdir + '/apps/raytrace/RAYTRACE'
+        cmd = 'RAYTRACE -p' + str(options.numcpus) + ' ' \
+             + options.rootdir + '/apps/raytrace/inputs/teapot.env'
+
+class Water_nsquared(LiveProcess):
+        executable = options.rootdir + '/apps/water-nsquared/WATER-NSQUARED'
+        cmd = 'WATER-NSQUARED'
+        input = options.rootdir + '/apps/water-nsquared/input.p' + str(options.numcpus)
+
+class Water_spatial(LiveProcess):
+        executable = options.rootdir + '/apps/water-spatial/WATER-SPATIAL'
+        cmd = 'WATER-SPATIAL'
+        input = options.rootdir + '/apps/water-spatial/input.p' + str(options.numcpus)
+
+
+# --------------------
+# Base L1 Cache Definition
+# ====================
+
+class L1(BaseCache):
+    latency = options.l1latency
+    block_size = 64
+    mshrs = 12
+    tgts_per_mshr = 8
+    protocol = CoherenceProtocol(protocol=options.protocol)
+
+# ----------------------
+# Base L2 Cache Definition
+# ----------------------
+
+class L2(BaseCache):
+    block_size = 64
+    latency = options.l2latency
+    mshrs = 92
+    tgts_per_mshr = 16
+    write_buffers = 8
+
+# ----------------------
+# Define the cpus
+# ----------------------
+
+busFrequency = Frequency(options.frequency)
+
+if options.timing:
+    cpus = [TimingSimpleCPU(cpu_id = i,
+                            clock=options.frequency)
+            for i in xrange(options.numcpus)]
+elif options.detailed:
+    cpus = [DerivO3CPU(cpu_id = i,
+                       clock=options.frequency)
+            for i in xrange(options.numcpus)]
 else:
-    panic("The SYSTEM environment variable was set to something improper.\n Use Simple or Detailed\n")
-
-if 'BENCHMARK' not in env:
-        panic("The BENCHMARK environment variable must be set!\ne.g. -EBENCHMARK=Cholesky\n")
-
-if env['BENCHMARK'] == 'Cholesky':
-    root.workload = Splash2.Cholesky()
-elif env['BENCHMARK'] == 'FFT':
-    root.workload = Splash2.FFT()
-elif env['BENCHMARK'] == 'LUContig':
-    root.workload = Splash2.LU_contig()
-elif env['BENCHMARK'] == 'LUNoncontig':
-    root.workload = Splash2.LU_noncontig()
-elif env['BENCHMARK'] == 'Radix':
-    root.workload = Splash2.Radix()
-elif env['BENCHMARK'] == 'Barnes':
-    root.workload = Splash2.Barnes()
-elif env['BENCHMARK'] == 'FMM':
-    root.workload = Splash2.FMM()
-elif env['BENCHMARK'] == 'OceanContig':
-    root.workload = Splash2.Ocean_contig()
-elif env['BENCHMARK'] == 'OceanNoncontig':
-    root.workload = Splash2.Ocean_noncontig()
-elif env['BENCHMARK'] == 'Raytrace':
-    root.workload = Splash2.Raytrace()
-elif env['BENCHMARK'] == 'WaterNSquared':
-    root.workload = Splash2.Water_nsquared()
-elif env['BENCHMARK'] == 'WaterSpatial':
-    root.workload = Splash2.Water_spatial()
+    cpus = [AtomicSimpleCPU(cpu_id = i,
+                            clock=options.frequency)
+            for i in xrange(options.numcpus)]
+
+# ----------------------
+# Create a system, and add system wide objects
+# ----------------------
+system = System(cpu = cpus, physmem = PhysicalMemory(),
+                membus = Bus(clock = busFrequency))
+
+system.toL2bus = Bus(clock = busFrequency)
+system.l2 = L2(size = options.l2size, assoc = 8)
+
+# ----------------------
+# Connect the L2 cache and memory together
+# ----------------------
+
+system.physmem.port = system.membus.port
+system.l2.cpu_side = system.toL2bus.port
+system.l2.mem_side = system.membus.port
+
+# ----------------------
+# Give each cpu private split L1 caches and connect them to the L2 bus
+# ----------------------
+for cpu in cpus:
+    cpu.addPrivateSplitL1Caches(L1(size = options.l1size, assoc = 1),
+                                L1(size = options.l1size, assoc = 4))
+    cpu.mem = cpu.dcache
+    # connect cpu level-1 caches to shared level-2 cache
+    cpu.connectMemPorts(system.toL2bus)
+
+
+# ----------------------
+# Define the root
+# ----------------------
+
+root = Root(system = system)
+
+# --------------------
+# Pick the correct Splash2 Benchmarks
+# ====================
+if options.benchmark == 'Cholesky':
+    root.workload = Cholesky()
+elif options.benchmark == 'FFT':
+    root.workload = FFT()
+elif options.benchmark == 'LUContig':
+    root.workload = LU_contig()
+elif options.benchmark == 'LUNoncontig':
+    root.workload = LU_noncontig()
+elif options.benchmark == 'Radix':
+    root.workload = Radix()
+elif options.benchmark == 'Barnes':
+    root.workload = Barnes()
+elif options.benchmark == 'FMM':
+    root.workload = FMM()
+elif options.benchmark == 'OceanContig':
+    root.workload = Ocean_contig()
+elif options.benchmark == 'OceanNoncontig':
+    root.workload = Ocean_noncontig()
+elif options.benchmark == 'Raytrace':
+    root.workload = Raytrace()
+elif options.benchmark == 'WaterNSquared':
+    root.workload = Water_nsquared()
+elif options.benchmark == 'WaterSpatial':
+    root.workload = Water_spatial()
 else:
-    panic("The BENCHMARK environment variable was set to something" \
+    panic("The --benchmark environment variable was set to something" \
           +" improper.\nUse Cholesky, FFT, LUContig, LUNoncontig, Radix" \
           +", Barnes, FMM, OceanContig,\nOceanNoncontig, Raytrace," \
           +" WaterNSquared, or WaterSpatial\n")
+
+# --------------------
+# Assign the workload to the cpus
+# ====================
+
+for cpu in cpus:
+    cpu.workload = root.workload
+
+# ----------------------
+# Run the simulation
+# ----------------------
+
+if options.timing or options.detailed:
+    root.system.mem_mode = 'timing'
+
+# instantiate configuration
+m5.instantiate(root)
+
+# simulate until program terminates
+if options.maxtick:
+    exit_event = m5.simulate(options.maxtick)
+else:
+    exit_event = m5.simulate()
+
+print 'Exiting @ tick', m5.curTick(), 'because', exit_event.getCause()
+
diff --git a/src/arch/alpha/faults.cc b/src/arch/alpha/faults.cc
index eef4361fd..7179bf025 100644
--- a/src/arch/alpha/faults.cc
+++ b/src/arch/alpha/faults.cc
@@ -194,7 +194,8 @@ void PageTableFault::invoke(ThreadContext *tc)
 
     // We've accessed the next page
     if (vaddr > p->stack_min - PageBytes) {
-        warn("Increasing stack %#x:%#x to %#x:%#x because of access to %#x",
+        DPRINTF(Stack,
+                "Increasing stack %#x:%#x to %#x:%#x because of access to %#x",
                 p->stack_min, p->stack_base, p->stack_min - PageBytes,
                 p->stack_base, vaddr);
         p->stack_min -= PageBytes;
@@ -202,6 +203,7 @@ void PageTableFault::invoke(ThreadContext *tc)
             fatal("Over max stack size for one thread\n");
         p->pTable->allocate(p->stack_min, PageBytes);
     } else {
+        warn("Page fault on address %#x\n", vaddr);
         FaultBase::invoke(tc);
     }
 }
diff --git a/src/arch/alpha/utility.hh b/src/arch/alpha/utility.hh
index d3ccc0444..0304d1c3a 100644
--- a/src/arch/alpha/utility.hh
+++ b/src/arch/alpha/utility.hh
@@ -37,16 +37,17 @@
 #include "arch/alpha/isa_traits.hh"
 #include "arch/alpha/regfile.hh"
 #include "base/misc.hh"
+#include "cpu/thread_context.hh"
 
 namespace AlphaISA
 {
 
     static inline ExtMachInst
-    makeExtMI(MachInst inst, const uint64_t &pc) {
+    makeExtMI(MachInst inst, ThreadContext * xc) {
 #if FULL_SYSTEM
         ExtMachInst ext_inst = inst;
-        if (pc && 0x1)
-            return ext_inst|=(static_cast<ExtMachInst>(pc & 0x1) << 32);
+        if (xc->readPC() && 0x1)
+            return ext_inst|=(static_cast<ExtMachInst>(xc->readPC() & 0x1) << 32);
         else
             return ext_inst;
 #else
diff --git a/src/arch/isa_parser.py b/src/arch/isa_parser.py
index 4d522e18a..b235398f1 100755
--- a/src/arch/isa_parser.py
+++ b/src/arch/isa_parser.py
@@ -1636,7 +1636,7 @@ opClassRE = re.compile(r'.*Op|No_OpClass')
 
 class InstObjParams:
     def __init__(self, mnem, class_name, base_class = '',
-                 code = None, opt_args = [], *extras):
+                 code = None, opt_args = [], extras = {}):
         self.mnemonic = mnem
         self.class_name = class_name
         self.base_class = base_class
@@ -1648,13 +1648,23 @@ class InstObjParams:
             else:
                 origCode = code
                 codeBlock = CodeBlock(code)
-            compositeCode = '\n'.join([origCode] +
-                    [pair[1] for pair in extras])
+            stringExtras = {}
+            otherExtras = {}
+            for (k, v) in extras.items():
+                if type(v) == str:
+                    stringExtras[k] = v
+                else:
+                    otherExtras[k] = v
+            compositeCode = "\n".join([origCode] + stringExtras.values())
+            # compositeCode = '\n'.join([origCode] +
+            #	    [pair[1] for pair in extras])
             compositeBlock = CodeBlock(compositeCode)
             for code_attr in compositeBlock.__dict__.keys():
                 setattr(self, code_attr, getattr(compositeBlock, code_attr))
-            for (key, snippet) in extras:
+            for (key, snippet) in stringExtras.items():
                 setattr(self, key, CodeBlock(snippet).code)
+            for (key, item) in otherExtras.items():
+                setattr(self, key, item)
             self.code = codeBlock.code
             self.orig_code = origCode
         else:
diff --git a/src/arch/mips/utility.hh b/src/arch/mips/utility.hh
index ae2fe1aea..56689ba4d 100644
--- a/src/arch/mips/utility.hh
+++ b/src/arch/mips/utility.hh
@@ -35,6 +35,7 @@
 #include "arch/mips/types.hh"
 #include "base/misc.hh"
 #include "config/full_system.hh"
+#include "cpu/thread_context.hh"
 //XXX This is needed for size_t. We should use something other than size_t
 //#include "kern/linux/linux.hh"
 #include "sim/host.hh"
@@ -86,11 +87,11 @@ namespace MipsISA {
     }
 
     static inline ExtMachInst
-    makeExtMI(MachInst inst, const uint64_t &pc) {
+    makeExtMI(MachInst inst, ThreadContext * xc) {
 #if FULL_SYSTEM
         ExtMachInst ext_inst = inst;
-        if (pc && 0x1)
-            return ext_inst|=(static_cast<ExtMachInst>(pc & 0x1) << 32);
+        if (xc->readPC() && 0x1)
+            return ext_inst|=(static_cast<ExtMachInst>(xc->readPC() & 0x1) << 32);
         else
             return ext_inst;
 #else
diff --git a/src/arch/sparc/SConscript b/src/arch/sparc/SConscript
index 66f2b57e0..e317502e0 100644
--- a/src/arch/sparc/SConscript
+++ b/src/arch/sparc/SConscript
@@ -44,6 +44,7 @@ Import('env')
 
 # Base sources used by all configurations.
 base_sources = Split('''
+	asi.cc
 	faults.cc
 	floatregfile.cc
 	intregfile.cc
diff --git a/src/arch/sparc/asi.cc b/src/arch/sparc/asi.cc
new file mode 100644
index 000000000..00c9e041e
--- /dev/null
+++ b/src/arch/sparc/asi.cc
@@ -0,0 +1,279 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Gabe Black
+ */
+
+#include "arch/sparc/asi.hh"
+
+namespace SparcISA
+{
+    bool AsiIsBlock(ASI asi) // true iff asi is one of the eight ASI_BLK_* values below
+    {
+        return
+            (asi == ASI_BLK_AIUP) ||
+            (asi == ASI_BLK_AIUS) ||
+            (asi == ASI_BLK_AIUPL) ||
+            (asi == ASI_BLK_AIUSL) ||
+            (asi == ASI_BLK_P) ||
+            (asi == ASI_BLK_S) ||
+            (asi == ASI_BLK_PL) ||
+            (asi == ASI_BLK_SL);
+    }
+
+    bool AsiIsPrimary(ASI asi) // true iff asi is one of the primary ASIs listed below
+    {
+        return
+            (asi == ASI_AIUP) ||
+            (asi == ASI_BLK_AIUP) ||
+            (asi == ASI_AIUPL) ||
+            (asi == ASI_BLK_AIUPL) ||
+            (asi == ASI_LDTX_AIUP) ||
+            (asi == ASI_LDTX_AIUPL) ||
+            (asi == ASI_P) ||
+            (asi == ASI_PNF) ||
+            (asi == ASI_PL) ||
+            (asi == ASI_PNFL) ||
+            (asi == ASI_PST8_P) ||
+            (asi == ASI_PST16_P) ||
+            (asi == ASI_PST32_P) ||
+            (asi == ASI_PST8_PL) ||
+            (asi == ASI_PST16_PL) ||
+            (asi == ASI_PST32_PL) ||
+            (asi == ASI_FL8_P) ||
+            (asi == ASI_FL16_P) ||
+            (asi == ASI_FL8_PL) ||
+            (asi == ASI_FL16_PL) ||
+            (asi == ASI_LDTX_P) ||
+            (asi == ASI_LDTX_PL) ||
+            (asi == ASI_BLK_P) ||
+            (asi == ASI_BLK_PL);
+    }
+
+    bool AsiIsSecondary(ASI asi) // true iff asi is one of the secondary ASIs listed below
+    {
+        return
+            (asi == ASI_AIUS) ||
+            (asi == ASI_BLK_AIUS) ||
+            (asi == ASI_AIUSL) ||
+            (asi == ASI_BLK_AIUSL) ||
+            (asi == ASI_LDTX_AIUS) ||
+            (asi == ASI_LDTX_AIUSL) ||
+            (asi == ASI_S) ||
+            (asi == ASI_SNF) ||
+            (asi == ASI_SL) ||
+            (asi == ASI_SNFL) ||
+            (asi == ASI_PST8_S) ||
+            (asi == ASI_PST16_S) ||
+            (asi == ASI_PST32_S) ||
+            (asi == ASI_PST8_SL) ||
+            (asi == ASI_PST16_SL) ||
+            (asi == ASI_PST32_SL) ||
+            (asi == ASI_FL8_S) ||
+            (asi == ASI_FL16_S) ||
+            (asi == ASI_FL8_SL) ||
+            (asi == ASI_FL16_SL) ||
+            (asi == ASI_LDTX_S) ||
+            (asi == ASI_LDTX_SL) ||
+            (asi == ASI_BLK_S) ||
+            (asi == ASI_BLK_SL);
+    }
+
+    bool AsiIsNucleus(ASI asi) // true iff asi is one of the four nucleus ASIs below; name matches asi.hh
+    {
+        return
+            (asi == ASI_N) ||
+            (asi == ASI_NL) ||
+            (asi == ASI_LDTX_N) ||
+            (asi == ASI_LDTX_NL);
+    }
+
+    bool AsiIsAsIfUser(ASI asi) // true iff asi is one of the twelve *_AIU* values below
+    {
+        return
+            (asi == ASI_AIUP) ||
+            (asi == ASI_AIUS) ||
+            (asi == ASI_BLK_AIUP) ||
+            (asi == ASI_BLK_AIUS) ||
+            (asi == ASI_AIUPL) ||
+            (asi == ASI_AIUSL) ||
+            (asi == ASI_BLK_AIUPL) ||
+            (asi == ASI_BLK_AIUSL) ||
+            (asi == ASI_LDTX_AIUP) ||
+            (asi == ASI_LDTX_AIUS) ||
+            (asi == ASI_LDTX_AIUPL) ||
+            (asi == ASI_LDTX_AIUSL);
+    }
+
+    bool AsiIsIO(ASI asi) // true iff asi is ASI_REAL_IO or ASI_REAL_IO_L
+    {
+        return
+            (asi == ASI_REAL_IO) ||
+            (asi == ASI_REAL_IO_L);
+    }
+
+    bool AsiIsReal(ASI asi) // true iff asi is one of the seven *REAL* values below
+    {
+        return
+            (asi == ASI_REAL) ||
+            (asi == ASI_REAL_IO) ||
+            (asi == ASI_REAL_L) ||
+            (asi == ASI_REAL_IO_L) ||
+            (asi == ASI_LDTX_REAL) ||
+            (asi == ASI_LDTX_REAL_L) ||
+            (asi == ASI_MMU_REAL);
+    }
+
+    bool AsiIsLittle(ASI asi) // true iff asi is one of the little (L-suffixed) ASIs below
+    {
+        return
+            (asi == ASI_NL) ||
+            (asi == ASI_AIUPL) ||
+            (asi == ASI_AIUSL) ||
+            (asi == ASI_REAL_L) ||
+            (asi == ASI_REAL_IO_L) ||
+            (asi == ASI_BLK_AIUPL) ||
+            (asi == ASI_BLK_AIUSL) ||
+            (asi == ASI_LDTX_AIUPL) ||
+            (asi == ASI_LDTX_AIUSL) ||
+            (asi == ASI_LDTX_REAL_L) ||
+            (asi == ASI_LDTX_NL) ||
+            (asi == ASI_PL) ||
+            (asi == ASI_SL) ||
+            (asi == ASI_PNFL) ||
+            (asi == ASI_SNFL) ||
+            (asi == ASI_PST8_PL) ||
+            (asi == ASI_PST8_SL) ||
+            (asi == ASI_PST16_PL) ||
+            (asi == ASI_PST16_SL) ||
+            (asi == ASI_PST32_PL) ||
+            (asi == ASI_PST32_SL) ||
+            (asi == ASI_FL8_PL) ||
+            (asi == ASI_FL8_SL) ||
+            (asi == ASI_FL16_PL) ||
+            (asi == ASI_FL16_SL) ||
+            (asi == ASI_LDTX_PL) ||
+            (asi == ASI_LDTX_SL) ||
+            (asi == ASI_BLK_PL) ||
+            (asi == ASI_BLK_SL);
+    }
+
+    bool AsiIsTwin(ASI asi) // true iff asi is one of the twelve ASI_LDTX_* values below
+    {
+        return
+            (asi == ASI_LDTX_AIUP) ||
+            (asi == ASI_LDTX_AIUS) ||
+            (asi == ASI_LDTX_REAL) ||
+            (asi == ASI_LDTX_N) ||
+            (asi == ASI_LDTX_AIUPL) ||
+            (asi == ASI_LDTX_AIUSL) ||
+            (asi == ASI_LDTX_REAL_L) ||
+            (asi == ASI_LDTX_NL) ||
+            (asi == ASI_LDTX_P) ||
+            (asi == ASI_LDTX_S) ||
+            (asi == ASI_LDTX_PL) ||
+            (asi == ASI_LDTX_SL);
+    }
+
+    bool AsiIsPartialStore(ASI asi) // true iff asi is one of the twelve ASI_PST* values below
+    {
+        return
+            (asi == ASI_PST8_P) ||
+            (asi == ASI_PST8_S) ||
+            (asi == ASI_PST16_P) ||
+            (asi == ASI_PST16_S) ||
+            (asi == ASI_PST32_P) ||
+            (asi == ASI_PST32_S) ||
+            (asi == ASI_PST8_PL) ||
+            (asi == ASI_PST8_SL) ||
+            (asi == ASI_PST16_PL) ||
+            (asi == ASI_PST16_SL) ||
+            (asi == ASI_PST32_PL) ||
+            (asi == ASI_PST32_SL);
+    }
+
+    bool AsiIsFloatingLoad(ASI asi) // true iff asi is one of the eight ASI_FL* values below
+    {
+        return
+            (asi == ASI_FL8_P) ||
+            (asi == ASI_FL8_S) ||
+            (asi == ASI_FL16_P) ||
+            (asi == ASI_FL16_S) ||
+            (asi == ASI_FL8_PL) ||
+            (asi == ASI_FL8_SL) ||
+            (asi == ASI_FL16_PL) ||
+            (asi == ASI_FL16_SL);
+    }
+
+    bool AsiIsNoFault(ASI asi) // true iff asi is ASI_PNF, ASI_SNF, ASI_PNFL or ASI_SNFL
+    {
+        return
+            (asi == ASI_PNF) ||
+            (asi == ASI_SNF) ||
+            (asi == ASI_PNFL) ||
+            (asi == ASI_SNFL);
+    }
+
+    bool AsiIsScratchPad(ASI asi) // true iff asi is ASI_SCRATCHPAD or ASI_HYP_SCRATCHPAD
+    {
+        return
+            (asi == ASI_SCRATCHPAD) ||
+            (asi == ASI_HYP_SCRATCHPAD);
+    }
+
+    bool AsiIsCmt(ASI asi) // true iff asi is ASI_CMT_PER_STRAND or ASI_CMT_SHARED
+    {
+        return
+            (asi == ASI_CMT_PER_STRAND) ||
+            (asi == ASI_CMT_SHARED);
+    }
+
+    bool AsiIsQueue(ASI asi) // true iff asi is ASI_QUEUE
+    {
+        return asi == ASI_QUEUE;
+    }
+
+    bool AsiIsDtlb(ASI asi) // true iff asi is one of the three ASI_DTLB_* values below
+    {
+        return
+            (asi == ASI_DTLB_DATA_IN_REG) ||
+            (asi == ASI_DTLB_DATA_ACCESS_REG) ||
+            (asi == ASI_DTLB_TAG_READ_REG);
+    }
+
+    bool AsiIsMmu(ASI asi) // true iff asi is one of the seven MMU-related ASIs below
+    {
+        return
+            (asi == ASI_MMU_CONTEXTID) ||
+            (asi == ASI_IMMU) ||
+            (asi == ASI_MMU_REAL) ||
+            (asi == ASI_MMU) ||
+            (asi == ASI_DMMU) ||
+            (asi == ASI_UMMU) ||
+            (asi == ASI_DMMU_DEMAP);
+    }
+}
diff --git a/src/arch/sparc/asi.hh b/src/arch/sparc/asi.hh
index 482e077e0..876567225 100644
--- a/src/arch/sparc/asi.hh
+++ b/src/arch/sparc/asi.hh
@@ -156,23 +156,23 @@ namespace SparcISA
         ASI_PST32_SL = 0xCD,
         ASI_PST32_SECONDARY_LITTLE = ASI_PST32_SL,
         //0xCE-0xCF implementation dependent
-        ASI_PL8_P = 0xD0,
-        ASI_PL8_PRIMARY = ASI_PL8_P,
-        ASI_PL8_S = 0xD1,
-        ASI_PL8_SECONDARY = ASI_PL8_S,
-        ASI_PL16_P = 0xD2,
-        ASI_PL16_PRIMARY = ASI_PL16_P,
-        ASI_PL16_S = 0xD3,
-        ASI_PL16_SECONDARY = ASI_PL16_S,
+        ASI_FL8_P = 0xD0,
+        ASI_FL8_PRIMARY = ASI_FL8_P,
+        ASI_FL8_S = 0xD1,
+        ASI_FL8_SECONDARY = ASI_FL8_S,
+        ASI_FL16_P = 0xD2,
+        ASI_FL16_PRIMARY = ASI_FL16_P,
+        ASI_FL16_S = 0xD3,
+        ASI_FL16_SECONDARY = ASI_FL16_S,
         //0xD4-0xD7 implementation dependent
-        ASI_PL8_PL = 0xD8,
-        ASI_PL8_PRIMARY_LITTLE = ASI_PL8_PL,
-        ASI_PL8_SL = 0xD9,
-        ASI_PL8_SECONDARY_LITTLE = ASI_PL8_SL,
-        ASI_PL16_PL = 0xDA,
-        ASI_PL16_PRIMARY_LITTLE = ASI_PL16_PL,
-        ASI_PL16_SL = 0xDB,
-        ASI_PL16_SECONDARY_LITTLE = ASI_PL16_SL,
+        ASI_FL8_PL = 0xD8,
+        ASI_FL8_PRIMARY_LITTLE = ASI_FL8_PL,
+        ASI_FL8_SL = 0xD9,
+        ASI_FL8_SECONDARY_LITTLE = ASI_FL8_SL,
+        ASI_FL16_PL = 0xDA,
+        ASI_FL16_PRIMARY_LITTLE = ASI_FL16_PL,
+        ASI_FL16_SL = 0xDB,
+        ASI_FL16_SECONDARY_LITTLE = ASI_FL16_SL,
         //0xDC-0xDF implementation dependent
         //0xE0-0xE1 reserved
         ASI_LDTX_P = 0xE2,
@@ -193,9 +193,30 @@ namespace SparcISA
         ASI_BLK_PL = 0xF8,
         ASI_BLOCK_PRIMARY_LITTLE = ASI_BLK_PL,
         ASI_BLK_SL = 0xF9,
-        ASI_BLOCK_SECONDARY_LITTLE = ASI_BLK_SL
+        ASI_BLOCK_SECONDARY_LITTLE = ASI_BLK_SL,
         //0xFA-0xFF implementation dependent
+        MAX_ASI = 0xFF
     };
+
+    //Functions that classify an asi
+    bool AsiIsBlock(ASI);
+    bool AsiIsPrimary(ASI);
+    bool AsiIsSecondary(ASI);
+    bool AsiIsNucleus(ASI);
+    bool AsiIsAsIfUser(ASI);
+    bool AsiIsIO(ASI);
+    bool AsiIsReal(ASI);
+    bool AsiIsLittle(ASI);
+    bool AsiIsTwin(ASI);
+    bool AsiIsPartialStore(ASI);
+    bool AsiIsFloatingLoad(ASI);
+    bool AsiIsNoFault(ASI);
+    bool AsiIsScratchPad(ASI);
+    bool AsiIsCmt(ASI);
+    bool AsiIsQueue(ASI);
+    bool AsiIsDtlb(ASI);
+    bool AsiIsMmu(ASI);
+
 };
 
 #endif // __ARCH_SPARC_TLB_HH__
diff --git a/src/arch/sparc/isa/base.isa b/src/arch/sparc/isa/base.isa
index b518265aa..a4c022411 100644
--- a/src/arch/sparc/isa/base.isa
+++ b/src/arch/sparc/isa/base.isa
@@ -77,7 +77,7 @@ output header {{
           protected:
             // Constructor.
             SparcStaticInst(const char *mnem,
-                 MachInst _machInst, OpClass __opClass)
+                 ExtMachInst _machInst, OpClass __opClass)
                     : StaticInst(mnem, _machInst, __opClass)
                 {
                 }
diff --git a/src/arch/sparc/isa/bitfields.isa b/src/arch/sparc/isa/bitfields.isa
index 27f52fa29..372f5c4ef 100644
--- a/src/arch/sparc/isa/bitfields.isa
+++ b/src/arch/sparc/isa/bitfields.isa
@@ -76,3 +76,7 @@ def bitfield SIMM11	<10:0>;
 def bitfield SIMM13	<12:0>;
 def bitfield SW_TRAP	<7:0>;
 def bitfield X		<12>;
+
+// Extended bitfields which aren't part of the actual instruction.
+
+def bitfield EXT_ASI	<39:32>;
diff --git a/src/arch/sparc/isa/decoder.isa b/src/arch/sparc/isa/decoder.isa
index 1384b21a0..45d3616d9 100644
--- a/src/arch/sparc/isa/decoder.isa
+++ b/src/arch/sparc/isa/decoder.isa
@@ -441,7 +441,7 @@ decode OP default Unknown::unknown()
             0x34: decode OPF{
                 format BasicOperate{
                     0x01: fmovs({{
-                        Frd.uw = Frs2.uw;
+                        Frds.uw = Frs2s.uw;
                         //fsr.ftt = fsr.cexc = 0
                         Fsr &= ~(7 << 14);
                         Fsr &= ~(0x1F);
@@ -454,7 +454,7 @@ decode OP default Unknown::unknown()
                     }});
                     0x03: Trap::fmovq({{fault = new FpDisabled;}});
                     0x05: fnegs({{
-                        Frd.uw = Frs2.uw ^ (1UL << 31);
+                        Frds.uw = Frs2s.uw ^ (1UL << 31);
                         //fsr.ftt = fsr.cexc = 0
                         Fsr &= ~(7 << 14);
                         Fsr &= ~(0x1F);
@@ -467,7 +467,7 @@ decode OP default Unknown::unknown()
                     }});
                     0x07: Trap::fnegq({{fault = new FpDisabled;}});
                     0x09: fabss({{
-                        Frd.uw = ((1UL << 31) - 1) & Frs2.uw;
+                        Frds.uw = ((1UL << 31) - 1) & Frs2s.uw;
                         //fsr.ftt = fsr.cexc = 0
                         Fsr &= ~(7 << 14);
                         Fsr &= ~(0x1F);
@@ -479,55 +479,55 @@ decode OP default Unknown::unknown()
                         Fsr &= ~(0x1F);
                     }});
                     0x0B: Trap::fabsq({{fault = new FpDisabled;}});
-                    0x29: fsqrts({{Frd.sf = sqrt(Frs2.sf);}});
+                    0x29: fsqrts({{Frds.sf = sqrt(Frs2s.sf);}});
                     0x2A: fsqrtd({{Frd.df = sqrt(Frs2.df);}});
                     0x2B: Trap::fsqrtq({{fault = new FpDisabled;}});
-                    0x41: fadds({{Frd.sf = Frs1.sf + Frs2.sf;}});
+                    0x41: fadds({{Frds.sf = Frs1s.sf + Frs2s.sf;}});
                     0x42: faddd({{Frd.df = Frs1.df + Frs2.df;}});
                     0x43: Trap::faddq({{fault = new FpDisabled;}});
-                    0x45: fsubs({{Frd.sf = Frs1.sf - Frs2.sf;}});
+                    0x45: fsubs({{Frds.sf = Frs1s.sf - Frs2s.sf;}});
                     0x46: fsubd({{Frd.df = Frs1.df - Frs2.df;}});
                     0x47: Trap::fsubq({{fault = new FpDisabled;}});
-                    0x49: fmuls({{Frd.sf = Frs1.sf * Frs2.sf;}});
+                    0x49: fmuls({{Frds.sf = Frs1s.sf * Frs2s.sf;}});
                     0x4A: fmuld({{Frd.df = Frs1.df * Frs2.df;}});
                     0x4B: Trap::fmulq({{fault = new FpDisabled;}});
-                    0x4D: fdivs({{Frd.sf = Frs1.sf / Frs2.sf;}});
+                    0x4D: fdivs({{Frds.sf = Frs1s.sf / Frs2s.sf;}});
                     0x4E: fdivd({{Frd.df = Frs1.df / Frs2.df;}});
                     0x4F: Trap::fdivq({{fault = new FpDisabled;}});
-                    0x69: fsmuld({{Frd.df = Frs1.sf * Frs2.sf;}});
+                    0x69: fsmuld({{Frd.df = Frs1s.sf * Frs2s.sf;}});
                     0x6E: Trap::fdmulq({{fault = new FpDisabled;}});
                     0x81: fstox({{
-                            Frd.df = (double)static_cast<int64_t>(Frs2.sf);
+                            Frd.df = (double)static_cast<int64_t>(Frs2s.sf);
                     }});
                     0x82: fdtox({{
                             Frd.df = (double)static_cast<int64_t>(Frs2.df);
                     }});
                     0x83: Trap::fqtox({{fault = new FpDisabled;}});
                     0x84: fxtos({{
-                            Frd.sf = static_cast<float>((int64_t)Frs2.df);
+                            Frds.sf = static_cast<float>((int64_t)Frs2.df);
                     }});
                     0x88: fxtod({{
                             Frd.df = static_cast<double>((int64_t)Frs2.df);
                     }});
                     0x8C: Trap::fxtoq({{fault = new FpDisabled;}});
                     0xC4: fitos({{
-                            Frd.sf = static_cast<float>((int32_t)Frs2.sf);
+                            Frds.sf = static_cast<float>((int32_t)Frs2s.sf);
                     }});
-                    0xC6: fdtos({{Frd.sf = Frs2.df;}});
+                    0xC6: fdtos({{Frds.sf = Frs2.df;}});
                     0xC7: Trap::fqtos({{fault = new FpDisabled;}});
                     0xC8: fitod({{
-                            Frd.df = static_cast<double>((int32_t)Frs2.sf);
+                            Frd.df = static_cast<double>((int32_t)Frs2s.sf);
                     }});
-                    0xC9: fstod({{Frd.df = Frs2.sf;}});
+                    0xC9: fstod({{Frd.df = Frs2s.sf;}});
                     0xCB: Trap::fqtod({{fault = new FpDisabled;}});
                     0xCC: Trap::fitoq({{fault = new FpDisabled;}});
                     0xCD: Trap::fstoq({{fault = new FpDisabled;}});
                     0xCE: Trap::fdtoq({{fault = new FpDisabled;}});
                     0xD1: fstoi({{
-                            Frd.sf = (float)static_cast<int32_t>(Frs2.sf);
+                            Frds.sf = (float)static_cast<int32_t>(Frs2s.sf);
                     }});
                     0xD2: fdtoi({{
-                            Frd.sf = (float)static_cast<int32_t>(Frs2.df);
+                            Frds.sf = (float)static_cast<int32_t>(Frs2.df);
                     }});
                     0xD3: Trap::fqtoi({{fault = new FpDisabled;}});
                     default: Trap::fpop1({{fault = new FpDisabled;}});
@@ -620,7 +620,7 @@ decode OP default Unknown::unknown()
                 0x56: Trap::fpsub32({{fault = new IllegalInstruction;}});
                 0x57: Trap::fpsub32s({{fault = new IllegalInstruction;}});
                 0x60: BasicOperate::fzero({{Frd.df = 0;}});
-                0x61: BasicOperate::fzeros({{Frd.sf = 0;}});
+                0x61: BasicOperate::fzeros({{Frds.sf = 0;}});
                 0x62: Trap::fnor({{fault = new IllegalInstruction;}});
                 0x63: Trap::fnors({{fault = new IllegalInstruction;}});
                 0x64: Trap::fandnot2({{fault = new IllegalInstruction;}});
@@ -629,7 +629,7 @@ decode OP default Unknown::unknown()
                         Frd.df = (double)(~((uint64_t)Frs2.df));
                 }});
                 0x67: BasicOperate::fnot2s({{
-                        Frd.sf = (float)(~((uint32_t)Frs2.sf));
+                        Frds.sf = (float)(~((uint32_t)Frs2s.sf));
                 }});
                 0x68: Trap::fandnot1({{fault = new IllegalInstruction;}});
                 0x69: Trap::fandnot1s({{fault = new IllegalInstruction;}});
@@ -637,7 +637,7 @@ decode OP default Unknown::unknown()
                         Frd.df = (double)(~((uint64_t)Frs1.df));
                 }});
                 0x6B: BasicOperate::fnot1s({{
-                        Frd.sf = (float)(~((uint32_t)Frs1.sf));
+                        Frds.sf = (float)(~((uint32_t)Frs1s.sf));
                 }});
                 0x6C: Trap::fxor({{fault = new IllegalInstruction;}});
                 0x6D: Trap::fxors({{fault = new IllegalInstruction;}});
@@ -820,92 +820,248 @@ decode OP default Unknown::unknown()
     }
     0x3: decode OP3 {
         format Load {
-            0x00: lduw({{Rd = Mem;}}, {{32}});
-            0x01: ldub({{Rd = Mem;}}, {{8}});
-            0x02: lduh({{Rd = Mem;}}, {{16}});
+            0x00: lduw({{Rd = Mem.uw;}});
+            0x01: ldub({{Rd = Mem.ub;}});
+            0x02: lduh({{Rd = Mem.uhw;}});
             0x03: ldd({{
-                uint64_t val = Mem;
+                uint64_t val = Mem.udw;
                 RdLow = val<31:0>;
                 RdHigh = val<63:32>;
-            }}, {{64}});
+            }});
         }
         format Store {
-            0x04: stw({{Mem = Rd.sw;}}, {{32}});
-            0x05: stb({{Mem = Rd.sb;}}, {{8}});
-            0x06: sth({{Mem = Rd.shw;}}, {{16}});
-            0x07: std({{Mem = RdLow<31:0> | (RdHigh<31:0> << 32);}}, {{64}});
+            0x04: stw({{Mem.uw = Rd.sw;}});
+            0x05: stb({{Mem.ub = Rd.sb;}});
+            0x06: sth({{Mem.uhw = Rd.shw;}});
+            0x07: std({{Mem.udw = RdLow<31:0> | (RdHigh<31:0> << 32);}});
         }
         format Load {
-            0x08: ldsw({{Rd = (int32_t)Mem;}}, {{32}});
-            0x09: ldsb({{Rd = (int8_t)Mem;}}, {{8}});
-            0x0A: ldsh({{Rd = (int16_t)Mem;}}, {{16}});
-            0x0B: ldx({{Rd = (int64_t)Mem;}}, {{64}});
+            0x08: ldsw({{Rd = (int32_t)Mem.sw;}});
+            0x09: ldsb({{Rd = (int8_t)Mem.sb;}});
+            0x0A: ldsh({{Rd = (int16_t)Mem.shw;}});
+            0x0B: ldx({{Rd = (int64_t)Mem.sdw;}});
             0x0D: ldstub({{
-                Rd = Mem;
-                Mem = 0xFF;
-            }}, {{8}});
+                Rd = Mem.ub;
+                Mem.ub = 0xFF;
+            }});
         }
-        0x0E: Store::stx({{Mem = Rd}}, {{64}});
-        0x0F: LoadStore::swap({{
-            uint32_t temp = Rd;
-            Rd = Mem;
-            Mem = temp;
-        }}, {{32}});
+        0x0E: Store::stx({{Mem.udw = Rd}});
+        0x0F: LoadStore::swap(
+            {{*temp = Rd.uw;
+            Rd.uw = Mem.uw;}},
+            {{Mem.uw = *temp;}});
         format Load {
-            0x10: lduwa({{Rd = Mem;}}, {{32}});
-            0x11: lduba({{Rd = Mem;}}, {{8}});
-            0x12: lduha({{Rd = Mem;}}, {{16}});
+            0x10: lduwa({{Rd = Mem.uw;}});
+            0x11: lduba({{Rd = Mem.ub;}});
+            0x12: lduha({{Rd = Mem.uhw;}});
             0x13: ldda({{
-                uint64_t val = Mem;
+                uint64_t val = Mem.udw;
                 RdLow = val<31:0>;
                 RdHigh = val<63:32>;
-            }}, {{64}});
+            }});
         }
         format Store {
-            0x14: stwa({{Mem = Rd;}}, {{32}});
-            0x15: stba({{Mem = Rd;}}, {{8}});
-            0x16: stha({{Mem = Rd;}}, {{16}});
-            0x17: stda({{Mem = RdLow<31:0> | RdHigh<31:0> << 32;}}, {{64}});
+            0x14: stwa({{Mem.uw = Rd;}});
+            0x15: stba({{Mem.ub = Rd;}});
+            0x16: stha({{Mem.uhw = Rd;}});
+            0x17: stda({{Mem.udw = RdLow<31:0> | RdHigh<31:0> << 32;}});
         }
         format Load {
-            0x18: ldswa({{Rd = (int32_t)Mem;}}, {{32}});
-            0x19: ldsba({{Rd = (int8_t)Mem;}}, {{8}});
-            0x1A: ldsha({{Rd = (int16_t)Mem;}}, {{16}});
-            0x1B: ldxa({{Rd = (int64_t)Mem;}}, {{64}});
+            0x18: ldswa({{Rd = (int32_t)Mem.sw;}});
+            0x19: ldsba({{Rd = (int8_t)Mem.sb;}});
+            0x1A: ldsha({{Rd = (int16_t)Mem.shw;}});
+            0x1B: ldxa({{Rd = (int64_t)Mem.sdw;}});
         }
-        0x1D: LoadStore::ldstuba({{
-            Rd = Mem;
-            Mem = 0xFF;
-        }}, {{8}});
-        0x1E: Store::stxa({{Mem = Rd}}, {{64}});
-        0x1F: LoadStore::swapa({{
-            uint32_t temp = Rd;
-            Rd = Mem;
-            Mem = temp;
-        }}, {{32}});
+        0x1D: LoadStore::ldstuba(
+                {{Rd = Mem.ub;}},
+                {{Mem.ub = 0xFF}});
+        0x1E: Store::stxa({{Mem.udw = Rd}});
+        0x1F: LoadStore::swapa(
+            {{*temp = Rd.uw;
+            Rd.uw = Mem.uw;}},
+            {{Mem.uw = *temp;}});
         format Trap {
-            0x20: Load::ldf({{Frd.uw = Mem;}}, {{32}});
+            0x20: Load::ldf({{Frd.uw = Mem.uw;}});
             0x21: decode X {
-                0x0: Load::ldfsr({{Fsr = Mem<31:0> | Fsr<63:32>;}}, {{32}});
-                0x1: Load::ldxfsr({{Fsr = Mem;}}, {{64}});
+                0x0: Load::ldfsr({{Fsr = Mem.uw | (Fsr<63:32> << 32);}});
+                0x1: Load::ldxfsr({{Fsr = Mem.udw;}});
             }
             0x22: ldqf({{fault = new FpDisabled;}});
-            0x23: Load::lddf({{Frd.udw = Mem;}}, {{64}});
-            0x24: Store::stf({{Mem = Frd.uw;}}, {{32}});
+            0x23: Load::lddf({{Frd.udw = Mem.udw;}});
+            0x24: Store::stf({{Mem.uw = Frd.uw;}});
             0x25: decode X {
-                0x0: Store::stfsr({{Mem = Fsr<31:0>;}}, {{32}});
-                0x1: Store::stxfsr({{Mem = Fsr;}}, {{64}});
+                0x0: Store::stfsr({{Mem.uw = Fsr<31:0>;}});
+                0x1: Store::stxfsr({{Mem.udw = Fsr;}});
             }
             0x26: stqf({{fault = new FpDisabled;}});
-            0x27: Store::stdf({{Mem = Frd.udw;}}, {{64}});
+            0x27: Store::stdf({{Mem.udw = Frd.udw;}});
             0x2D: Nop::prefetch({{ }});
-            0x30: Load::ldfa({{Frd.uw = Mem;}}, {{32}});
+            0x30: Load::ldfa({{Frd.uw = Mem.uw;}});
             0x32: ldqfa({{fault = new FpDisabled;}});
-            0x33: Load::lddfa({{Frd.udw = Mem;}}, {{64}});
-            0x34: Store::stfa({{Mem = Frd.uw;}}, {{32}});
+            format LoadAlt {
+                0x33: decode EXT_ASI {
+                    //ASI_NUCLEUS
+                    0x04: FailUnimpl::lddfa_n();
+                    //ASI_NUCLEUS_LITTLE
+                    0x0C: FailUnimpl::lddfa_nl();
+                    //ASI_AS_IF_USER_PRIMARY
+                    0x10: FailUnimpl::lddfa_aiup();
+                    //ASI_AS_IF_USER_PRIMARY_LITTLE
+                    0x18: FailUnimpl::lddfa_aiupl();
+                    //ASI_AS_IF_USER_SECONDARY
+                    0x11: FailUnimpl::lddfa_aius();
+                    //ASI_AS_IF_USER_SECONDARY_LITTLE
+                    0x19: FailUnimpl::lddfa_aiusl();
+                    //ASI_REAL
+                    0x14: FailUnimpl::lddfa_real();
+                    //ASI_REAL_LITTLE
+                    0x1C: FailUnimpl::lddfa_real_l();
+                    //ASI_REAL_IO
+                    0x15: FailUnimpl::lddfa_real_io();
+                    //ASI_REAL_IO_LITTLE
+                    0x1D: FailUnimpl::lddfa_real_io_l();
+                    //ASI_PRIMARY
+                    0x80: FailUnimpl::lddfa_p();
+                    //ASI_PRIMARY_LITTLE
+                    0x88: FailUnimpl::lddfa_pl();
+                    //ASI_SECONDARY
+                    0x81: FailUnimpl::lddfa_s();
+                    //ASI_SECONDARY_LITTLE
+                    0x89: FailUnimpl::lddfa_sl();
+                    //ASI_PRIMARY_NO_FAULT
+                    0x82: FailUnimpl::lddfa_pnf();
+                    //ASI_PRIMARY_NO_FAULT_LITTLE
+                    0x8A: FailUnimpl::lddfa_pnfl();
+                    //ASI_SECONDARY_NO_FAULT
+                    0x83: FailUnimpl::lddfa_snf();
+                    //ASI_SECONDARY_NO_FAULT_LITTLE
+                    0x8B: FailUnimpl::lddfa_snfl();
+
+                    format BlockLoad {
+                        // LDBLOCKF
+                        //ASI_BLOCK_AS_IF_USER_PRIMARY
+                        0x16: FailUnimpl::ldblockf_aiup();
+                        //ASI_BLOCK_AS_IF_USER_SECONDARY
+                        0x17: FailUnimpl::ldblockf_aius();
+                        //ASI_BLOCK_AS_IF_USER_PRIMARY_LITTLE
+                        0x1E: FailUnimpl::ldblockf_aiupl();
+                        //ASI_BLOCK_AS_IF_USER_SECONDARY_LITTLE
+                        0x1F: FailUnimpl::ldblockf_aiusl();
+                        //ASI_BLOCK_PRIMARY
+                        0xF0: ldblockf_p({{Frd_N.udw = Mem.udw;}});
+                        //ASI_BLOCK_SECONDARY
+                        0xF1: FailUnimpl::ldblockf_s();
+                        //ASI_BLOCK_PRIMARY_LITTLE
+                        0xF8: FailUnimpl::ldblockf_pl();
+                        //ASI_BLOCK_SECONDARY_LITTLE
+                        0xF9: FailUnimpl::ldblockf_sl();
+                    }
+
+                    //LDSHORTF
+                    //ASI_FL8_PRIMARY
+                    0xD0: FailUnimpl::ldshortf_8p();
+                    //ASI_FL8_SECONDARY
+                    0xD1: FailUnimpl::ldshortf_8s();
+                    //ASI_FL8_PRIMARY_LITTLE
+                    0xD8: FailUnimpl::ldshortf_8pl();
+                    //ASI_FL8_SECONDARY_LITTLE
+                    0xD9: FailUnimpl::ldshortf_8sl();
+                    //ASI_FL16_PRIMARY
+                    0xD2: FailUnimpl::ldshortf_16p();
+                    //ASI_FL16_SECONDARY
+                    0xD3: FailUnimpl::ldshortf_16s();
+                    //ASI_FL16_PRIMARY_LITTLE
+                    0xDA: FailUnimpl::ldshortf_16pl();
+                    //ASI_FL16_SECONDARY_LITTLE
+                    0xDB: FailUnimpl::ldshortf_16sl();
+                    //Not an ASI which is legal with lddfa
+                    default: Trap::lddfa_bad_asi(
+                        {{fault = new DataAccessException;}});
+                }
+            }
+            0x34: Store::stfa({{Mem.uw = Frd.uw;}});
             0x36: stqfa({{fault = new FpDisabled;}});
-            //XXX need to work in the ASI thing
-            0x37: Store::stdfa({{Mem = Frd.udw;}}, {{64}});
+            format StoreAlt {
+                0x37: decode EXT_ASI {
+                    //ASI_NUCLEUS
+                    0x04: FailUnimpl::stdfa_n();
+                    //ASI_NUCLEUS_LITTLE
+                    0x0C: FailUnimpl::stdfa_nl();
+                    //ASI_AS_IF_USER_PRIMARY
+                    0x10: FailUnimpl::stdfa_aiup();
+                    //ASI_AS_IF_USER_PRIMARY_LITTLE
+                    0x18: FailUnimpl::stdfa_aiupl();
+                    //ASI_AS_IF_USER_SECONDARY
+                    0x11: FailUnimpl::stdfa_aius();
+                    //ASI_AS_IF_USER_SECONDARY_LITTLE
+                    0x19: FailUnimpl::stdfa_aiusl();
+                    //ASI_REAL
+                    0x14: FailUnimpl::stdfa_real();
+                    //ASI_REAL_LITTLE
+                    0x1C: FailUnimpl::stdfa_real_l();
+                    //ASI_REAL_IO
+                    0x15: FailUnimpl::stdfa_real_io();
+                    //ASI_REAL_IO_LITTLE
+                    0x1D: FailUnimpl::stdfa_real_io_l();
+                    //ASI_PRIMARY
+                    0x80: FailUnimpl::stdfa_p();
+                    //ASI_PRIMARY_LITTLE
+                    0x88: FailUnimpl::stdfa_pl();
+                    //ASI_SECONDARY
+                    0x81: FailUnimpl::stdfa_s();
+                    //ASI_SECONDARY_LITTLE
+                    0x89: FailUnimpl::stdfa_sl();
+                    //ASI_PRIMARY_NO_FAULT
+                    0x82: FailUnimpl::stdfa_pnf();
+                    //ASI_PRIMARY_NO_FAULT_LITTLE
+                    0x8A: FailUnimpl::stdfa_pnfl();
+                    //ASI_SECONDARY_NO_FAULT
+                    0x83: FailUnimpl::stdfa_snf();
+                    //ASI_SECONDARY_NO_FAULT_LITTLE
+                    0x8B: FailUnimpl::stdfa_snfl();
+
+                    format BlockStore {
+                        // STBLOCKF
+                        //ASI_BLOCK_AS_IF_USER_PRIMARY
+                        0x16: FailUnimpl::stblockf_aiup();
+                        //ASI_BLOCK_AS_IF_USER_SECONDARY
+                        0x17: FailUnimpl::stblockf_aius();
+                        //ASI_BLOCK_AS_IF_USER_PRIMARY_LITTLE
+                        0x1E: FailUnimpl::stblockf_aiupl();
+                        //ASI_BLOCK_AS_IF_USER_SECONDARY_LITTLE
+                        0x1F: FailUnimpl::stblockf_aiusl();
+                        //ASI_BLOCK_PRIMARY
+                        0xF0: stblockf_p({{Mem.udw = Frd_N.udw;}});
+                        //ASI_BLOCK_SECONDARY
+                        0xF1: FailUnimpl::stblockf_s();
+                        //ASI_BLOCK_PRIMARY_LITTLE
+                        0xF8: FailUnimpl::stblockf_pl();
+                        //ASI_BLOCK_SECONDARY_LITTLE
+                        0xF9: FailUnimpl::stblockf_sl();
+                    }
+
+                    //STSHORTF
+                    //ASI_FL8_PRIMARY
+                    0xD0: FailUnimpl::stshortf_8p();
+                    //ASI_FL8_SECONDARY
+                    0xD1: FailUnimpl::stshortf_8s();
+                    //ASI_FL8_PRIMARY_LITTLE
+                    0xD8: FailUnimpl::stshortf_8pl();
+                    //ASI_FL8_SECONDARY_LITTLE
+                    0xD9: FailUnimpl::stshortf_8sl();
+                    //ASI_FL16_PRIMARY
+                    0xD2: FailUnimpl::stshortf_16p();
+                    //ASI_FL16_SECONDARY
+                    0xD3: FailUnimpl::stshortf_16s();
+                    //ASI_FL16_PRIMARY_LITTLE
+                    0xDA: FailUnimpl::stshortf_16pl();
+                    //ASI_FL16_SECONDARY_LITTLE
+                    0xDB: FailUnimpl::stshortf_16sl();
+                    //Not an ASI which is legal with stdfa
+                    default: Trap::stdfa_bad_asi(
+                        {{fault = new DataAccessException;}});
+                }
+            }
             0x3C: Cas::casa({{
                 uint64_t val = Mem.uw;
                 if(Rs2.uw == val)
diff --git a/src/arch/sparc/isa/formats.isa b/src/arch/sparc/isa/formats.isa
deleted file mode 100644
index 17d68061b..000000000
--- a/src/arch/sparc/isa/formats.isa
+++ /dev/null
@@ -1,28 +0,0 @@
-//Include the basic format
-//Templates from this format are used later
-##include "formats/basic.isa"
-
-//Include the noop format
-##include "formats/nop.isa"
-
-//Include the integerOp and integerOpCc format
-##include "formats/integerop.isa"
-
-//Include the memory format
-##include "formats/mem.isa"
-
-//Include the compare and swap format
-##include "formats/cas.isa"
-
-//Include the trap format
-##include "formats/trap.isa"
-
-//Include the "unknown" format
-##include "formats/unknown.isa"
-
-//Include the priveleged mode format
-##include "formats/priv.isa"
-
-//Include the branch format
-##include "formats/branch.isa"
-
diff --git a/src/arch/sparc/isa/formats/basic.isa b/src/arch/sparc/isa/formats/basic.isa
index 0a47a7ffe..a4c05387b 100644
--- a/src/arch/sparc/isa/formats/basic.isa
+++ b/src/arch/sparc/isa/formats/basic.isa
@@ -33,6 +33,14 @@ def template BasicExecDeclare {{
         Fault execute(%(CPU_exec_context)s *, Trace::InstRecord *) const;
 }};
 
+// Definitions of execute methods that panic.
+def template BasicExecPanic {{
+        Fault execute(%(CPU_exec_context)s *, Trace::InstRecord *) const
+        {
+            panic("Execute method called when it shouldn't!");
+        }
+}};
+
 // Basic instruction class declaration template.
 def template BasicDeclare {{
         /**
@@ -42,14 +50,14 @@ def template BasicDeclare {{
         {
           public:
             // Constructor.
-            %(class_name)s(MachInst machInst);
+            %(class_name)s(ExtMachInst machInst);
             %(BasicExecDeclare)s
         };
 }};
 
 // Basic instruction class constructor template.
 def template BasicConstructor {{
-        inline %(class_name)s::%(class_name)s(MachInst machInst)
+        inline %(class_name)s::%(class_name)s(ExtMachInst machInst)
             : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s)
         {
                 %(constructor)s;
@@ -80,6 +88,11 @@ def template BasicDecode {{
         return new %(class_name)s(machInst);
 }};
 
+// Basic decode template, passing mnemonic in as string arg to constructor.
+def template BasicDecodeWithMnemonic {{
+    return new %(class_name)s("%(mnemonic)s", machInst);
+}};
+
 // The most basic instruction format... used only for a few misc. insts
 def format BasicOperate(code, *flags) {{
         iop = InstObjParams(name, Name, 'SparcStaticInst',
diff --git a/src/arch/sparc/isa/formats/branch.isa b/src/arch/sparc/isa/formats/branch.isa
index 2c206354b..5fb7ade2d 100644
--- a/src/arch/sparc/isa/formats/branch.isa
+++ b/src/arch/sparc/isa/formats/branch.isa
@@ -80,7 +80,7 @@ output header {{
                     OpClass __opClass) :
                 BranchDisp(mnem, _machInst, __opClass)
             {
-                disp = sign_ext(_machInst << 2, bits + 2);
+                disp = sext<bits + 2>((_machInst & mask(bits)) << 2);
             }
         };
 
@@ -95,7 +95,7 @@ output header {{
                     OpClass __opClass) :
                 BranchDisp(mnem, _machInst, __opClass)
             {
-                disp = sign_ext((D16HI << 16) | (D16LO << 2), 18);
+                disp = sext<18>((D16HI << 16) | (D16LO << 2));
             }
         };
 
@@ -108,7 +108,7 @@ output header {{
           protected:
             // Constructor
             BranchImm13(const char *mnem, MachInst _machInst, OpClass __opClass) :
-                Branch(mnem, _machInst, __opClass), imm(sign_ext(SIMM13, 13))
+                Branch(mnem, _machInst, __opClass), imm(sext<13>(SIMM13))
             {
             }
 
diff --git a/src/arch/sparc/isa/formats/formats.isa b/src/arch/sparc/isa/formats/formats.isa
new file mode 100644
index 000000000..5b81a1ab1
--- /dev/null
+++ b/src/arch/sparc/isa/formats/formats.isa
@@ -0,0 +1,62 @@
+// Copyright (c) 2006 The Regents of The University of Michigan
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Gabe Black
+
+//Include the basic format
+//Templates from this format are used later
+##include "basic.isa"
+
+//Include base classes for microcoding instructions
+##include "micro.isa"
+
+//Include the noop format
+##include "nop.isa"
+
+//Include the integerOp and integerOpCc format
+##include "integerop.isa"
+
+//Include the memory formats
+##include "mem/mem.isa"
+
+//Include the compare and swap format
+##include "cas.isa"
+
+//Include the trap format
+##include "trap.isa"
+
+//Include the unimplemented format
+##include "unimp.isa"
+
+//Include the "unknown" format
+##include "unknown.isa"
+
+//Include the privileged mode format
+##include "priv.isa"
+
+//Include the branch format
+##include "branch.isa"
+
diff --git a/src/arch/sparc/isa/formats/integerop.isa b/src/arch/sparc/isa/formats/integerop.isa
index 83c7e6958..4f8ebebcc 100644
--- a/src/arch/sparc/isa/formats/integerop.isa
+++ b/src/arch/sparc/isa/formats/integerop.isa
@@ -87,7 +87,7 @@ output header {{
                     OpClass __opClass) :
                 IntOpImm(mnem, _machInst, __opClass)
             {
-                imm = sign_ext(SIMM10, 10);
+                imm = sext<10>(SIMM10);
             }
         };
 
@@ -102,7 +102,7 @@ output header {{
                     OpClass __opClass) :
                 IntOpImm(mnem, _machInst, __opClass)
             {
-                imm = sign_ext(SIMM11, 11);
+                imm = sext<11>(SIMM11);
             }
         };
 
@@ -117,7 +117,7 @@ output header {{
                     OpClass __opClass) :
                 IntOpImm(mnem, _machInst, __opClass)
             {
-                imm = sign_ext(SIMM13, 13);
+                imm = sext<13>(SIMM13);
             }
         };
 
@@ -264,13 +264,13 @@ let {{
         (usesImm, code, immCode,
          rString, iString) = splitOutImm(code)
         iop = InstObjParams(name, Name,	'IntOp', code,
-                opt_flags, ("cc_code", ccCode))
+                opt_flags, {"cc_code": ccCode})
         header_output = BasicDeclare.subst(iop)
         decoder_output = BasicConstructor.subst(iop)
         exec_output = IntOpExecute.subst(iop)
         if usesImm:
             imm_iop = InstObjParams(name, Name + 'Imm', 'IntOpImm' + iString,
-                    immCode, opt_flags, ("cc_code", ccCode))
+                    immCode, opt_flags, {"cc_code": ccCode})
             header_output += BasicDeclare.subst(imm_iop)
             decoder_output += BasicConstructor.subst(imm_iop)
             exec_output += IntOpExecute.subst(imm_iop)
@@ -341,7 +341,7 @@ def format IntOpCcRes(code, *opt_flags) {{
 
 def format SetHi(code, *opt_flags) {{
     iop = InstObjParams(name, Name, 'SetHi',
-            code, opt_flags, ("cc_code", ''))
+            code, opt_flags, {"cc_code": ''})
     header_output = BasicDeclare.subst(iop)
     decoder_output = BasicConstructor.subst(iop)
     exec_output = IntOpExecute.subst(iop)
diff --git a/src/arch/sparc/isa/formats/mem.isa b/src/arch/sparc/isa/formats/mem/basicmem.isa
index 9011c1fc6..c13194d0f 100644
--- a/src/arch/sparc/isa/formats/mem.isa
+++ b/src/arch/sparc/isa/formats/mem/basicmem.isa
@@ -1,3 +1,32 @@
+// Copyright (c) 2006 The Regents of The University of Michigan
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Ali Saidi
+//          Gabe Black
+
 ////////////////////////////////////////////////////////////////////
 //
 // Mem instructions
@@ -30,15 +59,13 @@ output header {{
 
             // Constructor
             MemImm(const char *mnem, ExtMachInst _machInst, OpClass __opClass) :
-                Mem(mnem, _machInst, __opClass)
-            {
-                imm = sign_ext(SIMM13, 13);
-            }
+                Mem(mnem, _machInst, __opClass), imm(sext<13>(SIMM13))
+            {}
 
             std::string generateDisassembly(Addr pc,
                     const SymbolTable *symtab) const;
 
-            int32_t imm;
+            const int32_t imm;
         };
 }};
 
@@ -99,73 +126,69 @@ output decoder {{
         }
 }};
 
-def template MemExecute {{
-        Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
-                Trace::InstRecord *traceData) const
+def template MemDeclare {{
+        /**
+         * Static instruction class for "%(mnemonic)s".
+         */
+        class %(class_name)s : public %(base_class)s
         {
-            Fault fault = NoFault;
-            Addr EA;
-            %(op_decl)s;
-            %(op_rd)s;
-            %(ea_code)s;
-            DPRINTF(Sparc, "The address is 0x%x\n", EA);
-            %(load)s;
-            %(code)s;
-
-            if(fault == NoFault)
-            {
-                %(store)s;
-                //Write the resulting state to the execution context
-                %(op_wb)s;
-            }
+          public:
 
-            return fault;
-        }
+            /// Constructor.
+            %(class_name)s(ExtMachInst machInst);
+
+            %(BasicExecDeclare)s
+
+            %(InitiateAccDeclare)s
+
+            %(CompleteAccDeclare)s
+        };
 }};
 
 let {{
-    # Leave memAccessFlags at 0 for now
-    loadString = "xc->read(EA, (uint%(width)s_t&)Mem, 0);"
-    storeString = "uint64_t write_result = 0; \
-    xc->write((uint%(width)s_t)Mem, EA, 0, &write_result);"
-
-    def doMemFormat(code, load, store, name, Name, opt_flags):
+    def doMemFormat(code, execute, faultCode, name, Name, opt_flags):
         addrCalcReg = 'EA = Rs1 + Rs2;'
         addrCalcImm = 'EA = Rs1 + imm;'
         iop = InstObjParams(name, Name, 'Mem', code,
-                opt_flags, ("ea_code", addrCalcReg),
-                ("load", load), ("store", store))
-        iop_imm = InstObjParams(name, Name + 'Imm', 'MemImm', code,
-                opt_flags, ("ea_code", addrCalcImm),
-                ("load", load), ("store", store))
-        header_output = BasicDeclare.subst(iop) + BasicDeclare.subst(iop_imm)
+                opt_flags, {"fault_check": faultCode, "ea_code": addrCalcReg})
+        iop_imm = InstObjParams(name, Name + "Imm", 'MemImm', code,
+                opt_flags, {"fault_check": faultCode, "ea_code": addrCalcImm})
+        header_output = MemDeclare.subst(iop) + MemDeclare.subst(iop_imm)
         decoder_output = BasicConstructor.subst(iop) + BasicConstructor.subst(iop_imm)
         decode_block = ROrImmDecode.subst(iop)
-        exec_output = MemExecute.subst(iop) + MemExecute.subst(iop_imm)
+        exec_output = doSplitExecute(code, addrCalcReg, addrCalcImm, execute,
+                faultCode, name, name + "Imm", Name, Name + "Imm", opt_flags)
         return (header_output, decoder_output, exec_output, decode_block)
 }};
 
-def format Load(code, width, *opt_flags) {{
+def format LoadAlt(code, *opt_flags) {{
         (header_output,
          decoder_output,
          exec_output,
-         decode_block) = doMemFormat(code,
-             loadString % {"width":width}, '', name, Name, opt_flags)
+         decode_block) = doMemFormat(code, LoadExecute,
+            AlternateAsiPrivFaultCheck, name, Name, opt_flags)
 }};
 
-def format Store(code, width, *opt_flags) {{
+def format StoreAlt(code, *opt_flags) {{
         (header_output,
          decoder_output,
          exec_output,
-         decode_block) = doMemFormat(code, '',
-             storeString % {"width":width}, name, Name, opt_flags)
+         decode_block) = doMemFormat(code, StoreExecute,
+            AlternateAsiPrivFaultCheck, name, Name, opt_flags)
+}};
+
+def format Load(code, *opt_flags) {{
+        (header_output,
+         decoder_output,
+         exec_output,
+         decode_block) = doMemFormat(code,
+             LoadExecute, '', name, Name, opt_flags)
 }};
 
-def format LoadStore(code, width, *opt_flags) {{
+def format Store(code, *opt_flags) {{
         (header_output,
          decoder_output,
          exec_output,
          decode_block) = doMemFormat(code,
-             loadString % {"width":width}, storeString % {"width":width},
-             name, Name, opt_flags)
+             StoreExecute, '', name, Name, opt_flags)
 }};
diff --git a/src/arch/sparc/isa/formats/mem/blockmem.isa b/src/arch/sparc/isa/formats/mem/blockmem.isa
new file mode 100644
index 000000000..93ad1b2b8
--- /dev/null
+++ b/src/arch/sparc/isa/formats/mem/blockmem.isa
@@ -0,0 +1,337 @@
+// Copyright (c) 2006 The Regents of The University of Michigan
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Ali Saidi
+//          Gabe Black
+
+////////////////////////////////////////////////////////////////////
+//
+// Block Memory instructions
+//
+
+output header {{
+
+        class BlockMem : public SparcMacroInst
+        {
+          protected:
+
+            // Constructor
+            // We make the assumption that all block memory operations
+            // will take 8 micro-ops to execute
+            BlockMem(const char *mnem, ExtMachInst _machInst) :
+                SparcMacroInst(mnem, _machInst, No_OpClass, 8)
+            {}
+        };
+
+        class BlockMemImm : public BlockMem
+        {
+          protected:
+
+            // Constructor
+            BlockMemImm(const char *mnem, ExtMachInst _machInst) :
+                BlockMem(mnem, _machInst)
+            {}
+        };
+
+        class BlockMemMicro : public SparcDelayedMicroInst
+        {
+          protected:
+
+            // Constructor
+            BlockMemMicro(const char *mnem, ExtMachInst _machInst,
+                    OpClass __opClass, int8_t _offset) :
+                SparcDelayedMicroInst(mnem, _machInst, __opClass),
+                offset(_offset)
+            {}
+
+            std::string generateDisassembly(Addr pc,
+                const SymbolTable *symtab) const;
+
+            const int8_t offset;
+        };
+
+        class BlockMemImmMicro : public BlockMemMicro
+        {
+          protected:
+
+            // Constructor
+            BlockMemImmMicro(const char *mnem, ExtMachInst _machInst,
+                    OpClass __opClass, int8_t _offset) :
+                BlockMemMicro(mnem, _machInst, __opClass, _offset),
+                imm(sext<13>(SIMM13))
+            {}
+
+            std::string generateDisassembly(Addr pc,
+                const SymbolTable *symtab) const;
+
+            const int32_t imm;
+        };
+}};
+
+output decoder {{
+        std::string BlockMemMicro::generateDisassembly(Addr pc,
+                const SymbolTable *symtab) const
+        {
+            std::stringstream response;
+            bool load = flags[IsLoad];
+            bool save = flags[IsStore];
+
+            printMnemonic(response, mnemonic);
+            if(save)
+            {
+                printReg(response, _srcRegIdx[0]);
+                ccprintf(response, ", ");
+            }
+            ccprintf(response, "[ ");
+            printReg(response, _srcRegIdx[!save ? 0 : 1]);
+            ccprintf(response, " + ");
+            printReg(response, _srcRegIdx[!save ? 1 : 2]);
+            ccprintf(response, " ]");
+            if(load)
+            {
+                ccprintf(response, ", ");
+                printReg(response, _destRegIdx[0]);
+            }
+
+            return response.str();
+        }
+
+        std::string BlockMemImmMicro::generateDisassembly(Addr pc,
+                const SymbolTable *symtab) const
+        {
+            std::stringstream response;
+            bool load = flags[IsLoad];
+            bool save = flags[IsStore];
+
+            printMnemonic(response, mnemonic);
+            if(save)
+            {
+                printReg(response, _srcRegIdx[1]);
+                ccprintf(response, ", ");
+            }
+            ccprintf(response, "[ ");
+            printReg(response, _srcRegIdx[0]);
+            if(imm >= 0)
+                ccprintf(response, " + 0x%x ]", imm);
+            else
+                ccprintf(response, " + -0x%x ]", -imm);
+            if(load)
+            {
+                ccprintf(response, ", ");
+                printReg(response, _destRegIdx[0]);
+            }
+
+            return response.str();
+        }
+
+}};
+
+def template BlockMemDeclare {{
+        /**
+         * Static instruction class for a block memory operation
+         */
+        class %(class_name)s : public %(base_class)s
+        {
+          public:
+            //Constructor
+            %(class_name)s(ExtMachInst machInst);
+
+          protected:
+            class %(class_name)s_0 : public %(base_class)sMicro
+            {
+              public:
+                //Constructor
+                %(class_name)s_0(ExtMachInst machInst);
+                %(BasicExecDeclare)s
+                %(InitiateAccDeclare)s
+                %(CompleteAccDeclare)s
+            };
+
+            class %(class_name)s_1 : public %(base_class)sMicro
+            {
+              public:
+                //Constructor
+                %(class_name)s_1(ExtMachInst machInst);
+                %(BasicExecDeclare)s
+                %(InitiateAccDeclare)s
+                %(CompleteAccDeclare)s
+            };
+
+            class %(class_name)s_2 : public %(base_class)sMicro
+            {
+              public:
+                //Constructor
+                %(class_name)s_2(ExtMachInst machInst);
+                %(BasicExecDeclare)s
+                %(InitiateAccDeclare)s
+                %(CompleteAccDeclare)s
+            };
+
+            class %(class_name)s_3 : public %(base_class)sMicro
+            {
+              public:
+                //Constructor
+                %(class_name)s_3(ExtMachInst machInst);
+                %(BasicExecDeclare)s
+                %(InitiateAccDeclare)s
+                %(CompleteAccDeclare)s
+            };
+
+            class %(class_name)s_4 : public %(base_class)sMicro
+            {
+              public:
+                //Constructor
+                %(class_name)s_4(ExtMachInst machInst);
+                %(BasicExecDeclare)s
+                %(InitiateAccDeclare)s
+                %(CompleteAccDeclare)s
+            };
+
+            class %(class_name)s_5 : public %(base_class)sMicro
+            {
+              public:
+                //Constructor
+                %(class_name)s_5(ExtMachInst machInst);
+                %(BasicExecDeclare)s
+                %(InitiateAccDeclare)s
+                %(CompleteAccDeclare)s
+            };
+
+            class %(class_name)s_6 : public %(base_class)sMicro
+            {
+              public:
+                //Constructor
+                %(class_name)s_6(ExtMachInst machInst);
+                %(BasicExecDeclare)s
+                %(InitiateAccDeclare)s
+                %(CompleteAccDeclare)s
+            };
+
+            class %(class_name)s_7 : public %(base_class)sMicro
+            {
+              public:
+                //Constructor
+                %(class_name)s_7(ExtMachInst machInst);
+                %(BasicExecDeclare)s
+                %(InitiateAccDeclare)s
+                %(CompleteAccDeclare)s
+            };
+        };
+}};
+
+// Basic instruction class constructor template.
+def template BlockMemConstructor {{
+        inline %(class_name)s::%(class_name)s(ExtMachInst machInst)
+            : %(base_class)s("%(mnemonic)s", machInst)
+        {
+            %(constructor)s;
+            microOps[0] = new %(class_name)s_0(machInst);
+            microOps[1] = new %(class_name)s_1(machInst);
+            microOps[2] = new %(class_name)s_2(machInst);
+            microOps[3] = new %(class_name)s_3(machInst);
+            microOps[4] = new %(class_name)s_4(machInst);
+            microOps[5] = new %(class_name)s_5(machInst);
+            microOps[6] = new %(class_name)s_6(machInst);
+            microOps[7] = new %(class_name)s_7(machInst);
+        }
+}};
+
+def template BlockMemMicroConstructor {{
+        inline %(class_name)s::
+            %(class_name)s_%(micro_pc)s::
+            %(class_name)s_%(micro_pc)s(ExtMachInst machInst) :
+                %(base_class)sMicro("%(mnemonic)s[%(micro_pc)s]",
+                        machInst, %(op_class)s, %(micro_pc)s * 8)
+    {
+        %(constructor)s;
+        %(set_flags)s;
+    }
+}};
+
+let {{
+
+    def doBlockMemFormat(code, faultCode, execute, name, Name, opt_flags):
+        # XXX Need to take care of pstate.hpriv as well. The lower ASIs
+        # are split into ones that are available in priv and hpriv, and
+        # those that are only available in hpriv
+        addrCalcReg = 'EA = Rs1 + Rs2 + offset;'
+        addrCalcImm = 'EA = Rs1 + imm + offset;'
+        iop = InstObjParams(name, Name, 'BlockMem', code, opt_flags)
+        iop_imm = InstObjParams(name, Name + 'Imm', 'BlockMemImm', code, opt_flags)
+        header_output = BlockMemDeclare.subst(iop) + BlockMemDeclare.subst(iop_imm)
+        decoder_output = BlockMemConstructor.subst(iop) + BlockMemConstructor.subst(iop_imm)
+        decode_block = ROrImmDecode.subst(iop)
+        matcher = re.compile(r'Frd_N')
+        exec_output = ''
+        for microPc in range(8):
+            flag_code = ''
+            if (microPc == 7):
+                flag_code = "flags[IsLastMicroOp] = true;"
+            pcedCode = matcher.sub("Frd_%d" % microPc, code)
+            iop = InstObjParams(name, Name, 'BlockMem', pcedCode,
+                    opt_flags, {"ea_code": addrCalcReg,
+                    "fault_check": faultCode, "micro_pc": microPc,
+                    "set_flags": flag_code})
+            iop_imm = InstObjParams(name, Name + 'Imm', 'BlockMemImm', pcedCode,
+                    opt_flags, {"ea_code": addrCalcImm,
+                    "fault_check": faultCode, "micro_pc": microPc,
+                    "set_flags": flag_code})
+            decoder_output += BlockMemMicroConstructor.subst(iop)
+            decoder_output += BlockMemMicroConstructor.subst(iop_imm)
+            exec_output += doSplitExecute(
+                    pcedCode, addrCalcReg, addrCalcImm, execute, faultCode,
+                    makeMicroName(name, microPc),
+                    makeMicroName(name + "Imm", microPc),
+                    makeMicroName(Name, microPc),
+                    makeMicroName(Name + "Imm", microPc),
+                    opt_flags);
+            faultCode = ''
+        return (header_output, decoder_output, exec_output, decode_block)
+}};
+
+def format BlockLoad(code, *opt_flags) {{
+        # We need to make sure to check the highest priority fault last.
+        # That way, if other faults have been detected, they'll be overwritten
+        # rather than the other way around.
+        faultCode = AlternateASIPrivFaultCheck + BlockAlignmentFaultCheck
+        (header_output,
+         decoder_output,
+         exec_output,
+         decode_block) = doBlockMemFormat(code, faultCode,
+             LoadExecute, name, Name, opt_flags)
+}};
+
+def format BlockStore(code, *opt_flags) {{
+        # We need to make sure to check the highest priority fault last.
+        # That way, if other faults have been detected, they'll be overwritten
+        # rather than the other way around.
+        faultCode = AlternateASIPrivFaultCheck + BlockAlignmentFaultCheck
+        (header_output,
+         decoder_output,
+         exec_output,
+         decode_block) = doBlockMemFormat(code, faultCode,
+             StoreExecute, name, Name, opt_flags)
+}};
diff --git a/src/arch/sparc/isa/formats/mem/mem.isa b/src/arch/sparc/isa/formats/mem/mem.isa
new file mode 100644
index 000000000..20a22c45d
--- /dev/null
+++ b/src/arch/sparc/isa/formats/mem/mem.isa
@@ -0,0 +1,45 @@
+// Copyright (c) 2006 The Regents of The University of Michigan
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Ali Saidi
+//          Gabe Black
+
+////////////////////////////////////////////////////////////////////
+//
+// Mem formats
+//
+
+//Include mem utility templates and functions
+##include "util.isa"
+
+//Include the basic memory format
+##include "basicmem.isa"
+
+//Include the block memory format
+##include "blockmem.isa"
+
+//Include the load/store memory format
+##include "loadstore.isa"
diff --git a/src/arch/sparc/isa/formats/mem/util.isa b/src/arch/sparc/isa/formats/mem/util.isa
new file mode 100644
index 000000000..241a25d17
--- /dev/null
+++ b/src/arch/sparc/isa/formats/mem/util.isa
@@ -0,0 +1,226 @@
+// Copyright (c) 2006 The Regents of The University of Michigan
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Ali Saidi
+//          Gabe Black
+//          Steve Reinhardt
+
+////////////////////////////////////////////////////////////////////
+//
+// Mem utility templates and functions
+//
+
+//This template provides the execute functions for a load
+def template LoadExecute {{
+        Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
+                Trace::InstRecord *traceData) const
+        {
+            Fault fault = NoFault;
+            Addr EA;
+            %(op_decl)s;
+            %(op_rd)s;
+            %(ea_code)s;
+            DPRINTF(Sparc, "The address is 0x%x\n", EA);
+            %(fault_check)s;
+            if(fault == NoFault)
+            {
+                fault = xc->read(EA, (uint%(mem_acc_size)s_t&)Mem, 0);
+            }
+            if(fault == NoFault)
+            {
+                %(code)s;
+            }
+            if(fault == NoFault)
+            {
+                //Write the resulting state to the execution context
+                %(op_wb)s;
+            }
+
+            return fault;
+        }
+
+        Fault %(class_name)s::initiateAcc(%(CPU_exec_context)s * xc,
+                Trace::InstRecord * traceData) const
+        {
+            Fault fault = NoFault;
+            Addr EA;
+            uint%(mem_acc_size)s_t Mem;
+            %(ea_decl)s;
+            %(ea_rd)s;
+            %(ea_code)s;
+            %(fault_check)s;
+            if(fault == NoFault)
+            {
+                fault = xc->read(EA, (uint%(mem_acc_size)s_t&)Mem, 0);
+            }
+            return fault;
+        }
+
+        Fault %(class_name)s::completeAcc(PacketPtr pkt, %(CPU_exec_context)s * xc,
+                Trace::InstRecord * traceData) const
+        {
+            Fault fault = NoFault;
+            %(code_decl)s;
+            %(code_rd)s;
+            Mem = pkt->get<typeof(Mem)>();
+            %(code)s;
+            if(fault == NoFault)
+            {
+                %(code_wb)s;
+            }
+            return fault;
+        }
+}};
+
+//This template provides the execute functions for a store
+def template StoreExecute {{
+        Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
+                Trace::InstRecord *traceData) const
+        {
+            Fault fault = NoFault;
+            uint64_t write_result = 0;
+            Addr EA;
+            %(op_decl)s;
+            %(op_rd)s;
+            %(ea_code)s;
+            DPRINTF(Sparc, "The address is 0x%x\n", EA);
+            %(fault_check)s;
+            if(fault == NoFault)
+            {
+                %(code)s;
+            }
+            if(fault == NoFault)
+            {
+                fault = xc->write((uint%(mem_acc_size)s_t)Mem, EA, 0, &write_result);
+            }
+            if(fault == NoFault)
+            {
+                //Write the resulting state to the execution context
+                %(op_wb)s;
+            }
+
+            return fault;
+        }
+
+        Fault %(class_name)s::initiateAcc(%(CPU_exec_context)s * xc,
+                Trace::InstRecord * traceData) const
+        {
+            Fault fault = NoFault;
+            uint64_t write_result = 0;
+            Addr EA;
+            %(op_decl)s;
+            %(op_rd)s;
+            %(ea_code)s;
+            DPRINTF(Sparc, "The address is 0x%x\n", EA);
+            %(fault_check)s;
+            if(fault == NoFault)
+            {
+                %(code)s;
+            }
+            if(fault == NoFault)
+            {
+                fault = xc->write((uint%(mem_acc_size)s_t)Mem, EA, 0, &write_result);
+            }
+            if(fault == NoFault)
+            {
+                //Write the resulting state to the execution context
+                %(op_wb)s;
+            }
+            return fault;
+        }
+
+        Fault %(class_name)s::completeAcc(PacketPtr, %(CPU_exec_context)s * xc,
+                Trace::InstRecord * traceData) const
+        {
+            return NoFault;
+        }
+}};
+
+//This declares the initiateAcc function in memory operations
+def template InitiateAccDeclare {{
+    Fault initiateAcc(%(CPU_exec_context)s *, Trace::InstRecord *) const;
+}};
+
+//This declares the completeAcc function in memory operations
+def template CompleteAccDeclare {{
+    Fault completeAcc(PacketPtr, %(CPU_exec_context)s *, Trace::InstRecord *) const;
+}};
+
+//Here are some code snippets which check for various fault conditions
+let {{
+    # The LSB need not be zero, since it's really the MSB in doubles and quads
+    # and we're dealing with doubles
+    BlockAlignmentFaultCheck = '''
+        if(RD & 0xe)
+            fault = new IllegalInstruction;
+        else if(EA & 0x3f)
+            fault = new MemAddressNotAligned;
+    '''
+    # XXX Need to take care of pstate.hpriv as well. The lower ASIs
+    # are split into ones that are available in priv and hpriv, and
+    # those that are only available in hpriv
+    AlternateASIPrivFaultCheck = '''
+        if(bits(Pstate,2,2) == 0 && (EXT_ASI & 0x80) == 0)
+            fault = new PrivilegedAction;
+        else if(AsiIsAsIfUser((ASI)EXT_ASI) && !bits(Pstate,2,2))
+            fault = new PrivilegedAction;
+    '''
+
+}};
+
+//A simple function to generate the name of the micro op of a certain
+//instruction at a certain micropc
+let {{
+    def makeMicroName(name, microPc):
+        return name + "::" + name + "_" + str(microPc)
+}};
+
+//This function properly generates the execute functions for one of the
+//templates above. This is needed because in one case, ea computation,
+//fault checks and the actual code all occur in the same function,
+//and in the other they're distributed across two. Also note that for
+//execute functions, the name of the base class doesn't matter.
+let {{
+    def doSplitExecute(code, eaRegCode, eaImmCode, execute,
+            faultCode, nameReg, nameImm, NameReg, NameImm, opt_flags):
+        codeIop = InstObjParams(nameReg, NameReg, '', code, opt_flags)
+        executeCode = ''
+        for (eaCode, name, Name) in (
+                (eaRegCode, nameReg, NameReg),
+                (eaImmCode, nameImm, NameImm)):
+            eaIop = InstObjParams(name, Name, '', eaCode,
+                    opt_flags, {"fault_check": faultCode})
+            iop = InstObjParams(name, Name, '', code, opt_flags,
+                    {"fault_check": faultCode, "ea_code" : eaCode})
+            (iop.ea_decl,
+             iop.ea_rd,
+             iop.ea_wb) = (eaIop.op_decl, eaIop.op_rd, eaIop.op_wb)
+            (iop.code_decl,
+             iop.code_rd,
+             iop.code_wb) = (codeIop.op_decl, codeIop.op_rd, codeIop.op_wb)
+            executeCode += execute.subst(iop)
+        return executeCode
+}};
diff --git a/src/arch/sparc/isa/formats/micro.isa b/src/arch/sparc/isa/formats/micro.isa
new file mode 100644
index 000000000..82d7fb4cb
--- /dev/null
+++ b/src/arch/sparc/isa/formats/micro.isa
@@ -0,0 +1,103 @@
+// Copyright (c) 2006 The Regents of The University of Michigan
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Gabe Black
+
+output header {{
+
+        class SparcMacroInst : public SparcStaticInst
+        {
+          protected:
+            const uint32_t numMicroOps;
+
+            //Constructor.
+            SparcMacroInst(const char *mnem, ExtMachInst _machInst,
+                    OpClass __opClass, uint32_t _numMicroOps)
+                        : SparcStaticInst(mnem, _machInst, __opClass),
+                        numMicroOps(_numMicroOps)
+            {
+                assert(numMicroOps);
+                microOps = new StaticInstPtr[numMicroOps];
+                flags[IsMacroOp] = true;
+            }
+
+            ~SparcMacroInst()
+            {
+                delete [] microOps;
+            }
+
+            std::string generateDisassembly(Addr pc,
+                const SymbolTable *symtab) const;
+
+            StaticInstPtr * microOps;
+
+            StaticInstPtr fetchMicroOp(MicroPC microPC)
+            {
+                assert(microPC < numMicroOps);
+                return microOps[microPC];
+            }
+
+            %(BasicExecPanic)s
+        };
+
+        class SparcMicroInst : public SparcStaticInst
+        {
+          protected:
+            //Constructor.
+            SparcMicroInst(const char *mnem,
+                    ExtMachInst _machInst, OpClass __opClass)
+                        : SparcStaticInst(mnem, _machInst, __opClass)
+            {
+                flags[IsMicroOp] = true;
+            }
+        };
+
+        class SparcDelayedMicroInst : public SparcMicroInst
+        {
+          protected:
+            //Constructor.
+            SparcDelayedMicroInst(const char *mnem,
+                    ExtMachInst _machInst, OpClass __opClass)
+                        : SparcMicroInst(mnem, _machInst, __opClass)
+            {
+                flags[IsDelayedCommit] = true;
+            }
+        };
+}};
+
+output decoder {{
+
+        std::string SparcMacroInst::generateDisassembly(Addr pc,
+                const SymbolTable *symtab) const
+        {
+            std::stringstream response;
+
+            printMnemonic(response, mnemonic);
+
+            return response.str();
+        }
+
+}};
diff --git a/src/arch/sparc/isa/formats/priv.isa b/src/arch/sparc/isa/formats/priv.isa
index d7ee01519..2a38422a7 100644
--- a/src/arch/sparc/isa/formats/priv.isa
+++ b/src/arch/sparc/isa/formats/priv.isa
@@ -103,13 +103,13 @@ let {{
         (usesImm, code, immCode,
          rString, iString) = splitOutImm(code)
         iop = InstObjParams(name, Name, 'Priv', code,
-                opt_flags, ("check", checkCode))
+                opt_flags, {"check": checkCode})
         header_output = BasicDeclare.subst(iop)
         decoder_output = BasicConstructor.subst(iop)
         exec_output = PrivExecute.subst(iop)
         if usesImm:
             imm_iop = InstObjParams(name, Name + 'Imm', 'PrivImm',
-                    immCode, opt_flags, ("check", checkCode))
+                    immCode, opt_flags, {"check": checkCode})
             header_output += BasicDeclare.subst(imm_iop)
             decoder_output += BasicConstructor.subst(imm_iop)
             exec_output += PrivExecute.subst(imm_iop)
diff --git a/src/arch/sparc/isa/formats/unimp.isa b/src/arch/sparc/isa/formats/unimp.isa
new file mode 100644
index 000000000..a623507a1
--- /dev/null
+++ b/src/arch/sparc/isa/formats/unimp.isa
@@ -0,0 +1,147 @@
+// -*- mode:c++ -*-
+
+// Copyright (c) 2003-2005 The Regents of The University of Michigan
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Steve Reinhardt
+
+////////////////////////////////////////////////////////////////////
+//
+// Unimplemented instructions
+//
+
+output header {{
+    /**
+     * Static instruction class for unimplemented instructions that
+     * cause simulator termination.  Note that these are recognized
+     * (legal) instructions that the simulator does not support; the
+     * 'Unknown' class is used for unrecognized/illegal instructions.
+     * This is a leaf class.
+     */
+    class FailUnimplemented : public SparcStaticInst
+    {
+      public:
+        /// Constructor
+        FailUnimplemented(const char *_mnemonic, ExtMachInst _machInst)
+            : SparcStaticInst(_mnemonic, _machInst, No_OpClass)
+        {
+            // don't call execute() (which panics) if we're on a
+            // speculative path
+            flags[IsNonSpeculative] = true;
+        }
+
+        %(BasicExecDeclare)s
+
+        std::string
+        generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+    };
+
+    /**
+     * Base class for unimplemented instructions that cause a warning
+     * to be printed (but do not terminate simulation).  This
+     * implementation is a little screwy in that it will print a
+     * warning for each instance of a particular unimplemented machine
+     * instruction, not just for each unimplemented opcode.  Should
+     * probably make the 'warned' flag a static member of the derived
+     * class.
+     */
+    class WarnUnimplemented : public SparcStaticInst
+    {
+      private:
+        /// Have we warned on this instruction yet?
+        mutable bool warned;
+
+      public:
+        /// Constructor
+        WarnUnimplemented(const char *_mnemonic, ExtMachInst _machInst)
+            : SparcStaticInst(_mnemonic, _machInst, No_OpClass), warned(false)
+        {
+            // don't call execute() (which prints a warning) if we're on a
+            // speculative path
+            flags[IsNonSpeculative] = true;
+        }
+
+        %(BasicExecDeclare)s
+
+        std::string
+        generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+    };
+}};
+
+output decoder {{
+    std::string
+    FailUnimplemented::generateDisassembly(Addr pc,
+                                           const SymbolTable *symtab) const
+    {
+        return csprintf("%-10s (unimplemented)", mnemonic);
+    }
+
+    std::string
+    WarnUnimplemented::generateDisassembly(Addr pc,
+                                           const SymbolTable *symtab) const
+    {
+#ifdef SS_COMPATIBLE_DISASSEMBLY
+        return csprintf("%-10s", mnemonic);
+#else
+        return csprintf("%-10s (unimplemented)", mnemonic);
+#endif
+    }
+}};
+
+output exec {{
+    Fault
+    FailUnimplemented::execute(%(CPU_exec_context)s *xc,
+                               Trace::InstRecord *traceData) const
+    {
+        panic("attempt to execute unimplemented instruction '%s' "
+              "(inst 0x%08x)", mnemonic, machInst);
+        return NoFault;
+    }
+
+    Fault
+    WarnUnimplemented::execute(%(CPU_exec_context)s *xc,
+                               Trace::InstRecord *traceData) const
+    {
+        if (!warned) {
+            warn("instruction '%s' unimplemented\n", mnemonic);
+            warned = true;
+        }
+
+        return NoFault;
+    }
+}};
+
+
+def format FailUnimpl() {{
+    iop = InstObjParams(name, 'FailUnimplemented')
+    decode_block = BasicDecodeWithMnemonic.subst(iop)
+}};
+
+def format WarnUnimpl() {{
+    iop = InstObjParams(name, 'WarnUnimplemented')
+    decode_block = BasicDecodeWithMnemonic.subst(iop)
+}};
+
diff --git a/src/arch/sparc/isa/includes.isa b/src/arch/sparc/isa/includes.isa
index f1c2bee96..a324756ec 100644
--- a/src/arch/sparc/isa/includes.isa
+++ b/src/arch/sparc/isa/includes.isa
@@ -40,6 +40,7 @@ output header {{
 #include "cpu/static_inst.hh"
 #include "arch/sparc/faults.hh"
 #include "mem/request.hh"  // some constructors use MemReq flags
+#include "mem/packet.hh"
 #include "arch/sparc/isa_traits.hh"
 #include "arch/sparc/regfile.hh"
 }};
@@ -48,6 +49,7 @@ output decoder {{
 #include "base/cprintf.hh"
 #include "base/loader/symtab.hh"
 #include "cpu/thread_context.hh"  // for Jump::branchTarget()
+#include "mem/packet.hh"
 
 #if defined(linux)
 #include <fenv.h>
@@ -65,6 +67,8 @@ output exec {{
 #include "cpu/base.hh"
 #include "cpu/exetrace.hh"
 #include "sim/sim_exit.hh"
+#include "mem/packet.hh"
+#include "mem/packet_access.hh"
 
 using namespace SparcISA;
 }};
diff --git a/src/arch/sparc/isa/main.isa b/src/arch/sparc/isa/main.isa
index 14acf54fa..df5ad0c99 100644
--- a/src/arch/sparc/isa/main.isa
+++ b/src/arch/sparc/isa/main.isa
@@ -26,7 +26,7 @@
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 //
-// Authors: Korey Sewell
+// Authors: Gabe Black
 
 ////////////////////////////////////////////////////////////////////
 //
@@ -55,7 +55,7 @@ namespace SparcISA;
 ##include "base.isa"
 
 //Include the definitions for the instruction formats
-##include "formats.isa"
+##include "formats/formats.isa"
 
 //Include the decoder definition
 ##include "decoder.isa"
diff --git a/src/arch/sparc/isa/operands.isa b/src/arch/sparc/isa/operands.isa
index 605816083..b8b75170b 100644
--- a/src/arch/sparc/isa/operands.isa
+++ b/src/arch/sparc/isa/operands.isa
@@ -42,6 +42,16 @@ def operand_types {{
     'qf' : ('float', 128)
 }};
 
+output header {{
+    // "Decompress" a double or quad floating point register number
+    // stored in a 5 bit field: bit 0 of the field is moved up to
+    // bit 5 and all other bits are kept as they are (e.g. 1 -> 32).
+    static inline unsigned int dfpr(unsigned int regNum)
+    {
+        return (regNum & (~1)) | ((regNum & 1) << 5);
+    }
+}};
+
 def operands {{
     # Int regs default to unsigned, but code should not count on this.
     # For clarity, descriptions that depend on unsigned behavior should
@@ -51,10 +61,22 @@ def operands {{
     'RdHigh':		('IntReg', 'udw', 'RD | 1', 'IsInteger', 3),
     'Rs1': 		('IntReg', 'udw', 'RS1', 'IsInteger', 4),
     'Rs2': 		('IntReg', 'udw', 'RS2', 'IsInteger', 5),
-    'Frd':		('FloatReg', 'df', 'RD', 'IsFloating', 10),
-    'Frs1':		('FloatReg', 'df', 'RS1', 'IsFloating', 11),
-    'Frs2':		('FloatReg', 'df', 'RS2', 'IsFloating', 12),
-    'Mem': 		('Mem', 'udw', None, ('IsMemRef', 'IsLoad', 'IsStore'), 20),
+    'Frds':		('FloatReg', 'sf', 'RD', 'IsFloating', 10),
+    'Frd':		('FloatReg', 'df', 'dfpr(RD)', 'IsFloating', 10),
+    # Each Frd_N refers to the Nth double precision register from Frd.
+    # Note that this adds twice N to the register number.
+    'Frd_0':		('FloatReg', 'df', 'dfpr(RD)', 'IsFloating', 10),
+    'Frd_1':		('FloatReg', 'df', 'dfpr(RD) + 2', 'IsFloating', 10),
+    'Frd_2':		('FloatReg', 'df', 'dfpr(RD) + 4', 'IsFloating', 10),
+    'Frd_3':		('FloatReg', 'df', 'dfpr(RD) + 6', 'IsFloating', 10),
+    'Frd_4':		('FloatReg', 'df', 'dfpr(RD) + 8', 'IsFloating', 10),
+    'Frd_5':		('FloatReg', 'df', 'dfpr(RD) + 10', 'IsFloating', 10),
+    'Frd_6':		('FloatReg', 'df', 'dfpr(RD) + 12', 'IsFloating', 10),
+    'Frd_7':		('FloatReg', 'df', 'dfpr(RD) + 14', 'IsFloating', 10),
+    'Frs1s':		('FloatReg', 'sf', 'RS1', 'IsFloating', 11),
+    'Frs1':		('FloatReg', 'df', 'dfpr(RS1)', 'IsFloating', 11),
+    'Frs2s':		('FloatReg', 'sf', 'RS2', 'IsFloating', 12),
+    'Frs2':		('FloatReg', 'df', 'dfpr(RS2)', 'IsFloating', 12),
     'NPC': 		('NPC', 'udw', None, ( None, None, 'IsControl' ), 31),
     'NNPC':		('NNPC', 'udw', None, (None, None, 'IsControl' ), 32),
     #'Runiq': ('ControlReg', 'uq', 'Uniq', None, 1),
@@ -84,6 +106,8 @@ def operands {{
     'Gl':               ('ControlReg', 'udw', 'MISCREG_GL', None, 54),
 
     'Fsr':		('ControlReg', 'udw', 'MISCREG_FSR', None, 55),
-    'Gsr':		('ControlReg', 'udw', 'MISCREG_GSR', None, 56)
+    'Gsr':		('ControlReg', 'udw', 'MISCREG_GSR', None, 56),
+    # Mem gets a large number so it's always last
+    'Mem': 		('Mem', 'udw', None, ('IsMemRef', 'IsLoad', 'IsStore'), 100)
 
 }};
diff --git a/src/arch/sparc/utility.hh b/src/arch/sparc/utility.hh
index f1c071148..23fddf0e9 100644
--- a/src/arch/sparc/utility.hh
+++ b/src/arch/sparc/utility.hh
@@ -33,12 +33,25 @@
 
 #include "arch/sparc/isa_traits.hh"
 #include "base/misc.hh"
+#include "base/bitfield.hh"
+#include "cpu/thread_context.hh"
 
 namespace SparcISA
 {
     inline ExtMachInst
-    makeExtMI(MachInst inst, const Addr &pc) {
-        return ExtMachInst(inst);
+    makeExtMI(MachInst inst, ThreadContext * xc) {
+        ExtMachInst emi = static_cast<ExtMachInst>(inst);
+        //The I bit, bit 13, is used to figure out where the ASI
+        //should come from. Use that in the ExtMachInst. This is
+        //slightly redundant, but it removes the need to put a condition
+        //into all the execute functions
+        if(inst & (1 << 13))
+            emi |= (static_cast<ExtMachInst>(xc->readMiscReg(MISCREG_ASI))
+                    << (sizeof(MachInst) * 8));
+        else
+            emi |= (static_cast<ExtMachInst>(bits(inst, 12, 5))
+                    << (sizeof(MachInst) * 8));
+        return emi;
     }
 
     inline bool isCallerSaveIntegerRegister(unsigned int reg) {
diff --git a/src/base/bitfield.hh b/src/base/bitfield.hh
index f4e49aa8d..879780d56 100644
--- a/src/base/bitfield.hh
+++ b/src/base/bitfield.hh
@@ -32,7 +32,7 @@
 #ifndef __BASE_BITFIELD_HH__
 #define __BASE_BITFIELD_HH__
 
-#include "sim/host.hh"
+#include <inttypes.h>
 
 /**
  * Generate a 64-bit mask of 'nbits' 1s, right justified.
diff --git a/src/base/traceflags.py b/src/base/traceflags.py
index 640e7e165..92735aa5f 100644
--- a/src/base/traceflags.py
+++ b/src/base/traceflags.py
@@ -134,6 +134,7 @@ baseFlags = [
     'PciConfigAll',
     'Pipeline',
     'Printf',
+    'Quiesce',
     'ROB',
     'Regs',
     'Rename',
diff --git a/src/cpu/checker/cpu_impl.hh b/src/cpu/checker/cpu_impl.hh
index ba34c5cfc..36c7349e6 100644
--- a/src/cpu/checker/cpu_impl.hh
+++ b/src/cpu/checker/cpu_impl.hh
@@ -200,7 +200,7 @@ Checker<DynInstPtr>::verify(DynInstPtr &completed_inst)
             validateInst(inst);
 
             curStaticInst = StaticInst::decode(makeExtMI(machInst,
-                                                         thread->readPC()));
+                                                         thread->getTC()));
 
 #if FULL_SYSTEM
             thread->setInst(machInst);
diff --git a/src/cpu/exetrace.cc b/src/cpu/exetrace.cc
index 8b1e60aea..9d85311bb 100644
--- a/src/cpu/exetrace.cc
+++ b/src/cpu/exetrace.cc
@@ -60,61 +60,66 @@ Trace::InstRecord::dump(ostream &outs)
     if (flags[PRINT_REG_DELTA])
     {
 #if THE_ISA == SPARC_ISA
-        static uint64_t regs[32] = {
-            0, 0, 0, 0, 0, 0, 0, 0,
-            0, 0, 0, 0, 0, 0, 0, 0,
-            0, 0, 0, 0, 0, 0, 0, 0,
-            0, 0, 0, 0, 0, 0, 0, 0};
-        static uint64_t ccr = 0;
-        static uint64_t y = 0;
-        static uint64_t floats[32];
-        uint64_t newVal;
-        static const char * prefixes[4] = {"G", "O", "L", "I"};
-
-        char buf[256];
-        sprintf(buf, "PC = 0x%016llx", thread->readNextPC());
-        outs << buf;
-        sprintf(buf, " NPC = 0x%016llx", thread->readNextNPC());
-        outs << buf;
-        newVal = thread->readMiscReg(SparcISA::MISCREG_CCR);
-        if(newVal != ccr)
+        //Don't print what happens for each micro-op, just print out
+        //once at the last op, and for regular instructions.
+        if(!staticInst->isMicroOp() || staticInst->isLastMicroOp())
         {
-            sprintf(buf, " CCR = 0x%016llx", newVal);
+            static uint64_t regs[32] = {
+                0, 0, 0, 0, 0, 0, 0, 0,
+                0, 0, 0, 0, 0, 0, 0, 0,
+                0, 0, 0, 0, 0, 0, 0, 0,
+                0, 0, 0, 0, 0, 0, 0, 0};
+            static uint64_t ccr = 0;
+            static uint64_t y = 0;
+            static uint64_t floats[32];
+            uint64_t newVal;
+            static const char * prefixes[4] = {"G", "O", "L", "I"};
+
+            char buf[256];
+            sprintf(buf, "PC = 0x%016llx", thread->readNextPC());
             outs << buf;
-            ccr = newVal;
-        }
-        newVal = thread->readMiscReg(SparcISA::MISCREG_Y);
-        if(newVal != y)
-        {
-            sprintf(buf, " Y = 0x%016llx", newVal);
+            sprintf(buf, " NPC = 0x%016llx", thread->readNextNPC());
             outs << buf;
-            y = newVal;
-        }
-        for(int y = 0; y < 4; y++)
-        {
-            for(int x = 0; x < 8; x++)
+            newVal = thread->readMiscReg(SparcISA::MISCREG_CCR);
+            if(newVal != ccr)
+            {
+                sprintf(buf, " CCR = 0x%016llx", newVal);
+                outs << buf;
+                ccr = newVal;
+            }
+            newVal = thread->readMiscReg(SparcISA::MISCREG_Y);
+            if(newVal != y)
+            {
+                sprintf(buf, " Y = 0x%016llx", newVal);
+                outs << buf;
+                y = newVal;
+            }
+            for(int y = 0; y < 4; y++)
             {
-                int index = x + 8 * y;
-                newVal = thread->readIntReg(index);
-                if(regs[index] != newVal)
+                for(int x = 0; x < 8; x++)
                 {
-                    sprintf(buf, " %s%d = 0x%016llx", prefixes[y], x, newVal);
-                    outs << buf;
-                    regs[index] = newVal;
+                    int index = x + 8 * y;
+                    newVal = thread->readIntReg(index);
+                    if(regs[index] != newVal)
+                    {
+                        sprintf(buf, " %s%d = 0x%016llx", prefixes[y], x, newVal);
+                        outs << buf;
+                        regs[index] = newVal;
+                    }
                 }
             }
-        }
-        for(int y = 0; y < 32; y++)
-        {
-            newVal = thread->readFloatRegBits(2 * y, 64);
-            if(floats[y] != newVal)
+            for(int y = 0; y < 32; y++)
             {
-                sprintf(buf, " F%d = 0x%016llx", y, newVal);
-                outs << buf;
-                floats[y] = newVal;
+                newVal = thread->readFloatRegBits(2 * y, 64);
+                if(floats[y] != newVal)
+                {
+                    sprintf(buf, " F%d = 0x%016llx", 2 * y, newVal);
+                    outs << buf;
+                    floats[y] = newVal;
+                }
             }
+            outs << endl;
         }
-        outs << endl;
 #endif
     }
     else if (flags[INTEL_FORMAT]) {
diff --git a/src/cpu/memtest/memtest.cc b/src/cpu/memtest/memtest.cc
index 1e0d07f9a..91e073cf0 100644
--- a/src/cpu/memtest/memtest.cc
+++ b/src/cpu/memtest/memtest.cc
@@ -113,7 +113,7 @@ MemTest::MemTest(const string &name,
 //		 PhysicalMemory *check_mem,
                  unsigned _memorySize,
                  unsigned _percentReads,
-//		 unsigned _percentCopies,
+                 unsigned _percentFunctional,
                  unsigned _percentUncacheable,
                  unsigned _progressInterval,
                  unsigned _percentSourceUnaligned,
@@ -130,7 +130,7 @@ MemTest::MemTest(const string &name,
 //      checkMem(check_mem),
       size(_memorySize),
       percentReads(_percentReads),
-//      percentCopies(_percentCopies),
+      percentFunctional(_percentFunctional),
       percentUncacheable(_percentUncacheable),
       progressInterval(_progressInterval),
       nextProgressMessage(_progressInterval),
@@ -345,7 +345,7 @@ MemTest::tick()
     } else {
         paddr = ((base) ? baseAddr1 : baseAddr2) + offset;
     }
-    bool probe = (random() % 2 == 1) && !(flags & UNCACHEABLE);
+    bool probe = (random() % 100 < percentFunctional) && !(flags & UNCACHEABLE);
     //bool probe = false;
 
     paddr &= ~((1 << access_size) - 1);
@@ -501,7 +501,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(MemTest)
 //    SimObjectParam<PhysicalMemory *> check_mem;
     Param<unsigned> memory_size;
     Param<unsigned> percent_reads;
-//    Param<unsigned> percent_copies;
+    Param<unsigned> percent_functional;
     Param<unsigned> percent_uncacheable;
     Param<unsigned> progress_interval;
     Param<unsigned> percent_source_unaligned;
@@ -520,7 +520,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(MemTest)
 //    INIT_PARAM(check_mem, "check memory"),
     INIT_PARAM(memory_size, "memory size"),
     INIT_PARAM(percent_reads, "target read percentage"),
-//    INIT_PARAM(percent_copies, "target copy percentage"),
+    INIT_PARAM(percent_functional, "percentage of access that are functional"),
     INIT_PARAM(percent_uncacheable, "target uncacheable percentage"),
     INIT_PARAM(progress_interval, "progress report interval (in accesses)"),
     INIT_PARAM(percent_source_unaligned,
@@ -537,7 +537,7 @@ END_INIT_SIM_OBJECT_PARAMS(MemTest)
 CREATE_SIM_OBJECT(MemTest)
 {
     return new MemTest(getInstanceName(), /*cache->getInterface(),*/ /*main_mem,*/
-                       /*check_mem,*/ memory_size, percent_reads, /*percent_copies,*/
+                       /*check_mem,*/ memory_size, percent_reads, percent_functional,
                        percent_uncacheable, progress_interval,
                        percent_source_unaligned, percent_dest_unaligned,
                        trace_addr, max_loads, atomic);
diff --git a/src/cpu/memtest/memtest.hh b/src/cpu/memtest/memtest.hh
index 4e88ecffd..edde4a3b2 100644
--- a/src/cpu/memtest/memtest.hh
+++ b/src/cpu/memtest/memtest.hh
@@ -55,7 +55,7 @@ class MemTest : public MemObject
 //	    PhysicalMemory *check_mem,
             unsigned _memorySize,
             unsigned _percentReads,
-//	    unsigned _percentCopies,
+            unsigned _percentFunctional,
             unsigned _percentUncacheable,
             unsigned _progressInterval,
             unsigned _percentSourceUnaligned,
@@ -144,7 +144,7 @@ class MemTest : public MemObject
     unsigned size;		// size of testing memory region
 
     unsigned percentReads;	// target percentage of read accesses
-//    unsigned percentCopies;	// target percentage of copy accesses
+    unsigned percentFunctional;	// target percentage of functional accesses
     unsigned percentUncacheable;
 
     int id;
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index da75bfecf..e7bf83b20 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -1118,7 +1118,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
             inst = TheISA::gtoh(*reinterpret_cast<TheISA::MachInst *>
                         (&cacheData[tid][offset]));
 
-            ext_inst = TheISA::makeExtMI(inst, fetch_PC);
+            ext_inst = TheISA::makeExtMI(inst, cpu->tcBase(tid));
 
             // Create a new DynInst from the instruction fetched.
             DynInstPtr instruction = new DynInst(ext_inst, fetch_PC,
diff --git a/src/cpu/ozone/front_end_impl.hh b/src/cpu/ozone/front_end_impl.hh
index eed6de6dd..36e87ec9c 100644
--- a/src/cpu/ozone/front_end_impl.hh
+++ b/src/cpu/ozone/front_end_impl.hh
@@ -883,7 +883,7 @@ FrontEnd<Impl>::getInstFromCacheline()
     // Get the instruction from the array of the cache line.
     inst = htog(*reinterpret_cast<MachInst *>(&cacheData[offset]));
 
-    ExtMachInst decode_inst = TheISA::makeExtMI(inst, PC);
+    ExtMachInst decode_inst = TheISA::makeExtMI(inst, tc);
 
     // Create a new DynInst from the instruction fetched.
     DynInstPtr instruction = new DynInst(decode_inst, PC, PC+sizeof(MachInst),
diff --git a/src/cpu/ozone/lw_lsq.hh b/src/cpu/ozone/lw_lsq.hh
index 8307da521..dc58a8285 100644
--- a/src/cpu/ozone/lw_lsq.hh
+++ b/src/cpu/ozone/lw_lsq.hh
@@ -260,7 +260,7 @@ class OzoneLWLSQ {
 
         virtual void getDeviceAddressRanges(AddrRangeList &resp,
                                             AddrRangeList &snoop)
-        { resp.clear(); snoop.clear(); snoop.push_back(RangeSize(0,-1); }
+        { resp.clear(); snoop.clear(); snoop.push_back(RangeSize(0,-1)); }
 
         virtual bool recvTiming(PacketPtr pkt);
 
diff --git a/src/cpu/quiesce_event.cc b/src/cpu/quiesce_event.cc
index 8dd20db02..fa79e6d1e 100644
--- a/src/cpu/quiesce_event.cc
+++ b/src/cpu/quiesce_event.cc
@@ -28,6 +28,7 @@
  * Authors: Kevin Lim
  */
 
+#include "cpu/base.hh"
 #include "cpu/thread_context.hh"
 #include "cpu/quiesce_event.hh"
 
@@ -39,6 +40,7 @@ EndQuiesceEvent::EndQuiesceEvent(ThreadContext *_tc)
 void
 EndQuiesceEvent::process()
 {
+    DPRINTF(Quiesce, "activating %s\n", tc->getCpuPtr()->name());
     tc->activate();
 }
 
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index 38a8ba097..edba55b0d 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -183,11 +183,14 @@ AtomicSimpleCPU::unserialize(Checkpoint *cp, const string &section)
 void
 AtomicSimpleCPU::resume()
 {
-    changeState(SimObject::Running);
-    if (thread->status() == ThreadContext::Active) {
+    if (_status != SwitchedOut && _status != Idle) {
         assert(system->getMemoryMode() == System::Atomic);
-        if (!tickEvent.scheduled())
-            tickEvent.schedule(curTick);
+
+        changeState(SimObject::Running);
+        if (thread->status() == ThreadContext::Active) {
+            if (!tickEvent.scheduled())
+                tickEvent.schedule(curTick);
+        }
     }
 }
 
@@ -448,7 +451,8 @@ AtomicSimpleCPU::tick()
     for (int i = 0; i < width; ++i) {
         numCycles++;
 
-        checkForInterrupts();
+        if (!curStaticInst || !curStaticInst->isDelayedCommit())
+            checkForInterrupts();
 
         Fault fault = setupFetchRequest(ifetch_req);
 
diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc
index a79d3c542..cbb3980cb 100644
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -396,7 +396,20 @@ BaseSimpleCPU::preExecute()
 
     // decode the instruction
     inst = gtoh(inst);
-    curStaticInst = StaticInst::decode(makeExtMI(inst, thread->readPC()));
+    //If we're not in the middle of a macro instruction
+    if (!curMacroStaticInst) {
+        StaticInstPtr instPtr = StaticInst::decode(makeExtMI(inst, thread->getTC()));
+        if (instPtr->isMacroOp()) {
+            curMacroStaticInst = instPtr;
+            curStaticInst = curMacroStaticInst->fetchMicroOp(0);
+        } else {
+            curStaticInst = instPtr;
+        }
+    } else {
+        //Read the next micro op from the macro op
+        curStaticInst = curMacroStaticInst->fetchMicroOp(thread->readMicroPC());
+    }
+
 
     traceData = Trace::getInstRecord(curTick, tc, curStaticInst,
                                      thread->readPC());
@@ -446,18 +459,35 @@ BaseSimpleCPU::advancePC(Fault fault)
 {
     if (fault != NoFault) {
         fault->invoke(tc);
-    }
-    else {
-        // go to the next instruction
-        thread->setPC(thread->readNextPC());
+    } else {
+        //If we're at the last micro op for this instruction
+        if (curStaticInst->isLastMicroOp()) {
+            //We should be working with a macro op
+            assert(curMacroStaticInst);
+            //Close out this macro op, and clean up the
+            //microcode state
+            curMacroStaticInst = StaticInst::nullStaticInstPtr;
+            thread->setMicroPC(0);
+            thread->setNextMicroPC(1);
+        }
+        //If we're still in a macro op
+        if (curMacroStaticInst) {
+            //Advance the micro pc
+            thread->setMicroPC(thread->readNextMicroPC());
+            //Advance the "next" micro pc. Note that there are no delay
+            //slots, and micro ops are "word" addressed.
+            thread->setNextMicroPC(thread->readNextMicroPC() + 1);
+        } else {
+            // go to the next instruction
+            thread->setPC(thread->readNextPC());
 #if ISA_HAS_DELAY_SLOT
-        thread->setNextPC(thread->readNextNPC());
-        thread->setNextNPC(thread->readNextNPC() + sizeof(MachInst));
-        assert(thread->readNextPC() != thread->readNextNPC());
+            thread->setNextPC(thread->readNextNPC());
+            thread->setNextNPC(thread->readNextNPC() + sizeof(MachInst));
+            assert(thread->readNextPC() != thread->readNextNPC());
 #else
-        thread->setNextPC(thread->readNextPC() + sizeof(MachInst));
+            thread->setNextPC(thread->readNextPC() + sizeof(MachInst));
 #endif
-
+        }
     }
 
 #if FULL_SYSTEM
diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh
index 57cfa3c2c..af6b6f835 100644
--- a/src/cpu/simple/base.hh
+++ b/src/cpu/simple/base.hh
@@ -128,6 +128,7 @@ class BaseSimpleCPU : public BaseCPU
     TheISA::IntReg dataReg;
 
     StaticInstPtr curStaticInst;
+    StaticInstPtr curMacroStaticInst;
 
     void checkForInterrupts();
     Fault setupFetchRequest(Request *req);
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index 97df0e5d5..fe6775ea4 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -427,7 +427,8 @@ TimingSimpleCPU::write(int32_t data, Addr addr, unsigned flags, uint64_t *res)
 void
 TimingSimpleCPU::fetch()
 {
-    checkForInterrupts();
+    if (!curStaticInst || !curStaticInst->isDelayedCommit())
+        checkForInterrupts();
 
     Request *ifetch_req = new Request();
     ifetch_req->setThreadContext(cpu_id, /* thread ID */ 0);
diff --git a/src/cpu/simple_thread.hh b/src/cpu/simple_thread.hh
index 6fa6500bd..fe22e6c43 100644
--- a/src/cpu/simple_thread.hh
+++ b/src/cpu/simple_thread.hh
@@ -377,6 +377,16 @@ class SimpleThread : public ThreadState
         regs.setPC(val);
     }
 
+    uint64_t readMicroPC()
+    {
+        return microPC;
+    }
+
+    void setMicroPC(uint64_t val)
+    {
+        microPC = val;
+    }
+
     uint64_t readNextPC()
     {
         return regs.readNextPC();
@@ -387,6 +397,16 @@ class SimpleThread : public ThreadState
         regs.setNextPC(val);
     }
 
+    uint64_t readNextMicroPC()
+    {
+        return nextMicroPC;
+    }
+
+    void setNextMicroPC(uint64_t val)
+    {
+        nextMicroPC = val;
+    }
+
     uint64_t readNextNPC()
     {
         return regs.readNextNPC();
diff --git a/src/cpu/static_inst.cc b/src/cpu/static_inst.cc
index c311d2282..cb4a7cdf7 100644
--- a/src/cpu/static_inst.cc
+++ b/src/cpu/static_inst.cc
@@ -75,3 +75,10 @@ StaticInst::hasBranchTarget(Addr pc, ThreadContext *tc, Addr &tgt) const
     return false;
 }
 
+StaticInstPtr
+StaticInst::fetchMicroOp(MicroPC micropc)
+{
+    panic("StaticInst::fetchMicroOp() called on instruction "
+            "that is not microcoded.");
+}
+
diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh
index 578d14191..523cfae40 100644
--- a/src/cpu/static_inst.hh
+++ b/src/cpu/static_inst.hh
@@ -67,6 +67,8 @@ namespace Trace {
     class InstRecord;
 }
 
+typedef uint32_t MicroPC;
+
 /**
  * Base, ISA-independent static instruction class.
  *
@@ -139,6 +141,14 @@ class StaticInstBase : public RefCounted
         IsIprAccess,    ///< Accesses IPRs
         IsUnverifiable, ///< Can't be verified by a checker
 
+        //Flags for microcode
+        IsMacroOp,      ///< Is a macroop containing microops
+        IsMicroOp,	///< Is a microop
+        IsDelayedCommit,	///< This microop doesn't commit right away
+        IsLastMicroOp,	///< This microop ends a microop sequence
+        //This flag doesn't do anything yet
+        IsMicroBranch,	///< This microop branches within the microcode for a macroop
+
         NumFlags
     };
 
@@ -230,6 +240,12 @@ class StaticInstBase : public RefCounted
     bool isQuiesce() const { return flags[IsQuiesce]; }
     bool isIprAccess() const { return flags[IsIprAccess]; }
     bool isUnverifiable() const { return flags[IsUnverifiable]; }
+    bool isMacroOp() const { return flags[IsMacroOp]; }
+    bool isMicroOp() const { return flags[IsMicroOp]; }
+    bool isDelayedCommit() const { return flags[IsDelayedCommit]; }
+    bool isLastMicroOp() const { return flags[IsLastMicroOp]; }
+    //This flag doesn't do anything yet
+    bool isMicroBranch() const { return flags[IsMicroBranch]; }
     //@}
 
     /// Operation class.  Used to select appropriate function unit in issue.
@@ -347,6 +363,12 @@ class StaticInst : public StaticInstBase
 #include "cpu/static_inst_exec_sigs.hh"
 
     /**
+     * Return the microop that goes with a particular micropc. This should
+     * only be defined/used in macroops which will contain microops
+     */
+    virtual StaticInstPtr fetchMicroOp(MicroPC micropc);
+
+    /**
      * Return the target address for a PC-relative branch.
      * Invalid if not a PC-relative branch (i.e. isDirectCtrl()
      * should be true).
diff --git a/src/cpu/thread_state.cc b/src/cpu/thread_state.cc
index 6a96560f1..c644ae8d7 100644
--- a/src/cpu/thread_state.cc
+++ b/src/cpu/thread_state.cc
@@ -42,13 +42,13 @@
 ThreadState::ThreadState(int _cpuId, int _tid)
     : cpuId(_cpuId), tid(_tid), lastActivate(0), lastSuspend(0),
       profile(NULL), profileNode(NULL), profilePC(0), quiesceEvent(NULL),
-      funcExeInst(0), storeCondFailures(0)
+      microPC(0), nextMicroPC(1), funcExeInst(0), storeCondFailures(0)
 #else
 ThreadState::ThreadState(int _cpuId, int _tid, Process *_process,
                          short _asid, MemObject *mem)
     : cpuId(_cpuId), tid(_tid), lastActivate(0), lastSuspend(0),
       process(_process), asid(_asid),
-      funcExeInst(0), storeCondFailures(0)
+      microPC(0), nextMicroPC(1), funcExeInst(0), storeCondFailures(0)
 #endif
 {
     numInst = 0;
diff --git a/src/cpu/thread_state.hh b/src/cpu/thread_state.hh
index 14e033b7f..60353760c 100644
--- a/src/cpu/thread_state.hh
+++ b/src/cpu/thread_state.hh
@@ -200,6 +200,16 @@ struct ThreadState {
      */
     TheISA::MachInst inst;
 
+    /** The current microcode pc for the currently executing macro
+     * operation.
+     */
+    MicroPC microPC;
+
+    /** The next microcode pc for the currently executing macro
+     * operation.
+     */
+    MicroPC nextMicroPC;
+
   public:
     /**
      * Temporary storage to pass the source address from copy_load to
diff --git a/src/dev/isa_fake.cc b/src/dev/isa_fake.cc
index 4f1771ff9..23761cd10 100644
--- a/src/dev/isa_fake.cc
+++ b/src/dev/isa_fake.cc
@@ -61,6 +61,7 @@ IsaFake::read(PacketPtr pkt)
     DPRINTF(Tsunami, "read  va=%#x size=%d\n", pkt->getAddr(), pkt->getSize());
 
     switch (pkt->getSize()) {
+      case sizeof(uint64_t):
          pkt->set(0xFFFFFFFFFFFFFFFFULL);
          break;
       case sizeof(uint32_t):
diff --git a/src/kern/linux/linux.hh b/src/kern/linux/linux.hh
index 3a2677642..6e0b37d91 100644
--- a/src/kern/linux/linux.hh
+++ b/src/kern/linux/linux.hh
@@ -54,11 +54,11 @@ class Linux : public OperatingSystem
 
     //@{
     /// Basic Linux types.
-/*    typedef uint64_t size_t;
+    typedef uint64_t size_t;
     typedef uint64_t off_t;
     typedef int64_t time_t;
     typedef uint32_t uid_t;
-    typedef uint32_t gid_t;*/
+    typedef uint32_t gid_t;
     //@}
 
     /// Stat buffer.  Note that we can't call it 'stat' since that
diff --git a/src/kern/solaris/solaris.hh b/src/kern/solaris/solaris.hh
index a5ca41cdd..ed574fdbf 100644
--- a/src/kern/solaris/solaris.hh
+++ b/src/kern/solaris/solaris.hh
@@ -56,7 +56,7 @@ class Solaris : public OperatingSystem
 
     //@{
     /// Basic Solaris types.
-/*    typedef uint64_t size_t;
+    typedef uint64_t size_t;
     typedef uint64_t off_t;
     typedef int64_t time_t;
     typedef int32_t uid_t;
@@ -65,7 +65,7 @@ class Solaris : public OperatingSystem
     typedef uint64_t ino_t;
     typedef uint64_t dev_t;
     typedef uint32_t mode_t;
-    typedef uint32_t nlink_t;*/
+    typedef uint32_t nlink_t;
     //@}
 
     struct tgt_timespec {
diff --git a/src/kern/tru64/tru64.hh b/src/kern/tru64/tru64.hh
index 18671c364..6d6d0d96d 100644
--- a/src/kern/tru64/tru64.hh
+++ b/src/kern/tru64/tru64.hh
@@ -393,34 +393,6 @@ class Tru64 : public OperatingSystem
         uint64_t pad2[2];	//!< pad2
     };
 
-    /*/// Helper function to convert a host stat buffer to a target stat
-    /// buffer.  Also copies the target buffer out to the simulated
-    /// memory space.  Used by stat(), fstat(), and lstat().
-    template <class T>
-    static void
-    copyOutStatBuf(TranslatingPort *mem, Addr addr, global_stat *host)
-    {
-        using namespace TheISA;
-
-        TypedBufferArg<T> tgt(addr);
-
-        tgt->st_dev = htog(host->st_dev);
-        tgt->st_ino = htog(host->st_ino);
-        tgt->st_mode = htog(host->st_mode);
-        tgt->st_nlink = htog(host->st_nlink);
-        tgt->st_uid = htog(host->st_uid);
-        tgt->st_gid = htog(host->st_gid);
-        tgt->st_rdev = htog(host->st_rdev);
-        tgt->st_size = htog(host->st_size);
-        tgt->st_atimeX = htog(host->st_atime);
-        tgt->st_mtimeX = htog(host->st_mtime);
-        tgt->st_ctimeX = htog(host->st_ctime);
-        tgt->st_blksize = htog(host->st_blksize);
-        tgt->st_blocks = htog(host->st_blocks);
-
-        tgt.copyOut(mem);
-    }*/
-
     /// Helper function to convert a host statfs buffer to a target statfs
     /// buffer.  Also copies the target buffer out to the simulated
     /// memory space.  Used by statfs() and fstatfs().
@@ -450,34 +422,6 @@ class Tru64 : public OperatingSystem
         tgt.copyOut(mem);
     }
 
-/*    /// Helper function to convert a host stat buffer to an old pre-F64
-    /// (4.x) target stat buffer.  Also copies the target buffer out to
-    /// the simulated memory space.  Used by pre_F64_stat(),
-    /// pre_F64_fstat(), and pre_F64_lstat().
-    static void
-    copyOutPreF64StatBuf(TranslatingPort *mem, Addr addr, struct stat *host)
-    {
-        using namespace TheISA;
-
-        TypedBufferArg<Tru64::pre_F64_stat> tgt(addr);
-
-        tgt->st_dev = htog(host->st_dev);
-        tgt->st_ino = htog(host->st_ino);
-        tgt->st_mode = htog(host->st_mode);
-        tgt->st_nlink = htog(host->st_nlink);
-        tgt->st_uid = htog(host->st_uid);
-        tgt->st_gid = htog(host->st_gid);
-        tgt->st_rdev = htog(host->st_rdev);
-        tgt->st_size = htog(host->st_size);
-        tgt->st_atimeX = htog(host->st_atime);
-        tgt->st_mtimeX = htog(host->st_mtime);
-        tgt->st_ctimeX = htog(host->st_ctime);
-        tgt->st_blksize = htog(host->st_blksize);
-        tgt->st_blocks = htog(host->st_blocks);
-
-        tgt.copyOut(mem);
-    }*/
-
 
     /// The target system's hostname.
     static const char *hostname;
@@ -600,12 +544,19 @@ class Tru64 : public OperatingSystem
             process->next_thread_stack_base -= stack_size;
         }
 
-        stack_base = roundDown(stack_base, VMPageSize);
+        Addr rounded_stack_base = roundDown(stack_base, VMPageSize);
+        Addr rounded_stack_size = roundUp(stack_size, VMPageSize);
+
+        DPRINTF(SyscallVerbose,
+                "stack_create: allocating stack @ %#x size %#x "
+                "(rounded from %#x, %#x)\n",
+                rounded_stack_base, rounded_stack_size,
+                stack_base, stack_size);
 
         // map memory
-        process->pTable->allocate(stack_base, roundUp(stack_size, VMPageSize));
+        process->pTable->allocate(rounded_stack_base, rounded_stack_size);
 
-        argp->address = gtoh(stack_base);
+        argp->address = gtoh(rounded_stack_base);
         argp.copyOut(tc->getMemPort());
 
         return 0;
diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc
index 6cbeef5a4..599958222 100644
--- a/src/mem/cache/base_cache.cc
+++ b/src/mem/cache/base_cache.cc
@@ -79,9 +79,7 @@ BaseCache::CachePort::recvTiming(PacketPtr pkt)
         && !pkt->isRead() && !pkt->isWrite()) {
         //Upgrade or Invalidate
         //Look into what happens if two slave caches on bus
-        DPRINTF(Cache, "%s %x ? blk_addr: %x\n", pkt->cmdString(),
-                pkt->getAddr() & (((ULL(1))<<48)-1),
-                pkt->getAddr() & ~((Addr)cache->blkSize - 1));
+        DPRINTF(Cache, "%s %x ?\n", pkt->cmdString(), pkt->getAddr());
 
         assert(!(pkt->flags & SATISFIED));
         pkt->flags |= SATISFIED;
@@ -115,32 +113,7 @@ BaseCache::CachePort::recvFunctional(PacketPtr pkt)
         // If the target contains data, and it overlaps the
         // probed request, need to update data
         if (target->intersect(pkt)) {
-            uint8_t* pkt_data;
-            uint8_t* write_data;
-            int data_size;
-            if (target->getAddr() < pkt->getAddr()) {
-                int offset = pkt->getAddr() - target->getAddr();
-                            pkt_data = pkt->getPtr<uint8_t>();
-                            write_data = target->getPtr<uint8_t>() + offset;
-                            data_size = target->getSize() - offset;
-                            assert(data_size > 0);
-                            if (data_size > pkt->getSize())
-                                data_size = pkt->getSize();
-            } else {
-                int offset = target->getAddr() - pkt->getAddr();
-                pkt_data = pkt->getPtr<uint8_t>() + offset;
-                write_data = target->getPtr<uint8_t>();
-                data_size = pkt->getSize() - offset;
-                assert(data_size >= pkt->getSize());
-                if (data_size > target->getSize())
-                    data_size = target->getSize();
-            }
-
-            if (pkt->isWrite()) {
-                memcpy(pkt_data, write_data, data_size);
-            } else {
-                memcpy(write_data, pkt_data, data_size);
-            }
+            fixPacket(pkt, target);
         }
     }
     cache->doFunctionalAccess(pkt, isCpuSide);
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index dcb0e7b78..9bb72e85c 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -205,9 +205,10 @@ Cache<TagStore,Buffering,Coherence>::access(PacketPtr &pkt)
         missQueue->doWriteback(writebacks.front());
         writebacks.pop_front();
     }
-    DPRINTF(Cache, "%s %x %s blk_addr: %x\n", pkt->cmdString(),
-            pkt->getAddr() & (((ULL(1))<<48)-1), (blk) ? "hit" : "miss",
-            pkt->getAddr() & ~((Addr)blkSize - 1));
+
+    DPRINTF(Cache, "%s %x %s\n", pkt->cmdString(), pkt->getAddr(),
+            (blk) ? "hit" : "miss");
+
     if (blk) {
         // Hit
         hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
@@ -231,8 +232,16 @@ Cache<TagStore,Buffering,Coherence>::access(PacketPtr &pkt)
                 exitSimLoop("A cache reached the maximum miss count");
         }
     }
-    missQueue->handleMiss(pkt, size, curTick + hitLatency);
-//    return MA_CACHE_MISS;
+
+    if (pkt->flags & SATISFIED) {
+        // happens when a store conditional fails because it missed
+        // the cache completely
+        if (pkt->needsResponse())
+            respond(pkt, curTick+lat);
+    } else {
+        missQueue->handleMiss(pkt, size, curTick + hitLatency);
+    }
+
     return true;
 }
 
@@ -280,10 +289,8 @@ Cache<TagStore,Buffering,Coherence>::sendResult(PacketPtr &pkt, MSHR* mshr,
             CacheBlk::State old_state = (blk) ? blk->status : 0;
             CacheBlk::State new_state = coherence->getNewState(pkt,old_state);
             if (old_state != new_state)
-                DPRINTF(Cache, "Block for blk addr %x moving from "
-                        "state %i to %i\n",
-                        pkt->getAddr() & (((ULL(1))<<48)-1),
-                        old_state, new_state);
+                DPRINTF(Cache, "Block for blk addr %x moving from state "
+                        "%i to %i\n", pkt->getAddr(), old_state, new_state);
             //Set the state on the upgrade
             memcpy(pkt->getPtr<uint8_t>(), blk->data, blkSize);
             PacketList writebacks;
@@ -323,8 +330,7 @@ Cache<TagStore,Buffering,Coherence>::handleResponse(PacketPtr &pkt)
             //Make the response a Bad address and send it
         }
 //	MemDebug::cacheResponse(pkt);
-        DPRINTF(Cache, "Handling reponse to %x, blk addr: %x\n",pkt->getAddr(),
-                pkt->getAddr() & (((ULL(1))<<48)-1));
+        DPRINTF(Cache, "Handling reponse to %x\n", pkt->getAddr());
 
         if (pkt->isCacheFill() && !pkt->isNoAllocate()) {
             blk = tags->findBlock(pkt);
@@ -334,7 +340,7 @@ Cache<TagStore,Buffering,Coherence>::handleResponse(PacketPtr &pkt)
             if (old_state != new_state)
                 DPRINTF(Cache, "Block for blk addr %x moving from "
                         "state %i to %i\n",
-                        pkt->getAddr() & (((ULL(1))<<48)-1),
+                        pkt->getAddr(),
                         old_state, new_state);
             blk = tags->handleFill(blk, (MSHR*)pkt->senderState,
                                    new_state, writebacks, pkt);
@@ -419,8 +425,8 @@ Cache<TagStore,Buffering,Coherence>::snoop(PacketPtr &pkt)
 
                     //Append the invalidate on
                     missQueue->addTarget(mshr,invalidatePkt);
-                    DPRINTF(Cache, "Appending Invalidate to blk_addr: %x\n",
-                            pkt->getAddr() & (((ULL(1))<<48)-1));
+                    DPRINTF(Cache, "Appending Invalidate to addr: %x\n",
+                            pkt->getAddr());
                     return;
                 }
             }
@@ -428,8 +434,8 @@ Cache<TagStore,Buffering,Coherence>::snoop(PacketPtr &pkt)
         //We also need to check the writeback buffers and handle those
         std::vector<MSHR *> writebacks;
         if (missQueue->findWrites(blk_addr, writebacks)) {
-            DPRINTF(Cache, "Snoop hit in writeback to blk_addr: %x\n",
-                    pkt->getAddr() & (((ULL(1))<<48)-1));
+            DPRINTF(Cache, "Snoop hit in writeback to addr: %x\n",
+                    pkt->getAddr());
 
             //Look through writebacks for any non-uncachable writes, use that
             for (int i=0; i<writebacks.size(); i++) {
@@ -520,12 +526,9 @@ Cache<TagStore,Buffering,Coherence>::probe(PacketPtr &pkt, bool update,
 {
 //    MemDebug::cacheProbe(pkt);
     if (!pkt->req->isUncacheable()) {
-        if (pkt->isInvalidate() && !pkt->isRead()
-            && !pkt->isWrite()) {
+        if (pkt->isInvalidate() && !pkt->isRead() && !pkt->isWrite()) {
             //Upgrade or Invalidate, satisfy it, don't forward
-            DPRINTF(Cache, "%s %x ? blk_addr: %x\n", pkt->cmdString(),
-                    pkt->getAddr() & (((ULL(1))<<48)-1),
-                    pkt->getAddr() & ~((Addr)blkSize - 1));
+            DPRINTF(Cache, "%s %x ?\n", pkt->cmdString(), pkt->getAddr());
             pkt->flags |= SATISFIED;
             return 0;
         }
@@ -542,9 +545,8 @@ Cache<TagStore,Buffering,Coherence>::probe(PacketPtr &pkt, bool update,
     int lat;
     BlkType *blk = tags->handleAccess(pkt, lat, writebacks, update);
 
-    DPRINTF(Cache, "%s %x %s blk_addr: %x\n", pkt->cmdString(),
-            pkt->getAddr() & (((ULL(1))<<48)-1), (blk) ? "hit" : "miss",
-            pkt->getAddr() & ~((Addr)blkSize - 1));
+    DPRINTF(Cache, "%s %x %s\n", pkt->cmdString(),
+            pkt->getAddr(), (blk) ? "hit" : "miss");
 
 
     // Need to check for outstanding misses and writes
@@ -560,7 +562,6 @@ Cache<TagStore,Buffering,Coherence>::probe(PacketPtr &pkt, bool update,
     if (!update) {
         // Check for data in MSHR and writebuffer.
         if (mshr) {
-            warn("Found outstanding miss on an non-update probe");
             MSHR::TargetList *targets = mshr->getTargetList();
             MSHR::TargetList::iterator i = targets->begin();
             MSHR::TargetList::iterator end = targets->end();
@@ -568,71 +569,15 @@ Cache<TagStore,Buffering,Coherence>::probe(PacketPtr &pkt, bool update,
                 PacketPtr target = *i;
                 // If the target contains data, and it overlaps the
                 // probed request, need to update data
-                if (target->isWrite() && target->intersect(pkt)) {
-                    uint8_t* pkt_data;
-                    uint8_t* write_data;
-                    int data_size;
-                    if (target->getAddr() < pkt->getAddr()) {
-                        int offset = pkt->getAddr() - target->getAddr();
-                        pkt_data = pkt->getPtr<uint8_t>();
-                        write_data = target->getPtr<uint8_t>() + offset;
-                        data_size = target->getSize() - offset;
-                        assert(data_size > 0);
-                        if (data_size > pkt->getSize())
-                            data_size = pkt->getSize();
-                    } else {
-                        int offset = target->getAddr() - pkt->getAddr();
-                        pkt_data = pkt->getPtr<uint8_t>() + offset;
-                        write_data = target->getPtr<uint8_t>();
-                        data_size = pkt->getSize() - offset;
-                        assert(data_size >= pkt->getSize());
-                        if (data_size > target->getSize())
-                            data_size = target->getSize();
-                    }
-
-                    if (pkt->isWrite()) {
-                        memcpy(pkt_data, write_data, data_size);
-                    } else {
-                        pkt->flags |= SATISFIED;
-                        pkt->result = Packet::Success;
-                        memcpy(write_data, pkt_data, data_size);
-                    }
+                if (target->intersect(pkt)) {
+                    fixPacket(pkt, target);
                 }
             }
         }
         for (int i = 0; i < writes.size(); ++i) {
             PacketPtr write = writes[i]->pkt;
             if (write->intersect(pkt)) {
-                warn("Found outstanding write on an non-update probe");
-                uint8_t* pkt_data;
-                uint8_t* write_data;
-                int data_size;
-                if (write->getAddr() < pkt->getAddr()) {
-                    int offset = pkt->getAddr() - write->getAddr();
-                    pkt_data = pkt->getPtr<uint8_t>();
-                    write_data = write->getPtr<uint8_t>() + offset;
-                    data_size = write->getSize() - offset;
-                    assert(data_size > 0);
-                    if (data_size > pkt->getSize())
-                        data_size = pkt->getSize();
-                } else {
-                    int offset = write->getAddr() - pkt->getAddr();
-                    pkt_data = pkt->getPtr<uint8_t>() + offset;
-                    write_data = write->getPtr<uint8_t>();
-                    data_size = pkt->getSize() - offset;
-                    assert(data_size >= pkt->getSize());
-                    if (data_size > write->getSize())
-                        data_size = write->getSize();
-                }
-
-                if (pkt->isWrite()) {
-                    memcpy(pkt_data, write_data, data_size);
-                } else {
-                    pkt->flags |= SATISFIED;
-                    pkt->result = Packet::Success;
-                    memcpy(write_data, pkt_data, data_size);
-                }
-
+                fixPacket(pkt, write);
             }
         }
         if (pkt->isRead()
@@ -642,10 +587,10 @@ Cache<TagStore,Buffering,Coherence>::probe(PacketPtr &pkt, bool update,
             assert(pkt->result == Packet::Success);
         }
         return 0;
-    } else if (!blk) {
+    } else if (!blk && !(pkt->flags & SATISFIED)) {
         // update the cache state and statistics
         if (mshr || !writes.empty()){
-            // Can't handle it, return pktuest unsatisfied.
+            // Can't handle it, return request unsatisfied.
             panic("Atomic access ran into outstanding MSHR's or WB's!");
         }
         if (!pkt->req->isUncacheable()) {
@@ -660,10 +605,8 @@ Cache<TagStore,Buffering,Coherence>::probe(PacketPtr &pkt, bool update,
 
             busPkt->time = curTick;
 
-            DPRINTF(Cache, "Sending a atomic %s for %x blk_addr: %x\n",
-                    busPkt->cmdString(),
-                    busPkt->getAddr() & (((ULL(1))<<48)-1),
-                    busPkt->getAddr() & ~((Addr)blkSize - 1));
+            DPRINTF(Cache, "Sending a atomic %s for %x\n",
+                    busPkt->cmdString(), busPkt->getAddr());
 
             lat = memSidePort->sendAtomic(busPkt);
 
@@ -682,19 +625,13 @@ return 0;
             CacheBlk::State old_state = (blk) ? blk->status : 0;
             CacheBlk::State new_state =
                 coherence->getNewState(busPkt, old_state);
-            DPRINTF(Cache,
-                        "Receive response:%s for blk addr %x in state %i\n",
-                    busPkt->cmdString(),
-                    busPkt->getAddr() & (((ULL(1))<<48)-1), old_state);
+            DPRINTF(Cache, "Receive response: %s for addr %x in state %i\n",
+                    busPkt->cmdString(), busPkt->getAddr(), old_state);
             if (old_state != new_state)
-                    DPRINTF(Cache, "Block for blk addr %x moving from "
-                            "state %i to %i\n",
-                            busPkt->getAddr() & (((ULL(1))<<48)-1),
-                            old_state, new_state);
-
-            tags->handleFill(blk, busPkt,
-                             new_state,
-                             writebacks, pkt);
+                DPRINTF(Cache, "Block for blk addr %x moving from state "
+                        "%i to %i\n", busPkt->getAddr(), old_state, new_state);
+
+            tags->handleFill(blk, busPkt, new_state, writebacks, pkt);
             //Free the packet
             delete busPkt;
 
@@ -710,18 +647,20 @@ return 0;
             return memSidePort->sendAtomic(pkt);
         }
     } else {
-        // There was a cache hit.
-        // Handle writebacks if needed
-        while (!writebacks.empty()){
-            memSidePort->sendAtomic(writebacks.front());
-            writebacks.pop_front();
-        }
+        if (blk) {
+            // There was a cache hit.
+            // Handle writebacks if needed
+            while (!writebacks.empty()){
+                memSidePort->sendAtomic(writebacks.front());
+                writebacks.pop_front();
+            }
 
-        hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
+            hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
+        }
 
         return hitLatency;
     }
-    fatal("Probe not handled.\n");
+
     return 0;
 }
 
diff --git a/src/mem/cache/coherence/coherence_protocol.cc b/src/mem/cache/coherence/coherence_protocol.cc
index 52beb0880..3d7721805 100644
--- a/src/mem/cache/coherence/coherence_protocol.cc
+++ b/src/mem/cache/coherence/coherence_protocol.cc
@@ -206,8 +206,7 @@ bool
 CoherenceProtocol::supplyTrans(BaseCache *cache, PacketPtr &pkt,
                                CacheBlk *blk,
                                MSHR *mshr,
-                               CacheBlk::State & new_state
-                               )
+                               CacheBlk::State & new_state)
 {
     return true;
 }
@@ -263,182 +262,106 @@ CoherenceProtocol::CoherenceProtocol(const string &name,
                                      const bool doUpgrades)
     : SimObject(name)
 {
-    if ((protocol == "mosi" || protocol == "moesi") && !doUpgrades) {
-        cerr << "CoherenceProtocol: ownership protocols require upgrade transactions"
-             << "(write miss on owned block generates ReadExcl, which will clobber dirty block)"
-             << endl;
-        fatal("");
+    // Python should catch this, but in case it doesn't...
+    if (!(protocol == "msi"  || protocol == "mesi" ||
+          protocol == "mosi" || protocol == "moesi")) {
+        fatal("CoherenceProtocol: unrecognized protocol %s\n",  protocol);
     }
 
-    Packet::Command writeToSharedCmd = doUpgrades ? Packet::UpgradeReq : Packet::ReadExReq;
-    Packet::Command writeToSharedResp = doUpgrades ? Packet::UpgradeReq : Packet::ReadExResp;
-
-//@todo add in hardware prefetch to this list
-    if (protocol == "msi") {
-        // incoming requests: specify outgoing bus request
-        transitionTable[Invalid][Packet::ReadReq].onRequest(Packet::ReadReq);
-        transitionTable[Invalid][Packet::WriteReq].onRequest(Packet::ReadExReq);
-        transitionTable[Shared][Packet::WriteReq].onRequest(writeToSharedCmd);
-        //Prefetching causes a read
-        transitionTable[Invalid][Packet::SoftPFReq].onRequest(Packet::ReadReq);
-        transitionTable[Invalid][Packet::HardPFReq].onRequest(Packet::ReadReq);
-
-        // on response to given request: specify new state
-        transitionTable[Invalid][Packet::ReadResp].onResponse(Shared);
-        transitionTable[Invalid][Packet::ReadExResp].onResponse(Modified);
-        transitionTable[Shared][writeToSharedResp].onResponse(Modified);
-
-        // bus snoop transition functions
-        transitionTable[Invalid][Packet::ReadReq].onSnoop(nullTransition);
-        transitionTable[Invalid][Packet::ReadExReq].onSnoop(nullTransition);
-        transitionTable[Shared][Packet::ReadReq].onSnoop(nullTransition);
-        transitionTable[Shared][Packet::ReadExReq].onSnoop(invalidateTrans);
-        transitionTable[Modified][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans);
-        transitionTable[Modified][Packet::ReadReq].onSnoop(supplyAndGotoSharedTrans);
-        //Tansitions on seeing a DMA (writeInv(samelevel) or DMAInv)
-        transitionTable[Invalid][Packet::InvalidateReq].onSnoop(invalidateTrans);
-        transitionTable[Shared][Packet::InvalidateReq].onSnoop(invalidateTrans);
-        transitionTable[Modified][Packet::InvalidateReq].onSnoop(invalidateTrans);
-        transitionTable[Invalid][Packet::WriteInvalidateReq].onSnoop(invalidateTrans);
-        transitionTable[Shared][Packet::WriteInvalidateReq].onSnoop(invalidateTrans);
-        transitionTable[Modified][Packet::WriteInvalidateReq].onSnoop(invalidateTrans);
-
-        if (doUpgrades) {
-            transitionTable[Invalid][Packet::UpgradeReq].onSnoop(nullTransition);
-            transitionTable[Shared][Packet::UpgradeReq].onSnoop(invalidateTrans);
-        }
+    bool hasOwned = (protocol == "mosi" || protocol == "moesi");
+    bool hasExclusive = (protocol == "mesi" || protocol == "moesi");
+
+    if (hasOwned && !doUpgrades) {
+        fatal("CoherenceProtocol: ownership protocols require upgrade "
+              "transactions\n(write miss on owned block generates ReadExcl, "
+              "which will clobber dirty block)\n");
     }
 
-    else if(protocol == "mesi") {
-        // incoming requests: specify outgoing bus request
-        transitionTable[Invalid][Packet::ReadReq].onRequest(Packet::ReadReq);
-        transitionTable[Invalid][Packet::WriteReq].onRequest(Packet::ReadExReq);
-        transitionTable[Shared][Packet::WriteReq].onRequest(writeToSharedCmd);
-        //Prefetching causes a read
-        transitionTable[Invalid][Packet::SoftPFReq].onRequest(Packet::ReadReq);
-        transitionTable[Invalid][Packet::HardPFReq].onRequest(Packet::ReadReq);
-
-        // on response to given request: specify new state
-        transitionTable[Invalid][Packet::ReadResp].onResponse(Exclusive);
-        //It will move into shared if the shared line is asserted in the
-        //getNewState function
-        transitionTable[Invalid][Packet::ReadExResp].onResponse(Modified);
-        transitionTable[Shared][writeToSharedResp].onResponse(Modified);
-
-        // bus snoop transition functions
-        transitionTable[Invalid][Packet::ReadReq].onSnoop(nullTransition);
-        transitionTable[Invalid][Packet::ReadExReq].onSnoop(nullTransition);
-        transitionTable[Shared][Packet::ReadReq].onSnoop(assertShared);
-        transitionTable[Shared][Packet::ReadExReq].onSnoop(invalidateTrans);
-        transitionTable[Exclusive][Packet::ReadReq].onSnoop(assertShared);
-        transitionTable[Exclusive][Packet::ReadExReq].onSnoop(invalidateTrans);
-        transitionTable[Modified][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans);
-        transitionTable[Modified][Packet::ReadReq].onSnoop(supplyAndGotoSharedTrans);
-        //Tansitions on seeing a DMA (writeInv(samelevel) or DMAInv)
-        transitionTable[Invalid][Packet::InvalidateReq].onSnoop(invalidateTrans);
-        transitionTable[Shared][Packet::InvalidateReq].onSnoop(invalidateTrans);
-        transitionTable[Modified][Packet::InvalidateReq].onSnoop(invalidateTrans);
-        transitionTable[Exclusive][Packet::InvalidateReq].onSnoop(invalidateTrans);
-        transitionTable[Invalid][Packet::WriteInvalidateReq].onSnoop(invalidateTrans);
-        transitionTable[Shared][Packet::WriteInvalidateReq].onSnoop(invalidateTrans);
-        transitionTable[Modified][Packet::WriteInvalidateReq].onSnoop(invalidateTrans);
-        transitionTable[Exclusive][Packet::WriteInvalidateReq].onSnoop(invalidateTrans);
-
-        if (doUpgrades) {
-            transitionTable[Invalid][Packet::UpgradeReq].onSnoop(nullTransition);
-            transitionTable[Shared][Packet::UpgradeReq].onSnoop(invalidateTrans);
-        }
+    // set up a few shortcuts to save typing & visual clutter
+    typedef Packet P;
+    StateTransition (&tt)[stateMax+1][NUM_MEM_CMDS] = transitionTable;
+
+    P::Command writeToSharedCmd =  doUpgrades ? P::UpgradeReq : P::ReadExReq;
+    P::Command writeToSharedResp = doUpgrades ? P::UpgradeReq : P::ReadExResp;
+
+    // Note that all transitions by default cause a panic.
+    // Override the valid transitions with the appropriate actions here.
+
+    //
+    // ----- incoming requests: specify outgoing bus request -----
+    //
+    tt[Invalid][P::ReadReq].onRequest(P::ReadReq);
+    // we only support write allocate right now
+    tt[Invalid][P::WriteReq].onRequest(P::ReadExReq);
+    tt[Shared][P::WriteReq].onRequest(writeToSharedCmd);
+    if (hasOwned) {
+        tt[Owned][P::WriteReq].onRequest(writeToSharedCmd);
     }
 
-    else if(protocol == "mosi") {
-        // incoming requests: specify outgoing bus request
-        transitionTable[Invalid][Packet::ReadReq].onRequest(Packet::ReadReq);
-        transitionTable[Invalid][Packet::WriteReq].onRequest(Packet::ReadExReq);
-        transitionTable[Shared][Packet::WriteReq].onRequest(writeToSharedCmd);
-        transitionTable[Owned][Packet::WriteReq].onRequest(writeToSharedCmd);
-        //Prefetching causes a read
-        transitionTable[Invalid][Packet::SoftPFReq].onRequest(Packet::ReadReq);
-        transitionTable[Invalid][Packet::HardPFReq].onRequest(Packet::ReadReq);
-
-        // on response to given request: specify new state
-        transitionTable[Invalid][Packet::ReadResp].onResponse(Shared);
-        transitionTable[Invalid][Packet::ReadExResp].onResponse(Modified);
-        transitionTable[Shared][writeToSharedResp].onResponse(Modified);
-        transitionTable[Owned][writeToSharedResp].onResponse(Modified);
-
-        // bus snoop transition functions
-        transitionTable[Invalid][Packet::ReadReq].onSnoop(nullTransition);
-        transitionTable[Invalid][Packet::ReadExReq].onSnoop(nullTransition);
-        transitionTable[Invalid][Packet::UpgradeReq].onSnoop(nullTransition);
-        transitionTable[Shared][Packet::ReadReq].onSnoop(assertShared);
-        transitionTable[Shared][Packet::ReadExReq].onSnoop(invalidateTrans);
-        transitionTable[Shared][Packet::UpgradeReq].onSnoop(invalidateTrans);
-        transitionTable[Modified][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans);
-        transitionTable[Modified][Packet::ReadReq].onSnoop(supplyAndGotoOwnedTrans);
-        transitionTable[Owned][Packet::ReadReq].onSnoop(supplyAndGotoOwnedTrans);
-        transitionTable[Owned][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans);
-        transitionTable[Owned][Packet::UpgradeReq].onSnoop(invalidateTrans);
-        //Tansitions on seeing a DMA (writeInv(samelevel) or DMAInv)
-        transitionTable[Invalid][Packet::InvalidateReq].onSnoop(invalidateTrans);
-        transitionTable[Shared][Packet::InvalidateReq].onSnoop(invalidateTrans);
-        transitionTable[Modified][Packet::InvalidateReq].onSnoop(invalidateTrans);
-        transitionTable[Owned][Packet::InvalidateReq].onSnoop(invalidateTrans);
-        transitionTable[Invalid][Packet::WriteInvalidateReq].onSnoop(invalidateTrans);
-        transitionTable[Shared][Packet::WriteInvalidateReq].onSnoop(invalidateTrans);
-        transitionTable[Modified][Packet::WriteInvalidateReq].onSnoop(invalidateTrans);
-        transitionTable[Owned][Packet::WriteInvalidateReq].onSnoop(invalidateTrans);
+    // Prefetching causes a read
+    tt[Invalid][P::SoftPFReq].onRequest(P::ReadReq);
+    tt[Invalid][P::HardPFReq].onRequest(P::ReadReq);
+
+    //
+    // ----- on response to given request: specify new state -----
+    //
+    tt[Invalid][P::ReadExResp].onResponse(Modified);
+    tt[Shared][writeToSharedResp].onResponse(Modified);
+    // Go to Exclusive state on read response if we have one (will
+    // move into shared if the shared line is asserted in the
+    // getNewState function)
+    //
+    // originally written as a single call:
+    // tt[Invalid][P::ReadResp].onResponse(hasExclusive ? Exclusive: Shared);
+    // ...but that caused a link error (presumably odr-use of the constants).
+    if (hasExclusive) {
+        tt[Invalid][P::ReadResp].onResponse(Exclusive);
+    } else {
+        tt[Invalid][P::ReadResp].onResponse(Shared);
+    }
+    if (hasOwned) {
+        tt[Owned][writeToSharedResp].onResponse(Modified);
     }
 
-    else if(protocol == "moesi") {
-        // incoming requests: specify outgoing bus request
-        transitionTable[Invalid][Packet::ReadReq].onRequest(Packet::ReadReq);
-        transitionTable[Invalid][Packet::WriteReq].onRequest(Packet::ReadExReq);
-        transitionTable[Shared][Packet::WriteReq].onRequest(writeToSharedCmd);
-        transitionTable[Owned][Packet::WriteReq].onRequest(writeToSharedCmd);
-        //Prefetching causes a read
-        transitionTable[Invalid][Packet::SoftPFReq].onRequest(Packet::ReadReq);
-        transitionTable[Invalid][Packet::HardPFReq].onRequest(Packet::ReadReq);
-
-        // on response to given request: specify new state
-        transitionTable[Invalid][Packet::ReadResp].onResponse(Exclusive);
-        //It will move into shared if the shared line is asserted in the
-        //getNewState function
-        transitionTable[Invalid][Packet::ReadExResp].onResponse(Modified);
-        transitionTable[Shared][writeToSharedResp].onResponse(Modified);
-        transitionTable[Owned][writeToSharedResp].onResponse(Modified);
-
-        // bus snoop transition functions
-        transitionTable[Invalid][Packet::ReadReq].onSnoop(nullTransition);
-        transitionTable[Invalid][Packet::ReadExReq].onSnoop(nullTransition);
-        transitionTable[Invalid][Packet::UpgradeReq].onSnoop(nullTransition);
-        transitionTable[Shared][Packet::ReadReq].onSnoop(assertShared);
-        transitionTable[Shared][Packet::ReadExReq].onSnoop(invalidateTrans);
-        transitionTable[Shared][Packet::UpgradeReq].onSnoop(invalidateTrans);
-        transitionTable[Exclusive][Packet::ReadReq].onSnoop(assertShared);
-        transitionTable[Exclusive][Packet::ReadExReq].onSnoop(invalidateTrans);
-        transitionTable[Modified][Packet::ReadReq].onSnoop(supplyAndGotoOwnedTrans);
-        transitionTable[Modified][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans);
-        transitionTable[Owned][Packet::ReadReq].onSnoop(supplyAndGotoOwnedTrans);
-        transitionTable[Owned][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans);
-        transitionTable[Owned][Packet::UpgradeReq].onSnoop(invalidateTrans);
-        //Transitions on seeing a DMA (writeInv(samelevel) or DMAInv)
-        transitionTable[Invalid][Packet::InvalidateReq].onSnoop(invalidateTrans);
-        transitionTable[Shared][Packet::InvalidateReq].onSnoop(invalidateTrans);
-        transitionTable[Exclusive][Packet::InvalidateReq].onSnoop(invalidateTrans);
-        transitionTable[Modified][Packet::InvalidateReq].onSnoop(invalidateTrans);
-        transitionTable[Owned][Packet::InvalidateReq].onSnoop(invalidateTrans);
-        transitionTable[Invalid][Packet::WriteInvalidateReq].onSnoop(invalidateTrans);
-        transitionTable[Shared][Packet::WriteInvalidateReq].onSnoop(invalidateTrans);
-        transitionTable[Exclusive][Packet::WriteInvalidateReq].onSnoop(invalidateTrans);
-        transitionTable[Modified][Packet::WriteInvalidateReq].onSnoop(invalidateTrans);
-        transitionTable[Owned][Packet::WriteInvalidateReq].onSnoop(invalidateTrans);
+    //
+    // ----- bus snoop transition functions -----
+    //
+    tt[Invalid][P::ReadReq].onSnoop(nullTransition);
+    tt[Invalid][P::ReadExReq].onSnoop(nullTransition);
+    tt[Invalid][P::InvalidateReq].onSnoop(invalidateTrans);
+    tt[Invalid][P::WriteInvalidateReq].onSnoop(invalidateTrans);
+    tt[Shared][P::ReadReq].onSnoop(hasExclusive
+                                   ? assertShared : nullTransition);
+    tt[Shared][P::ReadExReq].onSnoop(invalidateTrans);
+    tt[Shared][P::InvalidateReq].onSnoop(invalidateTrans);
+    tt[Shared][P::WriteInvalidateReq].onSnoop(invalidateTrans);
+    if (doUpgrades) {
+        tt[Invalid][P::UpgradeReq].onSnoop(nullTransition);
+        tt[Shared][P::UpgradeReq].onSnoop(invalidateTrans);
+    }
+    tt[Modified][P::ReadExReq].onSnoop(supplyAndInvalidateTrans);
+    tt[Modified][P::ReadReq].onSnoop(hasOwned
+                                     ? supplyAndGotoOwnedTrans
+                                     : supplyAndGotoSharedTrans);
+    tt[Modified][P::InvalidateReq].onSnoop(invalidateTrans);
+    tt[Modified][P::WriteInvalidateReq].onSnoop(invalidateTrans);
+
+    if (hasExclusive) {
+        tt[Exclusive][P::ReadReq].onSnoop(assertShared);
+        tt[Exclusive][P::ReadExReq].onSnoop(invalidateTrans);
+        tt[Exclusive][P::InvalidateReq].onSnoop(invalidateTrans);
+        tt[Exclusive][P::WriteInvalidateReq].onSnoop(invalidateTrans);
     }
 
-    else {
-        cerr << "CoherenceProtocol: unrecognized protocol " << protocol
-             <<  endl;
-        fatal("");
+    if (hasOwned) {
+        tt[Owned][P::ReadReq].onSnoop(supplyAndGotoOwnedTrans);
+        tt[Owned][P::ReadExReq].onSnoop(supplyAndInvalidateTrans);
+        tt[Owned][P::UpgradeReq].onSnoop(invalidateTrans);
+        tt[Owned][P::InvalidateReq].onSnoop(invalidateTrans);
+        tt[Owned][P::WriteInvalidateReq].onSnoop(invalidateTrans);
     }
+
+    // @todo add in hardware prefetch to this list
 }
 
 
diff --git a/src/mem/cache/coherence/coherence_protocol.hh b/src/mem/cache/coherence/coherence_protocol.hh
index b30fb053b..481277523 100644
--- a/src/mem/cache/coherence/coherence_protocol.hh
+++ b/src/mem/cache/coherence/coherence_protocol.hh
@@ -211,31 +211,25 @@ class CoherenceProtocol : public SimObject
     friend class CoherenceProtocol::StateTransition;
 
     /** Mask to select status bits relevant to coherence protocol. */
-    const static CacheBlk::State
-        stateMask = BlkValid | BlkWritable | BlkDirty;
+    static const int stateMask = BlkValid | BlkWritable | BlkDirty;
 
     /** The Modified (M) state. */
-    const static CacheBlk::State
-        Modified = BlkValid | BlkWritable | BlkDirty;
+    static const int Modified = BlkValid | BlkWritable | BlkDirty;
     /** The Owned (O) state. */
-    const static CacheBlk::State
-        Owned = BlkValid | BlkDirty;
+    static const int Owned = BlkValid | BlkDirty;
     /** The Exclusive (E) state. */
-    const static CacheBlk::State
-        Exclusive = BlkValid | BlkWritable;
+    static const int Exclusive = BlkValid | BlkWritable;
     /** The Shared (S) state. */
-    const static CacheBlk::State
-        Shared = BlkValid;
+    static const int Shared = BlkValid;
     /** The Invalid (I) state. */
-    const static CacheBlk::State
-        Invalid = 0;
+    static const int Invalid = 0;
 
     /**
      * Maximum state encoding value (used to size transition lookup
      * table).  Could be more than number of states, depends on
      * encoding of status bits.
      */
-    const static int stateMax = stateMask;
+    static const int stateMax = stateMask;
 
     /**
      * The table of all possible transitions, organized by starting state and
diff --git a/src/mem/cache/miss/blocking_buffer.hh b/src/mem/cache/miss/blocking_buffer.hh
index 4408cfc4f..934a843a6 100644
--- a/src/mem/cache/miss/blocking_buffer.hh
+++ b/src/mem/cache/miss/blocking_buffer.hh
@@ -128,8 +128,8 @@ public:
     }
 
     /**
-     * Selects a outstanding pktuest to service.
-     * @return The pktuest to service, NULL if none found.
+     * Selects an outstanding request to service.
+     * @return The request to service, NULL if none found.
      */
     PacketPtr getPacket();
 
@@ -147,7 +147,7 @@ public:
     void restoreOrigCmd(PacketPtr &pkt);
 
     /**
-     * Marks a pktuest as in service (sent on the bus). This can have side
+     * Marks a request as in service (sent on the bus). This can have side
      * effect since storage for no response commands is deallocated once they
      * are successfully sent.
      * @param pkt The request that was sent on the bus.
@@ -155,14 +155,14 @@ public:
     void markInService(PacketPtr &pkt, MSHR* mshr);
 
     /**
-     * Frees the resources of the pktuest and unblock the cache.
+     * Frees the resources of the request and unblocks the cache.
      * @param pkt The request that has been satisfied.
-     * @param time The time when the pktuest is satisfied.
+     * @param time The time when the request is satisfied.
      */
     void handleResponse(PacketPtr &pkt, Tick time);
 
     /**
-     * Removes all outstanding pktuests for a given thread number. If a request
+     * Removes all outstanding requests for a given thread number. If a request
      * has been sent to the bus, this function removes all of its targets.
      * @param threadNum The thread number of the requests to squash.
      */
@@ -220,14 +220,14 @@ public:
                      int size, uint8_t *data, bool compressed);
 
     /**
-     * Perform a writeback pktuest.
+     * Perform a writeback request.
      * @param pkt The writeback request.
      */
     void doWriteback(PacketPtr &pkt);
 
     /**
-     * Returns true if there are outstanding pktuests.
-     * @return True if there are outstanding pktuests.
+     * Returns true if there are outstanding requests.
+     * @return True if there are outstanding requests.
      */
     bool havePending()
     {
diff --git a/src/mem/cache/miss/miss_queue.hh b/src/mem/cache/miss/miss_queue.hh
index 2e04802fb..b67a896f4 100644
--- a/src/mem/cache/miss/miss_queue.hh
+++ b/src/mem/cache/miss/miss_queue.hh
@@ -77,7 +77,7 @@ class MissQueue
     /** The block size of the parent cache. */
     int blkSize;
 
-    /** Increasing order number assigned to each incoming pktuest. */
+    /** Increasing order number assigned to each incoming request. */
     uint64_t order;
 
     bool prefetchMiss;
@@ -212,7 +212,7 @@ class MissQueue
     void setPrefetcher(BasePrefetcher *_prefetcher);
 
     /**
-     * Handle a cache miss properly. Either allocate an MSHR for the pktuest,
+     * Handle a cache miss properly. Either allocate an MSHR for the request,
      * or forward it through the write buffer.
      * @param pkt The request that missed in the cache.
      * @param blk_size The block size of the cache.
@@ -232,8 +232,8 @@ class MissQueue
                      PacketPtr &target);
 
     /**
-     * Selects a outstanding pktuest to service.
-     * @return The pktuest to service, NULL if none found.
+     * Selects an outstanding request to service.
+     * @return The request to service, NULL if none found.
      */
     PacketPtr getPacket();
 
@@ -251,7 +251,7 @@ class MissQueue
     void restoreOrigCmd(PacketPtr &pkt);
 
     /**
-     * Marks a pktuest as in service (sent on the bus). This can have side
+     * Marks a request as in service (sent on the bus). This can have side
      * effect since storage for no response commands is deallocated once they
      * are successfully sent.
      * @param pkt The request that was sent on the bus.
@@ -259,14 +259,14 @@ class MissQueue
     void markInService(PacketPtr &pkt, MSHR* mshr);
 
     /**
-     * Collect statistics and free resources of a satisfied pktuest.
+     * Collect statistics and free resources of a satisfied request.
      * @param pkt The request that has been satisfied.
-     * @param time The time when the pktuest is satisfied.
+     * @param time The time when the request is satisfied.
      */
     void handleResponse(PacketPtr &pkt, Tick time);
 
     /**
-     * Removes all outstanding pktuests for a given thread number. If a request
+     * Removes all outstanding requests for a given thread number. If a request
      * has been sent to the bus, this function removes all of its targets.
      * @param threadNum The thread number of the requests to squash.
      */
@@ -313,14 +313,14 @@ class MissQueue
                      int size, uint8_t *data, bool compressed);
 
     /**
-     * Perform the given writeback pktuest.
+     * Perform the given writeback request.
      * @param pkt The writeback request.
      */
     void doWriteback(PacketPtr &pkt);
 
     /**
-     * Returns true if there are outstanding pktuests.
-     * @return True if there are outstanding pktuests.
+     * Returns true if there are outstanding requests.
+     * @return True if there are outstanding requests.
      */
     bool havePending();
 
diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh
index d92aa8a85..281ea9d49 100644
--- a/src/mem/cache/miss/mshr.hh
+++ b/src/mem/cache/miss/mshr.hh
@@ -44,7 +44,7 @@ class MSHR;
 
 /**
  * Miss Status and handling Register. This class keeps all the information
- * needed to handle a cache miss including a list of target pktuests.
+ * needed to handle a cache miss including a list of target requests.
  */
 class MSHR {
   public:
@@ -63,15 +63,15 @@ class MSHR {
     Addr addr;
     /** Adress space id of the miss. */
     short asid;
-    /** True if the pktuest has been sent to the bus. */
+    /** True if the request has been sent to the bus. */
     bool inService;
     /** Thread number of the miss. */
     int threadNum;
-    /** The pktuest that is forwarded to the next level of the hierarchy. */
+    /** The request that is forwarded to the next level of the hierarchy. */
     PacketPtr pkt;
     /** The number of currently allocated targets. */
     short ntargets;
-    /** The original pktuesting command. */
+    /** The original requesting command. */
     Packet::Command originalCmd;
     /** Order number of assigned by the miss queue. */
     uint64_t order;
@@ -88,24 +88,24 @@ class MSHR {
     Iterator allocIter;
 
 private:
-    /** List of all pktuests that match the address */
+    /** List of all requests that match the address */
     TargetList targets;
 
 public:
     /**
      * Allocate a miss to this MSHR.
-     * @param cmd The pktuesting command.
+     * @param cmd The requesting command.
      * @param addr The address of the miss.
      * @param asid The address space id of the miss.
-     * @param size The number of bytes to pktuest.
+     * @param size The number of bytes to request.
      * @param pkt  The original miss.
      */
     void allocate(Packet::Command cmd, Addr addr, int size,
                   PacketPtr &pkt);
 
     /**
-     * Allocate this MSHR as a buffer for the given pktuest.
-     * @param target The memory pktuest to buffer.
+     * Allocate this MSHR as a buffer for the given request.
+     * @param target The memory request to buffer.
      */
     void allocateAsBuffer(PacketPtr &target);
 
@@ -115,7 +115,7 @@ public:
     void deallocate();
 
     /**
-     * Add a pktuest to the list of targets.
+     * Add a request to the list of targets.
      * @param target The target.
      */
     void allocateTarget(PacketPtr &target);
diff --git a/src/mem/cache/miss/mshr_queue.hh b/src/mem/cache/miss/mshr_queue.hh
index 30397d9a0..ec2ddae8a 100644
--- a/src/mem/cache/miss/mshr_queue.hh
+++ b/src/mem/cache/miss/mshr_queue.hh
@@ -39,7 +39,7 @@
 #include "mem/cache/miss/mshr.hh"
 
 /**
- * A Class for maintaining a list of pending and allocated memory pktuests.
+ * A Class for maintaining a list of pending and allocated memory requests.
  */
 class MSHRQueue {
   private:
@@ -55,7 +55,7 @@ class MSHRQueue {
     // Parameters
     /**
      * The total number of MSHRs in this queue. This number is set as the
-     * number of MSHRs pktuested plus (numReserve - 1). This allows for
+     * number of MSHRs requested plus (numReserve - 1). This allows for
      * the same number of effective MSHRs while still maintaining the reserve.
      */
     const int numMSHRs;
@@ -103,14 +103,14 @@ class MSHRQueue {
     bool findMatches(Addr addr, std::vector<MSHR*>& matches) const;
 
     /**
-     * Find any pending pktuests that overlap the given request.
+     * Find any pending requests that overlap the given request.
      * @param pkt The request to find.
      * @return A pointer to the earliest matching MSHR.
      */
     MSHR* findPending(PacketPtr &pkt) const;
 
     /**
-     * Allocates a new MSHR for the pktuest and size. This places the request
+     * Allocates a new MSHR for the request and size. This places the request
      * as the first target in the MSHR.
      * @param pkt The request to handle.
      * @param size The number in bytes to fetch from memory.
@@ -121,12 +121,12 @@ class MSHRQueue {
     MSHR* allocate(PacketPtr &pkt, int size = 0);
 
     /**
-     * Allocate a read pktuest for the given address, and places the given
+     * Allocates a read request for the given address, and places the given
      * target on the target list.
      * @param addr The address to fetch.
      * @param asid The address space for the fetch.
-     * @param size The number of bytes to pktuest.
-     * @param target The first target for the pktuest.
+     * @param size The number of bytes to request.
+     * @param target The first target for the request.
      * @return Pointer to the new MSHR.
      */
     MSHR* allocateFetch(Addr addr, int size, PacketPtr &target);
@@ -135,7 +135,7 @@ class MSHRQueue {
      * Allocate a target list for the given address.
      * @param addr The address to fetch.
      * @param asid The address space for the fetch.
-     * @param size The number of bytes to pktuest.
+     * @param size The number of bytes to request.
      * @return Pointer to the new MSHR.
      */
     MSHR* allocateTargetList(Addr addr, int size);
@@ -181,14 +181,14 @@ class MSHRQueue {
     void markInService(MSHR* mshr);
 
     /**
-     * Mark an in service mshr as pending, used to resend a pktuest.
+     * Mark an in service mshr as pending, used to resend a request.
      * @param mshr The MSHR to resend.
      * @param cmd The command to resend.
      */
     void markPending(MSHR* mshr, Packet::Command cmd);
 
     /**
-     * Squash outstanding pktuests with the given thread number. If a request
+     * Squash outstanding requests with the given thread number. If a request
      * is in service, just squashes the targets.
      * @param threadNum The thread to squash.
      */
@@ -196,7 +196,7 @@ class MSHRQueue {
 
     /**
      * Returns true if the pending list is not empty.
-     * @return True if there are outstanding pktuests.
+     * @return True if there are outstanding requests.
      */
     bool havePending() const
     {
@@ -213,8 +213,8 @@ class MSHRQueue {
     }
 
     /**
-     * Returns the pktuest at the head of the pendingList.
-     * @return The next pktuest to service.
+     * Returns the request at the head of the pendingList.
+     * @return The next request to service.
      */
     PacketPtr getReq() const
     {
diff --git a/src/mem/cache/tags/split.hh b/src/mem/cache/tags/split.hh
index 748f6fb25..898d3c7a0 100644
--- a/src/mem/cache/tags/split.hh
+++ b/src/mem/cache/tags/split.hh
@@ -71,13 +71,13 @@ class Split : public BaseTags
 
     Addr blkMask;
 
-    /** Number of NIC pktuests that hit in the NIC partition */
+    /** Number of NIC requests that hit in the NIC partition */
     Stats::Scalar<> NR_NP_hits;
-    /** Number of NIC pktuests that hit in the CPU partition */
+    /** Number of NIC requests that hit in the CPU partition */
     Stats::Scalar<> NR_CP_hits;
-    /** Number of CPU pktuests that hit in the NIC partition */
+    /** Number of CPU requests that hit in the NIC partition */
     Stats::Scalar<> CR_NP_hits;
-    /** Number of CPU pktuests that hit in the CPU partition */
+    /** Number of CPU requests that hit in the CPU partition */
     Stats::Scalar<> CR_CP_hits;
     /** The number of nic replacements (i.e. misses) */
     Stats::Scalar<> nic_repl;
diff --git a/src/mem/cache/tags/split_blk.hh b/src/mem/cache/tags/split_blk.hh
index 64d903579..f38516180 100644
--- a/src/mem/cache/tags/split_blk.hh
+++ b/src/mem/cache/tags/split_blk.hh
@@ -47,7 +47,7 @@ class SplitBlk : public CacheBlk {
     bool isTouched;
     /** Has this block been used after being brought in? (for LIFO partition) */
     bool isUsed;
-    /** is this blk a NIC block? (i.e. pktuested by the NIC) */
+    /** is this blk a NIC block? (i.e. requested by the NIC) */
     bool isNIC;
     /** timestamp of the arrival of this block into the cache */
     Tick ts;
diff --git a/src/mem/packet.cc b/src/mem/packet.cc
index 834f4b63c..fa8d82c46 100644
--- a/src/mem/packet.cc
+++ b/src/mem/packet.cc
@@ -151,7 +151,7 @@ fixPacket(PacketPtr func, PacketPtr timing)
     Addr timingStart    = timing->getAddr();
     Addr timingEnd      = timing->getAddr() + timing->getSize() - 1;
 
-    assert(!(funcStart > timingEnd || timingStart < funcEnd));
+    assert(!(funcStart > timingEnd || timingStart > funcEnd));
 
     if (DTRACE(FunctionalAccess)) {
        DebugOut() << func;
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index d8ad49bdb..cb97dd036 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -51,16 +51,16 @@ typedef uint8_t* PacketDataPtr;
 typedef std::list<PacketPtr> PacketList;
 
 //Coherence Flags
-#define NACKED_LINE 1 << 0
-#define SATISFIED 1 << 1
-#define SHARED_LINE 1 << 2
-#define CACHE_LINE_FILL 1 << 3
-#define COMPRESSED 1 << 4
-#define NO_ALLOCATE 1 << 5
-#define SNOOP_COMMIT 1 << 6
+#define NACKED_LINE     (1 << 0)
+#define SATISFIED       (1 << 1)
+#define SHARED_LINE     (1 << 2)
+#define CACHE_LINE_FILL (1 << 3)
+#define COMPRESSED      (1 << 4)
+#define NO_ALLOCATE     (1 << 5)
+#define SNOOP_COMMIT    (1 << 6)
 
 //for now.  @todo fix later
-#define NUM_MEM_CMDS 1 << 11
+#define NUM_MEM_CMDS    (1 << 11)
 /**
  * A Packet is used to encapsulate a transfer between two objects in
  * the memory system (e.g., the L1 and L2 cache).  (In contrast, a
@@ -172,17 +172,17 @@ class Packet
     // as well.
     enum CommandAttribute
     {
-        IsRead                = 1 << 0,
-        IsWrite                = 1 << 1,
-        IsPrefetch        = 1 << 2,
-        IsInvalidate        = 1 << 3,
-        IsRequest        = 1 << 4,
-        IsResponse         = 1 << 5,
-        NeedsResponse        = 1 << 6,
+        IsRead          = 1 << 0,
+        IsWrite         = 1 << 1,
+        IsPrefetch      = 1 << 2,
+        IsInvalidate    = 1 << 3,
+        IsRequest       = 1 << 4,
+        IsResponse      = 1 << 5,
+        NeedsResponse   = 1 << 6,
         IsSWPrefetch    = 1 << 7,
         IsHWPrefetch    = 1 << 8,
         IsUpgrade       = 1 << 9,
-        HasData                = 1 << 10
+        HasData         = 1 << 10
     };
 
   public:
@@ -190,27 +190,27 @@ class Packet
     enum Command
     {
         InvalidCmd      = 0,
-        ReadReq                = IsRead  | IsRequest | NeedsResponse,
+        ReadReq         = IsRead  | IsRequest | NeedsResponse,
         WriteReq        = IsWrite | IsRequest | NeedsResponse | HasData,
-        WriteReqNoAck        = IsWrite | IsRequest | HasData,
+        WriteReqNoAck   = IsWrite | IsRequest | HasData,
         ReadResp        = IsRead  | IsResponse | NeedsResponse | HasData,
-        WriteResp        = IsWrite | IsResponse | NeedsResponse,
+        WriteResp       = IsWrite | IsResponse | NeedsResponse,
         Writeback       = IsWrite | IsRequest | HasData,
         SoftPFReq       = IsRead  | IsRequest | IsSWPrefetch | NeedsResponse,
         HardPFReq       = IsRead  | IsRequest | IsHWPrefetch | NeedsResponse,
         SoftPFResp      = IsRead  | IsResponse | IsSWPrefetch
-                                | NeedsResponse | HasData,
+                                  | NeedsResponse | HasData,
         HardPFResp      = IsRead  | IsResponse | IsHWPrefetch
-                                    | NeedsResponse | HasData,
+                                  | NeedsResponse | HasData,
         InvalidateReq   = IsInvalidate | IsRequest,
-        WriteInvalidateReq = IsWrite | IsInvalidate | IsRequest
-                                   | HasData | NeedsResponse,
-        WriteInvalidateResp = IsWrite | IsInvalidate | IsRequest | NeedsResponse
-                                   | IsResponse,
+        WriteInvalidateReq  = IsWrite | IsInvalidate | IsRequest
+                                      | HasData | NeedsResponse,
+        WriteInvalidateResp = IsWrite | IsInvalidate | IsRequest
+                                      | NeedsResponse | IsResponse,
         UpgradeReq      = IsInvalidate | IsRequest | IsUpgrade,
         ReadExReq       = IsRead | IsInvalidate | IsRequest | NeedsResponse,
         ReadExResp      = IsRead | IsInvalidate | IsResponse
-                                | NeedsResponse | HasData
+                                 | NeedsResponse | HasData
     };
 
     /** Return the string name of the cmd field (for debugging and
@@ -310,6 +310,7 @@ class Packet
      *   multiple transactions. */
     void reinitFromRequest() {
         assert(req->validPaddr);
+        flags = 0;
         addr = req->paddr;
         size = req->size;
         time = req->time;
diff --git a/src/mem/tport.cc b/src/mem/tport.cc
index 350b4c8e3..55a461a8b 100644
--- a/src/mem/tport.cc
+++ b/src/mem/tport.cc
@@ -33,12 +33,10 @@
 void
 SimpleTimingPort::recvFunctional(PacketPtr pkt)
 {
-    //First check queued events
     std::list<PacketPtr>::iterator i = transmitList.begin();
     std::list<PacketPtr>::iterator end = transmitList.end();
-    bool cont = true;
 
-    while (i != end && cont) {
+    while (i != end) {
         PacketPtr target = *i;
         // If the target contains data, and it overlaps the
         // probed request, need to update data
@@ -46,8 +44,9 @@ SimpleTimingPort::recvFunctional(PacketPtr pkt)
             fixPacket(pkt, target);
 
     }
+
     //Then just do an atomic access and throw away the returned latency
-    if (cont)
+    if (pkt->result != Packet::Success)
         recvAtomic(pkt);
 }
 
diff --git a/src/python/m5/__init__.py b/src/python/m5/__init__.py
index 03e0508fb..d41fd5a61 100644
--- a/src/python/m5/__init__.py
+++ b/src/python/m5/__init__.py
@@ -150,7 +150,6 @@ def changeToAtomic(system):
     doDrain(system)
     print "Changing memory mode to atomic"
     system.changeTiming(cc_main.SimObject.Atomic)
-    resume(system)
 
 def changeToTiming(system):
     if not isinstance(system, objects.Root) and not isinstance(system, objects.System):
@@ -159,7 +158,6 @@ def changeToTiming(system):
     doDrain(system)
     print "Changing memory mode to timing"
     system.changeTiming(cc_main.SimObject.Timing)
-    resume(system)
 
 def switchCpus(cpuList):
     print "switching cpus"
@@ -190,7 +188,6 @@ def switchCpus(cpuList):
     cc_main.cleanupCountedDrain(drain_event)
     # Now all of the CPUs are ready to be switched out
     for old_cpu in old_cpus:
-        print "switching"
         old_cpu._ccObject.switchOut()
     index = 0
     for new_cpu in new_cpus:
diff --git a/src/python/m5/objects/MemTest.py b/src/python/m5/objects/MemTest.py
index 83399be80..1219ddd4d 100644
--- a/src/python/m5/objects/MemTest.py
+++ b/src/python/m5/objects/MemTest.py
@@ -13,6 +13,7 @@ class MemTest(SimObject):
     percent_reads = Param.Percent(65, "target read percentage")
     percent_source_unaligned = Param.Percent(50,
         "percent of copy source address that are unaligned")
+    percent_functional = Param.Percent(50, "percent of access that are functional")
     percent_uncacheable = Param.Percent(10,
         "target uncacheable percentage")
     progress_interval = Param.Counter(1000000,
diff --git a/src/sim/faults.cc b/src/sim/faults.cc
index 650b728f7..cea35482a 100644
--- a/src/sim/faults.cc
+++ b/src/sim/faults.cc
@@ -37,7 +37,7 @@
 #if !FULL_SYSTEM
 void FaultBase::invoke(ThreadContext * tc)
 {
-    fatal("fault (%s) detected @ PC 0x%08p", name(), tc->readPC());
+    fatal("fault (%s) detected @ PC %p", name(), tc->readPC());
 }
 #else
 void FaultBase::invoke(ThreadContext * tc)
diff --git a/src/sim/main.cc b/src/sim/main.cc
index 8bb0d7aaa..133141e57 100644
--- a/src/sim/main.cc
+++ b/src/sim/main.cc
@@ -55,6 +55,7 @@
 #include "base/statistics.hh"
 #include "base/str.hh"
 #include "base/time.hh"
+#include "config/pythonhome.hh"
 #include "cpu/base.hh"
 #include "cpu/smt.hh"
 #include "mem/mem_object.hh"
@@ -145,6 +146,11 @@ main(int argc, char **argv)
     if (setenv("PYTHONPATH", pythonpath.c_str(), true) == -1)
         fatal("setenv: %s\n", strerror(errno));
 
+    char *python_home = getenv("PYTHONHOME");
+    if (!python_home)
+        python_home = PYTHONHOME;
+    Py_SetPythonHome(python_home);
+
     // initialize embedded Python interpreter
     Py_Initialize();
     PySys_SetArgv(argc, argv);
diff --git a/src/sim/pseudo_inst.cc b/src/sim/pseudo_inst.cc
index addf897c6..d913e159b 100644
--- a/src/sim/pseudo_inst.cc
+++ b/src/sim/pseudo_inst.cc
@@ -74,6 +74,8 @@ namespace AlphaPseudo
         if (!doQuiesce)
             return;
 
+        DPRINTF(Quiesce, "%s: quiesce()\n", tc->getCpuPtr()->name());
+
         tc->suspend();
         if (tc->getKernelStats())
             tc->getKernelStats()->quiesce();
@@ -87,10 +89,15 @@ namespace AlphaPseudo
 
         EndQuiesceEvent *quiesceEvent = tc->getQuiesceEvent();
 
+        Tick resume = curTick + Clock::Int::ns * ns;
+
         if (quiesceEvent->scheduled())
-            quiesceEvent->reschedule(curTick + Clock::Int::ns * ns);
+            quiesceEvent->reschedule(resume);
         else
-            quiesceEvent->schedule(curTick + Clock::Int::ns * ns);
+            quiesceEvent->schedule(resume);
+
+        DPRINTF(Quiesce, "%s: quiesceNs(%d) until %d\n",
+                tc->getCpuPtr()->name(), ns, resume);
 
         tc->suspend();
         if (tc->getKernelStats())
@@ -105,12 +112,15 @@ namespace AlphaPseudo
 
         EndQuiesceEvent *quiesceEvent = tc->getQuiesceEvent();
 
+        Tick resume = curTick + tc->getCpuPtr()->cycles(cycles);
+
         if (quiesceEvent->scheduled())
-            quiesceEvent->reschedule(curTick +
-                                     tc->getCpuPtr()->cycles(cycles));
+            quiesceEvent->reschedule(resume);
         else
-            quiesceEvent->schedule(curTick +
-                                   tc->getCpuPtr()->cycles(cycles));
+            quiesceEvent->schedule(resume);
+
+        DPRINTF(Quiesce, "%s: quiesceCycles(%d) until %d\n",
+                tc->getCpuPtr()->name(), cycles, resume);
 
         tc->suspend();
         if (tc->getKernelStats())
diff --git a/src/sim/syscall_emul.hh b/src/sim/syscall_emul.hh
index edd4e331d..e79712a19 100644
--- a/src/sim/syscall_emul.hh
+++ b/src/sim/syscall_emul.hh
@@ -356,6 +356,14 @@ convertStatBuf(target_stat &tgt, host_stat *host, bool fakeTTY = false)
     tgt->st_dev = htog(tgt->st_dev);
     tgt->st_ino = host->st_ino;
     tgt->st_ino = htog(tgt->st_ino);
+    tgt->st_mode = host->st_mode;
+    tgt->st_mode = htog(tgt->st_mode);
+    tgt->st_nlink = host->st_nlink;
+    tgt->st_nlink = htog(tgt->st_nlink);
+    tgt->st_uid = host->st_uid;
+    tgt->st_uid = htog(tgt->st_uid);
+    tgt->st_gid = host->st_gid;
+    tgt->st_gid = htog(tgt->st_gid);
     if (fakeTTY)
         tgt->st_rdev = 0x880d;
     else
diff --git a/tests/quick/00.hello/ref/sparc/linux/simple-timing/config.ini b/tests/quick/00.hello/ref/sparc/linux/simple-timing/config.ini
new file mode 100644
index 000000000..982973385
--- /dev/null
+++ b/tests/quick/00.hello/ref/sparc/linux/simple-timing/config.ini
@@ -0,0 +1,236 @@
+[root]
+type=Root
+children=system
+checkpoint=
+clock=1000000000000
+max_tick=0
+output_file=cout
+progress_interval=0
+
+[debug]
+break_cycles=
+
+[exetrace]
+intel_format=false
+pc_symbol=true
+print_cpseq=false
+print_cycle=true
+print_data=true
+print_effaddr=true
+print_fetchseq=false
+print_iregs=false
+print_opclass=true
+print_reg_delta=false
+print_thread=true
+speculative=true
+trace_system=client
+
+[serialize]
+count=10
+cycle=0
+dir=cpt.%012d
+period=0
+
+[stats]
+descriptions=true
+dump_cycle=0
+dump_period=0
+dump_reset=false
+ignore_events=
+mysql_db=
+mysql_host=
+mysql_password=
+mysql_user=
+project_name=test
+simulation_name=test
+simulation_sample=0
+text_compat=true
+text_file=m5stats.txt
+
+[system]
+type=System
+children=cpu membus physmem
+mem_mode=atomic
+physmem=system.physmem
+
+[system.cpu]
+type=TimingSimpleCPU
+children=dcache icache l2cache toL2Bus workload
+clock=1
+cpu_id=0
+defer_registration=false
+function_trace=false
+function_trace_start=0
+max_insts_all_threads=0
+max_insts_any_thread=0
+max_loads_all_threads=0
+max_loads_any_thread=0
+mem=system.cpu.dcache
+progress_interval=0
+system=system
+workload=system.cpu.workload
+dcache_port=system.cpu.dcache.cpu_side
+icache_port=system.cpu.icache.cpu_side
+
+[system.cpu.dcache]
+type=BaseCache
+adaptive_compression=false
+assoc=2
+block_size=64
+compressed_bus=false
+compression_latency=0
+hash_delay=1
+hit_latency=1
+latency=1
+lifo=false
+max_miss_count=0
+mshrs=10
+prefetch_access=false
+prefetch_cache_check_push=true
+prefetch_data_accesses_only=false
+prefetch_degree=1
+prefetch_latency=10
+prefetch_miss=false
+prefetch_past_page=false
+prefetch_policy=none
+prefetch_serial_squash=false
+prefetch_use_cpu_id=true
+prefetcher_size=100
+prioritizeRequests=false
+protocol=Null
+repl=Null
+size=262144
+split=false
+split_size=0
+store_compressed=false
+subblock_size=0
+tgts_per_mshr=5
+trace_addr=0
+two_queue=false
+write_buffers=8
+cpu_side=system.cpu.dcache_port
+mem_side=system.cpu.toL2Bus.port[1]
+
+[system.cpu.icache]
+type=BaseCache
+adaptive_compression=false
+assoc=2
+block_size=64
+compressed_bus=false
+compression_latency=0
+hash_delay=1
+hit_latency=1
+latency=1
+lifo=false
+max_miss_count=0
+mshrs=10
+prefetch_access=false
+prefetch_cache_check_push=true
+prefetch_data_accesses_only=false
+prefetch_degree=1
+prefetch_latency=10
+prefetch_miss=false
+prefetch_past_page=false
+prefetch_policy=none
+prefetch_serial_squash=false
+prefetch_use_cpu_id=true
+prefetcher_size=100
+prioritizeRequests=false
+protocol=Null
+repl=Null
+size=131072
+split=false
+split_size=0
+store_compressed=false
+subblock_size=0
+tgts_per_mshr=5
+trace_addr=0
+two_queue=false
+write_buffers=8
+cpu_side=system.cpu.icache_port
+mem_side=system.cpu.toL2Bus.port[0]
+
+[system.cpu.l2cache]
+type=BaseCache
+adaptive_compression=false
+assoc=2
+block_size=64
+compressed_bus=false
+compression_latency=0
+hash_delay=1
+hit_latency=1
+latency=1
+lifo=false
+max_miss_count=0
+mshrs=10
+prefetch_access=false
+prefetch_cache_check_push=true
+prefetch_data_accesses_only=false
+prefetch_degree=1
+prefetch_latency=10
+prefetch_miss=false
+prefetch_past_page=false
+prefetch_policy=none
+prefetch_serial_squash=false
+prefetch_use_cpu_id=true
+prefetcher_size=100
+prioritizeRequests=false
+protocol=Null
+repl=Null
+size=2097152
+split=false
+split_size=0
+store_compressed=false
+subblock_size=0
+tgts_per_mshr=5
+trace_addr=0
+two_queue=false
+write_buffers=8
+cpu_side=system.cpu.toL2Bus.port[2]
+mem_side=system.membus.port[1]
+
+[system.cpu.toL2Bus]
+type=Bus
+bus_id=0
+clock=1000
+width=64
+port=system.cpu.icache.mem_side system.cpu.dcache.mem_side system.cpu.l2cache.cpu_side
+
+[system.cpu.workload]
+type=LiveProcess
+cmd=hello
+egid=100
+env=
+euid=100
+executable=tests/test-progs/hello/bin/sparc/linux/hello
+gid=100
+input=cin
+output=cout
+pid=100
+ppid=99
+system=system
+uid=100
+
+[system.membus]
+type=Bus
+bus_id=0
+clock=1000
+width=64
+port=system.physmem.port system.cpu.l2cache.mem_side
+
+[system.physmem]
+type=PhysicalMemory
+file=
+latency=1
+range=0:134217727
+port=system.membus.port[0]
+
+[trace]
+bufsize=0
+cycle=0
+dump_on_exit=false
+file=cout
+flags=
+ignore=
+start=0
+
diff --git a/tests/quick/00.hello/ref/sparc/linux/simple-timing/config.out b/tests/quick/00.hello/ref/sparc/linux/simple-timing/config.out
new file mode 100644
index 000000000..5210081b3
--- /dev/null
+++ b/tests/quick/00.hello/ref/sparc/linux/simple-timing/config.out
@@ -0,0 +1,227 @@
+[root]
+type=Root
+clock=1000000000000
+max_tick=0
+progress_interval=0
+output_file=cout
+
+[system.physmem]
+type=PhysicalMemory
+file=
+range=[0,134217727]
+latency=1
+
+[system]
+type=System
+physmem=system.physmem
+mem_mode=atomic
+
+[system.membus]
+type=Bus
+bus_id=0
+clock=1000
+width=64
+
+[system.cpu.dcache]
+type=BaseCache
+size=262144
+assoc=2
+block_size=64
+latency=1
+mshrs=10
+tgts_per_mshr=5
+write_buffers=8
+prioritizeRequests=false
+protocol=null
+trace_addr=0
+hash_delay=1
+repl=null
+compressed_bus=false
+store_compressed=false
+adaptive_compression=false
+compression_latency=0
+block_size=64
+max_miss_count=0
+addr_range=[0,18446744073709551615]
+split=false
+split_size=0
+lifo=false
+two_queue=false
+prefetch_miss=false
+prefetch_access=false
+prefetcher_size=100
+prefetch_past_page=false
+prefetch_serial_squash=false
+prefetch_latency=10
+prefetch_degree=1
+prefetch_policy=none
+prefetch_cache_check_push=true
+prefetch_use_cpu_id=true
+prefetch_data_accesses_only=false
+hit_latency=1
+
+[system.cpu.workload]
+type=LiveProcess
+cmd=hello
+executable=tests/test-progs/hello/bin/sparc/linux/hello
+input=cin
+output=cout
+env=
+system=system
+uid=100
+euid=100
+gid=100
+egid=100
+pid=100
+ppid=99
+
+[system.cpu]
+type=TimingSimpleCPU
+max_insts_any_thread=0
+max_insts_all_threads=0
+max_loads_any_thread=0
+max_loads_all_threads=0
+progress_interval=0
+mem=system.cpu.dcache
+system=system
+cpu_id=0
+workload=system.cpu.workload
+clock=1
+defer_registration=false
+// width not specified
+function_trace=false
+function_trace_start=0
+// simulate_stalls not specified
+
+[system.cpu.toL2Bus]
+type=Bus
+bus_id=0
+clock=1000
+width=64
+
+[system.cpu.icache]
+type=BaseCache
+size=131072
+assoc=2
+block_size=64
+latency=1
+mshrs=10
+tgts_per_mshr=5
+write_buffers=8
+prioritizeRequests=false
+protocol=null
+trace_addr=0
+hash_delay=1
+repl=null
+compressed_bus=false
+store_compressed=false
+adaptive_compression=false
+compression_latency=0
+block_size=64
+max_miss_count=0
+addr_range=[0,18446744073709551615]
+split=false
+split_size=0
+lifo=false
+two_queue=false
+prefetch_miss=false
+prefetch_access=false
+prefetcher_size=100
+prefetch_past_page=false
+prefetch_serial_squash=false
+prefetch_latency=10
+prefetch_degree=1
+prefetch_policy=none
+prefetch_cache_check_push=true
+prefetch_use_cpu_id=true
+prefetch_data_accesses_only=false
+hit_latency=1
+
+[system.cpu.l2cache]
+type=BaseCache
+size=2097152
+assoc=2
+block_size=64
+latency=1
+mshrs=10
+tgts_per_mshr=5
+write_buffers=8
+prioritizeRequests=false
+protocol=null
+trace_addr=0
+hash_delay=1
+repl=null
+compressed_bus=false
+store_compressed=false
+adaptive_compression=false
+compression_latency=0
+block_size=64
+max_miss_count=0
+addr_range=[0,18446744073709551615]
+split=false
+split_size=0
+lifo=false
+two_queue=false
+prefetch_miss=false
+prefetch_access=false
+prefetcher_size=100
+prefetch_past_page=false
+prefetch_serial_squash=false
+prefetch_latency=10
+prefetch_degree=1
+prefetch_policy=none
+prefetch_cache_check_push=true
+prefetch_use_cpu_id=true
+prefetch_data_accesses_only=false
+hit_latency=1
+
+[trace]
+flags=
+start=0
+cycle=0
+bufsize=0
+file=cout
+dump_on_exit=false
+ignore=
+
+[stats]
+descriptions=true
+project_name=test
+simulation_name=test
+simulation_sample=0
+text_file=m5stats.txt
+text_compat=true
+mysql_db=
+mysql_user=
+mysql_password=
+mysql_host=
+events_start=-1
+dump_reset=false
+dump_cycle=0
+dump_period=0
+ignore_events=
+
+[random]
+seed=1
+
+[exetrace]
+speculative=true
+print_cycle=true
+print_opclass=true
+print_thread=true
+print_effaddr=true
+print_data=true
+print_iregs=false
+print_fetchseq=false
+print_cpseq=false
+print_reg_delta=false
+pc_symbol=true
+intel_format=false
+trace_system=client
+
+[debug]
+break_cycles=
+
+[statsreset]
+reset_cycle=0
+
diff --git a/tests/quick/00.hello/ref/sparc/linux/simple-timing/m5stats.txt b/tests/quick/00.hello/ref/sparc/linux/simple-timing/m5stats.txt
new file mode 100644
index 000000000..c4dc22855
--- /dev/null
+++ b/tests/quick/00.hello/ref/sparc/linux/simple-timing/m5stats.txt
@@ -0,0 +1,214 @@
+
+---------- Begin Simulation Statistics ----------
+host_inst_rate                                  53689                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 177104                       # Number of bytes of host memory used
+host_seconds                                     0.08                       # Real time elapsed on the host
+host_tick_rate                               17808084                       # Simulator tick rate (ticks/s)
+sim_freq                                 1000000000000                       # Frequency of simulated ticks
+sim_insts                                        4483                       # Number of instructions simulated
+sim_seconds                                  0.000001                       # Number of seconds simulated
+sim_ticks                                     1497001                       # Number of ticks simulated
+system.cpu.dcache.ReadReq_accesses                464                       # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_avg_miss_latency  3972.166667                       # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency  2972.166667                       # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_hits                    410                       # number of ReadReq hits
+system.cpu.dcache.ReadReq_miss_latency         214497                       # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_rate          0.116379                       # miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_misses                   54                       # number of ReadReq misses
+system.cpu.dcache.ReadReq_mshr_miss_latency       160497                       # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_rate     0.116379                       # mshr miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_mshr_misses              54                       # number of ReadReq MSHR misses
+system.cpu.dcache.WriteReq_accesses               501                       # number of WriteReq accesses(hits+misses)
+system.cpu.dcache.WriteReq_avg_miss_latency  3980.840580                       # average WriteReq miss latency
+system.cpu.dcache.WriteReq_avg_mshr_miss_latency  2980.840580                       # average WriteReq mshr miss latency
+system.cpu.dcache.WriteReq_hits                   432                       # number of WriteReq hits
+system.cpu.dcache.WriteReq_miss_latency        274678                       # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_rate         0.137725                       # miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_misses                  69                       # number of WriteReq misses
+system.cpu.dcache.WriteReq_mshr_miss_latency       205678                       # number of WriteReq MSHR miss cycles
+system.cpu.dcache.WriteReq_mshr_miss_rate     0.137725                       # mshr miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_mshr_misses             69                       # number of WriteReq MSHR misses
+system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
+system.cpu.dcache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
+system.cpu.dcache.avg_refs                   6.845528                       # Average number of references to valid blocks.
+system.cpu.dcache.blocked_no_mshrs                  0                       # number of cycles access was blocked
+system.cpu.dcache.blocked_no_targets                0                       # number of cycles access was blocked
+system.cpu.dcache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
+system.cpu.dcache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
+system.cpu.dcache.cache_copies                      0                       # number of cache copies performed
+system.cpu.dcache.demand_accesses                 965                       # number of demand (read+write) accesses
+system.cpu.dcache.demand_avg_miss_latency  3977.032520                       # average overall miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency  2977.032520                       # average overall mshr miss latency
+system.cpu.dcache.demand_hits                     842                       # number of demand (read+write) hits
+system.cpu.dcache.demand_miss_latency          489175                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_rate           0.127461                       # miss rate for demand accesses
+system.cpu.dcache.demand_misses                   123                       # number of demand (read+write) misses
+system.cpu.dcache.demand_mshr_hits                  0                       # number of demand (read+write) MSHR hits
+system.cpu.dcache.demand_mshr_miss_latency       366175                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_rate      0.127461                       # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_misses              123                       # number of demand (read+write) MSHR misses
+system.cpu.dcache.fast_writes                       0                       # number of fast writes performed
+system.cpu.dcache.mshr_cap_events                   0                       # number of times MSHR cap was activated
+system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
+system.cpu.dcache.overall_accesses                965                       # number of overall (read+write) accesses
+system.cpu.dcache.overall_avg_miss_latency  3977.032520                       # average overall miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency  2977.032520                       # average overall mshr miss latency
+system.cpu.dcache.overall_avg_mshr_uncacheable_latency     no value                       # average overall mshr uncacheable latency
+system.cpu.dcache.overall_hits                    842                       # number of overall hits
+system.cpu.dcache.overall_miss_latency         489175                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_rate          0.127461                       # miss rate for overall accesses
+system.cpu.dcache.overall_misses                  123                       # number of overall misses
+system.cpu.dcache.overall_mshr_hits                 0                       # number of overall MSHR hits
+system.cpu.dcache.overall_mshr_miss_latency       366175                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_rate     0.127461                       # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_misses             123                       # number of overall MSHR misses
+system.cpu.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
+system.cpu.dcache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
+system.cpu.dcache.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
+system.cpu.dcache.prefetcher.num_hwpf_already_in_mshr            0                       # number of hwpf that were already in mshr
+system.cpu.dcache.prefetcher.num_hwpf_already_in_prefetcher            0                       # number of hwpf that were already in the prefetch queue
+system.cpu.dcache.prefetcher.num_hwpf_evicted            0                       # number of hwpf removed due to no buffer left
+system.cpu.dcache.prefetcher.num_hwpf_identified            0                       # number of hwpf identified
+system.cpu.dcache.prefetcher.num_hwpf_issued            0                       # number of hwpf issued
+system.cpu.dcache.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
+system.cpu.dcache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
+system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
+system.cpu.dcache.replacements                      0                       # number of replacements
+system.cpu.dcache.sampled_refs                    123                       # Sample count of references to valid blocks.
+system.cpu.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
+system.cpu.dcache.tagsinuse                 71.370810                       # Cycle average of tags in use
+system.cpu.dcache.total_refs                      842                       # Total number of references to valid blocks.
+system.cpu.dcache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
+system.cpu.dcache.writebacks                        0                       # number of writebacks
+system.cpu.icache.ReadReq_accesses               4484                       # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_avg_miss_latency  3979.178571                       # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency  2979.178571                       # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_hits                   4232                       # number of ReadReq hits
+system.cpu.icache.ReadReq_miss_latency        1002753                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_rate          0.056200                       # miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_misses                  252                       # number of ReadReq misses
+system.cpu.icache.ReadReq_mshr_miss_latency       750753                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_rate     0.056200                       # mshr miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_mshr_misses             252                       # number of ReadReq MSHR misses
+system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
+system.cpu.icache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
+system.cpu.icache.avg_refs                  16.793651                       # Average number of references to valid blocks.
+system.cpu.icache.blocked_no_mshrs                  0                       # number of cycles access was blocked
+system.cpu.icache.blocked_no_targets                0                       # number of cycles access was blocked
+system.cpu.icache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
+system.cpu.icache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
+system.cpu.icache.cache_copies                      0                       # number of cache copies performed
+system.cpu.icache.demand_accesses                4484                       # number of demand (read+write) accesses
+system.cpu.icache.demand_avg_miss_latency  3979.178571                       # average overall miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency  2979.178571                       # average overall mshr miss latency
+system.cpu.icache.demand_hits                    4232                       # number of demand (read+write) hits
+system.cpu.icache.demand_miss_latency         1002753                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_rate           0.056200                       # miss rate for demand accesses
+system.cpu.icache.demand_misses                   252                       # number of demand (read+write) misses
+system.cpu.icache.demand_mshr_hits                  0                       # number of demand (read+write) MSHR hits
+system.cpu.icache.demand_mshr_miss_latency       750753                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_rate      0.056200                       # mshr miss rate for demand accesses
+system.cpu.icache.demand_mshr_misses              252                       # number of demand (read+write) MSHR misses
+system.cpu.icache.fast_writes                       0                       # number of fast writes performed
+system.cpu.icache.mshr_cap_events                   0                       # number of times MSHR cap was activated
+system.cpu.icache.no_allocate_misses                0                       # Number of misses that were no-allocate
+system.cpu.icache.overall_accesses               4484                       # number of overall (read+write) accesses
+system.cpu.icache.overall_avg_miss_latency  3979.178571                       # average overall miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency  2979.178571                       # average overall mshr miss latency
+system.cpu.icache.overall_avg_mshr_uncacheable_latency     no value                       # average overall mshr uncacheable latency
+system.cpu.icache.overall_hits                   4232                       # number of overall hits
+system.cpu.icache.overall_miss_latency        1002753                       # number of overall miss cycles
+system.cpu.icache.overall_miss_rate          0.056200                       # miss rate for overall accesses
+system.cpu.icache.overall_misses                  252                       # number of overall misses
+system.cpu.icache.overall_mshr_hits                 0                       # number of overall MSHR hits
+system.cpu.icache.overall_mshr_miss_latency       750753                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_rate     0.056200                       # mshr miss rate for overall accesses
+system.cpu.icache.overall_mshr_misses             252                       # number of overall MSHR misses
+system.cpu.icache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
+system.cpu.icache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
+system.cpu.icache.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
+system.cpu.icache.prefetcher.num_hwpf_already_in_mshr            0                       # number of hwpf that were already in mshr
+system.cpu.icache.prefetcher.num_hwpf_already_in_prefetcher            0                       # number of hwpf that were already in the prefetch queue
+system.cpu.icache.prefetcher.num_hwpf_evicted            0                       # number of hwpf removed due to no buffer left
+system.cpu.icache.prefetcher.num_hwpf_identified            0                       # number of hwpf identified
+system.cpu.icache.prefetcher.num_hwpf_issued            0                       # number of hwpf issued
+system.cpu.icache.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
+system.cpu.icache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
+system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
+system.cpu.icache.replacements                      0                       # number of replacements
+system.cpu.icache.sampled_refs                    252                       # Sample count of references to valid blocks.
+system.cpu.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
+system.cpu.icache.tagsinuse                115.914677                       # Cycle average of tags in use
+system.cpu.icache.total_refs                     4232                       # Total number of references to valid blocks.
+system.cpu.icache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
+system.cpu.icache.writebacks                        0                       # number of writebacks
+system.cpu.idle_fraction                            0                       # Percentage of idle cycles
+system.cpu.l2cache.ReadReq_accesses               375                       # number of ReadReq accesses(hits+misses)
+system.cpu.l2cache.ReadReq_avg_miss_latency  2986.473118                       # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_avg_mshr_miss_latency  1985.473118                       # average ReadReq mshr miss latency
+system.cpu.l2cache.ReadReq_hits                     3                       # number of ReadReq hits
+system.cpu.l2cache.ReadReq_miss_latency       1110968                       # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_miss_rate         0.992000                       # miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_misses                 372                       # number of ReadReq misses
+system.cpu.l2cache.ReadReq_mshr_miss_latency       738596                       # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_rate     0.992000                       # mshr miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_mshr_misses            372                       # number of ReadReq MSHR misses
+system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
+system.cpu.l2cache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
+system.cpu.l2cache.avg_refs                  0.008065                       # Average number of references to valid blocks.
+system.cpu.l2cache.blocked_no_mshrs                 0                       # number of cycles access was blocked
+system.cpu.l2cache.blocked_no_targets               0                       # number of cycles access was blocked
+system.cpu.l2cache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
+system.cpu.l2cache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
+system.cpu.l2cache.cache_copies                     0                       # number of cache copies performed
+system.cpu.l2cache.demand_accesses                375                       # number of demand (read+write) accesses
+system.cpu.l2cache.demand_avg_miss_latency  2986.473118                       # average overall miss latency
+system.cpu.l2cache.demand_avg_mshr_miss_latency  1985.473118                       # average overall mshr miss latency
+system.cpu.l2cache.demand_hits                      3                       # number of demand (read+write) hits
+system.cpu.l2cache.demand_miss_latency        1110968                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_rate          0.992000                       # miss rate for demand accesses
+system.cpu.l2cache.demand_misses                  372                       # number of demand (read+write) misses
+system.cpu.l2cache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
+system.cpu.l2cache.demand_mshr_miss_latency       738596                       # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_rate     0.992000                       # mshr miss rate for demand accesses
+system.cpu.l2cache.demand_mshr_misses             372                       # number of demand (read+write) MSHR misses
+system.cpu.l2cache.fast_writes                      0                       # number of fast writes performed
+system.cpu.l2cache.mshr_cap_events                  0                       # number of times MSHR cap was activated
+system.cpu.l2cache.no_allocate_misses               0                       # Number of misses that were no-allocate
+system.cpu.l2cache.overall_accesses               375                       # number of overall (read+write) accesses
+system.cpu.l2cache.overall_avg_miss_latency  2986.473118                       # average overall miss latency
+system.cpu.l2cache.overall_avg_mshr_miss_latency  1985.473118                       # average overall mshr miss latency
+system.cpu.l2cache.overall_avg_mshr_uncacheable_latency     no value                       # average overall mshr uncacheable latency
+system.cpu.l2cache.overall_hits                     3                       # number of overall hits
+system.cpu.l2cache.overall_miss_latency       1110968                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_rate         0.992000                       # miss rate for overall accesses
+system.cpu.l2cache.overall_misses                 372                       # number of overall misses
+system.cpu.l2cache.overall_mshr_hits                0                       # number of overall MSHR hits
+system.cpu.l2cache.overall_mshr_miss_latency       738596                       # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_rate     0.992000                       # mshr miss rate for overall accesses
+system.cpu.l2cache.overall_mshr_misses            372                       # number of overall MSHR misses
+system.cpu.l2cache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
+system.cpu.l2cache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
+system.cpu.l2cache.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
+system.cpu.l2cache.prefetcher.num_hwpf_already_in_mshr            0                       # number of hwpf that were already in mshr
+system.cpu.l2cache.prefetcher.num_hwpf_already_in_prefetcher            0                       # number of hwpf that were already in the prefetch queue
+system.cpu.l2cache.prefetcher.num_hwpf_evicted            0                       # number of hwpf removed due to no buffer left
+system.cpu.l2cache.prefetcher.num_hwpf_identified            0                       # number of hwpf identified
+system.cpu.l2cache.prefetcher.num_hwpf_issued            0                       # number of hwpf issued
+system.cpu.l2cache.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
+system.cpu.l2cache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
+system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
+system.cpu.l2cache.replacements                     0                       # number of replacements
+system.cpu.l2cache.sampled_refs                   372                       # Sample count of references to valid blocks.
+system.cpu.l2cache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
+system.cpu.l2cache.tagsinuse               185.896040                       # Cycle average of tags in use
+system.cpu.l2cache.total_refs                       3                       # Total number of references to valid blocks.
+system.cpu.l2cache.warmup_cycle                     0                       # Cycle when the warmup percentage was hit.
+system.cpu.l2cache.writebacks                       0                       # number of writebacks
+system.cpu.not_idle_fraction                        1                       # Percentage of non-idle cycles
+system.cpu.numCycles                          1497001                       # number of cpu cycles simulated
+system.cpu.num_insts                             4483                       # Number of instructions executed
+system.cpu.num_refs                               965                       # Number of memory references
+system.cpu.workload.PROG:num_syscalls              11                       # Number of system calls
+
+---------- End Simulation Statistics   ----------
diff --git a/tests/quick/00.hello/ref/sparc/linux/simple-timing/stderr b/tests/quick/00.hello/ref/sparc/linux/simple-timing/stderr
new file mode 100644
index 000000000..7873672f2
--- /dev/null
+++ b/tests/quick/00.hello/ref/sparc/linux/simple-timing/stderr
@@ -0,0 +1,3 @@
+warn: More than two loadable segments in ELF object.
+warn: Ignoring segment @ 0x0 length 0x0.
+warn: Entering event queue @ 0.  Starting simulation...
diff --git a/tests/quick/00.hello/ref/sparc/linux/simple-timing/stdout b/tests/quick/00.hello/ref/sparc/linux/simple-timing/stdout
new file mode 100644
index 000000000..3c17ee40b
--- /dev/null
+++ b/tests/quick/00.hello/ref/sparc/linux/simple-timing/stdout
@@ -0,0 +1,12 @@
+Hello World!M5 Simulator System
+
+Copyright (c) 2001-2006
+The Regents of The University of Michigan
+All Rights Reserved
+
+
+M5 compiled Oct 23 2006 07:47:36
+M5 started Mon Oct 23 07:47:41 2006
+M5 executing on zeep
+command line: build/SPARC_SE/m5.debug -d build/SPARC_SE/tests/debug/quick/00.hello/sparc/linux/simple-timing tests/run.py quick/00.hello/sparc/linux/simple-timing
+Exiting @ tick 1497001 because target called exit()
diff --git a/util/statetrace/arch/tracechild_sparc.cc b/util/statetrace/arch/tracechild_sparc.cc
index 378de0865..bad81b647 100644
--- a/util/statetrace/arch/tracechild_sparc.cc
+++ b/util/statetrace/arch/tracechild_sparc.cc
@@ -47,10 +47,10 @@ string SparcTraceChild::regNames[numregs] = {
     //Input registers
     "i0", "i1", "i2", "i3", "i4", "i5", "i6", "i7",
     //Floating point
-    "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7",
-    "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15",
-    "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
-    "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31",
+    "f0", "f2", "f4", "f6", "f8", "f10", "f12", "f14",
+    "f16", "f18", "f20", "f22", "f24", "f26", "f28", "f30",
+    "f32", "f34", "f36", "f38", "f40", "f42", "f44", "f46",
+    "f48", "f50", "f52", "f54", "f56", "f58", "f60", "f62",
     //Miscelaneous
     "fsr", "fprs", "pc", "npc", "y", "cwp", "pstate", "asi", "ccr"};
 
@@ -98,37 +98,37 @@ int64_t getRegs(regs & myregs, fpu & myfpu,
         case SparcTraceChild::I7: return inputs[7];
         //Floating point
         case SparcTraceChild::F0: return myfpu.f_fpstatus.fpu_fr[0];
-        case SparcTraceChild::F1: return myfpu.f_fpstatus.fpu_fr[1];
-        case SparcTraceChild::F2: return myfpu.f_fpstatus.fpu_fr[2];
-        case SparcTraceChild::F3: return myfpu.f_fpstatus.fpu_fr[3];
-        case SparcTraceChild::F4: return myfpu.f_fpstatus.fpu_fr[4];
-        case SparcTraceChild::F5: return myfpu.f_fpstatus.fpu_fr[5];
-        case SparcTraceChild::F6: return myfpu.f_fpstatus.fpu_fr[6];
-        case SparcTraceChild::F7: return myfpu.f_fpstatus.fpu_fr[7];
-        case SparcTraceChild::F8: return myfpu.f_fpstatus.fpu_fr[8];
-        case SparcTraceChild::F9: return myfpu.f_fpstatus.fpu_fr[9];
-        case SparcTraceChild::F10: return myfpu.f_fpstatus.fpu_fr[10];
-        case SparcTraceChild::F11: return myfpu.f_fpstatus.fpu_fr[11];
-        case SparcTraceChild::F12: return myfpu.f_fpstatus.fpu_fr[12];
-        case SparcTraceChild::F13: return myfpu.f_fpstatus.fpu_fr[13];
-        case SparcTraceChild::F14: return myfpu.f_fpstatus.fpu_fr[14];
-        case SparcTraceChild::F15: return myfpu.f_fpstatus.fpu_fr[15];
-        case SparcTraceChild::F16: return myfpu.f_fpstatus.fpu_fr[16];
-        case SparcTraceChild::F17: return myfpu.f_fpstatus.fpu_fr[17];
-        case SparcTraceChild::F18: return myfpu.f_fpstatus.fpu_fr[18];
-        case SparcTraceChild::F19: return myfpu.f_fpstatus.fpu_fr[19];
-        case SparcTraceChild::F20: return myfpu.f_fpstatus.fpu_fr[20];
-        case SparcTraceChild::F21: return myfpu.f_fpstatus.fpu_fr[21];
-        case SparcTraceChild::F22: return myfpu.f_fpstatus.fpu_fr[22];
-        case SparcTraceChild::F23: return myfpu.f_fpstatus.fpu_fr[23];
-        case SparcTraceChild::F24: return myfpu.f_fpstatus.fpu_fr[24];
-        case SparcTraceChild::F25: return myfpu.f_fpstatus.fpu_fr[25];
-        case SparcTraceChild::F26: return myfpu.f_fpstatus.fpu_fr[26];
-        case SparcTraceChild::F27: return myfpu.f_fpstatus.fpu_fr[27];
-        case SparcTraceChild::F28: return myfpu.f_fpstatus.fpu_fr[28];
-        case SparcTraceChild::F29: return myfpu.f_fpstatus.fpu_fr[29];
-        case SparcTraceChild::F30: return myfpu.f_fpstatus.fpu_fr[30];
-        case SparcTraceChild::F31: return myfpu.f_fpstatus.fpu_fr[31];
+        case SparcTraceChild::F2: return myfpu.f_fpstatus.fpu_fr[1];
+        case SparcTraceChild::F4: return myfpu.f_fpstatus.fpu_fr[2];
+        case SparcTraceChild::F6: return myfpu.f_fpstatus.fpu_fr[3];
+        case SparcTraceChild::F8: return myfpu.f_fpstatus.fpu_fr[4];
+        case SparcTraceChild::F10: return myfpu.f_fpstatus.fpu_fr[5];
+        case SparcTraceChild::F12: return myfpu.f_fpstatus.fpu_fr[6];
+        case SparcTraceChild::F14: return myfpu.f_fpstatus.fpu_fr[7];
+        case SparcTraceChild::F16: return myfpu.f_fpstatus.fpu_fr[8];
+        case SparcTraceChild::F18: return myfpu.f_fpstatus.fpu_fr[9];
+        case SparcTraceChild::F20: return myfpu.f_fpstatus.fpu_fr[10];
+        case SparcTraceChild::F22: return myfpu.f_fpstatus.fpu_fr[11];
+        case SparcTraceChild::F24: return myfpu.f_fpstatus.fpu_fr[12];
+        case SparcTraceChild::F26: return myfpu.f_fpstatus.fpu_fr[13];
+        case SparcTraceChild::F28: return myfpu.f_fpstatus.fpu_fr[14];
+        case SparcTraceChild::F30: return myfpu.f_fpstatus.fpu_fr[15];
+        case SparcTraceChild::F32: return myfpu.f_fpstatus.fpu_fr[16];
+        case SparcTraceChild::F34: return myfpu.f_fpstatus.fpu_fr[17];
+        case SparcTraceChild::F36: return myfpu.f_fpstatus.fpu_fr[18];
+        case SparcTraceChild::F38: return myfpu.f_fpstatus.fpu_fr[19];
+        case SparcTraceChild::F40: return myfpu.f_fpstatus.fpu_fr[20];
+        case SparcTraceChild::F42: return myfpu.f_fpstatus.fpu_fr[21];
+        case SparcTraceChild::F44: return myfpu.f_fpstatus.fpu_fr[22];
+        case SparcTraceChild::F46: return myfpu.f_fpstatus.fpu_fr[23];
+        case SparcTraceChild::F48: return myfpu.f_fpstatus.fpu_fr[24];
+        case SparcTraceChild::F50: return myfpu.f_fpstatus.fpu_fr[25];
+        case SparcTraceChild::F52: return myfpu.f_fpstatus.fpu_fr[26];
+        case SparcTraceChild::F54: return myfpu.f_fpstatus.fpu_fr[27];
+        case SparcTraceChild::F56: return myfpu.f_fpstatus.fpu_fr[28];
+        case SparcTraceChild::F58: return myfpu.f_fpstatus.fpu_fr[29];
+        case SparcTraceChild::F60: return myfpu.f_fpstatus.fpu_fr[30];
+        case SparcTraceChild::F62: return myfpu.f_fpstatus.fpu_fr[31];
         //Miscelaneous
         case SparcTraceChild::FSR: return myfpu.f_fpstatus.Fpu_fsr;
         case SparcTraceChild::FPRS: return myregs.r_fprs;
@@ -188,46 +188,110 @@ bool SparcTraceChild::step()
     //being breakpointed should be word (64bit) aligned, and that both the
     //next instruction and the instruction after that need to be breakpointed
     //so that annulled branches will still stop as well.
+
+    /*
+     * Useful constants
+     */
     const static uint64_t breakInst = 0x91d02001;
     const static uint64_t breakWord = breakInst | (breakInst << 32);
-    const static uint64_t lowMask = (uint64_t)(0xFFFFFFFF);
+    const static uint64_t lowMask = 0xFFFFFFFFULL;
     const static uint64_t highMask = lowMask << 32;
+
+    /*
+     * Storage for the original contents of the child process's memory
+     */
     uint64_t originalInst, originalAnnulInst;
+
+    /*
+     * Get information about where the process is and is headed next.
+     */
+    uint64_t currentPC = getRegVal(PC);
+    bool unalignedPC = currentPC & 7;
+    uint64_t alignedPC = currentPC & (~7);
     uint64_t nextPC = getRegVal(NPC);
-    bool unaligned = nextPC & 7;
-    uint64_t alignedPC = nextPC & (~7);
-    originalInst = ptrace(PTRACE_PEEKTEXT, pid, alignedPC, 0);
-    if(unaligned)
+    bool unalignedNPC = nextPC & 7;
+    uint64_t alignedNPC = nextPC & (~7);
+
+    /*
+     * Store the original contents of the child process's memory
+     */
+    originalInst = ptrace(PTRACE_PEEKTEXT, pid, alignedNPC, 0);
+    //Only read the following word when npc is unaligned (saves a ptrace call)
+    if(unalignedNPC)
     {
-        originalAnnulInst = ptrace(PTRACE_PEEKTEXT, pid, alignedPC+8, 0);
+        originalAnnulInst = ptrace(PTRACE_PEEKTEXT, pid, alignedNPC+8, 0);
     }
-    uint64_t newInst;
-    if(unaligned)
+
+    /*
+     * Prepare breakpointed copies of the child process's memory
+     */
+    uint64_t newInst, newAnnulInst;
+    //If the current instruction is in the same word as the npc
+    if(alignedPC == alignedNPC)
     {
-        newInst = (originalInst & highMask) | (breakInst << 0);
-        if(ptrace(PTRACE_POKETEXT, pid, alignedPC, newInst) != 0)
-            cerr << "Poke failed" << endl;
-        newInst = (originalAnnulInst & lowMask) | (breakInst << 32);
-        if(ptrace(PTRACE_POKETEXT, pid, alignedPC+8, newInst) != 0)
-            cerr << "Poke failed" << endl;
+        //Make sure we only replace the other part
+        if(unalignedPC)
+            newInst = (originalInst & lowMask) | (breakWord & highMask);
+        else
+            newInst = (originalInst & highMask) | (breakWord & lowMask);
+    }
+    else
+    {
+        //otherwise replace the whole thing
+        newInst = breakWord;
+    }
+    //If the current instruction is in the word that follows the one
+    //holding the npc
+    if(alignedPC == alignedNPC+8)
+    {
+        //Make sure we only replace the other part
+        if(unalignedPC)
+            newAnnulInst = (originalAnnulInst & lowMask) | (breakWord & highMask);
+        else
+            newAnnulInst = (originalAnnulInst & highMask) | (breakWord & lowMask);
     }
     else
     {
-        if(ptrace(PTRACE_POKETEXT, pid, alignedPC, breakWord) != 0)
+        //otherwise replace the whole thing
+        newAnnulInst = breakWord;
+    }
+
+    /*
+     * Stuff the breakpoint instructions into the child's address space.
+     */
+    //Replace the word at npc
+    if(ptrace(PTRACE_POKETEXT, pid, alignedNPC, newInst) != 0)
+        cerr << "Poke failed" << endl;
+    //Replace the next word, if necessary
+    if(unalignedNPC)
+    {
+        if(ptrace(PTRACE_POKETEXT, pid, alignedNPC+8, newAnnulInst) != 0)
             cerr << "Poke failed" << endl;
     }
+
+    /*
+     * Restart the child process
+     */
     //Note that the "addr" parameter is supposed to be ignored, but in at
     //least one version of the kernel, it must be 1 or it will set what
     //pc to continue from
-    if(ptrace(PTRACE_CONT, pid, /*nextPC - 4*/ 1, 0) != 0)
+    if(ptrace(PTRACE_CONT, pid, 1, 0) != 0)
         cerr << "Cont failed" << endl;
     doWait();
+
+    /*
+     * Update our record of the child's state
+     */
     update(pid);
-    if(ptrace(PTRACE_POKETEXT, pid, alignedPC, originalInst) != 0)
+
+    /*
+     * Put back the original contents of the child's address space
+     */
+    if(ptrace(PTRACE_POKETEXT, pid, alignedNPC, originalInst) != 0)
         cerr << "Repoke failed" << endl;
-    if(unaligned)
+    if(unalignedNPC)
     {
-        if(ptrace(PTRACE_POKETEXT, pid, alignedPC+8, originalAnnulInst) != 0)
+        if(ptrace(PTRACE_POKETEXT, pid, alignedNPC+8, originalAnnulInst) != 0)
             cerr << "Repoke failed" << endl;
     }
     return true;
diff --git a/util/statetrace/arch/tracechild_sparc.hh b/util/statetrace/arch/tracechild_sparc.hh
index d177d5941..80770211a 100644
--- a/util/statetrace/arch/tracechild_sparc.hh
+++ b/util/statetrace/arch/tracechild_sparc.hh
@@ -57,10 +57,10 @@ public:
                 //Input registers
                 I0, I1, I2, I3, I4, I5, I6, I7,
                 //Floating point
-                F0, F1, F2, F3, F4, F5, F6, F7,
-                F8, F9, F10, F11, F12, F13, F14, F15,
-                F16, F17, F18, F19, F20, F21, F22, F23,
-                F24, F25, F26, F27, F28, F29, F30, F31,
+                F0, F2, F4, F6, F8, F10, F12, F14,
+                F16, F18, F20, F22, F24, F26, F28, F30,
+                F32, F34, F36, F38, F40, F42, F44, F46,
+                F48, F50, F52, F54, F56, F58, F60, F62,
                 //Miscelaneous
                 FSR, FPRS, PC, NPC, Y, CWP, PSTATE, ASI, CCR,
                 numregs
diff --git a/util/tracediff b/util/tracediff
index f2377a999..b25efe9b2 100755
--- a/util/tracediff
+++ b/util/tracediff
@@ -33,23 +33,63 @@
 # ******Note that you need to enable some trace flags in the args in order
 # to do anything useful!******
 #
-# If you want to pass different arguments to the two instances of m5,
-# you can embed them in the simulator arguments like this:
+# Script arguments are handled uniformly as follows:
+# - If the argument does not contain a '|' character, it is appended
+#   to both command lines.
+# - If the argument has a '|' character in it, the text on either side
+#   of the '|' is appended to the respective command lines.  Note that
+#   you'll have to quote the arg or escape the '|' with a backslash
+#   so that the shell doesn't think you're doing a pipe.
 #
-# % tracediff "m5.opt --option1" "m5.opt --option2" [common args]
+# In other words, the arguments should look like the command line you
+# want to run, with "|" used to list the alternatives for the parts
+# that you want to differ between the two runs.
+#
+# For example:
+#
+# % tracediff m5.opt --opt1 "--opt2|--opt3" --opt4
+# would compare these two runs:
+# m5.opt --opt1 --opt2 --opt4
+# m5.opt --opt1 --opt3 --opt4
+#
+# If you want to compare two different simulator binaries, put a '|'
+# in the first script argument ("path1/m5.opt|path2/m5.opt").  If you
+# want to add arguments to one run only, just put a '|' in with text
+# only on one side ("--onlyOn1|").  You can do this with multiple
+# arguments together too ("|-a -b -c" adds three args to the second
+# run only).
 #
 
 if (@ARGV < 2) {
-    die "Usage: tracediff sim1 sim2 [--root.trace.flags=X args...]\n";
+    die "Usage: tracediff \"sim1|sim2\" [common-arg \"arg1|arg2\" ...]\n";
+}
+
+foreach $arg (@ARGV) {
+    @pair = split('\|', $arg, -1); # -1 enables null trailing fields
+    if ($#pair > 0) {
+	push @cmd1, $pair[0];
+	push @cmd2, $pair[1];
+    } else {
+	push @cmd1, $arg;
+	push @cmd2, $arg;
+    }
 }
 
 # First two args are the two simulator binaries to compare
-$sim1 = shift;
-$sim2 = shift;
+$sim1 = shift @cmd1;
+$sim2 = shift @cmd2;
+
+# Everything else is a simulator arg.
+$args1 = join(' ', @cmd1);
+$args2 = join(' ', @cmd2);
 
-# Everything else on the command line is taken to be an m5 argument to
-# be given to both invocations
-$simargs = '"' . join('" "', @ARGV) . '"';
+# Common mistake: if you don't set any traceflags this often isn't
+# doing what you want.
+if ($args1 !~ /--trace-flags/) {
+    print "****\n";
+    print "**** WARNING: no trace flags set... you may not be diffing much!\n";
+    print "****\n";
+}
 
 # Run individual invocations in separate dirs so output and intermediate
 # files (particularly config.py and config.ini) don't conflict.
@@ -58,8 +98,8 @@ $dir2 = "tracediff-$$-2";
 mkdir($dir1) or die "Can't create dir $dir1\n";
 mkdir($dir2) or die "Can't create dir $dir2\n";
 
-$cmd1 = "$sim1 -d $dir1 $simargs 2>&1 |";
-$cmd2 = "$sim2 -d $dir2 $simargs 2>&1 |";
+$cmd1 = "$sim1 -d $dir1 $args1 2>&1 |";
+$cmd2 = "$sim2 -d $dir2 $args2 2>&1 |";
 
 # This only works if you have rundiff in your path.  I just edit it
 # with an explicit path if necessary.