Merge m5.eecs.umich.edu:/bk/newmem

into ewok.(none):/home/gblack/m5/newmem --HG-- extra : convert_revision : 516c357f98c7a571c70362babd3fa162fbc2ed5a
author: Gabe Black <gblack@eecs.umich.edu> 2006-07-18 18:23:23 -0400
committer: Gabe Black <gblack@eecs.umich.edu> 2006-07-18 18:23:23 -0400
commit: 44974a4462e019cfc5c65d20ad620faa9bc7f8cf (patch)
tree: 94f25a8a565021f97cbf6f28a37accdf157bbafc
parent: 15a8f050605919579e81b6abb98a0b596334216d (diff)
parent: fe9e851e8c0a52ee412350036c94cc61c9b8dc04 (diff)
download: gem5-44974a4462e019cfc5c65d20ad620faa9bc7f8cf.tar.xz
290 files changed, 22055 insertions, 5773 deletions
diff --git a/AUTHORS b/AUTHORS
new file mode 100644
index 000000000..ec3de7bb2
--- /dev/null
+++ b/AUTHORS
@@ -0,0 +1,42 @@
+Steven K. Reinhardt
+-----------------------
+
+Nathan L. Binkert
+-----------------------
+
+Erik G. Hallnor
+-----------------------
+
+Steve E. Raasch
+-----------------------
+
+Lisa R. Hsu
+-----------------------
+
+Ali G. Saidi
+-----------------------
+
+Andrew L. Schultz
+-----------------------
+
+Kevin T. Lim
+-----------------------
+
+Ronald G. Dreslinski Jr
+-----------------------
+
+Gabriel Black
+-----------------------
+
+Korey L. Sewell
+-----------------------
+
+David Green
+-----------------------
+
+Benjamin S. Nash
+-----------------------
+
+Miguel J. Serrano
+-----------------------
+
diff --git a/README b/README
index 62214d8c5..7bcbe9523 100644
--- a/README
+++ b/README
@@ -1,21 +1,27 @@
-This is release m5_1.1 of the M5 simulator.
+This is release 2.0 of the M5 simulator.
 
-This file contains brief "getting started" instructions.  For more
-information, see http://m5.eecs.umich.edu.  If you have questions,
-please send mail to m5sim-users@lists.sourceforge.net.
+For information about building the simulator and getting started please refer
+to: http://m5.eecs.umich.edu/
+
+Specific Pages of Interest are:
+http://m5.eecs.umich.edu/wiki/index.php/Compiling_M5
+http://m5.eecs.umich.edu/wiki/index.php/Running_M5
+
+If you have questions, please send mail to m5sim-users@lists.sourceforge.net.
 
 WHAT'S INCLUDED (AND NOT)
 -------------------------
 
 The basic source release includes these subdirectories:
- - m5: the simulator itself
- - m5-test: regression tests
- - ext: less-common external packages needed to build m5
- - alpha-system: source for Alpha console and PALcode
+ - m5: 
+   - src: source code of the m5 simulator
+   - test: regression tests
+   - ext: less-common external packages needed to build m5
+   - system/alpha: source for Alpha console and PALcode
 
 To run full-system simulations, you will need compiled console,
 PALcode, and kernel binaries and one or more disk images.  These files
-are collected in a separate archive, m5_system_1.1.tar.bz2.  This file
+are collected in a separate archive, m5_system_2.0.tar.bz2.  This file
 is included on the CD release, or you can download it separately from
 Sourceforge.
 
@@ -31,66 +37,8 @@ set of Linux source patches (linux_m5-2.6.8.1.diff), and the scons
 program needed to build M5.  If you do not have the CD, the same HTML
 documentation is available online at http://m5.eecs.umich.edu/docs,
 the Linux source patches are available at
-http://m5.eecs.umich.edu/dist/linux_m5-2.6.8.1.diff, and the scons
-program is available from http://www.scons.org.
-
-WHAT'S NEEDED
--------------
-- GCC version 3.3 or newer
-- Python 2.3 or newer
-- SCons 0.96.1 or newer (see http://www.scons.org)
-
-WHAT'S RECOMMENDED
-------------------
-- MySQL (for statistics complex statistics storage/retrieval)
-- Python-MysqlDB (for statistics analysis) 
-
-GETTING STARTED
----------------
-
-There are two different build targets and three optimizations levels:
-
-Target:
--------
-ALPHA_SE - Syscall emulation simulation
-ALPHA_FS - Full system simulation
-
-Optimization:
--------------
-m5.debug - debug version of the code with tracing and without optimization
-m5.opt   - optimized version of code with tracing
-m5.fast  - optimized version of the code without tracing and asserts
-
-Different targets are built in different subdirectories of m5/build.
-Binaries with the same target but different optimization levels share
-the same directory.  Note that you can build m5 in any directory you
-choose; just configure the target directory using the 'mkbuilddir'
-script in m5/build.
-
-The following steps will build and test the simulator.  The variable
-"$top" refers to the top directory where you've unpacked the files,
-i.e., the one containing the m5, m5-test, and ext directories.  If you
-have a multiprocessor system, you should give scons a "-j N" argument (like
-make) to run N jobs in parallel.
-
-To build and test the syscall-emulation simulator:
-
-	cd $top/m5/build
-	scons ALPHA_SE/test/opt/quick
-
-This process takes under 10 minutes on a dual 3GHz Xeon system (using
-the '-j 4' option).
-
-To build and test the full-system simulator:
-
-1. Unpack the full-system binaries from m5_system_1.1.tar.bz2.  (See
-   above for directions on obtaining this file if you don't have it.)
-   This package includes disk images and kernel, palcode, and console
-   binaries for Linux and FreeBSD.
-2. Edit the SYSTEMDIR search path in $top/m5-test/SysPaths.py to
-   include the path to your local copy of the binaries.
-3. In $top/m5/build, run "scons ALPHA_FS/test/opt/quick".
+http://m5.eecs.umich.edu/dist/linux_m5-2.6.8.1.diff, the scons
+program is available from http://www.scons.org, and swig is available from
+http://www.swig.org.
 
-This process also takes under 10 minutes on a dual 3GHz Xeon system
-(again using the '-j 4' option).
 
diff --git a/RELEASE_NOTES b/RELEASE_NOTES
index 983c9b2e9..6eb9b1844 100644
--- a/RELEASE_NOTES
+++ b/RELEASE_NOTES
@@ -1,3 +1,11 @@
+XXX. X, 2006: m5_2.0
+--------------------
+Major update to M5 including:
+- New CPU model
+- New memory system
+- More extensive python integration
+- Preliminary syscall emulation support for MIPS and SPARC
+
 Oct. 8, 2005: m5_1.1
 --------------------
 Update release for IOSCA workshop mini-tutorial.  New features include:
diff --git a/SConstruct b/SConstruct
index 0cf15b1f9..b18fe66d3 100644
--- a/SConstruct
+++ b/SConstruct
@@ -39,17 +39,20 @@
 #
 # You can build M5 in a different directory as long as there is a
 # 'build/<CONFIG>' somewhere along the target path.  The build system
-# expdects that all configs under the same build directory are being
+# expects that all configs under the same build directory are being
 # built for the same host system.
 #
 # Examples:
-#   These two commands are equivalent.  The '-u' option tells scons to
-#   search up the directory tree for this SConstruct file.
+#
+#   The following two commands are equivalent.  The '-u' option tells
+#   scons to search up the directory tree for this SConstruct file.
 #   % cd <path-to-src>/m5 ; scons build/ALPHA_FS/m5.debug
 #   % cd <path-to-src>/m5/build/ALPHA_FS; scons -u m5.debug
-#   These two commands are equivalent and demonstrate building in a
-#   directory outside of the source tree.  The '-C' option tells scons
-#   to chdir to the specified directory to find this SConstruct file.
+#
+#   The following two commands are equivalent and demonstrate building
+#   in a directory outside of the source tree.  The '-C' option tells
+#   scons to chdir to the specified directory to find this SConstruct
+#   file.
 #   % cd <path-to-src>/m5 ; scons /local/foo/build/ALPHA_FS/m5.debug
 #   % cd /local/foo/build/ALPHA_FS; scons -C <path-to-src>/m5 m5.debug
 #
@@ -156,7 +159,7 @@ env = Environment(ENV = os.environ,  # inherit user's environment vars
                   ROOT = ROOT,
                   SRCDIR = SRCDIR)
 
-env.SConsignFile("sconsign")
+env.SConsignFile(os.path.join(build_root,"sconsign"))
 
 # Default duplicate option is to use hard links, but this messes up
 # when you use emacs to edit a file in the target dir, as emacs moves
@@ -260,8 +263,8 @@ env['ALL_ISA_LIST'] = ['alpha', 'sparc', 'mips']
 
 # Define the universe of supported CPU models
 env['ALL_CPU_LIST'] = ['AtomicSimpleCPU', 'TimingSimpleCPU',
-                       'FullCPU', 'AlphaFullCPU',
-                       'OzoneSimpleCPU', 'OzoneCPU', 'CheckerCPU']
+                       'FullCPU', 'O3CPU',
+                       'OzoneCPU']
 
 # Sticky options get saved in the options file so they persist from
 # one invocation to the next (unless overridden, in which case the new
@@ -289,6 +292,7 @@ sticky_opts.AddOptions(
                False),
     BoolOption('USE_MYSQL', 'Use MySQL for stats output', have_mysql),
     BoolOption('USE_FENV', 'Use <fenv.h> IEEE mode control', have_fenv),
+    BoolOption('USE_CHECKER', 'Use checker for detailed CPU models', False),
     ('CC', 'C compiler', os.environ.get('CC', env['CC'])),
     ('CXX', 'C++ compiler', os.environ.get('CXX', env['CXX'])),
     BoolOption('BATCH', 'Use batch pool for build and tests', False),
@@ -301,9 +305,10 @@ nonsticky_opts.AddOptions(
     BoolOption('update_ref', 'Update test reference outputs', False)
     )
 
-# These options get exported to #defines in config/*.hh (see m5/SConscript).
+# These options get exported to #defines in config/*.hh (see src/SConscript).
 env.ExportOptions = ['FULL_SYSTEM', 'ALPHA_TLASER', 'USE_FENV', \
-                     'USE_MYSQL', 'NO_FAST_ALLOC', 'SS_COMPATIBLE_FP']
+                     'USE_MYSQL', 'NO_FAST_ALLOC', 'SS_COMPATIBLE_FP', \
+                     'USE_CHECKER']
 
 # Define a handy 'no-op' action
 def no_action(target, source, env):
@@ -482,7 +487,7 @@ for build_path in build_paths:
     if env['USE_SSE2']:
         env.Append(CCFLAGS='-msse2')
 
-    # The m5/SConscript file sets up the build rules in 'env' according
+    # The src/SConscript file sets up the build rules in 'env' according
     # to the configured options.  It returns a list of environments,
     # one for each variant build (debug, opt, etc.)
     envList = SConscript('src/SConscript', build_dir = build_path,
diff --git a/configs/test/SysPaths.py b/configs/test/SysPaths.py
index c7c7db4e7..3f96a546f 100644
--- a/configs/test/SysPaths.py
+++ b/configs/test/SysPaths.py
@@ -1,32 +1,40 @@
-from m5 import *
-
-import os.path
-import sys
-
-# Edit the following list to include the possible paths to the binary
-# and disk image directories.  The first directory on the list that
-# exists will be selected.
-SYSTEMDIR_PATH = ['/n/poolfs/z/dist/m5/system']
-
-SYSTEMDIR = None
-for d in SYSTEMDIR_PATH:
-    if os.path.exists(d):
-        SYSTEMDIR = d
-        break
-
-if not SYSTEMDIR:
-    print >>sys.stderr, "Can't find a path to system files."
-    sys.exit(1)
-
-BINDIR = SYSTEMDIR + '/binaries'
-DISKDIR = SYSTEMDIR + '/disks'
+import os, sys
+from os.path import isdir, join as joinpath
+from os import environ as env
 
 def disk(file):
-    return os.path.join(DISKDIR, file)
+    system()
+    return joinpath(disk.dir, file)
 
 def binary(file):
-    return os.path.join(BINDIR, file)
+    system()
+    return joinpath(binary.dir, file)
 
 def script(file):
-    return os.path.join(SYSTEMDIR, 'boot', file)
-
+    system()
+    return joinpath(script.dir, file)
+
+def system():
+    if not system.dir:
+        try:
+                path = env['M5_PATH'].split(':')
+        except KeyError:
+                path = [ '/dist/m5/system', '/n/poolfs/z/dist/m5/system' ]
+
+        for system.dir in path:
+            if os.path.isdir(system.dir):
+                break
+        else:
+            raise ImportError, "Can't find a path to system files."
+
+    if not binary.dir:
+        binary.dir = joinpath(system.dir, 'binaries')
+    if not disk.dir:
+        disk.dir = joinpath(system.dir, 'disks')
+    if not script.dir:
+        script.dir = joinpath(system.dir, 'boot')
+
+system.dir = None
+binary.dir = None
+disk.dir = None
+script.dir = None
diff --git a/configs/test/fs.py b/configs/test/fs.py
index 55e7003a4..41c3f8cc0 100644
--- a/configs/test/fs.py
+++ b/configs/test/fs.py
@@ -1,9 +1,10 @@
+import optparse, os, sys
+
 import m5
 from m5.objects import *
-import os
 from SysPaths import *
 
-parser = optparse.OptionParser(option_list=m5.standardOptions)
+parser = optparse.OptionParser()
 
 parser.add_option("-t", "--timing", action="store_true")
 
@@ -16,6 +17,8 @@ if args:
 # Base for tests is directory containing this file.
 test_base = os.path.dirname(__file__)
 
+script.dir =  '/z/saidi/work/m5.newmem/configs/boot'
+
 linux_image = env.get('LINUX_IMAGE', disk('linux-latest.img'))
 
 class IdeControllerPciData(PciConfigData):
@@ -97,7 +100,7 @@ class SpecwebFilesetDisk(IdeDisk):
 class BaseTsunami(Tsunami):
     cchip = TsunamiCChip(pio_addr=0x801a0000000)
     pchip = TsunamiPChip(pio_addr=0x80180000000)
-    pciconfig = PciConfigAll(pio_addr=0x801fe000000)
+    pciconfig = PciConfigAll()
     fake_sm_chip = IsaFake(pio_addr=0x801fc000370)
 
     fake_uart1 = IsaFake(pio_addr=0x801fc0002f8)
@@ -129,17 +132,7 @@ class BaseTsunami(Tsunami):
     ethernet = NSGigE(configdata=NSGigEPciData(),
                       pci_bus=0, pci_dev=1, pci_func=0)
     etherint = NSGigEInt(device=Parent.ethernet)
-#    ethernet = Sinic(configdata=SinicPciData(),
-#                      pci_bus=0, pci_dev=1, pci_func=0)
-#    etherint = SinicInt(device=Parent.ethernet)
     console = AlphaConsole(pio_addr=0x80200000000, disk=Parent.simple_disk)
-#    bridge = PciFake(configdata=BridgePciData(), pci_bus=0, pci_dev=2, pci_func=0)
-
-#class FreeBSDTsunami(BaseTsunami):
-#    disk0 = FreeBSDRootDisk(delay='0us', driveID='master')
-#    ide = IdeController(disks=[Parent.disk0],
-#                        configdata=IdeControllerPciData(),
-#                        pci_func=0, pci_dev=0, pci_bus=0)
 
 class LinuxTsunami(BaseTsunami):
     disk0 = LinuxRootDisk(driveID='master')
@@ -149,56 +142,62 @@ class LinuxTsunami(BaseTsunami):
                         configdata=IdeControllerPciData(),
                         pci_func=0, pci_dev=0, pci_bus=0)
 
-class LinuxAlphaSystem(LinuxAlphaSystem):
+class MyLinuxAlphaSystem(LinuxAlphaSystem):
     magicbus = Bus(bus_id=0)
     magicbus2 = Bus(bus_id=1)
     bridge = Bridge()
     physmem = PhysicalMemory(range = AddrRange('128MB'))
-    c0a = Connector(side_a=Parent.magicbus, side_b=Parent.bridge, side_b_name="side_a")
-    c0b = Connector(side_a=Parent.magicbus2, side_b=Parent.bridge, side_b_name="side_b")
-    c1 = Connector(side_a=Parent.physmem, side_b=Parent.magicbus2)
+    bridge.side_a = magicbus.port
+    bridge.side_b = magicbus2.port
+    physmem.port = magicbus2.port
     tsunami = LinuxTsunami()
-    c2 = Connector(side_a=Parent.tsunami.cchip, side_a_name='pio', side_b=Parent.magicbus)
-    c3 = Connector(side_a=Parent.tsunami.pchip, side_a_name='pio', side_b=Parent.magicbus)
-    c4 = Connector(side_a=Parent.tsunami.pciconfig, side_a_name='pio', side_b=Parent.magicbus)
-    c5 = Connector(side_a=Parent.tsunami.fake_sm_chip, side_a_name='pio', side_b=Parent.magicbus)
-    c6 = Connector(side_a=Parent.tsunami.ethernet, side_a_name='pio', side_b=Parent.magicbus)
-    c6a = Connector(side_a=Parent.tsunami.ethernet, side_a_name='dma', side_b=Parent.magicbus)
-    c7 = Connector(side_a=Parent.tsunami.fake_uart1, side_a_name='pio', side_b=Parent.magicbus)
-    c8 = Connector(side_a=Parent.tsunami.fake_uart2, side_a_name='pio', side_b=Parent.magicbus)
-    c9 = Connector(side_a=Parent.tsunami.fake_uart3, side_a_name='pio', side_b=Parent.magicbus)
-    c10 = Connector(side_a=Parent.tsunami.fake_uart4, side_a_name='pio', side_b=Parent.magicbus)
-    c11 = Connector(side_a=Parent.tsunami.ide, side_a_name='pio', side_b=Parent.magicbus)
-    c13 = Connector(side_a=Parent.tsunami.ide, side_a_name='dma', side_b=Parent.magicbus)
-    c12 = Connector(side_a=Parent.tsunami.fake_ppc, side_a_name='pio', side_b=Parent.magicbus)
-    c14 = Connector(side_a=Parent.tsunami.fake_OROM, side_a_name='pio', side_b=Parent.magicbus)
-    c16 = Connector(side_a=Parent.tsunami.fake_pnp_addr, side_a_name='pio', side_b=Parent.magicbus)
-    c17 = Connector(side_a=Parent.tsunami.fake_pnp_write, side_a_name='pio', side_b=Parent.magicbus)
-    c18 = Connector(side_a=Parent.tsunami.fake_pnp_read0, side_a_name='pio', side_b=Parent.magicbus)
-    c19 = Connector(side_a=Parent.tsunami.fake_pnp_read1, side_a_name='pio', side_b=Parent.magicbus)
-    c20 = Connector(side_a=Parent.tsunami.fake_pnp_read2, side_a_name='pio', side_b=Parent.magicbus)
-    c21 = Connector(side_a=Parent.tsunami.fake_pnp_read3, side_a_name='pio', side_b=Parent.magicbus)
-    c22 = Connector(side_a=Parent.tsunami.fake_pnp_read4, side_a_name='pio', side_b=Parent.magicbus)
-    c23 = Connector(side_a=Parent.tsunami.fake_pnp_read5, side_a_name='pio', side_b=Parent.magicbus)
-    c24 = Connector(side_a=Parent.tsunami.fake_pnp_read6, side_a_name='pio', side_b=Parent.magicbus)
-    c25 = Connector(side_a=Parent.tsunami.fake_pnp_read7, side_a_name='pio', side_b=Parent.magicbus)
-    c27 = Connector(side_a=Parent.tsunami.fake_ata0, side_a_name='pio', side_b=Parent.magicbus)
-    c28 = Connector(side_a=Parent.tsunami.fake_ata1, side_a_name='pio', side_b=Parent.magicbus)
-    c30 = Connector(side_a=Parent.tsunami.fb, side_a_name='pio', side_b=Parent.magicbus)
-    c31 = Connector(side_a=Parent.tsunami.io, side_a_name='pio', side_b=Parent.magicbus)
-    c32 = Connector(side_a=Parent.tsunami.uart, side_a_name='pio', side_b=Parent.magicbus)
-    c33 = Connector(side_a=Parent.tsunami.console, side_a_name='pio', side_b=Parent.magicbus)
+    tsunami.cchip.pio = magicbus.port
+    tsunami.pchip.pio = magicbus.port
+    tsunami.pciconfig.pio = magicbus.default
+    tsunami.fake_sm_chip.pio = magicbus.port
+    tsunami.ethernet.pio = magicbus.port
+    tsunami.ethernet.dma = magicbus.port
+    tsunami.ethernet.config = magicbus.port
+    tsunami.fake_uart1.pio = magicbus.port
+    tsunami.fake_uart2.pio = magicbus.port
+    tsunami.fake_uart3.pio = magicbus.port
+    tsunami.fake_uart4.pio = magicbus.port
+    tsunami.ide.pio = magicbus.port
+    tsunami.ide.dma = magicbus.port
+    tsunami.ide.config = magicbus.port
+    tsunami.fake_ppc.pio = magicbus.port
+    tsunami.fake_OROM.pio = magicbus.port
+    tsunami.fake_pnp_addr.pio = magicbus.port
+    tsunami.fake_pnp_write.pio = magicbus.port
+    tsunami.fake_pnp_read0.pio = magicbus.port
+    tsunami.fake_pnp_read1.pio = magicbus.port
+    tsunami.fake_pnp_read2.pio = magicbus.port
+    tsunami.fake_pnp_read3.pio = magicbus.port
+    tsunami.fake_pnp_read4.pio = magicbus.port
+    tsunami.fake_pnp_read5.pio = magicbus.port
+    tsunami.fake_pnp_read6.pio = magicbus.port
+    tsunami.fake_pnp_read7.pio = magicbus.port
+    tsunami.fake_ata0.pio = magicbus.port
+    tsunami.fake_ata1.pio = magicbus.port
+    tsunami.fb.pio = magicbus.port
+    tsunami.io.pio = magicbus.port
+    tsunami.uart.pio = magicbus.port
+    tsunami.console.pio = magicbus.port
     raw_image = RawDiskImage(image_file=disk('linux-latest.img'),
                              read_only=True)
     simple_disk = SimpleDisk(disk=Parent.raw_image)
     intrctrl = IntrControl()
     if options.timing:
         cpu = TimingSimpleCPU()
+        mem_mode = 'timing'
     else:
         cpu = AtomicSimpleCPU()
-    cpu.mem = Parent.magicbus2
+    cpu.mem = magicbus2
+    cpu.icache_port = magicbus2.port
+    cpu.dcache_port = magicbus2.port
     cpu.itb = AlphaITB()
     cpu.dtb = AlphaDTB()
+    cpu.clock = '2GHz'
     sim_console = SimConsole(listener=ConsoleListener(port=3456))
     kernel = binary('vmlinux')
     pal = binary('ts_osfpal')
@@ -221,14 +220,23 @@ def DualRoot(clientSystem, serverSystem):
     self.etherlink = EtherLink(int1 = Parent.client.tsunami.etherint[0],
                                int2 = Parent.server.tsunami.etherint[0],
                                dump = Parent.etherdump)
-    self.clock = '5GHz'
+    self.clock = '1THz'
     return self
 
-root = DualRoot(LinuxAlphaSystem(readfile=script('netperf-stream-nt-client.rcS')),
-                LinuxAlphaSystem(readfile=script('netperf-server.rcS')))
+root = DualRoot(
+    MyLinuxAlphaSystem(readfile=script('netperf-stream-nt-client.rcS')),
+    MyLinuxAlphaSystem(readfile=script('netperf-server.rcS')))
 
 m5.instantiate(root)
 
+#exit_event = m5.simulate(2600000000000)
+#if exit_event.getCause() != "user interrupt received":
+#    m5.checkpoint(root, 'cpt')
+#    exit_event = m5.simulate(300000000000)
+#    if exit_event.getCause() != "user interrupt received":
+#        m5.checkpoint(root, 'cptA')
+
+
 exit_event = m5.simulate()
 
-print 'Exiting @', m5.curTick(), 'because', exit_event.getCause()
+print 'Exiting @ cycle', m5.curTick(), 'because', exit_event.getCause()
diff --git a/configs/test/test.py b/configs/test/test.py
index 8c5b06e6a..feb44e2d1 100644
--- a/configs/test/test.py
+++ b/configs/test/test.py
@@ -4,14 +4,27 @@
 # MIPS: "m5 test.py -a Mips -c hello_mips"
 
 import os, optparse, sys
+
 import m5
 from m5.objects import *
+from FullO3Config import *
 
 # parse command-line arguments
-parser = optparse.OptionParser(option_list=m5.standardOptions)
+parser = optparse.OptionParser()
 
-parser.add_option("-c", "--cmd", default="hello")
-parser.add_option("-t", "--timing", action="store_true")
+parser.add_option("-c", "--cmd", default="hello",
+        help="The binary to run in syscall emulation mode.")
+parser.add_option("-o", "--options", default="",
+        help="The options to pass to the binary, use \" \" around the entire\
+                string.")
+parser.add_option("-i", "--input", default="",
+        help="A file of input to give to the binary.")
+parser.add_option("-t", "--timing", action="store_true",
+        help="Use simple timing CPU.")
+parser.add_option("-d", "--detailed", action="store_true",
+        help="Use detailed CPU.")
+parser.add_option("-m", "--maxtick", type="int",
+        help="Set the maximum number of ticks to run  for")
 
 (options, args) = parser.parse_args()
 
@@ -24,27 +37,64 @@ this_dir = os.path.dirname(__file__)
 
 process = LiveProcess()
 process.executable = os.path.join(this_dir, options.cmd)
-process.cmd = options.cmd
+process.cmd = options.cmd + " " + options.options
+if options.input != "":
+    process.input = options.input
 
 magicbus = Bus()
 mem = PhysicalMemory()
 
+if options.timing and options.detailed:
+       print "Error: you may only specify one cpu model";
+       sys.exit(1)
+
 if options.timing:
     cpu = TimingSimpleCPU()
+elif options.detailed:
+    #check for SMT workload
+    workloads = options.cmd.split(';')
+    if len(workloads) > 1:
+        process = []
+        smt_idx = 0
+        inputs = []
+
+        if options.input != "":
+            inputs = options.input.split(';')
+
+        for wrkld in workloads:
+            smt_process = LiveProcess()
+            smt_process.executable = os.path.join(this_dir, wrkld)
+            smt_process.cmd = wrkld + " " + options.options
+            if inputs and inputs[smt_idx]:
+                smt_process.input = inputs[smt_idx]
+            process += [smt_process, ]
+            smt_idx += 1
+
+    cpu = DetailedO3CPU()
 else:
     cpu = AtomicSimpleCPU()
 cpu.workload = process
 cpu.mem = magicbus
+cpu.icache_port=magicbus.port
+cpu.dcache_port=magicbus.port
 
 system = System(physmem = mem, cpu = cpu)
-system.c1 =  Connector(side_a = mem, side_b = magicbus)
+
+if options.timing or options.detailed:
+    system.mem_mode = 'timing'
+
+
+mem.port = magicbus.port
 root = Root(system = system)
 
 # instantiate configuration
 m5.instantiate(root)
 
 # simulate until program terminates
-exit_event = m5.simulate()
+if options.maxtick:
+    exit_event = m5.simulate(options.maxtick)
+else:
+    exit_event = m5.simulate()
 
-print 'Exiting @', m5.curTick(), 'because', exit_event.getCause()
+print 'Exiting @ tick', m5.curTick(), 'because', exit_event.getCause()
 
diff --git a/src/SConscript b/src/SConscript
index a1c18711c..9825cafe7 100644
--- a/src/SConscript
+++ b/src/SConscript
@@ -62,7 +62,6 @@ base_sources = Split('''
 	base/range.cc
 	base/random.cc
 	base/sat_counter.cc
-        base/serializer.cc
 	base/socket.cc
 	base/statistics.cc
 	base/str.cc
@@ -85,33 +84,54 @@ base_sources = Split('''
 	cpu/base.cc
 	cpu/cpuevent.cc
 	cpu/exetrace.cc
+        cpu/func_unit.cc
         cpu/op_class.cc
 	cpu/pc_event.cc
         cpu/quiesce_event.cc
 	cpu/static_inst.cc
-        cpu/sampler/sampler.cc
         cpu/simple_thread.cc
         cpu/thread_state.cc
 
-        encumbered/cpu/full/fu_pool.cc
-        
         mem/bridge.cc
         mem/bus.cc
-        mem/connector.cc
         mem/mem_object.cc
         mem/packet.cc
         mem/physical.cc
         mem/port.cc
 
+        mem/cache/base_cache.cc
+        mem/cache/cache.cc
+        mem/cache/coherence/coherence_protocol.cc
+        mem/cache/coherence/uni_coherence.cc
+        mem/cache/miss/blocking_buffer.cc
+        mem/cache/miss/miss_queue.cc
+        mem/cache/miss/mshr.cc
+        mem/cache/miss/mshr_queue.cc
+        mem/cache/prefetch/base_prefetcher.cc
+        mem/cache/prefetch/ghb_prefetcher.cc
+        mem/cache/prefetch/prefetcher.cc
+        mem/cache/prefetch/stride_prefetcher.cc
+        mem/cache/prefetch/tagged_prefetcher.cc
+        mem/cache/tags/base_tags.cc
+        mem/cache/tags/cache_tags.cc
+        mem/cache/tags/fa_lru.cc
+        mem/cache/tags/iic.cc
+        mem/cache/tags/lru.cc
+        mem/cache/tags/repl/gen.cc
+        mem/cache/tags/repl/repl.cc
+        mem/cache/tags/split.cc
+        mem/cache/tags/split_lifo.cc
+        mem/cache/tags/split_lru.cc
+
+        mem/cache/cache_builder.cc
+
 	sim/builder.cc
-	sim/configfile.cc
 	sim/debug.cc
 	sim/eventq.cc
 	sim/faults.cc
 	sim/main.cc
-        python/swig/main_wrap.cc
+        python/swig/cc_main_wrap.cc
 	sim/param.cc
-	sim/profile.cc
 	sim/root.cc
 	sim/serialize.cc
 	sim/sim_events.cc
diff --git a/src/arch/SConscript b/src/arch/SConscript
index c90694a68..bc517341a 100644
--- a/src/arch/SConscript
+++ b/src/arch/SConscript
@@ -28,7 +28,7 @@
 #
 # Authors: Steve Reinhardt
 
-import os.path
+import os.path, sys
 
 # Import build environment variable from SConstruct.
 Import('env')
@@ -128,13 +128,19 @@ isa_desc_gen_files = Split('decoder.cc decoder.hh')
 isa_desc_gen_files += [CpuModel.dict[cpu].filename
                        for cpu in env['CPU_MODELS']]
 
+# Also include the CheckerCPU as one of the models if it is being
+# enabled via command line.
+if env['USE_CHECKER']:
+    isa_desc_gen_files += [CpuModel.dict['CheckerCPU'].filename]
+
 # The emitter patches up the sources & targets to include the
 # autogenerated files as targets and isa parser itself as a source.
 def isa_desc_emitter(target, source, env):
     return (isa_desc_gen_files, [isa_parser, cpu_models_file] + source)
 
 # Pieces are in place, so create the builder.
-isa_desc_builder = Builder(action='python2.4 $SOURCES $TARGET.dir $CPU_MODELS',
+python = sys.executable  # use same Python binary used to run scons
+isa_desc_builder = Builder(action=python + ' $SOURCES $TARGET.dir $CPU_MODELS',
                            emitter = isa_desc_emitter)
 
 env.Append(BUILDERS = { 'ISADesc' : isa_desc_builder })
diff --git a/src/arch/alpha/ev5.cc b/src/arch/alpha/ev5.cc
index 7d6894733..ae3b668ea 100644
--- a/src/arch/alpha/ev5.cc
+++ b/src/arch/alpha/ev5.cc
@@ -59,8 +59,12 @@ AlphaISA::initCPU(ThreadContext *tc, int cpuId)
     tc->setIntReg(16, cpuId);
     tc->setIntReg(0, cpuId);
 
-    tc->setPC(tc->readMiscReg(IPR_PAL_BASE) + (new ResetFault)->vect());
+    AlphaFault *reset = new ResetFault;
+
+    tc->setPC(tc->readMiscReg(IPR_PAL_BASE) + reset->vect());
     tc->setNextPC(tc->readPC() + sizeof(MachInst));
+
+    delete reset;
 }
 
 ////////////////////////////////////////////////////////////////////////
diff --git a/src/arch/alpha/faults.cc b/src/arch/alpha/faults.cc
index 8493223ff..eef4361fd 100644
--- a/src/arch/alpha/faults.cc
+++ b/src/arch/alpha/faults.cc
@@ -35,6 +35,9 @@
 #include "base/trace.hh"
 #if FULL_SYSTEM
 #include "arch/alpha/ev5.hh"
+#else
+#include "sim/process.hh"
+#include "mem/page_table.hh"
 #endif
 
 namespace AlphaISA
@@ -56,6 +59,12 @@ FaultName ArithmeticFault::_name = "arith";
 FaultVect ArithmeticFault::_vect = 0x0501;
 FaultStat ArithmeticFault::_count;
 
+#if !FULL_SYSTEM
+FaultName PageTableFault::_name = "page_table_fault";
+FaultVect PageTableFault::_vect = 0x0000;
+FaultStat PageTableFault::_count;
+#endif
+
 FaultName InterruptFault::_name = "interrupt";
 FaultVect InterruptFault::_vect = 0x0101;
 FaultStat InterruptFault::_count;
@@ -173,6 +182,30 @@ void ItbFault::invoke(ThreadContext * tc)
     AlphaFault::invoke(tc);
 }
 
+#else //!FULL_SYSTEM
+
+void PageTableFault::invoke(ThreadContext *tc)
+{
+    Process *p = tc->getProcessPtr();
+
+    // address is higher than the stack region or in the current stack region
+    if (vaddr > p->stack_base || vaddr > p->stack_min)
+        FaultBase::invoke(tc);
+
+    // We've accessed the next page
+    if (vaddr > p->stack_min - PageBytes) {
+        warn("Increasing stack %#x:%#x to %#x:%#x because of access to %#x",
+                p->stack_min, p->stack_base, p->stack_min - PageBytes,
+                p->stack_base, vaddr);
+        p->stack_min -= PageBytes;
+        if (p->stack_base - p->stack_min > 8*1024*1024)
+            fatal("Over max stack size for one thread\n");
+        p->pTable->allocate(p->stack_min, PageBytes);
+    } else {
+        FaultBase::invoke(tc);
+    }
+}
+
 #endif
 
 } // namespace AlphaISA
diff --git a/src/arch/alpha/faults.hh b/src/arch/alpha/faults.hh
index f952cf9d6..11a568174 100644
--- a/src/arch/alpha/faults.hh
+++ b/src/arch/alpha/faults.hh
@@ -81,6 +81,29 @@ class AlignmentFault : public AlphaFault
     bool isAlignmentFault() {return true;}
 };
 
+#if !FULL_SYSTEM
+class PageTableFault : public AlphaFault
+{
+  private:
+    Addr vaddr;
+    static FaultName _name;
+    static FaultVect _vect;
+    static FaultStat _count;
+  public:
+    PageTableFault(Addr va)
+        : vaddr(va) {}
+    FaultName name() {return _name;}
+    FaultVect vect() {return _vect;}
+    FaultStat & countStat() {return _count;}
+    void invoke(ThreadContext * tc);
+};
+
+static inline Fault genPageTableFault(Addr va)
+{
+    return new PageTableFault(va);
+}
+#endif
+
 static inline Fault genMachineCheckFault()
 {
     return new MachineCheckFault;
diff --git a/src/arch/alpha/freebsd/system.cc b/src/arch/alpha/freebsd/system.cc
index 7cf68e0db..8d50e1612 100644
--- a/src/arch/alpha/freebsd/system.cc
+++ b/src/arch/alpha/freebsd/system.cc
@@ -97,6 +97,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(FreebsdAlphaSystem)
 
     Param<Tick> boot_cpu_frequency;
     SimObjectParam<PhysicalMemory *> physmem;
+    SimpleEnumParam<System::MemoryMode> mem_mode;
 
     Param<string> kernel;
     Param<string> console;
@@ -115,6 +116,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(FreebsdAlphaSystem)
 
     INIT_PARAM(boot_cpu_frequency, "Frequency of the boot CPU"),
     INIT_PARAM(physmem, "phsyical memory"),
+    INIT_ENUM_PARAM(mem_mode, "Memory Mode, (1=atomic, 2=timing)",
+            System::MemoryModeStrings),
     INIT_PARAM(kernel, "file that contains the kernel code"),
     INIT_PARAM(console, "file that contains the console code"),
     INIT_PARAM(pal, "file that contains palcode"),
@@ -133,6 +136,7 @@ CREATE_SIM_OBJECT(FreebsdAlphaSystem)
     p->name = getInstanceName();
     p->boot_cpu_frequency = boot_cpu_frequency;
     p->physmem = physmem;
+    p->mem_mode = mem_mode;
     p->kernel_path = kernel;
     p->console_path = console;
     p->palcode = pal;
diff --git a/src/arch/alpha/isa/decoder.isa b/src/arch/alpha/isa/decoder.isa
index fbdb119b6..f449d2d69 100644
--- a/src/arch/alpha/isa/decoder.isa
+++ b/src/arch/alpha/isa/decoder.isa
@@ -47,9 +47,11 @@ decode OPCODE default Unknown::unknown() {
         0x23: ldt({{ Fa = Mem.df; }});
         0x2a: ldl_l({{ Ra.sl = Mem.sl; }}, mem_flags = LOCKED);
         0x2b: ldq_l({{ Ra.uq = Mem.uq; }}, mem_flags = LOCKED);
+#ifdef USE_COPY
         0x20: MiscPrefetch::copy_load({{ EA = Ra; }},
                                       {{ fault = xc->copySrcTranslate(EA); }},
                                       inst_flags = [IsMemRef, IsLoad, IsCopy]);
+#endif
     }
 
     format LoadOrPrefetch {
@@ -69,9 +71,11 @@ decode OPCODE default Unknown::unknown() {
         0x0f: stq_u({{ Mem.uq = Ra.uq; }}, {{ EA = (Rb + disp) & ~7; }});
         0x26: sts({{ Mem.ul = t_to_s(Fa.uq); }});
         0x27: stt({{ Mem.df = Fa; }});
+#ifdef USE_COPY
         0x24: MiscPrefetch::copy_store({{ EA = Rb; }},
                                        {{ fault = xc->copy(EA); }},
                                        inst_flags = [IsMemRef, IsStore, IsCopy]);
+#endif
     }
 
     format StoreCond {
@@ -659,11 +663,11 @@ decode OPCODE default Unknown::unknown() {
             0xe000: rc({{
                 Ra = xc->readIntrFlag();
                 xc->setIntrFlag(0);
-            }}, IsNonSpeculative);
+            }}, IsNonSpeculative, IsUnverifiable);
             0xf000: rs({{
                 Ra = xc->readIntrFlag();
                 xc->setIntrFlag(1);
-            }}, IsNonSpeculative);
+            }}, IsNonSpeculative, IsUnverifiable);
         }
 #else
         format FailUnimpl {
@@ -701,7 +705,7 @@ decode OPCODE default Unknown::unknown() {
             }}, IsNonSpeculative);
             0x83: callsys({{
                 xc->syscall(R0);
-            }}, IsNonSpeculative);
+            }}, IsSerializeAfter, IsNonSpeculative);
             // Read uniq reg into ABI return value register (r0)
             0x9e: rduniq({{ R0 = Runiq; }}, IsIprAccess);
             // Write uniq reg with value from ABI arg register (r16)
diff --git a/src/arch/alpha/linux/system.cc b/src/arch/alpha/linux/system.cc
index bb35f046d..ef4e18cb5 100644
--- a/src/arch/alpha/linux/system.cc
+++ b/src/arch/alpha/linux/system.cc
@@ -150,9 +150,6 @@ LinuxAlphaSystem::~LinuxAlphaSystem()
     delete debugPrintkEvent;
     delete idleStartEvent;
     delete printThreadEvent;
-    delete intStartEvent;
-    delete intEndEvent;
-    delete intEndEvent2;
 }
 
 
@@ -194,6 +191,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(LinuxAlphaSystem)
 
     Param<Tick> boot_cpu_frequency;
     SimObjectParam<PhysicalMemory *> physmem;
+    SimpleEnumParam<System::MemoryMode> mem_mode;
 
     Param<string> kernel;
     Param<string> console;
@@ -212,6 +210,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(LinuxAlphaSystem)
 
     INIT_PARAM(boot_cpu_frequency, "Frequency of the boot CPU"),
     INIT_PARAM(physmem, "phsyical memory"),
+    INIT_ENUM_PARAM(mem_mode, "Memory Mode, (1=atomic, 2=timing)",
+            System::MemoryModeStrings),
     INIT_PARAM(kernel, "file that contains the kernel code"),
     INIT_PARAM(console, "file that contains the console code"),
     INIT_PARAM(pal, "file that contains palcode"),
@@ -230,6 +230,7 @@ CREATE_SIM_OBJECT(LinuxAlphaSystem)
     p->name = getInstanceName();
     p->boot_cpu_frequency = boot_cpu_frequency;
     p->physmem = physmem;
+    p->mem_mode = mem_mode;
     p->kernel_path = kernel;
     p->console_path = console;
     p->palcode = pal;
diff --git a/src/arch/alpha/regfile.hh b/src/arch/alpha/regfile.hh
index 1025412cd..9ecad6f42 100644
--- a/src/arch/alpha/regfile.hh
+++ b/src/arch/alpha/regfile.hh
@@ -112,6 +112,10 @@ namespace AlphaISA
             lock_flag = 0;
             lock_addr = 0;
         }
+
+        void serialize(std::ostream &os);
+
+        void unserialize(Checkpoint *cp, const std::string &section);
 #if FULL_SYSTEM
       protected:
         typedef uint64_t InternalProcReg;
diff --git a/src/arch/alpha/system.cc b/src/arch/alpha/system.cc
index 3aaba7d58..a7e615531 100644
--- a/src/arch/alpha/system.cc
+++ b/src/arch/alpha/system.cc
@@ -26,6 +26,7 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * Authors: Ali Saidi
+ *          Nathan Binkert
  */
 
 #include "arch/alpha/ev5.hh"
@@ -220,6 +221,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(AlphaSystem)
 
     Param<Tick> boot_cpu_frequency;
     SimObjectParam<PhysicalMemory *> physmem;
+    SimpleEnumParam<System::MemoryMode> mem_mode;
 
     Param<std::string> kernel;
     Param<std::string> console;
@@ -238,6 +240,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(AlphaSystem)
 
     INIT_PARAM(boot_cpu_frequency, "Frequency of the boot CPU"),
     INIT_PARAM(physmem, "phsyical memory"),
+    INIT_ENUM_PARAM(mem_mode, "Memory Mode, (1=atomic, 2=timing)",
+            System::MemoryModeStrings),
     INIT_PARAM(kernel, "file that contains the kernel code"),
     INIT_PARAM(console, "file that contains the console code"),
     INIT_PARAM(pal, "file that contains palcode"),
@@ -256,6 +260,7 @@ CREATE_SIM_OBJECT(AlphaSystem)
     p->name = getInstanceName();
     p->boot_cpu_frequency = boot_cpu_frequency;
     p->physmem = physmem;
+    p->mem_mode = mem_mode;
     p->kernel_path = kernel;
     p->console_path = console;
     p->palcode = pal;
diff --git a/src/arch/alpha/system.hh b/src/arch/alpha/system.hh
index b26a5e301..0f4f64581 100644
--- a/src/arch/alpha/system.hh
+++ b/src/arch/alpha/system.hh
@@ -26,6 +26,7 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * Authors: Ali Saidi
+ *          Nathan Binkert
  */
 
 #ifndef __ARCH_ALPHA_SYSTEM_HH__
diff --git a/src/arch/alpha/tru64/system.cc b/src/arch/alpha/tru64/system.cc
index 6c0edc1ee..3ef1e4d3c 100644
--- a/src/arch/alpha/tru64/system.cc
+++ b/src/arch/alpha/tru64/system.cc
@@ -95,6 +95,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(Tru64AlphaSystem)
 
     Param<Tick> boot_cpu_frequency;
     SimObjectParam<PhysicalMemory *> physmem;
+    SimpleEnumParam<System::MemoryMode> mem_mode;
 
     Param<string> kernel;
     Param<string> console;
@@ -113,6 +114,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(Tru64AlphaSystem)
 
     INIT_PARAM(boot_cpu_frequency, "frequency of the boot cpu"),
     INIT_PARAM(physmem, "phsyical memory"),
+    INIT_ENUM_PARAM(mem_mode, "Memory Mode, (1=atomic, 2=timing)",
+            System::MemoryModeStrings),
     INIT_PARAM(kernel, "file that contains the kernel code"),
     INIT_PARAM(console, "file that contains the console code"),
     INIT_PARAM(pal, "file that contains palcode"),
@@ -131,6 +134,7 @@ CREATE_SIM_OBJECT(Tru64AlphaSystem)
     p->name = getInstanceName();
     p->boot_cpu_frequency = boot_cpu_frequency;
     p->physmem = physmem;
+    p->mem_mode = mem_mode;
     p->kernel_path = kernel;
     p->console_path = console;
     p->palcode = pal;
diff --git a/src/arch/mips/SConscript b/src/arch/mips/SConscript
index 6295a6c11..8353bcde7 100644
--- a/src/arch/mips/SConscript
+++ b/src/arch/mips/SConscript
@@ -52,8 +52,7 @@ base_sources = Split('''
 
 # Full-system sources
 full_system_sources = Split('''
-	memory.cc
-	mips34k.cc
+	#Insert Full-System Files Here
 	''')
 
 # Syscall emulation (non-full-system) sources
diff --git a/src/arch/mips/faults.cc b/src/arch/mips/faults.cc
index 810c3fed4..cfeb045eb 100644
--- a/src/arch/mips/faults.cc
+++ b/src/arch/mips/faults.cc
@@ -32,6 +32,10 @@
 #include "cpu/thread_context.hh"
 #include "cpu/base.hh"
 #include "base/trace.hh"
+#if !FULL_SYSTEM
+#include "sim/process.hh"
+#include "mem/page_table.hh"
+#endif
 
 namespace MipsISA
 {
@@ -52,6 +56,12 @@ FaultName ArithmeticFault::_name = "arith";
 FaultVect ArithmeticFault::_vect = 0x0501;
 FaultStat ArithmeticFault::_count;
 
+#if !FULL_SYSTEM
+FaultName PageTableFault::_name = "page_table_fault";  // value returned by name()
+FaultVect PageTableFault::_vect = 0x0000;  // value returned by vect()
+FaultStat PageTableFault::_count;  // no initializer; exposed by countStat()
+#endif
+
 FaultName InterruptFault::_name = "interrupt";
 FaultVect InterruptFault::_vect = 0x0101;
 FaultStat InterruptFault::_count;
@@ -127,7 +137,28 @@ void ArithmeticFault::invoke(ThreadContext * tc)
     panic("Arithmetic traps are unimplemented!");
 }
 
-#endif
+#else //!FULL_SYSTEM
 
+void PageTableFault::invoke(ThreadContext *tc)  // grows the stack by one page, or defers to FaultBase
+{
+    Process *p = tc->getProcessPtr();  // process whose stack bounds are checked
+
+    // address is higher than the stack region or in the current stack region
+    if (vaddr > p->stack_base || vaddr > p->stack_min)
+        FaultBase::invoke(tc);  // NOTE(review): no return follows; presumably this call never returns -- confirm
+
+    // We've accessed the next page
+    if (vaddr > p->stack_min - PageBytes) {
+        p->stack_min -= PageBytes;  // lower stack_min by exactly one page
+        if (p->stack_base - p->stack_min > 8*1024*1024)  // checked after the decrement
+            fatal("Over max stack size for one thread\n");
+        p->pTable->allocate(p->stack_min, PageBytes);  // one page starting at the new stack_min
+        warn("Increasing stack size by one page.");
+    } else {
+        FaultBase::invoke(tc);  // more than one page below stack_min: base handler
+    }
+}
+
+#endif
 } // namespace MipsISA
 
diff --git a/src/arch/mips/faults.hh b/src/arch/mips/faults.hh
index d8bf59cc1..95c61cfbc 100644
--- a/src/arch/mips/faults.hh
+++ b/src/arch/mips/faults.hh
@@ -79,6 +79,30 @@ class AlignmentFault : public MipsFault
     bool isAlignmentFault() {return true;}
 };
 
+#if !FULL_SYSTEM
+class PageTableFault : public MipsFault  // only compiled when !FULL_SYSTEM
+{
+  private:
+    Addr vaddr;  // address handed to the constructor; read by invoke()
+    static FaultName _name;  // class-wide; returned by name()
+    static FaultVect _vect;  // class-wide; returned by vect()
+    static FaultStat _count;  // class-wide; exposed by countStat()
+  public:
+    PageTableFault(Addr va)
+        : vaddr(va) {}  // stores va; no other work at construction
+    FaultName name() {return _name;}
+    FaultVect vect() {return _vect;}
+    FaultStat & countStat() {return _count;}  // by reference, not a copy
+    void invoke(ThreadContext * tc);  // defined in src/arch/mips/faults.cc
+};
+
+static inline Fault genPageTableFault(Addr va)  // factory for PageTableFault
+{
+    return new PageTableFault(va);  // heap object converted to Fault; ownership depends on Fault (not shown here)
+}
+#endif
+
+
 static inline Fault genMachineCheckFault()
 {
     return new MachineCheckFault;
diff --git a/src/arch/mips/isa/base.isa b/src/arch/mips/isa/base.isa
index b733da7da..f07b06e03 100644
--- a/src/arch/mips/isa/base.isa
+++ b/src/arch/mips/isa/base.isa
@@ -1,6 +1,6 @@
 // -*- mode:c++ -*-
 
-// Copyright (c) 2003-2006 The Regents of The University of Michigan
+// Copyright (c) 2006 The Regents of The University of Michigan
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -38,7 +38,6 @@ output header {{
 
     using namespace MipsISA;
 
-
     /**
      * Base class for all MIPS static instructions.
      */
diff --git a/src/arch/mips/isa/bitfields.isa b/src/arch/mips/isa/bitfields.isa
index e8d4578c7..35815bf1f 100644
--- a/src/arch/mips/isa/bitfields.isa
+++ b/src/arch/mips/isa/bitfields.isa
@@ -1,6 +1,6 @@
 // -*- mode:c++ -*-
 
-// Copyright (c) 2003-2006 The Regents of The University of Michigan
+// Copyright (c) 2006 The Regents of The University of Michigan
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
diff --git a/src/arch/mips/isa/decoder.isa b/src/arch/mips/isa/decoder.isa
index a64f74c4f..9ac982e34 100644
--- a/src/arch/mips/isa/decoder.isa
+++ b/src/arch/mips/isa/decoder.isa
@@ -1,6 +1,6 @@
 // -*- mode:c++ -*-
 
-// Copyright (c) 2003-2006 The Regents of The University of Michigan
+// Copyright (c) 2006 The Regents of The University of Michigan
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -158,14 +158,16 @@ decode OPCODE_HI default Unknown::unknown() {
                 }
 
                 format HiLoMiscOp {
-                    0x2: div({{
-                        HI = Rs.sd % Rt.sd;
-                        LO = Rs.sd / Rt.sd;
-                    }});
-                    0x3: divu({{
-                        HI = Rs.ud % Rt.ud;
-                        LO = Rs.ud / Rt.ud;
-                    }});
+                    0x2: div({{ if (Rt.sd != 0) {
+                                    HI = Rs.sd % Rt.sd;
+                                    LO = Rs.sd / Rt.sd;
+                                }
+                             }});
+                    0x3: divu({{ if (Rt.ud != 0) {
+                                     HI = Rs.ud % Rt.ud;
+                                     LO = Rs.ud / Rt.ud;
+                                 }
+                              }});
                 }
             }
 
@@ -333,7 +335,7 @@ decode OPCODE_HI default Unknown::unknown() {
             0x0: decode RS_HI {
                 0x0: decode RS_LO {
                     format CP1Control {
-                        0x0: mfc1 ({{ Rt.uw = Fs.uw<31:0>; }});
+                        0x0: mfc1 ({{ Rt.uw = Fs.uw; }});
 
                         0x2: cfc1({{
                             switch (FS)
@@ -438,9 +440,10 @@ decode OPCODE_HI default Unknown::unknown() {
                                 0x3: div_s({{ Fd.sf = Fs.sf / Ft.sf;}});
                                 0x4: sqrt_s({{ Fd.sf = sqrt(Fs.sf);}});
                                 0x5: abs_s({{ Fd.sf = fabs(Fs.sf);}});
-                                0x6: mov_s({{ Fd.sf = Fs.sf;}});
                                 0x7: neg_s({{ Fd.sf = -Fs.sf;}});
                             }
+
+                            0x6: BasicOp::mov_s({{ Fd.sf = Fs.sf;}});
                         }
 
                         0x1: decode FUNCTION_LO {
@@ -549,9 +552,10 @@ decode OPCODE_HI default Unknown::unknown() {
                                 0x3: div_d({{ Fd.df = Fs.df / Ft.df; }});
                                 0x4: sqrt_d({{ Fd.df = sqrt(Fs.df);  }});
                                 0x5: abs_d({{ Fd.df = fabs(Fs.df);   }});
-                                0x6: mov_d({{ Fd.df = Fs.df;         }});
                                 0x7: neg_d({{ Fd.df = -1 * Fs.df;    }});
                             }
+
+                            0x6: BasicOp::mov_d({{ Fd.df = Fs.df;    }});
                         }
 
                         0x1: decode FUNCTION_LO {
@@ -853,17 +857,19 @@ decode OPCODE_HI default Unknown::unknown() {
         0x3: decode FUNCTION_HI {
             0x0: decode FUNCTION_LO {
                 format LoadIndexedMemory {
-                    0x0: lwxc1({{ Ft.uw = Mem.uw;}});
-                    0x1: ldxc1({{ Ft.ud = Mem.ud;}});
-                    0x5: luxc1({{ Ft.uw = Mem.ud;}});
+                    0x0: lwxc1({{ Fd.uw = Mem.uw;}});
+                    0x1: ldxc1({{ Fd.ud = Mem.ud;}});
+                    0x5: luxc1({{ Fd.ud = Mem.ud;}},
+                               {{ EA = (Rs + Rt) & ~7; }});
                 }
             }
 
             0x1: decode FUNCTION_LO {
                 format StoreIndexedMemory {
-                    0x0: swxc1({{ Mem.uw = Ft.uw;}});
-                    0x1: sdxc1({{ Mem.ud = Ft.ud;}});
-                    0x5: suxc1({{ Mem.ud = Ft.ud;}});
+                    0x0: swxc1({{ Mem.uw = Fs.uw;}});
+                    0x1: sdxc1({{ Mem.ud = Fs.ud;}});
+                    0x5: suxc1({{ Mem.ud = Fs.ud;}},
+                               {{ EA = (Rs + Rt) & ~7; }});
                 }
 
                 0x7: Prefetch::prefx({{ EA = Rs + Rt; }});
@@ -991,7 +997,7 @@ decode OPCODE_HI default Unknown::unknown() {
         0x7: decode FUNCTION_HI {
             0x0: decode FUNCTION_LO {
                 format BasicOp {
-                    0x1: ext({{ Rt.uw = bits(Rs.uw, MSB+LSB, LSB); }});
+                    0x0: ext({{ Rt.uw = bits(Rs.uw, MSB+LSB, LSB); }});
                     0x4: ins({{ Rt.uw = bits(Rt.uw, 31, MSB+1) << (MSB+1) |
                                         bits(Rs.uw, MSB-LSB, 0) << LSB |
                                         bits(Rt.uw, LSB-1, 0);
@@ -1014,8 +1020,8 @@ decode OPCODE_HI default Unknown::unknown() {
                                           Rt.uw<7:0>   << 8  |
                                           Rt.uw<15:8>;
                     }});
-                    0x10: seb({{ Rd.sw = Rt.sw<7:0>}});
-                    0x18: seh({{ Rd.sw = Rt.sw<15:0>}});
+                    0x10: seb({{ Rd.sw = Rt.sb; }});
+                    0x18: seh({{ Rd.sw = Rt.sh; }});
                 }
             }
 
diff --git a/src/arch/mips/isa/formats/basic.isa b/src/arch/mips/isa/formats/basic.isa
index 35ce09205..29dafd541 100644
--- a/src/arch/mips/isa/formats/basic.isa
+++ b/src/arch/mips/isa/formats/basic.isa
@@ -1,6 +1,6 @@
 // -*- mode:c++ -*-
 
-// Copyright (c) 2003-2006 The Regents of The University of Michigan
+// Copyright (c) 2006 The Regents of The University of Michigan
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
diff --git a/src/arch/mips/isa/formats/branch.isa b/src/arch/mips/isa/formats/branch.isa
index 827e3ccf0..5230ce9cc 100644
--- a/src/arch/mips/isa/formats/branch.isa
+++ b/src/arch/mips/isa/formats/branch.isa
@@ -1,6 +1,6 @@
 // -*- mode:c++ -*-
 
-// Copyright (c) 2003-2006 The Regents of The University of Michigan
+// Copyright (c) 2006 The Regents of The University of Michigan
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
diff --git a/src/arch/mips/isa/formats/control.isa b/src/arch/mips/isa/formats/control.isa
index 509ee7e87..6c7d396f3 100644
--- a/src/arch/mips/isa/formats/control.isa
+++ b/src/arch/mips/isa/formats/control.isa
@@ -1,6 +1,6 @@
 // -*- mode:c++ -*-
 
-// Copyright (c) 2003-2006 The Regents of The University of Michigan
+// Copyright (c) 2006 The Regents of The University of Michigan
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
diff --git a/src/arch/mips/isa/formats/fp.isa b/src/arch/mips/isa/formats/fp.isa
index d05b04d0e..cdb892b3f 100644
--- a/src/arch/mips/isa/formats/fp.isa
+++ b/src/arch/mips/isa/formats/fp.isa
@@ -1,6 +1,6 @@
 // -*- mode:c++ -*-
 
-// Copyright (c) 2003-2006 The Regents of The University of Michigan
+// Copyright (c) 2006 The Regents of The University of Michigan
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -142,10 +142,10 @@ output exec {{
                 cpu->setFloatRegBits(inst, 0, mips_nan, size);
 
                 //Read FCSR from FloatRegFile
-                uint32_t fcsr_bits = cpu->tc->readFloatRegBits(FCSR);
+                uint32_t fcsr_bits = cpu->tcBase()->readFloatRegBits(FCSR);
 
                 //Write FCSR from FloatRegFile
-                cpu->tc->setFloatRegBits(FCSR, genInvalidVector(fcsr_bits));
+                cpu->tcBase()->setFloatRegBits(FCSR, genInvalidVector(fcsr_bits));
 
                 if (traceData) { traceData->setData(mips_nan); }
                 return true;
@@ -158,12 +158,12 @@ output exec {{
         fpResetCauseBits(%(CPU_exec_context)s *cpu)
         {
             //Read FCSR from FloatRegFile
-            uint32_t fcsr = cpu->tc->readFloatRegBits(FCSR);
+            uint32_t fcsr = cpu->tcBase()->readFloatRegBits(FCSR);
 
             fcsr = bits(fcsr, 31, 18) << 18 | bits(fcsr, 11, 0);
 
             //Write FCSR from FloatRegFile
-            cpu->tc->setFloatRegBits(FCSR, fcsr);
+            cpu->tcBase()->setFloatRegBits(FCSR, fcsr);
         }
 }};
 
@@ -176,8 +176,9 @@ def template FloatingPointExecute {{
 
                 //When is the right time to reset cause bits?
                 //start of every instruction or every cycle?
+#if FULL_SYSTEM
                 fpResetCauseBits(xc);
-
+#endif
                 %(op_decl)s;
                 %(op_rd)s;
 
@@ -192,7 +193,10 @@ def template FloatingPointExecute {{
                     //----
                     //Check for IEEE 754 FP Exceptions
                     //fault = fpNanOperands((FPOp*)this, xc, Fd, traceData);
-                    if (!fpInvalidOp((FPOp*)this, xc, Fd, traceData) &&
+                    if (
+#if FULL_SYSTEM
+                        !fpInvalidOp((FPOp*)this, xc, Fd, traceData) &&
+#endif
                         fault == NoFault)
                     {
                         %(op_wb)s;
diff --git a/src/arch/mips/isa/formats/int.isa b/src/arch/mips/isa/formats/int.isa
index 7b5affb5c..56a4ec204 100644
--- a/src/arch/mips/isa/formats/int.isa
+++ b/src/arch/mips/isa/formats/int.isa
@@ -1,6 +1,6 @@
 // -*- mode:c++ -*-
 
-// Copyright (c) 2003-2006 The Regents of The University of Michigan
+// Copyright (c) 2006 The Regents of The University of Michigan
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -228,7 +228,7 @@ def format IntOp(code, *opt_flags) {{
     iop = InstObjParams(name, Name, 'IntOp', CodeBlock(code), opt_flags)
     header_output = BasicDeclare.subst(iop)
     decoder_output = BasicConstructor.subst(iop)
-    decode_block = OperateNopCheckDecode.subst(iop)
+    decode_block = RegNopCheckDecode.subst(iop)
     exec_output = BasicExecute.subst(iop)
 }};
 
@@ -236,7 +236,7 @@ def format IntImmOp(code, *opt_flags) {{
     iop = InstObjParams(name, Name, 'IntImmOp', CodeBlock(code), opt_flags)
     header_output = BasicDeclare.subst(iop)
     decoder_output = BasicConstructor.subst(iop)
-    decode_block = OperateNopCheckDecode.subst(iop)
+    decode_block = ImmNopCheckDecode.subst(iop)
     exec_output = BasicExecute.subst(iop)
 }};
 
@@ -252,7 +252,7 @@ def format HiLoOp(code, *opt_flags) {{
     iop = InstObjParams(name, Name, 'HiLoOp', CodeBlock(code), opt_flags)
     header_output = BasicDeclare.subst(iop)
     decoder_output = BasicConstructor.subst(iop)
-    decode_block = OperateNopCheckDecode.subst(iop)
+    decode_block = BasicDecode.subst(iop)
     exec_output = HiLoExecute.subst(iop)
 }};
 
@@ -260,7 +260,7 @@ def format HiLoMiscOp(code, *opt_flags) {{
     iop = InstObjParams(name, Name, 'HiLoMiscOp', CodeBlock(code), opt_flags)
     header_output = BasicDeclare.subst(iop)
     decoder_output = BasicConstructor.subst(iop)
-    decode_block = OperateNopCheckDecode.subst(iop)
+    decode_block = BasicDecode.subst(iop)
     exec_output = HiLoExecute.subst(iop)
 }};
 
diff --git a/src/arch/mips/isa/formats/mem.isa b/src/arch/mips/isa/formats/mem.isa
index f52247056..f03f7becd 100644
--- a/src/arch/mips/isa/formats/mem.isa
+++ b/src/arch/mips/isa/formats/mem.isa
@@ -1,6 +1,6 @@
 // -*- mode:c++ -*-
 
-// Copyright (c) 2003-2005 The Regents of The University of Michigan
+// Copyright (c) 2006 The Regents of The University of Michigan
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -58,14 +58,8 @@ output header {{
                StaticInstPtr _memAccPtr = nullStaticInstPtr)
             : MipsStaticInst(mnem, _machInst, __opClass),
               memAccessFlags(0), eaCompPtr(_eaCompPtr), memAccPtr(_memAccPtr),
-              disp(OFFSET)
+              disp(sext<16>(OFFSET))
         {
-            //If Bit 15 is 1 then Sign Extend
-            int32_t temp = disp & 0x00008000;
-
-            if (temp > 0) {
-                disp |= 0xFFFF0000;
-            }
         }
 
         std::string
@@ -77,6 +71,24 @@ output header {{
         const StaticInstPtr &memAccInst() const { return memAccPtr; }
     };
 
+     /**
+     * Base class for a few miscellaneous memory-format insts
+     * that don't interpret the disp field
+     */
+    class MemoryNoDisp : public Memory  // differs from Memory only in disassembly
+    {
+      protected:
+        /// Constructor
+        MemoryNoDisp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+                     StaticInstPtr _eaCompPtr = nullStaticInstPtr,
+                     StaticInstPtr _memAccPtr = nullStaticInstPtr)
+            : Memory(mnem, _machInst, __opClass, _eaCompPtr, _memAccPtr)  // all arguments forwarded as-is
+        {
+        }
+
+        std::string
+        generateDisassembly(Addr pc, const SymbolTable *symtab) const;  // defined in the decoder output block
+    };
 }};
 
 
@@ -84,10 +96,18 @@ output decoder {{
     std::string
     Memory::generateDisassembly(Addr pc, const SymbolTable *symtab) const
     {
-        return csprintf("%-10s %c%d,%d(r%d)", mnemonic,
+        return csprintf("%-10s %c%d, %d(r%d)", mnemonic,
                         flags[IsFloating] ? 'f' : 'r', RT, disp, RS);
     }
 
+    std::string
+    MemoryNoDisp::generateDisassembly(Addr pc, const SymbolTable *symtab) const  // pc and symtab are not used
+    {
+        return csprintf("%-10s %c%d, r%d(r%d)", mnemonic,  // "<mnemonic> <f|r><n>, r<RS>(r<RT>)"
+                        flags[IsFloating] ? 'f' : 'r',  // register prefix letter
+                        flags[IsFloating] ? FD : RD,  // FD when IsFloating is set, else RD
+                        RS, RT);  // fill the two r%d fields, in that order
+    }
 }};
 
 def template LoadStoreDeclare {{
@@ -479,23 +499,11 @@ def template MiscCompleteAcc {{
     }
 }};
 
-// load instructions use Rt as dest, so check for
-// Rt == 0 to detect nops
-def template LoadNopCheckDecode {{
- {
-     MipsStaticInst *i = new %(class_name)s(machInst);
-     if (RT == 0) {
-         i = makeNop(i);
-     }
-     return i;
- }
-}};
-
 def format LoadMemory(memacc_code, ea_code = {{ EA = Rs + disp; }},
                      mem_flags = [], inst_flags = []) {{
     (header_output, decoder_output, decode_block, exec_output) = \
         LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
-                      decode_template = LoadNopCheckDecode,
+                      decode_template = ImmNopCheckDecode,
                       exec_template_base = 'Load')
 }};
 
@@ -510,7 +518,7 @@ def format LoadIndexedMemory(memacc_code, ea_code = {{ EA = Rs + Rt; }},
                      mem_flags = [], inst_flags = []) {{
     (header_output, decoder_output, decode_block, exec_output) = \
         LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
-                      decode_template = LoadNopCheckDecode,
+                      decode_template = ImmNopCheckDecode,
                       exec_template_base = 'Load')
 }};
 
@@ -534,7 +542,7 @@ def format LoadUnalignedMemory(memacc_code, ea_code = {{ EA = (Rs + disp) & ~3;
 
     (header_output, decoder_output, decode_block, exec_output) = \
         LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
-                      decode_template = LoadNopCheckDecode,
+                      decode_template = ImmNopCheckDecode,
                       exec_template_base = 'Load')
 }};
 
@@ -551,7 +559,6 @@ def format StoreUnalignedMemory(memacc_code, ea_code = {{ EA = (Rs + disp) & ~3;
 
     (header_output, decoder_output, decode_block, exec_output) = \
         LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
-                      decode_template = LoadNopCheckDecode,
                       exec_template_base = 'Store')
 }};
 
diff --git a/src/arch/mips/isa/formats/mt.isa b/src/arch/mips/isa/formats/mt.isa
index 521b01123..96435f8c9 100644
--- a/src/arch/mips/isa/formats/mt.isa
+++ b/src/arch/mips/isa/formats/mt.isa
@@ -1,6 +1,6 @@
 // -*- mode:c++ -*-
 
-// Copyright (c) 2003-2006 The Regents of The University of Michigan
+// Copyright (c) 2006 The Regents of The University of Michigan
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -35,14 +35,15 @@
 
 output header {{
         /**
-         * Base class for integer operations.
+         * Base class for MIPS MT ASE operations.
          */
         class MT : public MipsStaticInst
         {
                 protected:
 
                 /// Constructor
-                MT(const char *mnem, MachInst _machInst, OpClass __opClass) : MipsStaticInst(mnem, _machInst, __opClass)
+                MT(const char *mnem, MachInst _machInst, OpClass __opClass) :
+                    MipsStaticInst(mnem, _machInst, __opClass)
                 {
                 }
 
diff --git a/src/arch/mips/isa/formats/noop.isa b/src/arch/mips/isa/formats/noop.isa
index 4fd8235e4..7f3d313ad 100644
--- a/src/arch/mips/isa/formats/noop.isa
+++ b/src/arch/mips/isa/formats/noop.isa
@@ -1,6 +1,6 @@
 // -*- mode:c++ -*-
 
-// Copyright (c) 2003-2006 The Regents of The University of Michigan
+// Copyright (c) 2006 The Regents of The University of Michigan
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -73,7 +73,8 @@ output decoder {{
     MipsStaticInst *
     makeNop(MipsStaticInst *inst)
     {
-        MipsStaticInst *nop = new Nop(inst->disassemble(0), inst->machInst);
+        std::string nop_str = "(" + inst->disassemble(0) + ")";
+        MipsStaticInst *nop = new Nop(nop_str, inst->machInst);
         delete inst;
         return nop;
     }
@@ -87,16 +88,36 @@ output exec {{
     }
 }};
 
-// integer & FP operate instructions use RT as dest, so check for
-// RT == 0 to detect nops
-def template OperateNopCheckDecode {{
+// Int & FP operate instructions use RD as dest, so check for
+// RD == 0 to detect nops
+def template RegNopCheckDecode {{
  {
      MipsStaticInst *i = new %(class_name)s(machInst);
+     //if (RD == 0) {
+         //i = makeNop(i);
+         //}
+     return i;
+ }
+}};
 
+def template OperateNopCheckDecode {{
+ {
+     MipsStaticInst *i = new %(class_name)s(machInst);
      //if (RD == 0) {
-     //  i = makeNop(i);
+     // i = makeNop(i);
      //}
+     return i;
+ }
+}};
 
+// IntImm & Memory instructions use Rt as dest, so check for
+// Rt == 0 to detect nops
+def template ImmNopCheckDecode {{
+ {
+     MipsStaticInst *i = new %(class_name)s(machInst);
+     //if (RT == 0) {
+     // i = makeNop(i);
+     // }
      return i;
  }
 }};
diff --git a/src/arch/mips/isa/formats/tlbop.isa b/src/arch/mips/isa/formats/tlbop.isa
index 75ab71c48..b974ccbed 100644
--- a/src/arch/mips/isa/formats/tlbop.isa
+++ b/src/arch/mips/isa/formats/tlbop.isa
@@ -1,6 +1,6 @@
 // -*- mode:c++ -*-
 
-// Copyright (c) 2003-2006 The Regents of The University of Michigan
+// Copyright (c) 2006 The Regents of The University of Michigan
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
diff --git a/src/arch/mips/isa/formats/trap.isa b/src/arch/mips/isa/formats/trap.isa
index 574b808cc..b9066f374 100644
--- a/src/arch/mips/isa/formats/trap.isa
+++ b/src/arch/mips/isa/formats/trap.isa
@@ -1,6 +1,6 @@
 // -*- mode:c++ -*-
 
-// Copyright (c) 2003-2006 The Regents of The University of Michigan
+// Copyright (c) 2006 The Regents of The University of Michigan
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -70,7 +70,7 @@ def template TrapExecute {{
 }};
 
 def format Trap(code, *flags) {{
-        code = 'panic(\"'
+        code = 'warn(\"'
         code += 'Trap Exception Handler Is Currently Not Implemented.'
         code += '\");'
         iop = InstObjParams(name, Name, 'MipsStaticInst', CodeBlock(code), flags)
diff --git a/src/arch/mips/isa/formats/unimp.isa b/src/arch/mips/isa/formats/unimp.isa
index e17b5f832..03068fa74 100644
--- a/src/arch/mips/isa/formats/unimp.isa
+++ b/src/arch/mips/isa/formats/unimp.isa
@@ -1,7 +1,7 @@
 // -*- mode:c++ -*-
 
 
-// Copyright (c) 2003-2005 The Regents of The University of Michigan
+// Copyright (c) 2006 The Regents of The University of Michigan
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
diff --git a/src/arch/mips/isa/formats/unknown.isa b/src/arch/mips/isa/formats/unknown.isa
index 41387adca..70b3901e9 100644
--- a/src/arch/mips/isa/formats/unknown.isa
+++ b/src/arch/mips/isa/formats/unknown.isa
@@ -1,6 +1,6 @@
 // -*- mode:c++ -*-
 
-// Copyright (c) 2003-2006 The Regents of The University of Michigan
+// Copyright (c) 2006 The Regents of The University of Michigan
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
diff --git a/src/arch/mips/isa/formats/util.isa b/src/arch/mips/isa/formats/util.isa
index b67a02d07..0cc375af3 100644
--- a/src/arch/mips/isa/formats/util.isa
+++ b/src/arch/mips/isa/formats/util.isa
@@ -1,6 +1,6 @@
 // -*- mode:c++ -*-
 
-// Copyright (c) 2003-2006 The Regents of The University of Michigan
+// Copyright (c) 2006 The Regents of The University of Michigan
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
diff --git a/src/arch/mips/isa/includes.isa b/src/arch/mips/isa/includes.isa
index 555cec255..6b5f3c588 100644
--- a/src/arch/mips/isa/includes.isa
+++ b/src/arch/mips/isa/includes.isa
@@ -1,6 +1,6 @@
 // -*- mode:c++ -*-
 
-// Copyright (c) 2003-2006 The Regents of The University of Michigan
+// Copyright (c) 2006 The Regents of The University of Michigan
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
diff --git a/src/arch/mips/isa/main.isa b/src/arch/mips/isa/main.isa
index 9da3fc0db..2d7c63cd5 100644
--- a/src/arch/mips/isa/main.isa
+++ b/src/arch/mips/isa/main.isa
@@ -1,6 +1,6 @@
 // -*- mode:c++ -*-
 
-// Copyright (c) 2003-2005 The Regents of The University of Michigan
+// Copyright (c) 2006 The Regents of The University of Michigan
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
diff --git a/src/arch/mips/isa/operands.isa b/src/arch/mips/isa/operands.isa
index 316552ef4..3843dc053 100644
--- a/src/arch/mips/isa/operands.isa
+++ b/src/arch/mips/isa/operands.isa
@@ -1,6 +1,6 @@
 // -*- mode:c++ -*-
 
-// Copyright (c) 2003-2006 The Regents of The University of Michigan
+// Copyright (c) 2006 The Regents of The University of Michigan
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -53,8 +53,8 @@ def operands {{
     'R2':  ('IntReg', 'uw','2', 'IsInteger', 5),
 
     #Special Integer Reg operands
-    'HI':  ('IntReg', 'uw','32', 'IsInteger', 6),
-    'LO':  ('IntReg', 'uw','33', 'IsInteger', 7),
+    'HI':  ('IntReg', 'uw','MipsISA::HI', 'IsInteger', 6),
+    'LO':  ('IntReg', 'uw','MipsISA::LO', 'IsInteger', 7),
 
     #Immediate Value operand
     'IntImm': ('IntReg', 'uw', 'INTIMM', 'IsInteger', 3),
@@ -66,11 +66,11 @@ def operands {{
     'Fr': ('FloatReg', 'sf', 'FR', 'IsFloating', 3),
 
     #Special Floating Point Control Reg Operands
-    'FIR':  ('FloatReg', 'uw', '32', 'IsFloating', 1),
-    'FCCR': ('FloatReg', 'uw', '33', 'IsFloating', 2),
-    'FEXR': ('FloatReg', 'uw', '34', 'IsFloating', 3),
-    'FENR': ('FloatReg', 'uw', '35', 'IsFloating', 3),
-    'FCSR': ('FloatReg', 'uw', '36', 'IsFloating', 3),
+    'FIR':  ('FloatReg', 'uw', 'MipsISA::FIR', 'IsFloating', 1),
+    'FCCR': ('FloatReg', 'uw', 'MipsISA::FCCR', 'IsFloating', 2),
+    'FEXR': ('FloatReg', 'uw', 'MipsISA::FEXR', 'IsFloating', 3),
+    'FENR': ('FloatReg', 'uw', 'MipsISA::FENR', 'IsFloating', 3),
+    'FCSR': ('FloatReg', 'uw', 'MipsISA::FCSR', 'IsFloating', 3),
 
      #Operands For Paired Singles FP Operations
     'Fd1': ('FloatReg', 'sf', 'FD', 'IsFloating', 4),
diff --git a/src/arch/mips/isa_traits.cc b/src/arch/mips/isa_traits.cc
index 9f3817a60..a8b41270e 100644
--- a/src/arch/mips/isa_traits.cc
+++ b/src/arch/mips/isa_traits.cc
@@ -30,7 +30,7 @@
  */
 
 #include "arch/mips/isa_traits.hh"
-#include "config/full_system.hh"
+//#include "config/full_system.hh"
 #include "cpu/static_inst.hh"
 #include "sim/serialize.hh"
 #include "base/bitfield.hh"
@@ -43,39 +43,20 @@ void
 MipsISA::copyRegs(ThreadContext *src, ThreadContext *dest)
 {
     panic("Copy Regs Not Implemented Yet\n");
-    /*fpcr = xc->readMiscReg(MipsISA::Fpcr_DepTag);
-    uniq = xc->readMiscReg(MipsISA::Uniq_DepTag);
-    lock_flag = xc->readMiscReg(MipsISA::Lock_Flag_DepTag);
-    lock_addr = xc->readMiscReg(MipsISA::Lock_Addr_DepTag);
+}
 
-#if FULL_SYSTEM
-    copyIprs(xc);
-    #endif*/
+void
+MipsISA::copyMiscRegs(ThreadContext *src, ThreadContext *dest)
+{
+    panic("Copy Misc. Regs Not Implemented Yet\n");
 }
 
 void
 MipsISA::MiscRegFile::copyMiscRegs(ThreadContext *tc)
 {
     panic("Copy Misc. Regs Not Implemented Yet\n");
-    /*fpcr = xc->readMiscReg(MipsISA::Fpcr_DepTag);
-    uniq = xc->readMiscReg(MipsISA::Uniq_DepTag);
-    lock_flag = xc->readMiscReg(MipsISA::Lock_Flag_DepTag);
-    lock_addr = xc->readMiscReg(MipsISA::Lock_Addr_DepTag);
-
-    #endif*/
 }
 
-#if FULL_SYSTEM
-
-static inline Addr
-TruncPage(Addr addr)
-{ return addr & ~(MipsISA::PageBytes - 1); }
-
-static inline Addr
-RoundPage(Addr addr)
-{ return (addr + MipsISA::PageBytes - 1) & ~(MipsISA::PageBytes - 1); }
-#endif
-
 void
 IntRegFile::serialize(std::ostream &os)
 {
@@ -100,12 +81,6 @@ RegFile::serialize(std::ostream &os)
     SERIALIZE_SCALAR(pc);
     SERIALIZE_SCALAR(npc);
     SERIALIZE_SCALAR(nnpc);
-#if FULL_SYSTEM
-    SERIALIZE_ARRAY(palregs, NumIntRegs);
-    SERIALIZE_ARRAY(ipr, NumInternalProcRegs);
-    SERIALIZE_SCALAR(intrflag);
-    SERIALIZE_SCALAR(pal_shadow);
-#endif
 }
 
 
@@ -121,43 +96,5 @@ RegFile::unserialize(Checkpoint *cp, const std::string &section)
     UNSERIALIZE_SCALAR(pc);
     UNSERIALIZE_SCALAR(npc);
     UNSERIALIZE_SCALAR(nnpc);
-#if FULL_SYSTEM
-    UNSERIALIZE_ARRAY(palregs, NumIntRegs);
-    UNSERIALIZE_ARRAY(ipr, NumInternalProcRegs);
-    UNSERIALIZE_SCALAR(intrflag);
-    UNSERIALIZE_SCALAR(pal_shadow);
-#endif
-}
 
-
-#if FULL_SYSTEM
-void
-PTE::serialize(std::ostream &os)
-{
-    SERIALIZE_SCALAR(tag);
-    SERIALIZE_SCALAR(ppn);
-    SERIALIZE_SCALAR(xre);
-    SERIALIZE_SCALAR(xwe);
-    SERIALIZE_SCALAR(asn);
-    SERIALIZE_SCALAR(asma);
-    SERIALIZE_SCALAR(fonr);
-    SERIALIZE_SCALAR(fonw);
-    SERIALIZE_SCALAR(valid);
 }
-
-
-void
-PTE::unserialize(Checkpoint *cp, const std::string &section)
-{
-    UNSERIALIZE_SCALAR(tag);
-    UNSERIALIZE_SCALAR(ppn);
-    UNSERIALIZE_SCALAR(xre);
-    UNSERIALIZE_SCALAR(xwe);
-    UNSERIALIZE_SCALAR(asn);
-    UNSERIALIZE_SCALAR(asma);
-    UNSERIALIZE_SCALAR(fonr);
-    UNSERIALIZE_SCALAR(fonw);
-    UNSERIALIZE_SCALAR(valid);
-}
-
-#endif //FULL_SYSTEM
diff --git a/src/arch/mips/isa_traits.hh b/src/arch/mips/isa_traits.hh
index dc8b6758a..2f485c7fd 100644
--- a/src/arch/mips/isa_traits.hh
+++ b/src/arch/mips/isa_traits.hh
@@ -57,12 +57,6 @@ namespace LittleEndianGuest {};
 class StaticInst;
 class StaticInstPtr;
 
-namespace MIPS34K {
-int DTB_ASN_ASN(uint64_t reg);
-int ITB_ASN_ASN(uint64_t reg);
-};
-
-#if !FULL_SYSTEM
 class SyscallReturn {
         public:
            template <class T>
@@ -95,7 +89,6 @@ class SyscallReturn {
            uint64_t retval;
            bool success;
 };
-#endif
 
 namespace MipsISA
 {
@@ -136,16 +129,10 @@ namespace MipsISA
     template <class TC>
     void zeroRegisters(TC *tc);
 
-    const Addr MaxAddr = (Addr)-1;
+//    const Addr MaxAddr = (Addr)-1;
 
     void copyRegs(ThreadContext *src, ThreadContext *dest);
 
-    uint64_t fpConvert(double fp_val, ConvertType cvt_type);
-    double roundFP(double val, int digits);
-    double truncFP(double val);
-    bool getFPConditionCode(uint32_t fcsr_reg, int cc);
-    uint32_t makeCCVector(uint32_t fcsr, int num, bool val);
-
     // Machine operations
 
     void saveMachineReg(AnyReg &savereg, const RegFile &reg_file,
@@ -191,12 +178,6 @@ namespace MipsISA
 
 };
 
-#if FULL_SYSTEM
-
-#include "arch/mips/mips34k.hh"
-
-#endif
-
 using namespace MipsISA;
 
 #endif // __ARCH_MIPS_ISA_TRAITS_HH__
diff --git a/src/arch/mips/process.cc b/src/arch/mips/process.cc
index 7762c2fa0..cb847fe04 100644
--- a/src/arch/mips/process.cc
+++ b/src/arch/mips/process.cc
@@ -1,3 +1,4 @@
+
 /*
  * Copyright (c) 2003-2004 The Regents of The University of Michigan
  * All rights reserved.
@@ -40,6 +41,8 @@
 using namespace std;
 using namespace MipsISA;
 
+Addr MipsLiveProcess::stack_start = 0x7FFFFFFF;
+
 MipsLiveProcess::MipsLiveProcess(const std::string &nm, ObjectFile *objFile,
         System *_system, int stdin_fd, int stdout_fd, int stderr_fd,
         std::vector<std::string> &argv, std::vector<std::string> &envp)
@@ -48,10 +51,11 @@ MipsLiveProcess::MipsLiveProcess(const std::string &nm, ObjectFile *objFile,
 {
     // Set up stack. On MIPS, stack starts at the top of kuseg
     // user address space. MIPS stack grows down from here
-    stack_base = 0x7FFFFFFF;
+    stack_base = stack_start;
 
     // Set pointer for next thread stack.  Reserve 8M for main stack.
     next_thread_stack_base = stack_base - (8 * 1024 * 1024);
+    stack_start = next_thread_stack_base;
 
     // Set up break point (Top of Heap)
     brk_point = objFile->dataBase() + objFile->dataSize() + objFile->bssSize();
diff --git a/src/arch/mips/process.hh b/src/arch/mips/process.hh
index b0ef20399..4baee134b 100644
--- a/src/arch/mips/process.hh
+++ b/src/arch/mips/process.hh
@@ -50,6 +50,9 @@ class MipsLiveProcess : public LiveProcess
                 std::vector<std::string> &envp);
 
     void startup();
+
+
+    static Addr stack_start;
 };
 
 
diff --git a/src/arch/mips/regfile/float_regfile.hh b/src/arch/mips/regfile/float_regfile.hh
index d1a60298a..61efbb416 100644
--- a/src/arch/mips/regfile/float_regfile.hh
+++ b/src/arch/mips/regfile/float_regfile.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2005 The Regents of The University of Michigan
+ * Copyright (c) 2006 The Regents of The University of Michigan
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -24,8 +24,6 @@
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Korey Sewell
  */
 
 #ifndef __ARCH_MIPS_FLOAT_REGFILE_HH__
@@ -34,13 +32,14 @@
 #include "arch/mips/types.hh"
 #include "arch/mips/constants.hh"
 #include "base/misc.hh"
+#include "base/bitfield.hh"
 #include "config/full_system.hh"
 #include "sim/byteswap.hh"
 #include "sim/faults.hh"
 #include "sim/host.hh"
 
 class Checkpoint;
-class ThreadContext;
+class ExecContext;
 class Regfile;
 
 namespace MipsISA
@@ -101,8 +100,9 @@ namespace MipsISA
             }
         }
 
-        Fault setReg(int floatReg, const FloatReg &val, int width)
+        Fault setReg(int floatReg, const FloatRegVal &val, int width)
         {
+            using namespace std;
             switch(width)
             {
               case SingleWidth:
@@ -117,8 +117,8 @@ namespace MipsISA
                 {
                     const void *double_ptr = &val;
                     FloatReg64 temp_double = *(FloatReg64 *) double_ptr;
-                    regs[floatReg + 1] = temp_double >> 32;
-                    regs[floatReg] = 0x0000FFFF & temp_double;
+                    regs[floatReg + 1] = bits(temp_double, 63, 32);
+                    regs[floatReg] = bits(temp_double, 31, 0);
                     break;
                 }
 
@@ -140,8 +140,8 @@ namespace MipsISA
                 break;
 
               case DoubleWidth:
-                regs[floatReg + 1] = val >> 32;
-                regs[floatReg] = val;
+                regs[floatReg + 1] = bits(val, 63, 32);
+                regs[floatReg] = bits(val, 31, 0);
                 break;
 
               default:
diff --git a/src/arch/mips/regfile/int_regfile.hh b/src/arch/mips/regfile/int_regfile.hh
index dc82a3c26..5add1b7be 100644
--- a/src/arch/mips/regfile/int_regfile.hh
+++ b/src/arch/mips/regfile/int_regfile.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2005 The Regents of The University of Michigan
+ * Copyright (c) 2006 The Regents of The University of Michigan
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -65,11 +65,6 @@ namespace MipsISA
 
     };
 
-    enum MiscIntRegNums {
-       HI = NumIntArchRegs,
-       LO
-    };
-
 } // namespace MipsISA
 
 #endif
diff --git a/src/arch/mips/regfile/misc_regfile.hh b/src/arch/mips/regfile/misc_regfile.hh
index f8aeab8cb..87961f97e 100644
--- a/src/arch/mips/regfile/misc_regfile.hh
+++ b/src/arch/mips/regfile/misc_regfile.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2005 The Regents of The University of Michigan
+ * Copyright (c) 2006 The Regents of The University of Michigan
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -45,18 +45,12 @@ namespace MipsISA
 
       protected:
         uint64_t	fpcr;		// floating point condition codes
-        uint64_t	uniq;		// process-unique register
         bool		lock_flag;	// lock flag for LL/SC
         Addr		lock_addr;	// lock address for LL/SC
 
         MiscReg miscRegFile[NumMiscRegs];
 
       public:
-        //These functions should be removed once the simplescalar cpu model
-        //has been replaced.
-        int getInstAsid();
-        int getDataAsid();
-
         void copyMiscRegs(ThreadContext *tc);
 
         MiscReg readReg(int misc_reg)
@@ -80,17 +74,6 @@ namespace MipsISA
             miscRegFile[misc_reg] = val; return NoFault;
         }
 
-#if FULL_SYSTEM
-        void clearIprs() { }
-
-      protected:
-        InternalProcReg ipr[NumInternalProcRegs]; // Internal processor regs
-
-      private:
-        MiscReg readIpr(int idx, Fault &fault, ThreadContext *tc) { }
-
-        Fault setIpr(int idx, uint64_t val, ThreadContext *tc) { }
-#endif
         friend class RegFile;
     };
 } // namespace MipsISA
diff --git a/src/arch/mips/regfile/regfile.hh b/src/arch/mips/regfile/regfile.hh
index af61e62cd..a68120299 100644
--- a/src/arch/mips/regfile/regfile.hh
+++ b/src/arch/mips/regfile/regfile.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2005 The Regents of The University of Michigan
+ * Copyright (c) 2006 The Regents of The University of Michigan
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -81,12 +81,12 @@ namespace MipsISA
             return miscRegFile.setRegWithEffect(miscReg, val, tc);
         }
 
-        FloatReg readFloatReg(int floatReg)
+        FloatRegVal readFloatReg(int floatReg)
         {
             return floatRegFile.readReg(floatReg,SingleWidth);
         }
 
-        FloatReg readFloatReg(int floatReg, int width)
+        FloatRegVal readFloatReg(int floatReg, int width)
         {
             return floatRegFile.readReg(floatReg,width);
         }
@@ -101,12 +101,12 @@ namespace MipsISA
             return floatRegFile.readRegBits(floatReg,width);
         }
 
-        Fault setFloatReg(int floatReg, const FloatReg &val)
+        Fault setFloatReg(int floatReg, const FloatRegVal &val)
         {
             return floatRegFile.setReg(floatReg, val, SingleWidth);
         }
 
-        Fault setFloatReg(int floatReg, const FloatReg &val, int width)
+        Fault setFloatReg(int floatReg, const FloatRegVal &val, int width)
         {
             return floatRegFile.setReg(floatReg, val, width);
         }
@@ -168,16 +168,6 @@ namespace MipsISA
             nnpc = val;
         }
 
-
-#if FULL_SYSTEM
-        IntReg palregs[NumIntRegs];	// PAL shadow registers
-        InternalProcReg ipr[NumInternalProcRegs]; // internal processor regs
-        int intrflag;			// interrupt flag
-        bool pal_shadow;		// using pal_shadow registers
-        inline int instAsid() { return MIPS34K::ITB_ASN_ASN(ipr[IPR_ITB_ASN]); }
-        inline int dataAsid() { return MIPS34K::DTB_ASN_ASN(ipr[IPR_DTB_ASN]); }
-#endif // FULL_SYSTEM
-
         void serialize(std::ostream &os);
         void unserialize(Checkpoint *cp, const std::string &section);
 
@@ -193,9 +183,6 @@ namespace MipsISA
 
     void copyMiscRegs(ThreadContext *src, ThreadContext *dest);
 
-#if FULL_SYSTEM
-    void copyIprs(ThreadContext *src, ThreadContext *dest);
-#endif
 } // namespace MipsISA
 
 #endif
diff --git a/src/arch/mips/stacktrace.hh b/src/arch/mips/stacktrace.hh
index 38767cef7..f9e092dbd 100644
--- a/src/arch/mips/stacktrace.hh
+++ b/src/arch/mips/stacktrace.hh
@@ -25,11 +25,11 @@
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
- * Authors: Korey Sewell
+ * Authors: Ali Saidi
  */
 
-#ifndef __ARCH_ALPHA_STACKTRACE_HH__
-#define __ARCH_ALPHA_STACKTRACE_HH__
+#ifndef __ARCH_MIPS_STACKTRACE_HH__
+#define __ARCH_MIPS_STACKTRACE_HH__
 
 #include "base/trace.hh"
 #include "cpu/static_inst.hh"
@@ -118,4 +118,4 @@ StackTrace::trace(ThreadContext *tc, StaticInstPtr inst)
     return true;
 }
 
-#endif // __ARCH_ALPHA_STACKTRACE_HH__
+#endif // __ARCH_MIPS_STACKTRACE_HH__
diff --git a/src/arch/mips/types.hh b/src/arch/mips/types.hh
index 7cd2eed0c..6330044d9 100644
--- a/src/arch/mips/types.hh
+++ b/src/arch/mips/types.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2005 The Regents of The University of Michigan
+ * Copyright (c) 2006 The Regents of The University of Michigan
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -42,14 +42,15 @@ namespace MipsISA
     typedef uint32_t IntReg;
 
     // floating point register file entry type
-    typedef double FloatReg;
     typedef uint32_t FloatReg32;
     typedef uint64_t FloatReg64;
     typedef uint64_t FloatRegBits;
 
+    typedef double FloatRegVal;
+    typedef double FloatReg;
+
     // cop-0/cop-1 system control register
     typedef uint64_t MiscReg;
-    typedef uint64_t InternalProcReg;
 
     typedef union {
         IntReg   intreg;
diff --git a/src/arch/sparc/faults.cc b/src/arch/sparc/faults.cc
index 57b4d4d86..7b7765935 100644
--- a/src/arch/sparc/faults.cc
+++ b/src/arch/sparc/faults.cc
@@ -33,6 +33,10 @@
 #include "cpu/thread_context.hh"
 #include "cpu/base.hh"
 #include "base/trace.hh"
+#if !FULL_SYSTEM
+#include "sim/process.hh"
+#include "mem/page_table.hh"
+#endif
 
 namespace SparcISA
 {
@@ -218,6 +222,13 @@ TrapType      TrapInstruction::_baseTrapType = 0x100;
 FaultPriority TrapInstruction::_priority = 16;
 FaultStat     TrapInstruction::_count;
 
+#if !FULL_SYSTEM
+FaultName PageTableFault::_name = "page_table_fault";
+TrapType PageTableFault::_trapType = 0x0000;
+FaultPriority PageTableFault::_priority = 0;
+FaultStat PageTableFault::_count;
+#endif
+
 #if FULL_SYSTEM
 
 void SparcFault::invoke(ThreadContext * tc)
@@ -249,9 +260,28 @@ void SparcFault::invoke(ThreadContext * tc)
 
 void TrapInstruction::invoke(ThreadContext * tc)
 {
-    tc->syscall(syscall_num);
+    // Should be handled in ISA.
 }
 
+void PageTableFault::invoke(ThreadContext *tc)
+{
+    Process *p = tc->getProcessPtr();
+
+    // address is higher than the stack region or in the current stack region
+    if (vaddr > p->stack_base || vaddr > p->stack_min)
+        FaultBase::invoke(tc);
+
+    // We've accessed the next page
+    if (vaddr > p->stack_min - PageBytes) {
+        p->stack_min -= PageBytes;
+        if (p->stack_base - p->stack_min > 8*1024*1024)
+            fatal("Over max stack size for one thread\n");
+        p->pTable->allocate(p->stack_min, PageBytes);
+        warn("Increasing stack size by one page.");
+    } else {
+        FaultBase::invoke(tc);
+    }
+}
 #endif
 
 } // namespace SparcISA
diff --git a/src/arch/sparc/faults.hh b/src/arch/sparc/faults.hh
index 9f595a28b..b279f4911 100644
--- a/src/arch/sparc/faults.hh
+++ b/src/arch/sparc/faults.hh
@@ -83,6 +83,31 @@ class MemAddressNotAligned : public SparcFault
     bool isAlignmentFault() {return true;}
 };
 
+#if !FULL_SYSTEM
+class PageTableFault : public SparcFault
+{
+  private:
+    Addr vaddr;
+    static FaultName _name;
+    static TrapType _trapType;
+    static FaultPriority _priority;
+    static FaultStat _count;
+  public:
+    PageTableFault(Addr va)
+        : vaddr(va) {}
+    FaultName name() {return _name;}
+    TrapType trapType() {return _trapType;}
+    FaultPriority priority() {return _priority;}
+    FaultStat & countStat() {return _count;}
+    void invoke(ThreadContext * tc);
+};
+
+static inline Fault genPageTableFault(Addr va)
+{
+    return new PageTableFault(va);
+}
+#endif
+
 static inline Fault genMachineCheckFault()
 {
     return new InternalProcessorError;
@@ -589,6 +614,7 @@ class TrapInstruction : public EnumeratedFault
 #endif
 };
 
+
 } // SparcISA namespace
 
 #endif // __FAULTS_HH__
diff --git a/src/arch/sparc/system.cc b/src/arch/sparc/system.cc
index e197e7918..63cbbe057 100644
--- a/src/arch/sparc/system.cc
+++ b/src/arch/sparc/system.cc
@@ -141,6 +141,7 @@ SparcSystem::unserialize(Checkpoint *cp, const std::string &section)
 BEGIN_DECLARE_SIM_OBJECT_PARAMS(SparcSystem)
 
     SimObjectParam<PhysicalMemory *> physmem;
+    SimpleEnumParam<System::MemoryMode> mem_mode;
 
     Param<std::string> kernel;
     Param<std::string> reset_bin;
@@ -161,6 +162,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(SparcSystem)
 
     INIT_PARAM(boot_cpu_frequency, "Frequency of the boot CPU"),
     INIT_PARAM(physmem, "phsyical memory"),
+    INIT_ENUM_PARAM(mem_mode, "Memory Mode, (1=atomic, 2=timing)",
+            System::MemoryModeStrings),
     INIT_PARAM(kernel, "file that contains the kernel code"),
     INIT_PARAM(reset_bin, "file that contains the reset code"),
     INIT_PARAM(hypervisor_bin, "file that contains the hypervisor code"),
@@ -183,6 +186,7 @@ CREATE_SIM_OBJECT(SparcSystem)
     p->name = getInstanceName();
     p->boot_cpu_frequency = boot_cpu_frequency;
     p->physmem = physmem;
+    p->mem_mode = mem_mode;
     p->kernel_path = kernel;
     p->reset_bin = reset_bin;
     p->hypervisor_bin = hypervisor_bin;
diff --git a/src/base/fast_alloc.cc b/src/base/fast_alloc.cc
index 455fb8ed7..610dff66c 100644
--- a/src/base/fast_alloc.cc
+++ b/src/base/fast_alloc.cc
@@ -180,13 +180,13 @@ FastAlloc::dump_oldest(int n)
 // C interfaces to FastAlloc::dump_summary() and FastAlloc::dump_oldest().
 // gdb seems to have trouble with calling C++ functions directly.
 //
-extern "C" void
+void
 fast_alloc_summary()
 {
     FastAlloc::dump_summary();
 }
 
-extern "C" void
+void
 fast_alloc_oldest(int n)
 {
     FastAlloc::dump_oldest(n);
diff --git a/src/base/timebuf.hh b/src/base/timebuf.hh
index 6a326d25a..a484a3179 100644
--- a/src/base/timebuf.hh
+++ b/src/base/timebuf.hh
@@ -25,7 +25,8 @@
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
- * Authors: Kevin Lim
+ * Authors: Nathan Binkert
+ *          Kevin Lim
  */
 
 #ifndef __BASE_TIMEBUF_HH__
@@ -214,6 +215,11 @@ class TimeBuffer
     {
         return wire(this, 0);
     }
+
+    int getSize()
+    {
+        return size;
+    }
 };
 
 #endif // __BASE_TIMEBUF_HH__
diff --git a/src/base/trace.cc b/src/base/trace.cc
index 50426b992..9fa615f4d 100644
--- a/src/base/trace.cc
+++ b/src/base/trace.cc
@@ -247,7 +247,6 @@ DebugOut()
 //
 // Dump trace buffer to specified file (cout if NULL)
 //
-extern "C"
 void
 dumpTrace(const char *filename)
 {
@@ -269,7 +268,6 @@ dumpTrace(const char *filename)
 // same facility as the "trace to file" feature, and will print error
 // messages rather than clobbering an existing ostream pointer.
 //
-extern "C"
 void
 echoTrace(bool on)
 {
@@ -289,7 +287,6 @@ echoTrace(bool on)
     }
 }
 
-extern "C"
 void
 printTraceFlags()
 {
@@ -338,14 +335,12 @@ tweakTraceFlag(const char *string, bool value)
     cprintf("could not find flag %s\n", string);
 }
 
-extern "C"
 void
 setTraceFlag(const char *string)
 {
     tweakTraceFlag(string, true);
 }
 
-extern "C"
 void
 clearTraceFlag(const char *string)
 {
diff --git a/src/base/traceflags.py b/src/base/traceflags.py
index 7ff68bcaf..27c24107c 100644
--- a/src/base/traceflags.py
+++ b/src/base/traceflags.py
@@ -48,8 +48,10 @@ ccfilename = sys.argv[1] + '.cc'
 # To define a new flag, simply add it to this list.
 #
 baseFlags = [
+    'Activity',
     'AlphaConsole',
     'BADADDR',
+    'BE',
     'BPredRAS',
     'Bus',
     'BusAddrRanges',
@@ -84,6 +86,7 @@ baseFlags = [
     'EthernetPIO',
     'EthernetSM',
     'Event',
+    'FE',
     'Fault',
     'Fetch',
     'Flow',
@@ -97,6 +100,7 @@ baseFlags = [
     'GDBSend',
     'GDBWrite',
     'HWPrefetch',
+    'IBE',
     'IEW',
     'IIC',
     'IICMore',
@@ -115,10 +119,8 @@ baseFlags = [
     'MSHR',
     'Mbox',
     'MemDepUnit',
+    'O3CPU',
     'OzoneCPU',
-    'FE',
-    'IBE',
-    'BE',
     'OzoneLSQ',
     'PCEvent',
     'PCIA',
@@ -132,6 +134,7 @@ baseFlags = [
     'RenameMap',
     'SQL',
     'Sampler',
+    'Scoreboard',
     'ScsiCtrl',
     'ScsiDisk',
     'ScsiNone',
@@ -155,8 +158,6 @@ baseFlags = [
     'Uart',
     'VtoPhys',
     'WriteBarrier',
-    'Activity',
-    'Scoreboard',
     'Writeback',
     ]
 
@@ -175,7 +176,7 @@ compoundFlagMap = {
     'EthernetAll' : [ 'Ethernet', 'EthernetPIO', 'EthernetDMA', 'EthernetData' , 'EthernetDesc', 'EthernetIntr', 'EthernetSM', 'EthernetCksum' ],
     'EthernetNoData' : [ 'Ethernet', 'EthernetPIO', 'EthernetDesc', 'EthernetIntr', 'EthernetSM', 'EthernetCksum' ],
     'IdeAll' : [ 'IdeCtrl', 'IdeDisk' ],
-    'FullCPUAll' : [ 'Fetch', 'Decode', 'Rename', 'IEW', 'Commit', 'IQ', 'ROB', 'FreeList', 'RenameMap', 'LSQ', 'LSQUnit', 'StoreSet', 'MemDepUnit', 'DynInst', 'FullCPU', 'Activity','Scoreboard','Writeback'],
+    'O3CPUAll' : [ 'Fetch', 'Decode', 'Rename', 'IEW', 'Commit', 'IQ', 'ROB', 'FreeList', 'RenameMap', 'LSQ', 'LSQUnit', 'StoreSet', 'MemDepUnit', 'DynInst', 'FullCPU', 'O3CPU', 'Activity','Scoreboard','Writeback'],
     'OzoneCPUAll' : [ 'BE', 'FE', 'IBE', 'OzoneLSQ', 'OzoneCPU']
 }
 
diff --git a/src/cpu/SConscript b/src/cpu/SConscript
index 34bad132c..bc4ec7923 100644
--- a/src/cpu/SConscript
+++ b/src/cpu/SConscript
@@ -68,6 +68,13 @@ mem_comp_sig_template = '''
 virtual Fault completeAcc(uint8_t *data, %s *xc, Trace::InstRecord *traceData) const { panic("Not defined!"); return NoFault; };
 '''
 
+# Generate a temporary CPU list, including the CheckerCPU if it's enabled.
+# This isn't used for anything other than StaticInst headers.  It is a
+# copy, so the append below leaves env['CPU_MODELS'] itself untouched.
+temp_cpu_list = env['CPU_MODELS'][:]
+if env['USE_CHECKER']:
+    temp_cpu_list.append('CheckerCPU')
+
 # Generate header.  
 def gen_cpu_exec_signatures(target, source, env):
     f = open(str(target[0]), 'w')
@@ -75,7 +82,7 @@ def gen_cpu_exec_signatures(target, source, env):
 #ifndef __CPU_STATIC_INST_EXEC_SIGS_HH__
 #define __CPU_STATIC_INST_EXEC_SIGS_HH__
 '''
-    for cpu in env['CPU_MODELS']:
+    for cpu in temp_cpu_list:
         xc_type = CpuModel.dict[cpu].strings['CPU_exec_context']
         print >> f, exec_sig_template % (xc_type, xc_type, xc_type)
     print >> f, '''
@@ -85,12 +92,19 @@ def gen_cpu_exec_signatures(target, source, env):
 # Generate string that gets printed when header is rebuilt
 def gen_sigs_string(target, source, env):
     return "Generating static_inst_exec_sigs.hh: " \
-           + ', '.join(env['CPU_MODELS'])
+           + ', '.join(temp_cpu_list)
 
 # Add command to generate header to environment.
 env.Command('static_inst_exec_sigs.hh', models_db,
             Action(gen_cpu_exec_signatures, gen_sigs_string,
-                   varlist = ['CPU_MODELS']))
+                   varlist = temp_cpu_list))
+
+env.Depends('static_inst_exec_sigs.hh', Value(env['USE_CHECKER']))
+env.Depends('static_inst_exec_sigs.hh', Value(env['CPU_MODELS']))
+
+# List of CPU models supported by the Checker.  Errors out if
+# USE_CHECKER=True and none of these models is in CPU_MODELS.
+CheckerSupportedCPUList = ['O3CPU', 'OzoneCPU']
 
 #################################################################
 #
@@ -116,15 +130,13 @@ if need_simple_base:
 if 'FastCPU' in env['CPU_MODELS']:
     sources += Split('fast/cpu.cc')
 
-if 'AlphaFullCPU' in env['CPU_MODELS']:
+need_bp_unit = False
+if 'O3CPU' in env['CPU_MODELS']:
+    need_bp_unit = True
+    sources += SConscript('o3/SConscript', exports = 'env')
     sources += Split('''
-        base_dyn_inst.cc
-        o3/2bit_local_pred.cc
-        o3/alpha_dyn_inst.cc
-        o3/alpha_cpu.cc
-        o3/alpha_cpu_builder.cc
+        o3/base_dyn_inst.cc
         o3/bpred_unit.cc
-        o3/btb.cc
         o3/commit.cc
         o3/decode.cc
         o3/fetch.cc
@@ -136,40 +148,54 @@ if 'AlphaFullCPU' in env['CPU_MODELS']:
         o3/lsq_unit.cc
         o3/lsq.cc
         o3/mem_dep_unit.cc
-        o3/ras.cc
         o3/rename.cc
         o3/rename_map.cc
         o3/rob.cc
         o3/scoreboard.cc
         o3/store_set.cc
-        o3/tournament_pred.cc
         ''')
+    if env['USE_CHECKER']:
+        sources += Split('o3/checker_builder.cc')
 
-if 'OzoneSimpleCPU' in env['CPU_MODELS']:
+if 'OzoneCPU' in env['CPU_MODELS']:
+    need_bp_unit = True
     sources += Split('''
+        ozone/base_dyn_inst.cc
+        ozone/bpred_unit.cc
         ozone/cpu.cc
         ozone/cpu_builder.cc
         ozone/dyn_inst.cc
         ozone/front_end.cc
-        ozone/inorder_back_end.cc
-        ozone/inst_queue.cc
-        ozone/rename_table.cc
-        ''')
-
-if 'OzoneCPU' in env['CPU_MODELS']:
-    sources += Split('''
-        ozone/lsq_unit.cc
         ozone/lw_back_end.cc
         ozone/lw_lsq.cc
+        ozone/rename_table.cc
         ''')
+    if env['USE_CHECKER']:
+        sources += Split('ozone/checker_builder.cc')
 
-if 'CheckerCPU' in env['CPU_MODELS']:
+if need_bp_unit:
     sources += Split('''
-        checker/cpu.cc
-        checker/o3_cpu_builder.cc
+        o3/2bit_local_pred.cc
+        o3/btb.cc
+        o3/ras.cc
+        o3/tournament_pred.cc
         ''')
 
-# FullCPU sources are included from m5/SConscript since they're not
+if env['USE_CHECKER']:
+    sources += Split('checker/cpu.cc')
+    checker_supports = False
+    for i in CheckerSupportedCPUList:
+        if i in env['CPU_MODELS']:
+            checker_supports = True
+    if not checker_supports:
+        print "Checker only supports CPU models",
+        for i in CheckerSupportedCPUList:
+            print i,
+        print ", please set USE_CHECKER=False or use one of those CPU models"              
+        Exit(1)
+
+
+# FullCPU sources are included from src/SConscript since they're not
 # below this point in the file hierarchy.
 
 # Convert file names to SCons File objects.  This takes care of the
diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index 55c04c498..ce440aeff 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -41,7 +41,6 @@
 #include "cpu/cpuevent.hh"
 #include "cpu/thread_context.hh"
 #include "cpu/profile.hh"
-#include "cpu/sampler/sampler.hh"
 #include "sim/param.hh"
 #include "sim/process.hh"
 #include "sim/sim_events.hh"
@@ -60,11 +59,11 @@ int maxThreadsPerCPU = 1;
 
 #if FULL_SYSTEM
 BaseCPU::BaseCPU(Params *p)
-    : SimObject(p->name), clock(p->clock), checkInterrupts(true),
+    : MemObject(p->name), clock(p->clock), checkInterrupts(true),
       params(p), number_of_threads(p->numberOfThreads), system(p->system)
 #else
 BaseCPU::BaseCPU(Params *p)
-    : SimObject(p->name), clock(p->clock), params(p),
+    : MemObject(p->name), clock(p->clock), params(p),
       number_of_threads(p->numberOfThreads), system(p->system)
 #endif
 {
@@ -237,7 +236,7 @@ BaseCPU::registerThreadContexts()
 
 
 void
-BaseCPU::switchOut(Sampler *sampler)
+BaseCPU::switchOut()
 {
     panic("This CPU doesn't support sampling!");
 }
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index 43122f238..2be6e4e81 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -36,17 +36,17 @@
 
 #include "base/statistics.hh"
 #include "config/full_system.hh"
-#include "cpu/sampler/sampler.hh"
 #include "sim/eventq.hh"
-#include "sim/sim_object.hh"
+#include "mem/mem_object.hh"
 #include "arch/isa_traits.hh"
 
 class BranchPred;
 class CheckerCPU;
 class ThreadContext;
 class System;
+class Port;
 
-class BaseCPU : public SimObject
+class BaseCPU : public MemObject
 {
   protected:
     // CPU's clock period in terms of the number of ticks of curTime.
@@ -148,7 +148,7 @@ class BaseCPU : public SimObject
 
     /// Prepare for another CPU to take over execution.  When it is
     /// is ready (drained pipe) it signals the sampler.
-    virtual void switchOut(Sampler *);
+    virtual void switchOut();
 
     /// Take over execution from the given CPU.  Used for warm-up and
     /// sampling.
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index 948ee058a..9cc61f74c 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -31,6 +31,7 @@
 #ifndef __CPU_BASE_DYN_INST_HH__
 #define __CPU_BASE_DYN_INST_HH__
 
+#include <bitset>
 #include <list>
 #include <string>
 
@@ -44,12 +45,6 @@
 #include "cpu/static_inst.hh"
 #include "mem/packet.hh"
 #include "sim/system.hh"
-/*
-#include "encumbered/cpu/full/bpred_update.hh"
-#include "encumbered/cpu/full/spec_memory.hh"
-#include "encumbered/cpu/full/spec_state.hh"
-#include "encumbered/mem/functional/main.hh"
-*/
 
 /**
  * @file
@@ -64,8 +59,8 @@ class BaseDynInst : public FastAlloc, public RefCounted
 {
   public:
     // Typedef for the CPU.
-    typedef typename Impl::FullCPU FullCPU;
-    typedef typename FullCPU::ImplState ImplState;
+    typedef typename Impl::CPUType ImplCPU;
+    typedef typename ImplCPU::ImplState ImplState;
 
     // Binary machine instruction type.
     typedef TheISA::MachInst MachInst;
@@ -132,56 +127,34 @@ class BaseDynInst : public FastAlloc, public RefCounted
     /** The sequence number of the instruction. */
     InstSeqNum seqNum;
 
-    /** Is the instruction in the IQ */
-    bool iqEntry;
-
-    /** Is the instruction in the ROB */
-    bool robEntry;
-
-    /** Is the instruction in the LSQ */
-    bool lsqEntry;
-
-    /** Is the instruction completed. */
-    bool completed;
-
-    /** Is the instruction's result ready. */
-    bool resultReady;
-
-    /** Can this instruction issue. */
-    bool canIssue;
-
-    /** Has this instruction issued. */
-    bool issued;
-
-    /** Has this instruction executed (or made it through execute) yet. */
-    bool executed;
-
-    /** Can this instruction commit. */
-    bool canCommit;
-
-    /** Is this instruction committed. */
-    bool committed;
-
-    /** Is this instruction squashed. */
-    bool squashed;
-
-    /** Is this instruction squashed in the instruction queue. */
-    bool squashedInIQ;
-
-    /** Is this instruction squashed in the instruction queue. */
-    bool squashedInLSQ;
-
-    /** Is this instruction squashed in the instruction queue. */
-    bool squashedInROB;
-
-    /** Is this a recover instruction. */
-    bool recoverInst;
-
-    /** Is this a thread blocking instruction. */
-    bool blockingInst;	/* this inst has called thread_block() */
+    enum Status {
+        IqEntry,                 /// Instruction is in the IQ
+        RobEntry,                /// Instruction is in the ROB
+        LsqEntry,                /// Instruction is in the LSQ
+        Completed,               /// Instruction has completed
+        ResultReady,             /// Instruction has its result
+        CanIssue,                /// Instruction can issue and execute
+        Issued,                  /// Instruction has issued
+        Executed,                /// Instruction has executed
+        CanCommit,               /// Instruction can commit
+        AtCommit,                /// Instruction has reached commit
+        Committed,               /// Instruction has committed
+        Squashed,                /// Instruction is squashed
+        SquashedInIQ,            /// Instruction is squashed in the IQ
+        SquashedInLSQ,           /// Instruction is squashed in the LSQ
+        SquashedInROB,           /// Instruction is squashed in the ROB
+        RecoverInst,             /// Is a recover instruction
+        BlockingInst,            /// Is a blocking instruction
+        ThreadsyncWait,          /// Is a thread synchronization instruction
+        SerializeBefore,         /// Needs to serialize on
+                                 /// instructions ahead of it
+        SerializeAfter,          /// Needs to serialize instructions behind it
+        SerializeHandled,        /// Serialization has been handled
+        NumStatus
+    };
 
-    /** Is this a thread syncrhonization instruction. */
-    bool threadsyncWait;
+    /** The status of this BaseDynInst.  Several bits can be set. */
+    std::bitset<NumStatus> status;
 
     /** The thread this instruction is from. */
     short threadNumber;
@@ -192,8 +165,8 @@ class BaseDynInst : public FastAlloc, public RefCounted
     /** How many source registers are ready. */
     unsigned readyRegs;
 
-    /** Pointer to the FullCPU object. */
-    FullCPU *cpu;
+    /** Pointer to the Impl's CPU object. */
+    ImplCPU *cpu;
 
     /** Pointer to the thread state. */
     ImplState *thread;
@@ -202,10 +175,9 @@ class BaseDynInst : public FastAlloc, public RefCounted
     Fault fault;
 
     /** The memory request. */
-//    MemReqPtr req;
     Request *req;
-//    Packet pkt;
 
+    /** Pointer to the data for the memory access. */
     uint8_t *memData;
 
     /** The effective virtual address (lds & stores only). */
@@ -223,12 +195,6 @@ class BaseDynInst : public FastAlloc, public RefCounted
     /** The memory request flags (from translation). */
     unsigned memReqFlags;
 
-    /** The size of the data to be stored. */
-    int storeSize;
-
-    /** The data to be stored. */
-    IntReg storeData;
-
     union Result {
         uint64_t integer;
         float fp;
@@ -273,7 +239,7 @@ class BaseDynInst : public FastAlloc, public RefCounted
      *  @param cpu Pointer to the instruction's CPU.
      */
     BaseDynInst(ExtMachInst inst, Addr PC, Addr pred_PC, InstSeqNum seq_num,
-                FullCPU *cpu);
+                ImplCPU *cpu);
 
     /** BaseDynInst constructor given a StaticInst pointer.
      *  @param _staticInst The StaticInst for this BaseDynInst.
@@ -288,21 +254,6 @@ class BaseDynInst : public FastAlloc, public RefCounted
     void initVars();
 
   public:
-    /**
-     *  @todo: Make this function work; currently it is a dummy function.
-     *  @param fault Last fault.
-     *  @param cmd Last command.
-     *  @param addr Virtual address of access.
-     *  @param p Memory accessed.
-     *  @param nbytes Access size.
-     */
-//    void
-//    trace_mem(Fault fault,
-//	      MemCmd cmd,
-//	      Addr addr,
-//	      void *p,
-//	      int nbytes);
-
     /** Dumps out contents of this BaseDynInst. */
     void dump();
 
@@ -360,9 +311,9 @@ class BaseDynInst : public FastAlloc, public RefCounted
     bool isThreadSync()   const { return staticInst->isThreadSync(); }
     bool isSerializing()  const { return staticInst->isSerializing(); }
     bool isSerializeBefore() const
-    { return staticInst->isSerializeBefore() || serializeBefore; }
+    { return staticInst->isSerializeBefore() || status[SerializeBefore]; }
     bool isSerializeAfter() const
-    { return staticInst->isSerializeAfter() || serializeAfter; }
+    { return staticInst->isSerializeAfter() || status[SerializeAfter]; }
     bool isMemBarrier()   const { return staticInst->isMemBarrier(); }
     bool isWriteBarrier() const { return staticInst->isWriteBarrier(); }
     bool isNonSpeculative() const { return staticInst->isNonSpeculative(); }
@@ -371,41 +322,32 @@ class BaseDynInst : public FastAlloc, public RefCounted
     bool isUnverifiable() const { return staticInst->isUnverifiable(); }
 
     /** Temporarily sets this instruction as a serialize before instruction. */
-    void setSerializeBefore() { serializeBefore = true; }
+    void setSerializeBefore() { status.set(SerializeBefore); }
 
     /** Clears the serializeBefore part of this instruction. */
-    void clearSerializeBefore() { serializeBefore = false; }
+    void clearSerializeBefore() { status.reset(SerializeBefore); }
 
     /** Checks if this serializeBefore is only temporarily set. */
-    bool isTempSerializeBefore() { return serializeBefore; }
-
-    /** Tracks if instruction has been externally set as serializeBefore. */
-    bool serializeBefore;
+    bool isTempSerializeBefore() { return status[SerializeBefore]; }
 
     /** Temporarily sets this instruction as a serialize after instruction. */
-    void setSerializeAfter() { serializeAfter = true; }
+    void setSerializeAfter() { status.set(SerializeAfter); }
 
     /** Clears the serializeAfter part of this instruction.*/
-    void clearSerializeAfter() { serializeAfter = false; }
+    void clearSerializeAfter() { status.reset(SerializeAfter); }
 
     /** Checks if this serializeAfter is only temporarily set. */
-    bool isTempSerializeAfter() { return serializeAfter; }
+    bool isTempSerializeAfter() { return status[SerializeAfter]; }
 
-    /** Tracks if instruction has been externally set as serializeAfter. */
-    bool serializeAfter;
+    /** Sets the serialization part of this instruction as handled. */
+    void setSerializeHandled() { status.set(SerializeHandled); }
 
     /** Checks if the serialization part of this instruction has been
      *  handled.  This does not apply to the temporary serializing
      *  state; it only applies to this instruction's own permanent
      *  serializing state.
      */
-    bool isSerializeHandled() { return serializeHandled; }
-
-    /** Sets the serialization part of this instruction as handled. */
-    void setSerializeHandled() { serializeHandled = true; }
-
-    /** Whether or not the serialization of this instruction has been handled. */
-    bool serializeHandled;
+    bool isSerializeHandled() { return status[SerializeHandled]; }
 
     /** Returns the opclass of this instruction. */
     OpClass opClass() const { return staticInst->opClass(); }
@@ -439,11 +381,13 @@ class BaseDynInst : public FastAlloc, public RefCounted
     /** Returns the result of a floating point (double) instruction. */
     double readDoubleResult() { return instResult.dbl; }
 
+    /** Records an integer register being set to a value. */
     void setIntReg(const StaticInst *si, int idx, uint64_t val)
     {
         instResult.integer = val;
     }
 
+    /** Records an fp register being set to a value. */
     void setFloatReg(const StaticInst *si, int idx, FloatReg val, int width)
     {
         if (width == 32)
@@ -454,16 +398,19 @@ class BaseDynInst : public FastAlloc, public RefCounted
             panic("Unsupported width!");
     }
 
+    /** Records an fp register being set to a value. */
     void setFloatReg(const StaticInst *si, int idx, FloatReg val)
     {
         instResult.fp = val;
     }
 
+    /** Records an fp register being set to an integer value. */
     void setFloatRegBits(const StaticInst *si, int idx, uint64_t val, int width)
     {
         instResult.integer = val;
     }
 
+    /** Records an fp register being set to an integer value. */
     void setFloatRegBits(const StaticInst *si, int idx, uint64_t val)
     {
         instResult.integer = val;
@@ -482,106 +429,112 @@ class BaseDynInst : public FastAlloc, public RefCounted
     }
 
     /** Sets this instruction as completed. */
-    void setCompleted() { completed = true; }
+    void setCompleted() { status.set(Completed); }
 
     /** Returns whether or not this instruction is completed. */
-    bool isCompleted() const { return completed; }
+    bool isCompleted() const { return status[Completed]; }
 
-    void setResultReady() { resultReady = true; }
+    /** Marks the result as ready. */
+    void setResultReady() { status.set(ResultReady); }
 
-    bool isResultReady() const { return resultReady; }
+    /** Returns whether or not the result is ready. */
+    bool isResultReady() const { return status[ResultReady]; }
 
     /** Sets this instruction as ready to issue. */
-    void setCanIssue() { canIssue = true; }
+    void setCanIssue() { status.set(CanIssue); }
 
     /** Returns whether or not this instruction is ready to issue. */
-    bool readyToIssue() const { return canIssue; }
+    bool readyToIssue() const { return status[CanIssue]; }
 
     /** Sets this instruction as issued from the IQ. */
-    void setIssued() { issued = true; }
+    void setIssued() { status.set(Issued); }
 
     /** Returns whether or not this instruction has issued. */
-    bool isIssued() const { return issued; }
+    bool isIssued() const { return status[Issued]; }
 
     /** Sets this instruction as executed. */
-    void setExecuted() { executed = true; }
+    void setExecuted() { status.set(Executed); }
 
     /** Returns whether or not this instruction has executed. */
-    bool isExecuted() const { return executed; }
+    bool isExecuted() const { return status[Executed]; }
 
     /** Sets this instruction as ready to commit. */
-    void setCanCommit() { canCommit = true; }
+    void setCanCommit() { status.set(CanCommit); }
 
     /** Clears this instruction as being ready to commit. */
-    void clearCanCommit() { canCommit = false; }
+    void clearCanCommit() { status.reset(CanCommit); }
 
     /** Returns whether or not this instruction is ready to commit. */
-    bool readyToCommit() const { return canCommit; }
+    bool readyToCommit() const { return status[CanCommit]; }
+
+    void setAtCommit() { status.set(AtCommit); }
+
+    bool isAtCommit() { return status[AtCommit]; }
 
     /** Sets this instruction as committed. */
-    void setCommitted() { committed = true; }
+    void setCommitted() { status.set(Committed); }
 
     /** Returns whether or not this instruction is committed. */
-    bool isCommitted() const { return committed; }
+    bool isCommitted() const { return status[Committed]; }
 
     /** Sets this instruction as squashed. */
-    void setSquashed() { squashed = true; }
+    void setSquashed() { status.set(Squashed); }
 
     /** Returns whether or not this instruction is squashed. */
-    bool isSquashed() const { return squashed; }
+    bool isSquashed() const { return status[Squashed]; }
 
     //Instruction Queue Entry
     //-----------------------
     /** Sets this instruction as a entry the IQ. */
-    void setInIQ() { iqEntry = true; }
+    void setInIQ() { status.set(IqEntry); }
 
     /** Sets this instruction as a entry the IQ. */
-    void removeInIQ() { iqEntry = false; }
+    void clearInIQ() { status.reset(IqEntry); }
+
+    /** Returns whether or not this instruction is in the IQ. */
+    bool isInIQ() const { return status[IqEntry]; }
 
     /** Sets this instruction as squashed in the IQ. */
-    void setSquashedInIQ() { squashedInIQ = true; squashed = true;}
+    void setSquashedInIQ() { status.set(SquashedInIQ); status.set(Squashed);}
 
     /** Returns whether or not this instruction is squashed in the IQ. */
-    bool isSquashedInIQ() const { return squashedInIQ; }
-
-    /** Returns whether or not this instruction has issued. */
-    bool isInIQ() const { return iqEntry; }
+    bool isSquashedInIQ() const { return status[SquashedInIQ]; }
 
 
     //Load / Store Queue Functions
     //-----------------------
     /** Sets this instruction as a entry the LSQ. */
-    void setInLSQ() { lsqEntry = true; }
+    void setInLSQ() { status.set(LsqEntry); }
 
     /** Sets this instruction as a entry the LSQ. */
-    void removeInLSQ() { lsqEntry = false; }
+    void removeInLSQ() { status.reset(LsqEntry); }
+
+    /** Returns whether or not this instruction is in the LSQ. */
+    bool isInLSQ() const { return status[LsqEntry]; }
 
     /** Sets this instruction as squashed in the LSQ. */
-    void setSquashedInLSQ() { squashedInLSQ = true;}
+    void setSquashedInLSQ() { status.set(SquashedInLSQ);}
 
     /** Returns whether or not this instruction is squashed in the LSQ. */
-    bool isSquashedInLSQ() const { return squashedInLSQ; }
-
-    /** Returns whether or not this instruction is in the LSQ. */
-    bool isInLSQ() const { return lsqEntry; }
+    bool isSquashedInLSQ() const { return status[SquashedInLSQ]; }
 
 
     //Reorder Buffer Functions
     //-----------------------
     /** Sets this instruction as a entry the ROB. */
-    void setInROB() { robEntry = true; }
+    void setInROB() { status.set(RobEntry); }
 
     /** Sets this instruction as a entry the ROB. */
-    void removeInROB() { robEntry = false; }
+    void clearInROB() { status.reset(RobEntry); }
+
+    /** Returns whether or not this instruction is in the ROB. */
+    bool isInROB() const { return status[RobEntry]; }
 
     /** Sets this instruction as squashed in the ROB. */
-    void setSquashedInROB() { squashedInROB = true; }
+    void setSquashedInROB() { status.set(SquashedInROB); }
 
     /** Returns whether or not this instruction is squashed in the ROB. */
-    bool isSquashedInROB() const { return squashedInROB; }
-
-    /** Returns whether or not this instruction is in the ROB. */
-    bool isInROB() const { return robEntry; }
+    bool isSquashedInROB() const { return status[SquashedInROB]; }
 
     /** Read the PC of this instruction. */
     const Addr readPC() const { return PC; }
@@ -590,17 +543,18 @@ class BaseDynInst : public FastAlloc, public RefCounted
     void setNextPC(uint64_t val)
     {
         nextPC = val;
-//        instResult.integer = val;
     }
 
+    /** Sets the ASID. */
     void setASID(short addr_space_id) { asid = addr_space_id; }
 
-    void setThread(unsigned tid) { threadNumber = tid; }
+    /** Sets the thread id. */
+    void setTid(unsigned tid) { threadNumber = tid; }
 
-    void setState(ImplState *state) { thread = state; }
+    /** Sets the pointer to the thread state. */
+    void setThreadState(ImplState *state) { thread = state; }
 
-    /** Returns the thread context.
-     */
+    /** Returns the thread context. */
     ThreadContext *tcBase() { return thread->getTC(); }
 
   private:
@@ -637,8 +591,6 @@ class BaseDynInst : public FastAlloc, public RefCounted
     /** Store queue index. */
     int16_t sqIdx;
 
-    bool reachedCommit;
-
     /** Iterator pointing to this BaseDynInst in the list of all insts. */
     ListIt instListIt;
 
diff --git a/src/cpu/base_dyn_inst.cc b/src/cpu/base_dyn_inst_impl.hh
index 30fa10a6b..91424faad 100644
--- a/src/cpu/base_dyn_inst.cc
+++ b/src/cpu/base_dyn_inst_impl.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -41,10 +41,6 @@
 #include "mem/request.hh"
 
 #include "cpu/base_dyn_inst.hh"
-#include "cpu/o3/alpha_impl.hh"
-#include "cpu/o3/alpha_cpu.hh"
-//#include "cpu/ozone/simple_impl.hh"
-//#include "cpu/ozone/ozone_impl.hh"
 
 using namespace std;
 using namespace TheISA;
@@ -71,8 +67,8 @@ my_hash_t thishash;
 template <class Impl>
 BaseDynInst<Impl>::BaseDynInst(ExtMachInst machInst, Addr inst_PC,
                                Addr pred_PC, InstSeqNum seq_num,
-                               FullCPU *cpu)
-  : staticInst(machInst), traceData(NULL), cpu(cpu)/*, xc(cpu->xcBase())*/
+                               ImplCPU *cpu)
+  : staticInst(machInst), traceData(NULL), cpu(cpu)
 {
     seqNum = seq_num;
 
@@ -99,37 +95,18 @@ BaseDynInst<Impl>::initVars()
     memData = NULL;
     effAddr = 0;
     physEffAddr = 0;
-    storeSize = 0;
 
     readyRegs = 0;
 
-    // May want to turn this into a bit vector or something.
-    completed = false;
-    resultReady = false;
-    canIssue = false;
-    issued = false;
-    executed = false;
-    canCommit = false;
-    committed = false;
-    squashed = false;
-    squashedInIQ = false;
-    squashedInLSQ = false;
-    squashedInROB = false;
+    instResult.integer = 0;
+
+    status.reset();
+
     eaCalcDone = false;
     memOpDone = false;
+
     lqIdx = -1;
     sqIdx = -1;
-    reachedCommit = false;
-
-    blockingInst = false;
-    recoverInst = false;
-
-    iqEntry = false;
-    robEntry = false;
-
-    serializeBefore = false;
-    serializeAfter = false;
-    serializeHandled = false;
 
     // Eventually make this a parameter.
     threadNumber = 0;
@@ -242,31 +219,7 @@ template <class Impl>
 void
 BaseDynInst<Impl>::writeHint(Addr addr, int size, unsigned flags)
 {
-    // Need to create a MemReq here so we can do a translation.  This
-    // will casue a TLB miss trap if necessary... not sure whether
-    // that's the best thing to do or not.  We don't really need the
-    // MemReq otherwise, since wh64 has no functional effect.
-/*
-    MemReqPtr req = new MemReq(addr, thread->getXCProxy(), size, flags);
-    req->asid = asid;
-
-    fault = cpu->translateDataWriteReq(req);
-
-    if (fault == NoFault && !(req->flags & UNCACHEABLE)) {
-        // Record key MemReq parameters so we can generate another one
-        // just like it for the timing access without calling translate()
-        // again (which might mess up the TLB).
-        effAddr = req->vaddr;
-        physEffAddr = req->paddr;
-        memReqFlags = req->flags;
-    } else {
-        // ignore faults & accesses to uncacheable space... treat as no-op
-        effAddr = physEffAddr = MemReq::inval_addr;
-    }
-
-    storeSize = size;
-    storeData = 0;
-*/
+    // Not currently supported.
 }
 
 /**
@@ -276,22 +229,7 @@ template <class Impl>
 Fault
 BaseDynInst<Impl>::copySrcTranslate(Addr src)
 {
-/*
-    MemReqPtr req = new MemReq(src, thread->getXCProxy(), 64);
-    req->asid = asid;
-
-    // translate to physical address
-    Fault fault = cpu->translateDataReadReq(req);
-
-    if (fault == NoFault) {
-        thread->copySrcAddr = src;
-        thread->copySrcPhysAddr = req->paddr;
-    } else {
-        thread->copySrcAddr = 0;
-        thread->copySrcPhysAddr = 0;
-    }
-    return fault;
-*/
+    // Not currently supported.
     return NoFault;
 }
 
@@ -302,26 +240,7 @@ template <class Impl>
 Fault
 BaseDynInst<Impl>::copy(Addr dest)
 {
-/*
-    uint8_t data[64];
-    FunctionalMemory *mem = thread->mem;
-    assert(thread->copySrcPhysAddr);
-    MemReqPtr req = new MemReq(dest, thread->getXCProxy(), 64);
-    req->asid = asid;
-
-    // translate to physical address
-    Fault fault = cpu->translateDataWriteReq(req);
-
-    if (fault == NoFault) {
-        Addr dest_addr = req->paddr;
-        // Need to read straight from memory since we have more than 8 bytes.
-        req->paddr = thread->copySrcPhysAddr;
-        mem->read(req, data);
-        req->paddr = dest_addr;
-        mem->write(req, data);
-    }
-    return fault;
-*/
+    // Not currently supported.
     return NoFault;
 }
 
@@ -350,7 +269,7 @@ void
 BaseDynInst<Impl>::markSrcRegReady()
 {
     if (++readyRegs == numSrcRegs()) {
-        canIssue = true;
+        status.set(CanIssue);
     }
 }
 
@@ -358,13 +277,9 @@ template <class Impl>
 void
 BaseDynInst<Impl>::markSrcRegReady(RegIndex src_idx)
 {
-    ++readyRegs;
-
     _readySrcRegIdx[src_idx] = true;
 
-    if (readyRegs == numSrcRegs()) {
-        canIssue = true;
-    }
+    markSrcRegReady();
 }
 
 template <class Impl>
@@ -382,25 +297,3 @@ BaseDynInst<Impl>::eaSrcsReady()
 
     return true;
 }
-
-// Forward declaration
-template class BaseDynInst<AlphaSimpleImpl>;
-
-template <>
-int
-BaseDynInst<AlphaSimpleImpl>::instcount = 0;
-/*
-// Forward declaration
-template class BaseDynInst<SimpleImpl>;
-
-template <>
-int
-BaseDynInst<SimpleImpl>::instcount = 0;
-
-// Forward declaration
-template class BaseDynInst<OzoneImpl>;
-
-template <>
-int
-BaseDynInst<OzoneImpl>::instcount = 0;
-*/
diff --git a/src/cpu/checker/cpu.cc b/src/cpu/checker/cpu.cc
index ebc02f7be..1540a6b94 100644
--- a/src/cpu/checker/cpu.cc
+++ b/src/cpu/checker/cpu.cc
@@ -31,27 +31,17 @@
 #include <list>
 #include <string>
 
-#include "base/refcnt.hh"
 #include "cpu/base.hh"
-#include "cpu/base_dyn_inst.hh"
 #include "cpu/checker/cpu.hh"
 #include "cpu/simple_thread.hh"
 #include "cpu/thread_context.hh"
 #include "cpu/static_inst.hh"
+#include "mem/packet_impl.hh"
 #include "sim/byteswap.hh"
-#include "sim/sim_object.hh"
-#include "sim/stats.hh"
-
-#include "cpu/o3/alpha_dyn_inst.hh"
-#include "cpu/o3/alpha_impl.hh"
-
-//#include "cpu/ozone/dyn_inst.hh"
-//#include "cpu/ozone/ozone_impl.hh"
-//#include "cpu/ozone/simple_impl.hh"
 
 #if FULL_SYSTEM
-#include "sim/system.hh"
 #include "arch/vtophys.hh"
+#include "kern/kernel_stats.hh"
 #endif // FULL_SYSTEM
 
 using namespace std;
@@ -77,6 +67,7 @@ CheckerCPU::CheckerCPU(Params *p)
     changedPC = willChangePC = changedNextPC = false;
 
     exitOnError = p->exitOnError;
+    warnOnlyOnLoadError = p->warnOnlyOnLoadError;
 #if FULL_SYSTEM
     itb = p->itb;
     dtb = p->dtb;
@@ -84,6 +75,8 @@ CheckerCPU::CheckerCPU(Params *p)
 #else
     process = p->process;
 #endif
+
+    result.integer = 0;
 }
 
 CheckerCPU::~CheckerCPU()
@@ -406,379 +399,10 @@ CheckerCPU::checkFlags(Request *req)
     }
 }
 
-template <class DynInstPtr>
 void
-Checker<DynInstPtr>::tick(DynInstPtr &completed_inst)
+CheckerCPU::dumpAndExit()
 {
-    DynInstPtr inst;
-
-    // Either check this instruction, or add it to a list of
-    // instructions waiting to be checked.  Instructions must be
-    // checked in program order, so if a store has committed yet not
-    // completed, there may be some instructions that are waiting
-    // behind it that have completed and must be checked.
-    if (!instList.empty()) {
-        if (youngestSN < completed_inst->seqNum) {
-            DPRINTF(Checker, "Adding instruction [sn:%lli] PC:%#x to list.\n",
-                    completed_inst->seqNum, completed_inst->readPC());
-            instList.push_back(completed_inst);
-            youngestSN = completed_inst->seqNum;
-        }
-
-        if (!instList.front()->isCompleted()) {
-            return;
-        } else {
-            inst = instList.front();
-            instList.pop_front();
-        }
-    } else {
-        if (!completed_inst->isCompleted()) {
-            if (youngestSN < completed_inst->seqNum) {
-                DPRINTF(Checker, "Adding instruction [sn:%lli] PC:%#x to list.\n",
-                        completed_inst->seqNum, completed_inst->readPC());
-                instList.push_back(completed_inst);
-                youngestSN = completed_inst->seqNum;
-            }
-            return;
-        } else {
-            if (youngestSN < completed_inst->seqNum) {
-                inst = completed_inst;
-                youngestSN = completed_inst->seqNum;
-            } else {
-                return;
-            }
-        }
-    }
-
-    // Try to check all instructions that are completed, ending if we
-    // run out of instructions to check or if an instruction is not
-    // yet completed.
-    while (1) {
-        DPRINTF(Checker, "Processing instruction [sn:%lli] PC:%#x.\n",
-                inst->seqNum, inst->readPC());
-        unverifiedResult.integer = inst->readIntResult();
-        unverifiedReq = inst->req;
-        unverifiedMemData = inst->memData;
-        numCycles++;
-
-        Fault fault = NoFault;
-
-        // maintain $r0 semantics
-        thread->setIntReg(ZeroReg, 0);
-#ifdef TARGET_ALPHA
-        thread->setFloatRegDouble(ZeroReg, 0.0);
-#endif // TARGET_ALPHA
-
-        // Check if any recent PC changes match up with anything we
-        // expect to happen.  This is mostly to check if traps or
-        // PC-based events have occurred in both the checker and CPU.
-        if (changedPC) {
-            DPRINTF(Checker, "Changed PC recently to %#x\n",
-                    thread->readPC());
-            if (willChangePC) {
-                if (newPC == thread->readPC()) {
-                    DPRINTF(Checker, "Changed PC matches expected PC\n");
-                } else {
-                    warn("%lli: Changed PC does not match expected PC, "
-                         "changed: %#x, expected: %#x",
-                         curTick, thread->readPC(), newPC);
-                    handleError();
-                }
-                willChangePC = false;
-            }
-            changedPC = false;
-        }
-        if (changedNextPC) {
-            DPRINTF(Checker, "Changed NextPC recently to %#x\n",
-                    thread->readNextPC());
-            changedNextPC = false;
-        }
-
-        // Try to fetch the instruction
-
-#if FULL_SYSTEM
-#define IFETCH_FLAGS(pc)	((pc) & 1) ? PHYSICAL : 0
-#else
-#define IFETCH_FLAGS(pc)	0
-#endif
-
-        uint64_t fetch_PC = thread->readPC() & ~3;
-
-        // set up memory request for instruction fetch
-        memReq = new Request(inst->threadNumber, fetch_PC,
-                             sizeof(uint32_t),
-                             IFETCH_FLAGS(thread->readPC()),
-                             fetch_PC, thread->readCpuId(), inst->threadNumber);
-
-        bool succeeded = translateInstReq(memReq);
-
-        if (!succeeded) {
-            if (inst->getFault() == NoFault) {
-                // In this case the instruction was not a dummy
-                // instruction carrying an ITB fault.  In the single
-                // threaded case the ITB should still be able to
-                // translate this instruction; in the SMT case it's
-                // possible that its ITB entry was kicked out.
-                warn("%lli: Instruction PC %#x was not found in the ITB!",
-                     curTick, thread->readPC());
-                handleError();
-
-                // go to the next instruction
-                thread->setPC(thread->readNextPC());
-                thread->setNextPC(thread->readNextPC() + sizeof(MachInst));
-
-                return;
-            } else {
-                // The instruction is carrying an ITB fault.  Handle
-                // the fault and see if our results match the CPU on
-                // the next tick().
-                fault = inst->getFault();
-            }
-        }
-
-        if (fault == NoFault) {
-            Packet *pkt = new Packet(memReq, Packet::ReadReq,
-                                     Packet::Broadcast);
-
-            pkt->dataStatic(&machInst);
-
-            icachePort->sendFunctional(pkt);
-
-            delete pkt;
-
-            // keep an instruction count
-            numInst++;
-
-            // decode the instruction
-            machInst = gtoh(machInst);
-            // Checks that the instruction matches what we expected it to be.
-            // Checks both the machine instruction and the PC.
-            validateInst(inst);
-
-            curStaticInst = StaticInst::decode(makeExtMI(machInst,
-                                                         thread->readPC()));
-
-#if FULL_SYSTEM
-            thread->setInst(machInst);
-#endif // FULL_SYSTEM
-
-            fault = inst->getFault();
-        }
-
-        // Discard fetch's memReq.
-        delete memReq;
-        memReq = NULL;
-
-        // Either the instruction was a fault and we should process the fault,
-        // or we should just go ahead execute the instruction.  This assumes
-        // that the instruction is properly marked as a fault.
-        if (fault == NoFault) {
-
-            thread->funcExeInst++;
-
-            fault = curStaticInst->execute(this, NULL);
-
-            // Checks to make sure instrution results are correct.
-            validateExecution(inst);
-
-            if (curStaticInst->isLoad()) {
-                ++numLoad;
-            }
-        }
-
-        if (fault != NoFault) {
-#if FULL_SYSTEM
-            fault->invoke(tc);
-            willChangePC = true;
-            newPC = thread->readPC();
-            DPRINTF(Checker, "Fault, PC is now %#x\n", newPC);
-#else // !FULL_SYSTEM
-            fatal("fault (%d) detected @ PC 0x%08p", fault, thread->readPC());
-#endif // FULL_SYSTEM
-        } else {
-#if THE_ISA != MIPS_ISA
-            // go to the next instruction
-            thread->setPC(thread->readNextPC());
-            thread->setNextPC(thread->readNextPC() + sizeof(MachInst));
-#else
-            // go to the next instruction
-            thread->setPC(thread->readNextPC());
-            thread->setNextPC(thread->readNextNPC());
-            thread->setNextNPC(thread->readNextNPC() + sizeof(MachInst));
-#endif
-
-        }
-
-#if FULL_SYSTEM
-        // @todo: Determine if these should happen only if the
-        // instruction hasn't faulted.  In the SimpleCPU case this may
-        // not be true, but in the O3 or Ozone case this may be true.
-        Addr oldpc;
-        int count = 0;
-        do {
-            oldpc = thread->readPC();
-            system->pcEventQueue.service(tc);
-            count++;
-        } while (oldpc != thread->readPC());
-        if (count > 1) {
-            willChangePC = true;
-            newPC = thread->readPC();
-            DPRINTF(Checker, "PC Event, PC is now %#x\n", newPC);
-        }
-#endif
-
-        // @todo:  Optionally can check all registers. (Or just those
-        // that have been modified).
-        validateState();
-
-        if (memReq) {
-            delete memReq;
-            memReq = NULL;
-        }
-
-        // Continue verifying instructions if there's another completed
-        // instruction waiting to be verified.
-        if (instList.empty()) {
-            break;
-        } else if (instList.front()->isCompleted()) {
-            inst = instList.front();
-            instList.pop_front();
-        } else {
-            break;
-        }
-    }
+    warn("%lli: Checker PC:%#x, next PC:%#x",
+         curTick, thread->readPC(), thread->readNextPC());
+    panic("Checker found an error!");
 }
-
-template <class DynInstPtr>
-void
-Checker<DynInstPtr>::switchOut(Sampler *s)
-{
-    instList.clear();
-}
-
-template <class DynInstPtr>
-void
-Checker<DynInstPtr>::takeOverFrom(BaseCPU *oldCPU)
-{
-}
-
-template <class DynInstPtr>
-void
-Checker<DynInstPtr>::validateInst(DynInstPtr &inst)
-{
-    if (inst->readPC() != thread->readPC()) {
-        warn("%lli: PCs do not match! Inst: %#x, checker: %#x",
-             curTick, inst->readPC(), thread->readPC());
-        if (changedPC) {
-            warn("%lli: Changed PCs recently, may not be an error",
-                 curTick);
-        } else {
-            handleError();
-        }
-    }
-
-    MachInst mi = static_cast<MachInst>(inst->staticInst->machInst);
-
-    if (mi != machInst) {
-        warn("%lli: Binary instructions do not match! Inst: %#x, "
-             "checker: %#x",
-             curTick, mi, machInst);
-        handleError();
-    }
-}
-
-template <class DynInstPtr>
-void
-Checker<DynInstPtr>::validateExecution(DynInstPtr &inst)
-{
-    if (inst->numDestRegs()) {
-        // @todo: Support more destination registers.
-        if (inst->isUnverifiable()) {
-            // Unverifiable instructions assume they were executed
-            // properly by the CPU. Grab the result from the
-            // instruction and write it to the register.
-            RegIndex idx = inst->destRegIdx(0);
-            if (idx < TheISA::FP_Base_DepTag) {
-                thread->setIntReg(idx, inst->readIntResult());
-            } else if (idx < TheISA::Fpcr_DepTag) {
-                thread->setFloatRegBits(idx, inst->readIntResult());
-            } else {
-                thread->setMiscReg(idx, inst->readIntResult());
-            }
-        } else if (result.integer != inst->readIntResult()) {
-            warn("%lli: Instruction results do not match! (Values may not "
-                 "actually be integers) Inst: %#x, checker: %#x",
-                 curTick, inst->readIntResult(), result.integer);
-            handleError();
-        }
-    }
-
-    if (inst->readNextPC() != thread->readNextPC()) {
-        warn("%lli: Instruction next PCs do not match! Inst: %#x, "
-             "checker: %#x",
-             curTick, inst->readNextPC(), thread->readNextPC());
-        handleError();
-    }
-
-    // Checking side effect registers can be difficult if they are not
-    // checked simultaneously with the execution of the instruction.
-    // This is because other valid instructions may have modified
-    // these registers in the meantime, and their values are not
-    // stored within the DynInst.
-    while (!miscRegIdxs.empty()) {
-        int misc_reg_idx = miscRegIdxs.front();
-        miscRegIdxs.pop();
-
-        if (inst->tcBase()->readMiscReg(misc_reg_idx) !=
-            thread->readMiscReg(misc_reg_idx)) {
-            warn("%lli: Misc reg idx %i (side effect) does not match! "
-                 "Inst: %#x, checker: %#x",
-                 curTick, misc_reg_idx,
-                 inst->tcBase()->readMiscReg(misc_reg_idx),
-                 thread->readMiscReg(misc_reg_idx));
-            handleError();
-        }
-    }
-}
-
-template <class DynInstPtr>
-void
-Checker<DynInstPtr>::validateState()
-{
-}
-
-template <class DynInstPtr>
-void
-Checker<DynInstPtr>::dumpInsts()
-{
-    int num = 0;
-
-    InstListIt inst_list_it = --(instList.end());
-
-    cprintf("Inst list size: %i\n", instList.size());
-
-    while (inst_list_it != instList.end())
-    {
-        cprintf("Instruction:%i\n",
-                num);
-
-        cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
-                "Completed:%i\n",
-                (*inst_list_it)->readPC(),
-                (*inst_list_it)->seqNum,
-                (*inst_list_it)->threadNumber,
-                (*inst_list_it)->isCompleted());
-
-        cprintf("\n");
-
-        inst_list_it--;
-        ++num;
-    }
-
-}
-
-//template
-//class Checker<RefCountingPtr<OzoneDynInst<OzoneImpl> > >;
-
-template
-class Checker<RefCountingPtr<AlphaDynInst<AlphaSimpleImpl> > >;
diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh
index c9986d228..a508c56ba 100644
--- a/src/cpu/checker/cpu.hh
+++ b/src/cpu/checker/cpu.hh
@@ -66,7 +66,6 @@ class ThreadContext;
 class MemInterface;
 class Checkpoint;
 class Request;
-class Sampler;
 
 /**
  * CheckerCPU class.  Dynamically verifies instructions as they are
@@ -103,6 +102,7 @@ class CheckerCPU : public BaseCPU
         Process *process;
 #endif
         bool exitOnError;
+        bool warnOnlyOnLoadError;
     };
 
   public:
@@ -127,6 +127,12 @@ class CheckerCPU : public BaseCPU
 
     Port *dcachePort;
 
+    virtual Port *getPort(const std::string &name, int idx)
+    {
+        panic("Not supported on checker!");
+        return NULL;
+    }
+
   public:
     // Primary thread being run.
     SimpleThread *thread;
@@ -335,10 +341,13 @@ class CheckerCPU : public BaseCPU
     void handleError()
     {
         if (exitOnError)
-            panic("Checker found error!");
+            dumpAndExit();
     }
+
     bool checkFlags(Request *req);
 
+    void dumpAndExit();
+
     ThreadContext *tcBase() { return tc; }
     SimpleThread *threadBase() { return thread; }
 
@@ -351,6 +360,7 @@ class CheckerCPU : public BaseCPU
     uint64_t newPC;
     bool changedNextPC;
     bool exitOnError;
+    bool warnOnlyOnLoadError;
 
     InstSeqNum youngestSN;
 };
@@ -369,15 +379,26 @@ class Checker : public CheckerCPU
         : CheckerCPU(p)
     { }
 
-    void switchOut(Sampler *s);
+    void switchOut();
     void takeOverFrom(BaseCPU *oldCPU);
 
-    void tick(DynInstPtr &inst);
+    void verify(DynInstPtr &inst);
 
     void validateInst(DynInstPtr &inst);
     void validateExecution(DynInstPtr &inst);
     void validateState();
 
+    void copyResult(DynInstPtr &inst);
+
+  private:
+    void handleError(DynInstPtr &inst)
+    {
+        if (exitOnError)
+            dumpAndExit(inst);
+    }
+
+    void dumpAndExit(DynInstPtr &inst);
+
     std::list<DynInstPtr> instList;
     typedef typename std::list<DynInstPtr>::iterator InstListIt;
     void dumpInsts();
diff --git a/src/cpu/checker/cpu_impl.hh b/src/cpu/checker/cpu_impl.hh
new file mode 100644
index 000000000..81f97726c
--- /dev/null
+++ b/src/cpu/checker/cpu_impl.hh
@@ -0,0 +1,458 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#include <list>
+#include <string>
+
+#include "base/refcnt.hh"
+#include "cpu/base_dyn_inst.hh"
+#include "cpu/checker/cpu.hh"
+#include "cpu/simple_thread.hh"
+#include "cpu/thread_context.hh"
+#include "cpu/static_inst.hh"
+#include "mem/packet_impl.hh"
+#include "sim/byteswap.hh"
+#include "sim/sim_object.hh"
+#include "sim/stats.hh"
+
+#if FULL_SYSTEM
+#include "arch/vtophys.hh"
+#endif // FULL_SYSTEM
+
+using namespace std;
+//The CheckerCPU does alpha only
+using namespace AlphaISA;
+
+// Re-fetches and re-executes each completed instruction in program order,
+// validating the fetched bits, PC and results against the CPU's DynInst.
+template <class DynInstPtr>
+void
+Checker<DynInstPtr>::verify(DynInstPtr &completed_inst)
+{
+    DynInstPtr inst;
+
+    // Either check this instruction, or add it to a list of
+    // instructions waiting to be checked.  Instructions must be
+    // checked in program order, so if a store has committed yet not
+    // completed, there may be some instructions that are waiting
+    // behind it that have completed and must be checked.
+    if (!instList.empty()) {
+        if (youngestSN < completed_inst->seqNum) {
+            DPRINTF(Checker, "Adding instruction [sn:%lli] PC:%#x to list.\n",
+                    completed_inst->seqNum, completed_inst->readPC());
+            instList.push_back(completed_inst);
+            youngestSN = completed_inst->seqNum;
+        }
+
+        if (!instList.front()->isCompleted()) {
+            return;
+        } else {
+            inst = instList.front();
+            instList.pop_front();
+        }
+    } else {
+        if (!completed_inst->isCompleted()) {
+            if (youngestSN < completed_inst->seqNum) {
+                DPRINTF(Checker, "Adding instruction [sn:%lli] PC:%#x to list.\n",
+                        completed_inst->seqNum, completed_inst->readPC());
+                instList.push_back(completed_inst);
+                youngestSN = completed_inst->seqNum;
+            }
+            return;
+        } else {
+            if (youngestSN < completed_inst->seqNum) {
+                inst = completed_inst;
+                youngestSN = completed_inst->seqNum;
+            } else {
+                return;
+            }
+        }
+    }
+
+    // Try to check all instructions that are completed, ending if we
+    // run out of instructions to check or if an instruction is not
+    // yet completed.
+    while (1) {
+        DPRINTF(Checker, "Processing instruction [sn:%lli] PC:%#x.\n",
+                inst->seqNum, inst->readPC());
+        unverifiedResult.integer = inst->readIntResult();
+        unverifiedReq = inst->req;
+        unverifiedMemData = inst->memData;
+        numCycles++;
+
+        Fault fault = NoFault;
+
+        // maintain $r0 semantics
+        thread->setIntReg(ZeroReg, 0);
+#ifdef TARGET_ALPHA
+        thread->setFloatRegDouble(ZeroReg, 0.0);
+#endif // TARGET_ALPHA
+
+        // Check if any recent PC changes match up with anything we
+        // expect to happen.  This is mostly to check if traps or
+        // PC-based events have occurred in both the checker and CPU.
+        if (changedPC) {
+            DPRINTF(Checker, "Changed PC recently to %#x\n",
+                    thread->readPC());
+            if (willChangePC) {
+                if (newPC == thread->readPC()) {
+                    DPRINTF(Checker, "Changed PC matches expected PC\n");
+                } else {
+                    warn("%lli: Changed PC does not match expected PC, "
+                         "changed: %#x, expected: %#x",
+                         curTick, thread->readPC(), newPC);
+                    CheckerCPU::handleError();
+                }
+                willChangePC = false;
+            }
+            changedPC = false;
+        }
+        if (changedNextPC) {
+            DPRINTF(Checker, "Changed NextPC recently to %#x\n",
+                    thread->readNextPC());
+            changedNextPC = false;
+        }
+
+        // Try to fetch the instruction
+#if FULL_SYSTEM
+#define IFETCH_FLAGS(pc)	(((pc) & 1) ? PHYSICAL : 0)
+#else
+#define IFETCH_FLAGS(pc)	0
+#endif
+
+        uint64_t fetch_PC = thread->readPC() & ~3;
+
+        // set up memory request for instruction fetch
+        memReq = new Request(inst->threadNumber, fetch_PC,
+                             sizeof(uint32_t),
+                             IFETCH_FLAGS(thread->readPC()),
+                             fetch_PC, thread->readCpuId(), inst->threadNumber);
+
+        bool succeeded = translateInstReq(memReq);
+
+        if (!succeeded) {
+            if (inst->getFault() == NoFault) {
+                // In this case the instruction was not a dummy
+                // instruction carrying an ITB fault.  In the single
+                // threaded case the ITB should still be able to
+                // translate this instruction; in the SMT case it's
+                // possible that its ITB entry was kicked out.
+                warn("%lli: Instruction PC %#x was not found in the ITB!",
+                     curTick, thread->readPC());
+                handleError(inst);
+                // Release the fetch request; the early return below
+                // skips the normal cleanup and would otherwise leak it.
+                delete memReq;
+                memReq = NULL;
+
+                // go to the next instruction
+                thread->setPC(thread->readNextPC());
+                thread->setNextPC(thread->readNextPC() + sizeof(MachInst));
+                return;
+            } else {
+                // The instruction is carrying an ITB fault.  Handle
+                // the fault and see if our results match the CPU on
+                // the next verify().
+                fault = inst->getFault();
+            }
+        }
+
+        if (fault == NoFault) {
+            Packet *pkt = new Packet(memReq, Packet::ReadReq,
+                                     Packet::Broadcast);
+            pkt->dataStatic(&machInst);
+            icachePort->sendFunctional(pkt);
+
+            delete pkt;
+
+            // keep an instruction count
+            numInst++;
+
+            // decode the instruction
+            machInst = gtoh(machInst);
+            // Checks that the instruction matches what we expected it to be.
+            // Checks both the machine instruction and the PC.
+            validateInst(inst);
+
+            curStaticInst = StaticInst::decode(makeExtMI(machInst,
+                                                         thread->readPC()));
+
+#if FULL_SYSTEM
+            thread->setInst(machInst);
+#endif // FULL_SYSTEM
+
+            fault = inst->getFault();
+        }
+
+        // Discard fetch's memReq.
+        delete memReq;
+        memReq = NULL;
+
+        // Either the instruction was a fault and we should process the fault,
+        // or we should just go ahead execute the instruction.  This assumes
+        // that the instruction is properly marked as a fault.
+        if (fault == NoFault) {
+            thread->funcExeInst++;
+
+            fault = curStaticInst->execute(this, NULL);
+
+            // Checks to make sure instruction results are correct.
+            validateExecution(inst);
+
+            if (curStaticInst->isLoad()) {
+                ++numLoad;
+            }
+        }
+
+        if (fault != NoFault) {
+#if FULL_SYSTEM
+            fault->invoke(tc);
+            willChangePC = true;
+            newPC = thread->readPC();
+            DPRINTF(Checker, "Fault, PC is now %#x\n", newPC);
+#endif
+        } else {
+#if THE_ISA != MIPS_ISA
+            // go to the next instruction
+            thread->setPC(thread->readNextPC());
+            thread->setNextPC(thread->readNextPC() + sizeof(MachInst));
+#else
+            // go to the next instruction
+            thread->setPC(thread->readNextPC());
+            thread->setNextPC(thread->readNextNPC());
+            thread->setNextNPC(thread->readNextNPC() + sizeof(MachInst));
+#endif
+        }
+
+#if FULL_SYSTEM
+        // @todo: Determine if these should happen only if the
+        // instruction hasn't faulted.  In the SimpleCPU case this may
+        // not be true, but in the O3 or Ozone case this may be true.
+        Addr oldpc;
+        int count = 0;
+        do {
+            oldpc = thread->readPC();
+            system->pcEventQueue.service(tc);
+            count++;
+        } while (oldpc != thread->readPC());
+        if (count > 1) {
+            willChangePC = true;
+            newPC = thread->readPC();
+            DPRINTF(Checker, "PC Event, PC is now %#x\n", newPC);
+        }
+#endif
+
+        // @todo:  Optionally can check all registers. (Or just those
+        // that have been modified).
+        validateState();
+
+        if (memReq) {
+            delete memReq;
+            memReq = NULL;
+        }
+
+        // Continue verifying instructions if there's another completed
+        // instruction waiting to be verified.
+        if (instList.empty()) {
+            break;
+        } else if (instList.front()->isCompleted()) {
+            inst = instList.front();
+            instList.pop_front();
+        } else {
+            break;
+        }
+    }
+}
+// Discards every instruction still queued for verification.
+template <class DynInstPtr>
+void
+Checker<DynInstPtr>::switchOut()
+{
+    instList.clear();
+}
+// Currently a no-op: nothing is copied over from oldCPU.
+template <class DynInstPtr>
+void
+Checker<DynInstPtr>::takeOverFrom(BaseCPU *oldCPU)
+{
+}
+
+// Checks inst's PC and raw machine bits against what the checker fetched.
+template <class DynInstPtr>
+void
+Checker<DynInstPtr>::validateInst(DynInstPtr &inst)
+{
+    if (inst->readPC() != thread->readPC()) {
+        warn("%lli: PCs do not match! Inst: %#x, checker: %#x",
+             curTick, inst->readPC(), thread->readPC());
+        if (!changedPC) {
+            handleError(inst);
+        } else {
+            warn("%lli: Changed PCs recently, may not be an error",
+                 curTick);
+        }
+    }
+
+    const MachInst cpu_mi = static_cast<MachInst>(inst->staticInst->machInst);
+    if (cpu_mi != machInst) {
+        warn("%lli: Binary instructions do not match! Inst: %#x, "
+             "checker: %#x",
+             curTick, cpu_mi, machInst);
+        handleError(inst);
+    }
+}
+
+// Compares the checker's execution of an instruction with the CPU's copy.
+// Checked in order: the destination-register result (unverifiable
+// instructions are trusted and copied instead), the next PC, and each
+// misc register index queued in miscRegIdxs.  Mismatches are warned about
+// and, unless tolerated as a load-value difference, sent to handleError().
+template <class DynInstPtr>
+void
+Checker<DynInstPtr>::validateExecution(DynInstPtr &inst)
+{
+    if (inst->numDestRegs()) {
+        // @todo: Support more destination registers.
+        if (inst->isUnverifiable()) {
+            // Unverifiable instructions assume they were executed
+            // properly by the CPU. Grab the result from the
+            // instruction and write it to the register.
+            copyResult(inst);
+        } else if (result.integer != inst->readIntResult()) {
+            warn("%lli: Instruction results do not match! (Values may not "
+                 "actually be integers) Inst: %#x, checker: %#x",
+                 curTick, inst->readIntResult(), result.integer);
+
+            // It's useful to verify load values from memory, but in MP
+            // systems the value obtained at execute may be different than
+            // the value obtained at completion.  Similarly DMA can
+            // present the same problem on even UP systems.  Thus there is
+            // the option to only warn on loads having a result error.
+            if (inst->isLoad() && warnOnlyOnLoadError) {
+                copyResult(inst);
+            } else {
+                handleError(inst);
+            }
+        }
+    }
+
+    if (inst->readNextPC() != thread->readNextPC()) {
+        warn("%lli: Instruction next PCs do not match! Inst: %#x, "
+             "checker: %#x",
+             curTick, inst->readNextPC(), thread->readNextPC());
+        handleError(inst);
+    }
+
+    // Checking side effect registers can be difficult if they are not
+    // checked simultaneously with the execution of the instruction.
+    // This is because other valid instructions may have modified
+    // these registers in the meantime, and their values are not
+    // stored within the DynInst.
+    while (!miscRegIdxs.empty()) {
+        const int misc_idx = miscRegIdxs.front();
+        miscRegIdxs.pop();
+
+        if (inst->tcBase()->readMiscReg(misc_idx) !=
+            thread->readMiscReg(misc_idx)) {
+            warn("%lli: Misc reg idx %i (side effect) does not match! "
+                 "Inst: %#x, checker: %#x",
+                 curTick, misc_idx,
+                 inst->tcBase()->readMiscReg(misc_idx),
+                 thread->readMiscReg(misc_idx));
+            handleError(inst);
+        }
+    }
+}
+// Placeholder for whole-state checks; currently does nothing.
+template <class DynInstPtr>
+void
+Checker<DynInstPtr>::validateState()
+{
+}
+
+// Copies the CPU's result into dest reg 0 (NOTE: index is passed as-is).
+template <class DynInstPtr>
+void
+Checker<DynInstPtr>::copyResult(DynInstPtr &inst)
+{
+    const RegIndex dest = inst->destRegIdx(0);
+    if (dest < TheISA::FP_Base_DepTag)
+        thread->setIntReg(dest, inst->readIntResult());
+    else if (dest < TheISA::Fpcr_DepTag)
+        thread->setFloatRegBits(dest, inst->readIntResult());
+    else
+        thread->setMiscReg(dest, inst->readIntResult());
+}
+
+// Prints the offending instruction's PC, next PC, sequence number, thread
+// and completion state, dumps it, then defers to CheckerCPU::dumpAndExit().
+template <class DynInstPtr>
+void
+Checker<DynInstPtr>::dumpAndExit(DynInstPtr &inst)
+{
+    cprintf("Error detected, instruction information:\n");
+    cprintf("PC:%#x, nextPC:%#x\n[sn:%lli]\n[tid:%i]\n"
+            "Completed:%i\n",
+            inst->readPC(), inst->readNextPC(),
+            inst->seqNum, inst->threadNumber, inst->isCompleted());
+
+    inst->dump();
+    CheckerCPU::dumpAndExit();
+}
+
+// Prints every instruction still queued for verification, starting with
+// the most recently added.  Reverse iterators are used so that neither an
+// empty list nor stepping past the oldest entry decrements begin(), which
+// would be undefined behaviour.
+template <class DynInstPtr>
+void
+Checker<DynInstPtr>::dumpInsts()
+{
+    typedef typename std::list<DynInstPtr>::reverse_iterator RevIt;
+
+    int num = 0;
+
+    cprintf("Inst list size: %i\n", instList.size());
+
+    for (RevIt it = instList.rbegin(); it != instList.rend(); ++it) {
+        cprintf("Instruction:%i\n", num);
+
+        cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+                "Completed:%i\n",
+                (*it)->readPC(),
+                (*it)->seqNum,
+                (*it)->threadNumber,
+                (*it)->isCompleted());
+
+        cprintf("\n");
+        ++num;
+    }
+
+}
diff --git a/src/cpu/checker/thread_context.hh b/src/cpu/checker/thread_context.hh
index c0ac8f01d..c035e92ac 100644
--- a/src/cpu/checker/thread_context.hh
+++ b/src/cpu/checker/thread_context.hh
@@ -120,7 +120,7 @@ class CheckerThreadContext : public ThreadContext
     void suspend() { actualTC->suspend(); }
 
     /// Set the status to Unallocated.
-    void deallocate() { actualTC->deallocate(); }
+    void deallocate(int delay = 0) { actualTC->deallocate(delay); }
 
     /// Set the status to Halted.
     void halt() { actualTC->halt(); }
diff --git a/src/cpu/cpu_models.py b/src/cpu/cpu_models.py
index 1a9724ca6..5b0c6c4da 100644
--- a/src/cpu/cpu_models.py
+++ b/src/cpu/cpu_models.py
@@ -26,6 +26,10 @@
 #
 # Authors: Steve Reinhardt
 
+import os
+import os.path
+import sys
+
 ################
 # CpuModel class
 #
@@ -47,7 +51,6 @@ class CpuModel:
         # Add self to dict
         CpuModel.dict[name] = self
 
-
 #
 # Define CPU models.
 #
@@ -67,9 +70,6 @@ CpuModel('TimingSimpleCPU', 'timing_simple_cpu_exec.cc',
 CpuModel('FullCPU', 'full_cpu_exec.cc',
          '#include "encumbered/cpu/full/dyn_inst.hh"',
          { 'CPU_exec_context': 'DynInst' })
-CpuModel('AlphaFullCPU', 'alpha_o3_exec.cc',
-         '#include "cpu/o3/alpha_dyn_inst.hh"',
-         { 'CPU_exec_context': 'AlphaDynInst<AlphaSimpleImpl>' })
 CpuModel('OzoneSimpleCPU', 'ozone_simple_exec.cc',
          '#include "cpu/ozone/dyn_inst.hh"',
          { 'CPU_exec_context': 'OzoneDynInst<SimpleImpl>' })
@@ -79,4 +79,6 @@ CpuModel('OzoneCPU', 'ozone_exec.cc',
 CpuModel('CheckerCPU', 'checker_cpu_exec.cc',
          '#include "cpu/checker/cpu.hh"',
          { 'CPU_exec_context': 'CheckerCPU' })
-
+CpuModel('O3CPU', 'o3_cpu_exec.cc',
+         '#include "cpu/o3/isa_specific.hh"',
+         { 'CPU_exec_context': 'O3DynInst' })
diff --git a/src/cpu/cpuevent.hh b/src/cpu/cpuevent.hh
index 11ac7aafb..9dfae27cf 100644
--- a/src/cpu/cpuevent.hh
+++ b/src/cpu/cpuevent.hh
@@ -36,7 +36,7 @@
 
 class ThreadContext;
 
-/** This class creates a global list of events than need a pointer to an
+/** This class creates a global list of events that need a pointer to a
  * thread context. When a switchover takes place the events can be migrated
  * to the new thread context, otherwise you could have a wake timer interrupt
  * go off on a switched out cpu or other unfortunate events. This object MUST be
diff --git a/src/cpu/exec_context.hh b/src/cpu/exec_context.hh
new file mode 100644
index 000000000..f6e8d7c25
--- /dev/null
+++ b/src/cpu/exec_context.hh
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#error "Cannot include this file"
+
+/**
+ * The ExecContext is not a usable class (including this file is a
+ * compile error).  It exists purely as documentation of the interface
+ * that the ISA uses to access and change CPU state.
+ */
+class ExecContext {
+    // The register accessor methods take the index of the
+    // instruction's operand (e.g., 0 or 1), not the architectural
+    // register index, to simplify the implementation of register
+    // renaming.  We find the architectural register index by indexing
+    // into the instruction's own operand index table.  Note that a
+    // raw pointer to the StaticInst is provided instead of a
+    // ref-counted StaticInstPtr to reduce overhead.  This is fine as
+    // long as these methods don't copy the pointer into any long-term
+    // storage (which is pretty hard to imagine they would have reason
+    // to do).
+
+    /** Reads an integer register. */
+    uint64_t readIntReg(const StaticInst *si, int idx);
+
+    /** Reads a floating point register of a specific width. */
+    FloatReg readFloatReg(const StaticInst *si, int idx, int width);
+
+    /** Reads a floating point register of single register width. */
+    FloatReg readFloatReg(const StaticInst *si, int idx);
+
+    /** Reads a floating point register of a specific width in its
+     * binary format, instead of by value. */
+    FloatRegBits readFloatRegBits(const StaticInst *si, int idx, int width);
+
+    /** Reads a floating point register in its binary format, instead
+     * of by value. */
+    FloatRegBits readFloatRegBits(const StaticInst *si, int idx);
+
+    /** Sets an integer register to a value. */
+    void setIntReg(const StaticInst *si, int idx, uint64_t val);
+
+    /** Sets a floating point register of a specific width to a value. */
+    void setFloatReg(const StaticInst *si, int idx, FloatReg val, int width);
+
+    /** Sets a floating point register of single width to a value. */
+    void setFloatReg(const StaticInst *si, int idx, FloatReg val);
+
+    /** Sets the bits of a floating point register of a specific width
+     * to a binary value. */
+    void setFloatRegBits(const StaticInst *si, int idx,
+                         FloatRegBits val, int width);
+
+    /** Sets the bits of a floating point register of single width
+     * to a binary value. */
+    void setFloatRegBits(const StaticInst *si, int idx, FloatRegBits val);
+
+    /** Reads the PC. */
+    uint64_t readPC();
+    /** Reads the NextPC. */
+    uint64_t readNextPC();
+    /** Reads the Next-NextPC. Only for architectures like SPARC or MIPS. */
+    uint64_t readNextNPC();
+
+    /** Sets the PC. */
+    void setPC(uint64_t val);
+    /** Sets the NextPC. */
+    void setNextPC(uint64_t val);
+    /** Sets the Next-NextPC.  Only for architectures like SPARC or MIPS. */
+    void setNextNPC(uint64_t val);
+
+    /** Reads a miscellaneous register. */
+    MiscReg readMiscReg(int misc_reg);
+
+    /** Reads a miscellaneous register, handling any architectural
+     * side effects due to reading that register. */
+    MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault);
+
+    /** Sets a miscellaneous register. */
+    Fault setMiscReg(int misc_reg, const MiscReg &val);
+
+    /** Sets a miscellaneous register, handling any architectural
+     * side effects due to writing that register. */
+    Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val);
+
+    /** Records the effective address of the instruction.  Only valid
+     * for memory ops. */
+    void setEA(Addr EA);
+    /** Returns the effective address of the instruction.  Only valid
+     * for memory ops. */
+    Addr getEA();
+
+    /** Returns a pointer to the ThreadContext. */
+    ThreadContext *tcBase();
+
+    /** Reads an address, creating a memory request with the given
+     * flags.  Stores result of read in data. */
+    template <class T>
+    Fault read(Addr addr, T &data, unsigned flags);
+
+    /** Writes to an address, creating a memory request with the given
+     * flags.  Writes data to memory.  For store conditionals, returns
+     * the result of the store in res. */
+    template <class T>
+    Fault write(T data, Addr addr, unsigned flags, uint64_t *res);
+
+    /** Prefetches an address, creating a memory request with the
+     * given flags. */
+    void prefetch(Addr addr, unsigned flags);
+
+    /** Hints to the memory system that an address will be written to
+     * soon, with the given size.  Creates a memory request with the
+     * given flags. */
+    void writeHint(Addr addr, int size, unsigned flags);
+
+#if FULL_SYSTEM
+    /** Somewhat Alpha-specific function that handles returning from
+     * an error or interrupt. */
+    Fault hwrei();
+    /** Reads the interrupt flags. */
+    int readIntrFlag();
+    /** Sets the interrupt flags to a value. */
+    void setIntrFlag(int val);
+
+    /**
+     * Check for special simulator handling of specific PAL calls.  If
+     * return value is false, actual PAL call will be suppressed.
+     */
+    bool simPalCheck(int palFunc);
+#else
+    /** Executes a syscall specified by the callnum. */
+    void syscall(int64_t callnum);
+#endif
+};
diff --git a/src/cpu/func_unit.cc b/src/cpu/func_unit.cc
new file mode 100644
index 000000000..c20578a43
--- /dev/null
+++ b/src/cpu/func_unit.cc
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2002-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Steve Raasch
+ */
+
+#include <sstream>
+
+#include "base/misc.hh"
+#include "cpu/func_unit.hh"
+#include "sim/builder.hh"
+
+using namespace std;
+
+
+////////////////////////////////////////////////////////////////////////////
+//  The function unit: starts with no capabilities and all latencies zero.
+FuncUnit::FuncUnit()
+{
+    for (int i = 0; i < Num_OpClasses; ++i)
+        opLatencies[i] = issueLatencies[i] = 0;
+    capabilityList.reset();
+}
+
+
+//  Copy constructor: copies latencies and capabilities, but not 'name'
+FuncUnit::FuncUnit(const FuncUnit &fu)
+{
+
+    for (int i = 0; i < Num_OpClasses; ++i) {
+        opLatencies[i] = fu.opLatencies[i];
+        issueLatencies[i] = fu.issueLatencies[i];
+    }
+
+    capabilityList = fu.capabilityList;
+}
+
+
+void
+FuncUnit::addCapability(OpClass cap, unsigned oplat, unsigned issuelat)
+{
+    if (issuelat == 0 || oplat == 0)
+        panic("FuncUnit:  you don't really want a zero-cycle latency do you?");
+
+    capabilityList.set(cap);
+
+    opLatencies[cap] = oplat;
+    issueLatencies[cap] = issuelat;
+}
+
+bool
+FuncUnit::provides(OpClass capability)
+{
+    return capabilityList[capability];
+}
+
+bitset<Num_OpClasses>
+FuncUnit::capabilities()
+{
+    return capabilityList;
+}
+
+unsigned &
+FuncUnit::opLatency(OpClass cap)
+{
+    return opLatencies[cap];
+}
+
+unsigned
+FuncUnit::issueLatency(OpClass capability)
+{
+    return issueLatencies[capability];
+}
+
+////////////////////////////////////////////////////////////////////////////
+//
+//  The SimObjects we use to get the FU information into the simulator
+//
+////////////////////////////////////////////////////////////////////////////
+
+//
+//  We use 2 objects to specify this data in the INI file:
+//    (1) OpDesc - Describes the operation class & latencies
+//                   (multiple OpDesc objects can refer to the same
+//                   operation classes)
+//    (2) FUDesc - Describes the operations available in the unit &
+//                   the number of these units
+//
+//
+
+
+//
+//  The operation-class description object
+//
+
+BEGIN_DECLARE_SIM_OBJECT_PARAMS(OpDesc)
+
+    SimpleEnumParam<OpClass> opClass;
+    Param<unsigned>    opLat;
+    Param<unsigned>    issueLat;
+
+END_DECLARE_SIM_OBJECT_PARAMS(OpDesc)
+
+BEGIN_INIT_SIM_OBJECT_PARAMS(OpDesc)
+
+    INIT_ENUM_PARAM(opClass, "type of operation", opClassStrings),
+    INIT_PARAM(opLat,        "cycles until result is available"),
+    INIT_PARAM(issueLat,     "cycles until another can be issued")
+
+END_INIT_SIM_OBJECT_PARAMS(OpDesc)
+
+
+CREATE_SIM_OBJECT(OpDesc)
+{
+    return new OpDesc(getInstanceName(), opClass, opLat, issueLat);
+}
+
+REGISTER_SIM_OBJECT("OpDesc", OpDesc)
+
+
+//
+//  The FUDesc object
+//
+
+BEGIN_DECLARE_SIM_OBJECT_PARAMS(FUDesc)
+
+    SimObjectVectorParam<OpDesc *> opList;
+    Param<unsigned>                count;
+
+END_DECLARE_SIM_OBJECT_PARAMS(FUDesc)
+
+
+BEGIN_INIT_SIM_OBJECT_PARAMS(FUDesc)
+
+    INIT_PARAM(opList, "list of operation classes for this FU type"),
+    INIT_PARAM(count,  "number of these FU's available")
+
+END_INIT_SIM_OBJECT_PARAMS(FUDesc)
+
+
+CREATE_SIM_OBJECT(FUDesc)
+{
+    return new FUDesc(getInstanceName(), opList, count);
+}
+
+REGISTER_SIM_OBJECT("FUDesc", FUDesc)
+
diff --git a/src/cpu/func_unit.hh b/src/cpu/func_unit.hh
new file mode 100644
index 000000000..780143096
--- /dev/null
+++ b/src/cpu/func_unit.hh
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2002-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Steve Raasch
+ */
+
+#ifndef __CPU_FUNC_UNIT_HH__
+#define __CPU_FUNC_UNIT_HH__
+
+#include <bitset>
+#include <string>
+#include <vector>
+
+#include "cpu/op_class.hh"
+#include "sim/sim_object.hh"
+
+////////////////////////////////////////////////////////////////////////////
+//
+//  Structures used ONLY during the initialization phase...
+//
+//
+//
+
+struct OpDesc : public SimObject
+{
+    OpClass opClass;
+    unsigned    opLat;
+    unsigned    issueLat;
+
+    OpDesc(std::string name, OpClass c, unsigned o, unsigned i)
+        : SimObject(name), opClass(c), opLat(o), issueLat(i) {};
+};
+
+struct FUDesc : public SimObject
+{
+    std::vector<OpDesc *> opDescList;
+    unsigned         number;
+
+    FUDesc(std::string name, std::vector<OpDesc *> l, unsigned n)
+        : SimObject(name), opDescList(l), number(n) {};
+};
+
+typedef std::vector<OpDesc *>::iterator OPDDiterator;
+typedef std::vector<FUDesc *>::iterator FUDDiterator;
+
+
+
+
+////////////////////////////////////////////////////////////////////////////
+//
+//  The actual FU object
+//
+//
+//
+class FuncUnit
+{
+  private:
+    unsigned opLatencies[Num_OpClasses];
+    unsigned issueLatencies[Num_OpClasses];
+    std::bitset<Num_OpClasses> capabilityList;
+
+  public:
+    FuncUnit();
+    FuncUnit(const FuncUnit &fu);
+
+    std::string name;
+
+    void addCapability(OpClass cap, unsigned oplat, unsigned issuelat);
+
+    bool provides(OpClass capability);
+    std::bitset<Num_OpClasses> capabilities();
+
+    unsigned &opLatency(OpClass capability);
+    unsigned issueLatency(OpClass capability);
+};
+
+#endif // __CPU_FUNC_UNIT_HH__
diff --git a/src/cpu/o3/SConscript b/src/cpu/o3/SConscript
new file mode 100755
index 000000000..e65d41411
--- /dev/null
+++ b/src/cpu/o3/SConscript
@@ -0,0 +1,79 @@
+# -*- mode:python -*-
+
+# Copyright (c) 2006 The Regents of The University of Michigan
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Korey Sewell
+
+import os
+import os.path
+import sys
+
+# Import build environment variable from SConstruct.
+Import('env')
+
+
+#################################################################
+#
+# Include ISA-specific files for the O3 CPU-model.  Only Alpha is
+# built; every other TARGET_ISA aborts the build via sys.exit().
+#################################################################
+
+sources = []
+
+if env['TARGET_ISA'] == 'alpha':
+    sources += Split('''
+        alpha/dyn_inst.cc
+        alpha/cpu.cc
+        alpha/thread_context.cc
+        alpha/cpu_builder.cc
+        ''')
+elif env['TARGET_ISA'] == 'mips':
+    sys.exit('O3 CPU does not support MIPS')
+    #sources += Split('''
+    #    mips/dyn_inst.cc
+    #    mips/cpu.cc
+    #    mips/thread_context.cc
+    #    mips/cpu_builder.cc
+    #    ''')
+elif env['TARGET_ISA'] == 'sparc':
+    sys.exit('O3 CPU does not support SPARC')
+    #sources += Split('''
+    #    sparc/dyn_inst.cc
+    #    sparc/cpu.cc
+    #    sparc/thread_context.cc
+    #    sparc/cpu_builder.cc
+    #    ''')
+else:
+    sys.exit('O3 CPU does not support the \'%s\' ISA' % env['TARGET_ISA'])
+
+
+# Convert file names to SCons File objects.  This takes care of the
+# path relative to the top of the directory tree.
+sources = [File(s) for s in sources]
+
+Return('sources')
+
diff --git a/src/cpu/o3/alpha_cpu.cc b/src/cpu/o3/alpha/cpu.cc
index 39cae696b..ed10b2fd1 100644
--- a/src/cpu/o3/alpha_cpu.cc
+++ b/src/cpu/o3/alpha/cpu.cc
@@ -28,11 +28,11 @@
  * Authors: Kevin Lim
  */
 
-#include "cpu/o3/alpha_impl.hh"
-#include "cpu/o3/alpha_cpu_impl.hh"
-#include "cpu/o3/alpha_dyn_inst.hh"
+#include "cpu/o3/alpha/impl.hh"
+#include "cpu/o3/alpha/cpu_impl.hh"
+#include "cpu/o3/alpha/dyn_inst.hh"
 
-// Force instantiation of AlphaFullCPU for all the implemntations that are
+// Force instantiation of AlphaO3CPU for all the implementations that are
 // needed.  Consider merging this and alpha_dyn_inst.cc, and maybe all
 // classes that depend on a certain impl, into one file (alpha_impl.cc?).
-template class AlphaFullCPU<AlphaSimpleImpl>;
+template class AlphaO3CPU<AlphaSimpleImpl>;
diff --git a/src/cpu/o3/alpha/cpu.hh b/src/cpu/o3/alpha/cpu.hh
new file mode 100644
index 000000000..b961341d5
--- /dev/null
+++ b/src/cpu/o3/alpha/cpu.hh
@@ -0,0 +1,204 @@
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#ifndef __CPU_O3_ALPHA_CPU_HH__
+#define __CPU_O3_ALPHA_CPU_HH__
+
+#include "arch/isa_traits.hh"
+#include "cpu/thread_context.hh"
+#include "cpu/o3/cpu.hh"
+#include "sim/byteswap.hh"
+
+class EndQuiesceEvent;
+namespace Kernel {
+    class Statistics;
+};
+
+class TranslatingPort;
+
+/**
+ * AlphaO3CPU class.  Derives from the FullO3CPU class, and
+ * implements all ISA and implementation specific functions of the
+ * CPU.  This is the CPU class that is used for the SimObjects, and is
+ * what is given to the DynInsts.  Most of its state exists in the
+ * FullO3CPU; the state it has is mainly for ISA specific
+ * functionality.
+ */
+template <class Impl>
+class AlphaO3CPU : public FullO3CPU<Impl>
+{
+  protected:
+    typedef TheISA::IntReg IntReg;
+    typedef TheISA::FloatReg FloatReg;
+    typedef TheISA::FloatRegBits FloatRegBits;
+    typedef TheISA::MiscReg MiscReg;
+    typedef TheISA::RegFile RegFile;
+    typedef TheISA::MiscRegFile MiscRegFile;
+
+  public:
+    typedef O3ThreadState<Impl> ImplState;
+    typedef O3ThreadState<Impl> Thread;
+    typedef typename Impl::Params Params;
+
+    /** Constructs an AlphaO3CPU with the given parameters. */
+    AlphaO3CPU(Params *params);
+
+#if FULL_SYSTEM
+    /** ITB pointer. */
+    AlphaITB *itb;
+    /** DTB pointer. */
+    AlphaDTB *dtb;
+#endif
+
+    /** Registers statistics. */
+    void regStats();
+
+#if FULL_SYSTEM
+    /** Translates instruction request. */
+    Fault translateInstReq(RequestPtr &req, Thread *thread)
+    {
+        return itb->translate(req, thread->getTC());
+    }
+
+    /** Translates data read request. */
+    Fault translateDataReadReq(RequestPtr &req, Thread *thread)
+    {
+        return dtb->translate(req, thread->getTC(), false);
+    }
+
+    /** Translates data write request. */
+    Fault translateDataWriteReq(RequestPtr &req, Thread *thread)
+    {
+        return dtb->translate(req, thread->getTC(), true);
+    }
+
+#else
+    /** Translates instruction request in syscall emulation mode. */
+    Fault translateInstReq(RequestPtr &req, Thread *thread)
+    {
+        return thread->getProcessPtr()->pTable->translate(req);
+    }
+
+    /** Translates data read request in syscall emulation mode. */
+    Fault translateDataReadReq(RequestPtr &req, Thread *thread)
+    {
+        return thread->getProcessPtr()->pTable->translate(req);
+    }
+
+    /** Translates data write request in syscall emulation mode. */
+    Fault translateDataWriteReq(RequestPtr &req, Thread *thread)
+    {
+        return thread->getProcessPtr()->pTable->translate(req);
+    }
+
+#endif
+    /** Reads a miscellaneous register. */
+    MiscReg readMiscReg(int misc_reg, unsigned tid);
+
+    /** Reads a misc. register, including any side effects the read
+     * might have as defined by the architecture.
+     */
+    MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault, unsigned tid);
+
+    /** Sets a miscellaneous register. */
+    Fault setMiscReg(int misc_reg, const MiscReg &val, unsigned tid);
+
+    /** Sets a misc. register, including any side effects the write
+     * might have as defined by the architecture.
+     */
+    Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val, unsigned tid);
+
+    /** Initiates a squash of all in-flight instructions for a given
+     * thread.  The source of the squash is an external update of
+     * state through the TC.
+     */
+    void squashFromTC(unsigned tid);
+
+#if FULL_SYSTEM
+    /** Posts an interrupt. */
+    void post_interrupt(int int_num, int index);
+    /** Reads the interrupt flag. */
+    int readIntrFlag();
+    /** Sets the interrupt flags. */
+    void setIntrFlag(int val);
+    /** HW return from error interrupt. */
+    Fault hwrei(unsigned tid);
+    /** Returns if a specific PC is a PAL mode PC. */
+    bool inPalMode(uint64_t PC)
+    { return AlphaISA::PcPAL(PC); }
+
+    bool simPalCheck(int palFunc, unsigned tid);
+
+    /** Processes any interrupts. */
+    void processInterrupts();
+
+    /** Halts the CPU. */
+    void halt() { panic("Halt not implemented!\n"); }
+#endif
+
+    /** Traps to handle given fault. */
+    void trap(Fault fault, unsigned tid);
+
+#if !FULL_SYSTEM
+    /** Executes a syscall.
+     * @todo: Determine if this needs to be virtual.
+     */
+    void syscall(int64_t callnum, int tid);
+    /** Gets a syscall argument. */
+    IntReg getSyscallArg(int i, int tid);
+
+    /** Used to shift args for indirect syscall. */
+    void setSyscallArg(int i, IntReg val, int tid);
+
+    /** Sets the return value of a syscall. */
+    void setSyscallReturn(SyscallReturn return_value, int tid);
+#endif
+
+    /** CPU read function, forwards read to LSQ. */
+    template <class T>
+    Fault read(RequestPtr &req, T &data, int load_idx)
+    {
+        return this->iew.ldstQueue.read(req, data, load_idx);
+    }
+
+    /** CPU write function, forwards write to LSQ. */
+    template <class T>
+    Fault write(RequestPtr &req, T &data, int store_idx)
+    {
+        return this->iew.ldstQueue.write(req, data, store_idx);
+    }
+
+    Addr lockAddr;
+
+    /** Temporary fix for the lock flag, works in the UP case. */
+    bool lockFlag;
+};
+
+#endif // __CPU_O3_ALPHA_CPU_HH__
diff --git a/src/cpu/o3/alpha_cpu_builder.cc b/src/cpu/o3/alpha/cpu_builder.cc
index 828977ccb..5e767655d 100644
--- a/src/cpu/o3/alpha_cpu_builder.cc
+++ b/src/cpu/o3/alpha/cpu_builder.cc
@@ -31,21 +31,21 @@
 #include <string>
 
 #include "cpu/base.hh"
-#include "cpu/o3/alpha_cpu.hh"
-#include "cpu/o3/alpha_impl.hh"
-#include "cpu/o3/alpha_params.hh"
+#include "cpu/o3/alpha/cpu.hh"
+#include "cpu/o3/alpha/impl.hh"
+#include "cpu/o3/alpha/params.hh"
 #include "cpu/o3/fu_pool.hh"
 #include "sim/builder.hh"
 
-class DerivAlphaFullCPU : public AlphaFullCPU<AlphaSimpleImpl>
+class DerivO3CPU : public AlphaO3CPU<AlphaSimpleImpl>
 {
   public:
-    DerivAlphaFullCPU(AlphaSimpleParams *p)
-        : AlphaFullCPU<AlphaSimpleImpl>(p)
+    DerivO3CPU(AlphaSimpleParams *p)
+        : AlphaO3CPU<AlphaSimpleImpl>(p)
     { }
 };
 
-BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivAlphaFullCPU)
+BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU)
 
     Param<int> clock;
     Param<int> numThreads;
@@ -91,12 +91,10 @@ Param<unsigned> renameWidth;
 Param<unsigned> commitToIEWDelay;
 Param<unsigned> renameToIEWDelay;
 Param<unsigned> issueToExecuteDelay;
+Param<unsigned> dispatchWidth;
 Param<unsigned> issueWidth;
-Param<unsigned> executeWidth;
-Param<unsigned> executeIntWidth;
-Param<unsigned> executeFloatWidth;
-Param<unsigned> executeBranchWidth;
-Param<unsigned> executeMemoryWidth;
+Param<unsigned> wbWidth;
+Param<unsigned> wbDepth;
 SimObjectParam<FUPool *> fuPool;
 
 Param<unsigned> iewToCommitDelay;
@@ -104,7 +102,9 @@ Param<unsigned> renameToROBDelay;
 Param<unsigned> commitWidth;
 Param<unsigned> squashWidth;
 Param<Tick> trapLatency;
-Param<Tick> fetchTrapLatency;
+
+Param<unsigned> backComSize;
+Param<unsigned> forwardComSize;
 
 Param<std::string> predType;
 Param<unsigned> localPredictorSize;
@@ -149,9 +149,9 @@ Param<bool> defer_registration;
 Param<bool> function_trace;
 Param<Tick> function_trace_start;
 
-END_DECLARE_SIM_OBJECT_PARAMS(DerivAlphaFullCPU)
+END_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU)
 
-BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU)
+BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU)
 
     INIT_PARAM(clock, "clock speed"),
     INIT_PARAM(numThreads, "number of HW thread contexts"),
@@ -212,12 +212,10 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU)
                "Issue/Execute/Writeback delay"),
     INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal"
                "to the IEW stage)"),
+    INIT_PARAM(dispatchWidth, "Dispatch width"),
     INIT_PARAM(issueWidth, "Issue width"),
-    INIT_PARAM(executeWidth, "Execute width"),
-    INIT_PARAM(executeIntWidth, "Integer execute width"),
-    INIT_PARAM(executeFloatWidth, "Floating point execute width"),
-    INIT_PARAM(executeBranchWidth, "Branch execute width"),
-    INIT_PARAM(executeMemoryWidth, "Memory execute width"),
+    INIT_PARAM(wbWidth, "Writeback width"),
+    INIT_PARAM(wbDepth, "Writeback depth (number of cycles it can buffer)"),
     INIT_PARAM_DFLT(fuPool, "Functional unit pool", NULL),
 
     INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit "
@@ -226,7 +224,9 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU)
     INIT_PARAM(commitWidth, "Commit width"),
     INIT_PARAM(squashWidth, "Squash width"),
     INIT_PARAM_DFLT(trapLatency, "Number of cycles before the trap is handled", 6),
-    INIT_PARAM_DFLT(fetchTrapLatency, "Number of cycles before the fetch trap is handled", 12),
+
+    INIT_PARAM(backComSize, "Time buffer size for backwards communication"),
+    INIT_PARAM(forwardComSize, "Time buffer size for forward communication"),
 
     INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"),
     INIT_PARAM(localPredictorSize, "Size of local predictor"),
@@ -271,11 +271,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU)
     INIT_PARAM(function_trace, "Enable function trace"),
     INIT_PARAM(function_trace_start, "Cycle to start function trace")
 
-END_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU)
+END_INIT_SIM_OBJECT_PARAMS(DerivO3CPU)
 
-CREATE_SIM_OBJECT(DerivAlphaFullCPU)
+CREATE_SIM_OBJECT(DerivO3CPU)
 {
-    DerivAlphaFullCPU *cpu;
+    DerivO3CPU *cpu;
 
 #if FULL_SYSTEM
     // Full-system only supports a single thread for the moment.
@@ -284,12 +284,12 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU)
     // In non-full-system mode, we infer the number of threads from
     // the workload if it's not explicitly specified.
     int actual_num_threads =
-        numThreads.isValid() ? numThreads : workload.size();
+        (numThreads.isValid() && numThreads >= workload.size()) ?
+         numThreads : workload.size();
 
     if (workload.size() == 0) {
         fatal("Must specify at least one workload!");
     }
-
 #endif
 
     AlphaSimpleParams *params = new AlphaSimpleParams;
@@ -343,12 +343,10 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU)
     params->commitToIEWDelay = commitToIEWDelay;
     params->renameToIEWDelay = renameToIEWDelay;
     params->issueToExecuteDelay = issueToExecuteDelay;
+    params->dispatchWidth = dispatchWidth;
     params->issueWidth = issueWidth;
-    params->executeWidth = executeWidth;
-    params->executeIntWidth = executeIntWidth;
-    params->executeFloatWidth = executeFloatWidth;
-    params->executeBranchWidth = executeBranchWidth;
-    params->executeMemoryWidth = executeMemoryWidth;
+    params->wbWidth = wbWidth;
+    params->wbDepth = wbDepth;
     params->fuPool = fuPool;
 
     params->iewToCommitDelay = iewToCommitDelay;
@@ -356,7 +354,9 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU)
     params->commitWidth = commitWidth;
     params->squashWidth = squashWidth;
     params->trapLatency = trapLatency;
-    params->fetchTrapLatency = fetchTrapLatency;
+
+    params->backComSize = backComSize;
+    params->forwardComSize = forwardComSize;
 
     params->predType = predType;
     params->localPredictorSize = localPredictorSize;
@@ -386,7 +386,16 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU)
     params->numROBEntries = numROBEntries;
 
     params->smtNumFetchingThreads = smtNumFetchingThreads;
-    params->smtFetchPolicy = smtFetchPolicy;
+
+    // Default smtFetchPolicy to "RoundRobin", if necessary.
+    std::string round_robin_policy = "RoundRobin";
+    std::string single_thread = "SingleThread";
+
+    if (actual_num_threads > 1 && single_thread.compare(smtFetchPolicy) == 0)
+        params->smtFetchPolicy = round_robin_policy;
+    else
+        params->smtFetchPolicy = smtFetchPolicy;
+
     params->smtIQPolicy    = smtIQPolicy;
     params->smtLSQPolicy    = smtLSQPolicy;
     params->smtLSQThreshold = smtLSQThreshold;
@@ -401,10 +410,10 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU)
     params->functionTrace = function_trace;
     params->functionTraceStart = function_trace_start;
 
-    cpu = new DerivAlphaFullCPU(params);
+    cpu = new DerivO3CPU(params);
 
     return cpu;
 }
 
-REGISTER_SIM_OBJECT("DerivAlphaFullCPU", DerivAlphaFullCPU)
+REGISTER_SIM_OBJECT("DerivO3CPU", DerivO3CPU)
 
diff --git a/src/cpu/o3/alpha/cpu_impl.hh b/src/cpu/o3/alpha/cpu_impl.hh
new file mode 100644
index 000000000..0473e60c2
--- /dev/null
+++ b/src/cpu/o3/alpha/cpu_impl.hh
@@ -0,0 +1,422 @@
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#include "config/use_checker.hh"
+
+#include "arch/alpha/faults.hh"
+#include "base/cprintf.hh"
+#include "base/statistics.hh"
+#include "base/timebuf.hh"
+#include "cpu/checker/thread_context.hh"
+#include "sim/sim_events.hh"
+#include "sim/stats.hh"
+
+#include "cpu/o3/alpha/cpu.hh"
+#include "cpu/o3/alpha/params.hh"
+#include "cpu/o3/alpha/thread_context.hh"
+#include "cpu/o3/comm.hh"
+#include "cpu/o3/thread_state.hh"
+
+#if FULL_SYSTEM
+#include "arch/alpha/osfpal.hh"
+#include "arch/isa_traits.hh"
+#include "cpu/quiesce_event.hh"
+#include "kern/kernel_stats.hh"
+#include "sim/sim_exit.hh"
+#include "sim/system.hh"
+#endif
+
+using namespace TheISA;
+
+template <class Impl>
+AlphaO3CPU<Impl>::AlphaO3CPU(Params *params)
+#if FULL_SYSTEM
+    : FullO3CPU<Impl>(params), itb(params->itb), dtb(params->dtb)
+#else
+    : FullO3CPU<Impl>(params)
+#endif
+{
+    DPRINTF(O3CPU, "Creating AlphaO3CPU object.\n");
+
+    // Set up any thread state.
+    this->thread.resize(this->numThreads);
+
+    for (int i = 0; i < this->numThreads; ++i) {
+#if FULL_SYSTEM
+        // SMT is not supported in FS mode yet.
+        assert(this->numThreads == 1);
+        this->thread[i] = new Thread(this, 0);
+        this->thread[i]->setStatus(ThreadContext::Suspended);
+#else
+        if (i < params->workload.size()) {
+            DPRINTF(O3CPU, "Workload[%i] process is %#x",
+                    i, this->thread[i]);
+            this->thread[i] = new Thread(this, i, params->workload[i],
+                                         i, params->mem);
+
+            this->thread[i]->setStatus(ThreadContext::Suspended);
+
+#if !FULL_SYSTEM
+            /* Use this port for syscall emulation writes to memory. */
+            Port *mem_port;
+            TranslatingPort *trans_port;
+            trans_port = new TranslatingPort(csprintf("%s-%d-funcport",
+                                                      name(), i),
+                                             params->workload[i]->pTable,
+                                             false);
+            mem_port = params->mem->getPort("functional");
+            mem_port->setPeer(trans_port);
+            trans_port->setPeer(mem_port);
+            this->thread[i]->setMemPort(trans_port);
+#endif
+            //usedTids[i] = true;
+            //threadMap[i] = i;
+        } else {
+            //Allocate an empty thread so M5 can use it later
+            //when scheduling threads onto the CPU
+            Process* dummy_proc = NULL;
+
+            this->thread[i] = new Thread(this, i, dummy_proc, i, params->mem);
+            //usedTids[i] = false;
+        }
+#endif // !FULL_SYSTEM
+
+        ThreadContext *tc;
+
+        // Set up the TC that will serve as the interface to the threads/CPU.
+        AlphaTC<Impl> *alpha_tc =
+            new AlphaTC<Impl>;
+
+        tc = alpha_tc;
+
+        // If we're using a checker, then the TC should be the
+        // CheckerThreadContext.
+#if USE_CHECKER
+        if (params->checker) {
+            tc = new CheckerThreadContext<AlphaTC<Impl> >(
+                alpha_tc, this->checker);
+        }
+#endif
+
+        alpha_tc->cpu = this;
+        alpha_tc->thread = this->thread[i];
+
+#if FULL_SYSTEM
+        // Set up quiesce event.
+        this->thread[i]->quiesceEvent = new EndQuiesceEvent(tc);
+
+        Port *mem_port;
+        FunctionalPort *phys_port;
+        VirtualPort *virt_port;
+        phys_port = new FunctionalPort(csprintf("%s-%d-funcport",
+                                                name(), i));
+        mem_port = this->system->physmem->getPort("functional");
+        mem_port->setPeer(phys_port);
+        phys_port->setPeer(mem_port);
+
+        virt_port = new VirtualPort(csprintf("%s-%d-vport",
+                                             name(), i));
+        mem_port = this->system->physmem->getPort("functional");
+        mem_port->setPeer(virt_port);
+        virt_port->setPeer(mem_port);
+
+        this->thread[i]->setPhysPort(phys_port);
+        this->thread[i]->setVirtPort(virt_port);
+#endif
+        // Give the thread the TC.
+        this->thread[i]->tc = tc;
+
+        // Add the TC to the CPU's list of TCs.
+        this->threadContexts.push_back(tc);
+    }
+
+    for (int i=0; i < this->numThreads; i++) {
+        this->thread[i]->setFuncExeInst(0);
+    }
+
+    // Sets CPU pointers. These must be set at this level because the CPU
+    // pointers are defined to be the highest level of CPU class.
+    this->fetch.setCPU(this);
+    this->decode.setCPU(this);
+    this->rename.setCPU(this);
+    this->iew.setCPU(this);
+    this->commit.setCPU(this);
+
+    this->rob.setCPU(this);
+    this->regFile.setCPU(this);
+
+    lockAddr = 0;
+    lockFlag = false;
+}
+
+template <class Impl>
+void
+AlphaO3CPU<Impl>::regStats()
+{
+    // Register stats for everything that has stats.
+    this->fullCPURegStats();
+    this->fetch.regStats();
+    this->decode.regStats();
+    this->rename.regStats();
+    this->iew.regStats();
+    this->commit.regStats();
+}
+
+
+template <class Impl>
+MiscReg
+AlphaO3CPU<Impl>::readMiscReg(int misc_reg, unsigned tid)
+{
+    return this->regFile.readMiscReg(misc_reg, tid);
+}
+
+template <class Impl>
+MiscReg
+AlphaO3CPU<Impl>::readMiscRegWithEffect(int misc_reg, Fault &fault,
+                                        unsigned tid)
+{
+    return this->regFile.readMiscRegWithEffect(misc_reg, fault, tid);
+}
+
+template <class Impl>
+Fault
+AlphaO3CPU<Impl>::setMiscReg(int misc_reg, const MiscReg &val, unsigned tid)
+{
+    return this->regFile.setMiscReg(misc_reg, val, tid);
+}
+
+template <class Impl>
+Fault
+AlphaO3CPU<Impl>::setMiscRegWithEffect(int misc_reg, const MiscReg &val,
+                                       unsigned tid)
+{
+    return this->regFile.setMiscRegWithEffect(misc_reg, val, tid);
+}
+
+template <class Impl>
+void
+AlphaO3CPU<Impl>::squashFromTC(unsigned tid)
+{
+    this->thread[tid]->inSyscall = true;
+    this->commit.generateTCEvent(tid);
+}
+
+#if FULL_SYSTEM
+
+template <class Impl>
+void
+AlphaO3CPU<Impl>::post_interrupt(int int_num, int index)
+{
+    BaseCPU::post_interrupt(int_num, index);
+
+    if (this->thread[0]->status() == ThreadContext::Suspended) {
+        DPRINTF(IPI,"Suspended Processor awoke\n");
+        this->threadContexts[0]->activate();
+    }
+}
+
+template <class Impl>
+int
+AlphaO3CPU<Impl>::readIntrFlag()
+{
+    return this->regFile.readIntrFlag();
+}
+
+template <class Impl>
+void
+AlphaO3CPU<Impl>::setIntrFlag(int val)
+{
+    this->regFile.setIntrFlag(val);
+}
+
+template <class Impl>
+Fault
+AlphaO3CPU<Impl>::hwrei(unsigned tid)
+{
+    // Need to clear the lock flag upon returning from an interrupt.
+    this->lockFlag = false;
+
+    this->thread[tid]->kernelStats->hwrei();
+
+    this->checkInterrupts = true;
+
+    // FIXME: XXX check for interrupts? XXX
+    return NoFault;
+}
+
+template <class Impl>
+bool
+AlphaO3CPU<Impl>::simPalCheck(int palFunc, unsigned tid)
+{
+    if (this->thread[tid]->kernelStats)
+        this->thread[tid]->kernelStats->callpal(palFunc,
+                                                this->threadContexts[tid]);
+
+    switch (palFunc) {
+      case PAL::halt:
+        halt();
+        if (--System::numSystemsRunning == 0)
+            exitSimLoop("all cpus halted");
+        break;
+
+      case PAL::bpt:
+      case PAL::bugchk:
+        if (this->system->breakpoint())
+            return false;
+        break;
+    }
+
+    return true;
+}
+
+template <class Impl>
+void
+AlphaO3CPU<Impl>::processInterrupts()
+{
+    // Check for interrupts here.  For now can copy the code that
+    // exists within isa_fullsys_traits.hh.  Also assume that thread 0
+    // is the one that handles the interrupts.
+    // @todo: Possibly consolidate the interrupt checking code.
+    // @todo: Allow other threads to handle interrupts.
+
+    // Check if there are any outstanding interrupts and, if so,
+    // handle them.
+    int ipl = 0;
+    int summary = 0;
+
+    this->checkInterrupts = false;
+
+    if (this->readMiscReg(IPR_ASTRR, 0))
+        panic("asynchronous traps not implemented\n");
+
+    if (this->readMiscReg(IPR_SIRR, 0)) {
+        for (int i = INTLEVEL_SOFTWARE_MIN;
+             i < INTLEVEL_SOFTWARE_MAX; i++) {
+            if (this->readMiscReg(IPR_SIRR, 0) & (ULL(1) << i)) {
+                // See table 4-19 of the 21164 hardware reference
+                ipl = (i - INTLEVEL_SOFTWARE_MIN) + 1;
+                summary |= (ULL(1) << i);
+            }
+        }
+    }
+
+    uint64_t interrupts = this->intr_status();
+
+    if (interrupts) {
+        for (int i = INTLEVEL_EXTERNAL_MIN;
+             i < INTLEVEL_EXTERNAL_MAX; i++) {
+            if (interrupts & (ULL(1) << i)) {
+                // See table 4-19 of the 21164 hardware reference
+                ipl = i;
+                summary |= (ULL(1) << i);
+            }
+        }
+    }
+
+    if (ipl && ipl > this->readMiscReg(IPR_IPLR, 0)) {
+        this->setMiscReg(IPR_ISR, summary, 0);
+        this->setMiscReg(IPR_INTID, ipl, 0);
+        // Checker needs to know these two registers were updated.
+#if USE_CHECKER
+        if (this->checker) {
+            this->checker->threadBase()->setMiscReg(IPR_ISR, summary);
+            this->checker->threadBase()->setMiscReg(IPR_INTID, ipl);
+        }
+#endif
+        this->trap(Fault(new InterruptFault), 0);
+        DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n",
+                this->readMiscReg(IPR_IPLR, 0), ipl, summary);
+    }
+}
+
+#endif // FULL_SYSTEM
+
+template <class Impl>
+void
+AlphaO3CPU<Impl>::trap(Fault fault, unsigned tid)
+{
+    // Pass the thread's TC into the invoke method.
+    fault->invoke(this->threadContexts[tid]);
+}
+
+#if !FULL_SYSTEM
+
+template <class Impl>
+void
+AlphaO3CPU<Impl>::syscall(int64_t callnum, int tid)
+{
+    DPRINTF(O3CPU, "[tid:%i] Executing syscall().\n\n", tid);
+
+    DPRINTF(Activity,"Activity: syscall() called.\n");
+
+    // Temporarily increase this by one to account for the syscall
+    // instruction.
+    ++(this->thread[tid]->funcExeInst);
+
+    // Execute the actual syscall.
+    this->thread[tid]->syscall(callnum);
+
+    // Decrease funcExeInst by one as the normal commit will handle
+    // incrementing it.
+    --(this->thread[tid]->funcExeInst);
+}
+
+template <class Impl>
+TheISA::IntReg
+AlphaO3CPU<Impl>::getSyscallArg(int i, int tid)
+{
+    return this->readArchIntReg(AlphaISA::ArgumentReg0 + i, tid);
+}
+
+template <class Impl>
+void
+AlphaO3CPU<Impl>::setSyscallArg(int i, IntReg val, int tid)
+{
+    this->setArchIntReg(AlphaISA::ArgumentReg0 + i, val, tid);
+}
+
+template <class Impl>
+void
+AlphaO3CPU<Impl>::setSyscallReturn(SyscallReturn return_value, int tid)
+{
+    // check for error condition.  Alpha syscall convention is to
+    // indicate success/failure in reg a3 (r19) and put the
+    // return value itself in the standard return value reg (v0).
+    if (return_value.successful()) {
+        // no error
+        this->setArchIntReg(SyscallSuccessReg, 0, tid);
+        this->setArchIntReg(ReturnValueReg, return_value.value(), tid);
+    } else {
+        // got an error, return details
+        this->setArchIntReg(SyscallSuccessReg, (IntReg) -1, tid);
+        this->setArchIntReg(ReturnValueReg, -return_value.value(), tid);
+    }
+}
+#endif
diff --git a/src/cpu/o3/alpha_dyn_inst.cc b/src/cpu/o3/alpha/dyn_inst.cc
index 0c1723eec..97d2f3d08 100644
--- a/src/cpu/o3/alpha_dyn_inst.cc
+++ b/src/cpu/o3/alpha/dyn_inst.cc
@@ -28,8 +28,8 @@
  * Authors: Kevin Lim
  */
 
-#include "cpu/o3/alpha_dyn_inst_impl.hh"
-#include "cpu/o3/alpha_impl.hh"
+#include "cpu/o3/alpha/dyn_inst_impl.hh"
+#include "cpu/o3/alpha/impl.hh"
 
 // Force instantiation of AlphaDynInst for all the implementations that
 // are needed.
diff --git a/src/cpu/o3/alpha_dyn_inst.hh b/src/cpu/o3/alpha/dyn_inst.hh
index 36a08c4a7..9dee610b6 100644
--- a/src/cpu/o3/alpha_dyn_inst.hh
+++ b/src/cpu/o3/alpha/dyn_inst.hh
@@ -34,8 +34,8 @@
 #include "arch/isa_traits.hh"
 #include "cpu/base_dyn_inst.hh"
 #include "cpu/inst_seq.hh"
-#include "cpu/o3/alpha_cpu.hh"
-#include "cpu/o3/alpha_impl.hh"
+#include "cpu/o3/alpha/cpu.hh"
+#include "cpu/o3/alpha/impl.hh"
 
 class Packet;
 
@@ -51,7 +51,7 @@ class AlphaDynInst : public BaseDynInst<Impl>
 {
   public:
     /** Typedef for the CPU. */
-    typedef typename Impl::FullCPU FullCPU;
+    typedef typename Impl::O3CPU O3CPU;
 
     /** Binary machine instruction type. */
     typedef TheISA::MachInst MachInst;
@@ -74,7 +74,7 @@ class AlphaDynInst : public BaseDynInst<Impl>
   public:
     /** BaseDynInst constructor given a binary instruction. */
     AlphaDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, InstSeqNum seq_num,
-                 FullCPU *cpu);
+                 O3CPU *cpu);
 
     /** BaseDynInst constructor given a static inst pointer. */
     AlphaDynInst(StaticInstPtr &_staticInst);
diff --git a/src/cpu/o3/alpha_dyn_inst_impl.hh b/src/cpu/o3/alpha/dyn_inst_impl.hh
index a73cf4a7d..2d1b4b309 100644
--- a/src/cpu/o3/alpha_dyn_inst_impl.hh
+++ b/src/cpu/o3/alpha/dyn_inst_impl.hh
@@ -28,11 +28,11 @@
  * Authors: Kevin Lim
  */
 
-#include "cpu/o3/alpha_dyn_inst.hh"
+#include "cpu/o3/alpha/dyn_inst.hh"
 
 template <class Impl>
 AlphaDynInst<Impl>::AlphaDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC,
-                                 InstSeqNum seq_num, FullCPU *cpu)
+                                 InstSeqNum seq_num, O3CPU *cpu)
     : BaseDynInst<Impl>(inst, PC, Pred_PC, seq_num, cpu)
 {
     initVars();
@@ -102,15 +102,7 @@ template <class Impl>
 Fault
 AlphaDynInst<Impl>::completeAcc(Packet *pkt)
 {
-    if (this->isLoad()) {
-        this->fault = this->staticInst->completeAcc(pkt, this,
-                                                    this->traceData);
-    } else if (this->isStore()) {
-        this->fault = this->staticInst->completeAcc(pkt, this,
-                                                    this->traceData);
-    } else {
-        panic("Unknown type!");
-    }
+    this->fault = this->staticInst->completeAcc(pkt, this, this->traceData);
 
     return this->fault;
 }
diff --git a/src/cpu/o3/alpha_impl.hh b/src/cpu/o3/alpha/impl.hh
index 52f7c2394..b928ae654 100644
--- a/src/cpu/o3/alpha_impl.hh
+++ b/src/cpu/o3/alpha/impl.hh
@@ -33,20 +33,21 @@
 
 #include "arch/alpha/isa_traits.hh"
 
-#include "cpu/o3/alpha_params.hh"
+#include "cpu/o3/alpha/params.hh"
 #include "cpu/o3/cpu_policy.hh"
 
+
 // Forward declarations.
 template <class Impl>
 class AlphaDynInst;
 
 template <class Impl>
-class AlphaFullCPU;
+class AlphaO3CPU;
 
 /** Implementation specific struct that defines several key types to the
  *  CPU, the stages within the CPU, the time buffers, and the DynInst.
  *  The struct defines the ISA, the CPU policy, the specific DynInst, the
- *  specific FullCPU, and all of the structs from the time buffers to do
+ *  specific O3CPU, and all of the structs from the time buffers to do
  *  communication.
  *  This is one of the key things that must be defined for each hardware
  *  specific CPU implementation.
@@ -67,8 +68,14 @@ struct AlphaSimpleImpl
      */
     typedef RefCountingPtr<DynInst> DynInstPtr;
 
-    /** The FullCPU type to be used. */
-    typedef AlphaFullCPU<AlphaSimpleImpl> FullCPU;
+    /** The O3CPU type to be used. */
+    typedef AlphaO3CPU<AlphaSimpleImpl> O3CPU;
+
+    /** Same typedef, but for CPUType.  BaseDynInst may not always use
+     * an O3 CPU, so it's clearer to call it CPUType instead in that
+     * case.
+     */
+    typedef O3CPU CPUType;
 
     /** The Params to be passed to each stage. */
     typedef AlphaSimpleParams Params;
@@ -79,4 +86,7 @@ struct AlphaSimpleImpl
     };
 };
 
+/** The O3CPUImpl to be used. */
+typedef AlphaSimpleImpl O3CPUImpl;
+
 #endif // __CPU_O3_ALPHA_IMPL_HH__
diff --git a/src/cpu/o3/alpha/params.hh b/src/cpu/o3/alpha/params.hh
new file mode 100644
index 000000000..c618cee08
--- /dev/null
+++ b/src/cpu/o3/alpha/params.hh
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#ifndef __CPU_O3_ALPHA_PARAMS_HH__
+#define __CPU_O3_ALPHA_PARAMS_HH__
+
+#include "cpu/o3/cpu.hh"
+#include "cpu/o3/params.hh"
+
+//Forward declarations
+class AlphaDTB;
+class AlphaITB;
+class MemObject;
+class Process;
+class System;
+
+/**
+ * This file defines the parameters that will be used for the AlphaO3CPU.
+ * This must be defined externally so that the Impl can have a params class
+ * defined that it can pass to all of the individual stages.
+ */
+
+class AlphaSimpleParams : public O3Params
+{
+  public:
+
+#if FULL_SYSTEM
+    AlphaITB *itb;
+    AlphaDTB *dtb;
+#endif
+};
+
+#endif // __CPU_O3_ALPHA_PARAMS_HH__
diff --git a/src/cpu/o3/alpha/thread_context.cc b/src/cpu/o3/alpha/thread_context.cc
new file mode 100755
index 000000000..4a02715bc
--- /dev/null
+++ b/src/cpu/o3/alpha/thread_context.cc
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ *          Korey Sewell
+ */
+
+#include "cpu/o3/thread_context.hh"
+#include "cpu/o3/thread_context_impl.hh"
+
+template class O3ThreadContext<AlphaSimpleImpl>;
+
diff --git a/src/cpu/o3/alpha/thread_context.hh b/src/cpu/o3/alpha/thread_context.hh
new file mode 100644
index 000000000..ad52b0d2e
--- /dev/null
+++ b/src/cpu/o3/alpha/thread_context.hh
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ *          Korey Sewell
+ */
+
+#include "cpu/o3/thread_context.hh"
+
+template <class Impl>
+class AlphaTC : public O3ThreadContext<Impl>
+{
+  public:
+#if FULL_SYSTEM
+    /** Returns a pointer to the ITB. */
+    virtual AlphaITB *getITBPtr() { return this->cpu->itb; }
+
+    /** Returns a pointer to the DTB. */
+    virtual AlphaDTB *getDTBPtr() { return this->cpu->dtb; }
+
+    /** Returns pointer to the quiesce event. */
+    virtual EndQuiesceEvent *getQuiesceEvent()
+    {
+        return this->thread->quiesceEvent;
+    }
+
+    /** Returns if the thread is currently in PAL mode, based on
+     * the PC's value. */
+    virtual bool inPalMode()
+    { return TheISA::PcPAL(this->cpu->readPC(this->thread->readTid())); }
+#endif
+
+    virtual uint64_t readNextNPC()
+    {
+        panic("Alpha has no NextNPC!");
+        return 0;
+    }
+
+    virtual void setNextNPC(uint64_t val)
+    {
+        panic("Alpha has no NextNPC!");
+    }
+
+    virtual void changeRegFileContext(TheISA::RegFile::ContextParam param,
+                                      TheISA::RegFile::ContextVal val)
+    { panic("Not supported on Alpha!"); }
+
+
+    /** This function exits the thread context in the CPU and returns
+     * 1 if the CPU has no more active threads (meaning it's OK to exit).
+     * Used in syscall-emulation mode when a thread executes the 'exit'
+     * syscall.
+     */
+    virtual int exit()
+    {
+        this->deallocate();
+
+        // If there are still threads executing in the system
+        if (this->cpu->numActiveThreads())
+            return 0; // don't exit simulation
+        else
+            return 1; // exit simulation
+    }
+};
diff --git a/src/cpu/o3/alpha_cpu.hh b/src/cpu/o3/alpha_cpu.hh
deleted file mode 100644
index f81837f3c..000000000
--- a/src/cpu/o3/alpha_cpu.hh
+++ /dev/null
@@ -1,434 +0,0 @@
-/*
- * Copyright (c) 2004-2006 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Kevin Lim
- */
-
-#ifndef __CPU_O3_ALPHA_FULL_CPU_HH__
-#define __CPU_O3_ALPHA_FULL_CPU_HH__
-
-#include "arch/isa_traits.hh"
-#include "cpu/thread_context.hh"
-#include "cpu/o3/cpu.hh"
-#include "sim/byteswap.hh"
-
-class EndQuiesceEvent;
-namespace Kernel {
-    class Statistics;
-};
-
-class TranslatingPort;
-
-/**
- * AlphaFullCPU class.  Derives from the FullO3CPU class, and
- * implements all ISA and implementation specific functions of the
- * CPU.  This is the CPU class that is used for the SimObjects, and is
- * what is given to the DynInsts.  Most of its state exists in the
- * FullO3CPU; the state is has is mainly for ISA specific
- * functionality.
- */
-template <class Impl>
-class AlphaFullCPU : public FullO3CPU<Impl>
-{
-  protected:
-    typedef TheISA::IntReg IntReg;
-    typedef TheISA::FloatReg FloatReg;
-    typedef TheISA::FloatRegBits FloatRegBits;
-    typedef TheISA::MiscReg MiscReg;
-    typedef TheISA::RegFile RegFile;
-    typedef TheISA::MiscRegFile MiscRegFile;
-
-  public:
-    typedef O3ThreadState<Impl> ImplState;
-    typedef O3ThreadState<Impl> Thread;
-    typedef typename Impl::Params Params;
-
-    /** Constructs an AlphaFullCPU with the given parameters. */
-    AlphaFullCPU(Params *params);
-
-    /**
-     * Derived ThreadContext class for use with the AlphaFullCPU.  It
-     * provides the interface for any external objects to access a
-     * single thread's state and some general CPU state.  Any time
-     * external objects try to update state through this interface,
-     * the CPU will create an event to squash all in-flight
-     * instructions in order to ensure state is maintained correctly.
-     * It must be defined specifically for the AlphaFullCPU because
-     * not all architectural state is located within the O3ThreadState
-     * (such as the commit PC, and registers), and specific actions
-     * must be taken when using this interface (such as squashing all
-     * in-flight instructions when doing a write to this interface).
-     */
-    class AlphaTC : public ThreadContext
-    {
-      public:
-        /** Pointer to the CPU. */
-        AlphaFullCPU<Impl> *cpu;
-
-        /** Pointer to the thread state that this TC corrseponds to. */
-        O3ThreadState<Impl> *thread;
-
-        /** Returns a pointer to this CPU. */
-        virtual BaseCPU *getCpuPtr() { return cpu; }
-
-        /** Sets this CPU's ID. */
-        virtual void setCpuId(int id) { cpu->cpu_id = id; }
-
-        /** Reads this CPU's ID. */
-        virtual int readCpuId() { return cpu->cpu_id; }
-
-#if FULL_SYSTEM
-        /** Returns a pointer to the system. */
-        virtual System *getSystemPtr() { return cpu->system; }
-
-        /** Returns a pointer to physical memory. */
-        virtual PhysicalMemory *getPhysMemPtr() { return cpu->physmem; }
-
-        /** Returns a pointer to the ITB. */
-        virtual AlphaITB *getITBPtr() { return cpu->itb; }
-
-        /** Returns a pointer to the DTB. */
-        virtual AlphaDTB *getDTBPtr() { return cpu->dtb; }
-
-        /** Returns a pointer to this thread's kernel statistics. */
-        virtual Kernel::Statistics *getKernelStats()
-        { return thread->kernelStats; }
-
-        virtual FunctionalPort *getPhysPort() { return thread->getPhysPort(); }
-
-        virtual VirtualPort *getVirtPort(ThreadContext *src_tc = NULL);
-
-        void delVirtPort(VirtualPort *vp);
-#else
-        virtual TranslatingPort *getMemPort() { return thread->getMemPort(); }
-
-        /** Returns a pointer to this thread's process. */
-        virtual Process *getProcessPtr() { return thread->getProcessPtr(); }
-#endif
-        /** Returns this thread's status. */
-        virtual Status status() const { return thread->status(); }
-
-        /** Sets this thread's status. */
-        virtual void setStatus(Status new_status)
-        { thread->setStatus(new_status); }
-
-        /** Set the status to Active.  Optional delay indicates number of
-         * cycles to wait before beginning execution. */
-        virtual void activate(int delay = 1);
-
-        /** Set the status to Suspended. */
-        virtual void suspend();
-
-        /** Set the status to Unallocated. */
-        virtual void deallocate();
-
-        /** Set the status to Halted. */
-        virtual void halt();
-
-#if FULL_SYSTEM
-        /** Dumps the function profiling information.
-         * @todo: Implement.
-         */
-        virtual void dumpFuncProfile();
-#endif
-        /** Takes over execution of a thread from another CPU. */
-        virtual void takeOverFrom(ThreadContext *old_context);
-
-        /** Registers statistics associated with this TC. */
-        virtual void regStats(const std::string &name);
-
-        /** Serializes state. */
-        virtual void serialize(std::ostream &os);
-        /** Unserializes state. */
-        virtual void unserialize(Checkpoint *cp, const std::string &section);
-
-#if FULL_SYSTEM
-        /** Returns pointer to the quiesce event. */
-        virtual EndQuiesceEvent *getQuiesceEvent();
-
-        /** Reads the last tick that this thread was activated on. */
-        virtual Tick readLastActivate();
-        /** Reads the last tick that this thread was suspended on. */
-        virtual Tick readLastSuspend();
-
-        /** Clears the function profiling information. */
-        virtual void profileClear();
-        /** Samples the function profiling information. */
-        virtual void profileSample();
-#endif
-        /** Returns this thread's ID number. */
-        virtual int getThreadNum() { return thread->readTid(); }
-
-        /** Returns the instruction this thread is currently committing.
-         *  Only used when an instruction faults.
-         */
-        virtual TheISA::MachInst getInst();
-
-        /** Copies the architectural registers from another TC into this TC. */
-        virtual void copyArchRegs(ThreadContext *tc);
-
-        /** Resets all architectural registers to 0. */
-        virtual void clearArchRegs();
-
-        /** Reads an integer register. */
-        virtual uint64_t readIntReg(int reg_idx);
-
-        virtual FloatReg readFloatReg(int reg_idx, int width);
-
-        virtual FloatReg readFloatReg(int reg_idx);
-
-        virtual FloatRegBits readFloatRegBits(int reg_idx, int width);
-
-        virtual FloatRegBits readFloatRegBits(int reg_idx);
-
-        /** Sets an integer register to a value. */
-        virtual void setIntReg(int reg_idx, uint64_t val);
-
-        virtual void setFloatReg(int reg_idx, FloatReg val, int width);
-
-        virtual void setFloatReg(int reg_idx, FloatReg val);
-
-        virtual void setFloatRegBits(int reg_idx, FloatRegBits val, int width);
-
-        virtual void setFloatRegBits(int reg_idx, FloatRegBits val);
-
-        /** Reads this thread's PC. */
-        virtual uint64_t readPC()
-        { return cpu->readPC(thread->readTid()); }
-
-        /** Sets this thread's PC. */
-        virtual void setPC(uint64_t val);
-
-        /** Reads this thread's next PC. */
-        virtual uint64_t readNextPC()
-        { return cpu->readNextPC(thread->readTid()); }
-
-        /** Sets this thread's next PC. */
-        virtual void setNextPC(uint64_t val);
-
-        virtual uint64_t readNextNPC()
-        {
-            panic("Alpha has no NextNPC!");
-            return 0;
-        }
-
-        virtual void setNextNPC(uint64_t val)
-        { }
-
-        /** Reads a miscellaneous register. */
-        virtual MiscReg readMiscReg(int misc_reg)
-        { return cpu->readMiscReg(misc_reg, thread->readTid()); }
-
-        /** Reads a misc. register, including any side-effects the
-         * read might have as defined by the architecture. */
-        virtual MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault)
-        { return cpu->readMiscRegWithEffect(misc_reg, fault, thread->readTid()); }
-
-        /** Sets a misc. register. */
-        virtual Fault setMiscReg(int misc_reg, const MiscReg &val);
-
-        /** Sets a misc. register, including any side-effects the
-         * write might have as defined by the architecture. */
-        virtual Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val);
-
-        /** Returns the number of consecutive store conditional failures. */
-        // @todo: Figure out where these store cond failures should go.
-        virtual unsigned readStCondFailures()
-        { return thread->storeCondFailures; }
-
-        /** Sets the number of consecutive store conditional failures. */
-        virtual void setStCondFailures(unsigned sc_failures)
-        { thread->storeCondFailures = sc_failures; }
-
-#if FULL_SYSTEM
-        /** Returns if the thread is currently in PAL mode, based on
-         * the PC's value. */
-        virtual bool inPalMode()
-        { return TheISA::PcPAL(cpu->readPC(thread->readTid())); }
-#endif
-        // Only really makes sense for old CPU model.  Lots of code
-        // outside the CPU still checks this function, so it will
-        // always return false to keep everything working.
-        /** Checks if the thread is misspeculating.  Because it is
-         * very difficult to determine if the thread is
-         * misspeculating, this is set as false. */
-        virtual bool misspeculating() { return false; }
-
-#if !FULL_SYSTEM
-        /** Gets a syscall argument by index. */
-        virtual IntReg getSyscallArg(int i);
-
-        /** Sets a syscall argument. */
-        virtual void setSyscallArg(int i, IntReg val);
-
-        /** Sets the syscall return value. */
-        virtual void setSyscallReturn(SyscallReturn return_value);
-
-        /** Executes a syscall in SE mode. */
-        virtual void syscall(int64_t callnum)
-        { return cpu->syscall(callnum, thread->readTid()); }
-
-        /** Reads the funcExeInst counter. */
-        virtual Counter readFuncExeInst() { return thread->funcExeInst; }
-#endif
-        virtual void changeRegFileContext(TheISA::RegFile::ContextParam param,
-                                          TheISA::RegFile::ContextVal val)
-        { panic("Not supported on Alpha!"); }
-    };
-
-#if FULL_SYSTEM
-    /** ITB pointer. */
-    AlphaITB *itb;
-    /** DTB pointer. */
-    AlphaDTB *dtb;
-#endif
-
-    /** Registers statistics. */
-    void regStats();
-
-#if FULL_SYSTEM
-    /** Translates instruction requestion. */
-    Fault translateInstReq(RequestPtr &req, Thread *thread)
-    {
-        return itb->translate(req, thread->getTC());
-    }
-
-    /** Translates data read request. */
-    Fault translateDataReadReq(RequestPtr &req, Thread *thread)
-    {
-        return dtb->translate(req, thread->getTC(), false);
-    }
-
-    /** Translates data write request. */
-    Fault translateDataWriteReq(RequestPtr &req, Thread *thread)
-    {
-        return dtb->translate(req, thread->getTC(), true);
-    }
-
-#else
-    /** Translates instruction requestion in syscall emulation mode. */
-    Fault translateInstReq(RequestPtr &req, Thread *thread)
-    {
-        return thread->getProcessPtr()->pTable->translate(req);
-    }
-
-    /** Translates data read request in syscall emulation mode. */
-    Fault translateDataReadReq(RequestPtr &req, Thread *thread)
-    {
-        return thread->getProcessPtr()->pTable->translate(req);
-    }
-
-    /** Translates data write request in syscall emulation mode. */
-    Fault translateDataWriteReq(RequestPtr &req, Thread *thread)
-    {
-        return thread->getProcessPtr()->pTable->translate(req);
-    }
-
-#endif
-    /** Reads a miscellaneous register. */
-    MiscReg readMiscReg(int misc_reg, unsigned tid);
-
-    /** Reads a misc. register, including any side effects the read
-     * might have as defined by the architecture.
-     */
-    MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault, unsigned tid);
-
-    /** Sets a miscellaneous register. */
-    Fault setMiscReg(int misc_reg, const MiscReg &val, unsigned tid);
-
-    /** Sets a misc. register, including any side effects the write
-     * might have as defined by the architecture.
-     */
-    Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val, unsigned tid);
-
-    /** Initiates a squash of all in-flight instructions for a given
-     * thread.  The source of the squash is an external update of
-     * state through the TC.
-     */
-    void squashFromTC(unsigned tid);
-
-#if FULL_SYSTEM
-    /** Posts an interrupt. */
-    void post_interrupt(int int_num, int index);
-    /** Reads the interrupt flag. */
-    int readIntrFlag();
-    /** Sets the interrupt flags. */
-    void setIntrFlag(int val);
-    /** HW return from error interrupt. */
-    Fault hwrei(unsigned tid);
-    /** Returns if a specific PC is a PAL mode PC. */
-    bool inPalMode(uint64_t PC)
-    { return AlphaISA::PcPAL(PC); }
-
-    /** Traps to handle given fault. */
-    void trap(Fault fault, unsigned tid);
-    bool simPalCheck(int palFunc, unsigned tid);
-
-    /** Processes any interrupts. */
-    void processInterrupts();
-
-    /** Halts the CPU. */
-    void halt() { panic("Halt not implemented!\n"); }
-#endif
-
-
-#if !FULL_SYSTEM
-    /** Executes a syscall.
-     * @todo: Determine if this needs to be virtual.
-     */
-    void syscall(int64_t callnum, int tid);
-    /** Gets a syscall argument. */
-    IntReg getSyscallArg(int i, int tid);
-
-    /** Used to shift args for indirect syscall. */
-    void setSyscallArg(int i, IntReg val, int tid);
-
-    /** Sets the return value of a syscall. */
-    void setSyscallReturn(SyscallReturn return_value, int tid);
-#endif
-
-    /** CPU read function, forwards read to LSQ. */
-    template <class T>
-    Fault read(RequestPtr &req, T &data, int load_idx)
-    {
-        return this->iew.ldstQueue.read(req, data, load_idx);
-    }
-
-    /** CPU write function, forwards write to LSQ. */
-    template <class T>
-    Fault write(RequestPtr &req, T &data, int store_idx)
-    {
-        return this->iew.ldstQueue.write(req, data, store_idx);
-    }
-
-    Addr lockAddr;
-
-    /** Temporary fix for the lock flag, works in the UP case. */
-    bool lockFlag;
-};
-
-#endif // __CPU_O3_ALPHA_FULL_CPU_HH__
diff --git a/src/cpu/o3/alpha_cpu_impl.hh b/src/cpu/o3/alpha_cpu_impl.hh
deleted file mode 100644
index 98290e57f..000000000
--- a/src/cpu/o3/alpha_cpu_impl.hh
+++ /dev/null
@@ -1,872 +0,0 @@
-/*
- * Copyright (c) 2004-2006 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Kevin Lim
- */
-
-#include "arch/alpha/faults.hh"
-#include "base/cprintf.hh"
-#include "base/statistics.hh"
-#include "base/timebuf.hh"
-#include "cpu/checker/thread_context.hh"
-#include "sim/sim_events.hh"
-#include "sim/stats.hh"
-
-#include "cpu/o3/alpha_cpu.hh"
-#include "cpu/o3/alpha_params.hh"
-#include "cpu/o3/comm.hh"
-#include "cpu/o3/thread_state.hh"
-
-#if FULL_SYSTEM
-#include "arch/alpha/osfpal.hh"
-#include "arch/isa_traits.hh"
-#include "cpu/quiesce_event.hh"
-#include "kern/kernel_stats.hh"
-#include "sim/system.hh"
-#endif
-
-using namespace TheISA;
-
-template <class Impl>
-AlphaFullCPU<Impl>::AlphaFullCPU(Params *params)
-#if FULL_SYSTEM
-    : FullO3CPU<Impl>(params), itb(params->itb), dtb(params->dtb)
-#else
-    : FullO3CPU<Impl>(params)
-#endif
-{
-    DPRINTF(FullCPU, "AlphaFullCPU: Creating AlphaFullCPU object.\n");
-
-    // Setup any thread state.
-    this->thread.resize(this->numThreads);
-
-    for (int i = 0; i < this->numThreads; ++i) {
-#if FULL_SYSTEM
-        // SMT is not supported in FS mode yet.
-        assert(this->numThreads == 1);
-        this->thread[i] = new Thread(this, 0);
-        this->thread[i]->setStatus(ThreadContext::Suspended);
-#else
-        if (i < params->workload.size()) {
-            DPRINTF(FullCPU, "FullCPU: Workload[%i] process is %#x",
-                    i, this->thread[i]);
-            this->thread[i] = new Thread(this, i, params->workload[i],
-                                         i, params->mem);
-
-            this->thread[i]->setStatus(ThreadContext::Suspended);
-
-#if !FULL_SYSTEM
-            /* Use this port to for syscall emulation writes to memory. */
-            Port *mem_port;
-            TranslatingPort *trans_port;
-            trans_port = new TranslatingPort(csprintf("%s-%d-funcport",
-                                                      name(), i),
-                                             params->workload[i]->pTable,
-                                             false);
-            mem_port = params->mem->getPort("functional");
-            mem_port->setPeer(trans_port);
-            trans_port->setPeer(mem_port);
-            this->thread[i]->setMemPort(trans_port);
-#endif
-            //usedTids[i] = true;
-            //threadMap[i] = i;
-        } else {
-            //Allocate Empty thread so M5 can use later
-            //when scheduling threads to CPU
-            Process* dummy_proc = NULL;
-
-            this->thread[i] = new Thread(this, i, dummy_proc, i, params->mem);
-            //usedTids[i] = false;
-        }
-#endif // !FULL_SYSTEM
-
-        ThreadContext *tc;
-
-        // Setup the TC that will serve as the interface to the threads/CPU.
-        AlphaTC *alpha_tc = new AlphaTC;
-
-        // If we're using a checker, then the TC should be the
-        // CheckerThreadContext.
-        if (params->checker) {
-            tc = new CheckerThreadContext<AlphaTC>(
-                alpha_tc, this->checker);
-        } else {
-            tc = alpha_tc;
-        }
-
-        alpha_tc->cpu = this;
-        alpha_tc->thread = this->thread[i];
-
-#if FULL_SYSTEM
-        // Setup quiesce event.
-        this->thread[i]->quiesceEvent = new EndQuiesceEvent(tc);
-
-        Port *mem_port;
-        FunctionalPort *phys_port;
-        VirtualPort *virt_port;
-        phys_port = new FunctionalPort(csprintf("%s-%d-funcport",
-                                                name(), i));
-        mem_port = this->system->physmem->getPort("functional");
-        mem_port->setPeer(phys_port);
-        phys_port->setPeer(mem_port);
-
-        virt_port = new VirtualPort(csprintf("%s-%d-vport",
-                                             name(), i));
-        mem_port = this->system->physmem->getPort("functional");
-        mem_port->setPeer(virt_port);
-        virt_port->setPeer(mem_port);
-
-        this->thread[i]->setPhysPort(phys_port);
-        this->thread[i]->setVirtPort(virt_port);
-#endif
-        // Give the thread the TC.
-        this->thread[i]->tc = tc;
-
-        // Add the TC to the CPU's list of TC's.
-        this->threadContexts.push_back(tc);
-    }
-
-    for (int i=0; i < this->numThreads; i++) {
-        this->thread[i]->setFuncExeInst(0);
-    }
-
-    // Sets CPU pointers. These must be set at this level because the CPU
-    // pointers are defined to be the highest level of CPU class.
-    this->fetch.setCPU(this);
-    this->decode.setCPU(this);
-    this->rename.setCPU(this);
-    this->iew.setCPU(this);
-    this->commit.setCPU(this);
-
-    this->rob.setCPU(this);
-    this->regFile.setCPU(this);
-
-    lockAddr = 0;
-    lockFlag = false;
-}
-
-template <class Impl>
-void
-AlphaFullCPU<Impl>::regStats()
-{
-    // Register stats for everything that has stats.
-    this->fullCPURegStats();
-    this->fetch.regStats();
-    this->decode.regStats();
-    this->rename.regStats();
-    this->iew.regStats();
-    this->commit.regStats();
-}
-
-#if FULL_SYSTEM
-template <class Impl>
-VirtualPort *
-AlphaFullCPU<Impl>::AlphaTC::getVirtPort(ThreadContext *src_tc)
-{
-    if (!src_tc)
-        return thread->getVirtPort();
-
-    VirtualPort *vp;
-    Port *mem_port;
-
-    vp = new VirtualPort("tc-vport", src_tc);
-    mem_port = cpu->system->physmem->getPort("functional");
-    mem_port->setPeer(vp);
-    vp->setPeer(mem_port);
-    return vp;
-}
-
-template <class Impl>
-void
-AlphaFullCPU<Impl>::AlphaTC::dumpFuncProfile()
-{
-    // Currently not supported
-}
-#endif
-
-template <class Impl>
-void
-AlphaFullCPU<Impl>::AlphaTC::takeOverFrom(ThreadContext *old_context)
-{
-    // some things should already be set up
-#if FULL_SYSTEM
-    assert(getSystemPtr() == old_context->getSystemPtr());
-#else
-    assert(getProcessPtr() == old_context->getProcessPtr());
-#endif
-
-    // copy over functional state
-    setStatus(old_context->status());
-    copyArchRegs(old_context);
-    setCpuId(old_context->readCpuId());
-
-#if !FULL_SYSTEM
-    thread->funcExeInst = old_context->readFuncExeInst();
-#else
-    EndQuiesceEvent *other_quiesce = old_context->getQuiesceEvent();
-    if (other_quiesce) {
-        // Point the quiesce event's TC at this TC so that it wakes up
-        // the proper CPU.
-        other_quiesce->tc = this;
-    }
-    if (thread->quiesceEvent) {
-        thread->quiesceEvent->tc = this;
-    }
-
-    // Transfer kernel stats from one CPU to the other.
-    thread->kernelStats = old_context->getKernelStats();
-//    storeCondFailures = 0;
-    cpu->lockFlag = false;
-#endif
-
-    old_context->setStatus(ThreadContext::Unallocated);
-
-    thread->inSyscall = false;
-    thread->trapPending = false;
-}
-
-#if FULL_SYSTEM
-template <class Impl>
-void
-AlphaFullCPU<Impl>::AlphaTC::delVirtPort(VirtualPort *vp)
-{
-    delete vp->getPeer();
-    delete vp;
-}
-#endif
-
-template <class Impl>
-void
-AlphaFullCPU<Impl>::AlphaTC::activate(int delay)
-{
-    DPRINTF(FullCPU, "Calling activate on AlphaTC\n");
-
-    if (thread->status() == ThreadContext::Active)
-        return;
-
-#if FULL_SYSTEM
-    thread->lastActivate = curTick;
-#endif
-
-    if (thread->status() == ThreadContext::Unallocated) {
-        cpu->activateWhenReady(thread->readTid());
-        return;
-    }
-
-    thread->setStatus(ThreadContext::Active);
-
-    // status() == Suspended
-    cpu->activateContext(thread->readTid(), delay);
-}
-
-template <class Impl>
-void
-AlphaFullCPU<Impl>::AlphaTC::suspend()
-{
-    DPRINTF(FullCPU, "Calling suspend on AlphaTC\n");
-
-    if (thread->status() == ThreadContext::Suspended)
-        return;
-
-#if FULL_SYSTEM
-    thread->lastActivate = curTick;
-    thread->lastSuspend = curTick;
-#endif
-/*
-#if FULL_SYSTEM
-    // Don't change the status from active if there are pending interrupts
-    if (cpu->check_interrupts()) {
-        assert(status() == ThreadContext::Active);
-        return;
-    }
-#endif
-*/
-    thread->setStatus(ThreadContext::Suspended);
-    cpu->suspendContext(thread->readTid());
-}
-
-template <class Impl>
-void
-AlphaFullCPU<Impl>::AlphaTC::deallocate()
-{
-    DPRINTF(FullCPU, "Calling deallocate on AlphaTC\n");
-
-    if (thread->status() == ThreadContext::Unallocated)
-        return;
-
-    thread->setStatus(ThreadContext::Unallocated);
-    cpu->deallocateContext(thread->readTid());
-}
-
-template <class Impl>
-void
-AlphaFullCPU<Impl>::AlphaTC::halt()
-{
-    DPRINTF(FullCPU, "Calling halt on AlphaTC\n");
-
-    if (thread->status() == ThreadContext::Halted)
-        return;
-
-    thread->setStatus(ThreadContext::Halted);
-    cpu->haltContext(thread->readTid());
-}
-
-template <class Impl>
-void
-AlphaFullCPU<Impl>::AlphaTC::regStats(const std::string &name)
-{
-#if FULL_SYSTEM
-    thread->kernelStats = new Kernel::Statistics(cpu->system);
-    thread->kernelStats->regStats(name + ".kern");
-#endif
-}
-
-template <class Impl>
-void
-AlphaFullCPU<Impl>::AlphaTC::serialize(std::ostream &os)
-{
-#if FULL_SYSTEM
-    if (thread->kernelStats)
-        thread->kernelStats->serialize(os);
-#endif
-
-}
-
-template <class Impl>
-void
-AlphaFullCPU<Impl>::AlphaTC::unserialize(Checkpoint *cp, const std::string &section)
-{
-#if FULL_SYSTEM
-    if (thread->kernelStats)
-        thread->kernelStats->unserialize(cp, section);
-#endif
-
-}
-
-#if FULL_SYSTEM
-template <class Impl>
-EndQuiesceEvent *
-AlphaFullCPU<Impl>::AlphaTC::getQuiesceEvent()
-{
-    return thread->quiesceEvent;
-}
-
-template <class Impl>
-Tick
-AlphaFullCPU<Impl>::AlphaTC::readLastActivate()
-{
-    return thread->lastActivate;
-}
-
-template <class Impl>
-Tick
-AlphaFullCPU<Impl>::AlphaTC::readLastSuspend()
-{
-    return thread->lastSuspend;
-}
-
-template <class Impl>
-void
-AlphaFullCPU<Impl>::AlphaTC::profileClear()
-{}
-
-template <class Impl>
-void
-AlphaFullCPU<Impl>::AlphaTC::profileSample()
-{}
-#endif
-
-template <class Impl>
-TheISA::MachInst
-AlphaFullCPU<Impl>::AlphaTC:: getInst()
-{
-    return thread->getInst();
-}
-
-template <class Impl>
-void
-AlphaFullCPU<Impl>::AlphaTC::copyArchRegs(ThreadContext *tc)
-{
-    // This function will mess things up unless the ROB is empty and
-    // there are no instructions in the pipeline.
-    unsigned tid = thread->readTid();
-    PhysRegIndex renamed_reg;
-
-    // First loop through the integer registers.
-    for (int i = 0; i < AlphaISA::NumIntRegs; ++i) {
-        renamed_reg = cpu->renameMap[tid].lookup(i);
-
-        DPRINTF(FullCPU, "FullCPU: Copying over register %i, had data %lli, "
-                "now has data %lli.\n",
-                renamed_reg, cpu->readIntReg(renamed_reg),
-                tc->readIntReg(i));
-
-        cpu->setIntReg(renamed_reg, tc->readIntReg(i));
-    }
-
-    // Then loop through the floating point registers.
-    for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) {
-        renamed_reg = cpu->renameMap[tid].lookup(i + AlphaISA::FP_Base_DepTag);
-        cpu->setFloatRegBits(renamed_reg,
-                             tc->readFloatRegBits(i));
-    }
-
-    // Copy the misc regs.
-    copyMiscRegs(tc, this);
-
-    // Then finally set the PC and the next PC.
-    cpu->setPC(tc->readPC(), tid);
-    cpu->setNextPC(tc->readNextPC(), tid);
-#if !FULL_SYSTEM
-    this->thread->funcExeInst = tc->readFuncExeInst();
-#endif
-}
-
-template <class Impl>
-void
-AlphaFullCPU<Impl>::AlphaTC::clearArchRegs()
-{}
-
-template <class Impl>
-uint64_t
-AlphaFullCPU<Impl>::AlphaTC::readIntReg(int reg_idx)
-{
-    return cpu->readArchIntReg(reg_idx, thread->readTid());
-}
-
-template <class Impl>
-FloatReg
-AlphaFullCPU<Impl>::AlphaTC::readFloatReg(int reg_idx, int width)
-{
-    switch(width) {
-      case 32:
-        return cpu->readArchFloatRegSingle(reg_idx, thread->readTid());
-      case 64:
-        return cpu->readArchFloatRegDouble(reg_idx, thread->readTid());
-      default:
-        panic("Unsupported width!");
-        return 0;
-    }
-}
-
-template <class Impl>
-FloatReg
-AlphaFullCPU<Impl>::AlphaTC::readFloatReg(int reg_idx)
-{
-    return cpu->readArchFloatRegSingle(reg_idx, thread->readTid());
-}
-
-template <class Impl>
-FloatRegBits
-AlphaFullCPU<Impl>::AlphaTC::readFloatRegBits(int reg_idx, int width)
-{
-    DPRINTF(Fault, "Reading floatint register through the TC!\n");
-    return cpu->readArchFloatRegInt(reg_idx, thread->readTid());
-}
-
-template <class Impl>
-FloatRegBits
-AlphaFullCPU<Impl>::AlphaTC::readFloatRegBits(int reg_idx)
-{
-    return cpu->readArchFloatRegInt(reg_idx, thread->readTid());
-}
-
-template <class Impl>
-void
-AlphaFullCPU<Impl>::AlphaTC::setIntReg(int reg_idx, uint64_t val)
-{
-    cpu->setArchIntReg(reg_idx, val, thread->readTid());
-
-    // Squash if we're not already in a state update mode.
-    if (!thread->trapPending && !thread->inSyscall) {
-        cpu->squashFromTC(thread->readTid());
-    }
-}
-
-template <class Impl>
-void
-AlphaFullCPU<Impl>::AlphaTC::setFloatReg(int reg_idx, FloatReg val, int width)
-{
-    switch(width) {
-      case 32:
-        cpu->setArchFloatRegSingle(reg_idx, val, thread->readTid());
-        break;
-      case 64:
-        cpu->setArchFloatRegDouble(reg_idx, val, thread->readTid());
-        break;
-    }
-
-    // Squash if we're not already in a state update mode.
-    if (!thread->trapPending && !thread->inSyscall) {
-        cpu->squashFromTC(thread->readTid());
-    }
-}
-
-template <class Impl>
-void
-AlphaFullCPU<Impl>::AlphaTC::setFloatReg(int reg_idx, FloatReg val)
-{
-    cpu->setArchFloatRegSingle(reg_idx, val, thread->readTid());
-
-    if (!thread->trapPending && !thread->inSyscall) {
-        cpu->squashFromTC(thread->readTid());
-    }
-}
-
-template <class Impl>
-void
-AlphaFullCPU<Impl>::AlphaTC::setFloatRegBits(int reg_idx, FloatRegBits val,
-                                             int width)
-{
-    DPRINTF(Fault, "Setting floatint register through the TC!\n");
-    cpu->setArchFloatRegInt(reg_idx, val, thread->readTid());
-
-    // Squash if we're not already in a state update mode.
-    if (!thread->trapPending && !thread->inSyscall) {
-        cpu->squashFromTC(thread->readTid());
-    }
-}
-
-template <class Impl>
-void
-AlphaFullCPU<Impl>::AlphaTC::setFloatRegBits(int reg_idx, FloatRegBits val)
-{
-    cpu->setArchFloatRegInt(reg_idx, val, thread->readTid());
-
-    // Squash if we're not already in a state update mode.
-    if (!thread->trapPending && !thread->inSyscall) {
-        cpu->squashFromTC(thread->readTid());
-    }
-}
-
-template <class Impl>
-void
-AlphaFullCPU<Impl>::AlphaTC::setPC(uint64_t val)
-{
-    cpu->setPC(val, thread->readTid());
-
-    // Squash if we're not already in a state update mode.
-    if (!thread->trapPending && !thread->inSyscall) {
-        cpu->squashFromTC(thread->readTid());
-    }
-}
-
-template <class Impl>
-void
-AlphaFullCPU<Impl>::AlphaTC::setNextPC(uint64_t val)
-{
-    cpu->setNextPC(val, thread->readTid());
-
-    // Squash if we're not already in a state update mode.
-    if (!thread->trapPending && !thread->inSyscall) {
-        cpu->squashFromTC(thread->readTid());
-    }
-}
-
-template <class Impl>
-Fault
-AlphaFullCPU<Impl>::AlphaTC::setMiscReg(int misc_reg, const MiscReg &val)
-{
-    Fault ret_fault = cpu->setMiscReg(misc_reg, val, thread->readTid());
-
-    // Squash if we're not already in a state update mode.
-    if (!thread->trapPending && !thread->inSyscall) {
-        cpu->squashFromTC(thread->readTid());
-    }
-
-    return ret_fault;
-}
-
-template <class Impl>
-Fault
-AlphaFullCPU<Impl>::AlphaTC::setMiscRegWithEffect(int misc_reg,
-                                                  const MiscReg &val)
-{
-    Fault ret_fault = cpu->setMiscRegWithEffect(misc_reg, val,
-                                                thread->readTid());
-
-    // Squash if we're not already in a state update mode.
-    if (!thread->trapPending && !thread->inSyscall) {
-        cpu->squashFromTC(thread->readTid());
-    }
-
-    return ret_fault;
-}
-
-#if !FULL_SYSTEM
-
-template <class Impl>
-TheISA::IntReg
-AlphaFullCPU<Impl>::AlphaTC::getSyscallArg(int i)
-{
-    return cpu->getSyscallArg(i, thread->readTid());
-}
-
-template <class Impl>
-void
-AlphaFullCPU<Impl>::AlphaTC::setSyscallArg(int i, IntReg val)
-{
-    cpu->setSyscallArg(i, val, thread->readTid());
-}
-
-template <class Impl>
-void
-AlphaFullCPU<Impl>::AlphaTC::setSyscallReturn(SyscallReturn return_value)
-{
-    cpu->setSyscallReturn(return_value, thread->readTid());
-}
-
-#endif // FULL_SYSTEM
-
-template <class Impl>
-MiscReg
-AlphaFullCPU<Impl>::readMiscReg(int misc_reg, unsigned tid)
-{
-    return this->regFile.readMiscReg(misc_reg, tid);
-}
-
-template <class Impl>
-MiscReg
-AlphaFullCPU<Impl>::readMiscRegWithEffect(int misc_reg, Fault &fault,
-                                          unsigned tid)
-{
-    return this->regFile.readMiscRegWithEffect(misc_reg, fault, tid);
-}
-
-template <class Impl>
-Fault
-AlphaFullCPU<Impl>::setMiscReg(int misc_reg, const MiscReg &val, unsigned tid)
-{
-    return this->regFile.setMiscReg(misc_reg, val, tid);
-}
-
-template <class Impl>
-Fault
-AlphaFullCPU<Impl>::setMiscRegWithEffect(int misc_reg, const MiscReg &val,
-                                         unsigned tid)
-{
-    return this->regFile.setMiscRegWithEffect(misc_reg, val, tid);
-}
-
-template <class Impl>
-void
-AlphaFullCPU<Impl>::squashFromTC(unsigned tid)
-{
-    this->thread[tid]->inSyscall = true;
-    this->commit.generateTCEvent(tid);
-}
-
-#if FULL_SYSTEM
-
-template <class Impl>
-void
-AlphaFullCPU<Impl>::post_interrupt(int int_num, int index)
-{
-    BaseCPU::post_interrupt(int_num, index);
-
-    if (this->thread[0]->status() == ThreadContext::Suspended) {
-        DPRINTF(IPI,"Suspended Processor awoke\n");
-        this->threadContexts[0]->activate();
-    }
-}
-
-template <class Impl>
-int
-AlphaFullCPU<Impl>::readIntrFlag()
-{
-    return this->regFile.readIntrFlag();
-}
-
-template <class Impl>
-void
-AlphaFullCPU<Impl>::setIntrFlag(int val)
-{
-    this->regFile.setIntrFlag(val);
-}
-
-template <class Impl>
-Fault
-AlphaFullCPU<Impl>::hwrei(unsigned tid)
-{
-    // Need to clear the lock flag upon returning from an interrupt.
-    this->lockFlag = false;
-
-    this->thread[tid]->kernelStats->hwrei();
-
-    this->checkInterrupts = true;
-
-    // FIXME: XXX check for interrupts? XXX
-    return NoFault;
-}
-
-template <class Impl>
-bool
-AlphaFullCPU<Impl>::simPalCheck(int palFunc, unsigned tid)
-{
-    if (this->thread[tid]->kernelStats)
-        this->thread[tid]->kernelStats->callpal(palFunc,
-                                                this->threadContexts[tid]);
-
-    switch (palFunc) {
-      case PAL::halt:
-        halt();
-        if (--System::numSystemsRunning == 0)
-            exitSimLoop("all cpus halted");
-        break;
-
-      case PAL::bpt:
-      case PAL::bugchk:
-        if (this->system->breakpoint())
-            return false;
-        break;
-    }
-
-    return true;
-}
-
-template <class Impl>
-void
-AlphaFullCPU<Impl>::trap(Fault fault, unsigned tid)
-{
-    // Pass the thread's TC into the invoke method.
-    fault->invoke(this->threadContexts[tid]);
-}
-
-template <class Impl>
-void
-AlphaFullCPU<Impl>::processInterrupts()
-{
-    // Check for interrupts here.  For now can copy the code that
-    // exists within isa_fullsys_traits.hh.  Also assume that thread 0
-    // is the one that handles the interrupts.
-    // @todo: Possibly consolidate the interrupt checking code.
-    // @todo: Allow other threads to handle interrupts.
-
-    // Check if there are any outstanding interrupts
-    //Handle the interrupts
-    int ipl = 0;
-    int summary = 0;
-
-    this->checkInterrupts = false;
-
-    if (this->readMiscReg(IPR_ASTRR, 0))
-        panic("asynchronous traps not implemented\n");
-
-    if (this->readMiscReg(IPR_SIRR, 0)) {
-        for (int i = INTLEVEL_SOFTWARE_MIN;
-             i < INTLEVEL_SOFTWARE_MAX; i++) {
-            if (this->readMiscReg(IPR_SIRR, 0) & (ULL(1) << i)) {
-                // See table 4-19 of the 21164 hardware reference
-                ipl = (i - INTLEVEL_SOFTWARE_MIN) + 1;
-                summary |= (ULL(1) << i);
-            }
-        }
-    }
-
-    uint64_t interrupts = this->intr_status();
-
-    if (interrupts) {
-        for (int i = INTLEVEL_EXTERNAL_MIN;
-             i < INTLEVEL_EXTERNAL_MAX; i++) {
-            if (interrupts & (ULL(1) << i)) {
-                // See table 4-19 of the 21164 hardware reference
-                ipl = i;
-                summary |= (ULL(1) << i);
-            }
-        }
-    }
-
-    if (ipl && ipl > this->readMiscReg(IPR_IPLR, 0)) {
-        this->setMiscReg(IPR_ISR, summary, 0);
-        this->setMiscReg(IPR_INTID, ipl, 0);
-        // Checker needs to know these two registers were updated.
-        if (this->checker) {
-            this->checker->threadBase()->setMiscReg(IPR_ISR, summary);
-            this->checker->threadBase()->setMiscReg(IPR_INTID, ipl);
-        }
-        this->trap(Fault(new InterruptFault), 0);
-        DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n",
-                this->readMiscReg(IPR_IPLR, 0), ipl, summary);
-    }
-}
-
-#endif // FULL_SYSTEM
-
-#if !FULL_SYSTEM
-
-template <class Impl>
-void
-AlphaFullCPU<Impl>::syscall(int64_t callnum, int tid)
-{
-    DPRINTF(FullCPU, "AlphaFullCPU: [tid:%i] Executing syscall().\n\n", tid);
-
-    DPRINTF(Activity,"Activity: syscall() called.\n");
-
-    // Temporarily increase this by one to account for the syscall
-    // instruction.
-    ++(this->thread[tid]->funcExeInst);
-
-    // Execute the actual syscall.
-    this->thread[tid]->syscall(callnum);
-
-    // Decrease funcExeInst by one as the normal commit will handle
-    // incrementing it.
-    --(this->thread[tid]->funcExeInst);
-}
-
-template <class Impl>
-TheISA::IntReg
-AlphaFullCPU<Impl>::getSyscallArg(int i, int tid)
-{
-    return this->readArchIntReg(AlphaISA::ArgumentReg0 + i, tid);
-}
-
-template <class Impl>
-void
-AlphaFullCPU<Impl>::setSyscallArg(int i, IntReg val, int tid)
-{
-    this->setArchIntReg(AlphaISA::ArgumentReg0 + i, val, tid);
-}
-
-template <class Impl>
-void
-AlphaFullCPU<Impl>::setSyscallReturn(SyscallReturn return_value, int tid)
-{
-    // check for error condition.  Alpha syscall convention is to
-    // indicate success/failure in reg a3 (r19) and put the
-    // return value itself in the standard return value reg (v0).
-    if (return_value.successful()) {
-        // no error
-        this->setArchIntReg(SyscallSuccessReg, 0, tid);
-        this->setArchIntReg(ReturnValueReg, return_value.value(), tid);
-    } else {
-        // got an error, return details
-        this->setArchIntReg(SyscallSuccessReg, (IntReg) -1, tid);
-        this->setArchIntReg(ReturnValueReg, -return_value.value(), tid);
-    }
-}
-#endif
diff --git a/src/cpu/o3/base_dyn_inst.cc b/src/cpu/o3/base_dyn_inst.cc
new file mode 100644
index 000000000..0979c5c8f
--- /dev/null
+++ b/src/cpu/o3/base_dyn_inst.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#include "cpu/base_dyn_inst_impl.hh"
+#include "cpu/o3/isa_specific.hh"
+
+// Explicit instantiation
+template class BaseDynInst<O3CPUImpl>;
+
+template <>
+int
+BaseDynInst<O3CPUImpl>::instcount = 0;
diff --git a/src/cpu/o3/bpred_unit.cc b/src/cpu/o3/bpred_unit.cc
index b33543bdc..08fd4e8ea 100644
--- a/src/cpu/o3/bpred_unit.cc
+++ b/src/cpu/o3/bpred_unit.cc
@@ -29,11 +29,6 @@
  */
 
 #include "cpu/o3/bpred_unit_impl.hh"
-#include "cpu/o3/alpha_impl.hh"
-#include "cpu/o3/alpha_dyn_inst.hh"
-#include "cpu/ozone/ozone_impl.hh"
-//#include "cpu/ozone/simple_impl.hh"
+#include "cpu/o3/isa_specific.hh"
 
-template class BPredUnit<AlphaSimpleImpl>;
-template class BPredUnit<OzoneImpl>;
-//template class BPredUnit<SimpleImpl>;
+template class BPredUnit<O3CPUImpl>;
diff --git a/src/cpu/checker/o3_cpu_builder.cc b/src/cpu/o3/checker_builder.cc
index 59a6c7158..782d963b0 100644
--- a/src/cpu/checker/o3_cpu_builder.cc
+++ b/src/cpu/o3/checker_builder.cc
@@ -30,16 +30,19 @@
 
 #include <string>
 
-#include "cpu/checker/cpu.hh"
+#include "cpu/checker/cpu_impl.hh"
 #include "cpu/inst_seq.hh"
-#include "cpu/o3/alpha_dyn_inst.hh"
-#include "cpu/o3/alpha_impl.hh"
+#include "cpu/o3/alpha/dyn_inst.hh"
+#include "cpu/o3/alpha/impl.hh"
 #include "sim/builder.hh"
 #include "sim/process.hh"
 #include "sim/sim_object.hh"
 
 class MemObject;
 
+template
+class Checker<RefCountingPtr<AlphaDynInst<AlphaSimpleImpl> > >;
+
 /**
  * Specific non-templated derived class used for SimObject configuration.
  */
@@ -75,6 +78,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(O3Checker)
 
     Param<bool> defer_registration;
     Param<bool> exitOnError;
+    Param<bool> warnOnlyOnLoadError;
     Param<bool> function_trace;
     Param<Tick> function_trace_start;
 
@@ -105,6 +109,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(O3Checker)
 
     INIT_PARAM(defer_registration, "defer system registration (for sampling)"),
     INIT_PARAM(exitOnError, "exit on error"),
+    INIT_PARAM_DFLT(warnOnlyOnLoadError, "warn, but don't exit, if a load "
+                    "result errors", false),
     INIT_PARAM(function_trace, "Enable function trace"),
     INIT_PARAM(function_trace_start, "Cycle to start function trace")
 
@@ -121,6 +127,7 @@ CREATE_SIM_OBJECT(O3Checker)
     params->max_loads_any_thread = 0;
     params->max_loads_all_threads = 0;
     params->exitOnError = exitOnError;
+    params->warnOnlyOnLoadError = warnOnlyOnLoadError;
     params->deferRegistration = defer_registration;
     params->functionTrace = function_trace;
     params->functionTraceStart = function_trace_start;
diff --git a/src/cpu/o3/commit.cc b/src/cpu/o3/commit.cc
index 770008a33..637d59f52 100644
--- a/src/cpu/o3/commit.cc
+++ b/src/cpu/o3/commit.cc
@@ -28,8 +28,7 @@
  * Authors: Kevin Lim
  */
 
-#include "cpu/o3/alpha_dyn_inst.hh"
-#include "cpu/o3/alpha_impl.hh"
+#include "cpu/o3/isa_specific.hh"
 #include "cpu/o3/commit_impl.hh"
 
-template class DefaultCommit<AlphaSimpleImpl>;
+template class DefaultCommit<O3CPUImpl>;
diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh
index b7404c488..956b6ec3e 100644
--- a/src/cpu/o3/commit.hh
+++ b/src/cpu/o3/commit.hh
@@ -26,6 +26,7 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * Authors: Kevin Lim
+ *          Korey Sewell
  */
 
 #ifndef __CPU_O3_COMMIT_HH__
@@ -67,7 +68,7 @@ class DefaultCommit
 {
   public:
     // Typedefs from the Impl.
-    typedef typename Impl::FullCPU FullCPU;
+    typedef typename Impl::O3CPU O3CPU;
     typedef typename Impl::DynInstPtr DynInstPtr;
     typedef typename Impl::Params Params;
     typedef typename Impl::CPUPol CPUPol;
@@ -145,7 +146,7 @@ class DefaultCommit
     void regStats();
 
     /** Sets the CPU pointer. */
-    void setCPU(FullCPU *cpu_ptr);
+    void setCPU(O3CPU *cpu_ptr);
 
     /** Sets the list of threads. */
     void setThreads(std::vector<Thread *> &threads);
@@ -161,10 +162,6 @@ class DefaultCommit
     /** Sets the pointer to the queue coming from IEW. */
     void setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr);
 
-    void setFetchStage(Fetch *fetch_stage);
-
-    Fetch *fetchStage;
-
     /** Sets the pointer to the IEW stage. */
     void setIEWStage(IEW *iew_stage);
 
@@ -186,11 +183,14 @@ class DefaultCommit
     /** Initializes stage by sending back the number of free entries. */
     void initStage();
 
-    /** Initializes the switching out of commit. */
-    void switchOut();
+    /** Initializes the draining of commit. */
+    bool drain();
+
+    /** Resumes execution after draining. */
+    void resume();
 
     /** Completes the switch out of commit. */
-    void doSwitchOut();
+    void switchOut();
 
     /** Takes over from another CPU's thread. */
     void takeOverFrom();
@@ -280,12 +280,20 @@ class DefaultCommit
     /** Sets the PC of a specific thread. */
     void setPC(uint64_t val, unsigned tid) { PC[tid] = val; }
 
-    /** Reads the PC of a specific thread. */
+    /** Reads the next PC of a specific thread. */
     uint64_t readNextPC(unsigned tid) { return nextPC[tid]; }
 
     /** Sets the next PC of a specific thread. */
     void setNextPC(uint64_t val, unsigned tid) { nextPC[tid] = val; }
 
+#if THE_ISA != ALPHA_ISA
+    /** Reads the next NPC (the PC following nextPC) of a specific thread. */
+    uint64_t readNextNPC(unsigned tid) { return nextNPC[tid]; }
+
+    /** Sets the next NPC (the PC following nextPC) of a specific thread. */
+    void setNextNPC(uint64_t val, unsigned tid) { nextNPC[tid] = val; }
+#endif
+
   private:
     /** Time buffer interface. */
     TimeBuffer<TimeStruct> *timeBuffer;
@@ -317,16 +325,12 @@ class DefaultCommit
     ROB *rob;
 
   private:
-    /** Pointer to FullCPU. */
-    FullCPU *cpu;
+    /** Pointer to O3CPU. */
+    O3CPU *cpu;
 
     /** Vector of all of the threads. */
     std::vector<Thread *> thread;
 
-    Fault fetchFault;
-
-    int fetchTrapWait;
-
     /** Records that commit has written to the time buffer this cycle. Used for
      * the CPU to determine if it can deschedule itself if there is no activity.
      */
@@ -365,11 +369,6 @@ class DefaultCommit
      */
     unsigned renameWidth;
 
-    /** IEW width, in instructions.  Used so ROB knows how many
-     *  instructions to get from the IEW instruction queue.
-     */
-    unsigned iewWidth;
-
     /** Commit width, in instructions. */
     unsigned commitWidth;
 
@@ -379,8 +378,8 @@ class DefaultCommit
     /** Number of Active Threads */
     unsigned numThreads;
 
-    /** Is a switch out pending. */
-    bool switchPending;
+    /** Is a drain pending. */
+    bool drainPending;
 
     /** Is commit switched out. */
     bool switchedOut;
@@ -390,10 +389,6 @@ class DefaultCommit
      */
     Tick trapLatency;
 
-    Tick fetchTrapLatency;
-
-    Tick fetchFaultTick;
-
     /** The commit PC of each thread.  Refers to the instruction that
      * is currently being processed/committed.
      */
@@ -402,6 +397,11 @@ class DefaultCommit
     /** The next PC of each thread. */
     Addr nextPC[Impl::MaxThreads];
 
+#if THE_ISA != ALPHA_ISA
+    /** The next NPC of each thread. */
+    Addr nextNPC[Impl::MaxThreads];
+#endif
+
     /** The sequence number of the youngest valid instruction in the ROB. */
     InstSeqNum youngestSeqNum[Impl::MaxThreads];
 
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index ceb2918e0..904af1071 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -28,16 +28,22 @@
  * Authors: Kevin Lim
  */
 
+#include "config/full_system.hh"
+#include "config/use_checker.hh"
+
 #include <algorithm>
 #include <string>
 
 #include "base/loader/symtab.hh"
 #include "base/timebuf.hh"
-#include "cpu/checker/cpu.hh"
 #include "cpu/exetrace.hh"
 #include "cpu/o3/commit.hh"
 #include "cpu/o3/thread_state.hh"
 
+#if USE_CHECKER
+#include "cpu/checker/cpu.hh"
+#endif
+
 using namespace std;
 
 template <class Impl>
@@ -72,13 +78,11 @@ DefaultCommit<Impl>::DefaultCommit(Params *params)
       renameToROBDelay(params->renameToROBDelay),
       fetchToCommitDelay(params->commitToFetchDelay),
       renameWidth(params->renameWidth),
-      iewWidth(params->executeWidth),
       commitWidth(params->commitWidth),
       numThreads(params->numberOfThreads),
-      switchPending(false),
+      drainPending(false),
       switchedOut(false),
-      trapLatency(params->trapLatency),
-      fetchTrapLatency(params->fetchTrapLatency)
+      trapLatency(params->trapLatency)
 {
     _status = Active;
     _nextStatus = Inactive;
@@ -118,9 +122,6 @@ DefaultCommit<Impl>::DefaultCommit(Params *params)
         tcSquash[i] = false;
         PC[i] = nextPC[i] = 0;
     }
-
-    fetchFaultTick = 0;
-    fetchTrapWait = 0;
 }
 
 template <class Impl>
@@ -205,19 +206,6 @@ DefaultCommit<Impl>::regStats()
         .flags(total)
         ;
 
-    //
-    //  Commit-Eligible instructions...
-    //
-    //  -> The number of instructions eligible to commit in those
-    //  cycles where we reached our commit BW limit (less the number
-    //  actually committed)
-    //
-    //  -> The average value is computed over ALL CYCLES... not just
-    //  the BW limited cycles
-    //
-    //  -> The standard deviation is computed only over cycles where
-    //  we reached the BW limit
-    //
     commitEligible
         .init(cpu->number_of_threads)
         .name(name() + ".COM:bw_limited")
@@ -233,17 +221,16 @@ DefaultCommit<Impl>::regStats()
 
 template <class Impl>
 void
-DefaultCommit<Impl>::setCPU(FullCPU *cpu_ptr)
+DefaultCommit<Impl>::setCPU(O3CPU *cpu_ptr)
 {
     DPRINTF(Commit, "Commit: Setting CPU pointer.\n");
     cpu = cpu_ptr;
 
     // Commit must broadcast the number of free entries it has at the start of
     // the simulation, so it starts as active.
-    cpu->activateStage(FullCPU::CommitIdx);
+    cpu->activateStage(O3CPU::CommitIdx);
 
     trapLatency = cpu->cycles(trapLatency);
-    fetchTrapLatency = cpu->cycles(fetchTrapLatency);
 }
 
 template <class Impl>
@@ -302,13 +289,6 @@ DefaultCommit<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr)
 
 template <class Impl>
 void
-DefaultCommit<Impl>::setFetchStage(Fetch *fetch_stage)
-{
-    fetchStage = fetch_stage;
-}
-
-template <class Impl>
-void
 DefaultCommit<Impl>::setIEWStage(IEW *iew_stage)
 {
     iewStage = iew_stage;
@@ -358,23 +338,38 @@ DefaultCommit<Impl>::initStage()
 }
 
 template <class Impl>
-void
-DefaultCommit<Impl>::switchOut()
+bool
+DefaultCommit<Impl>::drain()
 {
-    switchPending = true;
+    drainPending = true;
+
+    // If it's already drained, return true.
+    if (rob->isEmpty() && !iewStage->hasStoresToWB()) {
+        cpu->signalDrained();
+        return true;
+    }
+
+    return false;
 }
 
 template <class Impl>
 void
-DefaultCommit<Impl>::doSwitchOut()
+DefaultCommit<Impl>::switchOut()
 {
     switchedOut = true;
-    switchPending = false;
+    drainPending = false;
     rob->switchOut();
 }
 
 template <class Impl>
 void
+DefaultCommit<Impl>::resume()
+{
+    drainPending = false;
+}
+
+template <class Impl>
+void
 DefaultCommit<Impl>::takeOverFrom()
 {
     switchedOut = false;
@@ -409,10 +404,10 @@ DefaultCommit<Impl>::updateStatus()
 
     if (_nextStatus == Inactive && _status == Active) {
         DPRINTF(Activity, "Deactivating stage.\n");
-        cpu->deactivateStage(FullCPU::CommitIdx);
+        cpu->deactivateStage(O3CPU::CommitIdx);
     } else if (_nextStatus == Active && _status == Inactive) {
         DPRINTF(Activity, "Activating stage.\n");
-        cpu->activateStage(FullCPU::CommitIdx);
+        cpu->activateStage(O3CPU::CommitIdx);
     }
 
     _status = _nextStatus;
@@ -434,7 +429,7 @@ DefaultCommit<Impl>::setNextStatus()
         }
     }
 
-    assert(squashes == squashCounter);
+    squashCounter = squashes;
 
     // If commit is currently squashing, then it will have activity for the
     // next cycle. Set its next status as active.
@@ -539,8 +534,6 @@ DefaultCommit<Impl>::squashFromTrap(unsigned tid)
 
     commitStatus[tid] = ROBSquashing;
     cpu->activityThisCycle();
-
-    ++squashCounter;
 }
 
 template <class Impl>
@@ -558,8 +551,6 @@ DefaultCommit<Impl>::squashFromTC(unsigned tid)
     cpu->activityThisCycle();
 
     tcSquash[tid] = false;
-
-    ++squashCounter;
 }
 
 template <class Impl>
@@ -569,11 +560,15 @@ DefaultCommit<Impl>::tick()
     wroteToTimeBuffer = false;
     _nextStatus = Inactive;
 
-    if (switchPending && rob->isEmpty() && !iewStage->hasStoresToWB()) {
-        cpu->signalSwitched();
+    if (drainPending && rob->isEmpty() && !iewStage->hasStoresToWB()) {
+        cpu->signalDrained();
+        drainPending = false;
         return;
     }
 
+    if ((*activeThreads).size() <= 0)
+        return;
+
     list<unsigned>::iterator threads = (*activeThreads).begin();
 
     // Check if any of the threads are done squashing.  Change the
@@ -585,10 +580,12 @@ DefaultCommit<Impl>::tick()
 
             if (rob->isDoneSquashing(tid)) {
                 commitStatus[tid] = Running;
-                --squashCounter;
             } else {
                 DPRINTF(Commit,"[tid:%u]: Still Squashing, cannot commit any"
-                        "insts this cycle.\n", tid);
+                        " insts this cycle.\n", tid);
+                rob->doSquash(tid);
+                toIEW->commitInfo[tid].robSquashing = true;
+                wroteToTimeBuffer = true;
             }
         }
     }
@@ -694,29 +691,7 @@ DefaultCommit<Impl>::commit()
 
     while (threads != (*activeThreads).end()) {
         unsigned tid = *threads++;
-/*
-        if (fromFetch->fetchFault && commitStatus[0] != TrapPending) {
-            // Record the fault.  Wait until it's empty in the ROB.
-            // Then handle the trap.  Ignore it if there's already a
-            // trap pending as fetch will be redirected.
-            fetchFault = fromFetch->fetchFault;
-            fetchFaultTick = curTick + fetchTrapLatency;
-            commitStatus[0] = FetchTrapPending;
-            DPRINTF(Commit, "Fault from fetch recorded.  Will trap if the "
-                    "ROB empties without squashing the fault.\n");
-            fetchTrapWait = 0;
-        }
 
-        // Fetch may tell commit to clear the trap if it's been squashed.
-        if (fromFetch->clearFetchFault) {
-            DPRINTF(Commit, "Received clear fetch fault signal\n");
-            fetchTrapWait = 0;
-            if (commitStatus[0] == FetchTrapPending) {
-                DPRINTF(Commit, "Clearing fault from fetch\n");
-                commitStatus[0] = Running;
-            }
-        }
-*/
         // Not sure which one takes priority.  I think if we have
         // both, that's a bad sign.
         if (trapSquash[tid] == true) {
@@ -744,8 +719,6 @@ DefaultCommit<Impl>::commit()
 
             commitStatus[tid] = ROBSquashing;
 
-            ++squashCounter;
-
             // If we want to include the squashing instruction in the squash,
             // then use one older sequence number.
             InstSeqNum squashed_inst = fromIEW->squashedSeqNum[tid];
@@ -947,7 +920,7 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
         // and committed this instruction.
         thread[tid]->funcExeInst--;
 
-        head_inst->reachedCommit = true;
+        head_inst->setAtCommit();
 
         if (head_inst->isNonSpeculative() ||
             head_inst->isStoreConditional() ||
@@ -1012,18 +985,19 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
         head_inst->setCompleted();
     }
 
+#if USE_CHECKER
     // Use checker prior to updating anything due to traps or PC
     // based events.
     if (cpu->checker) {
-        cpu->checker->tick(head_inst);
+        cpu->checker->verify(head_inst);
     }
+#endif
 
     // Check if the instruction caused a fault.  If so, trap.
     Fault inst_fault = head_inst->getFault();
 
     if (inst_fault != NoFault) {
         head_inst->setCompleted();
-#if FULL_SYSTEM
         DPRINTF(Commit, "Inst [sn:%lli] PC %#x has a fault\n",
                 head_inst->seqNum, head_inst->readPC());
 
@@ -1032,9 +1006,11 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
             return false;
         }
 
+#if USE_CHECKER
         if (cpu->checker && head_inst->isStore()) {
-            cpu->checker->tick(head_inst);
+            cpu->checker->verify(head_inst);
         }
+#endif
 
         assert(!thread[tid]->inSyscall);
 
@@ -1065,10 +1041,6 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
         generateTrapEvent(tid);
 
         return false;
-#else // !FULL_SYSTEM
-        panic("fault (%d) detected @ PC %08p", inst_fault,
-              head_inst->PC);
-#endif // FULL_SYSTEM
     }
 
     updateComInstStats(head_inst);
@@ -1256,7 +1228,8 @@ DefaultCommit<Impl>::roundRobin()
         unsigned tid = *pri_iter;
 
         if (commitStatus[tid] == Running ||
-            commitStatus[tid] == Idle) {
+            commitStatus[tid] == Idle ||
+            commitStatus[tid] == FetchTrapPending) {
 
             if (rob->isHeadReady(tid)) {
                 priority_list.erase(pri_iter);
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 788c6b164..b407f4fcc 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -26,9 +26,11 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * Authors: Kevin Lim
+ *          Korey Sewell
  */
 
 #include "config/full_system.hh"
+#include "config/use_checker.hh"
 
 #if FULL_SYSTEM
 #include "sim/system.hh"
@@ -37,26 +39,28 @@
 #endif
 
 #include "cpu/activity.hh"
-#include "cpu/checker/cpu.hh"
 #include "cpu/simple_thread.hh"
 #include "cpu/thread_context.hh"
-#include "cpu/o3/alpha_dyn_inst.hh"
-#include "cpu/o3/alpha_impl.hh"
+#include "cpu/o3/isa_specific.hh"
 #include "cpu/o3/cpu.hh"
 
 #include "sim/root.hh"
 #include "sim/stat_control.hh"
 
+#if USE_CHECKER
+#include "cpu/checker/cpu.hh"
+#endif
+
 using namespace std;
 using namespace TheISA;
 
-BaseFullCPU::BaseFullCPU(Params *params)
+BaseO3CPU::BaseO3CPU(Params *params)
     : BaseCPU(params), cpu_id(0)
 {
 }
 
 void
-BaseFullCPU::regStats()
+BaseO3CPU::regStats()
 {
     BaseCPU::regStats();
 }
@@ -82,8 +86,67 @@ FullO3CPU<Impl>::TickEvent::description()
 }
 
 template <class Impl>
+FullO3CPU<Impl>::ActivateThreadEvent::ActivateThreadEvent()
+    : Event(&mainEventQueue, CPU_Tick_Pri)
+{
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::ActivateThreadEvent::init(int thread_num,
+                                           FullO3CPU<Impl> *thread_cpu)
+{
+    tid = thread_num;
+    cpu = thread_cpu;
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::ActivateThreadEvent::process()
+{
+    cpu->activateThread(tid);
+}
+
+template <class Impl>
+const char *
+FullO3CPU<Impl>::ActivateThreadEvent::description()
+{
+    return "FullO3CPU \"Activate Thread\" event";
+}
+
+template <class Impl>
+FullO3CPU<Impl>::DeallocateContextEvent::DeallocateContextEvent()
+    : Event(&mainEventQueue, CPU_Tick_Pri)
+{
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::DeallocateContextEvent::init(int thread_num,
+                                           FullO3CPU<Impl> *thread_cpu)
+{
+    tid = thread_num;
+    cpu = thread_cpu;
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::DeallocateContextEvent::process()
+{
+    cpu->deactivateThread(tid);
+    cpu->removeThread(tid);
+}
+
+template <class Impl>
+const char *
+FullO3CPU<Impl>::DeallocateContextEvent::description()
+{
+    return "FullO3CPU \"Deallocate Context\" event";
+}
+
+template <class Impl>
 FullO3CPU<Impl>::FullO3CPU(Params *params)
-    : BaseFullCPU(params),
+    : BaseO3CPU(params),
       tickEvent(this),
       removeInstsThisCycle(false),
       fetch(params),
@@ -94,7 +157,7 @@ FullO3CPU<Impl>::FullO3CPU(Params *params)
 
       regFile(params->numPhysIntRegs, params->numPhysFloatRegs),
 
-      freeList(params->numberOfThreads,//number of activeThreads
+      freeList(params->numberOfThreads,
                TheISA::NumIntRegs, params->numPhysIntRegs,
                TheISA::NumFloatRegs, params->numPhysFloatRegs),
 
@@ -102,21 +165,20 @@ FullO3CPU<Impl>::FullO3CPU(Params *params)
           params->smtROBPolicy, params->smtROBThreshold,
           params->numberOfThreads),
 
-      scoreboard(params->numberOfThreads,//number of activeThreads
+      scoreboard(params->numberOfThreads,
                  TheISA::NumIntRegs, params->numPhysIntRegs,
                  TheISA::NumFloatRegs, params->numPhysFloatRegs,
                  TheISA::NumMiscRegs * number_of_threads,
                  TheISA::ZeroReg),
 
-      // For now just have these time buffers be pretty big.
-      // @todo: Make these time buffer sizes parameters or derived
-      // from latencies
-      timeBuffer(5, 5),
-      fetchQueue(5, 5),
-      decodeQueue(5, 5),
-      renameQueue(5, 5),
-      iewQueue(5, 5),
-      activityRec(NumStages, 10, params->activity),
+      timeBuffer(params->backComSize, params->forwardComSize),
+      fetchQueue(params->backComSize, params->forwardComSize),
+      decodeQueue(params->backComSize, params->forwardComSize),
+      renameQueue(params->backComSize, params->forwardComSize),
+      iewQueue(params->backComSize, params->forwardComSize),
+      activityRec(NumStages,
+                  params->backComSize + params->forwardComSize,
+                  params->activity),
 
       globalSeqNum(1),
 
@@ -125,21 +187,25 @@ FullO3CPU<Impl>::FullO3CPU(Params *params)
       physmem(system->physmem),
 #endif // FULL_SYSTEM
       mem(params->mem),
-      switchCount(0),
+      drainCount(0),
       deferRegistration(params->deferRegistration),
       numThreads(number_of_threads)
 {
     _status = Idle;
 
+    checker = NULL;
+
     if (params->checker) {
+#if USE_CHECKER
         BaseCPU *temp_checker = params->checker;
         checker = dynamic_cast<Checker<DynInstPtr> *>(temp_checker);
         checker->setMemory(mem);
 #if FULL_SYSTEM
         checker->setSystem(params->system);
 #endif
-    } else {
-        checker = NULL;
+#else
+        panic("Checker enabled but not compiled in!");
+#endif // USE_CHECKER
     }
 
 #if !FULL_SYSTEM
@@ -177,13 +243,18 @@ FullO3CPU<Impl>::FullO3CPU(Params *params)
     commit.setIEWQueue(&iewQueue);
     commit.setRenameQueue(&renameQueue);
 
-    commit.setFetchStage(&fetch);
     commit.setIEWStage(&iew);
     rename.setIEWStage(&iew);
     rename.setCommitStage(&commit);
 
 #if !FULL_SYSTEM
     int active_threads = params->workload.size();
+
+    if (active_threads > Impl::MaxThreads) {
+        panic("Workload Size too large. Increase the 'MaxThreads' "
+              "constant in your O3CPU impl. file (e.g. o3/alpha/impl.hh) or "
+              "edit your workload size.");
+    }
 #else
     int active_threads = 1;
 #endif
@@ -249,6 +320,8 @@ FullO3CPU<Impl>::FullO3CPU(Params *params)
 
     lastRunningCycle = curTick;
 
+    lastActivatedCycle = -1;
+
     contextSwitch = false;
 }
 
@@ -261,9 +334,9 @@ template <class Impl>
 void
 FullO3CPU<Impl>::fullCPURegStats()
 {
-    BaseFullCPU::regStats();
+    BaseO3CPU::regStats();
 
-    // Register any of the FullCPU's stats here.
+    // Register any of the O3CPU's stats here.
     timesIdled
         .name(name() + ".timesIdled")
         .desc("Number of times that the entire CPU went into an idle state and"
@@ -316,10 +389,22 @@ FullO3CPU<Impl>::fullCPURegStats()
 }
 
 template <class Impl>
+Port *
+FullO3CPU<Impl>::getPort(const std::string &if_name, int idx)
+{
+    if (if_name == "dcache_port")
+        return iew.getDcachePort();
+    else if (if_name == "icache_port")
+        return fetch.getIcachePort();
+    else
+        panic("No Such Port\n");
+}
+
+template <class Impl>
 void
 FullO3CPU<Impl>::tick()
 {
-    DPRINTF(FullCPU, "\n\nFullCPU: Ticking main, FullO3CPU.\n");
+    DPRINTF(O3CPU, "\n\nFullO3CPU: Ticking main, FullO3CPU.\n");
 
     ++numCycles;
 
@@ -355,7 +440,8 @@ FullO3CPU<Impl>::tick()
     }
 
     if (!tickEvent.scheduled()) {
-        if (_status == SwitchedOut) {
+        if (_status == SwitchedOut ||
+            getState() == SimObject::Drained) {
             // increment stat
             lastRunningCycle = curTick;
         } else if (!activityRec.active()) {
@@ -416,16 +502,107 @@ FullO3CPU<Impl>::init()
 
 template <class Impl>
 void
+FullO3CPU<Impl>::activateThread(unsigned tid)
+{
+    list<unsigned>::iterator isActive = find(
+        activeThreads.begin(), activeThreads.end(), tid);
+
+    if (isActive == activeThreads.end()) {
+        DPRINTF(O3CPU, "[tid:%i]: Adding to active threads list\n",
+                tid);
+
+        activeThreads.push_back(tid);
+    }
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::deactivateThread(unsigned tid)
+{
+    //Remove From Active List, if Active
+    list<unsigned>::iterator thread_it =
+        find(activeThreads.begin(), activeThreads.end(), tid);
+
+    if (thread_it != activeThreads.end()) {
+        DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n",
+                tid);
+        activeThreads.erase(thread_it);
+    }
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::activateContext(int tid, int delay)
+{
+    // Needs to set each stage to running as well.
+    if (delay){
+        DPRINTF(O3CPU, "[tid:%i]: Scheduling thread context to activate "
+                "on cycle %d\n", tid, curTick + cycles(delay));
+        scheduleActivateThreadEvent(tid, delay);
+    } else {
+        activateThread(tid);
+    }
+
+    if(lastActivatedCycle < curTick) {
+        scheduleTickEvent(delay);
+
+        // Be sure to signal that there's some activity so the CPU doesn't
+        // deschedule itself.
+        activityRec.activity();
+        fetch.wakeFromQuiesce();
+
+        lastActivatedCycle = curTick;
+
+        _status = Running;
+    }
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::deallocateContext(int tid, int delay)
+{
+    // Schedule removal of thread data from CPU
+    if (delay){
+        DPRINTF(O3CPU, "[tid:%i]: Scheduling thread context to deallocate "
+                "on cycle %d\n", tid, curTick + cycles(delay));
+        scheduleDeallocateContextEvent(tid, delay);
+    } else {
+        deactivateThread(tid);
+        removeThread(tid);
+    }
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::suspendContext(int tid)
+{
+    DPRINTF(O3CPU,"[tid: %i]: Suspending Thread Context.\n", tid);
+    deactivateThread(tid);
+    if (activeThreads.size() == 0)
+        unscheduleTickEvent();
+    _status = Idle;
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::haltContext(int tid)
+{
+    // For now this is the same as deallocate (with a delay of one cycle).
+    DPRINTF(O3CPU,"[tid:%i]: Halt Context called. Deallocating\n", tid);
+    deallocateContext(tid, 1);
+}
+
+template <class Impl>
+void
 FullO3CPU<Impl>::insertThread(unsigned tid)
 {
-    DPRINTF(FullCPU,"[tid:%i] Initializing thread data");
+    DPRINTF(O3CPU,"[tid:%i] Initializing thread into CPU\n", tid);
     // Will change now that the PC and thread state is internal to the CPU
     // and not in the ThreadContext.
-#if 0
 #if FULL_SYSTEM
     ThreadContext *src_tc = system->threadContexts[tid];
 #else
-    ThreadContext *src_tc = thread[tid];
+    ThreadContext *src_tc = tcBase(tid);
 #endif
 
     //Bind Int Regs to Rename Map
@@ -445,11 +622,14 @@ FullO3CPU<Impl>::insertThread(unsigned tid)
     }
 
     //Copy Thread Data Into RegFile
-    this->copyFromTC(tid);
+    //this->copyFromTC(tid);
 
-    //Set PC/NPC
-    regFile.pc[tid]  = src_tc->readPC();
-    regFile.npc[tid] = src_tc->readNextPC();
+    //Set PC/NPC/NNPC
+    setPC(src_tc->readPC(), tid);
+    setNextPC(src_tc->readNextPC(), tid);
+#if THE_ISA != ALPHA_ISA
+    setNextNPC(src_tc->readNextNPC(), tid);
+#endif
 
     src_tc->setStatus(ThreadContext::Active);
 
@@ -458,16 +638,19 @@ FullO3CPU<Impl>::insertThread(unsigned tid)
     //Reset ROB/IQ/LSQ Entries
     commit.rob->resetEntries();
     iew.resetEntries();
-#endif
 }
 
 template <class Impl>
 void
 FullO3CPU<Impl>::removeThread(unsigned tid)
 {
-    DPRINTF(FullCPU,"[tid:%i] Removing thread data");
-#if 0
-    //Unbind Int Regs from Rename Map
+    DPRINTF(O3CPU,"[tid:%i] Removing thread context from CPU.\n", tid);
+
+    // Copy Thread Data From RegFile
+    // If thread is suspended, it might be re-allocated
+    //this->copyToTC(tid);
+
+    // Unbind Int Regs from Rename Map
     for (int ireg = 0; ireg < TheISA::NumIntRegs; ireg++) {
         PhysRegIndex phys_reg = renameMap[tid].lookup(ireg);
 
@@ -475,7 +658,7 @@ FullO3CPU<Impl>::removeThread(unsigned tid)
         freeList.addReg(phys_reg);
     }
 
-    //Unbind Float Regs from Rename Map
+    // Unbind Float Regs from Rename Map
     for (int freg = 0; freg < TheISA::NumFloatRegs; freg++) {
         PhysRegIndex phys_reg = renameMap[tid].lookup(freg);
 
@@ -483,27 +666,20 @@ FullO3CPU<Impl>::removeThread(unsigned tid)
         freeList.addReg(phys_reg);
     }
 
-    //Copy Thread Data From RegFile
-    /* Fix Me:
-     * Do we really need to do this if we are removing a thread
-     * in the sense that it's finished (exiting)? If the thread is just
-     * being suspended we might...
-     */
-//    this->copyToTC(tid);
-
-    //Squash Throughout Pipeline
+    // Squash Throughout Pipeline
     fetch.squash(0,tid);
     decode.squash(tid);
     rename.squash(tid);
+    iew.squash(tid);
+    commit.rob->squash(commit.rob->readHeadInst(tid)->seqNum, tid);
 
     assert(iew.ldstQueue.getCount(tid) == 0);
 
-    //Reset ROB/IQ/LSQ Entries
+    // Reset ROB/IQ/LSQ Entries
     if (activeThreads.size() >= 1) {
         commit.rob->resetEntries();
         iew.resetEntries();
     }
-#endif
 }
 
 
@@ -511,37 +687,37 @@ template <class Impl>
 void
 FullO3CPU<Impl>::activateWhenReady(int tid)
 {
-    DPRINTF(FullCPU,"[tid:%i]: Checking if resources are available for incoming"
+    DPRINTF(O3CPU,"[tid:%i]: Checking if resources are available for incoming"
             "(e.g. PhysRegs/ROB/IQ/LSQ) \n",
             tid);
 
     bool ready = true;
 
     if (freeList.numFreeIntRegs() >= TheISA::NumIntRegs) {
-        DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough "
+        DPRINTF(O3CPU,"[tid:%i] Suspending thread due to not enough "
                 "Phys. Int. Regs.\n",
                 tid);
         ready = false;
     } else if (freeList.numFreeFloatRegs() >= TheISA::NumFloatRegs) {
-        DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough "
+        DPRINTF(O3CPU,"[tid:%i] Suspending thread due to not enough "
                 "Phys. Float. Regs.\n",
                 tid);
         ready = false;
     } else if (commit.rob->numFreeEntries() >=
                commit.rob->entryAmount(activeThreads.size() + 1)) {
-        DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough "
+        DPRINTF(O3CPU,"[tid:%i] Suspending thread due to not enough "
                 "ROB entries.\n",
                 tid);
         ready = false;
     } else if (iew.instQueue.numFreeEntries() >=
                iew.instQueue.entryAmount(activeThreads.size() + 1)) {
-        DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough "
+        DPRINTF(O3CPU,"[tid:%i] Suspending thread due to not enough "
                 "IQ entries.\n",
                 tid);
         ready = false;
     } else if (iew.ldstQueue.numFreeEntries() >=
                iew.ldstQueue.entryAmount(activeThreads.size() + 1)) {
-        DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough "
+        DPRINTF(O3CPU,"[tid:%i] Suspending thread due to not enough "
                 "LSQ entries.\n",
                 tid);
         ready = false;
@@ -559,6 +735,7 @@ FullO3CPU<Impl>::activateWhenReady(int tid)
         //blocks fetch
         contextSwitch = true;
 
+        //@todo: dont always add to waitlist
         //do waitlist
         cpuWaitList.push_back(tid);
     }
@@ -566,133 +743,130 @@ FullO3CPU<Impl>::activateWhenReady(int tid)
 
 template <class Impl>
 void
-FullO3CPU<Impl>::activateContext(int tid, int delay)
+FullO3CPU<Impl>::serialize(std::ostream &os)
 {
-    // Needs to set each stage to running as well.
-    list<unsigned>::iterator isActive = find(
-        activeThreads.begin(), activeThreads.end(), tid);
-
-    if (isActive == activeThreads.end()) {
-        //May Need to Re-code this if the delay variable is the
-        //delay needed for thread to activate
-        DPRINTF(FullCPU, "Adding Thread %i to active threads list\n",
-                tid);
-
-        activeThreads.push_back(tid);
+    SERIALIZE_ENUM(_status);
+    BaseCPU::serialize(os);
+    nameOut(os, csprintf("%s.tickEvent", name()));
+    tickEvent.serialize(os);
+
+    // Use SimpleThread's ability to checkpoint to make it easier to
+    // write out the registers.  Also make this static so it doesn't
+    // get instantiated multiple times (causes a panic in statistics).
+    static SimpleThread temp;
+
+    for (int i = 0; i < thread.size(); i++) {
+        nameOut(os, csprintf("%s.xc.%i", name(), i));
+        temp.copyTC(thread[i]->getTC());
+        temp.serialize(os);
     }
-
-    assert(_status == Idle || _status == SwitchedOut);
-
-    scheduleTickEvent(delay);
-
-    // Be sure to signal that there's some activity so the CPU doesn't
-    // deschedule itself.
-    activityRec.activity();
-    fetch.wakeFromQuiesce();
-
-    _status = Running;
 }
 
 template <class Impl>
 void
-FullO3CPU<Impl>::suspendContext(int tid)
+FullO3CPU<Impl>::unserialize(Checkpoint *cp, const std::string &section)
 {
-    DPRINTF(FullCPU,"[tid: %i]: Suspended ...\n", tid);
-    unscheduleTickEvent();
-    _status = Idle;
-/*
-    //Remove From Active List, if Active
-    list<unsigned>::iterator isActive = find(
-        activeThreads.begin(), activeThreads.end(), tid);
+    UNSERIALIZE_ENUM(_status);
+    BaseCPU::unserialize(cp, section);
+    tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
+
+    // Use SimpleThread's ability to checkpoint to make it easier to
+    // read in the registers.  Also make this static so it doesn't
+    // get instantiated multiple times (causes a panic in statistics).
+    static SimpleThread temp;
+
+    for (int i = 0; i < thread.size(); i++) {
+        temp.copyTC(thread[i]->getTC());
+        temp.unserialize(cp, csprintf("%s.xc.%i", section, i));
+        thread[i]->getTC()->copyArchRegs(temp.getTC());
+    }
+}
 
-    if (isActive != activeThreads.end()) {
-        DPRINTF(FullCPU,"[tid:%i]: Removing from active threads list\n",
-                tid);
-        activeThreads.erase(isActive);
+template <class Impl>
+unsigned int
+FullO3CPU<Impl>::drain(Event *drain_event)
+{
+    drainCount = 0;
+    fetch.drain();
+    decode.drain();
+    rename.drain();
+    iew.drain();
+    commit.drain();
+
+    // Wake the CPU and record activity so everything can drain out if
+    // the CPU was not able to immediately drain.
+    if (getState() != SimObject::Drained) {
+        // A bit of a hack...set the drainEvent after all the drain()
+        // calls have been made, that way if all of the stages drain
+        // immediately, the signalDrained() function knows not to call
+        // process on the drain event.
+        drainEvent = drain_event;
+
+        wakeCPU();
+        activityRec.activity();
+
+        return 1;
+    } else {
+        return 0;
     }
-*/
 }
 
 template <class Impl>
 void
-FullO3CPU<Impl>::deallocateContext(int tid)
+FullO3CPU<Impl>::resume()
 {
-    DPRINTF(FullCPU,"[tid:%i]: Deallocating ...", tid);
-/*
-    //Remove From Active List, if Active
-    list<unsigned>::iterator isActive = find(
-        activeThreads.begin(), activeThreads.end(), tid);
+    assert(system->getMemoryMode() == System::Timing);
+    fetch.resume();
+    decode.resume();
+    rename.resume();
+    iew.resume();
+    commit.resume();
 
-    if (isActive != activeThreads.end()) {
-        DPRINTF(FullCPU,"[tid:%i]: Removing from active threads list\n",
-                tid);
-        activeThreads.erase(isActive);
+    changeState(SimObject::Running);
 
-        removeThread(tid);
-    }
-*/
+    if (_status == SwitchedOut || _status == Idle)
+        return;
+
+    if (!tickEvent.scheduled())
+        tickEvent.schedule(curTick);
+    _status = Running;
 }
 
 template <class Impl>
 void
-FullO3CPU<Impl>::haltContext(int tid)
+FullO3CPU<Impl>::signalDrained()
 {
-    DPRINTF(FullCPU,"[tid:%i]: Halted ...", tid);
-/*
-    //Remove From Active List, if Active
-    list<unsigned>::iterator isActive = find(
-        activeThreads.begin(), activeThreads.end(), tid);
+    if (++drainCount == NumStages) {
+        if (tickEvent.scheduled())
+            tickEvent.squash();
 
-    if (isActive != activeThreads.end()) {
-        DPRINTF(FullCPU,"[tid:%i]: Removing from active threads list\n",
-                tid);
-        activeThreads.erase(isActive);
+        changeState(SimObject::Drained);
 
-        removeThread(tid);
+        if (drainEvent) {
+            drainEvent->process();
+            drainEvent = NULL;
+        }
     }
-*/
+    assert(drainCount <= NumStages); // never more signals than stages
 }
 
 template <class Impl>
 void
-FullO3CPU<Impl>::switchOut(Sampler *_sampler)
+FullO3CPU<Impl>::switchOut()
 {
-    sampler = _sampler;
-    switchCount = 0;
     fetch.switchOut();
-    decode.switchOut();
     rename.switchOut();
-    iew.switchOut();
     commit.switchOut();
-
-    // Wake the CPU and record activity so everything can drain out if
-    // the CPU is currently idle.
-    wakeCPU();
-    activityRec.activity();
-}
-
-template <class Impl>
-void
-FullO3CPU<Impl>::signalSwitched()
-{
-    if (++switchCount == NumStages) {
-        fetch.doSwitchOut();
-        rename.doSwitchOut();
-        commit.doSwitchOut();
-        instList.clear();
-        while (!removeList.empty()) {
-            removeList.pop();
-        }
-
-        if (checker)
-            checker->switchOut(sampler);
-
-        if (tickEvent.scheduled())
-            tickEvent.squash();
-        sampler->signalSwitched();
-        _status = SwitchedOut;
+    instList.clear();
+    while (!removeList.empty()) {
+        removeList.pop();
     }
-    assert(switchCount <= 5);
+
+    _status = SwitchedOut;
+#if USE_CHECKER
+    if (checker)
+        checker->switchOut();
+#endif
 }
 
 template <class Impl>
@@ -700,7 +874,7 @@ void
 FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
 {
     // Flush out any old data from the time buffers.
-    for (int i = 0; i < 10; ++i) {
+    for (int i = 0; i < timeBuffer.getSize(); ++i) {
         timeBuffer.advance();
         fetchQueue.advance();
         decodeQueue.advance();
@@ -730,7 +904,7 @@ FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
     if (isActive == activeThreads.end()) {
         //May Need to Re-code this if the delay variable is the delay
         //needed for thread to activate
-        DPRINTF(FullCPU, "Adding Thread %i to active threads list\n",
+        DPRINTF(O3CPU, "Adding Thread %i to active threads list\n",
                 tid);
 
         activeThreads.push_back(tid);
@@ -922,6 +1096,22 @@ FullO3CPU<Impl>::setNextPC(uint64_t val,unsigned tid)
     commit.setNextPC(val, tid);
 }
 
+#if THE_ISA != ALPHA_ISA
+template <class Impl>
+uint64_t
+FullO3CPU<Impl>::readNextNPC(unsigned tid)
+{
+    return commit.readNextNPC(tid);
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::setNextNPC(uint64_t val,unsigned tid)
+{
+    commit.setNextNPC(val, tid); // forwarded to the commit stage
+}
+#endif
+
 template <class Impl>
 typename FullO3CPU<Impl>::ListIt
 FullO3CPU<Impl>::addInst(DynInstPtr &inst)
@@ -958,7 +1148,7 @@ template <class Impl>
 void
 FullO3CPU<Impl>::removeFrontInst(DynInstPtr &inst)
 {
-    DPRINTF(FullCPU, "FullCPU: Removing committed instruction [tid:%i] PC %#x "
+    DPRINTF(O3CPU, "Removing committed instruction [tid:%i] PC %#x "
             "[sn:%lli]\n",
             inst->threadNumber, inst->readPC(), inst->seqNum);
 
@@ -972,7 +1162,7 @@ template <class Impl>
 void
 FullO3CPU<Impl>::removeInstsNotInROB(unsigned tid)
 {
-    DPRINTF(FullCPU, "FullCPU: Thread %i: Deleting instructions from instruction"
+    DPRINTF(O3CPU, "Thread %i: Deleting instructions from instruction"
             " list.\n", tid);
 
     ListIt end_it;
@@ -982,12 +1172,12 @@ FullO3CPU<Impl>::removeInstsNotInROB(unsigned tid)
     if (instList.empty()) {
         return;
     } else if (rob.isEmpty(/*tid*/)) {
-        DPRINTF(FullCPU, "FullCPU: ROB is empty, squashing all insts.\n");
+        DPRINTF(O3CPU, "ROB is empty, squashing all insts.\n");
         end_it = instList.begin();
         rob_empty = true;
     } else {
         end_it = (rob.readTailInst(tid))->getInstListIt();
-        DPRINTF(FullCPU, "FullCPU: ROB is not empty, squashing insts not in ROB.\n");
+        DPRINTF(O3CPU, "ROB is not empty, squashing insts not in ROB.\n");
     }
 
     removeInstsThisCycle = true;
@@ -1026,7 +1216,7 @@ FullO3CPU<Impl>::removeInstsUntil(const InstSeqNum &seq_num,
 
     inst_iter--;
 
-    DPRINTF(FullCPU, "FullCPU: Deleting instructions from instruction "
+    DPRINTF(O3CPU, "Deleting instructions from instruction "
             "list that are from [tid:%i] and above [sn:%lli] (end=%lli).\n",
             tid, seq_num, (*inst_iter)->seqNum);
 
@@ -1048,7 +1238,7 @@ inline void
 FullO3CPU<Impl>::squashInstIt(const ListIt &instIt, const unsigned &tid)
 {
     if ((*instIt)->threadNumber == tid) {
-        DPRINTF(FullCPU, "FullCPU: Squashing instruction, "
+        DPRINTF(O3CPU, "Squashing instruction, "
                 "[tid:%i] [sn:%lli] PC %#x\n",
                 (*instIt)->threadNumber,
                 (*instIt)->seqNum,
@@ -1069,7 +1259,7 @@ void
 FullO3CPU<Impl>::cleanUpRemovedInsts()
 {
     while (!removeList.empty()) {
-        DPRINTF(FullCPU, "FullCPU: Removing instruction, "
+        DPRINTF(O3CPU, "Removing instruction, "
                 "[tid:%i] [sn:%lli] PC %#x\n",
                 (*removeList.front())->threadNumber,
                 (*removeList.front())->seqNum,
@@ -1185,4 +1375,4 @@ FullO3CPU<Impl>::updateThreadPriority()
 }
 
 // Forward declaration of FullO3CPU.
-template class FullO3CPU<AlphaSimpleImpl>;
+template class FullO3CPU<O3CPUImpl>;
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index ff41a3306..83cb966e3 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -26,6 +26,7 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * Authors: Kevin Lim
+ *          Korey Sewell
  */
 
 #ifndef __CPU_O3_CPU_HH__
@@ -48,24 +49,33 @@
 #include "cpu/o3/cpu_policy.hh"
 #include "cpu/o3/scoreboard.hh"
 #include "cpu/o3/thread_state.hh"
+//#include "cpu/o3/thread_context.hh"
 #include "sim/process.hh"
 
 template <class>
 class Checker;
 class ThreadContext;
+template <class>
+class O3ThreadContext;
+
+class Checkpoint;
 class MemObject;
 class Process;
 
-class BaseFullCPU : public BaseCPU
+class BaseO3CPU : public BaseCPU
 {
     //Stuff that's pretty ISA independent will go here.
   public:
     typedef BaseCPU::Params Params;
 
-    BaseFullCPU(Params *params);
+    BaseO3CPU(Params *params);
 
     void regStats();
 
+    /** Sets this CPU's ID. */
+    void setCpuId(int id) { cpu_id = id; }
+
+    /** Reads this CPU's ID. */
     int readCpuId() { return cpu_id; }
 
   protected:
@@ -78,7 +88,7 @@ class BaseFullCPU : public BaseCPU
  * tick() function for the CPU is defined here.
  */
 template <class Impl>
-class FullO3CPU : public BaseFullCPU
+class FullO3CPU : public BaseO3CPU
 {
   public:
     typedef TheISA::FloatReg FloatReg;
@@ -93,6 +103,8 @@ class FullO3CPU : public BaseFullCPU
 
     typedef typename std::list<DynInstPtr>::iterator ListIt;
 
+    friend class O3ThreadContext<Impl>;
+
   public:
     enum Status {
         Running,
@@ -105,6 +117,9 @@ class FullO3CPU : public BaseFullCPU
     /** Overall CPU status. */
     Status _status;
 
+    /** Per-thread status in CPU, used for SMT.  */
+    Status _threadStatus[Impl::MaxThreads];
+
   private:
     class TickEvent : public Event
     {
@@ -141,6 +156,92 @@ class FullO3CPU : public BaseFullCPU
             tickEvent.squash();
     }
 
+    class ActivateThreadEvent : public Event
+    {
+      private:
+        /** Number of Thread to Activate */
+        int tid;
+
+        /** Pointer to the CPU. */
+        FullO3CPU<Impl> *cpu;
+
+      public:
+        /** Constructs the event. */
+        ActivateThreadEvent();
+
+        /** Initialize Event */
+        void init(int thread_num, FullO3CPU<Impl> *thread_cpu);
+
+        /** Processes the event, calling activateThread() on the CPU. */
+        void process();
+
+        /** Returns the description of the event. */
+        const char *description();
+    };
+
+    /** Schedule thread tid to activate 'delay' cycles from the current tick. */
+    void scheduleActivateThreadEvent(int tid, int delay)
+    {
+        // Reschedule if squashed; otherwise schedule unless already pending.
+        if (activateThreadEvent[tid].squashed())
+            activateThreadEvent[tid].reschedule(curTick + cycles(delay));
+        else if (!activateThreadEvent[tid].scheduled())
+            activateThreadEvent[tid].schedule(curTick + cycles(delay));
+    }
+
+    /** Unschedule (squash) the activate thread event, if it is scheduled. */
+    void unscheduleActivateThreadEvent(int tid)
+    {
+        if (activateThreadEvent[tid].scheduled())
+            activateThreadEvent[tid].squash();
+    }
+
+    /** The activate-thread events, one per thread (indexed by tid). */
+    ActivateThreadEvent activateThreadEvent[Impl::MaxThreads];
+
+    class DeallocateContextEvent : public Event
+    {
+      private:
+        /** Number of Thread to Deallocate */
+        int tid;
+
+        /** Pointer to the CPU. */
+        FullO3CPU<Impl> *cpu;
+
+      public:
+        /** Constructs the event. */
+        DeallocateContextEvent();
+
+        /** Initialize Event */
+        void init(int thread_num, FullO3CPU<Impl> *thread_cpu);
+
+        /** Processes the event (presumably deallocating thread tid). */
+        void process();
+
+        /** Returns the description of the event. */
+        const char *description();
+    };
+
+    /** Schedule cpu to deallocate thread context 'delay' cycles from now. */
+    void scheduleDeallocateContextEvent(int tid, int delay)
+    {
+        // Reschedule if squashed; otherwise schedule unless already pending.
+        if (deallocateContextEvent[tid].squashed())
+            deallocateContextEvent[tid].reschedule(curTick + cycles(delay));
+        else if (!deallocateContextEvent[tid].scheduled())
+            deallocateContextEvent[tid].schedule(curTick + cycles(delay));
+    }
+
+    /** Unschedule thread deallocation in CPU */
+    void unscheduleDeallocateContextEvent(int tid)
+    {
+        if (deallocateContextEvent[tid].scheduled())
+            deallocateContextEvent[tid].squash();
+    }
+
+    /** The deallocate-context events, one per thread (indexed by tid). */
+    DeallocateContextEvent deallocateContextEvent[Impl::MaxThreads];
+
   public:
     /** Constructs a CPU with the given parameters. */
     FullO3CPU(Params *params);
@@ -150,6 +251,9 @@ class FullO3CPU : public BaseFullCPU
     /** Registers statistics. */
     void fullCPURegStats();
 
+    /** Returns a specific port. */
+    Port *getPort(const std::string &if_name, int idx);
+
     /** Ticks CPU, calling tick() on each stage, and checking the overall
      *  activity to see if the CPU should deschedule itself.
      */
@@ -158,6 +262,16 @@ class FullO3CPU : public BaseFullCPU
     /** Initialize the CPU */
     void init();
 
+    /** Returns the Number of Active Threads in the CPU */
+    int numActiveThreads()
+    { return activeThreads.size(); }
+
+    /** Add Thread to Active Threads List */
+    void activateThread(unsigned tid);
+
+    /** Remove Thread from Active Threads List */
+    void deactivateThread(unsigned tid);
+
     /** Setup CPU to insert a thread's context */
     void insertThread(unsigned tid);
 
@@ -184,7 +298,7 @@ class FullO3CPU : public BaseFullCPU
     /** Remove Thread from Active Threads List &&
      *  Remove Thread Context from CPU.
      */
-    void deallocateContext(int tid);
+    void deallocateContext(int tid, int delay = 1);
 
     /** Remove Thread from Active Threads List &&
      *  Remove Thread Context from CPU.
@@ -200,6 +314,13 @@ class FullO3CPU : public BaseFullCPU
     /** Update The Order In Which We Process Threads. */
     void updateThreadPriority();
 
+    /** Serialize state. */
+    virtual void serialize(std::ostream &os);
+
+    /** Unserialize from a checkpoint. */
+    virtual void unserialize(Checkpoint *cp, const std::string &section);
+
+  public:
     /** Executes a syscall on this cycle.
      *  ---------------------------------------
      *  Note: this is a virtual function. CPU-Specific
@@ -207,14 +328,21 @@ class FullO3CPU : public BaseFullCPU
      */
     virtual void syscall(int tid) { panic("Unimplemented!"); }
 
-    /** Switches out this CPU. */
-    void switchOut(Sampler *sampler);
+    /** Starts draining the CPU's pipeline of all instructions in
+     * order to stop all memory accesses. */
+    virtual unsigned int drain(Event *drain_event);
+
+    /** Resumes execution after a drain. */
+    virtual void resume();
 
     /** Signals to this CPU that a stage has completed switching out. */
-    void signalSwitched();
+    void signalDrained();
+
+    /** Switches out this CPU. */
+    virtual void switchOut();
 
     /** Takes over from another CPU. */
-    void takeOverFrom(BaseCPU *oldCPU);
+    virtual void takeOverFrom(BaseCPU *oldCPU);
 
     /** Get the current instruction sequence number, and increment it. */
     InstSeqNum getAndIncrementInstSeq()
@@ -299,6 +427,12 @@ class FullO3CPU : public BaseFullCPU
     /** Sets the next PC of a specific thread. */
     void setNextPC(uint64_t val, unsigned tid);
 
+    /** Reads the next NPC of a specific thread. */
+    uint64_t readNextNPC(unsigned tid);
+
+    /** Sets the next NPC of a specific thread. */
+    void setNextNPC(uint64_t val, unsigned tid);
+
     /** Function to add instruction onto the head of the list of the
      *  instructions.  Used when new instructions are fetched.
      */
@@ -481,11 +615,11 @@ class FullO3CPU : public BaseFullCPU
     /** Pointer to memory. */
     MemObject *mem;
 
-    /** Pointer to the sampler */
-    Sampler *sampler;
+    /** Event to call process() on once draining has completed. */
+    Event *drainEvent;
 
-    /** Counter of how many stages have completed switching out. */
-    int switchCount;
+    /** Counter of how many stages have completed draining. */
+    int drainCount;
 
     /** Pointers to all of the threads in the CPU. */
     std::vector<Thread *> thread;
@@ -507,6 +641,9 @@ class FullO3CPU : public BaseFullCPU
     /** The cycle that the CPU was last running, used for statistics. */
     Tick lastRunningCycle;
 
+    /** The cycle that the CPU was last activated by a new thread*/
+    Tick lastActivatedCycle;
+
     /** Number of Threads CPU can process */
     unsigned numThreads;
 
diff --git a/src/cpu/o3/decode.cc b/src/cpu/o3/decode.cc
index 4924f018a..896e38331 100644
--- a/src/cpu/o3/decode.cc
+++ b/src/cpu/o3/decode.cc
@@ -28,8 +28,7 @@
  * Authors: Kevin Lim
  */
 
-#include "cpu/o3/alpha_dyn_inst.hh"
-#include "cpu/o3/alpha_impl.hh"
+#include "cpu/o3/isa_specific.hh"
 #include "cpu/o3/decode_impl.hh"
 
-template class DefaultDecode<AlphaSimpleImpl>;
+template class DefaultDecode<O3CPUImpl>;
diff --git a/src/cpu/o3/decode.hh b/src/cpu/o3/decode.hh
index ff88358d6..7f5ecbc26 100644
--- a/src/cpu/o3/decode.hh
+++ b/src/cpu/o3/decode.hh
@@ -48,7 +48,7 @@ class DefaultDecode
 {
   private:
     // Typedefs from the Impl.
-    typedef typename Impl::FullCPU FullCPU;
+    typedef typename Impl::O3CPU O3CPU;
     typedef typename Impl::DynInstPtr DynInstPtr;
     typedef typename Impl::Params Params;
     typedef typename Impl::CPUPol CPUPol;
@@ -95,7 +95,7 @@ class DefaultDecode
     void regStats();
 
     /** Sets CPU pointer. */
-    void setCPU(FullCPU *cpu_ptr);
+    void setCPU(O3CPU *cpu_ptr);
 
     /** Sets the main backwards communication time buffer pointer. */
     void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
@@ -109,8 +109,14 @@ class DefaultDecode
     /** Sets pointer to list of active threads. */
     void setActiveThreads(std::list<unsigned> *at_ptr);
 
+    /** Drains the decode stage. */
+    bool drain();
+
+    /** Resumes execution after a drain. */
+    void resume() { }
+
     /** Switches out the decode stage. */
-    void switchOut();
+    void switchOut() { }
 
     /** Takes over from another CPU's thread. */
     void takeOverFrom();
@@ -189,7 +195,7 @@ class DefaultDecode
   private:
     // Interfaces to objects outside of decode.
     /** CPU interface. */
-    FullCPU *cpu;
+    O3CPU *cpu;
 
     /** Time buffer interface. */
     TimeBuffer<TimeStruct> *timeBuffer;
diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh
index 8a6ea6626..8b851c032 100644
--- a/src/cpu/o3/decode_impl.hh
+++ b/src/cpu/o3/decode_impl.hh
@@ -112,7 +112,7 @@ DefaultDecode<Impl>::regStats()
 
 template<class Impl>
 void
-DefaultDecode<Impl>::setCPU(FullCPU *cpu_ptr)
+DefaultDecode<Impl>::setCPU(O3CPU *cpu_ptr)
 {
     DPRINTF(Decode, "Setting CPU pointer.\n");
     cpu = cpu_ptr;
@@ -165,11 +165,12 @@ DefaultDecode<Impl>::setActiveThreads(list<unsigned> *at_ptr)
 }
 
 template <class Impl>
-void
-DefaultDecode<Impl>::switchOut()
+bool
+DefaultDecode<Impl>::drain()
 {
-    // Decode can immediately switch out.
-    cpu->signalSwitched();
+    // Decode can always drain immediately, so signal the CPU right away.
+    cpu->signalDrained();
+    return true;
 }
 
 template <class Impl>
@@ -296,7 +297,7 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid)
     for (int i=0; i<fromFetch->size; i++) {
         if (fromFetch->insts[i]->threadNumber == tid &&
             fromFetch->insts[i]->seqNum > inst->seqNum) {
-            fromFetch->insts[i]->squashed = true;
+            fromFetch->insts[i]->setSquashed();
         }
     }
 
@@ -345,7 +346,7 @@ DefaultDecode<Impl>::squash(unsigned tid)
 
     for (int i=0; i<fromFetch->size; i++) {
         if (fromFetch->insts[i]->threadNumber == tid) {
-            fromFetch->insts[i]->squashed = true;
+            fromFetch->insts[i]->setSquashed();
             squash_count++;
         }
     }
@@ -427,7 +428,7 @@ DefaultDecode<Impl>::updateStatus()
 
             DPRINTF(Activity, "Activating stage.\n");
 
-            cpu->activateStage(FullCPU::DecodeIdx);
+            cpu->activateStage(O3CPU::DecodeIdx);
         }
     } else {
         // If it's not unblocking, then decode will not have any internal
@@ -436,7 +437,7 @@ DefaultDecode<Impl>::updateStatus()
             _status = Inactive;
             DPRINTF(Activity, "Deactivating stage.\n");
 
-            cpu->deactivateStage(FullCPU::DecodeIdx);
+            cpu->deactivateStage(O3CPU::DecodeIdx);
         }
     }
 }
@@ -515,7 +516,7 @@ DefaultDecode<Impl>::checkSignalsAndUpdate(unsigned tid)
 
     // Check ROB squash signals from commit.
     if (fromCommit->commitInfo[tid].robSquashing) {
-        DPRINTF(Decode, "[tid:%]: ROB is still squashing.\n",tid);
+        DPRINTF(Decode, "[tid:%u]: ROB is still squashing.\n", tid);
 
         // Continue to squash.
         decodeStatus[tid] = Squashing;
diff --git a/src/cpu/o3/dep_graph.hh b/src/cpu/o3/dep_graph.hh
index 3659b1a37..c19fd0abf 100644
--- a/src/cpu/o3/dep_graph.hh
+++ b/src/cpu/o3/dep_graph.hh
@@ -68,6 +68,8 @@ class DependencyGraph
         : numEntries(0), memAllocCounter(0), nodesTraversed(0), nodesRemoved(0)
     { }
 
+    ~DependencyGraph();
+
     /** Resize the dependency graph to have num_entries registers. */
     void resize(int num_entries);
 
@@ -121,6 +123,12 @@ class DependencyGraph
 };
 
 template <class DynInstPtr>
+DependencyGraph<DynInstPtr>::~DependencyGraph()
+{
+    delete [] dependGraph;  // NOTE(review): not set in ctor init list; verify
+}
+
+template <class DynInstPtr>
 void
 DependencyGraph<DynInstPtr>::resize(int num_entries)
 {
diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh
new file mode 100644
index 000000000..a2cdf2dba
--- /dev/null
+++ b/src/cpu/o3/dyn_inst.hh
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Korey Sewell
+ */
+
+#ifndef __CPU_O3_DYN_INST_HH__
+#define __CPU_O3_DYN_INST_HH__
+
+#include "arch/isa_specific.hh"
+
+#if THE_ISA == ALPHA_ISA
+template <class Impl>
+class AlphaDynInst;
+
+struct AlphaSimpleImpl;
+// O3DynInst is declared for Alpha only; forward declarations suffice here.
+typedef AlphaDynInst<AlphaSimpleImpl> O3DynInst;
+#endif // THE_ISA == ALPHA_ISA
+
+#endif // __CPU_O3_DYN_INST_HH__
diff --git a/src/cpu/o3/fetch.cc b/src/cpu/o3/fetch.cc
index 5f52d0fca..d809b07e4 100644
--- a/src/cpu/o3/fetch.cc
+++ b/src/cpu/o3/fetch.cc
@@ -28,8 +28,7 @@
  * Authors: Kevin Lim
  */
 
-#include "cpu/o3/alpha_dyn_inst.hh"
-#include "cpu/o3/alpha_impl.hh"
+#include "cpu/o3/isa_specific.hh"
 #include "cpu/o3/fetch_impl.hh"
 
-template class DefaultFetch<AlphaSimpleImpl>;
+template class DefaultFetch<O3CPUImpl>;
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index 76b32de68..931919af8 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -26,6 +26,7 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * Authors: Kevin Lim
+ *          Korey Sewell
  */
 
 #ifndef __CPU_O3_FETCH_HH__
@@ -35,12 +36,10 @@
 #include "base/statistics.hh"
 #include "base/timebuf.hh"
 #include "cpu/pc_event.hh"
-#include "mem/packet.hh"
+#include "mem/packet_impl.hh"
 #include "mem/port.hh"
 #include "sim/eventq.hh"
 
-class Sampler;
-
 /**
  * DefaultFetch class handles both single threaded and SMT fetch. Its
  * width is specified by the parameters; each cycle it tries to fetch
@@ -57,7 +56,7 @@ class DefaultFetch
     typedef typename Impl::CPUPol CPUPol;
     typedef typename Impl::DynInst DynInst;
     typedef typename Impl::DynInstPtr DynInstPtr;
-    typedef typename Impl::FullCPU FullCPU;
+    typedef typename Impl::O3CPU O3CPU;
     typedef typename Impl::Params Params;
 
     /** Typedefs from the CPU policy. */
@@ -163,8 +162,11 @@ class DefaultFetch
     /** Registers statistics. */
     void regStats();
 
+    /** Returns the icache port. */
+    Port *getIcachePort() { return icachePort; }
+
     /** Sets CPU pointer. */
-    void setCPU(FullCPU *cpu_ptr);
+    void setCPU(O3CPU *cpu_ptr);
 
     /** Sets the main backwards communication time buffer pointer. */
     void setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer);
@@ -181,11 +183,14 @@ class DefaultFetch
     /** Processes cache completion event. */
     void processCacheCompletion(PacketPtr pkt);
 
-    /** Begins the switch out of the fetch stage. */
-    void switchOut();
+    /** Begins the drain of the fetch stage. */
+    bool drain();
 
-    /** Completes the switch out of the fetch stage. */
-    void doSwitchOut();
+    /** Resumes execution after a drain. */
+    void resume();
+
+    /** Tells fetch stage to prepare to be switched out. */
+    void switchOut();
 
     /** Takes over from another CPU's thread. */
     void takeOverFrom();
@@ -296,8 +301,8 @@ class DefaultFetch
     int branchCount();
 
   private:
-    /** Pointer to the FullCPU. */
-    FullCPU *cpu;
+    /** Pointer to the O3CPU. */
+    O3CPU *cpu;
 
     /** Time buffer interface. */
     TimeBuffer<TimeStruct> *timeBuffer;
@@ -335,6 +340,15 @@ class DefaultFetch
     /** Per-thread next PC. */
     Addr nextPC[Impl::MaxThreads];
 
+#if THE_ISA != ALPHA_ISA
+    /** Per-thread next-next PC.
+     *  This is not a real register, but is used for
+     *  architectures that use a branch-delay slot
+     *  (such as MIPS or SPARC).
+     */
+    Addr nextNPC[Impl::MaxThreads];
+#endif // THE_ISA != ALPHA_ISA
+
     /** Memory request used to access cache. */
     RequestPtr memReq[Impl::MaxThreads];
 
@@ -390,6 +404,12 @@ class DefaultFetch
     /** The cache line being fetched. */
     uint8_t *cacheData[Impl::MaxThreads];
 
+    /** The PC of the cacheline that has been loaded. */
+    Addr cacheDataPC[Impl::MaxThreads];
+
+    /** Whether or not the cache data is valid. */
+    bool cacheDataValid[Impl::MaxThreads];
+
     /** Size of instructions. */
     int instSize;
 
@@ -413,6 +433,9 @@ class DefaultFetch
      */
     bool interruptPending;
 
+    /** Whether a drain is pending; set by drain(), cleared by resume(). */
+    bool drainPending;
+
     /** Records if fetch is switched out. */
     bool switchedOut;
 
@@ -421,6 +444,7 @@ class DefaultFetch
     Stats::Scalar<> icacheStallCycles;
     /** Stat for total number of fetched instructions. */
     Stats::Scalar<> fetchedInsts;
+    /** Stat for total number of fetched branches. */
     Stats::Scalar<> fetchedBranches;
     /** Stat for total number of predicted branches. */
     Stats::Scalar<> predictedBranches;
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index c0a2a5d09..4184e1867 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -26,8 +26,11 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * Authors: Kevin Lim
+ *          Korey Sewell
  */
 
+#include "config/use_checker.hh"
+
 #include "arch/isa_traits.hh"
 #include "arch/utility.hh"
 #include "cpu/checker/cpu.hh"
@@ -106,13 +109,12 @@ DefaultFetch<Impl>::DefaultFetch(Params *params)
       numThreads(params->numberOfThreads),
       numFetchingThreads(params->smtNumFetchingThreads),
       interruptPending(false),
+      drainPending(false),
       switchedOut(false)
 {
     if (numThreads > Impl::MaxThreads)
         fatal("numThreads is not a valid value\n");
 
-    DPRINTF(Fetch, "Fetch constructor called\n");
-
     // Set fetch stage's status to inactive.
     _status = Inactive;
 
@@ -125,6 +127,8 @@ DefaultFetch<Impl>::DefaultFetch(Params *params)
     // Figure out fetch policy
     if (policy == "singlethread") {
         fetchPolicy = SingleThread;
+        if (numThreads > 1)
+            panic("Invalid Fetch Policy for a SMT workload.");
     } else if (policy == "roundrobin") {
         fetchPolicy = RoundRobin;
         DPRINTF(Fetch, "Fetch policy set to Round Robin\n");
@@ -158,6 +162,8 @@ DefaultFetch<Impl>::DefaultFetch(Params *params)
 
         // Create space to store a cache line.
         cacheData[tid] = new uint8_t[cacheBlkSize];
+        cacheDataPC[tid] = 0;
+        cacheDataValid[tid] = false;
 
         stalls[tid].decode = 0;
         stalls[tid].rename = 0;
@@ -268,7 +274,7 @@ DefaultFetch<Impl>::regStats()
 
 template<class Impl>
 void
-DefaultFetch<Impl>::setCPU(FullCPU *cpu_ptr)
+DefaultFetch<Impl>::setCPU(O3CPU *cpu_ptr)
 {
     DPRINTF(Fetch, "Setting the CPU pointer.\n");
     cpu = cpu_ptr;
@@ -276,13 +282,11 @@ DefaultFetch<Impl>::setCPU(FullCPU *cpu_ptr)
     // Name is finally available, so create the port.
     icachePort = new IcachePort(this);
 
-    Port *mem_dport = mem->getPort("");
-    icachePort->setPeer(mem_dport);
-    mem_dport->setPeer(icachePort);
-
+#if USE_CHECKER
     if (cpu->checker) {
         cpu->checker->setIcachePort(icachePort);
     }
+#endif
 
     // Fetch needs to start fetching instructions at the very beginning,
     // so it must start up in active state.
@@ -330,6 +334,9 @@ DefaultFetch<Impl>::initStage()
     for (int tid = 0; tid < numThreads; tid++) {
         PC[tid] = cpu->readPC(tid);
         nextPC[tid] = cpu->readNextPC(tid);
+#if THE_ISA != ALPHA_ISA
+        nextNPC[tid] = cpu->readNextNPC(tid);
+#endif
     }
 }
 
@@ -349,18 +356,22 @@ DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
         ++fetchIcacheSquashes;
         delete pkt->req;
         delete pkt;
-        memReq[tid] = NULL;
         return;
     }
 
-    // Wake up the CPU (if it went to sleep and was waiting on this completion
-    // event).
-    cpu->wakeCPU();
+    memcpy(cacheData[tid], pkt->getPtr<uint8_t *>(), cacheBlkSize);
+    cacheDataValid[tid] = true;
 
-    DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n",
-            tid);
+    if (!drainPending) {
+        // Wake up the CPU (if it went to sleep and was waiting on
+        // this completion event).
+        cpu->wakeCPU();
 
-    switchToActive();
+        DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n",
+                tid);
+
+        switchToActive();
+    }
 
     // Only switch to IcacheAccessComplete if we're not stalled as well.
     if (checkStall(tid)) {
@@ -376,18 +387,27 @@ DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
 }
 
 template <class Impl>
+bool
+DefaultFetch<Impl>::drain()
+{
+    // Fetch drains at once; drainPending then stops fetch() and CPU wakeups.
+    cpu->signalDrained();
+    drainPending = true;
+    return true;
+}
+
+template <class Impl>
 void
-DefaultFetch<Impl>::switchOut()
+DefaultFetch<Impl>::resume()
 {
-    // Fetch is ready to switch out at any time.
-    switchedOut = true;
-    cpu->signalSwitched();
+    drainPending = false;
 }
 
 template <class Impl>
 void
-DefaultFetch<Impl>::doSwitchOut()
+DefaultFetch<Impl>::switchOut()
 {
+    switchedOut = true;
     // Branch predictor needs to have its state cleared.
     branchPred.switchOut();
 }
@@ -404,6 +424,9 @@ DefaultFetch<Impl>::takeOverFrom()
         stalls[i].commit = 0;
         PC[i] = cpu->readPC(i);
         nextPC[i] = cpu->readNextPC(i);
+#if THE_ISA != ALPHA_ISA
+        nextNPC[i] = cpu->readNextNPC(i);
+#endif
         fetchStatus[i] = Running;
     }
     numInst = 0;
@@ -430,7 +453,7 @@ DefaultFetch<Impl>::switchToActive()
     if (_status == Inactive) {
         DPRINTF(Activity, "Activating stage.\n");
 
-        cpu->activateStage(FullCPU::FetchIdx);
+        cpu->activateStage(O3CPU::FetchIdx);
 
         _status = Active;
     }
@@ -443,7 +466,7 @@ DefaultFetch<Impl>::switchToInactive()
     if (_status == Active) {
         DPRINTF(Activity, "Deactivating stage.\n");
 
-        cpu->deactivateStage(FullCPU::FetchIdx);
+        cpu->deactivateStage(O3CPU::FetchIdx);
 
         _status = Inactive;
     }
@@ -488,7 +511,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
     unsigned flags = 0;
 #endif // FULL_SYSTEM
 
-    if (cacheBlocked || (interruptPending && flags == 0) || switchedOut) {
+    if (cacheBlocked || (interruptPending && flags == 0)) {
         // Hold off fetch from getting new instructions when:
         // Cache is blocked, or
         // while an interrupt is pending and we're not in PAL mode, or
@@ -499,6 +522,11 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
     // Align the fetch PC so it's at the start of a cache block.
     fetch_PC = icacheBlockAlignPC(fetch_PC);
 
+    // If we've already got the block, no need to try to fetch it again.
+    if (cacheDataValid[tid] && fetch_PC == cacheDataPC[tid]) {
+        return true;
+    }
+
     // Setup the memReq to do a read of the first instruction's address.
     // Set the appropriate read size and flags as well.
     // Build request here.
@@ -530,7 +558,10 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
         // Build packet here.
         PacketPtr data_pkt = new Packet(mem_req,
                                         Packet::ReadReq, Packet::Broadcast);
-        data_pkt->dataStatic(cacheData[tid]);
+        data_pkt->dataDynamicArray(new uint8_t[cacheBlkSize]);
+
+        cacheDataPC[tid] = fetch_PC;
+        cacheDataValid[tid] = false;
 
         DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
 
@@ -549,7 +580,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
             return false;
         }
 
-        DPRINTF(Fetch, "Doing cache access.\n");
+        DPRINTF(Fetch, "[tid:%i]: Doing cache access.\n", tid);
 
         lastIcacheStall[tid] = curTick;
 
@@ -662,7 +693,7 @@ DefaultFetch<Impl>::updateFetchStatus()
                             "completion\n",tid);
                 }
 
-                cpu->activateStage(FullCPU::FetchIdx);
+                cpu->activateStage(O3CPU::FetchIdx);
             }
 
             return Active;
@@ -673,7 +704,7 @@ DefaultFetch<Impl>::updateFetchStatus()
     if (_status == Active) {
         DPRINTF(Activity, "Deactivating stage.\n");
 
-        cpu->deactivateStage(FullCPU::FetchIdx);
+        cpu->deactivateStage(O3CPU::FetchIdx);
     }
 
     return Inactive;
@@ -714,12 +745,15 @@ DefaultFetch<Impl>::tick()
     // Reset the number of the instruction we're fetching.
     numInst = 0;
 
+#if FULL_SYSTEM
     if (fromCommit->commitInfo[0].interruptPending) {
         interruptPending = true;
     }
+
     if (fromCommit->commitInfo[0].clearInterrupt) {
         interruptPending = false;
     }
+#endif
 
     for (threadFetched = 0; threadFetched < numFetchingThreads;
          threadFetched++) {
@@ -817,7 +851,7 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
 
     // Check ROB squash signals from commit.
     if (fromCommit->commitInfo[tid].robSquashing) {
-        DPRINTF(Fetch, "[tid:%u]: ROB is still squashing Thread %u.\n", tid);
+        DPRINTF(Fetch, "[tid:%u]: ROB is still squashing.\n", tid);
 
         // Continue to squash.
         fetchStatus[tid] = Squashing;
@@ -885,7 +919,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
     //////////////////////////////////////////
     int tid = getFetchingThread(fetchPolicy);
 
-    if (tid == -1) {
+    if (tid == -1 || drainPending) {
         DPRINTF(Fetch,"There are no more threads available to fetch from.\n");
 
         // Breaks looping condition in tick()
@@ -893,6 +927,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
         return;
     }
 
+    DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid);
+
     // The current PC.
     Addr &fetch_PC = PC[tid];
 
@@ -915,7 +951,11 @@ DefaultFetch<Impl>::fetch(bool &status_change)
 
         bool fetch_success = fetchCacheLine(fetch_PC, fault, tid);
         if (!fetch_success) {
-            ++fetchMiscStallCycles;
+            if (cacheBlocked) {
+                ++icacheStallCycles;
+            } else {
+                ++fetchMiscStallCycles;
+            }
             return;
         }
     } else {
@@ -984,11 +1024,11 @@ DefaultFetch<Impl>::fetch(bool &status_change)
             DynInstPtr instruction = new DynInst(ext_inst, fetch_PC,
                                                  next_PC,
                                                  inst_seq, cpu);
-            instruction->setThread(tid);
+            instruction->setTid(tid);
 
             instruction->setASID(tid);
 
-            instruction->setState(cpu->thread[tid]);
+            instruction->setThreadState(cpu->thread[tid]);
 
             DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x created "
                     "[sn:%lli]\n",
@@ -1020,7 +1060,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
             fetch_PC = next_PC;
 
             if (instruction->isQuiesce()) {
-                warn("%lli: Quiesce instruction encountered, halting fetch!",
+                warn("cycle %lli: Quiesce instruction encountered, halting fetch!",
                      curTick);
                 fetchStatus[tid] = QuiescePending;
                 ++numInst;
@@ -1041,8 +1081,17 @@ DefaultFetch<Impl>::fetch(bool &status_change)
     if (fault == NoFault) {
         DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n",tid, next_PC);
 
+#if THE_ISA == ALPHA_ISA
+        PC[tid] = next_PC;
+        nextPC[tid] = next_PC + instSize;
+#else
         PC[tid] = next_PC;
         nextPC[tid] = next_PC + instSize;
+        nextPC[tid] = next_PC + instSize;
+
+        thread->setNextPC(thread->readNextNPC());
+        thread->setNextNPC(thread->readNextNPC() + sizeof(MachInst));
+#endif
     } else {
         // We shouldn't be in an icache miss and also have a fault (an ITB
         // miss)
@@ -1065,11 +1114,11 @@ DefaultFetch<Impl>::fetch(bool &status_change)
                                              next_PC,
                                              inst_seq, cpu);
         instruction->setPredTarg(next_PC + instSize);
-        instruction->setThread(tid);
+        instruction->setTid(tid);
 
         instruction->setASID(tid);
 
-        instruction->setState(cpu->thread[tid]);
+        instruction->setThreadState(cpu->thread[tid]);
 
         instruction->traceData = NULL;
 
@@ -1085,9 +1134,9 @@ DefaultFetch<Impl>::fetch(bool &status_change)
         fetchStatus[tid] = TrapPending;
         status_change = true;
 
-        warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]);
+        warn("cycle %lli: fault (%d) detected @ PC %08p", curTick, fault, PC[tid]);
 #else // !FULL_SYSTEM
-        warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]);
+        warn("cycle %lli: fault (%d) detected @ PC %08p", curTick, fault, PC[tid]);
 #endif // FULL_SYSTEM
     }
 }
@@ -1256,6 +1305,6 @@ int
 DefaultFetch<Impl>::branchCount()
 {
     list<unsigned>::iterator threads = (*activeThreads).begin();
-
+    panic("Branch Count Fetch policy unimplemented\n");
     return *threads;
 }
diff --git a/src/cpu/o3/fu_pool.cc b/src/cpu/o3/fu_pool.cc
index 545deea9b..42e329aca 100644
--- a/src/cpu/o3/fu_pool.cc
+++ b/src/cpu/o3/fu_pool.cc
@@ -31,7 +31,7 @@
 #include <sstream>
 
 #include "cpu/o3/fu_pool.hh"
-#include "encumbered/cpu/full/fu_pool.hh"
+#include "cpu/func_unit.hh"
 #include "sim/builder.hh"
 
 using namespace std;
diff --git a/src/cpu/o3/iew.cc b/src/cpu/o3/iew.cc
index 8145f4cc7..f99be7fe0 100644
--- a/src/cpu/o3/iew.cc
+++ b/src/cpu/o3/iew.cc
@@ -28,9 +28,8 @@
  * Authors: Kevin Lim
  */
 
-#include "cpu/o3/alpha_dyn_inst.hh"
-#include "cpu/o3/alpha_impl.hh"
+#include "cpu/o3/isa_specific.hh"
 #include "cpu/o3/iew_impl.hh"
 #include "cpu/o3/inst_queue.hh"
 
-template class DefaultIEW<AlphaSimpleImpl>;
+template class DefaultIEW<O3CPUImpl>;
diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh
index 2e61af5fc..fb9afde54 100644
--- a/src/cpu/o3/iew.hh
+++ b/src/cpu/o3/iew.hh
@@ -68,7 +68,7 @@ class DefaultIEW
     //Typedefs from Impl
     typedef typename Impl::CPUPol CPUPol;
     typedef typename Impl::DynInstPtr DynInstPtr;
-    typedef typename Impl::FullCPU FullCPU;
+    typedef typename Impl::O3CPU O3CPU;
     typedef typename Impl::Params Params;
 
     typedef typename CPUPol::IQ IQ;
@@ -80,7 +80,7 @@ class DefaultIEW
     typedef typename CPUPol::RenameStruct RenameStruct;
     typedef typename CPUPol::IssueStruct IssueStruct;
 
-    friend class Impl::FullCPU;
+    friend class Impl::O3CPU;
     friend class CPUPol::IQ;
 
   public:
@@ -125,8 +125,11 @@ class DefaultIEW
     /** Initializes stage; sends back the number of free IQ and LSQ entries. */
     void initStage();
 
+    /** Returns the dcache port. */
+    Port *getDcachePort() { return ldstQueue.getDcachePort(); }
+
     /** Sets CPU pointer for IEW, IQ, and LSQ. */
-    void setCPU(FullCPU *cpu_ptr);
+    void setCPU(O3CPU *cpu_ptr);
 
     /** Sets main time buffer used for backwards communication. */
     void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
@@ -143,11 +146,14 @@ class DefaultIEW
     /** Sets pointer to the scoreboard. */
     void setScoreboard(Scoreboard *sb_ptr);
 
-    /** Starts switch out of IEW stage. */
-    void switchOut();
+    /** Drains IEW stage. */
+    bool drain();
+
+    /** Resumes execution after a drain. */
+    void resume();
 
     /** Completes switch out of IEW stage. */
-    void doSwitchOut();
+    void switchOut();
 
     /** Takes over from another CPU's thread. */
     void takeOverFrom();
@@ -204,6 +210,45 @@ class DefaultIEW
     /** Returns if the LSQ has any stores to writeback. */
     bool hasStoresToWB() { return ldstQueue.hasStoresToWB(); }
 
+    void incrWb(InstSeqNum &sn) // Count one more outstanding writeback.
+    {
+        if (++wbOutstanding == wbMax)
+            ableToIssue = false; // Count has just reached wbMax.
+        DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding);
+#if DEBUG
+        wbList.insert(sn); // Remember sn so decrWb can check it.
+#endif
+    }
+
+    void decrWb(InstSeqNum &sn) // Count one fewer outstanding writeback.
+    {
+        if (wbOutstanding-- == wbMax)
+            ableToIssue = true; // Count was at wbMax before this decrement.
+        DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding);
+#if DEBUG
+        assert(wbList.find(sn) != wbList.end()); // sn must be in wbList.
+        wbList.erase(sn);
+#endif
+    }
+
+#if DEBUG
+    std::set<InstSeqNum> wbList; // Filled by incrWb, emptied by decrWb.
+
+    void dumpWb() // Print every sequence number currently in wbList.
+    {
+        std::set<InstSeqNum>::iterator wb_it = wbList.begin();
+        while (wb_it != wbList.end()) {
+            cprintf("[sn:%lli]\n",
+                    (*wb_it));
+            wb_it++;
+        }
+    }
+#endif // DEBUG
+
+    bool canIssue() { return ableToIssue; } // Accessor for ableToIssue.
+
+    bool ableToIssue; // Cleared by incrWb at wbMax; set by decrWb from wbMax.
+
   private:
     /** Sends commit proper information for a squash due to a branch
      * mispredict.
@@ -261,6 +306,9 @@ class DefaultIEW
     /** Processes inputs and changes state accordingly. */
     void checkSignalsAndUpdate(unsigned tid);
 
+    /** Removes instructions from rename from a thread's instruction list. */
+    void emptyRenameInsts(unsigned tid);
+
     /** Sorts instructions coming from rename into lists separated by thread. */
     void sortInsts();
 
@@ -328,7 +376,7 @@ class DefaultIEW
 
   private:
     /** CPU pointer. */
-    FullCPU *cpu;
+    O3CPU *cpu;
 
     /** Records if IEW has written to the time buffer this cycle, so that the
      * CPU can deschedule itself if there is no activity.
@@ -381,20 +429,12 @@ class DefaultIEW
      */
     unsigned issueToExecuteDelay;
 
-    /** Width of issue's read path, in instructions.  The read path is both
-     *  the skid buffer and the rename instruction queue.
-     *  Note to self: is this really different than issueWidth?
-     */
-    unsigned issueReadWidth;
+    /** Width of dispatch, in instructions. */
+    unsigned dispatchWidth;
 
     /** Width of issue, in instructions. */
     unsigned issueWidth;
 
-    /** Width of execute, in instructions.  Might make more sense to break
-     *  down into FP vs int.
-     */
-    unsigned executeWidth;
-
     /** Index into queue of instructions being written back. */
     unsigned wbNumInst;
 
@@ -405,6 +445,17 @@ class DefaultIEW
      */
     unsigned wbCycle;
 
+    /** Number of instructions in flight that will write back. */
+    unsigned wbOutstanding;
+
+    /** Writeback width. */
+    unsigned wbWidth;
+
+    /** Writeback width * writeback depth, where writeback depth is
+     * the number of cycles of writing back instructions that can be
+     * buffered. */
+    unsigned wbMax;
+
     /** Number of active threads. */
     unsigned numThreads;
 
@@ -439,14 +490,6 @@ class DefaultIEW
     Stats::Scalar<> iewIQFullEvents;
     /** Stat for number of times the LSQ becomes full. */
     Stats::Scalar<> iewLSQFullEvents;
-    /** Stat for total number of executed instructions. */
-    Stats::Scalar<> iewExecutedInsts;
-    /** Stat for total number of executed load instructions. */
-    Stats::Vector<> iewExecLoadInsts;
-    /** Stat for total number of executed store instructions. */
-//    Stats::Scalar<> iewExecStoreInsts;
-    /** Stat for total number of squashed instructions skipped at execute. */
-    Stats::Scalar<> iewExecSquashedInsts;
     /** Stat for total number of memory ordering violation events. */
     Stats::Scalar<> memOrderViolationEvents;
     /** Stat for total number of incorrect predicted taken branches. */
@@ -456,28 +499,25 @@ class DefaultIEW
     /** Stat for total number of mispredicted branches detected at execute. */
     Stats::Formula branchMispredicts;
 
+    /** Stat for total number of executed instructions. */
+    Stats::Scalar<> iewExecutedInsts;
+    /** Stat for total number of executed load instructions. */
+    Stats::Vector<> iewExecLoadInsts;
+    /** Stat for total number of squashed instructions skipped at execute. */
+    Stats::Scalar<> iewExecSquashedInsts;
     /** Number of executed software prefetches. */
-    Stats::Vector<> exeSwp;
+    Stats::Vector<> iewExecutedSwp;
     /** Number of executed nops. */
-    Stats::Vector<> exeNop;
+    Stats::Vector<> iewExecutedNop;
     /** Number of executed meomory references. */
-    Stats::Vector<> exeRefs;
+    Stats::Vector<> iewExecutedRefs;
     /** Number of executed branches. */
-    Stats::Vector<> exeBranches;
-
-//    Stats::Vector<> issued_ops;
-/*
-    Stats::Vector<> stat_fu_busy;
-    Stats::Vector2d<> stat_fuBusy;
-    Stats::Vector<> dist_unissued;
-    Stats::Vector2d<> stat_issued_inst_type;
-*/
-    /** Number of instructions issued per cycle. */
-    Stats::Formula issueRate;
+    Stats::Vector<> iewExecutedBranches;
     /** Number of executed store instructions. */
     Stats::Formula iewExecStoreInsts;
-//    Stats::Formula issue_op_rate;
-//    Stats::Formula fu_busy_rate;
+    /** Number of instructions executed per cycle. */
+    Stats::Formula iewExecRate;
+
     /** Number of instructions sent to commit. */
     Stats::Vector<> iewInstsToCommit;
     /** Number of instructions that writeback. */
@@ -490,7 +530,6 @@ class DefaultIEW
      * to resource contention.
      */
     Stats::Vector<> wbPenalized;
-
     /** Number of instructions per cycle written back. */
     Stats::Formula wbRate;
     /** Average number of woken instructions per writeback. */
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index 3929f2e19..684ae2295 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -42,17 +42,17 @@ using namespace std;
 
 template<class Impl>
 DefaultIEW<Impl>::DefaultIEW(Params *params)
-    : // @todo: Make this into a parameter.
-      issueToExecQueue(5, 5),
+    : issueToExecQueue(params->backComSize, params->forwardComSize),
       instQueue(params),
       ldstQueue(params),
       fuPool(params->fuPool),
       commitToIEWDelay(params->commitToIEWDelay),
       renameToIEWDelay(params->renameToIEWDelay),
       issueToExecuteDelay(params->issueToExecuteDelay),
-      issueReadWidth(params->issueWidth),
+      dispatchWidth(params->dispatchWidth),
       issueWidth(params->issueWidth),
-      executeWidth(params->executeWidth),
+      wbOutstanding(0),
+      wbWidth(params->wbWidth),
       numThreads(params->numberOfThreads),
       switchedOut(false)
 {
@@ -75,8 +75,12 @@ DefaultIEW<Impl>::DefaultIEW(Params *params)
         fetchRedirect[i] = false;
     }
 
+    wbMax = wbWidth * params->wbDepth;
+
     updateLSQNextCycle = false;
 
+    ableToIssue = true;
+
     skidBufferMax = (3 * (renameToIEWDelay * params->renameWidth)) + issueWidth;
 }
 
@@ -94,6 +98,7 @@ DefaultIEW<Impl>::regStats()
     using namespace Stats;
 
     instQueue.regStats();
+    ldstQueue.regStats();
 
     iewIdleCycles
         .name(name() + ".iewIdleCycles")
@@ -139,20 +144,6 @@ DefaultIEW<Impl>::regStats()
         .name(name() + ".iewLSQFullEvents")
         .desc("Number of times the LSQ has become full, causing a stall");
 
-    iewExecutedInsts
-        .name(name() + ".iewExecutedInsts")
-        .desc("Number of executed instructions");
-
-    iewExecLoadInsts
-        .init(cpu->number_of_threads)
-        .name(name() + ".iewExecLoadInsts")
-        .desc("Number of load instructions executed")
-        .flags(total);
-
-    iewExecSquashedInsts
-        .name(name() + ".iewExecSquashedInsts")
-        .desc("Number of squashed instructions skipped in execute");
-
     memOrderViolationEvents
         .name(name() + ".memOrderViolationEvents")
         .desc("Number of memory order violations");
@@ -171,114 +162,105 @@ DefaultIEW<Impl>::regStats()
 
     branchMispredicts = predictedTakenIncorrect + predictedNotTakenIncorrect;
 
-    exeSwp
+    iewExecutedInsts
+        .name(name() + ".EXEC:insts")
+        .desc("Number of executed instructions");
+
+    iewExecLoadInsts
+        .init(cpu->number_of_threads)
+        .name(name() + ".EXEC:loads")
+        .desc("Number of load instructions executed")
+        .flags(total);
+
+    iewExecSquashedInsts
+        .name(name() + ".EXEC:squashedInsts")
+        .desc("Number of squashed instructions skipped in execute");
+
+    iewExecutedSwp
         .init(cpu->number_of_threads)
         .name(name() + ".EXEC:swp")
         .desc("number of swp insts executed")
-        .flags(total)
-        ;
+        .flags(total);
 
-    exeNop
+    iewExecutedNop
         .init(cpu->number_of_threads)
         .name(name() + ".EXEC:nop")
         .desc("number of nop insts executed")
-        .flags(total)
-        ;
+        .flags(total);
 
-    exeRefs
+    iewExecutedRefs
         .init(cpu->number_of_threads)
         .name(name() + ".EXEC:refs")
         .desc("number of memory reference insts executed")
-        .flags(total)
-        ;
+        .flags(total);
 
-    exeBranches
+    iewExecutedBranches
         .init(cpu->number_of_threads)
         .name(name() + ".EXEC:branches")
         .desc("Number of branches executed")
-        .flags(total)
-        ;
-
-    issueRate
-        .name(name() + ".EXEC:rate")
-        .desc("Inst execution rate")
-        .flags(total)
-        ;
-    issueRate = iewExecutedInsts / cpu->numCycles;
+        .flags(total);
 
     iewExecStoreInsts
         .name(name() + ".EXEC:stores")
         .desc("Number of stores executed")
-        .flags(total)
-        ;
-    iewExecStoreInsts = exeRefs - iewExecLoadInsts;
-/*
-    for (int i=0; i<Num_OpClasses; ++i) {
-        stringstream subname;
-        subname << opClassStrings[i] << "_delay";
-        issue_delay_dist.subname(i, subname.str());
-    }
-*/
-    //
-    //  Other stats
-    //
+        .flags(total);
+    iewExecStoreInsts = iewExecutedRefs - iewExecLoadInsts;
+
+    iewExecRate
+        .name(name() + ".EXEC:rate")
+        .desc("Inst execution rate")
+        .flags(total);
+
+    iewExecRate = iewExecutedInsts / cpu->numCycles;
 
     iewInstsToCommit
         .init(cpu->number_of_threads)
         .name(name() + ".WB:sent")
         .desc("cumulative count of insts sent to commit")
-        .flags(total)
-        ;
+        .flags(total);
 
     writebackCount
         .init(cpu->number_of_threads)
         .name(name() + ".WB:count")
         .desc("cumulative count of insts written-back")
-        .flags(total)
-        ;
+        .flags(total);
 
     producerInst
         .init(cpu->number_of_threads)
         .name(name() + ".WB:producers")
         .desc("num instructions producing a value")
-        .flags(total)
-        ;
+        .flags(total);
 
     consumerInst
         .init(cpu->number_of_threads)
         .name(name() + ".WB:consumers")
         .desc("num instructions consuming a value")
-        .flags(total)
-        ;
+        .flags(total);
 
     wbPenalized
         .init(cpu->number_of_threads)
         .name(name() + ".WB:penalized")
         .desc("number of instrctions required to write to 'other' IQ")
-        .flags(total)
-        ;
+        .flags(total);
 
     wbPenalizedRate
         .name(name() + ".WB:penalized_rate")
         .desc ("fraction of instructions written-back that wrote to 'other' IQ")
-        .flags(total)
-        ;
+        .flags(total);
 
     wbPenalizedRate = wbPenalized / writebackCount;
 
     wbFanout
         .name(name() + ".WB:fanout")
         .desc("average fanout of values written-back")
-        .flags(total)
-        ;
+        .flags(total);
 
     wbFanout = producerInst / consumerInst;
 
     wbRate
         .name(name() + ".WB:rate")
         .desc("insts written-back per cycle")
-        .flags(total)
-        ;
+        .flags(total);
     wbRate = writebackCount / cpu->numCycles;
 }
 
@@ -299,7 +281,7 @@ DefaultIEW<Impl>::initStage()
 
 template<class Impl>
 void
-DefaultIEW<Impl>::setCPU(FullCPU *cpu_ptr)
+DefaultIEW<Impl>::setCPU(O3CPU *cpu_ptr)
 {
     DPRINTF(IEW, "Setting CPU pointer.\n");
     cpu = cpu_ptr;
@@ -307,7 +289,7 @@ DefaultIEW<Impl>::setCPU(FullCPU *cpu_ptr)
     instQueue.setCPU(cpu_ptr);
     ldstQueue.setCPU(cpu_ptr);
 
-    cpu->activateStage(FullCPU::IEWIdx);
+    cpu->activateStage(O3CPU::IEWIdx);
 }
 
 template<class Impl>
@@ -371,16 +353,23 @@ DefaultIEW<Impl>::setScoreboard(Scoreboard *sb_ptr)
 }
 
 template <class Impl>
+bool
+DefaultIEW<Impl>::drain()
+{
+    // IEW is ready to drain at any time.
+    cpu->signalDrained();
+    return true;
+}
+
+template <class Impl>
 void
-DefaultIEW<Impl>::switchOut()
+DefaultIEW<Impl>::resume()
 {
-    // IEW is ready to switch out at any time.
-    cpu->signalSwitched();
 }
 
 template <class Impl>
 void
-DefaultIEW<Impl>::doSwitchOut()
+DefaultIEW<Impl>::switchOut()
 {
     // Clear any state.
     switchedOut = true;
@@ -423,7 +412,7 @@ DefaultIEW<Impl>::takeOverFrom()
     updateLSQNextCycle = false;
 
     // @todo: Fix hardcoded number
-    for (int i = 0; i < 6; ++i) {
+    for (int i = 0; i < issueToExecQueue.getSize(); ++i) {
         issueToExecQueue.advance();
     }
 }
@@ -456,16 +445,7 @@ DefaultIEW<Impl>::squash(unsigned tid)
         skidBuffer[tid].pop();
     }
 
-    while (!insts[tid].empty()) {
-        if (insts[tid].front()->isLoad() ||
-            insts[tid].front()->isStore() ) {
-            toRename->iewInfo[tid].dispatchedToLSQ++;
-        }
-
-        toRename->iewInfo[tid].dispatched++;
-
-        insts[tid].pop();
-    }
+    emptyRenameInsts(tid);
 }
 
 template<class Impl>
@@ -591,12 +571,12 @@ DefaultIEW<Impl>::instToCommit(DynInstPtr &inst)
     // free slot.
     while ((*iewQueue)[wbCycle].insts[wbNumInst]) {
         ++wbNumInst;
-        if (wbNumInst == issueWidth) {
+        if (wbNumInst == wbWidth) {
             ++wbCycle;
             wbNumInst = 0;
         }
 
-        assert(wbCycle < 5);
+        assert((wbCycle * wbWidth + wbNumInst) < wbMax);
     }
 
     // Add finished instruction to queue to commit.
@@ -611,7 +591,7 @@ DefaultIEW<Impl>::validInstsFromRename()
     unsigned inst_count = 0;
 
     for (int i=0; i<fromRename->size; i++) {
-        if (!fromRename->insts[i]->squashed)
+        if (!fromRename->insts[i]->isSquashed())
             inst_count++;
     }
 
@@ -799,10 +779,12 @@ DefaultIEW<Impl>::checkSignalsAndUpdate(unsigned tid)
     }
 
     if (fromCommit->commitInfo[tid].robSquashing) {
-        DPRINTF(IEW, "[tid:%i]: ROB is still squashing.\n");
+        DPRINTF(IEW, "[tid:%i]: ROB is still squashing.\n", tid);
 
         dispatchStatus[tid] = Squashing;
 
+        emptyRenameInsts(tid);
+        wroteToTimeBuffer = true;
         return;
     }
 
@@ -853,6 +835,22 @@ DefaultIEW<Impl>::sortInsts()
 
 template <class Impl>
 void
+DefaultIEW<Impl>::emptyRenameInsts(unsigned tid)
+{
+    while (!insts[tid].empty()) {
+        if (insts[tid].front()->isLoad() ||
+            insts[tid].front()->isStore() ) {
+            toRename->iewInfo[tid].dispatchedToLSQ++;
+        }
+
+        toRename->iewInfo[tid].dispatched++;
+
+        insts[tid].pop();
+    }
+}
+
+template <class Impl>
+void
 DefaultIEW<Impl>::wakeCPU()
 {
     cpu->wakeCPU();
@@ -871,7 +869,7 @@ inline void
 DefaultIEW<Impl>::activateStage()
 {
     DPRINTF(Activity, "Activating stage.\n");
-    cpu->activateStage(FullCPU::IEWIdx);
+    cpu->activateStage(O3CPU::IEWIdx);
 }
 
 template <class Impl>
@@ -879,7 +877,7 @@ inline void
 DefaultIEW<Impl>::deactivateStage()
 {
     DPRINTF(Activity, "Deactivating stage.\n");
-    cpu->deactivateStage(FullCPU::IEWIdx);
+    cpu->deactivateStage(O3CPU::IEWIdx);
 }
 
 template<class Impl>
@@ -951,7 +949,7 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid)
     // Loop through the instructions, putting them in the instruction
     // queue.
     for ( ; dis_num_inst < insts_to_add &&
-              dis_num_inst < issueReadWidth;
+              dis_num_inst < dispatchWidth;
           ++dis_num_inst)
     {
         inst = insts_to_dispatch.front();
@@ -1090,7 +1088,7 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid)
 
             instQueue.recordProducer(inst);
 
-            exeNop[tid]++;
+            iewExecutedNop[tid]++;
 
             add_to_iq = false;
         } else if (inst->isExecuted()) {
@@ -1203,6 +1201,7 @@ DefaultIEW<Impl>::executeInsts()
 
             ++iewExecSquashedInsts;
 
+            decrWb(inst->seqNum);
             continue;
         }
 
@@ -1365,6 +1364,8 @@ DefaultIEW<Impl>::writebackInsts()
             }
             writebackCount[tid]++;
         }
+
+        decrWb(inst->seqNum);
     }
 }
 
@@ -1501,9 +1502,9 @@ DefaultIEW<Impl>::updateExeInstStats(DynInstPtr &inst)
     //
 #ifdef TARGET_ALPHA
     if (inst->isDataPrefetch())
-        exeSwp[thread_number]++;
+        iewExecutedSwp[thread_number]++;
     else
-        iewExecutedInsts++;
+        iewExecutedInsts++;
 #else
     iewExecutedInsts++;
 #endif
@@ -1512,13 +1513,13 @@ DefaultIEW<Impl>::updateExeInstStats(DynInstPtr &inst)
     //  Control operations
     //
     if (inst->isControl())
-        exeBranches[thread_number]++;
+        iewExecutedBranches[thread_number]++;
 
     //
     //  Memory operations
     //
     if (inst->isMemRef()) {
-        exeRefs[thread_number]++;
+        iewExecutedRefs[thread_number]++;
 
         if (inst->isLoad()) {
             iewExecLoadInsts[thread_number]++;
diff --git a/src/cpu/o3/inst_queue.cc b/src/cpu/o3/inst_queue.cc
index f2c6b8213..a539066f9 100644
--- a/src/cpu/o3/inst_queue.cc
+++ b/src/cpu/o3/inst_queue.cc
@@ -28,9 +28,8 @@
  * Authors: Kevin Lim
  */
 
-#include "cpu/o3/alpha_dyn_inst.hh"
-#include "cpu/o3/alpha_impl.hh"
+#include "cpu/o3/isa_specific.hh"
 #include "cpu/o3/inst_queue_impl.hh"
 
 // Force instantiation of InstructionQueue.
-template class InstructionQueue<AlphaSimpleImpl>;
+template class InstructionQueue<O3CPUImpl>;
diff --git a/src/cpu/o3/inst_queue.hh b/src/cpu/o3/inst_queue.hh
index 60a713020..4c69ca384 100644
--- a/src/cpu/o3/inst_queue.hh
+++ b/src/cpu/o3/inst_queue.hh
@@ -26,6 +26,7 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * Authors: Kevin Lim
+ *          Korey Sewell
  */
 
 #ifndef __CPU_O3_INST_QUEUE_HH__
@@ -68,7 +69,7 @@ class InstructionQueue
 {
   public:
     //Typedefs from the Impl.
-    typedef typename Impl::FullCPU FullCPU;
+    typedef typename Impl::O3CPU O3CPU;
     typedef typename Impl::DynInstPtr DynInstPtr;
     typedef typename Impl::Params Params;
 
@@ -80,7 +81,7 @@ class InstructionQueue
     // Typedef of iterator through the list of instructions.
     typedef typename std::list<DynInstPtr>::iterator ListIt;
 
-    friend class Impl::FullCPU;
+    friend class Impl::O3CPU;
 
     /** FU completion event class. */
     class FUCompletion : public Event {
@@ -125,7 +126,7 @@ class InstructionQueue
     void resetState();
 
     /** Sets CPU pointer. */
-    void setCPU(FullCPU *_cpu) { cpu = _cpu; }
+    void setCPU(O3CPU *_cpu) { cpu = _cpu; }
 
     /** Sets active threads list. */
     void setActiveThreads(std::list<unsigned> *at_ptr);
@@ -252,7 +253,7 @@ class InstructionQueue
     /////////////////////////
 
     /** Pointer to the CPU. */
-    FullCPU *cpu;
+    O3CPU *cpu;
 
     /** Cache interface. */
     MemInterface *dcacheInterface;
@@ -474,12 +475,17 @@ class InstructionQueue
     /** Stat for number of non-speculative instructions removed due to a squash.
      */
     Stats::Scalar<> iqSquashedNonSpecRemoved;
+    // Also include number of instructions rescheduled and replayed.
 
-    /** Distribution of number of instructions in the queue. */
+    /** Distribution of number of instructions in the queue.
+     * @todo: Need to create struct to track the entry time for each
+     * instruction. */
     Stats::VectorDistribution<> queueResDist;
     /** Distribution of the number of instructions issued. */
     Stats::Distribution<> numIssuedDist;
-    /** Distribution of the cycles it takes to issue an instruction. */
+    /** Distribution of the cycles it takes to issue an instruction.
+     * @todo: Need to create struct to track the ready time for each
+     * instruction. */
     Stats::VectorDistribution<> issueDelayDist;
 
     /** Number of times an instruction could not be issued because a
@@ -492,8 +498,7 @@ class InstructionQueue
 
     /** Number of instructions issued per cycle. */
     Stats::Formula issueRate;
-//    Stats::Formula issue_stores;
-//    Stats::Formula issue_op_rate;
+
     /** Number of times the FU was busy. */
     Stats::Vector<> fuBusy;
     /** Number of times the FU was busy per instruction issued. */
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh
index 06a052c6f..36e0842be 100644
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -26,6 +26,7 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * Authors: Kevin Lim
+ *          Korey Sewell
  */
 
 #include <limits>
@@ -125,7 +126,7 @@ InstructionQueue<Impl>::InstructionQueue(Params *params)
             maxEntries[i] = part_amt;
         }
 
-        DPRINTF(Fetch, "IQ sharing policy set to Partitioned:"
+        DPRINTF(IQ, "IQ sharing policy set to Partitioned:"
                 "%i entries per thread.\n",part_amt);
 
     } else if (policy == "threshold") {
@@ -140,7 +141,7 @@ InstructionQueue<Impl>::InstructionQueue(Params *params)
             maxEntries[i] = thresholdIQ;
         }
 
-        DPRINTF(Fetch, "IQ sharing policy set to Threshold:"
+        DPRINTF(IQ, "IQ sharing policy set to Threshold:"
                 "%i entries per thread.\n",thresholdIQ);
    } else {
        assert(0 && "Invalid IQ Sharing Policy.Options Are:{Dynamic,"
@@ -289,22 +290,7 @@ InstructionQueue<Impl>::regStats()
         .flags(total)
         ;
     issueRate = iqInstsIssued / cpu->numCycles;
-/*
-    issue_stores
-        .name(name() + ".ISSUE:stores")
-        .desc("Number of stores issued")
-        .flags(total)
-        ;
-    issue_stores = exe_refs - exe_loads;
-*/
-/*
-    issue_op_rate
-        .name(name() + ".ISSUE:op_rate")
-        .desc("Operation issue rate")
-        .flags(total)
-        ;
-    issue_op_rate = issued_ops / numCycles;
-*/
+
     statFuBusy
         .init(Num_OpClasses)
         .name(name() + ".ISSUE:fu_full")
@@ -701,6 +687,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
     int total_issued = 0;
 
     while (total_issued < totalWidth &&
+           iewStage->canIssue() &&
            order_it != order_end_it) {
         OpClass op_class = (*order_it).queueType;
 
@@ -791,13 +778,14 @@ InstructionQueue<Impl>::scheduleReadyInsts()
                 // complete.
                 ++freeEntries;
                 count[tid]--;
-                issuing_inst->removeInIQ();
+                issuing_inst->clearInIQ();
             } else {
                 memDepUnit[tid].issue(issuing_inst);
             }
 
             listOrder.erase(order_it++);
             statIssuedInstType[tid][op_class]++;
+            iewStage->incrWb(issuing_inst->seqNum);
         } else {
             statFuBusy[op_class]++;
             fuBusy[tid]++;
@@ -1097,7 +1085,7 @@ InstructionQueue<Impl>::doSquash(unsigned tid)
             // inst will flow through the rest of the pipeline.
             squashed_inst->setIssued();
             squashed_inst->setCanCommit();
-            squashed_inst->removeInIQ();
+            squashed_inst->clearInIQ();
 
             //Update Thread IQ Count
             count[squashed_inst->threadNumber]--;
diff --git a/src/cpu/o3/isa_specific.hh b/src/cpu/o3/isa_specific.hh
new file mode 100755
index 000000000..f8a9dd8cc
--- /dev/null
+++ b/src/cpu/o3/isa_specific.hh
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Korey Sewell
+ */
+
+#include "cpu/base.hh"
+
+#if THE_ISA == ALPHA_ISA
+    #include "cpu/o3/alpha/cpu.hh"
+    #include "cpu/o3/alpha/impl.hh"
+    #include "cpu/o3/alpha/params.hh"
+    #include "cpu/o3/alpha/dyn_inst.hh"
+#else
+    #error "O3CPU doesnt support this ISA"
+#endif
diff --git a/src/cpu/o3/lsq.cc b/src/cpu/o3/lsq.cc
index de0325920..527947281 100644
--- a/src/cpu/o3/lsq.cc
+++ b/src/cpu/o3/lsq.cc
@@ -28,11 +28,9 @@
  * Authors: Korey Sewell
  */
 
-#include "cpu/o3/alpha_dyn_inst.hh"
-#include "cpu/o3/alpha_cpu.hh"
-#include "cpu/o3/alpha_impl.hh"
+#include "cpu/o3/isa_specific.hh"
 #include "cpu/o3/lsq_impl.hh"
 
 // Force the instantiation of LDSTQ for all the implementations we care about.
-template class LSQ<AlphaSimpleImpl>;
+template class LSQ<O3CPUImpl>;
 
diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh
index bc4154c85..190734dc2 100644
--- a/src/cpu/o3/lsq.hh
+++ b/src/cpu/o3/lsq.hh
@@ -44,7 +44,7 @@ template <class Impl>
 class LSQ {
   public:
     typedef typename Impl::Params Params;
-    typedef typename Impl::FullCPU FullCPU;
+    typedef typename Impl::O3CPU O3CPU;
     typedef typename Impl::DynInstPtr DynInstPtr;
     typedef typename Impl::CPUPol::IEW IEW;
     typedef typename Impl::CPUPol::LSQUnit LSQUnit;
@@ -62,10 +62,20 @@ class LSQ {
     /** Returns the name of the LSQ. */
     std::string name() const;
 
+    /** Registers statistics of each LSQ unit. */
+    void regStats();
+
+    /** Returns the dcache port.
+     *  The port is a single member owned by this LSQ; each thread's
+     *  LSQUnit is handed a pointer to it (see setDcachePort) and uses
+     *  that pointer for sending.
+     */
+    Port *getDcachePort() { return &dcachePort; }
+
     /** Sets the pointer to the list of active threads. */
     void setActiveThreads(std::list<unsigned> *at_ptr);
     /** Sets the CPU pointer. */
-    void setCPU(FullCPU *cpu_ptr);
+    void setCPU(O3CPU *cpu_ptr);
     /** Sets the IEW stage pointer. */
     void setIEW(IEW *iew_ptr);
     /** Switches out the LSQ. */
@@ -248,6 +258,15 @@ class LSQ {
     bool willWB(unsigned tid)
     { return thread[tid].willWB(); }
 
+    /** Returns if the cache is currently blocked. */
+    bool cacheBlocked()
+    { return retryTid != -1; }
+
+    /** Sets the retry thread id, indicating that one of the LSQUnits
+     * tried to access the cache but the cache was blocked. */
+    void setRetryTid(int tid)
+    { retryTid = tid; }
+
     /** Debugging function to print out all instructions. */
     void dumpInsts();
     /** Debugging function to print out instructions from a specific thread. */
@@ -264,7 +283,49 @@ class LSQ {
     template <class T>
     Fault write(RequestPtr req, T &data, int store_idx);
 
-  private:
+    /** DcachePort class for this LSQ.  Handles doing the
+     * communication with the cache/memory.
+     */
+    class DcachePort : public Port
+    {
+      protected:
+        /** Pointer to LSQ. */
+        LSQ *lsq;
+
+      public:
+        /** Default constructor. */
+        DcachePort(LSQ *_lsq)
+            : lsq(_lsq)
+        { }
+
+      protected:
+        /** Atomic version of receive.  Panics. */
+        virtual Tick recvAtomic(PacketPtr pkt);
+
+        /** Functional version of receive.  Panics. */
+        virtual void recvFunctional(PacketPtr pkt);
+
+        /** Receives status change.  Other than range changing, panics. */
+        virtual void recvStatusChange(Status status);
+
+        /** Returns the address ranges of this device. */
+        virtual void getDeviceAddressRanges(AddrRangeList &resp,
+                                            AddrRangeList &snoop)
+        { resp.clear(); snoop.clear(); }
+
+        /** Timing version of receive.  Handles writing back and
+         * completing the load or store that has returned from
+         * memory. */
+        virtual bool recvTiming(PacketPtr pkt);
+
+        /** Handles doing a retry of the previous send. */
+        virtual void recvRetry();
+    };
+
+    /** D-cache port. */
+    DcachePort dcachePort;
+
+  protected:
     /** The LSQ policy for SMT mode. */
     LSQPolicy lsqPolicy;
 
@@ -272,7 +333,7 @@ class LSQ {
     LSQUnit thread[Impl::MaxThreads];
 
     /** The CPU pointer. */
-    FullCPU *cpu;
+    O3CPU *cpu;
 
     /** The IEW stage pointer. */
     IEW *iewStage;
@@ -293,6 +354,10 @@ class LSQ {
 
     /** Number of Threads. */
     unsigned numThreads;
+
+    /** The thread id of the LSQ Unit that is currently waiting for a
+     * retry. */
+    int retryTid;
 };
 
 template <class Impl>
diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh
index 27aa0dc3c..4e3957029 100644
--- a/src/cpu/o3/lsq_impl.hh
+++ b/src/cpu/o3/lsq_impl.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * Copyright (c) 2005-2006 The Regents of The University of Michigan
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -36,9 +36,53 @@
 using namespace std;
 
 template <class Impl>
+Tick
+LSQ<Impl>::DcachePort::recvAtomic(PacketPtr pkt)
+{
+    panic("O3CPU model does not work with atomic mode!");
+    return curTick;
+}
+
+template <class Impl>
+void
+LSQ<Impl>::DcachePort::recvFunctional(PacketPtr pkt)
+{
+    panic("O3CPU doesn't expect recvFunctional callback!");
+}
+
+template <class Impl>
+void
+LSQ<Impl>::DcachePort::recvStatusChange(Status status)
+{
+    if (status == RangeChange)
+        return;
+
+    panic("O3CPU doesn't expect recvStatusChange callback!");
+}
+
+template <class Impl>
+bool
+LSQ<Impl>::DcachePort::recvTiming(PacketPtr pkt)
+{
+    lsq->thread[pkt->req->getThreadNum()].completeDataAccess(pkt);
+    return true;
+}
+
+template <class Impl>
+void
+LSQ<Impl>::DcachePort::recvRetry()
+{
+    // Clear retryTid first; the LSQUnit sets it again if still blocked.
+    const int retry_tid = lsq->retryTid;
+    lsq->retryTid = -1;
+    lsq->thread[retry_tid].recvRetry();
+}
+
+template <class Impl>
 LSQ<Impl>::LSQ(Params *params)
-    : LQEntries(params->LQEntries), SQEntries(params->SQEntries),
-      numThreads(params->numberOfThreads)
+    : dcachePort(this), LQEntries(params->LQEntries),
+      SQEntries(params->SQEntries), numThreads(params->numberOfThreads),
+      retryTid(-1)
 {
     DPRINTF(LSQ, "Creating LSQ object.\n");
 
@@ -94,7 +138,8 @@ LSQ<Impl>::LSQ(Params *params)
 
     //Initialize LSQs
     for (int tid=0; tid < numThreads; tid++) {
-        thread[tid].init(params, maxLQEntries, maxSQEntries, tid);
+        thread[tid].init(params, this, maxLQEntries, maxSQEntries, tid);
+        thread[tid].setDcachePort(&dcachePort);
     }
 }
 
@@ -108,6 +153,16 @@ LSQ<Impl>::name() const
 
 template<class Impl>
 void
+LSQ<Impl>::regStats()
+{
+    // Register the statistics of every per-thread LSQ unit.
+    for (int tid=0; tid < numThreads; tid++) {
+        thread[tid].regStats();
+    }
+}
+
+template<class Impl>
+void
 LSQ<Impl>::setActiveThreads(list<unsigned> *at_ptr)
 {
     activeThreads = at_ptr;
@@ -116,10 +171,12 @@ LSQ<Impl>::setActiveThreads(list<unsigned> *at_ptr)
 
 template<class Impl>
 void
-LSQ<Impl>::setCPU(FullCPU *cpu_ptr)
+LSQ<Impl>::setCPU(O3CPU *cpu_ptr)
 {
     cpu = cpu_ptr;
 
+    dcachePort.setName(name());
+
     for (int tid=0; tid < numThreads; tid++) {
         thread[tid].setCPU(cpu_ptr);
     }
@@ -492,6 +549,9 @@ LSQ<Impl>::hasStoresToWB()
 {
     list<unsigned>::iterator active_threads = (*activeThreads).begin();
 
+    if ((*activeThreads).empty())
+        return false;
+
     while (active_threads != (*activeThreads).end()) {
         unsigned tid = *active_threads++;
         if (!hasStoresToWB(tid))
diff --git a/src/cpu/o3/lsq_unit.cc b/src/cpu/o3/lsq_unit.cc
index e935ffa5c..3ca3fa667 100644
--- a/src/cpu/o3/lsq_unit.cc
+++ b/src/cpu/o3/lsq_unit.cc
@@ -29,11 +29,9 @@
  *          Korey Sewell
  */
 
-#include "cpu/o3/alpha_dyn_inst.hh"
-#include "cpu/o3/alpha_cpu.hh"
-#include "cpu/o3/alpha_impl.hh"
+#include "cpu/o3/isa_specific.hh"
 #include "cpu/o3/lsq_unit_impl.hh"
 
 // Force the instantiation of LDSTQ for all the implementations we care about.
-template class LSQUnit<AlphaSimpleImpl>;
+template class LSQUnit<O3CPUImpl>;
 
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index ce0cdd36f..a76a73f0c 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -40,7 +40,7 @@
 #include "config/full_system.hh"
 #include "base/hashmap.hh"
 #include "cpu/inst_seq.hh"
-#include "mem/packet.hh"
+#include "mem/packet_impl.hh"
 #include "mem/port.hh"
 
 /**
@@ -61,9 +61,10 @@ class LSQUnit {
     typedef TheISA::IntReg IntReg;
   public:
     typedef typename Impl::Params Params;
-    typedef typename Impl::FullCPU FullCPU;
+    typedef typename Impl::O3CPU O3CPU;
     typedef typename Impl::DynInstPtr DynInstPtr;
     typedef typename Impl::CPUPol::IEW IEW;
+    typedef typename Impl::CPUPol::LSQ LSQ;
     typedef typename Impl::CPUPol::IssueStruct IssueStruct;
 
   public:
@@ -71,19 +72,26 @@ class LSQUnit {
     LSQUnit();
 
     /** Initializes the LSQ unit with the specified number of entries. */
-    void init(Params *params, unsigned maxLQEntries,
+    void init(Params *params, LSQ *lsq_ptr, unsigned maxLQEntries,
               unsigned maxSQEntries, unsigned id);
 
     /** Returns the name of the LSQ unit. */
     std::string name() const;
 
+    /** Registers statistics. */
+    void regStats();
+
     /** Sets the CPU pointer. */
-    void setCPU(FullCPU *cpu_ptr);
+    void setCPU(O3CPU *cpu_ptr);
 
     /** Sets the IEW stage pointer. */
     void setIEW(IEW *iew_ptr)
     { iewStage = iew_ptr; }
 
+    /** Sets the pointer to the dcache port. */
+    void setDcachePort(Port *dcache_port)
+    { dcachePort = dcache_port; }
+
     /** Switches out LSQ unit. */
     void switchOut();
 
@@ -125,11 +133,10 @@ class LSQUnit {
     /** Writes back stores. */
     void writebackStores();
 
+    /** Completes the data access that has been returned from the
+     * memory system. */
     void completeDataAccess(PacketPtr pkt);
 
-    // @todo: Include stats in the LSQ unit.
-    //void regStats();
-
     /** Clears all the entries in the LQ. */
     void clearLQ();
 
@@ -204,6 +211,9 @@ class LSQUnit {
                         !storeQueue[storeWBIdx].completed &&
                         !isStoreBlocked; }
 
+    /** Handles doing the retry. */
+    void recvRetry();
+
   private:
     /** Writes back the instruction, sending it to IEW. */
     void writeback(DynInstPtr &inst, PacketPtr pkt);
@@ -214,9 +224,6 @@ class LSQUnit {
     /** Completes the store at the specified index. */
     void completeStore(int store_idx);
 
-    /** Handles doing the retry. */
-    void recvRetry();
-
     /** Increments the given store index (circular queue). */
     inline void incrStIdx(int &store_idx);
     /** Decrements the given store index (circular queue). */
@@ -232,59 +239,16 @@ class LSQUnit {
 
   private:
     /** Pointer to the CPU. */
-    FullCPU *cpu;
+    O3CPU *cpu;
 
     /** Pointer to the IEW stage. */
     IEW *iewStage;
 
-    /** Pointer to memory object. */
-    MemObject *mem;
-
-    /** DcachePort class for this LSQ Unit.  Handles doing the
-     * communication with the cache/memory.
-     * @todo: Needs to be moved to the LSQ level and have some sort
-     * of arbitration.
-     */
-    class DcachePort : public Port
-    {
-      protected:
-        /** Pointer to CPU. */
-        FullCPU *cpu;
-        /** Pointer to LSQ. */
-        LSQUnit *lsq;
-
-      public:
-        /** Default constructor. */
-        DcachePort(FullCPU *_cpu, LSQUnit *_lsq)
-            : Port(_lsq->name() + "-dport"), cpu(_cpu), lsq(_lsq)
-        { }
-
-      protected:
-        /** Atomic version of receive.  Panics. */
-        virtual Tick recvAtomic(PacketPtr pkt);
+    /** Pointer to the LSQ. */
+    LSQ *lsq;
 
-        /** Functional version of receive.  Panics. */
-        virtual void recvFunctional(PacketPtr pkt);
-
-        /** Receives status change.  Other than range changing, panics. */
-        virtual void recvStatusChange(Status status);
-
-        /** Returns the address ranges of this device. */
-        virtual void getDeviceAddressRanges(AddrRangeList &resp,
-                                            AddrRangeList &snoop)
-        { resp.clear(); snoop.clear(); }
-
-        /** Timing version of receive.  Handles writing back and
-         * completing the load or store that has returned from
-         * memory. */
-        virtual bool recvTiming(PacketPtr pkt);
-
-        /** Handles doing a retry of the previous send. */
-        virtual void recvRetry();
-    };
-
-    /** Pointer to the D-cache. */
-    DcachePort *dcachePort;
+    /** Pointer to the dcache port.  Used only for sending. */
+    Port *dcachePort;
 
     /** Derived class to hold any sender state the LSQ needs. */
     class LSQSenderState : public Packet::SenderState
@@ -443,25 +407,35 @@ class LSQUnit {
     // Will also need how many read/write ports the Dcache has.  Or keep track
     // of that in stage that is one level up, and only call executeLoad/Store
     // the appropriate number of times.
-/*
-    // total number of loads forwaded from LSQ stores
-    Stats::Vector<> lsq_forw_loads;
 
-    // total number of loads ignored due to invalid addresses
-    Stats::Vector<> inv_addr_loads;
+    /** Total number of loads forwarded from LSQ stores. */
+    Stats::Scalar<> lsqForwLoads;
+
+    /** Total number of loads ignored due to invalid addresses. */
+    Stats::Scalar<> invAddrLoads;
 
-    // total number of software prefetches ignored due to invalid addresses
-    Stats::Vector<> inv_addr_swpfs;
+    /** Total number of squashed loads. */
+    Stats::Scalar<> lsqSquashedLoads;
 
-    // total non-speculative bogus addresses seen (debug var)
-    Counter sim_invalid_addrs;
-    Stats::Vector<> fu_busy;  //cumulative fu busy
+    /** Total number of responses from the memory system that are
+     * ignored due to the instruction already being squashed. */
+    Stats::Scalar<> lsqIgnoredResponses;
 
-    // ready loads blocked due to memory disambiguation
-    Stats::Vector<> lsq_blocked_loads;
+    /** Total number of squashed stores. */
+    Stats::Scalar<> lsqSquashedStores;
+
+    /** Total number of software prefetches ignored due to invalid addresses. */
+    Stats::Scalar<> invAddrSwpfs;
+
+    /** Ready loads blocked due to partial store-forwarding. */
+    Stats::Scalar<> lsqBlockedLoads;
+
+    /** Number of loads that were rescheduled. */
+    Stats::Scalar<> lsqRescheduledLoads;
+
+    /** Number of times the LSQ is blocked due to the cache. */
+    Stats::Scalar<> lsqCacheBlocked;
 
-    Stats::Scalar<> lsqInversion;
-*/
   public:
     /** Executes the load at the given index. */
     template <class T>
@@ -517,8 +491,9 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
     // at the head of the LSQ and are ready to commit (at the head of the ROB
     // too).
     if (req->getFlags() & UNCACHEABLE &&
-        (load_idx != loadHead || !load_inst->reachedCommit)) {
+        (load_idx != loadHead || !load_inst->isAtCommit())) {
         iewStage->rescheduleMemInst(load_inst);
+        ++lsqRescheduledLoads;
         return TheISA::genMachineCheckFault();
     }
 
@@ -598,7 +573,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
             // @todo: Need to make this a parameter.
             wb->schedule(curTick);
 
-            // Should keep track of stat for forwarded data
+            ++lsqForwLoads;
             return NoFault;
         } else if ((store_has_lower_limit && lower_load_has_store_part) ||
                    (store_has_upper_limit && upper_load_has_store_part) ||
@@ -626,6 +601,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
             // Tell IQ/mem dep unit that this instruction will need to be
             // rescheduled eventually
             iewStage->rescheduleMemInst(load_inst);
+            ++lsqRescheduledLoads;
 
             // Do not generate a writeback event as this instruction is not
             // complete.
@@ -633,12 +609,13 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
                     "Store idx %i to load addr %#x\n",
                     store_idx, req->getVaddr());
 
+            ++lsqBlockedLoads;
             return NoFault;
         }
     }
 
     // If there's no forwarding case, then go access memory
-    DPRINTF(LSQUnit, "Doing functional access for inst [sn:%lli] PC %#x\n",
+    DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %#x\n",
             load_inst->seqNum, load_inst->readPC());
 
     assert(!load_inst->memData);
@@ -646,9 +623,6 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
 
     ++usedPorts;
 
-    DPRINTF(LSQUnit, "Doing timing access for inst PC %#x\n",
-            load_inst->readPC());
-
     PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast);
     data_pkt->dataStatic(load_inst->memData);
 
@@ -658,8 +632,19 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
     state->inst = load_inst;
     data_pkt->senderState = state;
 
-    // if we have a cache, do cache access too
-    if (!dcachePort->sendTiming(data_pkt)) {
+    // If the cache is not blocked, do cache access
+    if (!lsq->cacheBlocked()) {
+        if (!dcachePort->sendTiming(data_pkt)) {
+            // If the access didn't succeed, tell the LSQ by setting
+            // the retry thread id.
+            lsq->setRetryTid(lsqID);
+        }
+    }
+
+    // If the cache was blocked, or has become blocked due to the access,
+    // handle it.
+    if (lsq->cacheBlocked()) {
+        ++lsqCacheBlocked;
         // There's an older load that's already going to squash.
         if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum)
             return NoFault;
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 6f32ec304..85b150cd9 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -29,11 +29,18 @@
  *          Korey Sewell
  */
 
-#include "cpu/checker/cpu.hh"
+#include "config/use_checker.hh"
+
+#include "cpu/o3/lsq.hh"
 #include "cpu/o3/lsq_unit.hh"
 #include "base/str.hh"
+#include "mem/packet.hh"
 #include "mem/request.hh"
 
+#if USE_CHECKER
+#include "cpu/checker/cpu.hh"
+#endif
+
 template<class Impl>
 LSQUnit<Impl>::WritebackEvent::WritebackEvent(DynInstPtr &_inst, PacketPtr _pkt,
                                               LSQUnit *lsq_ptr)
@@ -71,6 +78,7 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
     //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
 
     if (isSwitchedOut() || inst->isSquashed()) {
+        iewStage->decrWb(inst->seqNum);
         delete state;
         delete pkt;
         return;
@@ -89,46 +97,6 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
 }
 
 template <class Impl>
-Tick
-LSQUnit<Impl>::DcachePort::recvAtomic(PacketPtr pkt)
-{
-    panic("O3CPU model does not work with atomic mode!");
-    return curTick;
-}
-
-template <class Impl>
-void
-LSQUnit<Impl>::DcachePort::recvFunctional(PacketPtr pkt)
-{
-    panic("O3CPU doesn't expect recvFunctional callback!");
-}
-
-template <class Impl>
-void
-LSQUnit<Impl>::DcachePort::recvStatusChange(Status status)
-{
-    if (status == RangeChange)
-        return;
-
-    panic("O3CPU doesn't expect recvStatusChange callback!");
-}
-
-template <class Impl>
-bool
-LSQUnit<Impl>::DcachePort::recvTiming(PacketPtr pkt)
-{
-    lsq->completeDataAccess(pkt);
-    return true;
-}
-
-template <class Impl>
-void
-LSQUnit<Impl>::DcachePort::recvRetry()
-{
-    lsq->recvRetry();
-}
-
-template <class Impl>
 LSQUnit<Impl>::LSQUnit()
     : loads(0), stores(0), storesToWB(0), stalled(false),
       isStoreBlocked(false), isLoadBlocked(false),
@@ -138,13 +106,15 @@ LSQUnit<Impl>::LSQUnit()
 
 template<class Impl>
 void
-LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries,
+LSQUnit<Impl>::init(Params *params, LSQ *lsq_ptr, unsigned maxLQEntries,
                     unsigned maxSQEntries, unsigned id)
 {
     DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id);
 
     switchedOut = false;
 
+    lsq = lsq_ptr;
+
     lsqID = id;
 
     // Add 1 for the sentinel entry (they are circular queues).
@@ -161,8 +131,6 @@ LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries,
     usedPorts = 0;
     cachePorts = params->cachePorts;
 
-    mem = params->mem;
-
     memDepViolator = NULL;
 
     blockedLoadSeqNum = 0;
@@ -170,18 +138,15 @@ LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries,
 
 template<class Impl>
 void
-LSQUnit<Impl>::setCPU(FullCPU *cpu_ptr)
+LSQUnit<Impl>::setCPU(O3CPU *cpu_ptr)
 {
     cpu = cpu_ptr;
-    dcachePort = new DcachePort(cpu, this);
-
-    Port *mem_dport = mem->getPort("");
-    dcachePort->setPeer(mem_dport);
-    mem_dport->setPeer(dcachePort);
 
+#if USE_CHECKER
     if (cpu->checker) {
         cpu->checker->setDcachePort(dcachePort);
     }
+#endif
 }
 
 template<class Impl>
@@ -197,6 +162,47 @@ LSQUnit<Impl>::name() const
 
 template<class Impl>
 void
+LSQUnit<Impl>::regStats()
+{
+    lsqForwLoads
+        .name(name() + ".forwLoads")
+        .desc("Number of loads that had data forwarded from stores");
+
+    invAddrLoads
+        .name(name() + ".invAddrLoads")
+        .desc("Number of loads ignored due to an invalid address");
+
+    lsqSquashedLoads
+        .name(name() + ".squashedLoads")
+        .desc("Number of loads squashed");
+
+    lsqIgnoredResponses
+        .name(name() + ".ignoredResponses")
+        .desc("Number of memory responses ignored because the instruction is squashed");
+
+    lsqSquashedStores
+        .name(name() + ".squashedStores")
+        .desc("Number of stores squashed");
+
+    invAddrSwpfs
+        .name(name() + ".invAddrSwpfs")
+        .desc("Number of software prefetches ignored due to an invalid address");
+
+    lsqBlockedLoads
+        .name(name() + ".blockedLoads")
+        .desc("Number of blocked loads due to partial load-store forwarding");
+
+    lsqRescheduledLoads
+        .name(name() + ".rescheduledLoads")
+        .desc("Number of loads that were rescheduled");
+
+    lsqCacheBlocked
+        .name(name() + ".cacheBlocked")
+        .desc("Number of times an access to memory failed due to the cache being blocked");
+}
+
+template<class Impl>
+void
 LSQUnit<Impl>::clearLQ()
 {
     loadQueue.clear();
@@ -542,7 +548,7 @@ LSQUnit<Impl>::writebackStores()
            storeQueue[storeWBIdx].canWB &&
            usedPorts < cachePorts) {
 
-        if (isStoreBlocked) {
+        if (isStoreBlocked || lsq->cacheBlocked()) {
             DPRINTF(LSQUnit, "Unable to write back any more stores, cache"
                     " is blocked!\n");
             break;
@@ -617,7 +623,7 @@ LSQUnit<Impl>::writebackStores()
         if (!dcachePort->sendTiming(data_pkt)) {
             // Need to handle becoming blocked on a store.
             isStoreBlocked = true;
-
+            ++lsqCacheBlocked;
             assert(retryPkt == NULL);
             retryPkt = data_pkt;
         } else {
@@ -668,7 +674,7 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
         }
 
         // Clear the smart pointer to make sure it is decremented.
-        loadQueue[load_idx]->squashed = true;
+        loadQueue[load_idx]->setSquashed();
         loadQueue[load_idx] = NULL;
         --loads;
 
@@ -676,6 +682,7 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
         loadTail = load_idx;
 
         decrLdIdx(load_idx);
+        ++lsqSquashedLoads;
     }
 
     if (isLoadBlocked) {
@@ -711,7 +718,7 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
         }
 
         // Clear the smart pointer to make sure it is decremented.
-        storeQueue[store_idx].inst->squashed = true;
+        storeQueue[store_idx].inst->setSquashed();
         storeQueue[store_idx].inst = NULL;
         storeQueue[store_idx].canWB = 0;
 
@@ -722,6 +729,7 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
         storeTail = store_idx;
 
         decrStIdx(store_idx);
+        ++lsqSquashedStores;
     }
 }
 
@@ -744,9 +752,11 @@ LSQUnit<Impl>::storePostSend(Packet *pkt)
         // only works so long as the checker doesn't try to
         // verify the value in memory for stores.
         storeQueue[storeWBIdx].inst->setCompleted();
+#if USE_CHECKER
         if (cpu->checker) {
-            cpu->checker->tick(storeQueue[storeWBIdx].inst);
+            cpu->checker->verify(storeQueue[storeWBIdx].inst);
         }
+#endif
     }
 
     if (pkt->result != Packet::Success) {
@@ -781,6 +791,7 @@ LSQUnit<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt)
     // Squashed instructions do not need to complete their access.
     if (inst->isSquashed()) {
         assert(!inst->isStore());
+        ++lsqIgnoredResponses;
         return;
     }
 
@@ -839,9 +850,11 @@ LSQUnit<Impl>::completeStore(int store_idx)
     // Tell the checker we've completed this instruction.  Some stores
     // may get reported twice to the checker, but the checker can
     // handle that case.
+#if USE_CHECKER
     if (cpu->checker) {
-        cpu->checker->tick(storeQueue[store_idx].inst);
+        cpu->checker->verify(storeQueue[store_idx].inst);
     }
+#endif
 }
 
 template <class Impl>
@@ -857,6 +870,8 @@ LSQUnit<Impl>::recvRetry()
             isStoreBlocked = false;
         } else {
             // Still blocked!
+            ++lsqCacheBlocked;
+            lsq->setRetryTid(lsqID);
         }
     } else if (isLoadBlocked) {
         DPRINTF(LSQUnit, "Loads squash themselves and all younger insts, "
diff --git a/src/cpu/o3/mem_dep_unit.cc b/src/cpu/o3/mem_dep_unit.cc
index a95103266..6a14dcbff 100644
--- a/src/cpu/o3/mem_dep_unit.cc
+++ b/src/cpu/o3/mem_dep_unit.cc
@@ -28,23 +28,22 @@
  * Authors: Kevin Lim
  */
 
-#include "cpu/o3/alpha_dyn_inst.hh"
-#include "cpu/o3/alpha_impl.hh"
+#include "cpu/o3/isa_specific.hh"
 #include "cpu/o3/store_set.hh"
 #include "cpu/o3/mem_dep_unit_impl.hh"
 
 // Force instantation of memory dependency unit using store sets and
-// AlphaSimpleImpl.
-template class MemDepUnit<StoreSet, AlphaSimpleImpl>;
+// O3CPUImpl.
+template class MemDepUnit<StoreSet, O3CPUImpl>;
 
 #ifdef DEBUG
 template <>
 int
-MemDepUnit<StoreSet, AlphaSimpleImpl>::MemDepEntry::memdep_count = 0;
+MemDepUnit<StoreSet, O3CPUImpl>::MemDepEntry::memdep_count = 0;
 template <>
 int
-MemDepUnit<StoreSet, AlphaSimpleImpl>::MemDepEntry::memdep_insert = 0;
+MemDepUnit<StoreSet, O3CPUImpl>::MemDepEntry::memdep_insert = 0;
 template <>
 int
-MemDepUnit<StoreSet, AlphaSimpleImpl>::MemDepEntry::memdep_erase = 0;
+MemDepUnit<StoreSet, O3CPUImpl>::MemDepEntry::memdep_erase = 0;
 #endif
diff --git a/src/cpu/o3/alpha_params.hh b/src/cpu/o3/params.hh
index f3cf36887..1c234bcd7 100644..100755
--- a/src/cpu/o3/alpha_params.hh
+++ b/src/cpu/o3/params.hh
@@ -28,32 +28,28 @@
  * Authors: Kevin Lim
  */
 
-#ifndef __CPU_O3_ALPHA_PARAMS_HH__
-#define __CPU_O3_ALPHA_PARAMS_HH__
+#ifndef __CPU_O3_PARAMS_HH__
+#define __CPU_O3_PARAMS_HH__
 
 #include "cpu/o3/cpu.hh"
 
 //Forward declarations
-class AlphaDTB;
-class AlphaITB;
 class FUPool;
-class MemObject;
-class Process;
-class System;
 
 /**
- * This file defines the parameters that will be used for the AlphaFullCPU.
+ * This file defines the parameters that will be used for the O3CPU.
  * This must be defined externally so that the Impl can have a params class
  * defined that it can pass to all of the individual stages.
  */
-
-class AlphaSimpleParams : public BaseFullCPU::Params
+class O3Params : public BaseO3CPU::Params
 {
   public:
+    unsigned activity;
 
-#if FULL_SYSTEM
-    AlphaITB *itb; AlphaDTB *dtb;
-#else
+    //
+    // Pointers to key objects
+    //
+#if !FULL_SYSTEM
     std::vector<Process *> workload;
     Process *process;
 #endif // FULL_SYSTEM
@@ -62,13 +58,11 @@ class AlphaSimpleParams : public BaseFullCPU::Params
 
     BaseCPU *checker;
 
-    unsigned activity;
-
     //
     // Caches
     //
-//    MemInterface *icacheInterface;
-//    MemInterface *dcacheInterface;
+    //    MemInterface *icacheInterface;
+    //    MemInterface *dcacheInterface;
 
     unsigned cachePorts;
 
@@ -104,12 +98,10 @@ class AlphaSimpleParams : public BaseFullCPU::Params
     unsigned commitToIEWDelay;
     unsigned renameToIEWDelay;
     unsigned issueToExecuteDelay;
+    unsigned dispatchWidth;
     unsigned issueWidth;
-    unsigned executeWidth;
-    unsigned executeIntWidth;
-    unsigned executeFloatWidth;
-    unsigned executeBranchWidth;
-    unsigned executeMemoryWidth;
+    unsigned wbWidth;
+    unsigned wbDepth;
     FUPool *fuPool;
 
     //
@@ -123,6 +115,12 @@ class AlphaSimpleParams : public BaseFullCPU::Params
     Tick fetchTrapLatency;
 
     //
+    // Timebuffer sizes
+    //
+    unsigned backComSize;
+    unsigned forwardComSize;
+
+    //
     // Branch predictor (BP, BTB, RAS)
     //
     std::string predType;
diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh
index ade5e4e56..b6677b4b1 100644
--- a/src/cpu/o3/regfile.hh
+++ b/src/cpu/o3/regfile.hh
@@ -72,7 +72,7 @@ class PhysRegFile
     // Will make these registers public for now, but they probably should
     // be private eventually with some accessor functions.
   public:
-    typedef typename Impl::FullCPU FullCPU;
+    typedef typename Impl::O3CPU O3CPU;
 
     /**
      * Constructs a physical register file with the specified amount of
@@ -86,10 +86,6 @@ class PhysRegFile
     //The duplication is unfortunate but it's better than having
     //different ways to access certain registers.
 
-    //Add these in later when everything else is in place
-//    void serialize(std::ostream &os);
-//    void unserialize(Checkpoint *cp, const std::string &section);
-
     /** Reads an integer register. */
     uint64_t readIntReg(PhysRegIndex reg_idx)
     {
@@ -278,11 +274,11 @@ class PhysRegFile
 
   private:
     /** CPU pointer. */
-    FullCPU *cpu;
+    O3CPU *cpu;
 
   public:
     /** Sets the CPU pointer. */
-    void setCPU(FullCPU *cpu_ptr) { cpu = cpu_ptr; }
+    void setCPU(O3CPU *cpu_ptr) { cpu = cpu_ptr; }
 
     /** Number of physical integer registers. */
     unsigned numPhysicalIntRegs;
diff --git a/src/cpu/o3/rename.cc b/src/cpu/o3/rename.cc
index 9ca8e82c6..443ada0cb 100644
--- a/src/cpu/o3/rename.cc
+++ b/src/cpu/o3/rename.cc
@@ -28,8 +28,7 @@
  * Authors: Kevin Lim
  */
 
-#include "cpu/o3/alpha_dyn_inst.hh"
-#include "cpu/o3/alpha_impl.hh"
+#include "cpu/o3/isa_specific.hh"
 #include "cpu/o3/rename_impl.hh"
 
-template class DefaultRename<AlphaSimpleImpl>;
+template class DefaultRename<O3CPUImpl>;
diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh
index 42fdf6bf5..034087feb 100644
--- a/src/cpu/o3/rename.hh
+++ b/src/cpu/o3/rename.hh
@@ -55,7 +55,7 @@ class DefaultRename
     // Typedefs from the Impl.
     typedef typename Impl::CPUPol CPUPol;
     typedef typename Impl::DynInstPtr DynInstPtr;
-    typedef typename Impl::FullCPU FullCPU;
+    typedef typename Impl::O3CPU O3CPU;
     typedef typename Impl::Params Params;
 
     // Typedefs from the CPUPol
@@ -115,7 +115,7 @@ class DefaultRename
     void regStats();
 
     /** Sets CPU pointer. */
-    void setCPU(FullCPU *cpu_ptr);
+    void setCPU(O3CPU *cpu_ptr);
 
     /** Sets the main backwards communication time buffer pointer. */
     void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
@@ -157,12 +157,15 @@ class DefaultRename
     /** Sets pointer to the scoreboard. */
     void setScoreboard(Scoreboard *_scoreboard);
 
+    /** Drains the rename stage. */
+    bool drain();
+
+    /** Resumes execution after a drain. */
+    void resume() { }
+
     /** Switches out the rename stage. */
     void switchOut();
 
-    /** Completes the switch out. */
-    void doSwitchOut();
-
     /** Takes over from another CPU's thread. */
     void takeOverFrom();
 
@@ -291,7 +294,7 @@ class DefaultRename
     std::list<RenameHistory> historyBuffer[Impl::MaxThreads];
 
     /** Pointer to CPU. */
-    FullCPU *cpu;
+    O3CPU *cpu;
 
     /** Pointer to main time buffer used for backwards communication. */
     TimeBuffer<TimeStruct> *timeBuffer;
diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh
index df33b98ee..805a72808 100644
--- a/src/cpu/o3/rename_impl.hh
+++ b/src/cpu/o3/rename_impl.hh
@@ -162,7 +162,7 @@ DefaultRename<Impl>::regStats()
 
 template <class Impl>
 void
-DefaultRename<Impl>::setCPU(FullCPU *cpu_ptr)
+DefaultRename<Impl>::setCPU(O3CPU *cpu_ptr)
 {
     DPRINTF(Rename, "Setting CPU pointer.\n");
     cpu = cpu_ptr;
@@ -257,16 +257,17 @@ DefaultRename<Impl>::setScoreboard(Scoreboard *_scoreboard)
 }
 
 template <class Impl>
-void
-DefaultRename<Impl>::switchOut()
+bool
+DefaultRename<Impl>::drain()
 {
     // Rename is ready to switch out at any time.
-    cpu->signalSwitched();
+    cpu->signalDrained();
+    return true;
 }
 
 template <class Impl>
 void
-DefaultRename<Impl>::doSwitchOut()
+DefaultRename<Impl>::switchOut()
 {
     // Clear any state, fix up the rename map.
     for (int i = 0; i < numThreads; i++) {
@@ -341,7 +342,7 @@ DefaultRename<Impl>::squash(unsigned tid)
 
     for (int i=0; i<fromDecode->size; i++) {
         if (fromDecode->insts[i]->threadNumber == tid) {
-            fromDecode->insts[i]->squashed = true;
+            fromDecode->insts[i]->setSquashed();
             wroteToTimeBuffer = true;
             squashCount++;
         }
@@ -755,7 +756,7 @@ DefaultRename<Impl>::updateStatus()
 
             DPRINTF(Activity, "Activating stage.\n");
 
-            cpu->activateStage(FullCPU::RenameIdx);
+            cpu->activateStage(O3CPU::RenameIdx);
         }
     } else {
         // If it's not unblocking, then rename will not have any internal
@@ -764,7 +765,7 @@ DefaultRename<Impl>::updateStatus()
             _status = Inactive;
             DPRINTF(Activity, "Deactivating stage.\n");
 
-            cpu->deactivateStage(FullCPU::RenameIdx);
+            cpu->deactivateStage(O3CPU::RenameIdx);
         }
     }
 }
@@ -1022,7 +1023,7 @@ DefaultRename<Impl>::validInsts()
     unsigned inst_count = 0;
 
     for (int i=0; i<fromDecode->size; i++) {
-        if (!fromDecode->insts[i]->squashed)
+        if (!fromDecode->insts[i]->isSquashed())
             inst_count++;
     }
 
@@ -1206,7 +1207,7 @@ DefaultRename<Impl>::checkSignalsAndUpdate(unsigned tid)
         }
 
         DPRINTF(Rename, "[tid:%u]: Instruction must be processed by rename."
-                " Adding to front of list.", tid);
+                " Adding to front of list.\n", tid);
 
         serializeInst[tid] = NULL;
 
diff --git a/src/cpu/o3/rob.cc b/src/cpu/o3/rob.cc
index f99e5ccfd..9976049cd 100644
--- a/src/cpu/o3/rob.cc
+++ b/src/cpu/o3/rob.cc
@@ -29,9 +29,8 @@
  *          Nathan Binkert
  */
 
-#include "cpu/o3/alpha_dyn_inst.hh"
-#include "cpu/o3/alpha_impl.hh"
+#include "cpu/o3/isa_specific.hh"
 #include "cpu/o3/rob_impl.hh"
 
 // Force instantiation of InstructionQueue.
-template class ROB<AlphaSimpleImpl>;
+template class ROB<O3CPUImpl>;
diff --git a/src/cpu/o3/rob.hh b/src/cpu/o3/rob.hh
index 6d1402531..7cd5a5143 100644
--- a/src/cpu/o3/rob.hh
+++ b/src/cpu/o3/rob.hh
@@ -26,6 +26,7 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * Authors: Kevin Lim
+ *          Korey Sewell
  */
 
 #ifndef __CPU_O3_ROB_HH__
@@ -45,7 +46,7 @@ class ROB
     typedef TheISA::RegIndex RegIndex;
   public:
     //Typedefs from the Impl.
-    typedef typename Impl::FullCPU FullCPU;
+    typedef typename Impl::O3CPU O3CPU;
     typedef typename Impl::DynInstPtr DynInstPtr;
 
     typedef std::pair<RegIndex, PhysRegIndex> UnmapInfo;
@@ -90,7 +91,7 @@ class ROB
      *  is created within.
      *  @param cpu_ptr Pointer to the implementation specific full CPU object.
      */
-    void setCPU(FullCPU *cpu_ptr);
+    void setCPU(O3CPU *cpu_ptr);
 
     /** Sets pointer to the list of active threads.
      *  @param at_ptr Pointer to the list of active threads.
@@ -257,7 +258,7 @@ class ROB
 
   private:
     /** Pointer to the CPU. */
-    FullCPU *cpu;
+    O3CPU *cpu;
 
     /** Active Threads in CPU */
     std::list<unsigned>* activeThreads;
@@ -307,7 +308,7 @@ class ROB
 
   private:
     /** The sequence number of the squashed instruction. */
-    InstSeqNum squashedSeqNum;
+    InstSeqNum squashedSeqNum[Impl::MaxThreads];
 
     /** Is the ROB done squashing. */
     bool doneSquashing[Impl::MaxThreads];
diff --git a/src/cpu/o3/rob_impl.hh b/src/cpu/o3/rob_impl.hh
index 97694e371..1b9f666b8 100644
--- a/src/cpu/o3/rob_impl.hh
+++ b/src/cpu/o3/rob_impl.hh
@@ -26,6 +26,7 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * Authors: Kevin Lim
+ *          Korey Sewell
  */
 
 #include "config/full_system.hh"
@@ -40,10 +41,10 @@ ROB<Impl>::ROB(unsigned _numEntries, unsigned _squashWidth,
     : numEntries(_numEntries),
       squashWidth(_squashWidth),
       numInstsInROB(0),
-      squashedSeqNum(0),
       numThreads(_numThreads)
 {
     for (int tid=0; tid  < numThreads; tid++) {
+        squashedSeqNum[tid] = 0;
         doneSquashing[tid] = true;
         threadEntries[tid] = 0;
     }
@@ -100,7 +101,7 @@ ROB<Impl>::name() const
 
 template <class Impl>
 void
-ROB<Impl>::setCPU(FullCPU *cpu_ptr)
+ROB<Impl>::setCPU(O3CPU *cpu_ptr)
 {
     cpu = cpu_ptr;
 
@@ -276,7 +277,7 @@ ROB<Impl>::retireHead(unsigned tid)
     --numInstsInROB;
     --threadEntries[tid];
 
-    head_inst->removeInROB();
+    head_inst->clearInROB();
     head_inst->setCommitted();
 
     instList[tid].erase(head_it);
@@ -351,11 +352,11 @@ void
 ROB<Impl>::doSquash(unsigned tid)
 {
     DPRINTF(ROB, "[tid:%u]: Squashing instructions until [sn:%i].\n",
-            tid, squashedSeqNum);
+            tid, squashedSeqNum[tid]);
 
     assert(squashIt[tid] != instList[tid].end());
 
-    if ((*squashIt[tid])->seqNum < squashedSeqNum) {
+    if ((*squashIt[tid])->seqNum < squashedSeqNum[tid]) {
         DPRINTF(ROB, "[tid:%u]: Done squashing instructions.\n",
                 tid);
 
@@ -370,7 +371,7 @@ ROB<Impl>::doSquash(unsigned tid)
     for (int numSquashed = 0;
          numSquashed < squashWidth &&
          squashIt[tid] != instList[tid].end() &&
-         (*squashIt[tid])->seqNum > squashedSeqNum;
+         (*squashIt[tid])->seqNum > squashedSeqNum[tid];
          ++numSquashed)
     {
         DPRINTF(ROB, "[tid:%u]: Squashing instruction PC %#x, seq num %i.\n",
@@ -407,7 +408,7 @@ ROB<Impl>::doSquash(unsigned tid)
 
 
     // Check if ROB is done squashing.
-    if ((*squashIt[tid])->seqNum <= squashedSeqNum) {
+    if ((*squashIt[tid])->seqNum <= squashedSeqNum[tid]) {
         DPRINTF(ROB, "[tid:%u]: Done squashing instructions.\n",
                 tid);
 
@@ -519,7 +520,7 @@ ROB<Impl>::squash(InstSeqNum squash_num,unsigned tid)
 
     doneSquashing[tid] = false;
 
-    squashedSeqNum = squash_num;
+    squashedSeqNum[tid] = squash_num;
 
     if (!instList[tid].empty()) {
         InstIt tail_thread = instList[tid].end();
@@ -543,6 +544,7 @@ ROB<Impl>::readHeadInst()
     }
 }
 */
+
 template <class Impl>
 typename Impl::DynInstPtr
 ROB<Impl>::readHeadInst(unsigned tid)
@@ -557,6 +559,7 @@ ROB<Impl>::readHeadInst(unsigned tid)
         return dummyInst;
     }
 }
+
 /*
 template <class Impl>
 uint64_t
diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh
new file mode 100755
index 000000000..df8d1a6d8
--- /dev/null
+++ b/src/cpu/o3/thread_context.hh
@@ -0,0 +1,250 @@
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ *          Korey Sewell
+ */
+
+#ifndef __CPU_O3_THREAD_CONTEXT_HH__
+#define __CPU_O3_THREAD_CONTEXT_HH__
+
+#include "cpu/o3/isa_specific.hh"
+
+class EndQuiesceEvent;
+namespace Kernel {
+    class Statistics;
+};
+
+class TranslatingPort;
+
+/**
+ * Derived ThreadContext class for use with the O3CPU.  It
+ * provides the interface for any external objects to access a
+ * single thread's state and some general CPU state.  Any time
+ * external objects try to update state through this interface,
+ * the CPU will create an event to squash all in-flight
+ * instructions in order to ensure state is maintained correctly.
+ * It must be defined specifically for the O3CPU because
+ * not all architectural state is located within the O3ThreadState
+ * (such as the commit PC, and registers), and specific actions
+ * must be taken when using this interface (such as squashing all
+ * in-flight instructions when doing a write to this interface).
+ */
+template <class Impl>
+class O3ThreadContext : public ThreadContext
+{
+  public:
+    typedef typename Impl::O3CPU O3CPU;
+
+   /** Pointer to the CPU. */
+    O3CPU *cpu;
+
+    /** Pointer to the thread state that this TC corresponds to. */
+    O3ThreadState<Impl> *thread;
+
+    /** Returns a pointer to this CPU. */
+    virtual BaseCPU *getCpuPtr() { return cpu; }
+
+    /** Sets this CPU's ID. */
+    virtual void setCpuId(int id) { cpu->setCpuId(id); }
+
+    /** Reads this CPU's ID. */
+    virtual int readCpuId() { return cpu->readCpuId(); }
+
+#if FULL_SYSTEM
+    /** Returns a pointer to the system. */
+    virtual System *getSystemPtr() { return cpu->system; }
+
+    /** Returns a pointer to physical memory. */
+    virtual PhysicalMemory *getPhysMemPtr() { return cpu->physmem; }
+
+    /** Returns a pointer to this thread's kernel statistics. */
+    virtual Kernel::Statistics *getKernelStats()
+    { return thread->kernelStats; }
+
+    virtual FunctionalPort *getPhysPort() { return thread->getPhysPort(); }
+
+    virtual VirtualPort *getVirtPort(ThreadContext *src_tc = NULL);
+
+    void delVirtPort(VirtualPort *vp);
+#else
+    virtual TranslatingPort *getMemPort() { return thread->getMemPort(); }
+
+    /** Returns a pointer to this thread's process. */
+    virtual Process *getProcessPtr() { return thread->getProcessPtr(); }
+#endif
+    /** Returns this thread's status. */
+    virtual Status status() const { return thread->status(); }
+
+    /** Sets this thread's status. */
+    virtual void setStatus(Status new_status)
+    { thread->setStatus(new_status); }
+
+    /** Set the status to Active.  Optional delay indicates number of
+     * cycles to wait before beginning execution. */
+    virtual void activate(int delay = 1);
+
+    /** Set the status to Suspended. */
+    virtual void suspend();
+
+    /** Set the status to Unallocated. */
+    virtual void deallocate(int delay = 0);
+
+    /** Set the status to Halted. */
+    virtual void halt();
+
+#if FULL_SYSTEM
+    /** Dumps the function profiling information.
+     * @todo: Implement.
+     */
+    virtual void dumpFuncProfile();
+#endif
+    /** Takes over execution of a thread from another CPU. */
+    virtual void takeOverFrom(ThreadContext *old_context);
+
+    /** Registers statistics associated with this TC. */
+    virtual void regStats(const std::string &name);
+
+    /** Serializes state. */
+    virtual void serialize(std::ostream &os);
+    /** Unserializes state. */
+    virtual void unserialize(Checkpoint *cp, const std::string &section);
+
+#if FULL_SYSTEM
+    /** Reads the last tick that this thread was activated on. */
+    virtual Tick readLastActivate();
+    /** Reads the last tick that this thread was suspended on. */
+    virtual Tick readLastSuspend();
+
+    /** Clears the function profiling information. */
+    virtual void profileClear();
+    /** Samples the function profiling information. */
+    virtual void profileSample();
+#endif
+    /** Returns this thread's ID number. */
+    virtual int getThreadNum() { return thread->readTid(); }
+
+    /** Returns the instruction this thread is currently committing.
+     *  Only used when an instruction faults.
+     */
+    virtual TheISA::MachInst getInst();
+
+    /** Copies the architectural registers from another TC into this TC. */
+    virtual void copyArchRegs(ThreadContext *tc);
+
+    /** Resets all architectural registers to 0. */
+    virtual void clearArchRegs();
+
+    /** Reads an integer register. */
+    virtual uint64_t readIntReg(int reg_idx);
+
+    virtual FloatReg readFloatReg(int reg_idx, int width);
+
+    virtual FloatReg readFloatReg(int reg_idx);
+
+    virtual FloatRegBits readFloatRegBits(int reg_idx, int width);
+
+    virtual FloatRegBits readFloatRegBits(int reg_idx);
+
+    /** Sets an integer register to a value. */
+    virtual void setIntReg(int reg_idx, uint64_t val);
+
+    virtual void setFloatReg(int reg_idx, FloatReg val, int width);
+
+    virtual void setFloatReg(int reg_idx, FloatReg val);
+
+    virtual void setFloatRegBits(int reg_idx, FloatRegBits val, int width);
+
+    virtual void setFloatRegBits(int reg_idx, FloatRegBits val);
+
+    /** Reads this thread's PC. */
+    virtual uint64_t readPC()
+    { return cpu->readPC(thread->readTid()); }
+
+    /** Sets this thread's PC. */
+    virtual void setPC(uint64_t val);
+
+    /** Reads this thread's next PC. */
+    virtual uint64_t readNextPC()
+    { return cpu->readNextPC(thread->readTid()); }
+
+    /** Sets this thread's next PC. */
+    virtual void setNextPC(uint64_t val);
+
+    /** Reads a miscellaneous register. */
+    virtual MiscReg readMiscReg(int misc_reg)
+    { return cpu->readMiscReg(misc_reg, thread->readTid()); }
+
+    /** Reads a misc. register, including any side-effects the
+     * read might have as defined by the architecture. */
+    virtual MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault)
+    { return cpu->readMiscRegWithEffect(misc_reg, fault, thread->readTid()); }
+
+    /** Sets a misc. register. */
+    virtual Fault setMiscReg(int misc_reg, const MiscReg &val);
+
+    /** Sets a misc. register, including any side-effects the
+     * write might have as defined by the architecture. */
+    virtual Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val);
+
+    /** Returns the number of consecutive store conditional failures. */
+    // @todo: Figure out where these store cond failures should go.
+    virtual unsigned readStCondFailures()
+    { return thread->storeCondFailures; }
+
+    /** Sets the number of consecutive store conditional failures. */
+    virtual void setStCondFailures(unsigned sc_failures)
+    { thread->storeCondFailures = sc_failures; }
+
+    // Only really makes sense for old CPU model.  Lots of code
+    // outside the CPU still checks this function, so it will
+    // always return false to keep everything working.
+    /** Checks if the thread is misspeculating.  Because it is
+     * very difficult to determine if the thread is
+     * misspeculating, this is set as false. */
+    virtual bool misspeculating() { return false; }
+
+#if !FULL_SYSTEM
+    /** Gets a syscall argument by index. */
+    virtual IntReg getSyscallArg(int i);
+
+    /** Sets a syscall argument. */
+    virtual void setSyscallArg(int i, IntReg val);
+
+    /** Sets the syscall return value. */
+    virtual void setSyscallReturn(SyscallReturn return_value);
+
+    /** Executes a syscall in SE mode. */
+    virtual void syscall(int64_t callnum)
+    { return cpu->syscall(callnum, thread->readTid()); }
+
+    /** Reads the funcExeInst counter. */
+    virtual Counter readFuncExeInst() { return thread->funcExeInst; }
+#endif
+};
+
+#endif
diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh
new file mode 100755
index 000000000..bf8cbf850
--- /dev/null
+++ b/src/cpu/o3/thread_context_impl.hh
@@ -0,0 +1,493 @@
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ *          Korey Sewell
+ */
+
+#include "cpu/o3/thread_context.hh"
+#include "cpu/quiesce_event.hh"
+
+using namespace TheISA;
+
+#if FULL_SYSTEM
+/** Returns the thread's own virtual port when src_tc is NULL; otherwise
+ *  creates a new port for src_tc peered with physmem's "functional" port. */
+template <class Impl>
+VirtualPort *
+O3ThreadContext<Impl>::getVirtPort(ThreadContext *src_tc)
+{
+    if (!src_tc)
+        return thread->getVirtPort();
+
+    VirtualPort *new_vport = new VirtualPort("tc-vport", src_tc);
+    Port *functional_port = cpu->system->physmem->getPort("functional");
+    // Make the two ports each other's peer before handing the new one out.
+    functional_port->setPeer(new_vport);
+    new_vport->setPeer(functional_port);
+    return new_vport;
+}
+
+template <class Impl>
+void
+O3ThreadContext<Impl>::dumpFuncProfile()
+{
+    // Currently not supported: the body is empty, so calling this is a no-op.
+}
+#endif
+
+template <class Impl>
+void
+O3ThreadContext<Impl>::takeOverFrom(ThreadContext *old_context)
+{
+    // Adopt old_context's state; both must already share a system/process.
+#if FULL_SYSTEM
+    assert(getSystemPtr() == old_context->getSystemPtr());
+#else
+    assert(getProcessPtr() == old_context->getProcessPtr());
+#endif
+
+    // Copy over functional state: status, architected registers, CPU id.
+    setStatus(old_context->status());
+    copyArchRegs(old_context);
+    setCpuId(old_context->readCpuId());
+
+#if !FULL_SYSTEM
+    thread->funcExeInst = old_context->readFuncExeInst();
+#else
+    EndQuiesceEvent *other_quiesce = old_context->getQuiesceEvent();
+    if (other_quiesce) {
+        // Point the quiesce event's TC at this TC so that it wakes up
+        // the proper CPU.
+        other_quiesce->tc = this;
+    }
+    if (thread->quiesceEvent) {
+        thread->quiesceEvent->tc = this;  // same for this thread's own event
+    }
+
+    // Transfer kernel stats; NOTE(review): old pointer is dropped, leak?
+    thread->kernelStats = old_context->getKernelStats();
+//    storeCondFailures = 0;
+    cpu->lockFlag = false;
+#endif
+
+    old_context->setStatus(ThreadContext::Unallocated);
+
+    thread->inSyscall = false;  // both "state update" flags start cleared
+    thread->trapPending = false;
+}
+
+#if FULL_SYSTEM
+template <class Impl>
+void
+O3ThreadContext<Impl>::delVirtPort(VirtualPort *vp)
+{
+    delete vp->getPeer();  // free the peer first, while vp is still valid
+    delete vp;             // NOTE(review): assumes vp came from getVirtPort()
+}
+#endif
+
+/** Activates the thread: no-op if already Active; an Unallocated thread is
+ *  handed to activateWhenReady(); otherwise it is marked Active now. */
+template <class Impl>
+void
+O3ThreadContext<Impl>::activate(int delay)
+{
+    DPRINTF(O3CPU, "Calling activate on Thread Context %d\n",
+            getThreadNum());
+
+    if (thread->status() == ThreadContext::Active)
+        return;
+
+#if FULL_SYSTEM
+    thread->lastActivate = curTick;
+#endif
+
+    if (thread->status() != ThreadContext::Unallocated) {
+        // Any other state: mark Active, then call activateContext().
+        thread->setStatus(ThreadContext::Active);
+        cpu->activateContext(thread->readTid(), delay);
+    } else {
+        cpu->activateWhenReady(thread->readTid());
+    }
+}
+
+template <class Impl>
+void
+O3ThreadContext<Impl>::suspend()
+{
+    DPRINTF(O3CPU, "Calling suspend on Thread Context %d\n",
+            getThreadNum());
+
+    if (thread->status() == ThreadContext::Suspended)
+        return;  // already suspended, nothing to do
+
+#if FULL_SYSTEM
+    thread->lastActivate = curTick;  // NOTE(review): also set on suspend?
+    thread->lastSuspend = curTick;
+#endif
+/* Disabled (commented-out) pending-interrupt check:
+#if FULL_SYSTEM
+    // Don't change the status from active if there are pending interrupts
+    if (cpu->check_interrupts()) {
+        assert(status() == ThreadContext::Active);
+        return;
+    }
+#endif
+*/
+    thread->setStatus(ThreadContext::Suspended);
+    cpu->suspendContext(thread->readTid());
+}
+
+/** Marks the thread Unallocated and forwards the request (with delay) to
+ *  the CPU; does nothing when the thread is already Unallocated. */
+template <class Impl>
+void
+O3ThreadContext<Impl>::deallocate(int delay)
+{
+    DPRINTF(O3CPU, "Calling deallocate on Thread Context %d\n",
+            getThreadNum());
+    if (thread->status() != ThreadContext::Unallocated) {
+        thread->setStatus(ThreadContext::Unallocated);
+        cpu->deallocateContext(thread->readTid(), delay);
+    }
+}
+
+/** Marks the thread Halted and forwards the request to the CPU; does
+ *  nothing when the thread is already Halted. */
+template <class Impl>
+void
+O3ThreadContext<Impl>::halt()
+{
+    DPRINTF(O3CPU, "Calling halt on Thread Context %d\n",
+            getThreadNum());
+    if (thread->status() != ThreadContext::Halted) {
+        thread->setStatus(ThreadContext::Halted);
+        cpu->haltContext(thread->readTid());
+    }
+}
+
+template <class Impl>
+void
+O3ThreadContext<Impl>::regStats(const std::string &name)
+{
+#if FULL_SYSTEM  // without FULL_SYSTEM this function does nothing
+    thread->kernelStats = new Kernel::Statistics(cpu->system);
+    thread->kernelStats->regStats(name + ".kern");  // named "<name>.kern"
+#endif
+}
+
+template <class Impl>
+void
+O3ThreadContext<Impl>::serialize(std::ostream &os)
+{
+#if FULL_SYSTEM  // only the kernel statistics are written, and only here
+    if (thread->kernelStats)  // skipped when no statistics object is set
+        thread->kernelStats->serialize(os);
+#endif
+
+}
+
+template <class Impl>
+void
+O3ThreadContext<Impl>::unserialize(Checkpoint *cp, const std::string &section)
+{
+#if FULL_SYSTEM  // only the kernel statistics are restored, and only here
+    if (thread->kernelStats)  // skipped when no statistics object is set
+        thread->kernelStats->unserialize(cp, section);
+#endif
+
+}
+
+#if FULL_SYSTEM
+/** Returns the tick stored in the thread state's lastActivate field, which
+ *  activate() and suspend() set to curTick. */
+template <class Impl>
+Tick
+O3ThreadContext<Impl>::readLastActivate()
+{ return thread->lastActivate; }
+
+/** Returns the tick stored in the thread state's lastSuspend field, which
+ *  suspend() sets to curTick. */
+template <class Impl>
+Tick
+O3ThreadContext<Impl>::readLastSuspend()
+{ return thread->lastSuspend; }
+
+template <class Impl>
+void
+O3ThreadContext<Impl>::profileClear()
+{}  // empty body: calling this has no effect
+
+template <class Impl>
+void
+O3ThreadContext<Impl>::profileSample()
+{}  // empty body: calling this has no effect
+#endif
+
+/** Returns the machine instruction reported by the thread state's
+ *  getInst(). */
+template <class Impl>
+TheISA::MachInst
+O3ThreadContext<Impl>::getInst()
+{ return thread->getInst(); }
+
+/** Copies tc's architectural state (integer, floating point and misc
+ *  registers, PC and next PC) into this thread.  This will mess things up
+ *  unless the ROB is empty and no instructions are in the pipeline. */
+template <class Impl>
+void
+O3ThreadContext<Impl>::copyArchRegs(ThreadContext *tc)
+{
+    const unsigned tid = thread->readTid();
+
+    // Integer registers: write each value to its currently renamed register.
+    for (int int_idx = 0; int_idx < TheISA::NumIntRegs; ++int_idx) {
+        PhysRegIndex phys_reg = cpu->renameMap[tid].lookup(int_idx);
+
+        DPRINTF(O3CPU, "Copying over register %i, had data %lli, "
+                "now has data %lli.\n",
+                phys_reg, cpu->readIntReg(phys_reg),
+                tc->readIntReg(int_idx));
+
+        cpu->setIntReg(phys_reg, tc->readIntReg(int_idx));
+    }
+
+    // Floating point registers are looked up at offset FP_Base_DepTag.
+    for (int fp_idx = 0; fp_idx < TheISA::NumFloatRegs; ++fp_idx) {
+        PhysRegIndex phys_reg =
+            cpu->renameMap[tid].lookup(fp_idx + TheISA::FP_Base_DepTag);
+        cpu->setFloatRegBits(phys_reg, tc->readFloatRegBits(fp_idx));
+    }
+
+    // Miscellaneous registers.
+    copyMiscRegs(tc, this);
+
+    // Finally the PC and the next PC.
+    cpu->setPC(tc->readPC(), tid);
+    cpu->setNextPC(tc->readNextPC(), tid);
+#if !FULL_SYSTEM
+    thread->funcExeInst = tc->readFuncExeInst();
+#endif
+}
+
+template <class Impl>
+void
+O3ThreadContext<Impl>::clearArchRegs()
+{}  // empty body: no register is modified by this call
+
+/** Returns architected integer register reg_idx of this thread, read
+ *  through the CPU's readArchIntReg(). */
+template <class Impl>
+uint64_t
+O3ThreadContext<Impl>::readIntReg(int reg_idx)
+{ return cpu->readArchIntReg(reg_idx, thread->readTid()); }
+
+/** Reads an architected FP register through the CPU's Single (width 32)
+ *  or Double (width 64) accessor; any other width panics. */
+template <class Impl>
+FloatReg
+O3ThreadContext<Impl>::readFloatReg(int reg_idx, int width)
+{
+    if (width == 32)
+        return cpu->readArchFloatRegSingle(reg_idx, thread->readTid());
+    if (width == 64)
+        return cpu->readArchFloatRegDouble(reg_idx, thread->readTid());
+
+    panic("Unsupported width!");
+    return 0;
+}
+
+/** Width-less overload: reads the register through the CPU's
+ *  readArchFloatRegSingle() accessor. */
+template <class Impl>
+FloatReg
+O3ThreadContext<Impl>::readFloatReg(int reg_idx)
+{ return cpu->readArchFloatRegSingle(reg_idx, thread->readTid()); }
+
+template <class Impl>
+FloatRegBits
+O3ThreadContext<Impl>::readFloatRegBits(int reg_idx, int width)
+{   // NOTE: width is accepted but never read in this body.
+    DPRINTF(Fault, "Reading floatint register through the TC!\n");
+    return cpu->readArchFloatRegInt(reg_idx, thread->readTid());
+}
+
+/** Width-less overload: returns the register's bits through the CPU's
+ *  readArchFloatRegInt() accessor. */
+template <class Impl>
+FloatRegBits
+O3ThreadContext<Impl>::readFloatRegBits(int reg_idx)
+{ return cpu->readArchFloatRegInt(reg_idx, thread->readTid()); }
+
+/** Writes architected integer register reg_idx of this thread via the CPU. */
+template <class Impl>
+void
+O3ThreadContext<Impl>::setIntReg(int reg_idx, uint64_t val)
+{
+    cpu->setArchIntReg(reg_idx, val, thread->readTid());
+
+    // Squash unless a trap is pending or the thread is in a syscall.
+    if (!(thread->trapPending || thread->inSyscall))
+        cpu->squashFromTC(thread->readTid());
+}
+
+/** Writes an FP register through the CPU's Single (width 32) or Double
+ *  (width 64) accessor; any other width panics instead of being ignored. */
+template <class Impl>
+void
+O3ThreadContext<Impl>::setFloatReg(int reg_idx, FloatReg val, int width)
+{
+    if (width == 32)
+        cpu->setArchFloatRegSingle(reg_idx, val, thread->readTid());
+    else if (width == 64)
+        cpu->setArchFloatRegDouble(reg_idx, val, thread->readTid());
+    else
+        panic("Unsupported width!");  // same policy as readFloatReg()
+
+    // Squash if we're not already in a state update mode.
+    if (!thread->trapPending && !thread->inSyscall) {
+        cpu->squashFromTC(thread->readTid());
+    }
+}
+
+/** Width-less overload: writes val through setArchFloatRegSingle(). */
+template <class Impl>
+void
+O3ThreadContext<Impl>::setFloatReg(int reg_idx, FloatReg val)
+{
+    cpu->setArchFloatRegSingle(reg_idx, val, thread->readTid());
+    // Squash unless a trap is pending or the thread is in a syscall.
+    if (!(thread->trapPending || thread->inSyscall))
+        cpu->squashFromTC(thread->readTid());
+}
+
+/** Writes the bits of an architected FP register; width is not used. */
+template <class Impl>
+void
+O3ThreadContext<Impl>::setFloatRegBits(int reg_idx, FloatRegBits val,
+                                       int width)
+{
+    DPRINTF(Fault, "Setting floatint register through the TC!\n");
+    cpu->setArchFloatRegInt(reg_idx, val, thread->readTid());
+
+    // Squash unless a trap is pending or the thread is in a syscall.
+    if (!(thread->trapPending || thread->inSyscall))
+        cpu->squashFromTC(thread->readTid());
+}
+
+/** Width-less overload: writes val through setArchFloatRegInt(). */
+template <class Impl>
+void
+O3ThreadContext<Impl>::setFloatRegBits(int reg_idx, FloatRegBits val)
+{
+    cpu->setArchFloatRegInt(reg_idx, val, thread->readTid());
+
+    // Squash unless a trap is pending or the thread is in a syscall.
+    if (!(thread->trapPending || thread->inSyscall))
+        cpu->squashFromTC(thread->readTid());
+}
+
+/** Sets this thread's PC through the CPU. */
+template <class Impl>
+void
+O3ThreadContext<Impl>::setPC(uint64_t val)
+{
+    cpu->setPC(val, thread->readTid());
+
+    // Squash unless a trap is pending or the thread is in a syscall.
+    if (!(thread->trapPending || thread->inSyscall))
+        cpu->squashFromTC(thread->readTid());
+}
+
+/** Sets this thread's next PC through the CPU. */
+template <class Impl>
+void
+O3ThreadContext<Impl>::setNextPC(uint64_t val)
+{
+    cpu->setNextPC(val, thread->readTid());
+
+    // Squash unless a trap is pending or the thread is in a syscall.
+    if (!(thread->trapPending || thread->inSyscall))
+        cpu->squashFromTC(thread->readTid());
+}
+
+/** Writes a misc register through the CPU and returns the CPU's Fault. */
+template <class Impl>
+Fault
+O3ThreadContext<Impl>::setMiscReg(int misc_reg, const MiscReg &val)
+{
+    Fault result = cpu->setMiscReg(misc_reg, val, thread->readTid());
+
+    // Squash unless a trap is pending or the thread is in a syscall.
+    if (!(thread->trapPending || thread->inSyscall))
+        cpu->squashFromTC(thread->readTid());
+
+    return result;
+}
+
+/** Writes a misc register via the CPU's setMiscRegWithEffect() and
+ *  returns the CPU's Fault. */
+template <class Impl>
+Fault
+O3ThreadContext<Impl>::setMiscRegWithEffect(int misc_reg,
+                                            const MiscReg &val)
+{
+    Fault result =
+        cpu->setMiscRegWithEffect(misc_reg, val, thread->readTid());
+
+    // Squash unless a trap is pending or the thread is in a syscall.
+    if (!(thread->trapPending || thread->inSyscall))
+        cpu->squashFromTC(thread->readTid());
+    return result;
+}
+
+#if !FULL_SYSTEM
+
+/** Returns syscall argument i for this thread, as reported by the CPU
+ *  (compiled only when FULL_SYSTEM is off). */
+template <class Impl>
+TheISA::IntReg
+O3ThreadContext<Impl>::getSyscallArg(int i)
+{ return cpu->getSyscallArg(i, thread->readTid()); }
+
+/** Sets syscall argument i for this thread through the CPU
+ *  (compiled only when FULL_SYSTEM is off). */
+template <class Impl>
+void
+O3ThreadContext<Impl>::setSyscallArg(int i, IntReg val)
+{ cpu->setSyscallArg(i, val, thread->readTid()); }
+
+/** Passes the syscall return value for this thread on to the CPU
+ *  (compiled only when FULL_SYSTEM is off). */
+template <class Impl>
+void
+O3ThreadContext<Impl>::setSyscallReturn(SyscallReturn return_value)
+{ cpu->setSyscallReturn(return_value, thread->readTid()); }
+
+#endif // !FULL_SYSTEM
+
diff --git a/src/cpu/o3/thread_state.hh b/src/cpu/o3/thread_state.hh
index b6535baa1..1c8105204 100644
--- a/src/cpu/o3/thread_state.hh
+++ b/src/cpu/o3/thread_state.hh
@@ -58,11 +58,11 @@ class Process;
 template <class Impl>
 struct O3ThreadState : public ThreadState {
     typedef ThreadContext::Status Status;
-    typedef typename Impl::FullCPU FullCPU;
+    typedef typename Impl::O3CPU O3CPU;
 
   private:
     /** Pointer to the CPU. */
-    FullCPU *cpu;
+    O3CPU *cpu;
   public:
     /** Whether or not the thread is currently in syscall mode, and
      * thus able to be externally updated without squashing.
@@ -75,14 +75,14 @@ struct O3ThreadState : public ThreadState {
     bool trapPending;
 
 #if FULL_SYSTEM
-    O3ThreadState(FullCPU *_cpu, int _thread_num)
+    O3ThreadState(O3CPU *_cpu, int _thread_num)
         : ThreadState(-1, _thread_num),
-          inSyscall(0), trapPending(0)
+          cpu(_cpu), inSyscall(0), trapPending(0) // as in the SE-mode ctor
     { }
 #else
-    O3ThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid,
+    O3ThreadState(O3CPU *_cpu, int _thread_num, Process *_process, int _asid,
                   MemObject *mem)
-        : ThreadState(-1, _thread_num, mem, _process, _asid),
+        : ThreadState(-1, _thread_num, _process, _asid, mem),
           cpu(_cpu), inSyscall(0), trapPending(0)
     { }
 #endif
diff --git a/src/cpu/ozone/base_dyn_inst.cc b/src/cpu/ozone/base_dyn_inst.cc
new file mode 100644
index 000000000..5a3a69dff
--- /dev/null
+++ b/src/cpu/ozone/base_dyn_inst.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#include "cpu/base_dyn_inst_impl.hh"
+#include "cpu/ozone/ozone_impl.hh"
+
+// Explicit instantiation
+template class BaseDynInst<OzoneImpl>;
+
+template <>
+int
+BaseDynInst<OzoneImpl>::instcount = 0;
diff --git a/src/cpu/ozone/bpred_unit.cc b/src/cpu/ozone/bpred_unit.cc
new file mode 100644
index 000000000..c823f5e80
--- /dev/null
+++ b/src/cpu/ozone/bpred_unit.cc
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#include "cpu/o3/bpred_unit_impl.hh"
+#include "cpu/ozone/ozone_impl.hh"
+//#include "cpu/ozone/simple_impl.hh"
+
+template class BPredUnit<OzoneImpl>;
+//template class BPredUnit<SimpleImpl>;
diff --git a/src/cpu/checker/cpu_builder.cc b/src/cpu/ozone/checker_builder.cc
index 3b7583294..c372e51d6 100644
--- a/src/cpu/checker/cpu_builder.cc
+++ b/src/cpu/ozone/checker_builder.cc
@@ -30,19 +30,24 @@
 
 #include <string>
 
-#include "cpu/checker/cpu.hh"
+#include "cpu/checker/cpu_impl.hh"
 #include "cpu/inst_seq.hh"
 #include "cpu/ozone/dyn_inst.hh"
 #include "cpu/ozone/ozone_impl.hh"
-#include "mem/base_mem.hh"
 #include "sim/builder.hh"
 #include "sim/process.hh"
 #include "sim/sim_object.hh"
 
+class MemObject;
+
+template
+class Checker<RefCountingPtr<OzoneDynInst<OzoneImpl> > >;
+
 /**
  * Specific non-templated derived class used for SimObject configuration.
  */
-class OzoneChecker : public Checker<RefCountingPtr<OzoneDynInst<OzoneImpl> > >
+class OzoneChecker :
+    public Checker<RefCountingPtr<OzoneDynInst<OzoneImpl> > >
 {
   public:
     OzoneChecker(Params *p)
@@ -64,7 +69,6 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(OzoneChecker)
 #if FULL_SYSTEM
     SimObjectParam<AlphaITB *> itb;
     SimObjectParam<AlphaDTB *> dtb;
-    SimObjectParam<FunctionalMemory *> mem;
     SimObjectParam<System *> system;
     Param<int> cpu_id;
     Param<Tick> profile;
@@ -72,11 +76,10 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(OzoneChecker)
     SimObjectParam<Process *> workload;
 #endif // FULL_SYSTEM
     Param<int> clock;
-    SimObjectParam<BaseMem *> icache;
-    SimObjectParam<BaseMem *> dcache;
 
     Param<bool> defer_registration;
     Param<bool> exitOnError;
+    Param<bool> warnOnlyOnLoadError;
     Param<bool> function_trace;
     Param<Tick> function_trace_start;
 
@@ -96,7 +99,6 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(OzoneChecker)
 #if FULL_SYSTEM
     INIT_PARAM(itb, "Instruction TLB"),
     INIT_PARAM(dtb, "Data TLB"),
-    INIT_PARAM(mem, "memory"),
     INIT_PARAM(system, "system object"),
     INIT_PARAM(cpu_id, "processor ID"),
     INIT_PARAM(profile, ""),
@@ -105,11 +107,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(OzoneChecker)
 #endif // FULL_SYSTEM
 
     INIT_PARAM(clock, "clock speed"),
-    INIT_PARAM(icache, "L1 instruction cache object"),
-    INIT_PARAM(dcache, "L1 data cache object"),
 
     INIT_PARAM(defer_registration, "defer system registration (for sampling)"),
     INIT_PARAM(exitOnError, "exit on error"),
+    INIT_PARAM_DFLT(warnOnlyOnLoadError, "warn, but don't exit, if a load "
+                    "result errors", false),
     INIT_PARAM(function_trace, "Enable function trace"),
     INIT_PARAM(function_trace_start, "Cycle to start function trace")
 
@@ -126,6 +128,7 @@ CREATE_SIM_OBJECT(OzoneChecker)
     params->max_loads_any_thread = 0;
     params->max_loads_all_threads = 0;
     params->exitOnError = exitOnError;
+    params->warnOnlyOnLoadError = warnOnlyOnLoadError;
     params->deferRegistration = defer_registration;
     params->functionTrace = function_trace;
     params->functionTraceStart = function_trace_start;
@@ -137,13 +140,10 @@ CREATE_SIM_OBJECT(OzoneChecker)
     temp = max_insts_all_threads;
     temp = max_loads_any_thread;
     temp = max_loads_all_threads;
-    BaseMem *cache = icache;
-    cache = dcache;
 
 #if FULL_SYSTEM
     params->itb = itb;
     params->dtb = dtb;
-    params->mem = mem;
     params->system = system;
     params->cpu_id = cpu_id;
     params->profile = profile;
diff --git a/src/cpu/ozone/cpu.cc b/src/cpu/ozone/cpu.cc
index 303c78eea..eb6ac37bd 100644
--- a/src/cpu/ozone/cpu.cc
+++ b/src/cpu/ozone/cpu.cc
@@ -31,7 +31,7 @@
 
 #include "cpu/ozone/cpu_impl.hh"
 #include "cpu/ozone/ozone_impl.hh"
-#include "cpu/ozone/simple_impl.hh"
+//#include "cpu/ozone/simple_impl.hh"
 
-template class OzoneCPU<SimpleImpl>;
+//template class OzoneCPU<SimpleImpl>;
 template class OzoneCPU<OzoneImpl>;
diff --git a/src/cpu/ozone/cpu.hh b/src/cpu/ozone/cpu.hh
index e9550c39b..e411c12bd 100644
--- a/src/cpu/ozone/cpu.hh
+++ b/src/cpu/ozone/cpu.hh
@@ -43,6 +43,7 @@
 #include "cpu/ozone/thread_state.hh"
 #include "cpu/pc_event.hh"
 #include "cpu/static_inst.hh"
+#include "mem/page_table.hh"
 #include "sim/eventq.hh"
 
 // forward declarations
@@ -54,7 +55,6 @@ class AlphaDTB;
 class PhysicalMemory;
 class MemoryController;
 
-class Sampler;
 class RemoteGDB;
 class GDBListener;
 
@@ -70,6 +70,7 @@ class Process;
 
 class Checkpoint;
 class EndQuiesceEvent;
+class MemObject;
 class Request;
 
 namespace Trace {
@@ -111,7 +112,7 @@ class OzoneCPU : public BaseCPU
 
         void setCpuId(int id);
 
-        int readCpuId() { return thread->cpuId; }
+        int readCpuId() { return thread->readCpuId(); }
 
 #if FULL_SYSTEM
         System *getSystemPtr() { return cpu->system; }
@@ -122,22 +123,22 @@ class OzoneCPU : public BaseCPU
 
         AlphaDTB * getDTBPtr() { return cpu->dtb; }
 
-        Kernel::Statistics *getKernelStats() { return thread->kernelStats; }
+        Kernel::Statistics *getKernelStats()
+        { return thread->getKernelStats(); }
 
         FunctionalPort *getPhysPort() { return thread->getPhysPort(); }
 
         VirtualPort *getVirtPort(ThreadContext *tc = NULL)
         { return thread->getVirtPort(tc); }
 
-        void delVirtPort(VirtualPort *vp)
-        { thread->delVirtPort(vp); }
+        void delVirtPort(VirtualPort *vp);
 #else
-        TranslatingPort *getMemPort() { return thread->port; }
+        TranslatingPort *getMemPort() { return thread->getMemPort(); }
 
-        Process *getProcessPtr() { return thread->process; }
+        Process *getProcessPtr() { return thread->getProcessPtr(); }
 #endif
 
-        Status status() const { return thread->_status; }
+        Status status() const { return thread->status(); }
 
         void setStatus(Status new_status);
 
@@ -149,7 +150,7 @@ class OzoneCPU : public BaseCPU
         void suspend();
 
         /// Set the status to Unallocated.
-        void deallocate();
+        void deallocate(int delay = 0);
 
         /// Set the status to Halted.
         void halt();
@@ -212,12 +213,11 @@ class OzoneCPU : public BaseCPU
 
         uint64_t readNextNPC()
         {
-            panic("Alpha has no NextNPC!");
             return 0;
         }
 
         void setNextNPC(uint64_t val)
-        { panic("Alpha has no NextNPC!"); }
+        { }
 
       public:
         // ISA stuff:
@@ -250,7 +250,7 @@ class OzoneCPU : public BaseCPU
         { thread->renameTable[TheISA::ArgumentReg0 + i]->setIntResult(i); }
 
         void setSyscallReturn(SyscallReturn return_value)
-        { cpu->setSyscallReturn(return_value, thread->tid); }
+        { cpu->setSyscallReturn(return_value, thread->readTid()); }
 
         Counter readFuncExeInst() { return thread->funcExeInst; }
 
@@ -355,12 +355,10 @@ class OzoneCPU : public BaseCPU
 
     int cpuId;
 
-    void switchOut(Sampler *sampler);
+    void switchOut();
     void signalSwitched();
     void takeOverFrom(BaseCPU *oldCPU);
 
-    Sampler *sampler;
-
     int switchCount;
 
 #if FULL_SYSTEM
@@ -374,6 +372,10 @@ class OzoneCPU : public BaseCPU
     PhysicalMemory *physmem;
 #endif
 
+    virtual Port *getPort(const std::string &name, int idx);
+
+    MemObject *mem;
+
     FrontEnd *frontEnd;
 
     BackEnd *backEnd;
@@ -383,7 +385,7 @@ class OzoneCPU : public BaseCPU
 
     virtual void activateContext(int thread_num, int delay);
     virtual void suspendContext(int thread_num);
-    virtual void deallocateContext(int thread_num);
+    virtual void deallocateContext(int thread_num, int delay);
     virtual void haltContext(int thread_num);
 
     // statistics
@@ -415,50 +417,41 @@ class OzoneCPU : public BaseCPU
 
 
 #if FULL_SYSTEM
-    bool validInstAddr(Addr addr) { return true; }
-    bool validDataAddr(Addr addr) { return true; }
-
-    Fault translateInstReq(Request *req)
+    /** Translates instruction request. */
+    Fault translateInstReq(RequestPtr &req, OzoneThreadState<Impl> *thread)
     {
-        return itb->translate(req, tc);
+        return itb->translate(req, thread->getTC());
     }
 
-    Fault translateDataReadReq(Request *req)
+    /** Translates data read request. */
+    Fault translateDataReadReq(RequestPtr &req, OzoneThreadState<Impl> *thread)
     {
-        return dtb->translate(req, tc, false);
+        return dtb->translate(req, thread->getTC(), false);
     }
 
-    Fault translateDataWriteReq(Request *req)
+    /** Translates data write request. */
+    Fault translateDataWriteReq(RequestPtr &req, OzoneThreadState<Impl> *thread)
     {
-        return dtb->translate(req, tc, true);
+        return dtb->translate(req, thread->getTC(), true);
     }
 
 #else
-    bool validInstAddr(Addr addr)
-    { return true; }
-
-    bool validDataAddr(Addr addr)
-    { return true; }
-
-    int getInstAsid() { return thread.asid; }
-    int getDataAsid() { return thread.asid; }
-
     /** Translates instruction requestion in syscall emulation mode. */
-    Fault translateInstReq(Request *req)
+    Fault translateInstReq(RequestPtr &req, OzoneThreadState<Impl> *thread)
     {
-        return thread.translateInstReq(req);
+        return thread->getProcessPtr()->pTable->translate(req);
     }
 
     /** Translates data read request in syscall emulation mode. */
-    Fault translateDataReadReq(Request *req)
+    Fault translateDataReadReq(RequestPtr &req, OzoneThreadState<Impl> *thread)
     {
-        return thread.translateDataReadReq(req);
+        return thread->getProcessPtr()->pTable->translate(req);
     }
 
     /** Translates data write request in syscall emulation mode. */
-    Fault translateDataWriteReq(Request *req)
+    Fault translateDataWriteReq(RequestPtr &req, OzoneThreadState<Impl> *thread)
     {
-        return thread.translateDataWriteReq(req);
+        return thread->getProcessPtr()->pTable->translate(req);
     }
 #endif
 
@@ -599,14 +592,14 @@ class OzoneCPU : public BaseCPU
 
 #if FULL_SYSTEM
     Fault hwrei();
-    int readIntrFlag() { return thread.regs.intrflag; }
-    void setIntrFlag(int val) { thread.regs.intrflag = val; }
+    int readIntrFlag() { return thread.intrflag; }
+    void setIntrFlag(int val) { thread.intrflag = val; }
     bool inPalMode() { return AlphaISA::PcPAL(thread.PC); }
     bool inPalMode(Addr pc) { return AlphaISA::PcPAL(pc); }
     bool simPalCheck(int palFunc);
     void processInterrupts();
 #else
-    void syscall();
+    void syscall(uint64_t &callnum);
     void setSyscallReturn(SyscallReturn return_value, int tid);
 #endif
 
diff --git a/src/cpu/ozone/cpu_builder.cc b/src/cpu/ozone/cpu_builder.cc
index 18f257a25..e239b7a94 100644
--- a/src/cpu/ozone/cpu_builder.cc
+++ b/src/cpu/ozone/cpu_builder.cc
@@ -34,9 +34,7 @@
 #include "cpu/inst_seq.hh"
 #include "cpu/ozone/cpu.hh"
 #include "cpu/ozone/ozone_impl.hh"
-#include "cpu/ozone/simple_impl.hh"
 #include "cpu/ozone/simple_params.hh"
-#include "mem/cache/base_cache.hh"
 #include "sim/builder.hh"
 #include "sim/process.hh"
 #include "sim/sim_object.hh"
@@ -49,14 +47,6 @@ class DerivOzoneCPU : public OzoneCPU<OzoneImpl>
     { }
 };
 
-class SimpleOzoneCPU : public OzoneCPU<SimpleImpl>
-{
-  public:
-    SimpleOzoneCPU(SimpleParams *p)
-        : OzoneCPU<SimpleImpl>(p)
-    { }
-};
-
 
 ////////////////////////////////////////////////////////////////////////
 //
@@ -78,7 +68,7 @@ SimObjectVectorParam<Process *> workload;
 //SimObjectParam<PageTable *> page_table;
 #endif // FULL_SYSTEM
 
-SimObjectParam<FunctionalMemory *> mem;
+SimObjectParam<MemObject *> mem;
 
 SimObjectParam<BaseCPU *> checker;
 
@@ -87,8 +77,8 @@ Param<Counter> max_insts_all_threads;
 Param<Counter> max_loads_any_thread;
 Param<Counter> max_loads_all_threads;
 
-SimObjectParam<BaseCache *> icache;
-SimObjectParam<BaseCache *> dcache;
+//SimObjectParam<BaseCache *> icache;
+//SimObjectParam<BaseCache *> dcache;
 
 Param<unsigned> cachePorts;
 Param<unsigned> width;
@@ -215,8 +205,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU)
                     "count",
                     0),
 
-    INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL),
-    INIT_PARAM_DFLT(dcache, "L1 data cache", NULL),
+//    INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL),
+//    INIT_PARAM_DFLT(dcache, "L1 data cache", NULL),
 
     INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200),
     INIT_PARAM_DFLT(width, "Width", 1),
@@ -361,8 +351,8 @@ CREATE_SIM_OBJECT(DerivOzoneCPU)
     //
     // Caches
     //
-    params->icacheInterface = icache ? icache->getInterface() : NULL;
-    params->dcacheInterface = dcache ? dcache->getInterface() : NULL;
+//    params->icacheInterface = icache ? icache->getInterface() : NULL;
+//    params->dcacheInterface = dcache ? dcache->getInterface() : NULL;
     params->cachePorts = cachePorts;
 
     params->width = width;
@@ -459,405 +449,3 @@ CREATE_SIM_OBJECT(DerivOzoneCPU)
 }
 
 REGISTER_SIM_OBJECT("DerivOzoneCPU", DerivOzoneCPU)
-
-
-
-////////////////////////////////////////////////////////////////////////
-//
-//  OzoneCPU Simulation Object
-//
-
-BEGIN_DECLARE_SIM_OBJECT_PARAMS(SimpleOzoneCPU)
-
-    Param<int> clock;
-    Param<int> numThreads;
-
-#if FULL_SYSTEM
-SimObjectParam<System *> system;
-Param<int> cpu_id;
-SimObjectParam<AlphaITB *> itb;
-SimObjectParam<AlphaDTB *> dtb;
-#else
-SimObjectVectorParam<Process *> workload;
-//SimObjectParam<PageTable *> page_table;
-#endif // FULL_SYSTEM
-
-SimObjectParam<FunctionalMemory *> mem;
-
-SimObjectParam<BaseCPU *> checker;
-
-Param<Counter> max_insts_any_thread;
-Param<Counter> max_insts_all_threads;
-Param<Counter> max_loads_any_thread;
-Param<Counter> max_loads_all_threads;
-
-SimObjectParam<BaseCache *> icache;
-SimObjectParam<BaseCache *> dcache;
-
-Param<unsigned> cachePorts;
-Param<unsigned> width;
-Param<unsigned> frontEndWidth;
-Param<unsigned> backEndWidth;
-Param<unsigned> backEndSquashLatency;
-Param<unsigned> backEndLatency;
-Param<unsigned> maxInstBufferSize;
-Param<unsigned> numPhysicalRegs;
-
-Param<unsigned> decodeToFetchDelay;
-Param<unsigned> renameToFetchDelay;
-Param<unsigned> iewToFetchDelay;
-Param<unsigned> commitToFetchDelay;
-Param<unsigned> fetchWidth;
-
-Param<unsigned> renameToDecodeDelay;
-Param<unsigned> iewToDecodeDelay;
-Param<unsigned> commitToDecodeDelay;
-Param<unsigned> fetchToDecodeDelay;
-Param<unsigned> decodeWidth;
-
-Param<unsigned> iewToRenameDelay;
-Param<unsigned> commitToRenameDelay;
-Param<unsigned> decodeToRenameDelay;
-Param<unsigned> renameWidth;
-
-Param<unsigned> commitToIEWDelay;
-Param<unsigned> renameToIEWDelay;
-Param<unsigned> issueToExecuteDelay;
-Param<unsigned> issueWidth;
-Param<unsigned> executeWidth;
-Param<unsigned> executeIntWidth;
-Param<unsigned> executeFloatWidth;
-Param<unsigned> executeBranchWidth;
-Param<unsigned> executeMemoryWidth;
-
-Param<unsigned> iewToCommitDelay;
-Param<unsigned> renameToROBDelay;
-Param<unsigned> commitWidth;
-Param<unsigned> squashWidth;
-
-Param<std::string> predType;
-Param<unsigned> localPredictorSize;
-Param<unsigned> localCtrBits;
-Param<unsigned> localHistoryTableSize;
-Param<unsigned> localHistoryBits;
-Param<unsigned> globalPredictorSize;
-Param<unsigned> globalCtrBits;
-Param<unsigned> globalHistoryBits;
-Param<unsigned> choicePredictorSize;
-Param<unsigned> choiceCtrBits;
-
-Param<unsigned> BTBEntries;
-Param<unsigned> BTBTagSize;
-
-Param<unsigned> RASSize;
-
-Param<unsigned> LQEntries;
-Param<unsigned> SQEntries;
-Param<unsigned> LFSTSize;
-Param<unsigned> SSITSize;
-
-Param<unsigned> numPhysIntRegs;
-Param<unsigned> numPhysFloatRegs;
-Param<unsigned> numIQEntries;
-Param<unsigned> numROBEntries;
-
-Param<bool> decoupledFrontEnd;
-Param<int> dispatchWidth;
-Param<int> wbWidth;
-
-Param<unsigned> smtNumFetchingThreads;
-Param<std::string>   smtFetchPolicy;
-Param<std::string>   smtLSQPolicy;
-Param<unsigned> smtLSQThreshold;
-Param<std::string>   smtIQPolicy;
-Param<unsigned> smtIQThreshold;
-Param<std::string>   smtROBPolicy;
-Param<unsigned> smtROBThreshold;
-Param<std::string>   smtCommitPolicy;
-
-Param<unsigned> instShiftAmt;
-
-Param<bool> defer_registration;
-
-Param<bool> function_trace;
-Param<Tick> function_trace_start;
-
-END_DECLARE_SIM_OBJECT_PARAMS(SimpleOzoneCPU)
-
-BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU)
-
-    INIT_PARAM(clock, "clock speed"),
-    INIT_PARAM(numThreads, "number of HW thread contexts"),
-
-#if FULL_SYSTEM
-    INIT_PARAM(system, "System object"),
-    INIT_PARAM(cpu_id, "processor ID"),
-    INIT_PARAM(itb, "Instruction translation buffer"),
-    INIT_PARAM(dtb, "Data translation buffer"),
-#else
-    INIT_PARAM(workload, "Processes to run"),
-//    INIT_PARAM(page_table, "Page table"),
-#endif // FULL_SYSTEM
-
-    INIT_PARAM_DFLT(mem, "Memory", NULL),
-
-    INIT_PARAM_DFLT(checker, "Checker CPU", NULL),
-
-    INIT_PARAM_DFLT(max_insts_any_thread,
-                    "Terminate when any thread reaches this inst count",
-                    0),
-    INIT_PARAM_DFLT(max_insts_all_threads,
-                    "Terminate when all threads have reached"
-                    "this inst count",
-                    0),
-    INIT_PARAM_DFLT(max_loads_any_thread,
-                    "Terminate when any thread reaches this load count",
-                    0),
-    INIT_PARAM_DFLT(max_loads_all_threads,
-                    "Terminate when all threads have reached this load"
-                    "count",
-                    0),
-
-    INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL),
-    INIT_PARAM_DFLT(dcache, "L1 data cache", NULL),
-
-    INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200),
-    INIT_PARAM_DFLT(width, "Width", 1),
-    INIT_PARAM_DFLT(frontEndWidth, "Front end width", 1),
-    INIT_PARAM_DFLT(backEndWidth, "Back end width", 1),
-    INIT_PARAM_DFLT(backEndSquashLatency, "Back end squash latency", 1),
-    INIT_PARAM_DFLT(backEndLatency, "Back end latency", 1),
-    INIT_PARAM_DFLT(maxInstBufferSize, "Maximum instruction buffer size", 16),
-    INIT_PARAM(numPhysicalRegs, "Number of physical registers"),
-
-    INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"),
-    INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"),
-    INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch"
-               "delay"),
-    INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"),
-    INIT_PARAM(fetchWidth, "Fetch width"),
-    INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"),
-    INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode"
-               "delay"),
-    INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"),
-    INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"),
-    INIT_PARAM(decodeWidth, "Decode width"),
-
-    INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename"
-               "delay"),
-    INIT_PARAM(commitToRenameDelay, "Commit to rename delay"),
-    INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"),
-    INIT_PARAM(renameWidth, "Rename width"),
-
-    INIT_PARAM(commitToIEWDelay, "Commit to "
-               "Issue/Execute/Writeback delay"),
-    INIT_PARAM(renameToIEWDelay, "Rename to "
-               "Issue/Execute/Writeback delay"),
-    INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal"
-               "to the IEW stage)"),
-    INIT_PARAM(issueWidth, "Issue width"),
-    INIT_PARAM(executeWidth, "Execute width"),
-    INIT_PARAM(executeIntWidth, "Integer execute width"),
-    INIT_PARAM(executeFloatWidth, "Floating point execute width"),
-    INIT_PARAM(executeBranchWidth, "Branch execute width"),
-    INIT_PARAM(executeMemoryWidth, "Memory execute width"),
-
-    INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit "
-               "delay"),
-    INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"),
-    INIT_PARAM(commitWidth, "Commit width"),
-    INIT_PARAM(squashWidth, "Squash width"),
-
-    INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"),
-    INIT_PARAM(localPredictorSize, "Size of local predictor"),
-    INIT_PARAM(localCtrBits, "Bits per counter"),
-    INIT_PARAM(localHistoryTableSize, "Size of local history table"),
-    INIT_PARAM(localHistoryBits, "Bits for the local history"),
-    INIT_PARAM(globalPredictorSize, "Size of global predictor"),
-    INIT_PARAM(globalCtrBits, "Bits per counter"),
-    INIT_PARAM(globalHistoryBits, "Bits of history"),
-    INIT_PARAM(choicePredictorSize, "Size of choice predictor"),
-    INIT_PARAM(choiceCtrBits, "Bits of choice counters"),
-
-    INIT_PARAM(BTBEntries, "Number of BTB entries"),
-    INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"),
-
-    INIT_PARAM(RASSize, "RAS size"),
-
-    INIT_PARAM(LQEntries, "Number of load queue entries"),
-    INIT_PARAM(SQEntries, "Number of store queue entries"),
-    INIT_PARAM(LFSTSize, "Last fetched store table size"),
-    INIT_PARAM(SSITSize, "Store set ID table size"),
-
-    INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"),
-    INIT_PARAM(numPhysFloatRegs, "Number of physical floating point "
-               "registers"),
-    INIT_PARAM(numIQEntries, "Number of instruction queue entries"),
-    INIT_PARAM(numROBEntries, "Number of reorder buffer entries"),
-
-    INIT_PARAM_DFLT(decoupledFrontEnd, "Decoupled front end", true),
-    INIT_PARAM_DFLT(dispatchWidth, "Dispatch width", 0),
-    INIT_PARAM_DFLT(wbWidth, "Writeback width", 0),
-
-    INIT_PARAM_DFLT(smtNumFetchingThreads, "SMT Number of Fetching Threads", 1),
-    INIT_PARAM_DFLT(smtFetchPolicy, "SMT Fetch Policy", "SingleThread"),
-    INIT_PARAM_DFLT(smtLSQPolicy,   "SMT LSQ Sharing Policy",    "Partitioned"),
-    INIT_PARAM_DFLT(smtLSQThreshold,"SMT LSQ Threshold", 100),
-    INIT_PARAM_DFLT(smtIQPolicy,    "SMT IQ Policy",    "Partitioned"),
-    INIT_PARAM_DFLT(smtIQThreshold, "SMT IQ Threshold", 100),
-    INIT_PARAM_DFLT(smtROBPolicy,   "SMT ROB Sharing Policy", "Partitioned"),
-    INIT_PARAM_DFLT(smtROBThreshold,"SMT ROB Threshold", 100),
-    INIT_PARAM_DFLT(smtCommitPolicy,"SMT Commit Fetch Policy", "RoundRobin"),
-
-    INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"),
-    INIT_PARAM(defer_registration, "defer system registration (for sampling)"),
-
-    INIT_PARAM(function_trace, "Enable function trace"),
-    INIT_PARAM(function_trace_start, "Cycle to start function trace")
-
-END_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU)
-
-CREATE_SIM_OBJECT(SimpleOzoneCPU)
-{
-    SimpleOzoneCPU *cpu;
-
-#if FULL_SYSTEM
-    // Full-system only supports a single thread for the moment.
-    int actual_num_threads = 1;
-#else
-    // In non-full-system mode, we infer the number of threads from
-    // the workload if it's not explicitly specified.
-    int actual_num_threads =
-        numThreads.isValid() ? numThreads : workload.size();
-
-    if (workload.size() == 0) {
-        fatal("Must specify at least one workload!");
-    }
-
-#endif
-
-    SimpleParams *params = new SimpleParams;
-
-    params->clock = clock;
-
-    params->name = getInstanceName();
-    params->numberOfThreads = actual_num_threads;
-
-#if FULL_SYSTEM
-    params->system = system;
-    params->cpu_id = cpu_id;
-    params->itb = itb;
-    params->dtb = dtb;
-#else
-    params->workload = workload;
-//    params->pTable = page_table;
-#endif // FULL_SYSTEM
-
-    params->mem = mem;
-    params->checker = checker;
-    params->max_insts_any_thread = max_insts_any_thread;
-    params->max_insts_all_threads = max_insts_all_threads;
-    params->max_loads_any_thread = max_loads_any_thread;
-    params->max_loads_all_threads = max_loads_all_threads;
-
-    //
-    // Caches
-    //
-    params->icacheInterface = icache ? icache->getInterface() : NULL;
-    params->dcacheInterface = dcache ? dcache->getInterface() : NULL;
-    params->cachePorts = cachePorts;
-
-    params->width = width;
-    params->frontEndWidth = frontEndWidth;
-    params->backEndWidth = backEndWidth;
-    params->backEndSquashLatency = backEndSquashLatency;
-    params->backEndLatency = backEndLatency;
-    params->maxInstBufferSize = maxInstBufferSize;
-    params->numPhysicalRegs = numPhysIntRegs + numPhysFloatRegs;
-
-    params->decodeToFetchDelay = decodeToFetchDelay;
-    params->renameToFetchDelay = renameToFetchDelay;
-    params->iewToFetchDelay = iewToFetchDelay;
-    params->commitToFetchDelay = commitToFetchDelay;
-    params->fetchWidth = fetchWidth;
-
-    params->renameToDecodeDelay = renameToDecodeDelay;
-    params->iewToDecodeDelay = iewToDecodeDelay;
-    params->commitToDecodeDelay = commitToDecodeDelay;
-    params->fetchToDecodeDelay = fetchToDecodeDelay;
-    params->decodeWidth = decodeWidth;
-
-    params->iewToRenameDelay = iewToRenameDelay;
-    params->commitToRenameDelay = commitToRenameDelay;
-    params->decodeToRenameDelay = decodeToRenameDelay;
-    params->renameWidth = renameWidth;
-
-    params->commitToIEWDelay = commitToIEWDelay;
-    params->renameToIEWDelay = renameToIEWDelay;
-    params->issueToExecuteDelay = issueToExecuteDelay;
-    params->issueWidth = issueWidth;
-    params->executeWidth = executeWidth;
-    params->executeIntWidth = executeIntWidth;
-    params->executeFloatWidth = executeFloatWidth;
-    params->executeBranchWidth = executeBranchWidth;
-    params->executeMemoryWidth = executeMemoryWidth;
-
-    params->iewToCommitDelay = iewToCommitDelay;
-    params->renameToROBDelay = renameToROBDelay;
-    params->commitWidth = commitWidth;
-    params->squashWidth = squashWidth;
-
-    params->predType = predType;
-    params->localPredictorSize = localPredictorSize;
-    params->localCtrBits = localCtrBits;
-    params->localHistoryTableSize = localHistoryTableSize;
-    params->localHistoryBits = localHistoryBits;
-    params->globalPredictorSize = globalPredictorSize;
-    params->globalCtrBits = globalCtrBits;
-    params->globalHistoryBits = globalHistoryBits;
-    params->choicePredictorSize = choicePredictorSize;
-    params->choiceCtrBits = choiceCtrBits;
-
-    params->BTBEntries = BTBEntries;
-    params->BTBTagSize = BTBTagSize;
-
-    params->RASSize = RASSize;
-
-    params->LQEntries = LQEntries;
-    params->SQEntries = SQEntries;
-
-    params->SSITSize = SSITSize;
-    params->LFSTSize = LFSTSize;
-
-    params->numPhysIntRegs = numPhysIntRegs;
-    params->numPhysFloatRegs = numPhysFloatRegs;
-    params->numIQEntries = numIQEntries;
-    params->numROBEntries = numROBEntries;
-
-    params->decoupledFrontEnd = decoupledFrontEnd;
-    params->dispatchWidth = dispatchWidth;
-    params->wbWidth = wbWidth;
-
-    params->smtNumFetchingThreads = smtNumFetchingThreads;
-    params->smtFetchPolicy = smtFetchPolicy;
-    params->smtIQPolicy    = smtIQPolicy;
-    params->smtLSQPolicy    = smtLSQPolicy;
-    params->smtLSQThreshold = smtLSQThreshold;
-    params->smtROBPolicy   = smtROBPolicy;
-    params->smtROBThreshold = smtROBThreshold;
-    params->smtCommitPolicy = smtCommitPolicy;
-
-    params->instShiftAmt = 2;
-
-    params->deferRegistration = defer_registration;
-
-    params->functionTrace = function_trace;
-    params->functionTraceStart = function_trace_start;
-
-    cpu = new SimpleOzoneCPU(params);
-
-    return cpu;
-}
-
-REGISTER_SIM_OBJECT("SimpleOzoneCPU", SimpleOzoneCPU)
-
diff --git a/src/cpu/ozone/cpu_impl.hh b/src/cpu/ozone/cpu_impl.hh
index 76e2318aa..f58b81990 100644
--- a/src/cpu/ozone/cpu_impl.hh
+++ b/src/cpu/ozone/cpu_impl.hh
@@ -29,21 +29,17 @@
  *          Nathan Binkert
  */
 
-//#include <cstdio>
-//#include <cstdlib>
+#include "config/full_system.hh"
+#include "config/use_checker.hh"
 
 #include "arch/isa_traits.hh" // For MachInst
 #include "base/trace.hh"
-#include "config/full_system.hh"
 #include "cpu/base.hh"
-#include "cpu/checker/thread_context.hh"
 #include "cpu/thread_context.hh"
 #include "cpu/exetrace.hh"
 #include "cpu/ozone/cpu.hh"
 #include "cpu/quiesce_event.hh"
 #include "cpu/static_inst.hh"
-//#include "mem/base_mem.hh"
-#include "mem/mem_interface.hh"
 #include "sim/sim_object.hh"
 #include "sim/stats.hh"
 
@@ -56,17 +52,18 @@
 //#include "base/remote_gdb.hh"
 #include "cpu/profile.hh"
 #include "kern/kernel_stats.hh"
-#include "mem/functional/memory_control.hh"
-#include "mem/functional/physical.hh"
 #include "sim/faults.hh"
 #include "sim/sim_events.hh"
 #include "sim/sim_exit.hh"
 #include "sim/system.hh"
 #else // !FULL_SYSTEM
-#include "mem/functional/functional.hh"
 #include "sim/process.hh"
 #endif // FULL_SYSTEM
 
+#if USE_CHECKER
+#include "cpu/checker/thread_context.hh"
+#endif
+
 using namespace TheISA;
 
 template <class Impl>
@@ -101,13 +98,12 @@ OzoneCPU<Impl>::TickEvent::description()
 template <class Impl>
 OzoneCPU<Impl>::OzoneCPU(Params *p)
 #if FULL_SYSTEM
-    : BaseCPU(p), thread(this, 0, p->mem), tickEvent(this, p->width),
-      mem(p->mem),
+    : BaseCPU(p), thread(this, 0), tickEvent(this, p->width),
 #else
-    : BaseCPU(p), thread(this, 0, p->workload[0], 0), tickEvent(this, p->width),
-      mem(p->workload[0]->getMemory()),
+    : BaseCPU(p), thread(this, 0, p->workload[0], 0, p->mem),
+      tickEvent(this, p->width),
 #endif
-      comm(5, 5)
+      mem(p->mem), comm(5, 5)
 {
     frontEnd = new FrontEnd(p);
     backEnd = new BackEnd(p);
@@ -115,6 +111,7 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
     _status = Idle;
 
     if (p->checker) {
+#if USE_CHECKER
         BaseCPU *temp_checker = p->checker;
         checker = dynamic_cast<Checker<DynInstPtr> *>(temp_checker);
         checker->setMemory(mem);
@@ -123,7 +120,10 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
 #endif
         checkerTC = new CheckerThreadContext<OzoneTC>(&ozoneTC, checker);
         thread.tc = checkerTC;
-        tc = checkerXC;
+        tc = checkerTC;
+#else
+        panic("Checker enabled but not compiled in!");
+#endif
     } else {
         checker = NULL;
         thread.tc = &ozoneTC;
@@ -139,15 +139,13 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
 #if FULL_SYSTEM
     /***** All thread state stuff *****/
     thread.cpu = this;
-    thread.tid = 0;
-    thread.mem = p->mem;
+    thread.setTid(0);
 
     thread.quiesceEvent = new EndQuiesceEvent(tc);
 
     system = p->system;
     itb = p->itb;
     dtb = p->dtb;
-    memctrl = p->system->memctrl;
     physmem = p->system->physmem;
 
     if (p->profile) {
@@ -166,9 +164,6 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
     thread.profilePC = 3;
 #else
     thread.cpu = this;
-    thread.tid = 0;
-    thread.process = p->workload[0];
-    thread.asid = 0;
 #endif // !FULL_SYSTEM
 
     numInst = 0;
@@ -206,7 +201,35 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
     backEnd->renameTable.copyFrom(thread.renameTable);
 
 #if !FULL_SYSTEM
-//    pTable = p->pTable;
+    /* Use this port for syscall emulation writes to memory. */
+    Port *mem_port;
+    TranslatingPort *trans_port;
+    trans_port = new TranslatingPort(csprintf("%s-%d-funcport",
+                                              name(), 0),
+                                     p->workload[0]->pTable,
+                                     false);
+    mem_port = p->mem->getPort("functional");
+    mem_port->setPeer(trans_port);
+    trans_port->setPeer(mem_port);
+    thread.setMemPort(trans_port);
+#else
+    Port *mem_port;
+    FunctionalPort *phys_port;
+    VirtualPort *virt_port;
+    phys_port = new FunctionalPort(csprintf("%s-%d-funcport",
+                                            name(), 0));
+    mem_port = system->physmem->getPort("functional");
+    mem_port->setPeer(phys_port);
+    phys_port->setPeer(mem_port);
+
+    virt_port = new VirtualPort(csprintf("%s-%d-vport",
+                                         name(), 0));
+    mem_port = system->physmem->getPort("functional");
+    mem_port->setPeer(virt_port);
+    virt_port->setPeer(mem_port);
+
+    thread.setPhysPort(phys_port);
+    thread.setVirtPort(virt_port);
 #endif
 
     lockFlag = 0;
@@ -221,9 +244,8 @@ OzoneCPU<Impl>::~OzoneCPU()
 
 template <class Impl>
 void
-OzoneCPU<Impl>::switchOut(Sampler *_sampler)
+OzoneCPU<Impl>::switchOut()
 {
-    sampler = _sampler;
     switchCount = 0;
     // Front end needs state from back end, so switch out the back end first.
     backEnd->switchOut();
@@ -237,12 +259,14 @@ OzoneCPU<Impl>::signalSwitched()
     if (++switchCount == 2) {
         backEnd->doSwitchOut();
         frontEnd->doSwitchOut();
+#if USE_CHECKER
         if (checker)
-            checker->switchOut(sampler);
+            checker->switchOut();
+#endif
+
         _status = SwitchedOut;
         if (tickEvent.scheduled())
             tickEvent.squash();
-        sampler->signalSwitched();
     }
     assert(switchCount <= 2);
 }
@@ -291,7 +315,7 @@ OzoneCPU<Impl>::activateContext(int thread_num, int delay)
     notIdleFraction++;
     scheduleTickEvent(delay);
     _status = Running;
-    thread._status = ThreadContext::Active;
+    thread.setStatus(ThreadContext::Active);
     frontEnd->wakeFromQuiesce();
 }
 
@@ -311,7 +335,7 @@ OzoneCPU<Impl>::suspendContext(int thread_num)
 
 template <class Impl>
 void
-OzoneCPU<Impl>::deallocateContext(int thread_num)
+OzoneCPU<Impl>::deallocateContext(int thread_num, int delay)
 {
     // for now, these are equivalent
     suspendContext(thread_num);
@@ -395,6 +419,18 @@ OzoneCPU<Impl>::init()
 }
 
 template <class Impl>
+Port *
+OzoneCPU<Impl>::getPort(const std::string &if_name, int idx)
+{
+    if (if_name == "dcache_port")
+        return backEnd->getDcachePort();
+    else if (if_name == "icache_port")
+        return frontEnd->getIcachePort();
+    else
+        panic("No Such Port\n");
+}
+
+template <class Impl>
 void
 OzoneCPU<Impl>::serialize(std::ostream &os)
 {
@@ -510,7 +546,7 @@ template <class Impl>
 Addr
 OzoneCPU<Impl>::dbg_vtophys(Addr addr)
 {
-    return vtophys(tcProxy, addr);
+    return vtophys(tc, addr);
 }
 #endif // FULL_SYSTEM
 
@@ -526,7 +562,7 @@ OzoneCPU<Impl>::post_interrupt(int int_num, int index)
 //	thread.activate();
         // Hack for now.  Otherwise might have to go through the tc, or
         // I need to figure out what's the right thing to call.
-        activateContext(thread.tid, 1);
+        activateContext(thread.readTid(), 1);
     }
 }
 #endif // FULL_SYSTEM
@@ -565,7 +601,7 @@ OzoneCPU<Impl>::squashFromTC()
 #if !FULL_SYSTEM
 template <class Impl>
 void
-OzoneCPU<Impl>::syscall()
+OzoneCPU<Impl>::syscall(uint64_t &callnum)
 {
     // Not sure this copy is needed, depending on how the TC proxy is made.
     thread.renameTable.copyFrom(backEnd->renameTable);
@@ -576,7 +612,7 @@ OzoneCPU<Impl>::syscall()
 
     DPRINTF(OzoneCPU, "FuncExeInst: %i\n", thread.funcExeInst);
 
-    thread.process->syscall(yc);
+    thread.process->syscall(callnum, tc);
 
     thread.funcExeInst--;
 
@@ -690,9 +726,9 @@ OzoneCPU<Impl>::simPalCheck(int palFunc)
 
     switch (palFunc) {
       case PAL::halt:
-        haltContext(thread.tid);
+        haltContext(thread.readTid());
         if (--System::numSystemsRunning == 0)
-            new SimExitEvent("all cpus halted");
+            exitSimLoop("all cpus halted");
         break;
 
       case PAL::bpt:
@@ -718,21 +754,31 @@ void
 OzoneCPU<Impl>::OzoneTC::setCpuId(int id)
 {
     cpu->cpuId = id;
-    thread->cpuId = id;
+    thread->setCpuId(id);
 }
 
+#if FULL_SYSTEM
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneTC::delVirtPort(VirtualPort *vp)
+{
+    delete vp->getPeer();
+    delete vp;
+}
+#endif
+
 template <class Impl>
 void
 OzoneCPU<Impl>::OzoneTC::setStatus(Status new_status)
 {
-    thread->_status = new_status;
+    thread->setStatus(new_status);
 }
 
 template <class Impl>
 void
 OzoneCPU<Impl>::OzoneTC::activate(int delay)
 {
-    cpu->activateContext(thread->tid, delay);
+    cpu->activateContext(thread->readTid(), delay);
 }
 
 /// Set the status to Suspended.
@@ -740,15 +786,15 @@ template <class Impl>
 void
 OzoneCPU<Impl>::OzoneTC::suspend()
 {
-    cpu->suspendContext(thread->tid);
+    cpu->suspendContext(thread->readTid());
 }
 
 /// Set the status to Unallocated.
 template <class Impl>
 void
-OzoneCPU<Impl>::OzoneTC::deallocate()
+OzoneCPU<Impl>::OzoneTC::deallocate(int delay)
 {
-    cpu->deallocateContext(thread->tid);
+    cpu->deallocateContext(thread->readTid(), delay);
 }
 
 /// Set the status to Halted.
@@ -756,7 +802,7 @@ template <class Impl>
 void
 OzoneCPU<Impl>::OzoneTC::halt()
 {
-    cpu->haltContext(thread->tid);
+    cpu->haltContext(thread->readTid());
 }
 
 #if FULL_SYSTEM
@@ -771,7 +817,6 @@ void
 OzoneCPU<Impl>::OzoneTC::takeOverFrom(ThreadContext *old_context)
 {
     // some things should already be set up
-    assert(getMemPtr() == old_context->getMemPtr());
 #if FULL_SYSTEM
     assert(getSystemPtr() == old_context->getSystemPtr());
 #else
@@ -867,7 +912,7 @@ template <class Impl>
 int
 OzoneCPU<Impl>::OzoneTC::getThreadNum()
 {
-    return thread->tid;
+    return thread->readTid();
 }
 
 // Also somewhat obnoxious.  Really only used for the TLB fault.
@@ -875,7 +920,7 @@ template <class Impl>
 TheISA::MachInst
 OzoneCPU<Impl>::OzoneTC::getInst()
 {
-    return thread->inst;
+    return thread->getInst();
 }
 
 template <class Impl>
@@ -894,7 +939,7 @@ OzoneCPU<Impl>::OzoneTC::copyArchRegs(ThreadContext *tc)
         } else if (i < (TheISA::FP_Base_DepTag + TheISA::NumFloatRegs)) {
             int fp_idx = i - TheISA::FP_Base_DepTag;
             thread->renameTable[i]->setDoubleResult(
-                tc->readFloatRegDouble(fp_idx));
+                tc->readFloatReg(fp_idx, 64));
         }
     }
 
@@ -904,7 +949,7 @@ OzoneCPU<Impl>::OzoneTC::copyArchRegs(ThreadContext *tc)
 
     // Need to copy the TC values into the current rename table,
     // copy the misc regs.
-    thread->regs.miscRegs.copyMiscRegs(tc);
+    copyMiscRegs(tc, this);
 }
 
 template <class Impl>
@@ -922,7 +967,7 @@ OzoneCPU<Impl>::OzoneTC::readIntReg(int reg_idx)
 }
 
 template <class Impl>
-float
+TheISA::FloatReg
 OzoneCPU<Impl>::OzoneTC::readFloatReg(int reg_idx, int width)
 {
     int idx = reg_idx + TheISA::FP_Base_DepTag;
@@ -1049,15 +1094,15 @@ template <class Impl>
 TheISA::MiscReg
 OzoneCPU<Impl>::OzoneTC::readMiscReg(int misc_reg)
 {
-    return thread->regs.miscRegs.readReg(misc_reg);
+    return thread->miscRegFile.readReg(misc_reg);
 }
 
 template <class Impl>
 TheISA::MiscReg
 OzoneCPU<Impl>::OzoneTC::readMiscRegWithEffect(int misc_reg, Fault &fault)
 {
-    return thread->regs.miscRegs.readRegWithEffect(misc_reg,
-                                                   fault, this);
+    return thread->miscRegFile.readRegWithEffect(misc_reg,
+                                                 fault, this);
 }
 
 template <class Impl>
@@ -1065,7 +1110,7 @@ Fault
 OzoneCPU<Impl>::OzoneTC::setMiscReg(int misc_reg, const MiscReg &val)
 {
     // Needs to setup a squash event unless we're in syscall mode
-    Fault ret_fault = thread->regs.miscRegs.setReg(misc_reg, val);
+    Fault ret_fault = thread->miscRegFile.setReg(misc_reg, val);
 
     if (!thread->inSyscall) {
         cpu->squashFromTC();
@@ -1079,8 +1124,8 @@ Fault
 OzoneCPU<Impl>::OzoneTC::setMiscRegWithEffect(int misc_reg, const MiscReg &val)
 {
     // Needs to setup a squash event unless we're in syscall mode
-    Fault ret_fault = thread->regs.miscRegs.setRegWithEffect(misc_reg, val,
-                                                             this);
+    Fault ret_fault = thread->miscRegFile.setRegWithEffect(misc_reg, val,
+                                                           this);
 
     if (!thread->inSyscall) {
         cpu->squashFromTC();
diff --git a/src/cpu/ozone/dyn_inst.hh b/src/cpu/ozone/dyn_inst.hh
index 0bb50bd69..67691d416 100644
--- a/src/cpu/ozone/dyn_inst.hh
+++ b/src/cpu/ozone/dyn_inst.hh
@@ -34,9 +34,8 @@
 #include "arch/isa_traits.hh"
 #include "config/full_system.hh"
 #include "cpu/base_dyn_inst.hh"
-#include "cpu/ozone/cpu.hh"   // MUST include this
 #include "cpu/inst_seq.hh"
-//#include "cpu/ozone/simple_impl.hh" // Would be nice to not have to include this
+#include "cpu/ozone/cpu.hh"   // MUST include this
 #include "cpu/ozone/ozone_impl.hh"
 
 #include <list>
@@ -47,15 +46,17 @@ class OzoneDynInst : public BaseDynInst<Impl>
 {
   public:
     // Typedefs
-    typedef typename Impl::FullCPU FullCPU;
+    typedef typename Impl::OzoneCPU OzoneCPU;
 
-    typedef typename FullCPU::ImplState ImplState;
+    typedef typename OzoneCPU::ImplState ImplState;
 
     // Typedef for DynInstPtr.  This is really just a RefCountingPtr<OoODynInst>.
     typedef typename Impl::DynInstPtr DynInstPtr;
 
     typedef TheISA::ExtMachInst ExtMachInst;
     typedef TheISA::MachInst MachInst;
+    typedef TheISA::FloatReg FloatReg;
+    typedef TheISA::FloatRegBits FloatRegBits;
     typedef TheISA::MiscReg MiscReg;
     typedef typename std::list<DynInstPtr>::iterator ListIt;
 
@@ -67,10 +68,10 @@ class OzoneDynInst : public BaseDynInst<Impl>
         MaxInstDestRegs = TheISA::MaxInstDestRegs
     };
 
-    OzoneDynInst(FullCPU *cpu);
+    OzoneDynInst(OzoneCPU *cpu);
 
     OzoneDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC,
-                 InstSeqNum seq_num, FullCPU *cpu);
+                 InstSeqNum seq_num, OzoneCPU *cpu);
 
     OzoneDynInst(StaticInstPtr inst);
 
@@ -131,7 +132,7 @@ class OzoneDynInst : public BaseDynInst<Impl>
 
     Fault initiateAcc();
 
-    Fault completeAcc();
+    Fault completeAcc(Packet *pkt);
 
     // The register accessor methods provide the index of the
     // instruction's operand (e.g., 0 or 1), not the architectural
@@ -149,17 +150,30 @@ class OzoneDynInst : public BaseDynInst<Impl>
         return srcInsts[idx]->readIntResult();
     }
 
-    float readFloatRegSingle(const StaticInst *si, int idx)
+    FloatReg readFloatReg(const StaticInst *si, int idx, int width)
+    {
+        switch(width) {
+          case 32:
+            return srcInsts[idx]->readFloatResult();
+          case 64:
+            return srcInsts[idx]->readDoubleResult();
+          default:
+            panic("Width not supported");
+            return 0;
+        }
+    }
+
+    FloatReg readFloatReg(const StaticInst *si, int idx)
     {
         return srcInsts[idx]->readFloatResult();
     }
 
-    double readFloatRegDouble(const StaticInst *si, int idx)
+    FloatRegBits readFloatRegBits(const StaticInst *si, int idx, int width)
     {
-        return srcInsts[idx]->readDoubleResult();
+        return srcInsts[idx]->readIntResult();
     }
 
-    uint64_t readFloatRegInt(const StaticInst *si, int idx)
+    FloatRegBits readFloatRegBits(const StaticInst *si, int idx)
     {
         return srcInsts[idx]->readIntResult();
     }
@@ -172,19 +186,25 @@ class OzoneDynInst : public BaseDynInst<Impl>
         BaseDynInst<Impl>::setIntReg(si, idx, val);
     }
 
-    void setFloatRegSingle(const StaticInst *si, int idx, float val)
+    void setFloatReg(const StaticInst *si, int idx, FloatReg val, int width)
+    {
+        BaseDynInst<Impl>::setFloatReg(si, idx, val, width);
+    }
+
+    void setFloatReg(const StaticInst *si, int idx, FloatReg val)
     {
-        BaseDynInst<Impl>::setFloatRegSingle(si, idx, val);
+        BaseDynInst<Impl>::setFloatReg(si, idx, val);
     }
 
-    void setFloatRegDouble(const StaticInst *si, int idx, double val)
+    void setFloatRegBits(const StaticInst *si, int idx,
+            FloatRegBits val, int width)
     {
-        BaseDynInst<Impl>::setFloatRegDouble(si, idx, val);
+        BaseDynInst<Impl>::setFloatRegBits(si, idx, val);
     }
 
-    void setFloatRegInt(const StaticInst *si, int idx, uint64_t val)
+    void setFloatRegBits(const StaticInst *si, int idx, FloatRegBits val)
     {
-        BaseDynInst<Impl>::setFloatRegInt(si, idx, val);
+        BaseDynInst<Impl>::setFloatRegBits(si, idx, val);
     }
 
     void setIntResult(uint64_t result) { this->instResult.integer = result; }
@@ -223,7 +243,7 @@ class OzoneDynInst : public BaseDynInst<Impl>
     void trap(Fault fault);
     bool simPalCheck(int palFunc);
 #else
-    void syscall();
+    void syscall(uint64_t &callnum);
 #endif
 
     ListIt iqIt;
diff --git a/src/cpu/ozone/dyn_inst_impl.hh b/src/cpu/ozone/dyn_inst_impl.hh
index 4149bf144..bad902c2a 100644
--- a/src/cpu/ozone/dyn_inst_impl.hh
+++ b/src/cpu/ozone/dyn_inst_impl.hh
@@ -37,7 +37,7 @@
 using namespace TheISA;
 
 template <class Impl>
-OzoneDynInst<Impl>::OzoneDynInst(FullCPU *cpu)
+OzoneDynInst<Impl>::OzoneDynInst(OzoneCPU *cpu)
     : BaseDynInst<Impl>(0, 0, 0, 0, cpu)
 {
     this->setResultReady();
@@ -47,7 +47,7 @@ OzoneDynInst<Impl>::OzoneDynInst(FullCPU *cpu)
 
 template <class Impl>
 OzoneDynInst<Impl>::OzoneDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC,
-                                 InstSeqNum seq_num, FullCPU *cpu)
+                                 InstSeqNum seq_num, OzoneCPU *cpu)
     : BaseDynInst<Impl>(inst, PC, Pred_PC, seq_num, cpu)
 {
     initInstPtrs();
@@ -111,19 +111,9 @@ OzoneDynInst<Impl>::initiateAcc()
 
 template <class Impl>
 Fault
-OzoneDynInst<Impl>::completeAcc()
+OzoneDynInst<Impl>::completeAcc(Packet *pkt)
 {
-    if (this->isLoad()) {
-        this->fault = this->staticInst->completeAcc(this->req->data,
-                                                    this,
-                                                    this->traceData);
-    } else if (this->isStore()) {
-        this->fault = this->staticInst->completeAcc((uint8_t*)&this->req->result,
-                                                    this,
-                                                    this->traceData);
-    } else {
-        panic("Unknown type!");
-    }
+    this->fault = this->staticInst->completeAcc(pkt, this, this->traceData);
 
     return this->fault;
 }
@@ -298,7 +288,7 @@ template <class Impl>
 void
 OzoneDynInst<Impl>::trap(Fault fault)
 {
-    fault->invoke(this->thread->getXCProxy());
+    fault->invoke(this->thread->getTC());
 }
 
 template <class Impl>
@@ -310,8 +300,8 @@ OzoneDynInst<Impl>::simPalCheck(int palFunc)
 #else
 template <class Impl>
 void
-OzoneDynInst<Impl>::syscall()
+OzoneDynInst<Impl>::syscall(uint64_t &callnum)
 {
-    this->cpu->syscall();
+    this->cpu->syscall(callnum);
 }
 #endif
diff --git a/src/cpu/ozone/front_end.cc b/src/cpu/ozone/front_end.cc
index f0ea8eae1..cfd033564 100644
--- a/src/cpu/ozone/front_end.cc
+++ b/src/cpu/ozone/front_end.cc
@@ -30,7 +30,7 @@
 
 #include "cpu/ozone/front_end_impl.hh"
 #include "cpu/ozone/ozone_impl.hh"
-#include "cpu/ozone/simple_impl.hh"
+//#include "cpu/ozone/simple_impl.hh"
 
 template class FrontEnd<OzoneImpl>;
-template class FrontEnd<SimpleImpl>;
+//template class FrontEnd<SimpleImpl>;
diff --git a/src/cpu/ozone/front_end.hh b/src/cpu/ozone/front_end.hh
index af190008c..3ed3c4d18 100644
--- a/src/cpu/ozone/front_end.hh
+++ b/src/cpu/ozone/front_end.hh
@@ -33,15 +33,17 @@
 
 #include <deque>
 
+#include "arch/utility.hh"
 #include "cpu/inst_seq.hh"
 #include "cpu/o3/bpred_unit.hh"
 #include "cpu/ozone/rename_table.hh"
+#include "mem/port.hh"
 #include "mem/request.hh"
 #include "sim/eventq.hh"
 #include "sim/stats.hh"
 
 class ThreadContext;
-class MemInterface;
+class MemObject;
 template <class>
 class OzoneThreadState;
 class PageTable;
@@ -55,18 +57,55 @@ class FrontEnd
     typedef typename Impl::Params Params;
     typedef typename Impl::DynInst DynInst;
     typedef typename Impl::DynInstPtr DynInstPtr;
-    typedef typename Impl::FullCPU FullCPU;
+    typedef typename Impl::CPUType CPUType;
     typedef typename Impl::BackEnd BackEnd;
 
-    typedef typename Impl::FullCPU::OzoneTC OzoneTC;
-    typedef typename Impl::FullCPU::CommStruct CommStruct;
+    typedef typename Impl::CPUType::OzoneTC OzoneTC;
+    typedef typename Impl::CPUType::CommStruct CommStruct;
+
+    /** IcachePort class.  Handles doing the communication with the
+     * cache/memory.
+     */
+    class IcachePort : public Port
+    {
+      protected:
+        /** Pointer to FE. */
+        FrontEnd<Impl> *fe;
+
+      public:
+        /** Constructor.  Stores the pointer to the owning front end. */
+        IcachePort(FrontEnd<Impl> *_fe)
+            : fe(_fe)
+        { }
+
+      protected:
+        /** Atomic version of receive.  Panics. */
+        virtual Tick recvAtomic(PacketPtr pkt);
+
+        /** Functional version of receive.  Panics. */
+        virtual void recvFunctional(PacketPtr pkt);
+
+        /** Receives status change.  Other than range changing, panics. */
+        virtual void recvStatusChange(Status status);
+
+        /** Returns the address ranges of this device. */
+        virtual void getDeviceAddressRanges(AddrRangeList &resp,
+                                            AddrRangeList &snoop)
+        { resp.clear(); snoop.clear(); }
+
+        /** Timing version of receive.  Handles setting fetch to the
+         * proper status to start fetching. */
+        virtual bool recvTiming(PacketPtr pkt);
+
+        /** Handles doing a retry of a failed fetch. */
+        virtual void recvRetry();
+    };
 
     FrontEnd(Params *params);
 
     std::string name() const;
 
-    void setCPU(FullCPU *cpu_ptr)
-    { cpu = cpu_ptr; }
+    void setCPU(CPUType *cpu_ptr);
 
     void setBackEnd(BackEnd *back_end_ptr)
     { backEnd = back_end_ptr; }
@@ -80,6 +119,8 @@ class FrontEnd
 
     void regStats();
 
+    Port *getIcachePort() { return &icachePort; }
+
     void tick();
     Fault fetchCacheLine();
     void processInst(DynInstPtr &inst);
@@ -104,6 +145,8 @@ class FrontEnd
     bool switchedOut;
 
   private:
+    void recvRetry();
+
     bool updateStatus();
 
     void checkBE();
@@ -130,7 +173,7 @@ class FrontEnd
     { return cpu->globalSeqNum++; }
 
   public:
-    FullCPU *cpu;
+    CPUType *cpu;
 
     BackEnd *backEnd;
 
@@ -141,8 +184,9 @@ class FrontEnd
     enum Status {
         Running,
         Idle,
-        IcacheMissStall,
-        IcacheMissComplete,
+        IcacheWaitResponse,
+        IcacheWaitRetry,
+        IcacheAccessComplete,
         SerializeBlocked,
         SerializeComplete,
         RenameBlocked,
@@ -161,37 +205,9 @@ class FrontEnd
 
     BranchPred branchPred;
 
-    class IcachePort : public Port
-    {
-      protected:
-        FrontEnd *fe;
-
-      public:
-        IcachePort(const std::string &_name, FrontEnd *_fe)
-            : Port(_name), fe(_fe)
-        { }
-
-      protected:
-        virtual Tick recvAtomic(PacketPtr pkt);
-
-        virtual void recvFunctional(PacketPtr pkt);
-
-        virtual void recvStatusChange(Status status);
-
-        virtual void getDeviceAddressRanges(AddrRangeList &resp,
-                                            AddrRangeList &snoop)
-        { resp.clear(); snoop.clear(); }
-
-        virtual bool recvTiming(PacketPtr pkt);
-
-        virtual void recvRetry();
-    };
-
     IcachePort icachePort;
 
-#if !FULL_SYSTEM
-    PageTable *pTable;
-#endif
+    MemObject *mem;
 
     RequestPtr memReq;
 
@@ -209,6 +225,11 @@ class FrontEnd
 
     bool cacheBlkValid;
 
+    bool cacheBlocked;
+
+    /** The packet that is waiting to be retried. */
+    PacketPtr retryPkt;
+
   public:
     RenameTable<Impl> renameTable;
 
diff --git a/src/cpu/ozone/front_end_impl.hh b/src/cpu/ozone/front_end_impl.hh
index 467567c10..9da937320 100644
--- a/src/cpu/ozone/front_end_impl.hh
+++ b/src/cpu/ozone/front_end_impl.hh
@@ -28,21 +28,69 @@
  * Authors: Kevin Lim
  */
 
+#include "config/use_checker.hh"
+
 #include "arch/faults.hh"
 #include "arch/isa_traits.hh"
 #include "base/statistics.hh"
 #include "cpu/thread_context.hh"
 #include "cpu/exetrace.hh"
 #include "cpu/ozone/front_end.hh"
-#include "mem/mem_interface.hh"
-#include "sim/byte_swap.hh"
+#include "mem/mem_object.hh"
+#include "mem/packet.hh"
+#include "mem/request.hh"
+
+#if USE_CHECKER
+#include "cpu/checker/cpu.hh"
+#endif
 
 using namespace TheISA;
 
+template<class Impl>
+Tick
+FrontEnd<Impl>::IcachePort::recvAtomic(PacketPtr pkt)
+{
+    panic("FrontEnd doesn't expect recvAtomic callback!");
+    return curTick;
+}
+
+template<class Impl>
+void
+FrontEnd<Impl>::IcachePort::recvFunctional(PacketPtr pkt)
+{
+    panic("FrontEnd doesn't expect recvFunctional callback!");
+}
+
+template<class Impl>
+void
+FrontEnd<Impl>::IcachePort::recvStatusChange(Status status)
+{
+    if (status == RangeChange)
+        return;
+
+    panic("FrontEnd doesn't expect recvStatusChange callback!");
+}
+
+template<class Impl>
+bool
+FrontEnd<Impl>::IcachePort::recvTiming(Packet *pkt)
+{
+    fe->processCacheCompletion(pkt);
+    return true;
+}
+
+template<class Impl>
+void
+FrontEnd<Impl>::IcachePort::recvRetry()
+{
+    fe->recvRetry();
+}
+
 template <class Impl>
 FrontEnd<Impl>::FrontEnd(Params *params)
     : branchPred(params),
-      icacheInterface(params->icacheInterface),
+      icachePort(this),
+      mem(params->mem),
       instBufferSize(0),
       maxInstBufferSize(params->maxInstBufferSize),
       width(params->frontEndWidth),
@@ -57,7 +105,7 @@ FrontEnd<Impl>::FrontEnd(Params *params)
 
     memReq = NULL;
     // Size of cache block.
-    cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64;
+    cacheBlkSize = 64;
 
     assert(isPowerOf2(cacheBlkSize));
 
@@ -69,11 +117,10 @@ FrontEnd<Impl>::FrontEnd(Params *params)
 
     fetchCacheLineNextCycle = true;
 
-    cacheBlkValid = false;
+    cacheBlkValid = cacheBlocked = false;
+
+    retryPkt = NULL;
 
-#if !FULL_SYSTEM
-//    pTable = params->pTable;
-#endif
     fetchFault = NoFault;
 }
 
@@ -86,6 +133,21 @@ FrontEnd<Impl>::name() const
 
 template <class Impl>
 void
+FrontEnd<Impl>::setCPU(CPUType *cpu_ptr)
+{
+    cpu = cpu_ptr;
+
+    icachePort.setName(this->name() + "-iport");
+
+#if USE_CHECKER
+    if (cpu->checker) {
+        cpu->checker->setIcachePort(&icachePort);
+    }
+#endif
+}
+
+template <class Impl>
+void
 FrontEnd<Impl>::setCommBuffer(TimeBuffer<CommStruct> *_comm)
 {
     comm = _comm;
@@ -272,7 +334,7 @@ FrontEnd<Impl>::tick()
     IFQFcount += instBufferSize == maxInstBufferSize;
 
     // Fetch cache line
-    if (status == IcacheMissComplete) {
+    if (status == IcacheAccessComplete) {
         cacheBlkValid = true;
 
         status = Running;
@@ -281,8 +343,8 @@ FrontEnd<Impl>::tick()
         if (freeRegs <= 0)
             status = RenameBlocked;
         checkBE();
-    } else if (status == IcacheMissStall) {
-        DPRINTF(FE, "Still in Icache miss stall.\n");
+    } else if (status == IcacheWaitResponse || status == IcacheWaitRetry) {
+        DPRINTF(FE, "Still in Icache wait.\n");
         icacheStallCycles++;
         return;
     }
@@ -303,7 +365,7 @@ FrontEnd<Impl>::tick()
     } else if (status == QuiescePending) {
         DPRINTF(FE, "Waiting for quiesce to execute or get squashed.\n");
         return;
-    } else if (status != IcacheMissComplete) {
+    } else if (status != IcacheAccessComplete) {
         if (fetchCacheLineNextCycle) {
             Fault fault = fetchCacheLine();
             if (fault != NoFault) {
@@ -314,7 +376,7 @@ FrontEnd<Impl>::tick()
             fetchCacheLineNextCycle = false;
         }
         // If miss, stall until it returns.
-        if (status == IcacheMissStall) {
+        if (status == IcacheWaitResponse || status == IcacheWaitRetry) {
             // Tell CPU to not tick me for now.
             return;
         }
@@ -404,22 +466,16 @@ FrontEnd<Impl>::fetchCacheLine()
 
     // Setup the memReq to do a read of the first isntruction's address.
     // Set the appropriate read size and flags as well.
-    memReq = new MemReq();
-
-    memReq->asid = 0;
-    memReq->thread_num = 0;
-    memReq->data = new uint8_t[64];
-    memReq->tc = tc;
-    memReq->cmd = Read;
-    memReq->reset(fetch_PC, cacheBlkSize, flags);
+    memReq = new Request(0, fetch_PC, cacheBlkSize, flags,
+                         fetch_PC, cpu->readCpuId(), 0);
 
     // Translate the instruction request.
-    fault = cpu->translateInstReq(memReq);
+    fault = cpu->translateInstReq(memReq, thread);
 
     // Now do the timing access to see whether or not the instruction
     // exists within the cache.
-    if (icacheInterface && fault == NoFault) {
-#if FULL_SYSTEM
+    if (fault == NoFault) {
+#if 0
         if (cpu->system->memctrl->badaddr(memReq->paddr) ||
             memReq->flags & UNCACHEABLE) {
             DPRINTF(FE, "Fetch: Bad address %#x (hopefully on a "
@@ -429,30 +485,21 @@ FrontEnd<Impl>::fetchCacheLine()
         }
 #endif
 
-        memReq->completionEvent = NULL;
-
-        memReq->time = curTick;
-        fault = cpu->mem->read(memReq, cacheData);
-
-        MemAccessResult res = icacheInterface->access(memReq);
-
-        // If the cache missed then schedule an event to wake
-        // up this stage once the cache miss completes.
-        if (icacheInterface->doEvents() && res != MA_HIT) {
-            memReq->completionEvent = new ICacheCompletionEvent(memReq, this);
-
-            status = IcacheMissStall;
-
-            cacheBlkValid = false;
-
-            DPRINTF(FE, "Cache miss.\n");
-        }  else {
-            DPRINTF(FE, "Cache hit.\n");
-
-            cacheBlkValid = true;
-
-//            memcpy(cacheData, memReq->data, memReq->size);
+        // Build packet here.
+        PacketPtr data_pkt = new Packet(memReq,
+                                        Packet::ReadReq, Packet::Broadcast);
+        data_pkt->dataStatic(cacheData);
+
+        if (!icachePort.sendTiming(data_pkt)) {
+            assert(retryPkt == NULL);
+            DPRINTF(Fetch, "Out of MSHRs!\n");
+            status = IcacheWaitRetry;
+            retryPkt = data_pkt;
+            cacheBlocked = true;
+            return NoFault;
         }
+
+        status = IcacheWaitResponse;
     }
 
     // Note that this will set the cache block PC a bit earlier than it should
@@ -565,7 +612,7 @@ FrontEnd<Impl>::handleFault(Fault &fault)
 
 //    instruction->setASID(tid);
 
-    instruction->setState(thread);
+    instruction->setThreadState(thread);
 
     instruction->traceData = NULL;
 
@@ -614,8 +661,8 @@ FrontEnd<Impl>::squash(const InstSeqNum &squash_num, const Addr &next_PC,
     }
 
     // Clear the icache miss if it's outstanding.
-    if (status == IcacheMissStall && icacheInterface) {
-        DPRINTF(FE, "Squashing outstanding Icache miss.\n");
+    if (status == IcacheWaitResponse) {
+        DPRINTF(FE, "Squashing outstanding Icache access.\n");
         memReq = NULL;
     }
 
@@ -652,20 +699,22 @@ FrontEnd<Impl>::getInst()
 
 template <class Impl>
 void
-FrontEnd<Impl>::processCacheCompletion(MemReqPtr &req)
+FrontEnd<Impl>::processCacheCompletion(PacketPtr pkt)
 {
     DPRINTF(FE, "Processing cache completion\n");
 
     // Do something here.
-    if (status != IcacheMissStall ||
-        req != memReq ||
+    if (status != IcacheWaitResponse ||
+        pkt->req != memReq ||
         switchedOut) {
         DPRINTF(FE, "Previous fetch was squashed.\n");
         fetchIcacheSquashes++;
+        delete pkt->req;
+        delete pkt;
         return;
     }
 
-    status = IcacheMissComplete;
+    status = IcacheAccessComplete;
 
 /*    if (checkStall(tid)) {
         fetchStatus[tid] = Blocked;
@@ -677,6 +726,8 @@ FrontEnd<Impl>::processCacheCompletion(MemReqPtr &req)
 
     // Reset the completion event to NULL.
 //    memReq->completionEvent = NULL;
+    delete pkt->req;
+    delete pkt;
     memReq = NULL;
 }
 
@@ -698,6 +749,27 @@ FrontEnd<Impl>::addFreeRegs(int num_freed)
 }
 
 template <class Impl>
+void
+FrontEnd<Impl>::recvRetry()
+{
+    assert(cacheBlocked);
+    if (retryPkt != NULL) {
+        assert(status == IcacheWaitRetry);
+
+        if (icachePort.sendTiming(retryPkt)) {
+            status = IcacheWaitResponse;
+            retryPkt = NULL;
+            cacheBlocked = false;
+        }
+    } else {
+        // Access has been squashed since it was sent out.  Just clear
+        // the cache being blocked.
+        cacheBlocked = false;
+    }
+
+}
+
+template <class Impl>
 bool
 FrontEnd<Impl>::updateStatus()
 {
@@ -775,7 +847,7 @@ FrontEnd<Impl>::getInstFromCacheline()
     DynInstPtr instruction = new DynInst(decode_inst, PC, PC+sizeof(MachInst),
                                          inst_seq, cpu);
 
-    instruction->setState(thread);
+    instruction->setThreadState(thread);
 
     DPRINTF(FE, "Instruction [sn:%lli] created, with PC %#x\n%s\n",
             inst_seq, instruction->readPC(),
@@ -899,24 +971,3 @@ FrontEnd<Impl>::dumpInsts()
         buff_it++;
     }
 }
-
-template <class Impl>
-FrontEnd<Impl>::ICacheCompletionEvent::ICacheCompletionEvent(MemReqPtr &_req, FrontEnd *fe)
-    : Event(&mainEventQueue, Delayed_Writeback_Pri), req(_req), frontEnd(fe)
-{
-    this->setFlags(Event::AutoDelete);
-}
-
-template <class Impl>
-void
-FrontEnd<Impl>::ICacheCompletionEvent::process()
-{
-    frontEnd->processCacheCompletion(req);
-}
-
-template <class Impl>
-const char *
-FrontEnd<Impl>::ICacheCompletionEvent::description()
-{
-    return "ICache completion event";
-}
diff --git a/src/cpu/ozone/lw_back_end.hh b/src/cpu/ozone/lw_back_end.hh
index bb81f60c8..d836ceebd 100644
--- a/src/cpu/ozone/lw_back_end.hh
+++ b/src/cpu/ozone/lw_back_end.hh
@@ -51,6 +51,8 @@ class ThreadContext;
 template <class Impl>
 class OzoneThreadState;
 
+class Port;
+
 template <class Impl>
 class LWBackEnd
 {
@@ -60,9 +62,9 @@ class LWBackEnd
     typedef typename Impl::Params Params;
     typedef typename Impl::DynInst DynInst;
     typedef typename Impl::DynInstPtr DynInstPtr;
-    typedef typename Impl::FullCPU FullCPU;
+    typedef typename Impl::OzoneCPU OzoneCPU;
     typedef typename Impl::FrontEnd FrontEnd;
-    typedef typename Impl::FullCPU::CommStruct CommStruct;
+    typedef typename Impl::OzoneCPU::CommStruct CommStruct;
 
     struct SizeStruct {
         int size;
@@ -95,35 +97,13 @@ class LWBackEnd
         const char *description();
     };
 
-    /** LdWriteback event for a load completion. */
-    class LdWritebackEvent : public Event {
-      private:
-        /** Instruction that is writing back data to the register file. */
-        DynInstPtr inst;
-        /** Pointer to IEW stage. */
-        LWBackEnd *be;
-
-        bool dcacheMiss;
-
-      public:
-        /** Constructs a load writeback event. */
-        LdWritebackEvent(DynInstPtr &_inst, LWBackEnd *be);
-
-        /** Processes writeback event. */
-        virtual void process();
-        /** Returns the description of the writeback event. */
-        virtual const char *description();
-
-        void setDcacheMiss() { dcacheMiss = true; be->addDcacheMiss(inst); }
-    };
-
     LWBackEnd(Params *params);
 
     std::string name() const;
 
     void regStats();
 
-    void setCPU(FullCPU *cpu_ptr);
+    void setCPU(OzoneCPU *cpu_ptr);
 
     void setFrontEnd(FrontEnd *front_end_ptr)
     { frontEnd = front_end_ptr; }
@@ -136,6 +116,8 @@ class LWBackEnd
 
     void setCommBuffer(TimeBuffer<CommStruct> *_comm);
 
+    Port *getDcachePort() { return LSQ.getDcachePort(); }
+
     void tick();
     void squash();
     void generateTCEvent() { tcSquash = true; }
@@ -239,7 +221,7 @@ class LWBackEnd
     void updateComInstStats(DynInstPtr &inst);
 
   public:
-    FullCPU *cpu;
+    OzoneCPU *cpu;
 
     FrontEnd *frontEnd;
 
@@ -273,24 +255,6 @@ class LWBackEnd
 
     RenameTable<Impl> renameTable;
   private:
-    class DCacheCompletionEvent : public Event
-    {
-      private:
-        LWBackEnd *be;
-
-      public:
-        DCacheCompletionEvent(LWBackEnd *_be);
-
-        virtual void process();
-        virtual const char *description();
-    };
-
-    friend class DCacheCompletionEvent;
-
-    DCacheCompletionEvent cacheCompletionEvent;
-
-    MemInterface *dcacheInterface;
-
     // General back end width. Used if the more specific isn't given.
     int width;
 
diff --git a/src/cpu/ozone/lw_back_end_impl.hh b/src/cpu/ozone/lw_back_end_impl.hh
index ed406d5a3..a4f1d805e 100644
--- a/src/cpu/ozone/lw_back_end_impl.hh
+++ b/src/cpu/ozone/lw_back_end_impl.hh
@@ -28,9 +28,14 @@
  * Authors: Kevin Lim
  */
 
-#include "cpu/checker/cpu.hh"
+#include "config/use_checker.hh"
+
 #include "cpu/ozone/lw_back_end.hh"
-#include "encumbered/cpu/full/op_class.hh"
+#include "cpu/op_class.hh"
+
+#if USE_CHECKER
+#include "cpu/checker/cpu.hh"
+#endif
 
 template <class Impl>
 void
@@ -134,86 +139,11 @@ LWBackEnd<Impl>::replayMemInst(DynInstPtr &inst)
     assert(found_inst);
 }
 
-template<class Impl>
-LWBackEnd<Impl>::LdWritebackEvent::LdWritebackEvent(DynInstPtr &_inst,
-                                                  LWBackEnd<Impl> *_be)
-    : Event(&mainEventQueue), inst(_inst), be(_be), dcacheMiss(false)
-{
-    this->setFlags(Event::AutoDelete);
-}
-
-template<class Impl>
-void
-LWBackEnd<Impl>::LdWritebackEvent::process()
-{
-    DPRINTF(BE, "Load writeback event [sn:%lli]\n", inst->seqNum);
-//    DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum);
-
-    //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
-
-//    iewStage->wakeCPU();
-
-    if (be->isSwitchedOut())
-        return;
-
-    if (dcacheMiss) {
-        be->removeDcacheMiss(inst);
-    }
-
-    if (inst->isSquashed()) {
-        inst = NULL;
-        return;
-    }
-
-    if (!inst->isExecuted()) {
-        inst->setExecuted();
-
-        // Execute again to copy data to proper place.
-        inst->completeAcc();
-    }
-
-    // Need to insert instruction into queue to commit
-    be->instToCommit(inst);
-
-    //wroteToTimeBuffer = true;
-//    iewStage->activityThisCycle();
-
-    inst = NULL;
-}
-
-template<class Impl>
-const char *
-LWBackEnd<Impl>::LdWritebackEvent::description()
-{
-    return "Load writeback event";
-}
-
-
-template <class Impl>
-LWBackEnd<Impl>::DCacheCompletionEvent::DCacheCompletionEvent(LWBackEnd *_be)
-    : Event(&mainEventQueue, CPU_Tick_Pri), be(_be)
-{
-}
-
-template <class Impl>
-void
-LWBackEnd<Impl>::DCacheCompletionEvent::process()
-{
-}
-
-template <class Impl>
-const char *
-LWBackEnd<Impl>::DCacheCompletionEvent::description()
-{
-    return "Cache completion event";
-}
-
 template <class Impl>
 LWBackEnd<Impl>::LWBackEnd(Params *params)
     : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5),
-      trapSquash(false), tcSquash(false), cacheCompletionEvent(this),
-      dcacheInterface(params->dcacheInterface), width(params->backEndWidth),
-      exactFullStall(true)
+      trapSquash(false), tcSquash(false),
+      width(params->backEndWidth), exactFullStall(true)
 {
     numROBEntries = params->numROBEntries;
     numInsts = 0;
@@ -239,6 +169,7 @@ LWBackEnd<Impl>::LWBackEnd(Params *params)
     LSQ.init(params, params->LQEntries, params->SQEntries, 0);
 
     dispatchStatus = Running;
+    commitStatus = Running;
 }
 
 template <class Impl>
@@ -569,7 +500,7 @@ LWBackEnd<Impl>::regStats()
 
 template <class Impl>
 void
-LWBackEnd<Impl>::setCPU(FullCPU *cpu_ptr)
+LWBackEnd<Impl>::setCPU(OzoneCPU *cpu_ptr)
 {
     cpu = cpu_ptr;
     LSQ.setCPU(cpu_ptr);
@@ -626,6 +557,7 @@ LWBackEnd<Impl>::checkInterrupts()
         }
     }
 }
+#endif
 
 template <class Impl>
 void
@@ -639,7 +571,7 @@ LWBackEnd<Impl>::handleFault(Fault &fault, Tick latency)
 
     // Consider holding onto the trap and waiting until the trap event
     // happens for this to be executed.
-    fault->invoke(thread->getTCProxy());
+    fault->invoke(thread->getTC());
 
     // Exit state update mode to avoid accidental updating.
     thread->inSyscall = false;
@@ -649,7 +581,6 @@ LWBackEnd<Impl>::handleFault(Fault &fault, Tick latency)
     // Generate trap squash event.
     generateTrapEvent(latency);
 }
-#endif
 
 template <class Impl>
 void
@@ -671,6 +602,7 @@ LWBackEnd<Impl>::tick()
 
 #if FULL_SYSTEM
     checkInterrupts();
+#endif
 
     if (trapSquash) {
         assert(!tcSquash);
@@ -678,7 +610,6 @@ LWBackEnd<Impl>::tick()
     } else if (tcSquash) {
         squashFromTC();
     }
-#endif
 
     if (dispatchStatus != Blocked) {
         dispatchInsts();
@@ -929,11 +860,6 @@ LWBackEnd<Impl>::executeInsts()
         // at the commit stage.
         if (inst->isMemRef() &&
             (!inst->isDataPrefetch() && !inst->isInstPrefetch())) {
-            if (dcacheInterface->isBlocked()) {
-                // Should I move the instruction aside?
-                DPRINTF(BE, "Execute: dcache is blocked\n");
-                break;
-            }
             DPRINTF(BE, "Execute: Initiating access for memory "
                     "reference.\n");
 
@@ -941,7 +867,7 @@ LWBackEnd<Impl>::executeInsts()
                 LSQ.executeLoad(inst);
             } else if (inst->isStore()) {
                 LSQ.executeStore(inst);
-                if (inst->req && !(inst->req->flags & LOCKED)) {
+                if (inst->req && !(inst->req->getFlags() & LOCKED)) {
                     inst->setExecuted();
 
                     instToCommit(inst);
@@ -1078,7 +1004,7 @@ LWBackEnd<Impl>::commitInst(int inst_num)
 
     thread->setPC(inst->readPC());
     thread->setNextPC(inst->readNextPC());
-    inst->reachedCommit = true;
+    inst->setAtCommit();
 
     // If the instruction is not executed yet, then it is a non-speculative
     // or store inst.  Signal backwards that it should be executed.
@@ -1183,9 +1109,11 @@ LWBackEnd<Impl>::commitInst(int inst_num)
 
     // Use checker prior to updating anything due to traps or PC
     // based events.
+#if USE_CHECKER
     if (checker) {
-        checker->tick(inst);
+        checker->verify(inst);
     }
+#endif
 
     if (inst_fault != NoFault) {
         DPRINTF(BE, "Inst [sn:%lli] PC %#x has a fault\n",
@@ -1200,19 +1128,18 @@ LWBackEnd<Impl>::commitInst(int inst_num)
         } else if (inst_num != 0) {
             DPRINTF(BE, "Will wait until instruction is head of commit group.\n");
             return false;
-        } else if (checker && inst->isStore()) {
-            checker->tick(inst);
         }
+#if USE_CHECKER
+        else if (checker && inst->isStore()) {
+            checker->verify(inst);
+        }
+#endif
 
         thread->setInst(
             static_cast<TheISA::MachInst>(inst->staticInst->machInst));
-#if FULL_SYSTEM
+
         handleFault(inst_fault);
         return false;
-#else // !FULL_SYSTEM
-        panic("fault (%d) detected @ PC %08p", inst_fault,
-              inst->PC);
-#endif // FULL_SYSTEM
     }
 
     int freed_regs = 0;
@@ -1259,7 +1186,7 @@ LWBackEnd<Impl>::commitInst(int inst_num)
             assert(!thread->inSyscall && !thread->trapPending);
         oldpc = thread->readPC();
         cpu->system->pcEventQueue.service(
-            thread->getTCProxy());
+            thread->getTC());
         count++;
     } while (oldpc != thread->readPC());
     if (count > 1) {
@@ -1346,7 +1273,7 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
 
         (*insts_it)->setCanCommit();
 
-        (*insts_it)->removeInROB();
+        (*insts_it)->clearInROB();
 
         for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) {
             DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i);
@@ -1497,10 +1424,10 @@ LWBackEnd<Impl>::doSwitchOut()
 
 template <class Impl>
 void
-LWBackEnd<Impl>::takeOverFrom(ThreadContext *old_xc)
+LWBackEnd<Impl>::takeOverFrom(ThreadContext *old_tc)
 {
     switchedOut = false;
-    xcSquash = false;
+    tcSquash = false;
     trapSquash = false;
 
     numInsts = 0;
@@ -1510,7 +1437,7 @@ LWBackEnd<Impl>::takeOverFrom(ThreadContext *old_xc)
     switchedOut = false;
     dispatchStatus = Running;
     commitStatus = Running;
-    LSQ.takeOverFrom(old_xc);
+    LSQ.takeOverFrom(old_tc);
 }
 
 template <class Impl>
diff --git a/src/cpu/ozone/lw_lsq.hh b/src/cpu/ozone/lw_lsq.hh
index b2924db54..2eb09d01a 100644
--- a/src/cpu/ozone/lw_lsq.hh
+++ b/src/cpu/ozone/lw_lsq.hh
@@ -47,7 +47,7 @@
 #include "sim/debug.hh"
 #include "sim/sim_object.hh"
 
-//class PageTable;
+class MemObject;
 
 /**
  * Class that implements the actual LQ and SQ for each specific thread.
@@ -64,7 +64,7 @@ template <class Impl>
 class OzoneLWLSQ {
   public:
     typedef typename Impl::Params Params;
-    typedef typename Impl::FullCPU FullCPU;
+    typedef typename Impl::OzoneCPU OzoneCPU;
     typedef typename Impl::BackEnd BackEnd;
     typedef typename Impl::DynInstPtr DynInstPtr;
     typedef typename Impl::IssueStruct IssueStruct;
@@ -73,35 +73,6 @@ class OzoneLWLSQ {
 
     typedef typename std::map<InstSeqNum, DynInstPtr>::iterator LdMapIt;
 
-  private:
-    class StoreCompletionEvent : public Event {
-      public:
-        /** Constructs a store completion event. */
-        StoreCompletionEvent(DynInstPtr &inst, BackEnd *be,
-                             Event *wb_event, OzoneLWLSQ *lsq_ptr);
-
-        /** Processes the store completion event. */
-        void process();
-
-        /** Returns the description of this event. */
-        const char *description();
-
-      private:
-        /** The store index of the store being written back. */
-        DynInstPtr inst;
-
-        BackEnd *be;
-        /** The writeback event for the store.  Needed for store
-         * conditionals.
-         */
-      public:
-        Event *wbEvent;
-        bool miss;
-      private:
-        /** The pointer to the LSQ unit that issued the store. */
-        OzoneLWLSQ<Impl> *lsqPtr;
-    };
-
   public:
     /** Constructs an LSQ unit. init() must be called prior to use. */
     OzoneLWLSQ();
@@ -114,15 +85,13 @@ class OzoneLWLSQ {
     std::string name() const;
 
     /** Sets the CPU pointer. */
-    void setCPU(FullCPU *cpu_ptr)
-    { cpu = cpu_ptr; }
+    void setCPU(OzoneCPU *cpu_ptr);
 
     /** Sets the back-end stage pointer. */
     void setBE(BackEnd *be_ptr)
     { be = be_ptr; }
 
-    /** Sets the page table pointer. */
-//    void setPageTable(PageTable *pt_ptr);
+    Port *getDcachePort() { return &dcachePort; }
 
     /** Ticks the LSQ unit, which in this case only resets the number of
      * used cache ports.
@@ -155,6 +124,10 @@ class OzoneLWLSQ {
     /** Writes back stores. */
     void writebackStores();
 
+    /** Completes the data access that has been returned from the
+     * memory system. */
+    void completeDataAccess(PacketPtr pkt);
+
     // @todo: Include stats in the LSQ unit.
     //void regStats();
 
@@ -231,8 +204,8 @@ class OzoneLWLSQ {
 
     /** Returns if the LSQ unit will writeback on this cycle. */
     bool willWB() { return storeQueue.back().canWB &&
-                        !storeQueue.back().completed/* &&
-                                                       !dcacheInterface->isBlocked()*/; }
+                        !storeQueue.back().completed &&
+                        !isStoreBlocked; }
 
     void switchOut();
 
@@ -243,12 +216,21 @@ class OzoneLWLSQ {
     bool switchedOut;
 
   private:
+    /** Writes back the instruction, sending it to IEW. */
+    void writeback(DynInstPtr &inst, PacketPtr pkt);
+
+    /** Handles completing the send of a store to memory. */
+    void storePostSend(Packet *pkt, DynInstPtr &inst);
+
     /** Completes the store at the specified index. */
     void completeStore(int store_idx);
 
+    /** Handles doing the retry. */
+    void recvRetry();
+
   private:
     /** Pointer to the CPU. */
-    FullCPU *cpu;
+    OzoneCPU *cpu;
 
     /** Pointer to the back-end stage. */
     BackEnd *be;
@@ -258,11 +240,11 @@ class OzoneLWLSQ {
     class DcachePort : public Port
     {
       protected:
-        FullCPU *cpu;
+        OzoneLWLSQ *lsq;
 
       public:
-        DcachePort(const std::string &_name, FullCPU *_cpu)
-            : Port(_name), cpu(_cpu)
+        DcachePort(OzoneLWLSQ *_lsq)
+            : lsq(_lsq)
         { }
 
       protected:
@@ -281,12 +263,9 @@ class OzoneLWLSQ {
         virtual void recvRetry();
     };
 
-    /** Pointer to the D-cache. */
+    /** D-cache port. */
     DcachePort dcachePort;
 
-    /** Pointer to the page table. */
-//    PageTable *pTable;
-
   public:
     struct SQEntry {
         /** Constructs an empty store queue entry. */
@@ -319,6 +298,48 @@ class OzoneLWLSQ {
         typename std::list<DynInstPtr>::iterator lqIt;
     };
 
+    /** Derived class to hold any sender state the LSQ needs. */
+    class LSQSenderState : public Packet::SenderState
+    {
+      public:
+        /** Default constructor. */
+        LSQSenderState()
+            : noWB(false)
+        { }
+
+        /** Instruction who initiated the access to memory. */
+        DynInstPtr inst;
+        /** Whether or not it is a load. */
+        bool isLoad;
+        /** The LQ/SQ index of the instruction. */
+        int idx;
+        /** Whether or not the instruction will need to writeback. */
+        bool noWB;
+    };
+
+    /** Writeback event, specifically for when stores forward data to loads. */
+    class WritebackEvent : public Event {
+      public:
+        /** Constructs a writeback event. */
+        WritebackEvent(DynInstPtr &_inst, PacketPtr pkt, OzoneLWLSQ *lsq_ptr);
+
+        /** Processes the writeback event. */
+        void process();
+
+        /** Returns the description of this event. */
+        const char *description();
+
+      private:
+        /** Instruction whose results are being written back. */
+        DynInstPtr inst;
+
+        /** The packet that would have been sent to memory. */
+        PacketPtr pkt;
+
+        /** The pointer to the LSQ unit that issued the store. */
+        OzoneLWLSQ<Impl> *lsqPtr;
+    };
+
     enum Status {
         Running,
         Idle,
@@ -395,6 +416,12 @@ class OzoneLWLSQ {
     /** The index of the above store. */
     LQIt stallingLoad;
 
+    /** The packet that needs to be retried. */
+    PacketPtr retryPkt;
+
+    /** Whether or not a store is blocked due to the memory system. */
+    bool isStoreBlocked;
+
     /** Whether or not a load is blocked due to the memory system.  It is
      *  cleared when this value is checked via loadBlocked().
      */
@@ -470,7 +497,7 @@ OzoneLWLSQ<Impl>::read(RequestPtr req, T &data, int load_idx)
     // too).
     // @todo: Fix uncached accesses.
     if (req->getFlags() & UNCACHEABLE &&
-        (inst != loadQueue.back() || !inst->reachedCommit)) {
+        (inst != loadQueue.back() || !inst->isAtCommit())) {
         DPRINTF(OzoneLSQ, "[sn:%lli] Uncached load and not head of "
                 "commit/LSQ!\n",
                 inst->seqNum);
@@ -532,17 +559,19 @@ OzoneLWLSQ<Impl>::read(RequestPtr req, T &data, int load_idx)
 
             DPRINTF(OzoneLSQ, "Forwarding from store [sn:%lli] to load to "
                     "[sn:%lli] addr %#x, data %#x\n",
-                    (*sq_it).inst->seqNum, inst->seqNum, req->vaddr, *(inst->memData));
-/*
-            typename BackEnd::LdWritebackEvent *wb =
-                new typename BackEnd::LdWritebackEvent(inst,
-                                                       be);
+                    (*sq_it).inst->seqNum, inst->seqNum, req->getVaddr(),
+                    *(inst->memData));
+
+            PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast);
+            data_pkt->dataStatic(inst->memData);
+
+            WritebackEvent *wb = new WritebackEvent(inst, data_pkt, this);
 
             // We'll say this has a 1 cycle load-store forwarding latency
             // for now.
-            // FIXME - Need to make this a parameter.
+            // @todo: Need to make this a parameter.
             wb->schedule(curTick);
-*/
+
             // Should keep track of stat for forwarded data
             return NoFault;
         } else if ((store_has_lower_limit && lower_load_has_store_part) ||
@@ -575,7 +604,7 @@ OzoneLWLSQ<Impl>::read(RequestPtr req, T &data, int load_idx)
 
             DPRINTF(OzoneLSQ, "Load-store forwarding mis-match. "
                     "Store [sn:%lli] to load addr %#x\n",
-                    (*sq_it).inst->seqNum, req->vaddr);
+                    (*sq_it).inst->seqNum, req->getVaddr());
 
             return NoFault;
         }
@@ -597,6 +626,12 @@ OzoneLWLSQ<Impl>::read(RequestPtr req, T &data, int load_idx)
     PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast);
     data_pkt->dataStatic(inst->memData);
 
+    LSQSenderState *state = new LSQSenderState;
+    state->isLoad = true;
+    state->idx = load_idx;
+    state->inst = inst;
+    data_pkt->senderState = state;
+
     // if we have a cache, do cache access too
     if (!dcachePort.sendTiming(data_pkt)) {
         // There's an older load that's already going to squash.
@@ -613,6 +648,10 @@ OzoneLWLSQ<Impl>::read(RequestPtr req, T &data, int load_idx)
         return NoFault;
     }
 
+    if (req->getFlags() & LOCKED) {
+        cpu->lockFlag = true;
+    }
+
     if (data_pkt->result != Packet::Success) {
         DPRINTF(OzoneLSQ, "OzoneLSQ: D-cache miss!\n");
         DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n",
diff --git a/src/cpu/ozone/lw_lsq_impl.hh b/src/cpu/ozone/lw_lsq_impl.hh
index 05db3028a..88e9c218f 100644
--- a/src/cpu/ozone/lw_lsq_impl.hh
+++ b/src/cpu/ozone/lw_lsq_impl.hh
@@ -28,64 +28,112 @@
  * Authors: Kevin Lim
  */
 
+#include "config/use_checker.hh"
+
 #include "arch/isa_traits.hh"
 #include "base/str.hh"
 #include "cpu/ozone/lw_lsq.hh"
 #include "cpu/checker/cpu.hh"
 
-template <class Impl>
-OzoneLWLSQ<Impl>::StoreCompletionEvent::StoreCompletionEvent(DynInstPtr &_inst,
-                                                             BackEnd *_be,
-                                                             Event *wb_event,
-                                                             OzoneLWLSQ<Impl> *lsq_ptr)
-    : Event(&mainEventQueue),
-      inst(_inst),
-      be(_be),
-      wbEvent(wb_event),
-      miss(false),
-      lsqPtr(lsq_ptr)
+template<class Impl>
+OzoneLWLSQ<Impl>::WritebackEvent::WritebackEvent(DynInstPtr &_inst, PacketPtr _pkt,
+                                                 OzoneLWLSQ *lsq_ptr)
+    : Event(&mainEventQueue), inst(_inst), pkt(_pkt), lsqPtr(lsq_ptr)
 {
     this->setFlags(Event::AutoDelete);
 }
 
-template <class Impl>
+template<class Impl>
 void
-OzoneLWLSQ<Impl>::StoreCompletionEvent::process()
+OzoneLWLSQ<Impl>::WritebackEvent::process()
+{
+    if (!lsqPtr->isSwitchedOut()) {
+        lsqPtr->writeback(inst, pkt);
+    }
+    delete pkt;
+}
+
+template<class Impl>
+const char *
+OzoneLWLSQ<Impl>::WritebackEvent::description()
 {
-    DPRINTF(OzoneLSQ, "Cache miss complete for store [sn:%lli]\n",
-            inst->seqNum);
+    return "Store writeback event";
+}
 
-    //lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum);
+template <class Impl>
+Tick
+OzoneLWLSQ<Impl>::DcachePort::recvAtomic(PacketPtr pkt)
+{
+    panic("O3CPU model does not work with atomic mode!");
+    return curTick;
+}
 
-//    lsqPtr->cpu->wakeCPU();
-    if (lsqPtr->isSwitchedOut()) {
-        if (wbEvent)
-            delete wbEvent;
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::DcachePort::recvFunctional(PacketPtr pkt)
+{
+    panic("O3CPU doesn't expect recvFunctional callback!");
+}
 
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::DcachePort::recvStatusChange(Status status)
+{
+    if (status == RangeChange)
         return;
-    }
 
-    if (wbEvent) {
-        wbEvent->process();
-        delete wbEvent;
-    }
+    panic("O3CPU doesn't expect recvStatusChange callback!");
+}
 
-    lsqPtr->completeStore(inst->sqIdx);
-    if (miss)
-        be->removeDcacheMiss(inst);
+template <class Impl>
+bool
+OzoneLWLSQ<Impl>::DcachePort::recvTiming(PacketPtr pkt)
+{
+    lsq->completeDataAccess(pkt);
+    return true;
 }
 
 template <class Impl>
-const char *
-OzoneLWLSQ<Impl>::StoreCompletionEvent::description()
+void
+OzoneLWLSQ<Impl>::DcachePort::recvRetry()
+{
+    lsq->recvRetry();
+}
+
+template<class Impl>
+void
+OzoneLWLSQ<Impl>::completeDataAccess(PacketPtr pkt)
 {
-    return "LSQ store completion event";
+    LSQSenderState *state = dynamic_cast<LSQSenderState *>(pkt->senderState);
+    DynInstPtr inst = state->inst;
+    DPRINTF(IEW, "Writeback event [sn:%lli]\n", inst->seqNum);
+    DPRINTF(Activity, "Activity: Writeback event [sn:%lli]\n", inst->seqNum);
+
+    //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
+
+    if (isSwitchedOut() || inst->isSquashed()) {
+        delete state;
+        delete pkt;
+        return;
+    } else {
+        if (!state->noWB) {
+            writeback(inst, pkt);
+        }
+
+        if (inst->isStore()) {
+            completeStore(state->idx);
+        }
+    }
+
+    delete state;
+    delete pkt;
 }
 
 template <class Impl>
 OzoneLWLSQ<Impl>::OzoneLWLSQ()
-    : loads(0), stores(0), storesToWB(0), stalled(false), isLoadBlocked(false),
-      loadBlockedHandled(false)
+    : switchedOut(false), dcachePort(this), loads(0), stores(0),
+      storesToWB(0), stalled(false), isStoreBlocked(false),
+      isLoadBlocked(false), loadBlockedHandled(false)
 {
 }
 
@@ -106,11 +154,11 @@ OzoneLWLSQ<Impl>::init(Params *params, unsigned maxLQEntries,
         SQIndices.push(i);
     }
 
+    mem = params->mem;
+
     usedPorts = 0;
     cachePorts = params->cachePorts;
 
-    dcacheInterface = params->dcacheInterface;
-
     loadFaultInst = storeFaultInst = memDepViolator = NULL;
 
     blockedLoadSeqNum = 0;
@@ -125,6 +173,20 @@ OzoneLWLSQ<Impl>::name() const
 
 template<class Impl>
 void
+OzoneLWLSQ<Impl>::setCPU(OzoneCPU *cpu_ptr)
+{
+    cpu = cpu_ptr;
+    dcachePort.setName(this->name() + "-dport");
+
+#if USE_CHECKER
+    if (cpu->checker) {
+        cpu->checker->setDcachePort(&dcachePort);
+    }
+#endif
+}
+
+template<class Impl>
+void
 OzoneLWLSQ<Impl>::clearLQ()
 {
     loadQueue.clear();
@@ -481,6 +543,12 @@ OzoneLWLSQ<Impl>::writebackStores()
            (*sq_it).canWB &&
            usedPorts < cachePorts) {
 
+        if (isStoreBlocked) {
+            DPRINTF(OzoneLSQ, "Unable to write back any more stores, cache"
+                    " is blocked!\n");
+            break;
+        }
+
         DynInstPtr inst = (*sq_it).inst;
 
         if ((*sq_it).size == 0 && !(*sq_it).completed) {
@@ -495,48 +563,64 @@ OzoneLWLSQ<Impl>::writebackStores()
             continue;
         }
 
-        if (dcacheInterface && dcacheInterface->isBlocked()) {
-            DPRINTF(OzoneLSQ, "Unable to write back any more stores, cache"
-                    " is blocked!\n");
-            break;
-        }
-
         ++usedPorts;
 
         assert((*sq_it).req);
         assert(!(*sq_it).committed);
 
+        Request *req = (*sq_it).req;
         (*sq_it).committed = true;
 
-        MemReqPtr req = (*sq_it).req;
+        assert(!inst->memData);
+        inst->memData = new uint8_t[64];
+        memcpy(inst->memData, (uint8_t *)&(*sq_it).data,
+               req->getSize());
 
-        req->cmd = Write;
-        req->completionEvent = NULL;
-        req->time = curTick;
+        PacketPtr data_pkt = new Packet(req, Packet::WriteReq, Packet::Broadcast);
+        data_pkt->dataStatic(inst->memData);
 
-        switch((*sq_it).size) {
-          case 1:
-            cpu->write(req, (uint8_t &)(*sq_it).data);
-            break;
-          case 2:
-            cpu->write(req, (uint16_t &)(*sq_it).data);
-            break;
-          case 4:
-            cpu->write(req, (uint32_t &)(*sq_it).data);
-            break;
-          case 8:
-            cpu->write(req, (uint64_t &)(*sq_it).data);
-            break;
-          default:
-            panic("Unexpected store size!\n");
-        }
-        if (!(req->flags & LOCKED)) {
-            (*sq_it).inst->setCompleted();
-            if (cpu->checker) {
-                cpu->checker->tick((*sq_it).inst);
+        LSQSenderState *state = new LSQSenderState;
+        state->isLoad = false;
+        state->idx = inst->sqIdx;
+        state->inst = inst;
+        data_pkt->senderState = state;
+
+        DPRINTF(OzoneLSQ, "D-Cache: Writing back store PC:%#x "
+                "to Addr:%#x, data:%#x [sn:%lli]\n",
+                (*sq_it).inst->readPC(),
+                req->getPaddr(), *(inst->memData),
+                inst->seqNum);
+
+        // @todo: Remove this SC hack once the memory system handles it.
+        if (req->getFlags() & LOCKED) {
+            if (req->getFlags() & UNCACHEABLE) {
+                req->setScResult(2);
+            } else {
+                if (cpu->lockFlag) {
+                    req->setScResult(1);
+                } else {
+                    req->setScResult(0);
+                    // Hack: Instantly complete this store.
+                    completeDataAccess(data_pkt);
+                    --sq_it;
+                    continue;
+                }
             }
+        } else {
+            // Non-store conditionals do not need a writeback.
+            state->noWB = true;
         }
 
+        if (!dcachePort.sendTiming(data_pkt)) {
+            // Need to handle becoming blocked on a store.
+            isStoreBlocked = true;
+            assert(retryPkt == NULL);
+            retryPkt = data_pkt;
+        } else {
+            storePostSend(data_pkt, inst);
+            --sq_it;
+        }
+/*
         DPRINTF(OzoneLSQ, "D-Cache: Writing back store idx:%i PC:%#x "
                 "to Addr:%#x, data:%#x [sn:%lli]\n",
                 inst->sqIdx,inst->readPC(),
@@ -606,6 +690,7 @@ OzoneLWLSQ<Impl>::writebackStores()
         } else {
             panic("Must HAVE DCACHE!!!!!\n");
         }
+*/
     }
 
     // Not sure this should set it to 0.
@@ -685,10 +770,6 @@ OzoneLWLSQ<Impl>::squash(const InstSeqNum &squashed_num)
         SQIndices.push((*sq_it).inst->sqIdx);
         (*sq_it).inst = NULL;
         (*sq_it).canWB = 0;
-
-        if ((*sq_it).req) {
-            assert(!(*sq_it).req->completionEvent);
-        }
         (*sq_it).req = NULL;
         --stores;
         storeQueue.erase(sq_it++);
@@ -734,6 +815,72 @@ OzoneLWLSQ<Impl>::dumpInsts()
 
 template <class Impl>
 void
+OzoneLWLSQ<Impl>::storePostSend(Packet *pkt, DynInstPtr &inst)
+{
+    if (isStalled() &&
+        inst->seqNum == stallingStoreIsn) {
+        DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] "
+                "load [sn:%lli]\n",
+                stallingStoreIsn, (*stallingLoad)->seqNum);
+        stalled = false;
+        stallingStoreIsn = 0;
+        be->replayMemInst((*stallingLoad));
+    }
+
+    if (!inst->isStoreConditional()) {
+        // The store is basically completed at this time. This
+        // only works so long as the checker doesn't try to
+        // verify the value in memory for stores.
+        inst->setCompleted();
+#if USE_CHECKER
+        if (cpu->checker) {
+            cpu->checker->verify(inst);
+        }
+#endif
+    }
+
+    if (pkt->result != Packet::Success) {
+        DPRINTF(OzoneLSQ,"D-Cache Write Miss!\n");
+
+        DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n",
+                inst->seqNum);
+
+        //mshrSeqNums.push_back(storeQueue[storeWBIdx].inst->seqNum);
+
+        //DPRINTF(OzoneLWLSQ, "Added MSHR. count = %i\n",mshrSeqNums.size());
+
+        // @todo: Increment stat here.
+    } else {
+        DPRINTF(OzoneLSQ,"D-Cache: Write Hit!\n");
+
+        DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n",
+                inst->seqNum);
+    }
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt)
+{
+    // Squashed instructions do not need to complete their access.
+    if (inst->isSquashed()) {
+        assert(!inst->isStore());
+        return;
+    }
+
+    if (!inst->isExecuted()) {
+        inst->setExecuted();
+
+        // Complete access to copy data to proper place.
+        inst->completeAcc(pkt);
+    }
+
+    // Need to insert instruction into queue to commit
+    be->instToCommit(inst);
+}
+
+template <class Impl>
+void
 OzoneLWLSQ<Impl>::completeStore(int store_idx)
 {
     SQHashIt sq_hash_it = SQItHash.find(store_idx);
@@ -766,9 +913,18 @@ OzoneLWLSQ<Impl>::completeStore(int store_idx)
     --stores;
 
     inst->setCompleted();
+#if USE_CHECKER
     if (cpu->checker) {
-        cpu->checker->tick(inst);
+        cpu->checker->verify(inst);
     }
+#endif
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::recvRetry()
+{
+    panic("Unimplemented!");
 }
 
 template <class Impl>
@@ -777,68 +933,6 @@ OzoneLWLSQ<Impl>::switchOut()
 {
     assert(storesToWB == 0);
     switchedOut = true;
-    SQIt sq_it = --(storeQueue.end());
-    while (storesToWB > 0 &&
-           sq_it != storeQueue.end() &&
-           (*sq_it).inst &&
-           (*sq_it).canWB) {
-
-        DynInstPtr inst = (*sq_it).inst;
-
-        if ((*sq_it).size == 0 && !(*sq_it).completed) {
-            sq_it--;
-            continue;
-        }
-
-        // Store conditionals don't complete until *after* they have written
-        // back.  If it's here and not yet sent to memory, then don't bother
-        // as it's not part of committed state.
-        if (inst->isDataPrefetch() || (*sq_it).committed) {
-            sq_it--;
-            continue;
-        } else if ((*sq_it).req->flags & LOCKED) {
-            sq_it--;
-            assert(!(*sq_it).canWB ||
-                   ((*sq_it).canWB && (*sq_it).req->flags & LOCKED));
-            continue;
-        }
-
-        assert((*sq_it).req);
-        assert(!(*sq_it).committed);
-
-        MemReqPtr req = (*sq_it).req;
-        (*sq_it).committed = true;
-
-        req->cmd = Write;
-        req->completionEvent = NULL;
-        req->time = curTick;
-        assert(!req->data);
-        req->data = new uint8_t[64];
-        memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size);
-
-        DPRINTF(OzoneLSQ, "Switching out : Writing back store idx:%i PC:%#x "
-                "to Addr:%#x, data:%#x directly to memory [sn:%lli]\n",
-                inst->sqIdx,inst->readPC(),
-                req->paddr, *(req->data),
-                inst->seqNum);
-
-        switch((*sq_it).size) {
-          case 1:
-            cpu->write(req, (uint8_t &)(*sq_it).data);
-            break;
-          case 2:
-            cpu->write(req, (uint16_t &)(*sq_it).data);
-            break;
-          case 4:
-            cpu->write(req, (uint32_t &)(*sq_it).data);
-            break;
-          case 8:
-            cpu->write(req, (uint64_t &)(*sq_it).data);
-            break;
-          default:
-            panic("Unexpected store size!\n");
-        }
-    }
 
     // Clear the queue to free up resources
     storeQueue.clear();
diff --git a/src/cpu/ozone/ozone_base_dyn_inst.cc b/src/cpu/ozone/ozone_base_dyn_inst.cc
new file mode 100644
index 000000000..5a3a69dff
--- /dev/null
+++ b/src/cpu/ozone/ozone_base_dyn_inst.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#include "cpu/base_dyn_inst_impl.hh"
+#include "cpu/ozone/ozone_impl.hh"
+
+// Explicit instantiation
+template class BaseDynInst<OzoneImpl>;
+
+template <>
+int
+BaseDynInst<OzoneImpl>::instcount = 0;
diff --git a/src/cpu/ozone/ozone_impl.hh b/src/cpu/ozone/ozone_impl.hh
index e977d06a9..503675738 100644
--- a/src/cpu/ozone/ozone_impl.hh
+++ b/src/cpu/ozone/ozone_impl.hh
@@ -50,7 +50,7 @@ class OzoneDynInst;
 struct OzoneImpl {
     typedef SimpleParams Params;
     typedef OzoneCPU<OzoneImpl> OzoneCPU;
-    typedef OzoneCPU FullCPU;
+    typedef OzoneCPU CPUType;
 
     // Would like to put these into their own area.
 //    typedef NullPredictor BranchPred;
diff --git a/src/cpu/ozone/rename_table.cc b/src/cpu/ozone/rename_table.cc
index b0a36afbe..a44054b6e 100644
--- a/src/cpu/ozone/rename_table.cc
+++ b/src/cpu/ozone/rename_table.cc
@@ -30,7 +30,7 @@
 
 #include "cpu/ozone/rename_table_impl.hh"
 #include "cpu/ozone/ozone_impl.hh"
-#include "cpu/ozone/simple_impl.hh"
+//#include "cpu/ozone/simple_impl.hh"
 
 template class RenameTable<OzoneImpl>;
-template class RenameTable<SimpleImpl>;
+//template class RenameTable<SimpleImpl>;
diff --git a/src/cpu/ozone/simple_base_dyn_inst.cc b/src/cpu/ozone/simple_base_dyn_inst.cc
new file mode 100644
index 000000000..fdaeaf57e
--- /dev/null
+++ b/src/cpu/ozone/simple_base_dyn_inst.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#include "cpu/base_dyn_inst_impl.hh"
+#include "cpu/ozone/simple_impl.hh"
+
+// Explicit instantiation
+template class BaseDynInst<SimpleImpl>;
+
+template <>
+int
+BaseDynInst<SimpleImpl>::instcount = 0;
diff --git a/src/cpu/ozone/simple_cpu_builder.cc b/src/cpu/ozone/simple_cpu_builder.cc
new file mode 100644
index 000000000..baaf7c708
--- /dev/null
+++ b/src/cpu/ozone/simple_cpu_builder.cc
@@ -0,0 +1,452 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#include <string>
+
+#include "cpu/checker/cpu.hh"
+#include "cpu/inst_seq.hh"
+#include "cpu/ozone/cpu_impl.hh"
+#include "cpu/ozone/simple_impl.hh"
+#include "cpu/ozone/simple_params.hh"
+#include "mem/cache/base_cache.hh"
+#include "sim/builder.hh"
+#include "sim/process.hh"
+#include "sim/sim_object.hh"
+
+template
+class OzoneCPU<SimpleImpl>;
+
+class SimpleOzoneCPU : public OzoneCPU<SimpleImpl>
+{
+  public:
+    SimpleOzoneCPU(SimpleParams *p)
+        : OzoneCPU<SimpleImpl>(p)
+    { }
+};
+
+////////////////////////////////////////////////////////////////////////
+//
+//  OzoneCPU Simulation Object
+//
+
+BEGIN_DECLARE_SIM_OBJECT_PARAMS(SimpleOzoneCPU)
+
+    Param<int> clock;
+    Param<int> numThreads;
+
+#if FULL_SYSTEM
+SimObjectParam<System *> system;
+Param<int> cpu_id;
+SimObjectParam<AlphaITB *> itb;
+SimObjectParam<AlphaDTB *> dtb;
+#else
+SimObjectVectorParam<Process *> workload;
+//SimObjectParam<PageTable *> page_table;
+#endif // FULL_SYSTEM
+
+SimObjectParam<FunctionalMemory *> mem;
+
+SimObjectParam<BaseCPU *> checker;
+
+Param<Counter> max_insts_any_thread;
+Param<Counter> max_insts_all_threads;
+Param<Counter> max_loads_any_thread;
+Param<Counter> max_loads_all_threads;
+
+SimObjectParam<BaseCache *> icache;
+SimObjectParam<BaseCache *> dcache;
+
+Param<unsigned> cachePorts;
+Param<unsigned> width;
+Param<unsigned> frontEndWidth;
+Param<unsigned> backEndWidth;
+Param<unsigned> backEndSquashLatency;
+Param<unsigned> backEndLatency;
+Param<unsigned> maxInstBufferSize;
+Param<unsigned> numPhysicalRegs;
+
+Param<unsigned> decodeToFetchDelay;
+Param<unsigned> renameToFetchDelay;
+Param<unsigned> iewToFetchDelay;
+Param<unsigned> commitToFetchDelay;
+Param<unsigned> fetchWidth;
+
+Param<unsigned> renameToDecodeDelay;
+Param<unsigned> iewToDecodeDelay;
+Param<unsigned> commitToDecodeDelay;
+Param<unsigned> fetchToDecodeDelay;
+Param<unsigned> decodeWidth;
+
+Param<unsigned> iewToRenameDelay;
+Param<unsigned> commitToRenameDelay;
+Param<unsigned> decodeToRenameDelay;
+Param<unsigned> renameWidth;
+
+Param<unsigned> commitToIEWDelay;
+Param<unsigned> renameToIEWDelay;
+Param<unsigned> issueToExecuteDelay;
+Param<unsigned> issueWidth;
+Param<unsigned> executeWidth;
+Param<unsigned> executeIntWidth;
+Param<unsigned> executeFloatWidth;
+Param<unsigned> executeBranchWidth;
+Param<unsigned> executeMemoryWidth;
+
+Param<unsigned> iewToCommitDelay;
+Param<unsigned> renameToROBDelay;
+Param<unsigned> commitWidth;
+Param<unsigned> squashWidth;
+
+Param<std::string> predType;
+Param<unsigned> localPredictorSize;
+Param<unsigned> localCtrBits;
+Param<unsigned> localHistoryTableSize;
+Param<unsigned> localHistoryBits;
+Param<unsigned> globalPredictorSize;
+Param<unsigned> globalCtrBits;
+Param<unsigned> globalHistoryBits;
+Param<unsigned> choicePredictorSize;
+Param<unsigned> choiceCtrBits;
+
+Param<unsigned> BTBEntries;
+Param<unsigned> BTBTagSize;
+
+Param<unsigned> RASSize;
+
+Param<unsigned> LQEntries;
+Param<unsigned> SQEntries;
+Param<unsigned> LFSTSize;
+Param<unsigned> SSITSize;
+
+Param<unsigned> numPhysIntRegs;
+Param<unsigned> numPhysFloatRegs;
+Param<unsigned> numIQEntries;
+Param<unsigned> numROBEntries;
+
+Param<bool> decoupledFrontEnd;
+Param<int> dispatchWidth;
+Param<int> wbWidth;
+
+Param<unsigned> smtNumFetchingThreads;
+Param<std::string>   smtFetchPolicy;
+Param<std::string>   smtLSQPolicy;
+Param<unsigned> smtLSQThreshold;
+Param<std::string>   smtIQPolicy;
+Param<unsigned> smtIQThreshold;
+Param<std::string>   smtROBPolicy;
+Param<unsigned> smtROBThreshold;
+Param<std::string>   smtCommitPolicy;
+
+Param<unsigned> instShiftAmt;
+
+Param<bool> defer_registration;
+
+Param<bool> function_trace;
+Param<Tick> function_trace_start;
+
+END_DECLARE_SIM_OBJECT_PARAMS(SimpleOzoneCPU)
+
+BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU)
+
+    INIT_PARAM(clock, "clock speed"),
+    INIT_PARAM(numThreads, "number of HW thread contexts"),
+
+#if FULL_SYSTEM
+    INIT_PARAM(system, "System object"),
+    INIT_PARAM(cpu_id, "processor ID"),
+    INIT_PARAM(itb, "Instruction translation buffer"),
+    INIT_PARAM(dtb, "Data translation buffer"),
+#else
+    INIT_PARAM(workload, "Processes to run"),
+//    INIT_PARAM(page_table, "Page table"),
+#endif // FULL_SYSTEM
+
+    INIT_PARAM_DFLT(mem, "Memory", NULL),
+
+    INIT_PARAM_DFLT(checker, "Checker CPU", NULL),
+
+    INIT_PARAM_DFLT(max_insts_any_thread,
+                    "Terminate when any thread reaches this inst count",
+                    0),
+    INIT_PARAM_DFLT(max_insts_all_threads,
+                    "Terminate when all threads have reached"
+                    "this inst count",
+                    0),
+    INIT_PARAM_DFLT(max_loads_any_thread,
+                    "Terminate when any thread reaches this load count",
+                    0),
+    INIT_PARAM_DFLT(max_loads_all_threads,
+                    "Terminate when all threads have reached this load"
+                    "count",
+                    0),
+
+    INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL),
+    INIT_PARAM_DFLT(dcache, "L1 data cache", NULL),
+
+    INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200),
+    INIT_PARAM_DFLT(width, "Width", 1),
+    INIT_PARAM_DFLT(frontEndWidth, "Front end width", 1),
+    INIT_PARAM_DFLT(backEndWidth, "Back end width", 1),
+    INIT_PARAM_DFLT(backEndSquashLatency, "Back end squash latency", 1),
+    INIT_PARAM_DFLT(backEndLatency, "Back end latency", 1),
+    INIT_PARAM_DFLT(maxInstBufferSize, "Maximum instruction buffer size", 16),
+    INIT_PARAM(numPhysicalRegs, "Number of physical registers"),
+
+    INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"),
+    INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"),
+    INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch"
+               "delay"),
+    INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"),
+    INIT_PARAM(fetchWidth, "Fetch width"),
+    INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"),
+    INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode"
+               "delay"),
+    INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"),
+    INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"),
+    INIT_PARAM(decodeWidth, "Decode width"),
+
+    INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename"
+               "delay"),
+    INIT_PARAM(commitToRenameDelay, "Commit to rename delay"),
+    INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"),
+    INIT_PARAM(renameWidth, "Rename width"),
+
+    INIT_PARAM(commitToIEWDelay, "Commit to "
+               "Issue/Execute/Writeback delay"),
+    INIT_PARAM(renameToIEWDelay, "Rename to "
+               "Issue/Execute/Writeback delay"),
+    INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal"
+               "to the IEW stage)"),
+    INIT_PARAM(issueWidth, "Issue width"),
+    INIT_PARAM(executeWidth, "Execute width"),
+    INIT_PARAM(executeIntWidth, "Integer execute width"),
+    INIT_PARAM(executeFloatWidth, "Floating point execute width"),
+    INIT_PARAM(executeBranchWidth, "Branch execute width"),
+    INIT_PARAM(executeMemoryWidth, "Memory execute width"),
+
+    INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit "
+               "delay"),
+    INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"),
+    INIT_PARAM(commitWidth, "Commit width"),
+    INIT_PARAM(squashWidth, "Squash width"),
+
+    INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"),
+    INIT_PARAM(localPredictorSize, "Size of local predictor"),
+    INIT_PARAM(localCtrBits, "Bits per counter"),
+    INIT_PARAM(localHistoryTableSize, "Size of local history table"),
+    INIT_PARAM(localHistoryBits, "Bits for the local history"),
+    INIT_PARAM(globalPredictorSize, "Size of global predictor"),
+    INIT_PARAM(globalCtrBits, "Bits per counter"),
+    INIT_PARAM(globalHistoryBits, "Bits of history"),
+    INIT_PARAM(choicePredictorSize, "Size of choice predictor"),
+    INIT_PARAM(choiceCtrBits, "Bits of choice counters"),
+
+    INIT_PARAM(BTBEntries, "Number of BTB entries"),
+    INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"),
+
+    INIT_PARAM(RASSize, "RAS size"),
+
+    INIT_PARAM(LQEntries, "Number of load queue entries"),
+    INIT_PARAM(SQEntries, "Number of store queue entries"),
+    INIT_PARAM(LFSTSize, "Last fetched store table size"),
+    INIT_PARAM(SSITSize, "Store set ID table size"),
+
+    INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"),
+    INIT_PARAM(numPhysFloatRegs, "Number of physical floating point "
+               "registers"),
+    INIT_PARAM(numIQEntries, "Number of instruction queue entries"),
+    INIT_PARAM(numROBEntries, "Number of reorder buffer entries"),
+
+    INIT_PARAM_DFLT(decoupledFrontEnd, "Decoupled front end", true),
+    INIT_PARAM_DFLT(dispatchWidth, "Dispatch width", 0),
+    INIT_PARAM_DFLT(wbWidth, "Writeback width", 0),
+
+    INIT_PARAM_DFLT(smtNumFetchingThreads, "SMT Number of Fetching Threads", 1),
+    INIT_PARAM_DFLT(smtFetchPolicy, "SMT Fetch Policy", "SingleThread"),
+    INIT_PARAM_DFLT(smtLSQPolicy,   "SMT LSQ Sharing Policy",    "Partitioned"),
+    INIT_PARAM_DFLT(smtLSQThreshold,"SMT LSQ Threshold", 100),
+    INIT_PARAM_DFLT(smtIQPolicy,    "SMT IQ Policy",    "Partitioned"),
+    INIT_PARAM_DFLT(smtIQThreshold, "SMT IQ Threshold", 100),
+    INIT_PARAM_DFLT(smtROBPolicy,   "SMT ROB Sharing Policy", "Partitioned"),
+    INIT_PARAM_DFLT(smtROBThreshold,"SMT ROB Threshold", 100),
+    INIT_PARAM_DFLT(smtCommitPolicy,"SMT Commit Fetch Policy", "RoundRobin"),
+
+    INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"),
+    INIT_PARAM(defer_registration, "defer system registration (for sampling)"),
+
+    INIT_PARAM(function_trace, "Enable function trace"),
+    INIT_PARAM(function_trace_start, "Cycle to start function trace")
+
+END_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU)
+
+CREATE_SIM_OBJECT(SimpleOzoneCPU)
+{ // Copies the declared parameters into a new SimpleParams and builds the CPU from it.
+    SimpleOzoneCPU *cpu;
+
+#if FULL_SYSTEM
+    // Full-system only supports a single thread for the moment.
+    int actual_num_threads = 1;
+#else
+    // In non-full-system mode, we infer the number of threads from
+    // the workload if it's not explicitly specified.
+    int actual_num_threads =
+        numThreads.isValid() ? numThreads : workload.size();
+
+    if (workload.size() == 0) {
+        fatal("Must specify at least one workload!");
+    }
+
+#endif
+
+    SimpleParams *params = new SimpleParams; // passed to the SimpleOzoneCPU constructor below
+
+    params->clock = clock;
+
+    params->name = getInstanceName();
+    params->numberOfThreads = actual_num_threads;
+
+#if FULL_SYSTEM
+    params->system = system;
+    params->cpu_id = cpu_id;
+    params->itb = itb;
+    params->dtb = dtb;
+#else
+    params->workload = workload;
+//    params->pTable = page_table;
+#endif // FULL_SYSTEM
+
+    params->mem = mem;
+    params->checker = checker;
+    params->max_insts_any_thread = max_insts_any_thread;
+    params->max_insts_all_threads = max_insts_all_threads;
+    params->max_loads_any_thread = max_loads_any_thread;
+    params->max_loads_all_threads = max_loads_all_threads;
+
+    //
+    // Caches
+    // NOTE(review): simple_params.hh appears to comment out icacheInterface/dcacheInterface in this change -- confirm this still builds.
+    params->icacheInterface = icache ? icache->getInterface() : NULL;
+    params->dcacheInterface = dcache ? dcache->getInterface() : NULL;
+    params->cachePorts = cachePorts;
+
+    params->width = width;
+    params->frontEndWidth = frontEndWidth;
+    params->backEndWidth = backEndWidth;
+    params->backEndSquashLatency = backEndSquashLatency;
+    params->backEndLatency = backEndLatency;
+    params->maxInstBufferSize = maxInstBufferSize;
+    params->numPhysicalRegs = numPhysIntRegs + numPhysFloatRegs; // integer + floating point counts combined
+
+    params->decodeToFetchDelay = decodeToFetchDelay;
+    params->renameToFetchDelay = renameToFetchDelay;
+    params->iewToFetchDelay = iewToFetchDelay;
+    params->commitToFetchDelay = commitToFetchDelay;
+    params->fetchWidth = fetchWidth;
+
+    params->renameToDecodeDelay = renameToDecodeDelay;
+    params->iewToDecodeDelay = iewToDecodeDelay;
+    params->commitToDecodeDelay = commitToDecodeDelay;
+    params->fetchToDecodeDelay = fetchToDecodeDelay;
+    params->decodeWidth = decodeWidth;
+
+    params->iewToRenameDelay = iewToRenameDelay;
+    params->commitToRenameDelay = commitToRenameDelay;
+    params->decodeToRenameDelay = decodeToRenameDelay;
+    params->renameWidth = renameWidth;
+
+    params->commitToIEWDelay = commitToIEWDelay;
+    params->renameToIEWDelay = renameToIEWDelay;
+    params->issueToExecuteDelay = issueToExecuteDelay;
+    params->issueWidth = issueWidth;
+    params->executeWidth = executeWidth;
+    params->executeIntWidth = executeIntWidth;
+    params->executeFloatWidth = executeFloatWidth;
+    params->executeBranchWidth = executeBranchWidth;
+    params->executeMemoryWidth = executeMemoryWidth;
+
+    params->iewToCommitDelay = iewToCommitDelay;
+    params->renameToROBDelay = renameToROBDelay;
+    params->commitWidth = commitWidth;
+    params->squashWidth = squashWidth;
+
+    params->predType = predType;
+    params->localPredictorSize = localPredictorSize;
+    params->localCtrBits = localCtrBits;
+    params->localHistoryTableSize = localHistoryTableSize;
+    params->localHistoryBits = localHistoryBits;
+    params->globalPredictorSize = globalPredictorSize;
+    params->globalCtrBits = globalCtrBits;
+    params->globalHistoryBits = globalHistoryBits;
+    params->choicePredictorSize = choicePredictorSize;
+    params->choiceCtrBits = choiceCtrBits;
+
+    params->BTBEntries = BTBEntries;
+    params->BTBTagSize = BTBTagSize;
+
+    params->RASSize = RASSize;
+
+    params->LQEntries = LQEntries;
+    params->SQEntries = SQEntries;
+
+    params->SSITSize = SSITSize;
+    params->LFSTSize = LFSTSize;
+
+    params->numPhysIntRegs = numPhysIntRegs;
+    params->numPhysFloatRegs = numPhysFloatRegs;
+    params->numIQEntries = numIQEntries;
+    params->numROBEntries = numROBEntries;
+
+    params->decoupledFrontEnd = decoupledFrontEnd;
+    params->dispatchWidth = dispatchWidth;
+    params->wbWidth = wbWidth;
+
+    params->smtNumFetchingThreads = smtNumFetchingThreads;
+    params->smtFetchPolicy = smtFetchPolicy;
+    params->smtIQPolicy    = smtIQPolicy;
+    params->smtLSQPolicy    = smtLSQPolicy;
+    params->smtLSQThreshold = smtLSQThreshold;
+    params->smtROBPolicy   = smtROBPolicy;
+    params->smtROBThreshold = smtROBThreshold;
+    params->smtCommitPolicy = smtCommitPolicy;
+
+    params->instShiftAmt = 2; // NOTE(review): hard-coded; the instShiftAmt parameter is not read here -- confirm intended
+
+    params->deferRegistration = defer_registration;
+
+    params->functionTrace = function_trace;
+    params->functionTraceStart = function_trace_start;
+
+    cpu = new SimpleOzoneCPU(params);
+
+    return cpu;
+}
+
+REGISTER_SIM_OBJECT("SimpleOzoneCPU", SimpleOzoneCPU)
+
diff --git a/src/cpu/ozone/simple_params.hh b/src/cpu/ozone/simple_params.hh
index 13eb05e77..11cee716f 100644
--- a/src/cpu/ozone/simple_params.hh
+++ b/src/cpu/ozone/simple_params.hh
@@ -37,8 +37,7 @@
 class AlphaDTB;
 class AlphaITB;
 class FUPool;
-class FunctionalMemory;
-class MemInterface;
+class MemObject;
 class PageTable;
 class Process;
 class System;
@@ -62,13 +61,13 @@ class SimpleParams : public BaseCPU::Params
     //Page Table
     PageTable *pTable;
 
-    FunctionalMemory *mem;
+    MemObject *mem;
 
     //
     // Caches
     //
-    MemInterface *icacheInterface;
-    MemInterface *dcacheInterface;
+//    MemInterface *icacheInterface;
+//    MemInterface *dcacheInterface;
 
     unsigned cachePorts;
     unsigned width;
diff --git a/src/cpu/ozone/thread_state.hh b/src/cpu/ozone/thread_state.hh
index 299878c29..ef4b1429d 100644
--- a/src/cpu/ozone/thread_state.hh
+++ b/src/cpu/ozone/thread_state.hh
@@ -58,30 +58,23 @@ class FunctionalMemory;
 template <class Impl>
 struct OzoneThreadState : public ThreadState {
     typedef typename ThreadContext::Status Status;
-    typedef typename Impl::FullCPU FullCPU;
+    typedef typename Impl::CPUType CPUType;
     typedef TheISA::MiscReg MiscReg;
 
 #if FULL_SYSTEM
-    OzoneThreadState(FullCPU *_cpu, int _thread_num)
+    OzoneThreadState(CPUType *_cpu, int _thread_num) // full-system constructor
         : ThreadState(-1, _thread_num),
-          inSyscall(0), trapPending(0)
+          intrflag(0), cpu(_cpu), inSyscall(0), trapPending(0) // cpu was previously left unset in this variant
     {
-        memset(&regs, 0, sizeof(TheISA::RegFile));
+        miscRegFile.clear();
     }
 #else
-    OzoneThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid)
-        : ThreadState(-1, _thread_num, NULL, _process, _asid),
+    OzoneThreadState(CPUType *_cpu, int _thread_num, Process *_process,
+                     int _asid, MemObject *mem)
+        : ThreadState(-1, _thread_num, _process, _asid, mem),
           cpu(_cpu), inSyscall(0), trapPending(0)
     {
-        memset(&regs, 0, sizeof(TheISA::RegFile));
-    }
-
-    OzoneThreadState(FullCPU *_cpu, int _thread_num,
-                     int _asid)
-        : ThreadState(-1, _thread_num, NULL, NULL, _asid),
-          cpu(_cpu), inSyscall(0), trapPending(0)
-    {
-        memset(&regs, 0, sizeof(TheISA::RegFile));
+        miscRegFile.clear();
     }
 #endif
 
@@ -91,9 +84,11 @@ struct OzoneThreadState : public ThreadState {
 
     Addr nextPC;
 
-    TheISA::RegFile regs;
+    TheISA::MiscRegFile miscRegFile;
+
+    int intrflag;
 
-    typename Impl::FullCPU *cpu;
+    typename Impl::CPUType *cpu;
 
     bool inSyscall;
 
@@ -103,54 +98,24 @@ struct OzoneThreadState : public ThreadState {
 
     ThreadContext *getTC() { return tc; }
 
-#if !FULL_SYSTEM
-    Fault translateInstReq(Request *req)
-    {
-        return process->pTable->translate(req);
-    }
-    Fault translateDataReadReq(Request *req)
-    {
-        return process->pTable->translate(req);
-    }
-    Fault translateDataWriteReq(Request *req)
-    {
-        return process->pTable->translate(req);
-    }
-#else
-    Fault translateInstReq(Request *req)
-    {
-        return cpu->itb->translate(req);
-    }
-
-    Fault translateDataReadReq(Request *req)
-    {
-        return cpu->dtb->translate(req, false);
-    }
-
-    Fault translateDataWriteReq(Request *req)
-    {
-        return cpu->dtb->translate(req, true);
-    }
-#endif
-
     MiscReg readMiscReg(int misc_reg)
     {
-        return regs.readMiscReg(misc_reg);
+        return miscRegFile.readReg(misc_reg);
     }
 
     MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault)
     {
-        return regs.readMiscRegWithEffect(misc_reg, fault, tc);
+        return miscRegFile.readRegWithEffect(misc_reg, fault, tc);
     }
 
     Fault setMiscReg(int misc_reg, const MiscReg &val)
     {
-        return regs.setMiscReg(misc_reg, val);
+        return miscRegFile.setReg(misc_reg, val);
     }
 
     Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val)
     {
-        return regs.setMiscRegWithEffect(misc_reg, val, tc);
+        return miscRegFile.setRegWithEffect(misc_reg, val, tc);
     }
 
     uint64_t readPC()
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index 071193f02..1752b2b5b 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -33,6 +33,7 @@
 #include "cpu/simple/atomic.hh"
 #include "mem/packet_impl.hh"
 #include "sim/builder.hh"
+#include "sim/system.hh"
 
 using namespace std;
 using namespace TheISA;
@@ -55,18 +56,28 @@ AtomicSimpleCPU::TickEvent::description()
     return "AtomicSimpleCPU tick event";
 }
 
+Port *
+AtomicSimpleCPU::getPort(const std::string &if_name, int idx)
+{
+    // Resolve the port by its interface name; idx is not consulted.
+    if (if_name == "icache_port")
+        return &icachePort;
+    if (if_name == "dcache_port")
+        return &dcachePort;
+    panic("No Such Port\n");
+}
 
 void
 AtomicSimpleCPU::init()
 {
     //Create Memory Ports (conect them up)
-    Port *mem_dport = mem->getPort("");
-    dcachePort.setPeer(mem_dport);
-    mem_dport->setPeer(&dcachePort);
+//    Port *mem_dport = mem->getPort("");
+//    dcachePort.setPeer(mem_dport);
+//    mem_dport->setPeer(&dcachePort);
 
-    Port *mem_iport = mem->getPort("");
-    icachePort.setPeer(mem_iport);
-    mem_iport->setPeer(&icachePort);
+//    Port *mem_iport = mem->getPort("");
+//    icachePort.setPeer(mem_iport);
+//    mem_iport->setPeer(&icachePort);
 
     BaseCPU::init();
 #if FULL_SYSTEM
@@ -124,15 +135,18 @@ AtomicSimpleCPU::AtomicSimpleCPU(Params *p)
 
     // @todo fix me and get the real cpu id & thread number!!!
     ifetch_req = new Request();
+    ifetch_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE
     ifetch_pkt = new Packet(ifetch_req, Packet::ReadReq, Packet::Broadcast);
     ifetch_pkt->dataStatic(&inst);
 
     data_read_req = new Request();
+    data_read_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE
     data_read_pkt = new Packet(data_read_req, Packet::ReadReq,
                                Packet::Broadcast);
     data_read_pkt->dataStatic(&dataReg);
 
     data_write_req = new Request();
+    data_write_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE
     data_write_pkt = new Packet(data_write_req, Packet::WriteReq,
                                 Packet::Broadcast);
 }
@@ -145,8 +159,8 @@ AtomicSimpleCPU::~AtomicSimpleCPU()
 void
 AtomicSimpleCPU::serialize(ostream &os)
 {
-    BaseSimpleCPU::serialize(os);
     SERIALIZE_ENUM(_status);
+    BaseSimpleCPU::serialize(os);
     nameOut(os, csprintf("%s.tickEvent", name()));
     tickEvent.serialize(os);
 }
@@ -154,21 +168,25 @@ AtomicSimpleCPU::serialize(ostream &os)
 void
 AtomicSimpleCPU::unserialize(Checkpoint *cp, const string &section)
 {
-    BaseSimpleCPU::unserialize(cp, section);
     UNSERIALIZE_ENUM(_status);
+    BaseSimpleCPU::unserialize(cp, section);
     tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
 }
 
 void
-AtomicSimpleCPU::switchOut(Sampler *s)
+AtomicSimpleCPU::resume()
 {
-    sampler = s;
-    if (status() == Running) {
-        _status = SwitchedOut;
+    assert(system->getMemoryMode() == System::Atomic);
+    changeState(SimObject::Running);
+}
 
-        tickEvent.squash();
-    }
-    sampler->signalSwitched();
+void
+AtomicSimpleCPU::switchOut()
+{
+    assert(status() == Running || status() == Idle);
+    _status = SwitchedOut;
+
+    tickEvent.squash();
 }
 
 
@@ -410,15 +428,14 @@ AtomicSimpleCPU::tick()
             postExecute();
 
             if (simulate_stalls) {
-                // This calculation assumes that the icache and dcache
-                // access latencies are always a multiple of the CPU's
-                // cycle time.  If not, the next tick event may get
-                // scheduled at a non-integer multiple of the CPU
-                // cycle time.
                 Tick icache_stall = icache_latency - cycles(1);
                 Tick dcache_stall =
                     dcache_access ? dcache_latency - cycles(1) : 0;
-                latency += icache_stall + dcache_stall;
+                Tick stall_cycles = (icache_stall + dcache_stall) / cycles(1);
+                if (cycles(stall_cycles) < (icache_stall + dcache_stall))
+                    latency += cycles(stall_cycles+1);
+                else
+                    latency += cycles(stall_cycles);
             }
 
         }
@@ -442,11 +459,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(AtomicSimpleCPU)
     Param<Counter> max_loads_any_thread;
     Param<Counter> max_loads_all_threads;
     SimObjectParam<MemObject *> mem;
+    SimObjectParam<System *> system;
 
 #if FULL_SYSTEM
     SimObjectParam<AlphaITB *> itb;
     SimObjectParam<AlphaDTB *> dtb;
-    SimObjectParam<System *> system;
     Param<int> cpu_id;
     Param<Tick> profile;
 #else
@@ -474,11 +491,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(AtomicSimpleCPU)
     INIT_PARAM(max_loads_all_threads,
                "terminate when all threads have reached this load count"),
     INIT_PARAM(mem, "memory"),
+    INIT_PARAM(system, "system object"),
 
 #if FULL_SYSTEM
     INIT_PARAM(itb, "Instruction TLB"),
     INIT_PARAM(dtb, "Data TLB"),
-    INIT_PARAM(system, "system object"),
     INIT_PARAM(cpu_id, "processor ID"),
     INIT_PARAM(profile, ""),
 #else
@@ -511,11 +528,11 @@ CREATE_SIM_OBJECT(AtomicSimpleCPU)
     params->width = width;
     params->simulate_stalls = simulate_stalls;
     params->mem = mem;
+    params->system = system;
 
 #if FULL_SYSTEM
     params->itb = itb;
     params->dtb = dtb;
-    params->system = system;
     params->cpu_id = cpu_id;
     params->profile = profile;
 #else
diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh
index 7f4956da9..d59ca01aa 100644
--- a/src/cpu/simple/atomic.hh
+++ b/src/cpu/simple/atomic.hh
@@ -122,10 +122,13 @@ class AtomicSimpleCPU : public BaseSimpleCPU
 
   public:
 
+    virtual Port *getPort(const std::string &if_name, int idx = -1);
+
     virtual void serialize(std::ostream &os);
     virtual void unserialize(Checkpoint *cp, const std::string &section);
 
-    void switchOut(Sampler *s);
+    virtual void resume();
+    void switchOut();
     void takeOverFrom(BaseCPU *oldCPU);
 
     virtual void activateContext(int thread_num, int delay);
diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc
index d94b0e079..af10e64d7 100644
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -26,6 +26,7 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * Authors: Steve Reinhardt
+ *          Korey Sewell
  */
 
 #include "arch/utility.hh"
@@ -40,7 +41,6 @@
 #include "cpu/base.hh"
 #include "cpu/exetrace.hh"
 #include "cpu/profile.hh"
-#include "cpu/sampler/sampler.hh"
 #include "cpu/simple/base.hh"
 #include "cpu/simple_thread.hh"
 #include "cpu/smt.hh"
@@ -55,10 +55,10 @@
 #include "sim/sim_events.hh"
 #include "sim/sim_object.hh"
 #include "sim/stats.hh"
+#include "sim/system.hh"
 
 #if FULL_SYSTEM
 #include "base/remote_gdb.hh"
-#include "sim/system.hh"
 #include "arch/tlb.hh"
 #include "arch/stacktrace.hh"
 #include "arch/vtophys.hh"
@@ -358,8 +358,13 @@ Fault
 BaseSimpleCPU::setupFetchRequest(Request *req)
 {
     // set up memory request for instruction fetch
+#if THE_ISA == ALPHA_ISA
+    DPRINTF(Fetch,"Fetch: PC:%08p NPC:%08p\n",thread->readPC(),
+            thread->readNextPC());
+#else
     DPRINTF(Fetch,"Fetch: PC:%08p NPC:%08p NNPC:%08p\n",thread->readPC(),
             thread->readNextPC(),thread->readNextNPC());
+#endif // THE_ISA == ALPHA_ISA
 
     req->setVirt(0, thread->readPC() & ~3, sizeof(MachInst),
                  (FULL_SYSTEM && (thread->readPC() & 1)) ? PHYSICAL : 0,
@@ -440,11 +445,7 @@ void
 BaseSimpleCPU::advancePC(Fault fault)
 {
     if (fault != NoFault) {
-#if FULL_SYSTEM
         fault->invoke(tc);
-#else // !FULL_SYSTEM
-        fatal("fault (%s) detected @ PC %08p", fault->name(), thread->readPC());
-#endif // FULL_SYSTEM
     }
     else {
         // go to the next instruction
diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh
index 39bc86050..57cfa3c2c 100644
--- a/src/cpu/simple/base.hh
+++ b/src/cpu/simple/base.hh
@@ -38,7 +38,6 @@
 #include "cpu/base.hh"
 #include "cpu/simple_thread.hh"
 #include "cpu/pc_event.hh"
-#include "cpu/sampler/sampler.hh"
 #include "cpu/static_inst.hh"
 #include "mem/packet.hh"
 #include "mem/port.hh"
@@ -128,11 +127,6 @@ class BaseSimpleCPU : public BaseCPU
     // Static data storage
     TheISA::IntReg dataReg;
 
-    // Pointer to the sampler that is telling us to switchover.
-    // Used to signal the completion of the pipe drain and schedule
-    // the next switchover
-    Sampler *sampler;
-
     StaticInstPtr curStaticInst;
 
     void checkForInterrupts();
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index c99db8fbf..d2c2c7c47 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -33,23 +33,25 @@
 #include "cpu/simple/timing.hh"
 #include "mem/packet_impl.hh"
 #include "sim/builder.hh"
+#include "sim/system.hh"
 
 using namespace std;
 using namespace TheISA;
 
+Port *
+TimingSimpleCPU::getPort(const std::string &if_name, int idx)
+{
+    // Resolve the port by its interface name; idx is not consulted.
+    if (if_name == "icache_port")
+        return &icachePort;
+    if (if_name == "dcache_port")
+        return &dcachePort;
+    panic("No Such Port\n");
+}
 
 void
 TimingSimpleCPU::init()
 {
-    //Create Memory Ports (conect them up)
-    Port *mem_dport = mem->getPort("");
-    dcachePort.setPeer(mem_dport);
-    mem_dport->setPeer(&dcachePort);
-
-    Port *mem_iport = mem->getPort("");
-    icachePort.setPeer(mem_iport);
-    mem_iport->setPeer(&icachePort);
-
     BaseCPU::init();
 #if FULL_SYSTEM
     for (int i = 0; i < threadContexts.size(); ++i) {
@@ -88,6 +90,9 @@ TimingSimpleCPU::TimingSimpleCPU(Params *p)
 {
     _status = Idle;
     ifetch_pkt = dcache_pkt = NULL;
+    drainEvent = NULL;
+    fetchEvent = NULL;
+    changeState(SimObject::Running);
 }
 
 
@@ -98,25 +103,61 @@ TimingSimpleCPU::~TimingSimpleCPU()
 void
 TimingSimpleCPU::serialize(ostream &os)
 {
-    BaseSimpleCPU::serialize(os);
     SERIALIZE_ENUM(_status);
+    BaseSimpleCPU::serialize(os);
 }
 
 void
 TimingSimpleCPU::unserialize(Checkpoint *cp, const string &section)
 {
-    BaseSimpleCPU::unserialize(cp, section);
     UNSERIALIZE_ENUM(_status);
+    BaseSimpleCPU::unserialize(cp, section);
+}
+
+unsigned int
+TimingSimpleCPU::drain(Event *drain_event)
+{
+    // Drained at once (returns 0) unless an access is still outstanding,
+    // in which case the event is kept and 1 is returned.
+    const Status cur = status();
+    if (cur == Idle || cur == Running || cur == SwitchedOut) {
+        changeState(SimObject::Drained);
+        return 0;
+    }
+    changeState(SimObject::Draining);
+    drainEvent = drain_event;
+    return 1;
+}
 
 void
-TimingSimpleCPU::switchOut(Sampler *s)
+TimingSimpleCPU::resume()
 {
-    sampler = s;
-    if (status() == Running) {
-        _status = SwitchedOut;
+    if (_status != SwitchedOut && _status != Idle) {
+        // Delete the old event if it existed.
+        if (fetchEvent) {
+            assert(!fetchEvent->scheduled());
+            delete fetchEvent;
+        }
+
+        fetchEvent =
+            new EventWrapper<TimingSimpleCPU, &TimingSimpleCPU::fetch>(this, false);
+        fetchEvent->schedule(curTick);
     }
-    sampler->signalSwitched();
+
+    assert(system->getMemoryMode() == System::Timing);
+    changeState(SimObject::Running);
+}
+
+void
+TimingSimpleCPU::switchOut()
+{
+    assert(status() == Running || status() == Idle);
+    _status = SwitchedOut;
+
+    // If we've been scheduled to resume but are then told to switch out,
+    // we'll need to cancel it.
+    if (fetchEvent && fetchEvent->scheduled())
+        fetchEvent->deschedule();
 }
 
 
@@ -148,9 +189,9 @@ TimingSimpleCPU::activateContext(int thread_num, int delay)
     notIdleFraction++;
     _status = Running;
     // kick things off by initiating the fetch of the next instruction
-    Event *e =
-        new EventWrapper<TimingSimpleCPU, &TimingSimpleCPU::fetch>(this, true);
-    e->schedule(curTick + cycles(delay));
+    fetchEvent =
+        new EventWrapper<TimingSimpleCPU, &TimingSimpleCPU::fetch>(this, false);
+    fetchEvent->schedule(curTick + cycles(delay));
 }
 
 
@@ -176,7 +217,7 @@ TimingSimpleCPU::read(Addr addr, T &data, unsigned flags)
 {
     // need to fill in CPU & thread IDs here
     Request *data_read_req = new Request();
-
+    data_read_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE
     data_read_req->setVirt(0, addr, sizeof(T), flags, thread->readPC());
 
     if (traceData) {
@@ -257,6 +298,7 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
 {
     // need to fill in CPU & thread IDs here
     Request *data_write_req = new Request();
+    data_write_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE
     data_write_req->setVirt(0, addr, sizeof(T), flags, thread->readPC());
 
     // translate to physical address
@@ -340,6 +382,7 @@ TimingSimpleCPU::fetch()
 
     // need to fill in CPU & thread IDs here
     Request *ifetch_req = new Request();
+    ifetch_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE
     Fault fault = setupFetchRequest(ifetch_req);
 
     ifetch_pkt = new Packet(ifetch_req, Packet::ReadReq, Packet::Broadcast);
@@ -383,11 +426,17 @@ TimingSimpleCPU::completeIfetch(Packet *pkt)
     // instruction
     assert(pkt->result == Packet::Success);
     assert(_status == IcacheWaitResponse);
+
     _status = Running;
 
     delete pkt->req;
     delete pkt;
 
+    if (getState() == SimObject::Draining) {
+        completeDrain();
+        return;
+    }
+
     preExecute();
     if (curStaticInst->isMemRef() && !curStaticInst->isDataPrefetch()) {
         // load or store: just send to dcache
@@ -440,6 +489,15 @@ TimingSimpleCPU::completeDataAccess(Packet *pkt)
     assert(_status == DcacheWaitResponse);
     _status = Running;
 
+    if (getState() == SimObject::Draining) {
+        completeDrain();
+
+        delete pkt->req;
+        delete pkt;
+
+        return;
+    }
+
     Fault fault = curStaticInst->completeAcc(pkt, this, traceData);
 
     delete pkt->req;
@@ -450,6 +508,13 @@ TimingSimpleCPU::completeDataAccess(Packet *pkt)
 }
 
 
+void
+TimingSimpleCPU::completeDrain()
+{ // Called from the fetch/data completion paths when the object state is Draining.
+    DPRINTF(Config, "Done draining\n");
+    changeState(SimObject::Drained);
+    drainEvent->process(); // runs the event stored by drain(); no null check is made here
+}
 
 bool
 TimingSimpleCPU::DcachePort::recvTiming(Packet *pkt)
@@ -484,11 +549,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(TimingSimpleCPU)
     Param<Counter> max_loads_any_thread;
     Param<Counter> max_loads_all_threads;
     SimObjectParam<MemObject *> mem;
+    SimObjectParam<System *> system;
 
 #if FULL_SYSTEM
     SimObjectParam<AlphaITB *> itb;
     SimObjectParam<AlphaDTB *> dtb;
-    SimObjectParam<System *> system;
     Param<int> cpu_id;
     Param<Tick> profile;
 #else
@@ -516,11 +581,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(TimingSimpleCPU)
     INIT_PARAM(max_loads_all_threads,
                "terminate when all threads have reached this load count"),
     INIT_PARAM(mem, "memory"),
+    INIT_PARAM(system, "system object"),
 
 #if FULL_SYSTEM
     INIT_PARAM(itb, "Instruction TLB"),
     INIT_PARAM(dtb, "Data TLB"),
-    INIT_PARAM(system, "system object"),
     INIT_PARAM(cpu_id, "processor ID"),
     INIT_PARAM(profile, ""),
 #else
@@ -551,11 +616,11 @@ CREATE_SIM_OBJECT(TimingSimpleCPU)
     params->functionTrace = function_trace;
     params->functionTraceStart = function_trace_start;
     params->mem = mem;
+    params->system = system;
 
 #if FULL_SYSTEM
     params->itb = itb;
     params->dtb = dtb;
-    params->system = system;
     params->cpu_id = cpu_id;
     params->profile = profile;
 #else
diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh
index ab0b2d2ca..ac36e5c99 100644
--- a/src/cpu/simple/timing.hh
+++ b/src/cpu/simple/timing.hh
@@ -64,6 +64,10 @@ class TimingSimpleCPU : public BaseSimpleCPU
 
     Status status() const { return _status; }
 
+    Event *drainEvent;
+
+    Event *fetchEvent;
+
   private:
 
     class CpuPort : public Port
@@ -128,10 +132,15 @@ class TimingSimpleCPU : public BaseSimpleCPU
 
   public:
 
+    virtual Port *getPort(const std::string &if_name, int idx = -1);
+
     virtual void serialize(std::ostream &os);
     virtual void unserialize(Checkpoint *cp, const std::string &section);
 
-    void switchOut(Sampler *s);
+    virtual unsigned int drain(Event *drain_event);
+    virtual void resume();
+
+    void switchOut();
     void takeOverFrom(BaseCPU *oldCPU);
 
     virtual void activateContext(int thread_num, int delay);
@@ -147,6 +156,8 @@ class TimingSimpleCPU : public BaseSimpleCPU
     void completeIfetch(Packet *);
     void completeDataAccess(Packet *);
     void advanceInst(Fault fault);
+  private:
+    void completeDrain();
 };
 
 #endif // __CPU_SIMPLE_TIMING_HH__
diff --git a/src/cpu/simple_thread.cc b/src/cpu/simple_thread.cc
index 48383ca93..af1db2ff2 100644
--- a/src/cpu/simple_thread.cc
+++ b/src/cpu/simple_thread.cc
@@ -107,7 +107,7 @@ SimpleThread::SimpleThread(BaseCPU *_cpu, int _thread_num, System *_sys,
 #else
 SimpleThread::SimpleThread(BaseCPU *_cpu, int _thread_num,
                          Process *_process, int _asid, MemObject* memobj)
-    : ThreadState(-1, _thread_num, memobj, _process, _asid),
+    : ThreadState(-1, _thread_num, _process, _asid, memobj),
       cpu(_cpu)
 {
     /* Use this port to for syscall emulation writes to memory. */
@@ -123,15 +123,19 @@ SimpleThread::SimpleThread(BaseCPU *_cpu, int _thread_num,
     tc = new ProxyThreadContext<SimpleThread>(this);
 }
 
-SimpleThread::SimpleThread(RegFile *regFile)
-    : ThreadState(-1, -1, NULL, NULL, -1), cpu(NULL)
+#endif
+
+SimpleThread::SimpleThread() // default constructor: no CPU, process or memory object attached
+#if FULL_SYSTEM
+    : ThreadState(-1, -1), cpu(NULL) // keep cpu null rather than uninitialized
+#else
+    : ThreadState(-1, -1, NULL, -1, NULL), cpu(NULL) // keep cpu null rather than uninitialized
+#endif
 {
-    regs = *regFile;
     tc = new ProxyThreadContext<SimpleThread>(this);
+    regs.clear();
 }
 
-#endif
-
 SimpleThread::~SimpleThread()
 {
     delete tc;
@@ -147,13 +151,8 @@ SimpleThread::takeOverFrom(ThreadContext *oldContext)
     assert(process == oldContext->getProcessPtr());
 #endif
 
-    // copy over functional state
-    _status = oldContext->status();
-    copyArchRegs(oldContext);
-    cpuId = oldContext->readCpuId();
-#if !FULL_SYSTEM
-    funcExeInst = oldContext->readFuncExeInst();
-#else
+    copyState(oldContext);
+#if FULL_SYSTEM
     EndQuiesceEvent *quiesce = oldContext->getQuiesceEvent();
     if (quiesce) {
         // Point the quiesce event's TC at this TC so that it wakes up
@@ -171,42 +170,49 @@ SimpleThread::takeOverFrom(ThreadContext *oldContext)
 }
 
 void
-SimpleThread::serialize(ostream &os)
+SimpleThread::copyTC(ThreadContext *context)
 {
-    SERIALIZE_ENUM(_status);
-    regs.serialize(os);
-    // thread_num and cpu_id are deterministic from the config
-    SERIALIZE_SCALAR(funcExeInst);
-    SERIALIZE_SCALAR(inst);
+    copyState(context);
 
 #if FULL_SYSTEM
-    Tick quiesceEndTick = 0;
-    if (quiesceEvent->scheduled())
-        quiesceEndTick = quiesceEvent->when();
-    SERIALIZE_SCALAR(quiesceEndTick);
-    if (kernelStats)
-        kernelStats->serialize(os);
+    EndQuiesceEvent *quiesce = context->getQuiesceEvent();
+    if (quiesce) {
+        quiesceEvent = quiesce;
+    }
+    Kernel::Statistics *stats = context->getKernelStats();
+    if (stats) {
+        kernelStats = stats;
+    }
 #endif
 }
 
+void
+SimpleThread::copyState(ThreadContext *oldContext)
+{
+    // copy over functional state
+    _status = oldContext->status();
+    copyArchRegs(oldContext);
+    cpuId = oldContext->readCpuId();
+#if !FULL_SYSTEM
+    funcExeInst = oldContext->readFuncExeInst();
+#endif
+}
+
+void
+SimpleThread::serialize(ostream &os)
+{
+    ThreadState::serialize(os);
+    regs.serialize(os);
+    // thread_num and cpu_id are deterministic from the config
+}
+
 
 void
 SimpleThread::unserialize(Checkpoint *cp, const std::string &section)
 {
-    UNSERIALIZE_ENUM(_status);
+    ThreadState::unserialize(cp, section);
     regs.unserialize(cp, section);
     // thread_num and cpu_id are deterministic from the config
-    UNSERIALIZE_SCALAR(funcExeInst);
-    UNSERIALIZE_SCALAR(inst);
-
-#if FULL_SYSTEM
-    Tick quiesceEndTick;
-    UNSERIALIZE_SCALAR(quiesceEndTick);
-    if (quiesceEndTick)
-        quiesceEvent->schedule(quiesceEndTick);
-    if (kernelStats)
-        kernelStats->unserialize(cp, section);
-#endif
 }
 
 #if FULL_SYSTEM
diff --git a/src/cpu/simple_thread.hh b/src/cpu/simple_thread.hh
index de65e9891..d36853db4 100644
--- a/src/cpu/simple_thread.hh
+++ b/src/cpu/simple_thread.hh
@@ -119,16 +119,20 @@ class SimpleThread : public ThreadState
 #else
     SimpleThread(BaseCPU *_cpu, int _thread_num, Process *_process, int _asid,
                  MemObject *memobj);
-    // Constructor to use SimpleThread to pass reg file around.  Not
-    // used for anything else.
-    SimpleThread(RegFile *regFile);
 #endif
+
+    SimpleThread();
+
     virtual ~SimpleThread();
 
     virtual void takeOverFrom(ThreadContext *oldContext);
 
     void regStats(const std::string &name);
 
+    void copyTC(ThreadContext *context);
+
+    void copyState(ThreadContext *oldContext);
+
     void serialize(std::ostream &os);
     void unserialize(Checkpoint *cp, const std::string &section);
 
diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh
index bea52f510..ea1a65148 100644
--- a/src/cpu/static_inst.hh
+++ b/src/cpu/static_inst.hh
@@ -34,10 +34,12 @@
 #include <bitset>
 #include <string>
 
+#include "base/bitfield.hh"
 #include "base/hashmap.hh"
 #include "base/misc.hh"
 #include "base/refcnt.hh"
 #include "cpu/op_class.hh"
+#include "cpu/o3/dyn_inst.hh"
 #include "sim/host.hh"
 #include "arch/isa_traits.hh"
 
@@ -50,9 +52,6 @@ class DynInst;
 class Packet;
 
 template <class Impl>
-class AlphaDynInst;
-
-template <class Impl>
 class OzoneDynInst;
 
 class CheckerCPU;
@@ -411,16 +410,10 @@ class StaticInst : public StaticInstBase
     //This is defined as inline below.
     static StaticInstPtr decode(ExtMachInst mach_inst);
 
-    //MIPS Decoder Debug Functions
-    int getOpcode() { return (machInst & 0xFC000000) >> 26 ; }//31..26
-    int getRs() {     return (machInst & 0x03E00000) >> 21; }    //25...21
-    int getRt() {     return (machInst & 0x001F0000) >> 16;  }    //20...16
-    int getRd() {     return (machInst & 0x0000F800) >> 11; }    //15...11
-    int getImm() {  return (machInst & 0x0000FFFF); }    //15...0
-    int getFunction(){  return (machInst & 0x0000003F); }//5...0
-    int getBranch(){  return (machInst & 0x0000FFFF); }//15...0
-    int getJump(){    return (machInst & 0x03FFFFFF); }//5...0
-    int getHint(){    return (machInst & 0x000007C0) >> 6; }  //10...6
+    /// Return opcode of machine instruction
+    uint32_t getOpcode() { return bits(machInst, 31, 26);}
+
+    /// Return name of machine instruction
     std::string getName() { return mnemonic; }
 };
 
diff --git a/src/cpu/thread_context.hh b/src/cpu/thread_context.hh
index 48c8fa28d..e019e22bc 100644
--- a/src/cpu/thread_context.hh
+++ b/src/cpu/thread_context.hh
@@ -143,7 +143,7 @@ class ThreadContext
     virtual void suspend() = 0;
 
     /// Set the status to Unallocated.
-    virtual void deallocate() = 0;
+    virtual void deallocate(int delay = 0) = 0;
 
     /// Set the status to Halted.
     virtual void halt() = 0;
@@ -245,10 +245,13 @@ class ThreadContext
 
     virtual void setSyscallReturn(SyscallReturn return_value) = 0;
 
-    virtual void syscall(int64_t callnum) = 0;
-
     // Same with st cond failures.
     virtual Counter readFuncExeInst() = 0;
+
+    // This function exits the thread context in the CPU and returns
+    // 1 if the CPU has no more active threads (meaning it's OK to exit);
+    // Used in syscall-emulation mode when a thread calls the exit syscall.
+    virtual int exit() { return 1; };
 #endif
 
     virtual void changeRegFileContext(RegFile::ContextParam param,
@@ -315,7 +318,7 @@ class ProxyThreadContext : public ThreadContext
     void suspend() { actualTC->suspend(); }
 
     /// Set the status to Unallocated.
-    void deallocate() { actualTC->deallocate(); }
+    void deallocate(int delay = 0) { actualTC->deallocate(); }
 
     /// Set the status to Halted.
     void halt() { actualTC->halt(); }
@@ -432,8 +435,6 @@ class ProxyThreadContext : public ThreadContext
     void setSyscallReturn(SyscallReturn return_value)
     { actualTC->setSyscallReturn(return_value); }
 
-    void syscall(int64_t callnum) { actualTC->syscall(callnum); }
-
     Counter readFuncExeInst() { return actualTC->readFuncExeInst(); }
 #endif
 
diff --git a/src/cpu/thread_state.cc b/src/cpu/thread_state.cc
index dcfa93c3e..6a96560f1 100644
--- a/src/cpu/thread_state.cc
+++ b/src/cpu/thread_state.cc
@@ -31,6 +31,12 @@
 #include "base/output.hh"
 #include "cpu/profile.hh"
 #include "cpu/thread_state.hh"
+#include "sim/serialize.hh"
+
+#if FULL_SYSTEM
+#include "cpu/quiesce_event.hh"
+#include "kern/kernel_stats.hh"
+#endif
 
 #if FULL_SYSTEM
 ThreadState::ThreadState(int _cpuId, int _tid)
@@ -38,8 +44,8 @@ ThreadState::ThreadState(int _cpuId, int _tid)
       profile(NULL), profileNode(NULL), profilePC(0), quiesceEvent(NULL),
       funcExeInst(0), storeCondFailures(0)
 #else
-ThreadState::ThreadState(int _cpuId, int _tid, MemObject *mem,
-                         Process *_process, short _asid)
+ThreadState::ThreadState(int _cpuId, int _tid, Process *_process,
+                         short _asid, MemObject *mem)
     : cpuId(_cpuId), tid(_tid), lastActivate(0), lastSuspend(0),
       process(_process), asid(_asid),
       funcExeInst(0), storeCondFailures(0)
@@ -49,6 +55,43 @@ ThreadState::ThreadState(int _cpuId, int _tid, MemObject *mem,
     numLoad = 0;
 }
 
+void
+ThreadState::serialize(std::ostream &os)
+{
+    SERIALIZE_ENUM(_status);
+    // thread_num and cpu_id are deterministic from the config
+    SERIALIZE_SCALAR(funcExeInst);
+    SERIALIZE_SCALAR(inst);
+
+#if FULL_SYSTEM
+    Tick quiesceEndTick = 0;
+    if (quiesceEvent->scheduled())
+        quiesceEndTick = quiesceEvent->when();
+    SERIALIZE_SCALAR(quiesceEndTick);
+    if (kernelStats)
+        kernelStats->serialize(os);
+#endif
+}
+
+void
+ThreadState::unserialize(Checkpoint *cp, const std::string &section)
+{
+
+    UNSERIALIZE_ENUM(_status);
+    // thread_num and cpu_id are deterministic from the config
+    UNSERIALIZE_SCALAR(funcExeInst);
+    UNSERIALIZE_SCALAR(inst);
+
+#if FULL_SYSTEM
+    Tick quiesceEndTick;
+    UNSERIALIZE_SCALAR(quiesceEndTick);
+    if (quiesceEndTick)
+        quiesceEvent->schedule(quiesceEndTick);
+    if (kernelStats)
+        kernelStats->unserialize(cp, section);
+#endif
+}
+
 #if FULL_SYSTEM
 
 void
diff --git a/src/cpu/thread_state.hh b/src/cpu/thread_state.hh
index de9b2f14e..b03a2e2bb 100644
--- a/src/cpu/thread_state.hh
+++ b/src/cpu/thread_state.hh
@@ -49,6 +49,8 @@ namespace Kernel {
 };
 #endif
 
+class Checkpoint;
+
 /**
  *  Struct for holding general thread state that is needed across CPU
  *  models.  This includes things such as pointers to the process,
@@ -61,10 +63,14 @@ struct ThreadState {
 #if FULL_SYSTEM
     ThreadState(int _cpuId, int _tid);
 #else
-    ThreadState(int _cpuId, int _tid, MemObject *mem,
-                Process *_process, short _asid);
+    ThreadState(int _cpuId, int _tid, Process *_process,
+                short _asid, MemObject *mem);
 #endif
 
+    void serialize(std::ostream &os);
+
+    void unserialize(Checkpoint *cp, const std::string &section);
+
     void setCpuId(int id) { cpuId = id; }
 
     int readCpuId() { return cpuId; }
diff --git a/src/dev/ide_ctrl.cc b/src/dev/ide_ctrl.cc
index 63435e87c..5ffc02d34 100644
--- a/src/dev/ide_ctrl.cc
+++ b/src/dev/ide_ctrl.cc
@@ -227,177 +227,143 @@ IdeController::setDmaComplete(IdeDisk *disk)
 // Read and write handling
 ////
 
-void
-IdeController::readConfig(int offset, uint8_t *data)
+Tick
+IdeController::readConfig(Packet *pkt)
 {
-    if (offset < PCI_DEVICE_SPECIFIC) {
-        PciDev::readConfig(offset, data);
-    } else if (offset >= IDE_CTRL_CONF_START &&
-               (offset + 1) <= IDE_CTRL_CONF_END) {
+    int offset = pkt->getAddr() & PCI_CONFIG_SIZE;
+    if (offset < PCI_DEVICE_SPECIFIC)
+        return  PciDev::readConfig(pkt);
+    assert(offset >= IDE_CTRL_CONF_START && (offset + 1) <= IDE_CTRL_CONF_END);
 
+    pkt->allocate();
+
+    switch (pkt->getSize()) {
+      case sizeof(uint8_t):
         switch (offset) {
           case IDE_CTRL_CONF_DEV_TIMING:
-            *data = config_regs.sidetim;
+            pkt->set<uint8_t>(config_regs.sidetim);
             break;
           case IDE_CTRL_CONF_UDMA_CNTRL:
-            *data = config_regs.udmactl;
+            pkt->set<uint8_t>(config_regs.udmactl);
             break;
           case IDE_CTRL_CONF_PRIM_TIMING+1:
-            *data = htole(config_regs.idetim0) >> 8;
+            pkt->set<uint8_t>(htole(config_regs.idetim0) >> 8);
             break;
           case IDE_CTRL_CONF_SEC_TIMING+1:
-            *data = htole(config_regs.idetim1) >> 8;
+            pkt->set<uint8_t>(htole(config_regs.idetim1) >> 8);
             break;
           case IDE_CTRL_CONF_IDE_CONFIG:
-            *data = htole(config_regs.ideconfig) & 0xFF;
+            pkt->set<uint8_t>(htole(config_regs.ideconfig) & 0xFF);
             break;
           case IDE_CTRL_CONF_IDE_CONFIG+1:
-            *data = htole(config_regs.ideconfig) >> 8;
+            pkt->set<uint8_t>(htole(config_regs.ideconfig) >> 8);
             break;
           default:
             panic("Invalid PCI configuration read for size 1 at offset: %#x!\n",
                     offset);
         }
-
-    } else {
-        panic("Read of unimplemented PCI config. register: %x\n", offset);
-    }
-    DPRINTF(IdeCtrl, "PCI read offset: %#x size: 1 data: %#x\n",
-                offset, (uint32_t)*data);
-}
-
-void
-IdeController::readConfig(int offset, uint16_t *data)
-{
-    if (offset < PCI_DEVICE_SPECIFIC) {
-        PciDev::readConfig(offset, data);
-    } else if (offset >= IDE_CTRL_CONF_START &&
-               (offset + 2) <= IDE_CTRL_CONF_END) {
-
+        DPRINTF(IdeCtrl, "PCI read offset: %#x size: 1 data: %#x\n", offset,
+                (uint32_t)pkt->get<uint8_t>());
+        break;
+      case sizeof(uint16_t):
         switch (offset) {
           case IDE_CTRL_CONF_PRIM_TIMING:
-            *data = config_regs.idetim0;
+            pkt->set<uint16_t>(config_regs.idetim0);
             break;
           case IDE_CTRL_CONF_SEC_TIMING:
-            *data = config_regs.idetim1;
+            pkt->set<uint16_t>(config_regs.idetim1);
             break;
           case IDE_CTRL_CONF_UDMA_TIMING:
-            *data = config_regs.udmatim;
+            pkt->set<uint16_t>(config_regs.udmatim);
             break;
           case IDE_CTRL_CONF_IDE_CONFIG:
-            *data = config_regs.ideconfig;
+            pkt->set<uint16_t>(config_regs.ideconfig);
             break;
           default:
             panic("Invalid PCI configuration read for size 2 offset: %#x!\n",
                     offset);
         }
-
-    } else {
-        panic("Read of unimplemented PCI config. register: %x\n", offset);
+        DPRINTF(IdeCtrl, "PCI read offset: %#x size: 2 data: %#x\n", offset,
+                (uint32_t)pkt->get<uint16_t>());
+        break;
+      case sizeof(uint32_t):
+        panic("No 32bit reads implemented for this device.");
+        DPRINTF(IdeCtrl, "PCI read offset: %#x size: 4 data: %#x\n", offset,
+                (uint32_t)pkt->get<uint32_t>());
+        break;
+      default:
+        panic("invalid access size(?) for PCI configspace!\n");
     }
-    DPRINTF(IdeCtrl, "PCI read offset: %#x size: 2 data: %#x\n", offset, *data);
-}
+    pkt->result = Packet::Success;
+    return configDelay;
 
-void
-IdeController::readConfig(int offset, uint32_t *data)
-{
-    if (offset < PCI_DEVICE_SPECIFIC) {
-        PciDev::readConfig(offset, data);
-    } else {
-        panic("Read of unimplemented PCI config. register: %x\n", offset);
-    }
-    DPRINTF(IdeCtrl, "PCI read offset: %#x size: 4 data: %#x\n", offset, *data);
 }
-void
-IdeController::writeConfig(int offset, const uint8_t data)
-{
-    if (offset < PCI_DEVICE_SPECIFIC) {
-        PciDev::writeConfig(offset, data);
-    } else if (offset >= IDE_CTRL_CONF_START &&
-               (offset + 1) <= IDE_CTRL_CONF_END) {
 
-        switch (offset) {
-          case IDE_CTRL_CONF_DEV_TIMING:
-            config_regs.sidetim = data;
-            break;
-          case IDE_CTRL_CONF_UDMA_CNTRL:
-            config_regs.udmactl = data;
-            break;
-          case IDE_CTRL_CONF_IDE_CONFIG:
-            config_regs.ideconfig = (config_regs.ideconfig & 0xFF00) | (data);
-            break;
-          case IDE_CTRL_CONF_IDE_CONFIG+1:
-            config_regs.ideconfig = (config_regs.ideconfig & 0x00FF) | data << 8;
-            break;
-          default:
-            panic("Invalid PCI configuration write for size 1 offset: %#x!\n",
-                    offset);
-        }
 
-    } else {
-        panic("Read of unimplemented PCI config. register: %x\n", offset);
-    }
-    DPRINTF(IdeCtrl, "PCI write offset: %#x size: 1 data: %#x\n",
-                offset, (uint32_t)data);
-}
-
-void
-IdeController::writeConfig(int offset, const uint16_t data)
+Tick
+IdeController::writeConfig(Packet *pkt)
 {
+    int offset = pkt->getAddr() & PCI_CONFIG_SIZE;
     if (offset < PCI_DEVICE_SPECIFIC) {
-        PciDev::writeConfig(offset, data);
-    } else if (offset >= IDE_CTRL_CONF_START &&
-               (offset + 2) <= IDE_CTRL_CONF_END) {
+        PciDev::writeConfig(pkt);
+    } else {
+        assert(offset >= IDE_CTRL_CONF_START && (offset + 1) <= IDE_CTRL_CONF_END);
 
-        switch (offset) {
-          case IDE_CTRL_CONF_PRIM_TIMING:
-            config_regs.idetim0 = data;
-            break;
-          case IDE_CTRL_CONF_SEC_TIMING:
-            config_regs.idetim1 = data;
+        switch (pkt->getSize()) {
+          case sizeof(uint8_t):
+            switch (offset) {
+              case IDE_CTRL_CONF_DEV_TIMING:
+                config_regs.sidetim = pkt->get<uint8_t>();
+                break;
+              case IDE_CTRL_CONF_UDMA_CNTRL:
+                config_regs.udmactl = pkt->get<uint8_t>();
+                break;
+              case IDE_CTRL_CONF_IDE_CONFIG:
+                config_regs.ideconfig = (config_regs.ideconfig & 0xFF00) |
+                    (pkt->get<uint8_t>());
+                break;
+              case IDE_CTRL_CONF_IDE_CONFIG+1:
+                config_regs.ideconfig = (config_regs.ideconfig & 0x00FF) |
+                    pkt->get<uint8_t>() << 8;
+                break;
+              default:
+                panic("Invalid PCI configuration write for size 1 offset: %#x!\n",
+                        offset);
+            }
+            DPRINTF(IdeCtrl, "PCI write offset: %#x size: 1 data: %#x\n",
+                    offset, (uint32_t)pkt->get<uint8_t>());
             break;
-          case IDE_CTRL_CONF_UDMA_TIMING:
-            config_regs.udmatim = data;
+          case sizeof(uint16_t):
+            switch (offset) {
+              case IDE_CTRL_CONF_PRIM_TIMING:
+                config_regs.idetim0 = pkt->get<uint16_t>();
+                break;
+              case IDE_CTRL_CONF_SEC_TIMING:
+                config_regs.idetim1 = pkt->get<uint16_t>();
+                break;
+              case IDE_CTRL_CONF_UDMA_TIMING:
+                config_regs.udmatim = pkt->get<uint16_t>();
+                break;
+              case IDE_CTRL_CONF_IDE_CONFIG:
+                config_regs.ideconfig = pkt->get<uint16_t>();
+                break;
+              default:
+                panic("Invalid PCI configuration write for size 2 offset: %#x!\n",
+                        offset);
+            }
+            DPRINTF(IdeCtrl, "PCI write offset: %#x size: 2 data: %#x\n",
+                    offset, (uint32_t)pkt->get<uint16_t>());
             break;
-          case IDE_CTRL_CONF_IDE_CONFIG:
-            config_regs.ideconfig = data;
+          case sizeof(uint32_t):
+            panic("Write of unimplemented PCI config. register: %x\n", offset);
             break;
           default:
-            panic("Invalid PCI configuration write for size 2 offset: %#x!\n",
-                    offset);
+            panic("invalid access size(?) for PCI configspace!\n");
         }
-
-    } else {
-        panic("Write of unimplemented PCI config. register: %x\n", offset);
     }
-    DPRINTF(IdeCtrl, "PCI write offset: %#x size: 2 data: %#x\n", offset, data);
-
-    /* Trap command register writes and enable IO/BM as appropriate. */
-    if (offset == PCI_COMMAND) {
-        if (letoh(config.command) & PCI_CMD_IOSE)
-            io_enabled = true;
-        else
-            io_enabled = false;
-
-        if (letoh(config.command) & PCI_CMD_BME)
-            bm_enabled = true;
-        else
-            bm_enabled = false;
-    }
-
-}
-
-void
-IdeController::writeConfig(int offset, const uint32_t data)
-{
-    if (offset < PCI_DEVICE_SPECIFIC) {
-        PciDev::writeConfig(offset, data);
-    } else {
-        panic("Read of unimplemented PCI config. register: %x\n", offset);
-    }
-
-    DPRINTF(IdeCtrl, "PCI write offset: %#x size: 4 data: %#x\n", offset, data);
 
+    /* Trap writes to the BARs and the command register: refresh the cached
+     * BAR addresses and the IO/BM enable flags as appropriate. */
     switch(offset) {
       case PCI0_BASE_ADDR0:
         if (BARAddrs[0] != 0)
@@ -423,9 +389,24 @@ IdeController::writeConfig(int offset, const uint32_t data)
         if (BARAddrs[4] != 0)
             bmi_addr = BARAddrs[4];
         break;
+
+      case PCI_COMMAND:
+        if (letoh(config.command) & PCI_CMD_IOSE)
+            io_enabled = true;
+        else
+            io_enabled = false;
+
+        if (letoh(config.command) & PCI_CMD_BME)
+            bm_enabled = true;
+        else
+            bm_enabled = false;
+        break;
     }
+    pkt->result = Packet::Success;
+    return configDelay;
 }
 
+
 Tick
 IdeController::read(Packet *pkt)
 {
@@ -770,7 +751,6 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(IdeController)
 
     SimObjectParam<System *> system;
     SimObjectParam<Platform *> platform;
-    SimObjectParam<PciConfigAll *> configspace;
     SimObjectParam<PciConfigData *> configdata;
     Param<uint32_t> pci_bus;
     Param<uint32_t> pci_dev;
@@ -784,7 +764,6 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(IdeController)
 
     INIT_PARAM(system, "System pointer"),
     INIT_PARAM(platform, "Platform pointer"),
-    INIT_PARAM(configspace, "PCI Configspace"),
     INIT_PARAM(configdata, "PCI Config data"),
     INIT_PARAM(pci_bus, "PCI bus ID"),
     INIT_PARAM(pci_dev, "PCI device number"),
@@ -800,7 +779,6 @@ CREATE_SIM_OBJECT(IdeController)
     params->name = getInstanceName();
     params->platform = platform;
     params->system = system;
-    params->configSpace = configspace;
     params->configData = configdata;
     params->busNum = pci_bus;
     params->deviceNum = pci_dev;
diff --git a/src/dev/ide_ctrl.hh b/src/dev/ide_ctrl.hh
index 1d30c8b31..5842d322e 100644
--- a/src/dev/ide_ctrl.hh
+++ b/src/dev/ide_ctrl.hh
@@ -204,12 +204,8 @@ class IdeController : public PciDev
     IdeController(Params *p);
     ~IdeController();
 
-    virtual void writeConfig(int offset, const uint8_t data);
-    virtual void writeConfig(int offset, const uint16_t data);
-    virtual void writeConfig(int offset, const uint32_t data);
-    virtual void readConfig(int offset, uint8_t *data);
-    virtual void readConfig(int offset, uint16_t *data);
-    virtual void readConfig(int offset, uint32_t *data);
+    virtual Tick writeConfig(Packet *pkt);
+    virtual Tick readConfig(Packet *pkt);
 
     void setDmaComplete(IdeDisk *disk);
 
diff --git a/src/dev/ide_disk.cc b/src/dev/ide_disk.cc
index dc78021f8..12564ddd0 100644
--- a/src/dev/ide_disk.cc
+++ b/src/dev/ide_disk.cc
@@ -318,7 +318,7 @@ IdeDisk::doDmaTransfer()
         panic("Inconsistent DMA transfer state: dmaState = %d devState = %d\n",
               dmaState, devState);
 
-    if (ctrl->dmaPending()) {
+    if (ctrl->dmaPending() || ctrl->getState() != SimObject::Running) {
         dmaTransferEvent.schedule(curTick + DMA_BACKOFF_PERIOD);
         return;
     } else
@@ -398,8 +398,7 @@ IdeDisk::doDmaRead()
                 curPrd.getByteCount(), TheISA::PageBytes);
 
     }
-    if (ctrl->dmaPending()) {
-        panic("shouldn't be reentant??");
+    if (ctrl->dmaPending() || ctrl->getState() != SimObject::Running) {
         dmaReadWaitEvent.schedule(curTick + DMA_BACKOFF_PERIOD);
         return;
     } else if (!dmaReadCG->done()) {
@@ -474,8 +473,7 @@ IdeDisk::doDmaWrite()
         dmaWriteCG = new ChunkGenerator(curPrd.getBaseAddr(),
                 curPrd.getByteCount(), TheISA::PageBytes);
     }
-    if (ctrl->dmaPending()) {
-        panic("shouldn't be reentant??");
+    if (ctrl->dmaPending() || ctrl->getState() != SimObject::Running) {
         dmaWriteWaitEvent.schedule(curTick + DMA_BACKOFF_PERIOD);
         return;
     } else if (!dmaWriteCG->done()) {
diff --git a/src/dev/io_device.cc b/src/dev/io_device.cc
index 485216874..660efabfd 100644
--- a/src/dev/io_device.cc
+++ b/src/dev/io_device.cc
@@ -32,10 +32,12 @@
 #include "base/trace.hh"
 #include "dev/io_device.hh"
 #include "sim/builder.hh"
+#include "sim/system.hh"
 
 
-PioPort::PioPort(PioDevice *dev, Platform *p)
-    : Port(dev->name() + "-pioport"), device(dev), platform(p)
+PioPort::PioPort(PioDevice *dev, System *s, std::string pname)
+    : Port(dev->name() + pname), device(dev), sys(s),
+      outTiming(0), drainEvent(NULL)
 { }
 
 
@@ -62,34 +64,68 @@ PioPort::getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop)
 void
 PioPort::recvRetry()
 {
-    Packet* pkt = transmitList.front();
-    if (Port::sendTiming(pkt)) {
-        transmitList.pop_front();
+    bool result = true;
+    while (result && transmitList.size()) {
+        result = Port::sendTiming(transmitList.front());
+        if (result)
+            transmitList.pop_front();
     }
+   if (transmitList.size() == 0 && drainEvent) {
+       drainEvent->process();
+       drainEvent = NULL;
+   }
 }
 
-
 void
 PioPort::SendEvent::process()
 {
+    port->outTiming--;
+    assert(port->outTiming >= 0);
     if (port->Port::sendTiming(packet))
-        return;
+       if (port->transmitList.size() == 0 && port->drainEvent) {
+           port->drainEvent->process();
+           port->drainEvent = NULL;
+       }
+       return;
 
     port->transmitList.push_back(packet);
 }
 
+void
+PioPort::resendNacked(Packet *pkt) {
+    pkt->reinitNacked();
+    if (transmitList.size()) {
+         transmitList.push_front(pkt);
+    } else {
+        if (!Port::sendTiming(pkt))
+            transmitList.push_front(pkt);
+    }
+};
 
 
 bool
 PioPort::recvTiming(Packet *pkt)
 {
-    Tick latency = device->recvAtomic(pkt);
-    // turn packet around to go back to requester
-    pkt->makeTimingResponse();
-    sendTiming(pkt, latency);
+    if (pkt->result == Packet::Nacked) {
+        resendNacked(pkt);
+    } else {
+        Tick latency = device->recvAtomic(pkt);
+        // turn packet around to go back to requester
+        pkt->makeTimingResponse();
+        sendTiming(pkt, latency);
+    }
     return true;
 }
 
+unsigned int
+PioPort::drain(Event *de)
+{
+    if (outTiming == 0 && transmitList.size() == 0)
+        return 0;
+    drainEvent = de;
+    return 1;
+}
+
 PioDevice::~PioDevice()
 {
     if (pioPort)
@@ -104,6 +140,19 @@ PioDevice::init()
     pioPort->sendStatusChange(Port::RangeChange);
 }
 
+
+unsigned int
+PioDevice::drain(Event *de)
+{
+    unsigned int count;
+    count = pioPort->drain(de);
+    if (count)
+        changeState(Draining);
+    else
+        changeState(Drained);
+    return count;
+}
+
 void
 BasicPioDevice::addressRanges(AddrRangeList &range_list)
 {
@@ -113,8 +162,9 @@ BasicPioDevice::addressRanges(AddrRangeList &range_list)
 }
 
 
-DmaPort::DmaPort(DmaDevice *dev, Platform *p)
-    : Port(dev->name() + "-dmaport"), device(dev), platform(p), pendingCount(0)
+DmaPort::DmaPort(DmaDevice *dev, System *s)
+    : Port(dev->name() + "-dmaport"), device(dev), sys(s), pendingCount(0),
+      actionInProgress(0), drainEvent(NULL)
 { }
 
 bool
@@ -144,6 +194,11 @@ DmaPort::recvTiming(Packet *pkt)
         }
         delete pkt->req;
         delete pkt;
+
+        if (pendingCount == 0 && drainEvent) {
+            drainEvent->process();
+            drainEvent = NULL;
+        }
     }  else {
         panic("Got packet without sender state... huh?\n");
     }
@@ -155,6 +210,29 @@ DmaDevice::DmaDevice(Params *p)
     : PioDevice(p), dmaPort(NULL)
 { }
 
+
+unsigned int
+DmaDevice::drain(Event *de)
+{
+    unsigned int count;
+    count = pioPort->drain(de) + dmaPort->drain(de);
+    if (count)
+        changeState(Draining);
+    else
+        changeState(Drained);
+    return count;
+}
+
+unsigned int
+DmaPort::drain(Event *de)
+{
+    if (pendingCount == 0)
+        return 0;
+    drainEvent = de;
+    return 1;
+}
+
+
 void
 DmaPort::recvRetry()
 {
@@ -180,6 +258,8 @@ DmaPort::dmaAction(Packet::Command cmd, Addr addr, int size, Event *event,
 {
     assert(event);
 
+    assert(device->getState() == SimObject::Running);
+
     DmaReqState *reqState = new DmaReqState(event, this, size);
 
     for (ChunkGenerator gen(addr, size, peerBlockSize());
@@ -197,51 +277,54 @@ DmaPort::dmaAction(Packet::Command cmd, Addr addr, int size, Event *event,
             pendingCount++;
             sendDma(pkt);
     }
+
 }
 
 
 void
 DmaPort::sendDma(Packet *pkt, bool front)
 {
-   // some kind of selction between access methods
-   // more work is going to have to be done to make
-   // switching actually work
-  /* MemState state = device->platform->system->memState;
-
-   if (state == Timing) {  */
-       DPRINTF(DMA, "Attempting to send Packet %#x with addr: %#x\n",
-               pkt, pkt->getAddr());
-       if (transmitList.size() || !sendTiming(pkt)) {
-           if (front)
-               transmitList.push_front(pkt);
-           else
-               transmitList.push_back(pkt);
-           DPRINTF(DMA, "-- Failed: queued\n");
-       } else {
-           DPRINTF(DMA, "-- Done\n");
-       }
-  /*  } else if (state == Atomic) {
-       sendAtomic(pkt);
-       if (pkt->senderState) {
-           DmaReqState *state = dynamic_cast<DmaReqState*>(pkt->senderState);
-           assert(state);
-           state->completionEvent->schedule(curTick + (pkt->time -
-           pkt->req->getTime()) +1);
-           delete state;
-       }
-       pendingCount--;
-       assert(pendingCount >= 0);
-       delete pkt->req;
-       delete pkt;
-
-   } else if (state == Functional) {
-       sendFunctional(pkt);
-       // Is this correct???
-       completionEvent->schedule(pkt->req->responseTime - pkt->req->requestTime);
-       completionEvent == NULL;
+    // some kind of selection between access methods
+    // more work is going to have to be done to make
+    // switching actually work
+
+    System::MemoryMode state = sys->getMemoryMode();
+    if (state == System::Timing) {
+        DPRINTF(DMA, "Attempting to send Packet %#x with addr: %#x\n",
+                pkt, pkt->getAddr());
+        if (transmitList.size() || !sendTiming(pkt)) {
+            if (front)
+                transmitList.push_front(pkt);
+            else
+                transmitList.push_back(pkt);
+            DPRINTF(DMA, "-- Failed: queued\n");
+        } else {
+            DPRINTF(DMA, "-- Done\n");
+        }
+    } else if (state == System::Atomic) {
+        Tick lat;
+        lat = sendAtomic(pkt);
+        assert(pkt->senderState);
+        DmaReqState *state = dynamic_cast<DmaReqState*>(pkt->senderState);
+        assert(state);
+
+        state->numBytes += pkt->req->getSize();
+        if (state->totBytes == state->numBytes) {
+            state->completionEvent->schedule(curTick + lat);
+            delete state;
+            delete pkt->req;
+        }
+        pendingCount--;
+        assert(pendingCount >= 0);
+        delete pkt;
+
+        if (pendingCount == 0 && drainEvent) {
+            drainEvent->process();
+            drainEvent = NULL;
+        }
+
    } else
        panic("Unknown memory command state.");
-  */
 }
 
 DmaDevice::~DmaDevice()
diff --git a/src/dev/io_device.hh b/src/dev/io_device.hh
index 195ca0fb7..fa3f98247 100644
--- a/src/dev/io_device.hh
+++ b/src/dev/io_device.hh
@@ -60,9 +60,9 @@ class PioPort : public Port
     /** The device that this port serves. */
     PioDevice *device;
 
-    /** The platform that device/port are in. This is used to select which mode
+    /** The system that device/port are in. This is used to select which mode
      * we are currently operating in. */
-    Platform *platform;
+    System *sys;
 
     /** A list of outgoing timing response packets that haven't been serviced
      * yet. */
@@ -82,6 +82,8 @@ class PioPort : public Port
 
     virtual void getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop);
 
+    void resendNacked(Packet *pkt);
+
     /**
      * This class is used to implemented sendTiming() with a delay. When a delay
      * is requested a new event is created. When the event time expires it
@@ -104,55 +106,72 @@ class PioPort : public Port
         friend class PioPort;
     };
 
+    /** Number of delayed timing sends whose SendEvent has been created but
+     * has not yet fired and attempted to put its packet on the bus.
+     */
+    int outTiming;
+
+    /** If we need to drain, keep the drain event around until we're done
+     * here.*/
+    Event *drainEvent;
+
     /** Schedule a sendTiming() event to be called in the future. */
     void sendTiming(Packet *pkt, Tick time)
-    { new PioPort::SendEvent(this, pkt, time); }
+    { outTiming++; new PioPort::SendEvent(this, pkt, time); }
 
     /** This function is notification that the device should attempt to send a
      * packet again. */
     virtual void recvRetry();
 
   public:
-    PioPort(PioDevice *dev, Platform *p);
+    PioPort(PioDevice *dev, System *s, std::string pname = "-pioport");
+
+    unsigned int drain(Event *de);
 
   friend class PioPort::SendEvent;
 };
 
 
-struct DmaReqState : public Packet::SenderState
+class DmaPort : public Port
 {
-    /** Event to call on the device when this transaction (all packets)
-     * complete. */
-    Event *completionEvent;
+  protected:
+    struct DmaReqState : public Packet::SenderState
+    {
+        /** Event to call on the device when this transaction (all packets)
+         * complete. */
+        Event *completionEvent;
 
-    /** Where we came from for some sanity checking. */
-    Port *outPort;
+        /** Where we came from for some sanity checking. */
+        Port *outPort;
 
-    /** Total number of bytes that this transaction involves. */
-    Addr totBytes;
+        /** Total number of bytes that this transaction involves. */
+        Addr totBytes;
 
-    /** Number of bytes that have been acked for this transaction. */
-    Addr numBytes;
+        /** Number of bytes that have been acked for this transaction. */
+        Addr numBytes;
 
-    bool final;
-    DmaReqState(Event *ce, Port *p, Addr tb)
-        : completionEvent(ce), outPort(p), totBytes(tb), numBytes(0)
-    {}
-};
+        DmaReqState(Event *ce, Port *p, Addr tb)
+            : completionEvent(ce), outPort(p), totBytes(tb), numBytes(0)
+        {}
+    };
 
-class DmaPort : public Port
-{
-  protected:
     DmaDevice *device;
     std::list<Packet*> transmitList;
 
-    /** The platform that device/port are in. This is used to select which mode
+    /** The system that device/port are in. This is used to select which mode
      * we are currently operating in. */
-    Platform *platform;
+    System *sys;
 
     /** Number of outstanding packets the dma port has. */
     int pendingCount;
 
+    /** If a dmaAction is in progress. */
+    int actionInProgress;
+
+    /** If we need to drain, keep the drain event around until we're done
+     * here.*/
+    Event *drainEvent;
+
     virtual bool recvTiming(Packet *pkt);
     virtual Tick recvAtomic(Packet *pkt)
     { panic("dma port shouldn't be used for pio access."); }
@@ -170,13 +189,14 @@ class DmaPort : public Port
     void sendDma(Packet *pkt, bool front = false);
 
   public:
-    DmaPort(DmaDevice *dev, Platform *p);
+    DmaPort(DmaDevice *dev, System *s);
 
     void dmaAction(Packet::Command cmd, Addr addr, int size, Event *event,
                    uint8_t *data = NULL);
 
     bool dmaPending() { return pendingCount > 0; }
 
+    unsigned int drain(Event *de);
 };
 
 /**
@@ -195,6 +215,8 @@ class PioDevice : public MemObject
      * transaction we should perform. */
     Platform *platform;
 
+    System *sys;
+
     /** The pioPort that handles the requests for us and provides us requests
      * that it sees. */
     PioPort *pioPort;
@@ -239,20 +261,22 @@ class PioDevice : public MemObject
     const Params *params() const { return _params; }
 
     PioDevice(Params *p)
-              : MemObject(p->name),  platform(p->platform), pioPort(NULL),
-                _params(p)
+              : MemObject(p->name),  platform(p->platform), sys(p->system),
+              pioPort(NULL), _params(p)
               {}
 
     virtual ~PioDevice();
 
     virtual void init();
 
-    virtual Port *getPort(const std::string &if_name)
+    virtual unsigned int drain(Event *de);
+
+    virtual Port *getPort(const std::string &if_name, int idx = -1)
     {
         if (if_name == "pio") {
             if (pioPort != NULL)
                 panic("pio port already connected to.");
-            pioPort = new PioPort(this, params()->platform);
+            pioPort = new PioPort(this, sys);
             return pioPort;
         } else
             return NULL;
@@ -309,17 +333,19 @@ class DmaDevice : public PioDevice
 
     bool dmaPending() { return dmaPort->dmaPending(); }
 
-    virtual Port *getPort(const std::string &if_name)
+    virtual unsigned int drain(Event *de);
+
+    virtual Port *getPort(const std::string &if_name, int idx = -1)
     {
         if (if_name == "pio") {
             if (pioPort != NULL)
                 panic("pio port already connected to.");
-            pioPort = new PioPort(this, params()->platform);
+            pioPort = new PioPort(this, sys);
             return pioPort;
         } else if (if_name == "dma") {
             if (dmaPort != NULL)
                 panic("dma port already connected to.");
-            dmaPort = new DmaPort(this, params()->platform);
+            dmaPort = new DmaPort(this, sys);
             return dmaPort;
         } else
             return NULL;
diff --git a/src/dev/ns_gige.cc b/src/dev/ns_gige.cc
index decffaf73..bf2279d93 100644
--- a/src/dev/ns_gige.cc
+++ b/src/dev/ns_gige.cc
@@ -25,7 +25,8 @@
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
- * Authors: Lisa Hsu
+ * Authors: Nathan Binkert
+ *          Lisa Hsu
  */
 
 /** @file
@@ -464,11 +465,12 @@ NSGigE::regStats()
 /**
  * This is to write to the PCI general configuration registers
  */
-void
-NSGigE::writeConfig(int offset, const uint16_t data)
+Tick
+NSGigE::writeConfig(Packet *pkt)
 {
+    int offset = pkt->getAddr() & PCI_CONFIG_SIZE;
     if (offset < PCI_DEVICE_SPECIFIC)
-        PciDev::writeConfig(offset,  data);
+        PciDev::writeConfig(pkt);
     else
         panic("Device specific PCI config space not implemented!\n");
 
@@ -483,6 +485,8 @@ NSGigE::writeConfig(int offset, const uint16_t data)
             ioEnable = false;
         break;
     }
+    pkt->result = Packet::Success;
+    return configDelay;
 }
 
 /**
@@ -507,14 +511,7 @@ NSGigE::read(Packet *pkt)
     if (daddr > LAST && daddr <=  RESERVED) {
         panic("Accessing reserved register");
     } else if (daddr > RESERVED && daddr <= 0x3FC) {
-        if (pkt->getSize() == sizeof(uint8_t))
-            readConfig(daddr & 0xff, pkt->getPtr<uint8_t>());
-        if (pkt->getSize() == sizeof(uint16_t))
-            readConfig(daddr & 0xff, pkt->getPtr<uint16_t>());
-        if (pkt->getSize() == sizeof(uint32_t))
-            readConfig(daddr & 0xff, pkt->getPtr<uint32_t>());
-        pkt->result = Packet::Success;
-        return pioDelay;
+        return readConfig(pkt);
     } else if (daddr >= MIB_START && daddr <= MIB_END) {
         // don't implement all the MIB's.  hopefully the kernel
         // doesn't actually DEPEND upon their values
@@ -732,14 +729,7 @@ NSGigE::write(Packet *pkt)
     if (daddr > LAST && daddr <=  RESERVED) {
         panic("Accessing reserved register");
     } else if (daddr > RESERVED && daddr <= 0x3FC) {
-        if (pkt->getSize() == sizeof(uint8_t))
-            writeConfig(daddr & 0xff, pkt->get<uint8_t>());
-        if (pkt->getSize() == sizeof(uint16_t))
-            writeConfig(daddr & 0xff, pkt->get<uint16_t>());
-        if (pkt->getSize() == sizeof(uint32_t))
-            writeConfig(daddr & 0xff, pkt->get<uint32_t>());
-        pkt->result = Packet::Success;
-        return pioDelay;
+        return writeConfig(pkt);
     } else if (daddr > 0x3FC)
         panic("Something is messed up!\n");
 
@@ -1387,7 +1377,7 @@ NSGigE::doRxDmaRead()
     assert(rxDmaState == dmaIdle || rxDmaState == dmaReadWaiting);
     rxDmaState = dmaReading;
 
-    if (dmaPending())
+    if (dmaPending() || getState() != Running)
         rxDmaState = dmaReadWaiting;
     else
         dmaRead(rxDmaAddr, rxDmaLen, &rxDmaReadEvent, (uint8_t*)rxDmaData);
@@ -1418,7 +1408,7 @@ NSGigE::doRxDmaWrite()
     assert(rxDmaState == dmaIdle || rxDmaState == dmaWriteWaiting);
     rxDmaState = dmaWriting;
 
-    if (dmaPending())
+    if (dmaPending() || getState() != Running)
         rxDmaState = dmaWriteWaiting;
     else
         dmaWrite(rxDmaAddr, rxDmaLen, &rxDmaWriteEvent, (uint8_t*)rxDmaData);
@@ -1836,7 +1826,7 @@ NSGigE::doTxDmaRead()
     assert(txDmaState == dmaIdle || txDmaState == dmaReadWaiting);
     txDmaState = dmaReading;
 
-    if (dmaPending())
+    if (dmaPending() || getState() != Running)
         txDmaState = dmaReadWaiting;
     else
         dmaRead(txDmaAddr, txDmaLen, &txDmaReadEvent, (uint8_t*)txDmaData);
@@ -1867,7 +1857,7 @@ NSGigE::doTxDmaWrite()
     assert(txDmaState == dmaIdle || txDmaState == dmaWriteWaiting);
     txDmaState = dmaWriting;
 
-    if (dmaPending())
+    if (dmaPending() || getState() != Running)
         txDmaState = dmaWriteWaiting;
     else
         dmaWrite(txDmaAddr, txDmaLen, &txDmaWriteEvent, (uint8_t*)txDmaData);
@@ -2416,6 +2406,20 @@ NSGigE::recvPacket(EthPacketPtr packet)
     return true;
 }
 
+
+void
+NSGigE::resume()
+{
+    SimObject::resume();
+
+    // During drain we could have left the state machines in a waiting state and
+    // they wouldn't get out until some other event occurred to kick them.
+    // This way they'll get out immediately
+    txKick();
+    rxKick();
+}
+
+
 //=====================================================================
 //
 //
@@ -2806,7 +2810,6 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(NSGigE)
 
     SimObjectParam<System *> system;
     SimObjectParam<Platform *> platform;
-    SimObjectParam<PciConfigAll *> configspace;
     SimObjectParam<PciConfigData *> configdata;
     Param<uint32_t> pci_bus;
     Param<uint32_t> pci_dev;
@@ -2840,7 +2843,6 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(NSGigE)
 
     INIT_PARAM(system, "System pointer"),
     INIT_PARAM(platform, "Platform pointer"),
-    INIT_PARAM(configspace, "PCI Configspace"),
     INIT_PARAM(configdata, "PCI Config data"),
     INIT_PARAM(pci_bus, "PCI bus ID"),
     INIT_PARAM(pci_dev, "PCI device number"),
@@ -2878,7 +2880,6 @@ CREATE_SIM_OBJECT(NSGigE)
     params->name = getInstanceName();
     params->platform = platform;
     params->system = system;
-    params->configSpace = configspace;
     params->configData = configdata;
     params->busNum = pci_bus;
     params->deviceNum = pci_dev;
diff --git a/src/dev/ns_gige.hh b/src/dev/ns_gige.hh
index 2de11c951..080c0b1f3 100644
--- a/src/dev/ns_gige.hh
+++ b/src/dev/ns_gige.hh
@@ -25,7 +25,8 @@
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
- * Authors: Lisa Hsu
+ * Authors: Nathan Binkert
+ *          Lisa Hsu
  */
 
 /** @file
@@ -113,7 +114,6 @@ struct dp_rom {
 
 class NSGigEInt;
 class Packet;
-class PciConfigAll;
 
 /**
  * NS DP83820 Ethernet device model
@@ -375,7 +375,7 @@ class NSGigE : public PciDev
     ~NSGigE();
     const Params *params() const { return (const Params *)_params; }
 
-    virtual void writeConfig(int offset, const uint16_t data);
+    virtual Tick writeConfig(Packet *pkt);
 
     virtual Tick read(Packet *pkt);
     virtual Tick write(Packet *pkt);
@@ -391,6 +391,8 @@ class NSGigE : public PciDev
     virtual void serialize(std::ostream &os);
     virtual void unserialize(Checkpoint *cp, const std::string &section);
 
+    virtual void resume();
+
   public:
     void regStats();
 
diff --git a/src/dev/pciconfigall.cc b/src/dev/pciconfigall.cc
index 785774ff4..68013eab8 100644
--- a/src/dev/pciconfigall.cc
+++ b/src/dev/pciconfigall.cc
@@ -33,14 +33,8 @@
  * PCI Configspace implementation
  */
 
-#include <deque>
-#include <string>
-#include <vector>
-#include <bitset>
-
 #include "base/trace.hh"
 #include "dev/pciconfigall.hh"
-#include "dev/pcidev.hh"
 #include "dev/pcireg.h"
 #include "dev/platform.hh"
 #include "mem/packet.hh"
@@ -50,151 +44,61 @@
 using namespace std;
 
 PciConfigAll::PciConfigAll(Params *p)
-    : BasicPioDevice(p)
+    : PioDevice(p)
 {
-    pioSize = 0xffffff;
-
-    // Set backpointer for pci config. Really the config stuff should be able to
-    // automagically do this
-    p->platform->pciconfig = this;
-
-    // Make all the pointers to devices null
-    for(int x=0; x < MAX_PCI_DEV; x++)
-        for(int y=0; y < MAX_PCI_FUNC; y++)
-            devices[x][y] = NULL;
+    pioAddr = p->platform->calcConfigAddr(params()->bus,0,0);
 }
 
-// If two interrupts share the same line largely bad things will happen.
-// Since we don't track how many times an interrupt was set and correspondingly
-// cleared two devices on the same interrupt line and assert and deassert each
-// others interrupt "line". Interrupts will not work correctly.
-void
-PciConfigAll::startup()
-{
-    bitset<256> intLines;
-    PciDev *tempDev;
-    uint8_t intline;
-
-    for (int x = 0; x < MAX_PCI_DEV; x++) {
-        for (int y = 0; y < MAX_PCI_FUNC; y++) {
-           if (devices[x][y] != NULL) {
-               tempDev = devices[x][y];
-               intline = tempDev->interruptLine();
-               if (intLines.test(intline))
-                   warn("Interrupt line %#X is used multiple times"
-                        "(You probably want to fix this).\n", (uint32_t)intline);
-               else
-                   intLines.set(intline);
-           } // devices != NULL
-        } // PCI_FUNC
-    } // PCI_DEV
-
-}
 
 Tick
 PciConfigAll::read(Packet *pkt)
 {
     assert(pkt->result == Packet::Unknown);
-    assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
-
-    Addr daddr = pkt->getAddr() - pioAddr;
-    int device = (daddr >> 11) & 0x1F;
-    int func = (daddr >> 8) & 0x7;
-    int reg = daddr & 0xFF;
 
     pkt->allocate();
 
-    DPRINTF(PciConfigAll, "read  va=%#x da=%#x size=%d\n", pkt->getAddr(), daddr,
+    DPRINTF(PciConfigAll, "read  va=%#x size=%d\n", pkt->getAddr(),
             pkt->getSize());
 
     switch (pkt->getSize()) {
       case sizeof(uint32_t):
-         if (devices[device][func] == NULL)
-             pkt->set<uint32_t>(0xFFFFFFFF);
-         else
-             devices[device][func]->readConfig(reg, pkt->getPtr<uint32_t>());
+         pkt->set<uint32_t>(0xFFFFFFFF);
          break;
       case sizeof(uint16_t):
-         if (devices[device][func] == NULL)
-             pkt->set<uint16_t>(0xFFFF);
-         else
-             devices[device][func]->readConfig(reg, pkt->getPtr<uint16_t>());
+         pkt->set<uint16_t>(0xFFFF);
          break;
       case sizeof(uint8_t):
-         if (devices[device][func] == NULL)
-             pkt->set<uint8_t>(0xFF);
-         else
-             devices[device][func]->readConfig(reg, pkt->getPtr<uint8_t>());
+         pkt->set<uint8_t>(0xFF);
          break;
       default:
         panic("invalid access size(?) for PCI configspace!\n");
     }
     pkt->result = Packet::Success;
-    return pioDelay;
+    return params()->pio_delay;
 }
 
 Tick
 PciConfigAll::write(Packet *pkt)
 {
     assert(pkt->result == Packet::Unknown);
-    assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
-    assert(pkt->getSize() == sizeof(uint8_t) || pkt->getSize() == sizeof(uint16_t) ||
-            pkt->getSize() == sizeof(uint32_t));
-    Addr daddr = pkt->getAddr() - pioAddr;
-
-    int device = (daddr >> 11) & 0x1F;
-    int func = (daddr >> 8) & 0x7;
-    int reg = daddr & 0xFF;
-
-    if (devices[device][func] == NULL)
-        panic("Attempting to write to config space on non-existant device\n");
-
-    DPRINTF(PciConfigAll, "write - va=%#x size=%d data=%#x\n",
-            pkt->getAddr(), pkt->getSize(), pkt->get<uint32_t>());
-
-    switch (pkt->getSize()) {
-      case sizeof(uint8_t):
-        devices[device][func]->writeConfig(reg, pkt->get<uint8_t>());
-        break;
-      case sizeof(uint16_t):
-        devices[device][func]->writeConfig(reg, pkt->get<uint16_t>());
-        break;
-      case sizeof(uint32_t):
-        devices[device][func]->writeConfig(reg, pkt->get<uint32_t>());
-        break;
-      default:
-        panic("invalid pci config write size\n");
-    }
-    pkt->result = Packet::Success;
-    return pioDelay;
+    panic("Attempting to write to config space on non-existant device\n");
 }
 
 void
-PciConfigAll::serialize(std::ostream &os)
+PciConfigAll::addressRanges(AddrRangeList &range_list)
 {
-    /*
-     * There is no state associated with this object that requires
-     * serialization.  The only real state are the device pointers
-     * which are all setup by the constructor of the PciDev class
-     */
+    range_list.clear();
+    range_list.push_back(RangeSize(pioAddr, params()->size));
 }
 
-void
-PciConfigAll::unserialize(Checkpoint *cp, const std::string &section)
-{
-    /*
-     * There is no state associated with this object that requires
-     * serialization.  The only real state are the device pointers
-     * which are all setup by the constructor of the PciDev class
-     */
-}
 
 #ifndef DOXYGEN_SHOULD_SKIP_THIS
 
 BEGIN_DECLARE_SIM_OBJECT_PARAMS(PciConfigAll)
 
-    Param<Addr> pio_addr;
     Param<Tick> pio_latency;
+    Param<int> bus;
+    Param<Addr> size;
     SimObjectParam<Platform *> platform;
     SimObjectParam<System *> system;
 
@@ -202,8 +106,9 @@ END_DECLARE_SIM_OBJECT_PARAMS(PciConfigAll)
 
 BEGIN_INIT_SIM_OBJECT_PARAMS(PciConfigAll)
 
-    INIT_PARAM(pio_addr, "Device Address"),
     INIT_PARAM(pio_latency, "Programmed IO latency"),
+    INIT_PARAM(bus, "Bus that this object handles config space for"),
+    INIT_PARAM(size, "The size of config space"),
     INIT_PARAM(platform, "platform"),
     INIT_PARAM(system, "system object")
 
@@ -211,11 +116,13 @@ END_INIT_SIM_OBJECT_PARAMS(PciConfigAll)
 
 CREATE_SIM_OBJECT(PciConfigAll)
 {
-    BasicPioDevice::Params *p = new BasicPioDevice::Params;
-    p->pio_addr = pio_addr;
+    PciConfigAll::Params *p = new PciConfigAll::Params;
     p->pio_delay = pio_latency;
     p->platform = platform;
     p->system = system;
+    p->bus = bus;
+    p->size = size;
+
     return new PciConfigAll(p);
 }
 
diff --git a/src/dev/pciconfigall.hh b/src/dev/pciconfigall.hh
index e60fd949b..07eaf8112 100644
--- a/src/dev/pciconfigall.hh
+++ b/src/dev/pciconfigall.hh
@@ -42,11 +42,6 @@
 #include "dev/io_device.hh"
 
 
-static const uint32_t MAX_PCI_DEV = 32;
-static const uint32_t MAX_PCI_FUNC = 8;
-
-class PciDev;
-
 /**
  * PCI Config Space
  * All of PCI config space needs to return -1 on Tsunami, except
@@ -54,16 +49,17 @@ class PciDev;
  * space and passes the requests on to TsunamiPCIDev devices as
  * appropriate.
  */
-class PciConfigAll : public BasicPioDevice
+class PciConfigAll : public PioDevice
 {
-  private:
-    /**
-      * Pointers to all the devices that are registered with this
-      * particular config space.
-      */
-    PciDev* devices[MAX_PCI_DEV][MAX_PCI_FUNC];
-
   public:
+    struct Params :  public PioDevice::Params
+    {
+        Tick pio_delay;
+        Addr size;
+        int bus;
+    };
+    const Params *params() const { return (const Params *)_params; }
+
     /**
      * Constructor for PCIConfigAll
      * @param p parameters structure
@@ -71,28 +67,10 @@ class PciConfigAll : public BasicPioDevice
     PciConfigAll(Params *p);
 
     /**
-     * Check if a device exists.
-     * @param pcidev PCI device to check
-     * @param pcifunc PCI function to check
-     * @return true if device exists, false otherwise
-     */
-    bool deviceExists(uint32_t pcidev, uint32_t pcifunc)
-                     { return devices[pcidev][pcifunc] != NULL ? true : false; }
-
-    /**
-     * Registers a device with the config space object.
-     * @param pcidev PCI device to register
-     * @param pcifunc PCI function to register
-     * @param device device to register
-     */
-    void registerDevice(uint8_t pcidev, uint8_t pcifunc, PciDev *device)
-                        { devices[pcidev][pcifunc] = device; }
-
-    /**
      * Read something in PCI config space. If the device does not exist
      * -1 is returned, if the device does exist its PciDev::ReadConfig (or the
      * virtual function that overrides) it is called.
-     * @param pkt Contains the address of the field to read.
+     * @param pkt Contains information about the read operation
      * @return Amount of time to do the read
      */
     virtual Tick read(Packet *pkt);
@@ -101,31 +79,17 @@ class PciConfigAll : public BasicPioDevice
      * Write to PCI config spcae. If the device does not exit the simulator
      * panics. If it does it is passed on the PciDev::WriteConfig (or the virtual
      * function that overrides it).
-     * @param req Contains the address to write to.
-     * @param data The data to write.
-     * @return The fault condition of the access.
+     * @param pkt Contains information about the write operation
+     * @return Amount of time to do the write
      */
 
     virtual Tick write(Packet *pkt);
 
-    /**
-     * Start up function to check if more than one person is using an interrupt line
-     * and print a warning if such a case exists
-     */
-    virtual void startup();
+    void addressRanges(AddrRangeList &range_list);
 
-    /**
-     * Serialize this object to the given output stream.
-     * @param os The stream to serialize to.
-     */
-    virtual void serialize(std::ostream &os);
+  private:
+    Addr pioAddr;
 
-    /**
-     * Reconstruct the state of this object from a checkpoint.
-     * @param cp The checkpoint use.
-     * @param section The section name of this object
-     */
-    virtual void unserialize(Checkpoint *cp, const std::string &section);
 };
 
 #endif // __PCICONFIGALL_HH__
diff --git a/src/dev/pcidev.cc b/src/dev/pcidev.cc
index f8db2efbc..e81e0d1ee 100644
--- a/src/dev/pcidev.cc
+++ b/src/dev/pcidev.cc
@@ -53,201 +53,268 @@
 
 using namespace std;
 
-PciDev::PciDev(Params *p)
-    : DmaDevice(p), plat(p->platform), configData(p->configData),
-      pioDelay(p->pio_delay)
-{
-    // copy the config data from the PciConfigData object
-    if (configData) {
-        memcpy(config.data, configData->config.data, sizeof(config.data));
-        memcpy(BARSize, configData->BARSize, sizeof(BARSize));
-        memcpy(BARAddrs, configData->BARAddrs, sizeof(BARAddrs));
-    } else
-        panic("NULL pointer to configuration data");
 
-    // Setup pointer in config space to point to this entry
-    if (p->configSpace->deviceExists(p->deviceNum, p->functionNum))
-        panic("Two PCI devices occuping same dev: %#x func: %#x",
-              p->deviceNum, p->functionNum);
-    else
-        p->configSpace->registerDevice(p->deviceNum, p->functionNum, this);
-}
-
-void
-PciDev::readConfig(int offset, uint8_t *data)
+PciDev::PciConfigPort::PciConfigPort(PciDev *dev, int busid, int devid,
+        int funcid, Platform *p)
+        : PioPort(dev,p->system,"-pciconf"), device(dev), platform(p),
+          busId(busid), deviceId(devid), functionId(funcid)
 {
-    if (offset >= PCI_DEVICE_SPECIFIC)
-        panic("Device specific PCI config space not implemented!\n");
+    configAddr = platform->calcConfigAddr(busId, deviceId, functionId);
+}
 
-    *data = config.data[offset];
 
-    DPRINTF(PCIDEV,
-            "read device: %#x function: %#x register: %#x 1 bytes: data: %#x\n",
-            params()->deviceNum, params()->functionNum, offset, *data);
+Tick
+PciDev::PciConfigPort::recvAtomic(Packet *pkt)
+{
+    assert(pkt->result == Packet::Unknown);
+    assert(pkt->getAddr() >= configAddr && pkt->getAddr() < configAddr +
+            PCI_CONFIG_SIZE);
+    return device->recvConfig(pkt);
 }
 
 void
-PciDev::addressRanges(AddrRangeList &range_list)
+PciDev::PciConfigPort::recvFunctional(Packet *pkt)
 {
-    int x = 0;
-    range_list.clear();
-    for (x = 0; x < 6; x++)
-        if (BARAddrs[x] != 0)
-            range_list.push_back(RangeSize(BARAddrs[x],BARSize[x]));
+    assert(pkt->result == Packet::Unknown);
+    assert(pkt->getAddr() >= configAddr && pkt->getAddr() < configAddr +
+            PCI_CONFIG_SIZE);
+    device->recvConfig(pkt);
 }
 
 void
-PciDev::readConfig(int offset, uint16_t *data)
+PciDev::PciConfigPort::getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop)
 {
-    if (offset >= PCI_DEVICE_SPECIFIC)
-        panic("Device specific PCI config space not implemented!\n");
+    snoop.clear();
+    resp.push_back(RangeSize(configAddr, PCI_CONFIG_SIZE+1));
+}
 
-    *data = *(uint16_t*)&config.data[offset];
 
-    DPRINTF(PCIDEV,
-            "read device: %#x function: %#x register: %#x 2 bytes: data: %#x\n",
-            params()->deviceNum, params()->functionNum, offset, *data);
+bool
+PciDev::PciConfigPort::recvTiming(Packet *pkt)
+{
+    if (pkt->result == Packet::Nacked) {
+        resendNacked(pkt);
+    } else {
+        assert(pkt->result == Packet::Unknown);
+        assert(pkt->getAddr() >= configAddr && pkt->getAddr() < configAddr +
+                PCI_CONFIG_SIZE);
+        Tick latency = device->recvConfig(pkt);
+        // turn packet around to go back to requester
+        pkt->makeTimingResponse();
+        sendTiming(pkt, latency);
+    }
+    return true;
 }
 
-void
-PciDev::readConfig(int offset, uint32_t *data)
+PciDev::PciDev(Params *p)
+    : DmaDevice(p), plat(p->platform), configData(p->configData),
+      pioDelay(p->pio_delay), configDelay(p->config_delay),
+      configPort(NULL)
 {
-    if (offset >= PCI_DEVICE_SPECIFIC)
-        panic("Device specific PCI config space not implemented!\n");
+    // copy the config data from the PciConfigData object
+    if (configData) {
+        memcpy(config.data, configData->config.data, sizeof(config.data));
+        memcpy(BARSize, configData->BARSize, sizeof(BARSize));
+        memcpy(BARAddrs, configData->BARAddrs, sizeof(BARAddrs));
+    } else
+        panic("NULL pointer to configuration data");
 
-    *data = *(uint32_t*)&config.data[offset];
+    plat->registerPciDevice(0, p->deviceNum, p->functionNum,
+            letoh(configData->config.interruptLine));
+}
 
-    DPRINTF(PCIDEV,
-            "read device: %#x function: %#x register: %#x 4 bytes: data: %#x\n",
-            params()->deviceNum, params()->functionNum, offset, *data);
+void
+PciDev::init()
+{
+    if (!configPort)
+        panic("pci config port not connected to anything!");
+   configPort->sendStatusChange(Port::RangeChange);
+   PioDevice::init();
 }
 
+unsigned int
+PciDev::drain(Event *de)
+{
+    unsigned int count;
+    count = pioPort->drain(de) + dmaPort->drain(de) + configPort->drain(de);
+    if (count)
+        changeState(Draining);
+    else
+        changeState(Drained);
+    return count;
+}
 
-void
-PciDev::writeConfig(int offset,  const uint8_t data)
+Tick
+PciDev::readConfig(Packet *pkt)
 {
+    int offset = pkt->getAddr() & PCI_CONFIG_SIZE;
     if (offset >= PCI_DEVICE_SPECIFIC)
         panic("Device specific PCI config space not implemented!\n");
 
-    DPRINTF(PCIDEV,
-            "write device: %#x function: %#x reg: %#x size: 1 data: %#x\n",
-            params()->deviceNum, params()->functionNum, offset, data);
-
-    switch (offset) {
-      case PCI0_INTERRUPT_LINE:
-        config.interruptLine = data;
-      case PCI_CACHE_LINE_SIZE:
-        config.cacheLineSize = data;
-      case PCI_LATENCY_TIMER:
-        config.latencyTimer = data;
+    pkt->allocate();
+
+    switch (pkt->getSize()) {
+      case sizeof(uint8_t):
+        pkt->set<uint8_t>(config.data[offset]);
+        DPRINTF(PCIDEV,
+            "read device: %#x function: %#x register: %#x 1 bytes: data: %#x\n",
+            params()->deviceNum, params()->functionNum, offset,
+            (uint32_t)pkt->get<uint8_t>());
         break;
-      /* Do nothing for these read-only registers */
-      case PCI0_INTERRUPT_PIN:
-      case PCI0_MINIMUM_GRANT:
-      case PCI0_MAXIMUM_LATENCY:
-      case PCI_CLASS_CODE:
-      case PCI_REVISION_ID:
+      case sizeof(uint16_t):
+        pkt->set<uint16_t>(*(uint16_t*)&config.data[offset]);
+        DPRINTF(PCIDEV,
+            "read device: %#x function: %#x register: %#x 2 bytes: data: %#x\n",
+            params()->deviceNum, params()->functionNum, offset,
+            (uint32_t)pkt->get<uint16_t>());
+        break;
+      case sizeof(uint32_t):
+        pkt->set<uint32_t>(*(uint32_t*)&config.data[offset]);
+        DPRINTF(PCIDEV,
+            "read device: %#x function: %#x register: %#x 4 bytes: data: %#x\n",
+            params()->deviceNum, params()->functionNum, offset,
+            (uint32_t)pkt->get<uint32_t>());
         break;
       default:
-        panic("writing to a read only register");
+        panic("invalid access size(?) for PCI configspace!\n");
     }
+    pkt->result = Packet::Success;
+    return configDelay;
+
 }
 
 void
-PciDev::writeConfig(int offset, const uint16_t data)
+PciDev::addressRanges(AddrRangeList &range_list)
 {
-    if (offset >= PCI_DEVICE_SPECIFIC)
-        panic("Device specific PCI config space not implemented!\n");
-
-    DPRINTF(PCIDEV,
-            "write device: %#x function: %#x reg: %#x size: 2 data: %#x\n",
-            params()->deviceNum, params()->functionNum, offset, data);
-
-    switch (offset) {
-      case PCI_COMMAND:
-        config.command = data;
-      case PCI_STATUS:
-        config.status = data;
-      case PCI_CACHE_LINE_SIZE:
-        config.cacheLineSize = data;
-        break;
-      default:
-        panic("writing to a read only register");
-    }
+    int x = 0;
+    range_list.clear();
+    for (x = 0; x < 6; x++)
+        if (BARAddrs[x] != 0)
+            range_list.push_back(RangeSize(BARAddrs[x],BARSize[x]));
 }
 
-
-void
-PciDev::writeConfig(int offset, const uint32_t data)
+Tick
+PciDev::writeConfig(Packet *pkt)
 {
+    int offset = pkt->getAddr() & PCI_CONFIG_SIZE;
     if (offset >= PCI_DEVICE_SPECIFIC)
         panic("Device specific PCI config space not implemented!\n");
 
-    DPRINTF(PCIDEV,
-            "write device: %#x function: %#x reg: %#x size: 4 data: %#x\n",
-            params()->deviceNum, params()->functionNum, offset, data);
-
-    switch (offset) {
-      case PCI0_BASE_ADDR0:
-      case PCI0_BASE_ADDR1:
-      case PCI0_BASE_ADDR2:
-      case PCI0_BASE_ADDR3:
-      case PCI0_BASE_ADDR4:
-      case PCI0_BASE_ADDR5:
-
-        uint32_t barnum, bar_mask;
-        Addr base_addr, base_size, space_base;
-
-        barnum = BAR_NUMBER(offset);
-
-        if (BAR_IO_SPACE(letoh(config.baseAddr[barnum]))) {
-            bar_mask = BAR_IO_MASK;
-            space_base = TSUNAMI_PCI0_IO;
-        } else {
-            bar_mask = BAR_MEM_MASK;
-            space_base = TSUNAMI_PCI0_MEMORY;
+    switch (pkt->getSize()) {
+      case sizeof(uint8_t):
+        switch (offset) {
+          case PCI0_INTERRUPT_LINE:  // each register is written on its own
+            config.interruptLine = pkt->get<uint8_t>(); break;
+          case PCI_CACHE_LINE_SIZE:
+            config.cacheLineSize = pkt->get<uint8_t>(); break;
+          case PCI_LATENCY_TIMER:
+            config.latencyTimer = pkt->get<uint8_t>();
+            break;
+          /* Writes to these read-only registers are silently ignored */
+          case PCI0_INTERRUPT_PIN:
+          case PCI0_MINIMUM_GRANT:
+          case PCI0_MAXIMUM_LATENCY:
+          case PCI_CLASS_CODE:
+          case PCI_REVISION_ID:
+            break;
+          default:
+            panic("writing to a read only register");
+        }
+        DPRINTF(PCIDEV,
+            "write device: %#x function: %#x register: %#x 1 bytes: data: %#x\n",
+            params()->deviceNum, params()->functionNum, offset,
+            (uint32_t)pkt->get<uint8_t>());
+        break;
+      case sizeof(uint16_t):
+        switch (offset) {
+          case PCI_COMMAND:  // 16-bit register: store the whole halfword
+            config.command = pkt->get<uint16_t>(); break;
+          case PCI_STATUS:   // 16-bit register: store the whole halfword
+            config.status = pkt->get<uint16_t>(); break;
+          case PCI_CACHE_LINE_SIZE:  // 8-bit register: first byte only
+            config.cacheLineSize = pkt->get<uint8_t>();
+            break;
+          default:
+            panic("writing to a read only register");
+        }
+        DPRINTF(PCIDEV,
+            "write device: %#x function: %#x register: %#x 2 bytes: data: %#x\n",
+            params()->deviceNum, params()->functionNum, offset,
+            (uint32_t)pkt->get<uint16_t>());
+        break;
+      case sizeof(uint32_t):
+        switch (offset) {
+          case PCI0_BASE_ADDR0:
+          case PCI0_BASE_ADDR1:
+          case PCI0_BASE_ADDR2:
+          case PCI0_BASE_ADDR3:
+          case PCI0_BASE_ADDR4:
+          case PCI0_BASE_ADDR5:
+
+            uint32_t barnum, bar_mask;
+            Addr base_addr, base_size, space_base;
+
+            barnum = BAR_NUMBER(offset);
+
+            if (BAR_IO_SPACE(letoh(config.baseAddr[barnum]))) {
+                bar_mask = BAR_IO_MASK;
+                space_base = TSUNAMI_PCI0_IO;
+            } else {
+                bar_mask = BAR_MEM_MASK;
+                space_base = TSUNAMI_PCI0_MEMORY;
+            }
 
-        // Writing 0xffffffff to a BAR tells the card to set the
-        // value of the bar to size of memory it needs
-        if (letoh(data) == 0xffffffff) {
-            // This is I/O Space, bottom two bits are read only
-
-            config.baseAddr[barnum] = letoh(
-                    (~(BARSize[barnum] - 1) & ~bar_mask) |
+            // Writing 0xffffffff to a BAR tells the card to set the
+            // value of the bar to size of memory it needs
+            if (letoh(pkt->get<uint32_t>()) == 0xffffffff) {
+                // This is I/O Space, bottom two bits are read only
+
+                config.baseAddr[barnum] = letoh(
+                        (~(BARSize[barnum] - 1) & ~bar_mask) |
+                        (letoh(config.baseAddr[barnum]) & bar_mask));
+            } else {
+                config.baseAddr[barnum] = letoh(
+                    (letoh(pkt->get<uint32_t>()) & ~bar_mask) |
                     (letoh(config.baseAddr[barnum]) & bar_mask));
-        } else {
-            config.baseAddr[barnum] = letoh(
-                (letoh(data) & ~bar_mask) |
-                (letoh(config.baseAddr[barnum]) & bar_mask));
 
-            if (letoh(config.baseAddr[barnum]) & ~bar_mask) {
-                base_addr = (letoh(data) & ~bar_mask) + space_base;
-                base_size = BARSize[barnum];
-                BARAddrs[barnum] = base_addr;
+                if (letoh(config.baseAddr[barnum]) & ~bar_mask) {
+                    base_addr = (letoh(pkt->get<uint32_t>()) & ~bar_mask) + space_base;
+                    base_size = BARSize[barnum];
+                    BARAddrs[barnum] = base_addr;
 
-            pioPort->sendStatusChange(Port::RangeChange);
+                pioPort->sendStatusChange(Port::RangeChange);
+                }
             }
+            break;
+
+          case PCI0_ROM_BASE_ADDR:
+            if (letoh(pkt->get<uint32_t>()) == 0xfffffffe)
+                config.expansionROM = htole((uint32_t)0xffffffff);
+            else
+                config.expansionROM = pkt->get<uint32_t>();
+            break;
+
+          case PCI_COMMAND:
+            // This could also clear some of the error bits in the Status
+            // register. However they should never get set, so lets ignore
+            // it for now
+            config.command = pkt->get<uint32_t>();
+            break;
+
+          default:
+            DPRINTF(PCIDEV, "Writing to a read only register");
         }
+        DPRINTF(PCIDEV,
+            "write device: %#x function: %#x register: %#x 4 bytes: data: %#x\n",
+            params()->deviceNum, params()->functionNum, offset,
+            (uint32_t)pkt->get<uint32_t>());
         break;
-
-      case PCI0_ROM_BASE_ADDR:
-        if (letoh(data) == 0xfffffffe)
-            config.expansionROM = htole((uint32_t)0xffffffff);
-        else
-            config.expansionROM = data;
-        break;
-
-      case PCI_COMMAND:
-        // This could also clear some of the error bits in the Status
-        // register. However they should never get set, so lets ignore
-        // it for now
-        config.command = data;
-        break;
-
       default:
-        DPRINTF(PCIDEV, "Writing to a read only register");
+        panic("invalid access size(?) for PCI configspace!\n");
     }
+    pkt->result = Packet::Success;
+    return configDelay;
+
 }
 
 void
diff --git a/src/dev/pcidev.hh b/src/dev/pcidev.hh
index 92786427b..847fb07d0 100644
--- a/src/dev/pcidev.hh
+++ b/src/dev/pcidev.hh
@@ -47,8 +47,6 @@
 #define BAR_IO_SPACE(x) ((x) & BAR_IO_SPACE_BIT)
 #define BAR_NUMBER(x) (((x) - PCI0_BASE_ADDR0) >> 0x2);
 
-class PciConfigAll;
-
 
 /**
  * This class encapulates the first 64 bytes of a singles PCI
@@ -78,24 +76,43 @@ class PciConfigData : public SimObject
     Addr BARAddrs[6];
 };
 
+
 /**
  * PCI device, base implemnation is only config space.
- * Each device is connected to a PCIConfigSpace device
- * which returns -1 for everything but the pcidevs that
- * register with it. This object registers with the PCIConfig space
- * object.
  */
 class PciDev : public DmaDevice
 {
-  public:
-    struct Params : public ::PioDevice::Params
+    class PciConfigPort : public PioPort
     {
-        /**
-         * A pointer to the configspace all object that calls us when
-         * a read comes to this particular device/function.
-         */
-        PciConfigAll *configSpace;
+      protected:
+        PciDev *device;
+
+        virtual bool recvTiming(Packet *pkt);
+
+        virtual Tick recvAtomic(Packet *pkt);
+
+        virtual void recvFunctional(Packet *pkt) ;
+
+        virtual void getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop);
+
+        Platform *platform;
+
+        int busId;
+        int deviceId;
+        int functionId;
+
+        Addr configAddr;
+
+      public:
+        PciConfigPort(PciDev *dev, int busid, int devid, int funcid,
+                Platform *p);
 
+      friend class PioPort::SendEvent;
+    };
+
+  public:
+    struct Params : public PioDevice::Params
+    {
         /**
          * A pointer to the object that contains the first 64 bytes of
          * config space
@@ -113,6 +130,9 @@ class PciDev : public DmaDevice
 
         /** The latency for pio accesses. */
         Tick pio_delay;
+
+        /** The latency for a config access. */
+        Tick config_delay;
     };
 
   public:
@@ -164,6 +184,25 @@ class PciDev : public DmaDevice
     Platform *plat;
     PciConfigData *configData;
     Tick pioDelay;
+    Tick configDelay;
+    PciConfigPort *configPort;
+
+    /**
+     * Write to the PCI config space data that is stored locally. This may be
+     * overridden by the device but at some point it will eventually call this
+     * for normal operations that it does not need to override.
+     * @param pkt packet describing the write (data and config space offset)
+     */
+    virtual Tick writeConfig(Packet *pkt);
+
+
+    /**
+     * Read from the PCI config space data that is stored locally. This may be
+     * overridden by the device but at some point it will eventually call this
+     * for normal operations that it does not need to override.
+     * @param pkt packet describing the read and its offset into config space
+     */
+    virtual Tick readConfig(Packet *pkt);
 
   public:
     Addr pciToDma(Addr pciAddr) const
@@ -171,21 +210,25 @@ class PciDev : public DmaDevice
 
     void
     intrPost()
-    { plat->postPciInt(configData->config.interruptLine); }
+    { plat->postPciInt(letoh(configData->config.interruptLine)); }
 
     void
     intrClear()
-    { plat->clearPciInt(configData->config.interruptLine); }
+    { plat->clearPciInt(letoh(configData->config.interruptLine)); }
 
     uint8_t
     interruptLine()
-    { return configData->config.interruptLine; }
+    { return letoh(configData->config.interruptLine); }
 
     /** return the address ranges that this device responds to.
      * @params range_list range list to populate with ranges
      */
     void addressRanges(AddrRangeList &range_list);
 
+    /** PCI config access: reads -> readConfig(), others -> writeConfig(). */
+    Tick recvConfig(Packet *pkt)
+    { return pkt->isRead() ? readConfig(pkt) : writeConfig(pkt); }
+
     /**
      * Constructor for PCI Dev. This function copies data from the
      * config file object PCIConfigData and registers the device with
@@ -193,30 +236,7 @@ class PciDev : public DmaDevice
      */
     PciDev(Params *params);
 
-    /**
-     * Write to the PCI config space data that is stored locally. This may be
-     * overridden by the device but at some point it will eventually call this
-     * for normal operations that it does not need to override.
-     * @param offset the offset into config space
-     * @param size the size of the write
-     * @param data the data to write
-     */
-    virtual void writeConfig(int offset, const uint8_t data);
-    virtual void writeConfig(int offset, const uint16_t data);
-    virtual void writeConfig(int offset, const uint32_t data);
-
-
-    /**
-     * Read from the PCI config space data that is stored locally. This may be
-     * overridden by the device but at some point it will eventually call this
-     * for normal operations that it does not need to override.
-     * @param offset the offset into config space
-     * @param size the size of the read
-     * @param data pointer to the location where the read value should be stored
-     */
-    virtual void readConfig(int offset, uint8_t *data);
-    virtual void readConfig(int offset, uint16_t *data);
-    virtual void readConfig(int offset, uint32_t *data);
+    virtual void init();
 
     /**
      * Serialize this object to the given output stream.
@@ -230,5 +250,22 @@ class PciDev : public DmaDevice
      * @param section The section name of this object
      */
     virtual void unserialize(Checkpoint *cp, const std::string &section);
+
+
+    virtual unsigned int drain(Event *de);
+
+    virtual Port *getPort(const std::string &if_name, int idx = -1)
+    {
+        // Anything but the "config" port is handled by the DmaDevice base.
+        if (if_name != "config")
+            return DmaDevice::getPort(if_name, idx);
+        // The config port is created on request and may only be created once.
+        if (configPort != NULL)
+            panic("pciconfig port already connected to.");
+        configPort = new PciConfigPort(this, params()->busNum,
+            params()->deviceNum, params()->functionNum, params()->platform);
+        return configPort;
+    }
+
 };
 #endif // __DEV_PCIDEV_HH__
diff --git a/src/dev/pcireg.h b/src/dev/pcireg.h
index 0aa4ba8ef..a48abd4fa 100644
--- a/src/dev/pcireg.h
+++ b/src/dev/pcireg.h
@@ -142,6 +142,7 @@ union PCIConfig {
 
 // Device specific offsets
 #define PCI_DEVICE_SPECIFIC     	0x40	// 192 bytes
+#define PCI_CONFIG_SIZE         0xFF	// NOTE(review): last offset (0x40 + 192 - 1), not a byte count
 
 // Some Vendor IDs
 #define PCI_VENDOR_DEC			0x1011
diff --git a/src/dev/platform.cc b/src/dev/platform.cc
index ed021e3b6..8546b7805 100644
--- a/src/dev/platform.cc
+++ b/src/dev/platform.cc
@@ -63,5 +63,21 @@ Platform::pciToDma(Addr pciAddr) const
    panic("No PCI dma support in platform.");
 }
 
+void
+Platform::registerPciDevice(uint8_t bus, uint8_t dev, uint8_t func, uint8_t intr)
+{
+    // Pack bus:device:function into one key: bus in bits 23-16, dev in 15-8.
+    const uint32_t key = (bus << 16) | (dev << 8) | func;
+    if (pciDevices.count(key) != 0)
+        fatal("Two PCI devices have same bus:device:function\n");
+
+    // Each registered device must claim an interrupt line nobody else holds.
+    if (intLines.test(intr))
+        fatal("Two PCI devices have same interrupt line: %d\n", intr);
+    pciDevices.insert(key);
+    intLines.set(intr);
+}
+
+
 DEFINE_SIM_OBJECT_CLASS_NAME("Platform", Platform)
 
diff --git a/src/dev/platform.hh b/src/dev/platform.hh
index bfc229748..1940dcad6 100644
--- a/src/dev/platform.hh
+++ b/src/dev/platform.hh
@@ -26,6 +26,7 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * Authors: Andrew Schultz
+ *          Nathan Binkert
  */
 
 /**
@@ -36,6 +37,9 @@
 #ifndef __DEV_PLATFORM_HH__
 #define __DEV_PLATFORM_HH__
 
+#include <bitset>
+#include <set>
+
 #include "sim/sim_object.hh"
 #include "arch/isa_traits.hh"
 
@@ -51,9 +55,6 @@ class Platform : public SimObject
     /** Pointer to the interrupt controller */
     IntrControl *intrctrl;
 
-    /** Pointer to the PCI configuration space */
-    PciConfigAll *pciconfig;
-
     /** Pointer to the UART, set by the uart */
     Uart *uart;
 
@@ -63,13 +64,20 @@ class Platform : public SimObject
   public:
     Platform(const std::string &name, IntrControl *intctrl);
     virtual ~Platform();
-    virtual void init() { if (pciconfig == NULL) panic("PCI Config not set"); }
     virtual void postConsoleInt() = 0;
     virtual void clearConsoleInt() = 0;
     virtual Tick intrFrequency() = 0;
     virtual void postPciInt(int line);
     virtual void clearPciInt(int line);
     virtual Addr pciToDma(Addr pciAddr) const;
+    virtual Addr calcConfigAddr(int bus, int dev, int func) = 0;
+    virtual void registerPciDevice(uint8_t bus, uint8_t dev, uint8_t func,
+            uint8_t intr);
+
+  private:
+    std::bitset<256> intLines;
+    std::set<uint32_t> pciDevices;
+
 };
 
 #endif // __DEV_PLATFORM_HH__
diff --git a/src/dev/sinic.cc b/src/dev/sinic.cc
index a0223733b..815cecca5 100644
--- a/src/dev/sinic.cc
+++ b/src/dev/sinic.cc
@@ -37,7 +37,6 @@
 #include "cpu/intr_control.hh"
 #include "dev/etherlink.hh"
 #include "dev/sinic.hh"
-#include "dev/pciconfigall.hh"
 #include "mem/packet.hh"
 #include "sim/builder.hh"
 #include "sim/debug.hh"
@@ -922,7 +921,7 @@ Device::rxKick()
         break;
 
       case rxBeginCopy:
-        if (dmaPending())
+        if (dmaPending() || getState() != Running)
             goto exit;
 
         rxDmaAddr = params()->platform->pciToDma(
@@ -1110,7 +1109,7 @@ Device::txKick()
         break;
 
       case txBeginCopy:
-        if (dmaPending())
+        if (dmaPending() || getState() != Running)
             goto exit;
 
         txDmaAddr = params()->platform->pciToDma(
@@ -1288,6 +1287,18 @@ Device::recvPacket(EthPacketPtr packet)
     return true;
 }
 
+void
+Device::resume()
+{
+    SimObject::resume();
+
+    // During drain we could have left the state machines in a waiting state and
+    // they wouldn't get out until some other event occurred to kick them.
+    // Kicking both of them here lets them get out immediately.
+    txKick();
+    rxKick();
+}
+
 //=====================================================================
 //
 //
@@ -1623,7 +1634,6 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(Device)
 
     SimObjectParam<System *> system;
     SimObjectParam<Platform *> platform;
-    SimObjectParam<PciConfigAll *> configspace;
     SimObjectParam<PciConfigData *> configdata;
     Param<uint32_t> pci_bus;
     Param<uint32_t> pci_dev;
@@ -1666,7 +1676,6 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(Device)
 
     INIT_PARAM(system, "System pointer"),
     INIT_PARAM(platform, "Platform pointer"),
-    INIT_PARAM(configspace, "PCI Configspace"),
     INIT_PARAM(configdata, "PCI Config data"),
     INIT_PARAM(pci_bus, "PCI bus ID"),
     INIT_PARAM(pci_dev, "PCI device number"),
@@ -1711,7 +1720,6 @@ CREATE_SIM_OBJECT(Device)
     params->name = getInstanceName();
     params->platform = platform;
     params->system = system;
-    params->configSpace = configspace;
     params->configData = configdata;
     params->busNum = pci_bus;
     params->deviceNum = pci_dev;
diff --git a/src/dev/sinic.hh b/src/dev/sinic.hh
index f6c229039..eece4ba6b 100644
--- a/src/dev/sinic.hh
+++ b/src/dev/sinic.hh
@@ -266,6 +266,7 @@ class Device : public Base
   public:
     virtual Tick read(Packet *pkt);
     virtual Tick write(Packet *pkt);
+    virtual void resume();
 
     void prepareIO(int cpu, int index);
     void prepareRead(int cpu, int index);
diff --git a/src/dev/tsunami.cc b/src/dev/tsunami.cc
index c9e15581d..8e740a72f 100644
--- a/src/dev/tsunami.cc
+++ b/src/dev/tsunami.cc
@@ -95,6 +95,13 @@ Tsunami::pciToDma(Addr pciAddr) const
     return pchip->translatePciToDma(pciAddr);
 }
 
+/** Compute a PCI config address by forwarding the request to the PChip. */
+Addr
+Tsunami::calcConfigAddr(int bus, int dev, int func)
+{
+   return pchip->calcConfigAddr(bus, dev, func);
+}
+
 void
 Tsunami::serialize(std::ostream &os)
 {
diff --git a/src/dev/tsunami.hh b/src/dev/tsunami.hh
index 13fc4417c..8bb66e914 100644
--- a/src/dev/tsunami.hh
+++ b/src/dev/tsunami.hh
@@ -113,9 +113,15 @@ class Tsunami : public Platform
      */
     virtual void clearPciInt(int line);
 
+
     virtual Addr pciToDma(Addr pciAddr) const;
 
     /**
+     * Calculate the configuration address given a bus/dev/func.
+     */
+    virtual Addr calcConfigAddr(int bus, int dev, int func);
+
+    /**
      * Serialize this object to the given output stream.
      * @param os The stream to serialize to.
      */
diff --git a/src/dev/tsunami_pchip.cc b/src/dev/tsunami_pchip.cc
index a376b908d..8a542b9b0 100644
--- a/src/dev/tsunami_pchip.cc
+++ b/src/dev/tsunami_pchip.cc
@@ -302,6 +302,17 @@ TsunamiPChip::translatePciToDma(Addr busAddr)
     // if no match was found, then return the original address
     return busAddr;
 }
+Addr
+TsunamiPChip::calcConfigAddr(int bus, int dev, int func)
+{
+    // Only bus 0 is accepted; func must fit in 3 bits and dev in 5. Negative
+    // values are rejected so sign extension cannot corrupt the upper bits.
+    assert(bus == 0 && func >= 0 && func < 8 && dev >= 0 && dev < 32);
+    // Function number lands at bits 10-8, device number at bits 15-11.
+    return TsunamiPciBus0Config | (func << 8) | (dev << 11);
+}
+
+
 
 void
 TsunamiPChip::serialize(std::ostream &os)
diff --git a/src/dev/tsunami_pchip.hh b/src/dev/tsunami_pchip.hh
index 9f80f7d68..2c97a1fea 100644
--- a/src/dev/tsunami_pchip.hh
+++ b/src/dev/tsunami_pchip.hh
@@ -45,6 +45,9 @@
 class TsunamiPChip : public BasicPioDevice
 {
   protected:
+
+    static const Addr TsunamiPciBus0Config = ULL(0x801fe000000);
+
     /** Pchip control register */
     uint64_t pctl;
 
@@ -80,6 +83,8 @@ class TsunamiPChip : public BasicPioDevice
      */
     Addr translatePciToDma(Addr busAddr);
 
+    Addr calcConfigAddr(int bus, int dev, int func);
+
     virtual Tick read(Packet *pkt);
     virtual Tick write(Packet *pkt);
 
diff --git a/src/kern/linux/events.cc b/src/kern/linux/events.cc
index 5ff7e26db..289ece5ce 100644
--- a/src/kern/linux/events.cc
+++ b/src/kern/linux/events.cc
@@ -25,7 +25,8 @@
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
- * Authors: Ali Saidi
+ * Authors: Nathan Binkert
+ *          Ali Saidi
  */
 
 #include "arch/arguments.hh"
diff --git a/src/kern/linux/events.hh b/src/kern/linux/events.hh
index 65f794a9c..b0510c18f 100644
--- a/src/kern/linux/events.hh
+++ b/src/kern/linux/events.hh
@@ -25,7 +25,8 @@
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
- * Authors: Ali Saidi
+ * Authors: Nathan Binkert
+ *          Ali Saidi
  */
 
 #ifndef __KERN_LINUX_EVENTS_HH__
diff --git a/src/kern/linux/linux.hh b/src/kern/linux/linux.hh
index af5e23b95..e3f554a22 100644
--- a/src/kern/linux/linux.hh
+++ b/src/kern/linux/linux.hh
@@ -69,7 +69,7 @@ class Linux {
     typedef uint32_t gid_t;
     //@}
 
-#if BSD_HOST
+#if NO_STAT64
     typedef struct stat hst_stat;
     typedef struct stat hst_stat64;
 #else
@@ -176,7 +176,7 @@ class Linux {
     /// Helper function to convert a host stat buffer to a target stat
     /// buffer.  Also copies the target buffer out to the simulated
     /// memory space.  Used by stat(), fstat(), and lstat().
-#if !BSD_HOST
+#if !NO_STAT64
     static void
     copyOutStatBuf(TranslatingPort *mem, Addr addr, hst_stat *host)
     {
diff --git a/src/kern/linux/printk.cc b/src/kern/linux/printk.cc
index e39a15982..004d1be2f 100644
--- a/src/kern/linux/printk.cc
+++ b/src/kern/linux/printk.cc
@@ -25,7 +25,8 @@
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
- * Authors: Ali Saidi
+ * Authors: Nathan Binkert
+ *          Ali Saidi
  */
 
 #include <sys/types.h>
diff --git a/src/kern/linux/printk.hh b/src/kern/linux/printk.hh
index f9203717a..5ddf0a018 100644
--- a/src/kern/linux/printk.hh
+++ b/src/kern/linux/printk.hh
@@ -25,7 +25,8 @@
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
- * Authors: Ali Saidi
+ * Authors: Nathan Binkert
+ *          Ali Saidi
  */
 
 #ifndef __PRINTK_HH__
diff --git a/src/kern/solaris/solaris.hh b/src/kern/solaris/solaris.hh
index 0fec0bcce..b819fb6d2 100644
--- a/src/kern/solaris/solaris.hh
+++ b/src/kern/solaris/solaris.hh
@@ -74,7 +74,7 @@ class Solaris {
     typedef uint32_t nlink_t;
     //@}
 
-#if BSD_HOST
+#if NO_STAT64
     typedef struct stat hst_stat;
     typedef struct stat hst_stat64;
 #else
@@ -177,7 +177,7 @@ class Solaris {
     /// Helper function to convert a host stat buffer to a target stat
     /// buffer.  Also copies the target buffer out to the simulated
     /// memory space.  Used by stat(), fstat(), and lstat().
-#if !BSD_HOST
+#if !NO_STAT64
     static void
     copyOutStatBuf(TranslatingPort *mem, Addr addr, hst_stat *host)
     {
diff --git a/src/kern/system_events.hh b/src/kern/system_events.hh
index ccd6bd9a4..93b5eb528 100644
--- a/src/kern/system_events.hh
+++ b/src/kern/system_events.hh
@@ -25,7 +25,8 @@
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
- * Authors: Lisa Hsu
+ * Authors: Nathan Binkert
+ *          Lisa Hsu
  *          Ali Saidi
  */
 
diff --git a/src/mem/bridge.cc b/src/mem/bridge.cc
index 3718cbaaf..29ea2e12f 100644
--- a/src/mem/bridge.cc
+++ b/src/mem/bridge.cc
@@ -59,7 +59,7 @@ Bridge::Bridge(const std::string &n, int qsa, int qsb,
 }
 
 Port *
-Bridge::getPort(const std::string &if_name)
+Bridge::getPort(const std::string &if_name, int idx)
 {
     BridgePort *port;
 
diff --git a/src/mem/bridge.hh b/src/mem/bridge.hh
index 37fb92662..b3525d3e0 100644
--- a/src/mem/bridge.hh
+++ b/src/mem/bridge.hh
@@ -177,7 +177,7 @@ class Bridge : public MemObject
   public:
 
     /** A function used to return the port associated with this bus object. */
-    virtual Port *getPort(const std::string &if_name);
+    virtual Port *getPort(const std::string &if_name, int idx = -1);
 
     virtual void init();
 
diff --git a/src/mem/bus.cc b/src/mem/bus.cc
index 919acd23c..31271106b 100644
--- a/src/mem/bus.cc
+++ b/src/mem/bus.cc
@@ -33,13 +33,22 @@
  */
 
 
+#include "base/misc.hh"
 #include "base/trace.hh"
 #include "mem/bus.hh"
 #include "sim/builder.hh"
 
 Port *
-Bus::getPort(const std::string &if_name)
+Bus::getPort(const std::string &if_name, int idx)
 {
+    if (if_name == "default")
+        if (defaultPort == NULL) {
+            defaultPort = new BusPort(csprintf("%s-default",name()), this,
+                    defaultId);
+            return defaultPort;
+        } else
+            fatal("Default port already set\n");
+
     // if_name ignored?  forced to be empty?
     int id = interfaces.size();
     BusPort *bp = new BusPort(csprintf("%s-p%d", name(), id), this, id);
@@ -47,11 +56,12 @@ Bus::getPort(const std::string &if_name)
     return bp;
 }
 
-/** Get the ranges of anyone that we are connected to. */
+/** Get the ranges of any other buses that we are connected to. */
 void
 Bus::init()
 {
     std::vector<Port*>::iterator intIter;
+
     for (intIter = interfaces.begin(); intIter != interfaces.end(); intIter++)
         (*intIter)->sendStatusChange(Port::RangeChange);
 }
@@ -110,6 +120,7 @@ Bus::findPort(Addr addr, int id)
     int dest_id = -1;
     int i = 0;
     bool found = false;
+    AddrRangeIter iter;
 
     while (i < portList.size() && !found)
     {
@@ -120,8 +131,18 @@ Bus::findPort(Addr addr, int id)
         }
         i++;
     }
-    if (dest_id == -1)
+
+    // Check if this matches the default range
+    if (dest_id == -1) {
+        for (iter = defaultRange.begin(); iter != defaultRange.end(); iter++) {
+            if (*iter == addr) {
+                DPRINTF(Bus, "  found addr 0x%llx on default\n", addr);
+                return defaultPort;
+            }
+        }
         panic("Unable to find destination for addr: %llx", addr);
+    }
+
 
     // we shouldn't be sending this back to where it came from
     assert(dest_id != id);
@@ -155,39 +176,52 @@ Bus::recvFunctional(Packet *pkt)
 void
 Bus::recvStatusChange(Port::Status status, int id)
 {
+    AddrRangeList ranges;
+    AddrRangeList snoops;
+    int x;
+    AddrRangeIter iter;
+
     assert(status == Port::RangeChange &&
            "The other statuses need to be implemented.");
 
     DPRINTF(BusAddrRanges, "received RangeChange from device id %d\n", id);
 
-    assert(id < interfaces.size() && id >= 0);
-    int x;
-    Port *port = interfaces[id];
-    AddrRangeList ranges;
-    AddrRangeList snoops;
-    AddrRangeIter iter;
-    std::vector<DevMap>::iterator portIter;
+    if (id == defaultId) {
+        defaultRange.clear();
+        defaultPort->getPeerAddressRanges(ranges, snoops);
+        assert(snoops.size() == 0);
+        for(iter = ranges.begin(); iter != ranges.end(); iter++) {
+            defaultRange.push_back(*iter);
+            DPRINTF(BusAddrRanges, "Adding range %llx - %llx for default\n",
+                    iter->start, iter->end);
+        }
+    } else {
 
-    // Clean out any previously existent ids
-    for (portIter = portList.begin(); portIter != portList.end(); ) {
-        if (portIter->portId == id)
-            portIter = portList.erase(portIter);
-        else
-            portIter++;
-    }
+        assert((id < interfaces.size() && id >= 0) || id == -1);
+        Port *port = interfaces[id];
+        std::vector<DevMap>::iterator portIter;
+
+        // Clean out any previously existent ids
+        for (portIter = portList.begin(); portIter != portList.end(); ) {
+            if (portIter->portId == id)
+                portIter = portList.erase(portIter);
+            else
+                portIter++;
+        }
 
-    port->getPeerAddressRanges(ranges, snoops);
+        port->getPeerAddressRanges(ranges, snoops);
 
-    // not dealing with snooping yet either
-    assert(snoops.size() == 0);
-    for(iter = ranges.begin(); iter != ranges.end(); iter++) {
-        DevMap dm;
-        dm.portId = id;
-        dm.range = *iter;
+        // not dealing with snooping yet either
+        assert(snoops.size() == 0);
+        for(iter = ranges.begin(); iter != ranges.end(); iter++) {
+            DevMap dm;
+            dm.portId = id;
+            dm.range = *iter;
 
-        DPRINTF(BusAddrRanges, "Adding range %llx - %llx for id %d\n",
-                dm.range.start, dm.range.end, id);
-        portList.push_back(dm);
+            DPRINTF(BusAddrRanges, "Adding range %llx - %llx for id %d\n",
+                    dm.range.start, dm.range.end, id);
+            portList.push_back(dm);
+        }
     }
     DPRINTF(MMU, "port list has %d entries\n", portList.size());
 
@@ -196,19 +230,47 @@ Bus::recvStatusChange(Port::Status status, int id)
     for (x = 0; x < interfaces.size(); x++)
         if (x != id)
             interfaces[x]->sendStatusChange(Port::RangeChange);
+
+    if (id != defaultId && defaultPort)
+        defaultPort->sendStatusChange(Port::RangeChange);
 }
 
 void
 Bus::addressRanges(AddrRangeList &resp, AddrRangeList &snoop, int id)
 {
     std::vector<DevMap>::iterator portIter;
+    AddrRangeIter dflt_iter;
+    bool subset;
 
     resp.clear();
     snoop.clear();
 
     DPRINTF(BusAddrRanges, "received address range request, returning:\n");
+
+    for (dflt_iter = defaultRange.begin(); dflt_iter != defaultRange.end();
+            dflt_iter++) {
+        resp.push_back(*dflt_iter);
+        DPRINTF(BusAddrRanges, "  -- %#llX : %#llX\n",dflt_iter->start,
+                dflt_iter->end);
+    }
     for (portIter = portList.begin(); portIter != portList.end(); portIter++) {
-        if (portIter->portId != id) {
+        subset = false;
+        for (dflt_iter = defaultRange.begin(); dflt_iter != defaultRange.end();
+                dflt_iter++) {
+            if ((portIter->range.start < dflt_iter->start &&
+                portIter->range.end >= dflt_iter->start) ||
+               (portIter->range.start < dflt_iter->end &&
+                portIter->range.end >= dflt_iter->end))
+                fatal("Devices can not set ranges that itersect the default set\
+                        but are not a subset of the default set.\n");
+            if (portIter->range.start >= dflt_iter->start &&
+                portIter->range.end <= dflt_iter->end) {
+                subset = true;
+                DPRINTF(BusAddrRanges, "  -- %#llX : %#llX is a SUBSET\n",
+                    portIter->range.start, portIter->range.end);
+            }
+        }
+        if (portIter->portId != id && !subset) {
             resp.push_back(portIter->range);
             DPRINTF(BusAddrRanges, "  -- %#llX : %#llX\n",
                     portIter->range.start, portIter->range.end);
diff --git a/src/mem/bus.hh b/src/mem/bus.hh
index 50bfba6e4..3a2896886 100644
--- a/src/mem/bus.hh
+++ b/src/mem/bus.hh
@@ -26,6 +26,7 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * Authors: Ron Dreslinski
+ *          Ali Saidi
  */
 
 /**
@@ -50,19 +51,22 @@ class Bus : public MemObject
     /** a globally unique id for this bus. */
     int busId;
 
+    static const int defaultId = -1;
+
     struct DevMap {
         int portId;
         Range<Addr> range;
     };
     std::vector<DevMap> portList;
+    AddrRangeList defaultRange;
 
 
     /** Function called by the port when the bus is recieving a Timing
-        transaction.*/
+      transaction.*/
     bool recvTiming(Packet *pkt);
 
     /** Function called by the port when the bus is recieving a Atomic
-        transaction.*/
+      transaction.*/
     Tick recvAtomic(Packet *pkt);
 
     /** Function called by the port when the bus is recieving a Functional
@@ -158,15 +162,18 @@ class Bus : public MemObject
      * original send failed for whatever reason.*/
     std::list<Port*> retryList;
 
+    /** Port that handles requests that don't match any of the interfaces.*/
+    Port *defaultPort;
+
   public:
 
     /** A function used to return the port associated with this bus object. */
-    virtual Port *getPort(const std::string &if_name);
+    virtual Port *getPort(const std::string &if_name, int idx = -1);
 
     virtual void init();
 
     Bus(const std::string &n, int bus_id)
-        : MemObject(n), busId(bus_id)  {}
+        : MemObject(n), busId(bus_id), defaultPort(NULL)  {}
 
 };
 
diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc
new file mode 100644
index 000000000..451da28e8
--- /dev/null
+++ b/src/mem/cache/base_cache.cc
@@ -0,0 +1,405 @@
+/*
+ * Copyright (c) 2003-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Erik Hallnor
+ */
+
+/**
+ * @file
+ * Definition of BaseCache functions.
+ */
+
+#include "mem/cache/base_cache.hh"
+#include "cpu/smt.hh"
+#include "cpu/base.hh"
+
+using namespace std;
+
+BaseCache::CachePort::CachePort(const std::string &_name, BaseCache *_cache,
+                                bool _isCpuSide)
+    : Port(_name), cache(_cache), isCpuSide(_isCpuSide)
+{
+    blocked = false;
+    //Start ports at null if more than one is created we should panic
+    //cpuSidePort = NULL;
+    //memSidePort = NULL;
+}
+
+void
+BaseCache::CachePort::recvStatusChange(Port::Status status)
+{
+    cache->recvStatusChange(status, isCpuSide);
+}
+
+void
+BaseCache::CachePort::getDeviceAddressRanges(AddrRangeList &resp,
+                                       AddrRangeList &snoop)
+{
+    cache->getAddressRanges(resp, snoop, isCpuSide);
+}
+
+int
+BaseCache::CachePort::deviceBlockSize()
+{
+    return cache->getBlockSize();
+}
+
+bool
+BaseCache::CachePort::recvTiming(Packet *pkt)
+{
+    return cache->doTimingAccess(pkt, this, isCpuSide);
+}
+
+Tick
+BaseCache::CachePort::recvAtomic(Packet *pkt)
+{
+    return cache->doAtomicAccess(pkt, isCpuSide);
+}
+
+void
+BaseCache::CachePort::recvFunctional(Packet *pkt)
+{
+    cache->doFunctionalAccess(pkt, isCpuSide);
+}
+
+void
+BaseCache::CachePort::setBlocked()
+{
+    blocked = true;
+}
+
+void
+BaseCache::CachePort::clearBlocked()
+{
+    blocked = false;
+}
+
+BaseCache::CacheEvent::CacheEvent(CachePort *_cachePort)
+    : Event(&mainEventQueue, CPU_Tick_Pri), cachePort(_cachePort)
+{
+    this->setFlags(AutoDelete);
+    pkt = NULL;
+}
+
+BaseCache::CacheEvent::CacheEvent(CachePort *_cachePort, Packet *_pkt)
+    : Event(&mainEventQueue, CPU_Tick_Pri), cachePort(_cachePort), pkt(_pkt)
+{
+    this->setFlags(AutoDelete);
+}
+
+void
+BaseCache::CacheEvent::process()
+{
+    if (!pkt)
+    {
+        if (!cachePort->isCpuSide)
+        {
+            pkt = cachePort->cache->getPacket();
+            bool success = cachePort->sendTiming(pkt);
+            DPRINTF(Cache, "Address %x was %s in sending the timing request\n",
+                    pkt->getAddr(), success ? "succesful" : "unsuccesful");
+            cachePort->cache->sendResult(pkt, success);
+            if (success && cachePort->cache->doMasterRequest())
+            {
+                //Still more to issue: clear pkt and re-request in 1 cycle
+                pkt = NULL;
+                this->schedule(curTick+1);
+            }
+        }
+        else
+        {
+            pkt = cachePort->cache->getCoherencePacket();
+            cachePort->sendTiming(pkt);
+        }
+        return;
+    }
+    //Know the packet to send, no need to mark in service (must succeed)
+    bool success = cachePort->sendTiming(pkt);
+    assert(success);
+}
+
+const char *
+BaseCache::CacheEvent::description()
+{
+    return "timing event\n";
+}
+
+Port*
+BaseCache::getPort(const std::string &if_name, int idx)
+{
+    if (if_name == "")
+    {
+        if(cpuSidePort == NULL)
+            cpuSidePort = new CachePort(name() + "-cpu_side_port", this, true);
+        return cpuSidePort;
+    }
+    else if (if_name == "functional")
+    {
+        if(cpuSidePort == NULL)
+            cpuSidePort = new CachePort(name() + "-cpu_side_port", this, true);
+        return cpuSidePort;
+    }
+    else if (if_name == "cpu_side")
+    {
+        if(cpuSidePort == NULL)
+            cpuSidePort = new CachePort(name() + "-cpu_side_port", this, true);
+        return cpuSidePort;
+    }
+    else if (if_name == "mem_side")
+    {
+        if (memSidePort != NULL)
+            panic("Already have a mem side for this cache\n");
+        memSidePort = new CachePort(name() + "-mem_side_port", this, false);
+        return memSidePort;
+    }
+    else panic("Port name %s unrecognized\n", if_name);
+}
+
+void
+BaseCache::init()
+{
+    if (!cpuSidePort || !memSidePort)
+        panic("Cache not hooked up on both sides\n");
+    cpuSidePort->sendStatusChange(Port::RangeChange);
+}
+
+void
+BaseCache::regStats()
+{
+    Request temp_req((Addr) NULL, 4, 0);
+    Packet::Command temp_cmd = Packet::ReadReq;
+    Packet temp_pkt(&temp_req, temp_cmd, 0);  //@todo Fix command strings so this isn't necessary
+    temp_pkt.allocate(); //Temp allocate, all need data
+
+    using namespace Stats;
+
+    // Hit statistics
+    for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) {
+        Packet::Command cmd = (Packet::Command)access_idx;
+        const string &cstr = temp_pkt.cmdIdxToString(cmd);
+
+        hits[access_idx]
+            .init(maxThreadsPerCPU)
+            .name(name() + "." + cstr + "_hits")
+            .desc("number of " + cstr + " hits")
+            .flags(total | nozero | nonan)
+            ;
+    }
+
+    demandHits
+        .name(name() + ".demand_hits")
+        .desc("number of demand (read+write) hits")
+        .flags(total)
+        ;
+    demandHits = hits[Packet::ReadReq] + hits[Packet::WriteReq];
+
+    overallHits
+        .name(name() + ".overall_hits")
+        .desc("number of overall hits")
+        .flags(total)
+        ;
+    overallHits = demandHits + hits[Packet::SoftPFReq] + hits[Packet::HardPFReq]
+        + hits[Packet::Writeback];
+
+    // Miss statistics
+    for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) {
+        Packet::Command cmd = (Packet::Command)access_idx;
+        const string &cstr = temp_pkt.cmdIdxToString(cmd);
+
+        misses[access_idx]
+            .init(maxThreadsPerCPU)
+            .name(name() + "." + cstr + "_misses")
+            .desc("number of " + cstr + " misses")
+            .flags(total | nozero | nonan)
+            ;
+    }
+
+    demandMisses
+        .name(name() + ".demand_misses")
+        .desc("number of demand (read+write) misses")
+        .flags(total)
+        ;
+    demandMisses = misses[Packet::ReadReq] + misses[Packet::WriteReq];
+
+    overallMisses
+        .name(name() + ".overall_misses")
+        .desc("number of overall misses")
+        .flags(total)
+        ;
+    overallMisses = demandMisses + misses[Packet::SoftPFReq] +
+        misses[Packet::HardPFReq] + misses[Packet::Writeback];
+
+    // Miss latency statistics
+    for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) {
+        Packet::Command cmd = (Packet::Command)access_idx;
+        const string &cstr = temp_pkt.cmdIdxToString(cmd);
+
+        missLatency[access_idx]
+            .init(maxThreadsPerCPU)
+            .name(name() + "." + cstr + "_miss_latency")
+            .desc("number of " + cstr + " miss cycles")
+            .flags(total | nozero | nonan)
+            ;
+    }
+
+    demandMissLatency
+        .name(name() + ".demand_miss_latency")
+        .desc("number of demand (read+write) miss cycles")
+        .flags(total)
+        ;
+    demandMissLatency = missLatency[Packet::ReadReq] + missLatency[Packet::WriteReq];
+
+    overallMissLatency
+        .name(name() + ".overall_miss_latency")
+        .desc("number of overall miss cycles")
+        .flags(total)
+        ;
+    overallMissLatency = demandMissLatency + missLatency[Packet::SoftPFReq] +
+        missLatency[Packet::HardPFReq];
+
+    // access formulas
+    for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) {
+        Packet::Command cmd = (Packet::Command)access_idx;
+        const string &cstr = temp_pkt.cmdIdxToString(cmd);
+
+        accesses[access_idx]
+            .name(name() + "." + cstr + "_accesses")
+            .desc("number of " + cstr + " accesses(hits+misses)")
+            .flags(total | nozero | nonan)
+            ;
+
+        accesses[access_idx] = hits[access_idx] + misses[access_idx];
+    }
+
+    demandAccesses
+        .name(name() + ".demand_accesses")
+        .desc("number of demand (read+write) accesses")
+        .flags(total)
+        ;
+    demandAccesses = demandHits + demandMisses;
+
+    overallAccesses
+        .name(name() + ".overall_accesses")
+        .desc("number of overall (read+write) accesses")
+        .flags(total)
+        ;
+    overallAccesses = overallHits + overallMisses;
+
+    // miss rate formulas
+    for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) {
+        Packet::Command cmd = (Packet::Command)access_idx;
+        const string &cstr = temp_pkt.cmdIdxToString(cmd);
+
+        missRate[access_idx]
+            .name(name() + "." + cstr + "_miss_rate")
+            .desc("miss rate for " + cstr + " accesses")
+            .flags(total | nozero | nonan)
+            ;
+
+        missRate[access_idx] = misses[access_idx] / accesses[access_idx];
+    }
+
+    demandMissRate
+        .name(name() + ".demand_miss_rate")
+        .desc("miss rate for demand accesses")
+        .flags(total)
+        ;
+    demandMissRate = demandMisses / demandAccesses;
+
+    overallMissRate
+        .name(name() + ".overall_miss_rate")
+        .desc("miss rate for overall accesses")
+        .flags(total)
+        ;
+    overallMissRate = overallMisses / overallAccesses;
+
+    // miss latency formulas
+    for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) {
+        Packet::Command cmd = (Packet::Command)access_idx;
+        const string &cstr = temp_pkt.cmdIdxToString(cmd);
+
+        avgMissLatency[access_idx]
+            .name(name() + "." + cstr + "_avg_miss_latency")
+            .desc("average " + cstr + " miss latency")
+            .flags(total | nozero | nonan)
+            ;
+
+        avgMissLatency[access_idx] =
+            missLatency[access_idx] / misses[access_idx];
+    }
+
+    demandAvgMissLatency
+        .name(name() + ".demand_avg_miss_latency")
+        .desc("average demand miss latency") // demand-only (read+write) average
+        .flags(total)
+        ;
+    demandAvgMissLatency = demandMissLatency / demandMisses;
+
+    overallAvgMissLatency
+        .name(name() + ".overall_avg_miss_latency")
+        .desc("average overall miss latency")
+        .flags(total)
+        ;
+    overallAvgMissLatency = overallMissLatency / overallMisses;
+
+    blocked_cycles.init(NUM_BLOCKED_CAUSES);
+    blocked_cycles
+        .name(name() + ".blocked_cycles")
+        .desc("number of cycles access was blocked")
+        .subname(Blocked_NoMSHRs, "no_mshrs")
+        .subname(Blocked_NoTargets, "no_targets")
+        ;
+
+
+    blocked_causes.init(NUM_BLOCKED_CAUSES);
+    blocked_causes
+        .name(name() + ".blocked")
+        .desc("number of times access was blocked") // a count, not cycles
+        .subname(Blocked_NoMSHRs, "no_mshrs")
+        .subname(Blocked_NoTargets, "no_targets")
+        ;
+
+    avg_blocked
+        .name(name() + ".avg_blocked_cycles")
+        .desc("average number of cycles each access was blocked")
+        .subname(Blocked_NoMSHRs, "no_mshrs")
+        .subname(Blocked_NoTargets, "no_targets")
+        ;
+
+    avg_blocked = blocked_cycles / blocked_causes;
+
+    fastWrites
+        .name(name() + ".fast_writes")
+        .desc("number of fast writes performed")
+        ;
+
+    cacheCopies
+        .name(name() + ".cache_copies")
+        .desc("number of cache copies performed")
+        ;
+
+}
diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh
new file mode 100644
index 000000000..0d1bfdfdb
--- /dev/null
+++ b/src/mem/cache/base_cache.hh
@@ -0,0 +1,549 @@
+/*
+ * Copyright (c) 2003-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Erik Hallnor
+ */
+
+/**
+ * @file
+ * Declares a basic cache interface BaseCache.
+ */
+
+#ifndef __BASE_CACHE_HH__
+#define __BASE_CACHE_HH__
+
+#include <vector>
+#include <string>
+#include <list>
+#include <inttypes.h>
+
+#include "base/misc.hh"
+#include "base/statistics.hh"
+#include "base/trace.hh"
+#include "mem/mem_object.hh"
+#include "mem/packet.hh"
+#include "mem/port.hh"
+#include "mem/request.hh"
+#include "sim/eventq.hh"
+
+/**
+ * Reasons for Caches to be Blocked.
+ */
+enum BlockedCause{
+    Blocked_NoMSHRs,
+    Blocked_NoTargets,
+    Blocked_NoWBBuffers,
+    Blocked_Coherence,
+    Blocked_Copy,
+    NUM_BLOCKED_CAUSES
+};
+
+/**
+ * Reasons for cache to request a bus.
+ */
+enum RequestCause{
+    Request_MSHR,
+    Request_WB,
+    Request_Coherence,
+    Request_PF
+};
+
+/**
+ * A basic cache interface. Implements some common functions for speed.
+ */
+class BaseCache : public MemObject
+{
+    class CachePort : public Port
+    {
+      public:
+        BaseCache *cache;
+
+        CachePort(const std::string &_name, BaseCache *_cache, bool _isCpuSide);
+
+      protected:
+        virtual bool recvTiming(Packet *pkt);
+
+        virtual Tick recvAtomic(Packet *pkt);
+
+        virtual void recvFunctional(Packet *pkt);
+
+        virtual void recvStatusChange(Status status);
+
+        virtual void getDeviceAddressRanges(AddrRangeList &resp,
+                                            AddrRangeList &snoop);
+
+        virtual int deviceBlockSize();
+
+      public:
+        void setBlocked();
+
+        void clearBlocked();
+
+        bool blocked;
+
+        bool isCpuSide;
+    };
+
+    struct CacheEvent : public Event
+    {
+        CachePort *cachePort;
+        Packet *pkt;
+
+        CacheEvent(CachePort *_cachePort);
+        CacheEvent(CachePort *_cachePort, Packet *_pkt);
+        void process();
+        const char *description();
+    };
+
+  protected:
+    CachePort *cpuSidePort;
+    CachePort *memSidePort;
+
+  public:
+    virtual Port *getPort(const std::string &if_name, int idx = -1);
+
+  private:
+    //To be defined in cache_impl.hh not in base class
+    virtual bool doTimingAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide)
+    {
+        fatal("No implementation");
+    }
+
+    virtual Tick doAtomicAccess(Packet *pkt, bool isCpuSide)
+    {
+        fatal("No implementation");
+    }
+
+    virtual void doFunctionalAccess(Packet *pkt, bool isCpuSide)
+    {
+        fatal("No implementation");
+    }
+
+    void recvStatusChange(Port::Status status, bool isCpuSide)
+    {
+        if (status == Port::RangeChange)
+        {
+            if (!isCpuSide)
+            {
+                cpuSidePort->sendStatusChange(Port::RangeChange);
+            }
+            else
+            {
+                memSidePort->sendStatusChange(Port::RangeChange);
+            }
+        }
+    }
+
+    virtual Packet *getPacket()
+    {
+        fatal("No implementation");
+    }
+
+    virtual Packet *getCoherencePacket()
+    {
+        fatal("No implementation");
+    }
+
+    virtual void sendResult(Packet* &pkt, bool success)
+    {
+
+        fatal("No implementation");
+    }
+
+    /**
+     * Bit vector of the blocking reasons for the access path.
+     * @sa #BlockedCause
+     */
+    uint8_t blocked;
+
+    /**
+     * Bit vector for the blocking reasons for the snoop path.
+     * @sa #BlockedCause
+     */
+    uint8_t blockedSnoop;
+
+    /**
+     * Bit vector for the outstanding requests for the master interface.
+     */
+    uint8_t masterRequests;
+
+    /**
+     * Bit vector for the outstanding requests for the slave interface.
+     */
+    uint8_t slaveRequests;
+
+  protected:
+
+    /** True if this cache is connected to the CPU. */
+    bool topLevelCache;
+
+    /** Stores time the cache blocked for statistics. */
+    Tick blockedCycle;
+
+    /** Block size of this cache */
+    const int blkSize;
+
+    /** The number of misses to trigger an exit event. */
+    Counter missCount;
+
+  public:
+    // Statistics
+    /**
+     * @addtogroup CacheStatistics
+     * @{
+     */
+
+    /** Number of hits per thread for each type of command. @sa Packet::Command */
+    Stats::Vector<> hits[NUM_MEM_CMDS];
+    /** Number of hits for demand accesses. */
+    Stats::Formula demandHits;
+    /** Number of hit for all accesses. */
+    Stats::Formula overallHits;
+
+    /** Number of misses per thread for each type of command. @sa Packet::Command */
+    Stats::Vector<> misses[NUM_MEM_CMDS];
+    /** Number of misses for demand accesses. */
+    Stats::Formula demandMisses;
+    /** Number of misses for all accesses. */
+    Stats::Formula overallMisses;
+
+    /**
+     * Total number of cycles per thread/command spent waiting for a miss.
+     * Used to calculate the average miss latency.
+     */
+    Stats::Vector<> missLatency[NUM_MEM_CMDS];
+    /** Total number of cycles spent waiting for demand misses. */
+    Stats::Formula demandMissLatency;
+    /** Total number of cycles spent waiting for all misses. */
+    Stats::Formula overallMissLatency;
+
+    /** The number of accesses per command and thread. */
+    Stats::Formula accesses[NUM_MEM_CMDS];
+    /** The number of demand accesses. */
+    Stats::Formula demandAccesses;
+    /** The number of overall accesses. */
+    Stats::Formula overallAccesses;
+
+    /** The miss rate per command and thread. */
+    Stats::Formula missRate[NUM_MEM_CMDS];
+    /** The miss rate of all demand accesses. */
+    Stats::Formula demandMissRate;
+    /** The miss rate for all accesses. */
+    Stats::Formula overallMissRate;
+
+    /** The average miss latency per command and thread. */
+    Stats::Formula avgMissLatency[NUM_MEM_CMDS];
+    /** The average miss latency for demand misses. */
+    Stats::Formula demandAvgMissLatency;
+    /** The average miss latency for all misses. */
+    Stats::Formula overallAvgMissLatency;
+
+    /** The total number of cycles blocked for each blocked cause. */
+    Stats::Vector<> blocked_cycles;
+    /** The number of times this cache blocked for each blocked cause. */
+    Stats::Vector<> blocked_causes;
+
+    /** The average number of cycles blocked for each blocked cause. */
+    Stats::Formula avg_blocked;
+
+    /** The number of fast writes (WH64) performed. */
+    Stats::Scalar<> fastWrites;
+
+    /** The number of cache copies performed. */
+    Stats::Scalar<> cacheCopies;
+
+    /**
+     * @}
+     */
+
+    /**
+     * Register stats for this object.
+     */
+    virtual void regStats();
+
+  public:
+
+    class Params
+    {
+      public:
+        /** List of address ranges of this cache. */
+        std::vector<Range<Addr> > addrRange;
+        /** The hit latency for this cache. */
+        int hitLatency;
+        /** The block size of this cache. */
+        int blkSize;
+        /**
+         * The maximum number of misses this cache should handle before
+         * ending the simulation.
+         */
+        Counter maxMisses;
+
+        /**
+         * Construct an instance of this parameter class.
+         */
+        Params(std::vector<Range<Addr> > addr_range,
+               int hit_latency, int _blkSize, Counter max_misses)
+            : addrRange(addr_range), hitLatency(hit_latency), blkSize(_blkSize),
+              maxMisses(max_misses)
+        {
+        }
+    };
+
+    /**
+     * Create and initialize a basic cache object.
+     * @param name The name of this cache.
+     * @param params The parameter object for this BaseCache; supplies the
+     * block size and the miss count that triggers an exit event.
+     * blockedCycle starts at 0 so clearBlocked() never reads garbage.
+     */
+    BaseCache(const std::string &name, Params &params)
+        : MemObject(name), blocked(0), blockedSnoop(0), masterRequests(0),
+          slaveRequests(0), topLevelCache(false), blockedCycle(0),
+          blkSize(params.blkSize), missCount(params.maxMisses)
+    {
+        //Ports start at NULL; getPort() creates them when they are requested
+        cpuSidePort = NULL;
+        memSidePort = NULL;
+    }
+
+    virtual void init();
+
+    /**
+     * Query block size of a cache.
+     * @return  The block size
+     */
+    int getBlockSize() const
+    {
+        return blkSize;
+    }
+
+    /**
+     * Returns true if this cache is connected to the CPU.
+     * @return The value of the topLevelCache flag (true for an L1 cache).
+     */
+    bool isTopLevel()
+    {
+        return topLevelCache;
+    }
+
+    /**
+     * Returns true if the cache is blocked for accesses.
+     */
+    bool isBlocked()
+    {
+        return blocked != 0;
+    }
+
+    /**
+     * Returns true if the cache is blocked for snoops.
+     */
+    bool isBlockedForSnoop()
+    {
+        return blockedSnoop != 0;
+    }
+
+    /**
+     * Marks the access path of the cache as blocked for the given cause. This
+     * also sets the blocked flag in the slave interface.
+     * @param cause The reason for the cache blocking.
+     */
+    void setBlocked(BlockedCause cause)
+    {
+        uint8_t flag = 1 << cause;
+        if (blocked == 0) {
+            blocked_causes[cause]++;
+            blockedCycle = curTick;
+        }
+        blocked |= flag;
+        DPRINTF(Cache,"Blocking for cause %s\n", cause);
+        cpuSidePort->setBlocked();
+    }
+
+    /**
+     * Marks the snoop path of the cache as blocked for the given cause. This
+     * also sets the blocked flag in the master interface.
+     * @param cause The reason to block the snoop path.
+     */
+    void setBlockedForSnoop(BlockedCause cause)
+    {
+        uint8_t flag = 1 << cause;
+        blockedSnoop |= flag;
+        memSidePort->setBlocked();
+    }
+
+    /**
+     * Marks the cache as unblocked for the given cause. This also clears the
+     * blocked flags in the appropriate interfaces.
+     * @param cause The newly unblocked cause.
+     * @warning Calling this function can cause a blocked request on the bus to
+     * access the cache. The cache must be in a state to handle that request.
+     */
+    void clearBlocked(BlockedCause cause)
+    {
+        uint8_t flag = 1 << cause;
+        const bool wasBlocked = isBlocked(); //only time real blocked periods
+        blocked &= ~flag;
+        blockedSnoop &= ~flag;
+        DPRINTF(Cache,"Unblocking for cause %s, causes left=%i\n",
+                cause, blocked);
+        if (wasBlocked && !isBlocked()) {
+            blocked_cycles[cause] += curTick - blockedCycle;
+            DPRINTF(Cache,"Unblocking from all causes\n");
+            cpuSidePort->clearBlocked();
+        }
+        if (!isBlockedForSnoop())
+           memSidePort->clearBlocked();
+    }
+
+    /**
+     * True if the master bus should be requested.
+     * @return True if there are outstanding requests for the master bus.
+     */
+    bool doMasterRequest()
+    {
+        return masterRequests != 0;
+    }
+
+    /**
+     * Request the master bus for the given cause and time.
+     * @param cause The reason for the request.
+     * @param time The time to make the request.
+     */
+    void setMasterRequest(RequestCause cause, Tick time)
+    {
+        if (!doMasterRequest())
+        {
+            BaseCache::CacheEvent * reqCpu = new BaseCache::CacheEvent(memSidePort);
+            reqCpu->schedule(time);
+        }
+        uint8_t flag = 1<<cause;
+        masterRequests |= flag;
+    }
+
+    /**
+     * Clear the master bus request for the given cause.
+     * @param cause The request reason to clear.
+     */
+    void clearMasterRequest(RequestCause cause)
+    {
+        uint8_t flag = 1<<cause;
+        masterRequests &= ~flag;
+    }
+
+    /**
+     * Return true if the slave bus should be requested.
+     * @return True if there are outstanding requests for the slave bus.
+     */
+    bool doSlaveRequest()
+    {
+        return slaveRequests != 0;
+    }
+
+    /**
+     * Request the slave bus for the given reason; scheduling is unimplemented.
+     * @param cause The reason for the request.
+     * @param time The time to make the request (currently unused).
+     */
+    void setSlaveRequest(RequestCause cause, Tick time)
+    {
+        uint8_t flag = 1<<cause;
+        slaveRequests |= flag;
+        assert("Implement\n" && 0);
+//	si->pktuest(time);
+    }
+
+    /**
+     * Clear the slave bus request for the given reason.
+     * @param cause The request reason to clear.
+     */
+    void clearSlaveRequest(RequestCause cause)
+    {
+        uint8_t flag = 1<<cause;
+        slaveRequests &= ~flag;
+    }
+
+    /**
+     * Send a response to the slave interface.
+     * @param pkt The packet being responded to; turned into a timing response.
+     * @param time The time the response is ready.
+     */
+    void respond(Packet *pkt, Tick time)
+    {
+        pkt->makeTimingResponse();
+        pkt->result = Packet::Success;
+        CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt);
+        reqCpu->schedule(time);
+    }
+
+    /**
+     * Send a response to the slave interface and accumulate miss latency.
+     * @param pkt The packet to respond to; uncacheable ones add no latency.
+     * @param time The time the response is ready.
+     */
+    void respondToMiss(Packet *pkt, Tick time)
+    {
+        if (!pkt->req->isUncacheable()) {
+            missLatency[pkt->cmdToIndex()][pkt->req->getThreadNum()] += time - pkt->time;
+        }
+        pkt->makeTimingResponse();
+        pkt->result = Packet::Success;
+        CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt);
+        reqCpu->schedule(time);
+    }
+
+    /**
+     * Supplies data for cache to cache transfers; not implemented yet.
+     * @param pkt The bus transaction to fulfill (currently unused).
+     */
+    void respondToSnoop(Packet *pkt)
+    {
+        assert("Implement\n" && 0);
+//	mi->respond(pkt,curTick + hitLatency);
+    }
+
+    /**
+     * Notification from master interface that a address range changed. Nothing
+     * to do for a cache.
+     */
+    void rangeChange() {}
+
+    void getAddressRanges(AddrRangeList &resp, AddrRangeList &snoop, bool isCpuSide)
+    {
+        if (isCpuSide)
+        {
+            AddrRangeList dummy;
+            memSidePort->getPeerAddressRanges(resp, dummy);
+        }
+        else
+        {
+            //This is where snoops get updated
+            return;
+        }
+    }
+};
+
+#endif //__BASE_CACHE_HH__
diff --git a/src/mem/cache/cache.cc b/src/mem/cache/cache.cc
new file mode 100644
index 000000000..db66c096e
--- /dev/null
+++ b/src/mem/cache/cache.cc
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Erik Hallnor
+ *          Steve Reinhardt
+ *          Lisa Hsu
+ *          Kevin Lim
+ */
+
+/**
+ * @file
+ * Cache template instantiations.
+ */
+
+#include "mem/config/cache.hh"
+#include "mem/config/compression.hh"
+
+#include "mem/cache/tags/cache_tags.hh"
+
+#if defined(USE_CACHE_LRU)
+#include "mem/cache/tags/lru.hh"
+#endif
+
+#if defined(USE_CACHE_FALRU)
+#include "mem/cache/tags/fa_lru.hh"
+#endif
+
+#if defined(USE_CACHE_IIC)
+#include "mem/cache/tags/iic.hh"
+#endif
+
+#if defined(USE_CACHE_SPLIT)
+#include "mem/cache/tags/split.hh"
+#endif
+
+#if defined(USE_CACHE_SPLIT_LIFO)
+#include "mem/cache/tags/split_lifo.hh"
+#endif
+
+#include "base/compression/null_compression.hh"
+#if defined(USE_LZSS_COMPRESSION)
+#include "base/compression/lzss_compression.hh"
+#endif
+
+#include "mem/cache/miss/miss_queue.hh"
+#include "mem/cache/miss/blocking_buffer.hh"
+
+#include "mem/cache/coherence/uni_coherence.hh"
+#include "mem/cache/coherence/simple_coherence.hh"
+
+#include "mem/cache/cache_impl.hh"
+
+// Template Instantiations
+#ifndef DOXYGEN_SHOULD_SKIP_THIS
+
+
+#if defined(USE_CACHE_FALRU)
+template class Cache<CacheTags<FALRU,NullCompression>, BlockingBuffer, SimpleCoherence>;
+template class Cache<CacheTags<FALRU,NullCompression>, BlockingBuffer, UniCoherence>;
+template class Cache<CacheTags<FALRU,NullCompression>, MissQueue, SimpleCoherence>;
+template class Cache<CacheTags<FALRU,NullCompression>, MissQueue, UniCoherence>;
+#if defined(USE_LZSS_COMPRESSION)
+template class Cache<CacheTags<FALRU,LZSSCompression>, BlockingBuffer, SimpleCoherence>;
+template class Cache<CacheTags<FALRU,LZSSCompression>, BlockingBuffer, UniCoherence>;
+template class Cache<CacheTags<FALRU,LZSSCompression>, MissQueue, SimpleCoherence>;
+template class Cache<CacheTags<FALRU,LZSSCompression>, MissQueue, UniCoherence>;
+#endif
+#endif
+
+#if defined(USE_CACHE_IIC)
+template class Cache<CacheTags<IIC,NullCompression>, BlockingBuffer, SimpleCoherence>;
+template class Cache<CacheTags<IIC,NullCompression>, BlockingBuffer, UniCoherence>;
+template class Cache<CacheTags<IIC,NullCompression>, MissQueue, SimpleCoherence>;
+template class Cache<CacheTags<IIC,NullCompression>, MissQueue, UniCoherence>;
+#if defined(USE_LZSS_COMPRESSION)
+template class Cache<CacheTags<IIC,LZSSCompression>, BlockingBuffer, SimpleCoherence>;
+template class Cache<CacheTags<IIC,LZSSCompression>, BlockingBuffer, UniCoherence>;
+template class Cache<CacheTags<IIC,LZSSCompression>, MissQueue, SimpleCoherence>;
+template class Cache<CacheTags<IIC,LZSSCompression>, MissQueue, UniCoherence>;
+#endif
+#endif
+
+#if defined(USE_CACHE_LRU)
+template class Cache<CacheTags<LRU,NullCompression>, BlockingBuffer, SimpleCoherence>;
+template class Cache<CacheTags<LRU,NullCompression>, BlockingBuffer, UniCoherence>;
+template class Cache<CacheTags<LRU,NullCompression>, MissQueue, SimpleCoherence>;
+template class Cache<CacheTags<LRU,NullCompression>, MissQueue, UniCoherence>;
+#if defined(USE_LZSS_COMPRESSION)
+template class Cache<CacheTags<LRU,LZSSCompression>, BlockingBuffer, SimpleCoherence>;
+template class Cache<CacheTags<LRU,LZSSCompression>, BlockingBuffer, UniCoherence>;
+template class Cache<CacheTags<LRU,LZSSCompression>, MissQueue, SimpleCoherence>;
+template class Cache<CacheTags<LRU,LZSSCompression>, MissQueue, UniCoherence>;
+#endif
+#endif
+
+#if defined(USE_CACHE_SPLIT)
+template class Cache<CacheTags<Split,NullCompression>, BlockingBuffer, SimpleCoherence>;
+template class Cache<CacheTags<Split,NullCompression>, BlockingBuffer, UniCoherence>;
+template class Cache<CacheTags<Split,NullCompression>, MissQueue, SimpleCoherence>;
+template class Cache<CacheTags<Split,NullCompression>, MissQueue, UniCoherence>;
+#if defined(USE_LZSS_COMPRESSION)
+template class Cache<CacheTags<Split,LZSSCompression>, BlockingBuffer, SimpleCoherence>;
+template class Cache<CacheTags<Split,LZSSCompression>, BlockingBuffer, UniCoherence>;
+template class Cache<CacheTags<Split,LZSSCompression>, MissQueue, SimpleCoherence>;
+template class Cache<CacheTags<Split,LZSSCompression>, MissQueue, UniCoherence>;
+#endif
+#endif
+
+#if defined(USE_CACHE_SPLIT_LIFO)
+template class Cache<CacheTags<SplitLIFO,NullCompression>, BlockingBuffer, SimpleCoherence>;
+template class Cache<CacheTags<SplitLIFO,NullCompression>, BlockingBuffer, UniCoherence>;
+template class Cache<CacheTags<SplitLIFO,NullCompression>, MissQueue, SimpleCoherence>;
+template class Cache<CacheTags<SplitLIFO,NullCompression>, MissQueue, UniCoherence>;
+#if defined(USE_LZSS_COMPRESSION)
+template class Cache<CacheTags<SplitLIFO,LZSSCompression>, BlockingBuffer, SimpleCoherence>;
+template class Cache<CacheTags<SplitLIFO,LZSSCompression>, BlockingBuffer, UniCoherence>;
+template class Cache<CacheTags<SplitLIFO,LZSSCompression>, MissQueue, SimpleCoherence>;
+template class Cache<CacheTags<SplitLIFO,LZSSCompression>, MissQueue, UniCoherence>;
+#endif
+#endif
+
+#endif //DOXYGEN_SHOULD_SKIP_THIS
diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh
new file mode 100644
index 000000000..ec5b800a8
--- /dev/null
+++ b/src/mem/cache/cache.hh
@@ -0,0 +1,268 @@
+/*
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Erik Hallnor
+ *          Dave Greene
+ *          Steve Reinhardt
+ */
+
+/**
+ * @file
+ * Describes a cache based on template policies.
+ */
+
+#ifndef __CACHE_HH__
+#define __CACHE_HH__
+
+#include "base/misc.hh" // fatal, panic, and warn
+#include "cpu/smt.hh" // SMT_MAX_THREADS
+
+#include "mem/cache/base_cache.hh"
+#include "mem/cache/prefetch/prefetcher.hh"
+
+// Forward declaration
+class MSHR;
+
+
+/**
+ * A template-policy based cache. The behavior of the cache can be altered by
+ * supplying different template policies. TagStore handles all tag and data
+ * storage @sa TagStore. Buffering handles all misses and writes/writebacks
+ * @sa MissQueue. Coherence handles all coherence policy details @sa
+ * UniCoherence, SimpleCoherence.
+ */
+template <class TagStore, class Buffering, class Coherence>
+class Cache : public BaseCache
+{
+  public:
+    /** Define the type of cache block to use. */
+    typedef typename TagStore::BlkType BlkType;
+
+    bool prefetchAccess;
+  protected:
+
+    /** Tag and data storage. */
+    TagStore *tags;
+    /** Miss and writeback handler. */
+    Buffering *missQueue;
+    /** Coherence protocol. */
+    Coherence *coherence;
+
+    /** Prefetcher. */
+    Prefetcher<TagStore, Buffering> *prefetcher;
+
+    /** Do fast copies in this cache. */
+    bool doCopy;
+
+    /** Block on a delayed copy. */
+    bool blockOnCopy;
+
+    /**
+     * The clock ratio of the outgoing bus.
+     * Used for calculating critical word first.
+     */
+    int busRatio;
+
+     /**
+      * The bus width in bytes of the outgoing bus.
+      * Used for calculating critical word first.
+      */
+    int busWidth;
+
+    /**
+     * The latency of a hit in this device.
+     */
+    int hitLatency;
+
+     /**
+      * A permanent packet that is always used to cause invalidations.
+      * Used to append to target list, to cause an invalidation.
+      */
+    Packet * invalidatePkt;
+
+    /**
+     * Temporarily move a block into a MSHR.
+     * @todo Remove this when LSQ/SB are fixed and implemented in memtest.
+     */
+    void pseudoFill(Addr addr, int asid);
+
+    /**
+     * Temporarily move a block into an existing MSHR.
+     * @todo Remove this when LSQ/SB are fixed and implemented in memtest.
+     */
+    void pseudoFill(MSHR *mshr);
+
+  public:
+
+    class Params
+    {
+      public:
+        TagStore *tags;
+        Buffering *missQueue;
+        Coherence *coherence;
+        bool doCopy;
+        bool blockOnCopy;
+        BaseCache::Params baseParams;
+        Prefetcher<TagStore, Buffering> *prefetcher;
+        bool prefetchAccess;
+        int hitLatency;
+
+        Params(TagStore *_tags, Buffering *mq, Coherence *coh,
+               bool do_copy, BaseCache::Params params,
+               Prefetcher<TagStore, Buffering> *_prefetcher,
+               bool prefetch_access, int hit_latency)
+            : tags(_tags), missQueue(mq), coherence(coh), doCopy(do_copy),
+              blockOnCopy(false), baseParams(params),
+              prefetcher(_prefetcher), prefetchAccess(prefetch_access),
+              hitLatency(hit_latency)
+        {
+        }
+    };
+
+    /** Instantiates a basic cache object. */
+    Cache(const std::string &_name, Params &params);
+
+    virtual bool doTimingAccess(Packet *pkt, CachePort *cachePort,
+                        bool isCpuSide);
+
+    virtual Tick doAtomicAccess(Packet *pkt, bool isCpuSide);
+
+    virtual void doFunctionalAccess(Packet *pkt, bool isCpuSide);
+
+    virtual void recvStatusChange(Port::Status status, bool isCpuSide);
+
+    void regStats();
+
+    /**
+     * Performs the access specified by the request.
+     * @param pkt The request to perform.
+     * @return The result of the access.
+     */
+    bool access(Packet * &pkt);
+
+    /**
+     * Selects a request to send on the bus.
+     * @return The memory request to service.
+     */
+    virtual Packet * getPacket();
+
+    /**
+     * Was the request sent successfully?
+     * @param pkt The request.
+     * @param success True if the request was sent successfully.
+     */
+    virtual void sendResult(Packet * &pkt, bool success);
+
+    /**
+     * Handles a response (cache line fill/write ack) from the bus.
+     * @param pkt The request being responded to.
+     */
+    void handleResponse(Packet * &pkt);
+
+    /**
+     * Start handling a copy transaction.
+     * @param pkt The copy request to perform.
+     */
+    void startCopy(Packet * &pkt);
+
+    /**
+     * Handle a delayed copy transaction.
+     * @param pkt The delayed copy request to continue.
+     * @param addr The address being responded to.
+     * @param blk The block of the current response.
+     * @param mshr The mshr being handled.
+     */
+    void handleCopy(Packet * &pkt, Addr addr, BlkType *blk, MSHR *mshr);
+
+    /**
+     * Selects a coherence message to forward to lower levels of the hierarchy.
+     * @return The coherence message to forward.
+     */
+    virtual Packet * getCoherencePacket();
+
+    /**
+     * Snoops bus transactions to maintain coherence.
+     * @param pkt The current bus transaction.
+     */
+    void snoop(Packet * &pkt);
+
+    void snoopResponse(Packet * &pkt);
+
+    /**
+     * Invalidates the block containing address if found.
+     * @param addr The address to look for.
+     * @param asid The address space ID of the address.
+     * @todo Is this function necessary?
+     */
+    void invalidateBlk(Addr addr, int asid);
+
+    /**
+     * Squash all requests associated with specified thread.
+     * Intended for use by I-cache.
+     * @param threadNum The thread to squash.
+     */
+    void squash(int threadNum)
+    {
+        missQueue->squash(threadNum);
+    }
+
+    /**
+     * Return the number of outstanding misses in a Cache.
+     * Delegates to the miss queue's getMisses().
+     *
+     * @return The number of misses still outstanding.
+     */
+    unsigned outstandingMisses() const
+    {
+        return missQueue->getMisses();
+    }
+
+    /**
+     * Perform the access specified in the request and return the estimated
+     * time of completion. This function can either update the hierarchy state
+     * or just perform the access wherever the data is found depending on the
+     * state of the update flag.
+     * @param pkt The memory request to satisfy
+     * @param update If true, update the hierarchy, otherwise just perform the
+     * request.
+     * @return The estimated completion time.
+     */
+    Tick probe(Packet * &pkt, bool update);
+
+    /**
+     * Snoop for the provided request in the cache and return the estimated
+     * time of completion.
+     * @todo Can a snoop probe not change state?
+     * @param pkt The memory request to satisfy
+     * @param update If true, update the hierarchy, otherwise just perform the
+     * request.
+     * @return The estimated completion time.
+     */
+    Tick snoopProbe(Packet * &pkt, bool update);
+};
+
+#endif // __CACHE_HH__
diff --git a/src/mem/cache/cache_blk.hh b/src/mem/cache/cache_blk.hh
new file mode 100644
index 000000000..67e65d25b
--- /dev/null
+++ b/src/mem/cache/cache_blk.hh
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2003-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Erik Hallnor
+ */
+
+/** @file
+ * Definitions of a simple cache block class.
+ */
+
+#ifndef __CACHE_BLK_HH__
+#define __CACHE_BLK_HH__
+
+#include "sim/root.hh"		// for Tick
+#include "arch/isa_traits.hh"	// for Addr
+
+/**
+ * Cache block status bit assignments
+ */
+enum CacheBlkStatusBits {
+    /** valid, readable */
+    BlkValid =		0x01,
+    /** write permission */
+    BlkWritable =	0x02,
+    /** dirty (modified) */
+    BlkDirty =		0x04,
+    /** compressed */
+    BlkCompressed =	0x08,
+    /** block was referenced */
+    BlkReferenced =	0x10,
+    /** block came from a hardware prefetch and is not yet accessed */
+    BlkHWPrefetched =	0x20
+};
+
+/**
+ * A Basic Cache block.
+ * Contains the tag, status, and a pointer to data.
+ */
+class CacheBlk
+{
+  public:
+    /** The address space ID of this block. */
+    int asid;
+    /** Data block tag value. */
+    Addr tag;
+    /**
+     * Contains a copy of the data in this block for easy access. This is used
+     * for efficient execution when the data could be actually stored in
+     * another format (COW, compressed, sub-blocked, etc). In all cases the
+     * data stored here should be kept consistent with the actual data
+     * referenced by this block.
+     */
+    uint8_t *data;
+    /** the number of bytes stored in this block. */
+    int size;
+
+    /** block state: bitwise OR of CacheBlkStatusBits values */
+    typedef unsigned State;
+
+    /** The current status of this block. @sa CacheBlkStatusBits */
+    State status;
+
+    /** The tick (curTick value) at which this block becomes accessible */
+    Tick whenReady;
+
+    /**
+     * The set this block belongs to.
+     * @todo Move this into subclasses when we fix CacheTags to use them.
+     */
+    int set;
+
+    /** Number of references to this block since it was brought in. */
+    int refCount;
+
+    CacheBlk()
+        : asid(-1), tag(0), data(0) ,size(0), status(0), whenReady(0),
+          set(-1), refCount(0)
+    {}
+
+    /**
+     * Copy the state of the given block into this one; data is copied by pointer.
+     * @param rhs The block to copy.
+     * @return a const reference to this block.
+     */
+    const CacheBlk& operator=(const CacheBlk& rhs)
+    {
+        asid = rhs.asid;
+        tag = rhs.tag;
+        data = rhs.data;
+        size = rhs.size;
+        status = rhs.status;
+        whenReady = rhs.whenReady;
+        set = rhs.set;
+        refCount = rhs.refCount;
+        return *this;
+    }
+
+    /**
+     * Checks the write permissions of this block.
+     * @return True if the block is both valid and writable.
+     */
+    bool isWritable() const
+    {
+        const int needed_bits = BlkWritable | BlkValid;
+        return (status & needed_bits) == needed_bits;
+    }
+
+    /**
+     * Checks that a block is valid (readable).
+     * @return True if the block is valid.
+     */
+    bool isValid() const
+    {
+        return (status & BlkValid) != 0;
+    }
+
+    /**
+     * Check to see if a block has been written.
+     * @return True if the block is dirty.
+     */
+    bool isModified() const
+    {
+        return (status & BlkDirty) != 0;
+    }
+
+    /**
+     * Check to see if this block contains compressed data.
+     * @return True if the block's data is compressed.
+     */
+    bool isCompressed() const
+    {
+        return (status & BlkCompressed) != 0;
+    }
+
+    /**
+     * Check if this block has been referenced.
+     * @return True if the block has been referenced.
+     */
+    bool isReferenced() const
+    {
+        return (status & BlkReferenced) != 0;
+    }
+
+    /**
+     * Check if this block was the result of a hardware prefetch, yet to
+     * be touched.
+     * @return True if the block was a hardware prefetch, unaccessed.
+     */
+    bool isPrefetch() const
+    {
+        return (status & BlkHWPrefetched) != 0;
+    }
+
+
+};
+
+/**
+ * Print a CacheBlk's tag and status, in hexadecimal, to a stream.
+ * @param out The stream to write to.
+ * @param blk The cache block to print.
+ *
+ * @return The same stream, left in decimal mode.
+ */
+inline std::ostream &
+operator<<(std::ostream &out, const CacheBlk &blk)
+{
+    out << std::hex << std::endl
+        << "  Tag: " << blk.tag << std::endl
+        << "  Status: " << blk.status << std::endl;
+    out << std::dec;
+    return out;
+}
+
+#endif //__CACHE_BLK_HH__
diff --git a/src/mem/cache/cache_builder.cc b/src/mem/cache/cache_builder.cc
new file mode 100644
index 000000000..05a149a1c
--- /dev/null
+++ b/src/mem/cache/cache_builder.cc
@@ -0,0 +1,480 @@
+/*
+ * Copyright (c) 2003-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Erik Hallnor
+ *          Nathan Binkert
+ */
+
+/**
+ * @file
+ * SimObject instantiation of caches.
+ */
+#include <vector>
+
+// Must be included first to determine which caches we want
+#include "mem/config/cache.hh"
+#include "mem/config/compression.hh"
+#include "mem/config/prefetch.hh"
+
+#include "mem/cache/base_cache.hh"
+#include "mem/cache/cache.hh"
+#include "mem/bus.hh"
+#include "mem/cache/coherence/coherence_protocol.hh"
+#include "sim/builder.hh"
+
+// Tag Templates
+#if defined(USE_CACHE_LRU)
+#include "mem/cache/tags/lru.hh"
+#endif
+
+#if defined(USE_CACHE_FALRU)
+#include "mem/cache/tags/fa_lru.hh"
+#endif
+
+#if defined(USE_CACHE_IIC)
+#include "mem/cache/tags/iic.hh"
+#endif
+
+#if defined(USE_CACHE_SPLIT)
+#include "mem/cache/tags/split.hh"
+#endif
+
+#if defined(USE_CACHE_SPLIT_LIFO)
+#include "mem/cache/tags/split_lifo.hh"
+#endif
+
+// Compression Templates
+#include "base/compression/null_compression.hh"
+#if defined(USE_LZSS_COMPRESSION)
+#include "base/compression/lzss_compression.hh"
+#endif
+
+// CacheTags Templates
+#include "mem/cache/tags/cache_tags.hh"
+
+// MissQueue Templates
+#include "mem/cache/miss/miss_queue.hh"
+#include "mem/cache/miss/blocking_buffer.hh"
+
+// Coherence Templates
+#include "mem/cache/coherence/uni_coherence.hh"
+#include "mem/cache/coherence/simple_coherence.hh"
+
+//Prefetcher Headers
+#if defined(USE_GHB)
+#include "mem/cache/prefetch/ghb_prefetcher.hh"
+#endif
+#if defined(USE_TAGGED)
+#include "mem/cache/prefetch/tagged_prefetcher.hh"
+#endif
+#if defined(USE_STRIDED)
+#include "mem/cache/prefetch/stride_prefetcher.hh"
+#endif
+
+
+using namespace std;
+using namespace TheISA;
+
+#ifndef DOXYGEN_SHOULD_SKIP_THIS
+
+BEGIN_DECLARE_SIM_OBJECT_PARAMS(BaseCache)
+
+    Param<int> size;
+    Param<int> assoc;
+    Param<int> block_size;
+    Param<int> latency;
+    Param<int> mshrs;
+    Param<int> tgts_per_mshr;
+    Param<int> write_buffers;
+    Param<bool> prioritizeRequests;
+//    SimObjectParam<Bus *> in_bus;
+//    SimObjectParam<Bus *> out_bus;
+    Param<bool> do_copy;
+    SimObjectParam<CoherenceProtocol *> protocol;
+    Param<Addr> trace_addr;
+    Param<int> hash_delay;
+#if defined(USE_CACHE_IIC)
+    SimObjectParam<Repl *> repl;
+#endif
+    Param<bool> compressed_bus;
+    Param<bool> store_compressed;
+    Param<bool> adaptive_compression;
+    Param<int> compression_latency;
+    Param<int> subblock_size;
+    Param<Counter> max_miss_count;
+//    SimObjectParam<HierParams *> hier;
+    VectorParam<Range<Addr> > addr_range;
+//    SimObjectParam<MemTraceWriter *> mem_trace;
+    Param<bool> split;
+    Param<int> split_size;
+    Param<bool> lifo;
+    Param<bool> two_queue;
+    Param<bool> prefetch_miss;
+    Param<bool> prefetch_access;
+    Param<int> prefetcher_size;
+    Param<bool> prefetch_past_page;
+    Param<bool> prefetch_serial_squash;
+    Param<Tick> prefetch_latency;
+    Param<int> prefetch_degree;
+    Param<string> prefetch_policy;
+    Param<bool> prefetch_cache_check_push;
+    Param<bool> prefetch_use_cpu_id;
+    Param<bool> prefetch_data_accesses_only;
+    Param<int> hit_latency;
+
+END_DECLARE_SIM_OBJECT_PARAMS(BaseCache)
+
+
+BEGIN_INIT_SIM_OBJECT_PARAMS(BaseCache)
+
+    INIT_PARAM(size, "capacity in bytes"),
+    INIT_PARAM(assoc, "associativity"),
+    INIT_PARAM(block_size, "block size in bytes"),
+    INIT_PARAM(latency, "hit latency in CPU cycles"),
+    INIT_PARAM(mshrs, "number of MSHRs (max outstanding requests)"),
+    INIT_PARAM(tgts_per_mshr, "max number of accesses per MSHR"),
+    INIT_PARAM_DFLT(write_buffers, "number of write buffers", 8),
+    INIT_PARAM_DFLT(prioritizeRequests, "always service demand misses first",
+                    false),
+/*    INIT_PARAM_DFLT(in_bus, "incoming bus object", NULL),
+    INIT_PARAM(out_bus, "outgoing bus object"),
+*/
+    INIT_PARAM_DFLT(do_copy, "perform fast copies in the cache", false),
+    INIT_PARAM_DFLT(protocol, "coherence protocol to use in the cache", NULL),
+    INIT_PARAM_DFLT(trace_addr, "address to trace", 0),
+
+    INIT_PARAM_DFLT(hash_delay, "time in cycles of hash access",1),
+#if defined(USE_CACHE_IIC)
+    INIT_PARAM_DFLT(repl, "replacement policy",NULL),
+#endif
+    INIT_PARAM_DFLT(compressed_bus,
+                    "This cache connects to a compressed memory",
+                    false),
+    INIT_PARAM_DFLT(store_compressed, "Store compressed data in the cache",
+                    false),
+    INIT_PARAM_DFLT(adaptive_compression, "Use an adaptive compression scheme",
+                    false),
+    INIT_PARAM_DFLT(compression_latency,
+                    "Latency in cycles of compression algorithm",
+                    0),
+    INIT_PARAM_DFLT(subblock_size,
+                    "Size of subblock in IIC used for compression",
+                    0),
+    INIT_PARAM_DFLT(max_miss_count,
+                    "The number of misses to handle before calling exit",
+                    0),
+/*    INIT_PARAM_DFLT(hier,
+                    "Hierarchy global variables",
+                    &defaultHierParams),
+*/
+    INIT_PARAM_DFLT(addr_range, "The address range in bytes",
+                    vector<Range<Addr> >(1,RangeIn((Addr)0, MaxAddr))),
+//    INIT_PARAM_DFLT(mem_trace, "Memory trace to write accesses to", NULL),
+    INIT_PARAM_DFLT(split, "Whether this is a partitioned cache", false),
+    INIT_PARAM_DFLT(split_size, "the number of \"ways\" belonging to the LRU partition", 0),
+    INIT_PARAM_DFLT(lifo, "whether you are using a LIFO repl. policy", false),
+    INIT_PARAM_DFLT(two_queue, "whether the lifo should have two queue replacement", false),
+    INIT_PARAM_DFLT(prefetch_miss, "wheter you are using the hardware prefetcher from Miss stream", false),
+    INIT_PARAM_DFLT(prefetch_access, "wheter you are using the hardware prefetcher from Access stream", false),
+    INIT_PARAM_DFLT(prefetcher_size, "Number of entries in the harware prefetch queue", 100),
+    INIT_PARAM_DFLT(prefetch_past_page, "Allow prefetches to cross virtual page boundaries", false),
+    INIT_PARAM_DFLT(prefetch_serial_squash, "Squash prefetches with a later time on a subsequent miss", false),
+    INIT_PARAM_DFLT(prefetch_latency, "Latency of the prefetcher", 10),
+    INIT_PARAM_DFLT(prefetch_degree, "Degree of the prefetch depth", 1),
+    INIT_PARAM_DFLT(prefetch_policy, "Type of prefetcher to use", "none"),
+    INIT_PARAM_DFLT(prefetch_cache_check_push, "Check if in cash on push or pop of prefetch queue", true),
+    INIT_PARAM_DFLT(prefetch_use_cpu_id, "Use the CPU ID to seperate calculations of prefetches", true),
+    INIT_PARAM_DFLT(prefetch_data_accesses_only, "Only prefetch on data not on instruction accesses", false),
+    INIT_PARAM_DFLT(hit_latency, "Hit Latecny for a succesful access", 1)
+END_INIT_SIM_OBJECT_PARAMS(BaseCache)
+
+
+#define BUILD_CACHE(t, comp, b, c) do {					\
+        Prefetcher<CacheTags<t, comp>, b> *pf; \
+        if (pf_policy == "tagged") {      \
+             BUILD_TAGGED_PREFETCHER(t, comp, b); \
+        }            \
+        else if (pf_policy == "stride") {       \
+             BUILD_STRIDED_PREFETCHER(t, comp, b); \
+        } \
+        else if (pf_policy == "ghb") {       \
+             BUILD_GHB_PREFETCHER(t, comp, b); \
+        } \
+        else { \
+             BUILD_NULL_PREFETCHER(t, comp, b); \
+        } \
+        Cache<CacheTags<t, comp>, b, c>::Params params(tagStore, mq, coh, \
+                                                       do_copy, base_params, \
+                                                       /*in_bus, out_bus,*/ pf,  \
+                                                       prefetch_access, hit_latency); \
+        Cache<CacheTags<t, comp>, b, c> *retval =			\
+            new Cache<CacheTags<t, comp>, b, c>(getInstanceName(), /*hier,*/ \
+                                                params);		\
+/*	if (in_bus == NULL) {						\
+            retval->setSlaveInterface(new MemoryInterface<Cache<CacheTags<t, comp>, b, c> >(getInstanceName(), hier, retval, mem_trace)); \
+        } else {							\
+            retval->setSlaveInterface(new SlaveInterface<Cache<CacheTags<t, comp>, b, c>, Bus>(getInstanceName(), hier, retval, in_bus, mem_trace)); \
+        }								\
+        retval->setMasterInterface(new MasterInterface<Cache<CacheTags<t, comp>, b, c>, Bus>(getInstanceName(), hier, retval, out_bus)); \
+        out_bus->rangeChange();						\
+        return retval;							\
+*/return retval;                                                          \
+    } while (0)
+
+#define BUILD_CACHE_PANIC(x) do {			\
+        panic("%s not compiled into M5", x);		\
+    } while (0)
+
+#if defined(USE_LZSS_COMPRESSION)
+#define BUILD_COMPRESSED_CACHE(TAGS, tags, b, c) do { \
+        if (compressed_bus || store_compressed){			\
+            CacheTags<TAGS, LZSSCompression> *tagStore =		\
+                new CacheTags<TAGS, LZSSCompression>(tags,		\
+                                                     compression_latency, \
+                                                     true, store_compressed, \
+                                                     adaptive_compression,   \
+                                                     prefetch_miss); \
+            BUILD_CACHE(TAGS, LZSSCompression, b, c);			\
+        } else {							\
+            CacheTags<TAGS, NullCompression> *tagStore =		\
+                new CacheTags<TAGS, NullCompression>(tags,		\
+                                                     compression_latency, \
+                                                     true, store_compressed, \
+                                                     adaptive_compression,   \
+                                                     prefetch_miss); \
+            BUILD_CACHE(TAGS, NullCompression, b, c);			\
+        }								\
+    } while (0)
+#else
+#define BUILD_COMPRESSED_CACHE(TAGS, tags, b, c) do { /* LZSS not built: NullCompression only */ \
+        if (compressed_bus || store_compressed){			\
+            BUILD_CACHE_PANIC("compressed caches");			\
+        } else {							\
+            CacheTags<TAGS, NullCompression> *tagStore =		\
+                new CacheTags<TAGS, NullCompression>(tags,		\
+                                                      compression_latency, \
+                                                      true, store_compressed, \
+                                                      adaptive_compression,   \
+                                                      prefetch_miss); \
+            BUILD_CACHE(TAGS, NullCompression, b, c);			\
+        }								\
+    } while (0)
+#endif
+
+#if defined(USE_CACHE_FALRU)
+#define BUILD_FALRU_CACHE(b,c) do {			    \
+        FALRU *tags = new FALRU(block_size, size, latency); \
+        BUILD_COMPRESSED_CACHE(FALRU, tags, b, c);		\
+    } while (0)
+#else
+#define BUILD_FALRU_CACHE(b, c) BUILD_CACHE_PANIC("falru cache")
+#endif
+
+#if defined(USE_CACHE_LRU)
+#define BUILD_LRU_CACHE(b, c) do {				\
+        LRU *tags = new LRU(numSets, block_size, assoc, latency);	\
+        BUILD_COMPRESSED_CACHE(LRU, tags, b, c);			\
+    } while (0)
+#else
+#define BUILD_LRU_CACHE(b, c) BUILD_CACHE_PANIC("lru cache")
+#endif
+
+#if defined(USE_CACHE_SPLIT)
+#define BUILD_SPLIT_CACHE(b, c) do {					\
+        Split *tags = new Split(numSets, block_size, assoc, split_size, lifo, \
+                                two_queue, latency);		\
+        BUILD_COMPRESSED_CACHE(Split, tags, b, c);			\
+    } while (0)
+#else
+#define BUILD_SPLIT_CACHE(b, c) BUILD_CACHE_PANIC("split cache")
+#endif
+
+#if defined(USE_CACHE_SPLIT_LIFO)
+#define BUILD_SPLIT_LIFO_CACHE(b, c) do {				\
+        SplitLIFO *tags = new SplitLIFO(block_size, size, assoc,        \
+                                        latency, two_queue, -1);	\
+        BUILD_COMPRESSED_CACHE(SplitLIFO, tags, b, c);			\
+    } while (0)
+#else
+#define BUILD_SPLIT_LIFO_CACHE(b, c) BUILD_CACHE_PANIC("lifo cache")
+#endif
+
+#if defined(USE_CACHE_IIC)
+#define BUILD_IIC_CACHE(b ,c) do {			\
+        IIC *tags = new IIC(iic_params);		\
+        BUILD_COMPRESSED_CACHE(IIC, tags, b, c);	\
+    } while (0)
+#else
+#define BUILD_IIC_CACHE(b, c) BUILD_CACHE_PANIC("iic")
+#endif
+
+#define BUILD_CACHES(b, c) do {				\
+        if (repl == NULL) {				\
+            if (numSets == 1) {				\
+                BUILD_FALRU_CACHE(b, c);		\
+            } else {					\
+                if (split == true) {			\
+                    BUILD_SPLIT_CACHE(b, c);		\
+                } else if (lifo == true) {		\
+                    BUILD_SPLIT_LIFO_CACHE(b, c);	\
+                } else {				\
+                    BUILD_LRU_CACHE(b, c);		\
+                }					\
+            }						\
+        } else {					\
+            BUILD_IIC_CACHE(b, c);			\
+        }						\
+    } while (0)
+
+#define BUILD_COHERENCE(b) do {						\
+        if (protocol == NULL) {						\
+            UniCoherence *coh = new UniCoherence();			\
+            BUILD_CACHES(b, UniCoherence);				\
+        } else {							\
+            SimpleCoherence *coh = new SimpleCoherence(protocol);	\
+            BUILD_CACHES(b, SimpleCoherence);				\
+        }								\
+    } while (0)
+
+#if defined(USE_TAGGED)
+#define BUILD_TAGGED_PREFETCHER(t, comp, b) pf = new   \
+                TaggedPrefetcher<CacheTags<t, comp>, b>(prefetcher_size, \
+                                                        !prefetch_past_page, \
+                                                        prefetch_serial_squash, \
+                                                        prefetch_cache_check_push, \
+                                                        prefetch_data_accesses_only, \
+                                                        prefetch_latency, \
+                                                        prefetch_degree)
+#else
+#define BUILD_TAGGED_PREFETCHER(t, comp, b) BUILD_CACHE_PANIC("Tagged Prefetcher")
+#endif
+
+#if defined(USE_STRIDED)
+#define BUILD_STRIDED_PREFETCHER(t, comp, b) pf = new  \
+                StridePrefetcher<CacheTags<t, comp>, b>(prefetcher_size, \
+                                                        !prefetch_past_page, \
+                                                        prefetch_serial_squash, \
+                                                        prefetch_cache_check_push, \
+                                                        prefetch_data_accesses_only, \
+                                                        prefetch_latency, \
+                                                        prefetch_degree, \
+                                                        prefetch_use_cpu_id)
+#else
+#define BUILD_STRIDED_PREFETCHER(t, comp, b) BUILD_CACHE_PANIC("Stride Prefetcher")
+#endif
+
+#if defined(USE_GHB)
+#define BUILD_GHB_PREFETCHER(t, comp, b) pf = new  \
+                GHBPrefetcher<CacheTags<t, comp>, b>(prefetcher_size, \
+                                                     !prefetch_past_page, \
+                                                     prefetch_serial_squash, \
+                                                     prefetch_cache_check_push, \
+                                                        prefetch_data_accesses_only, \
+                                                     prefetch_latency, \
+                                                     prefetch_degree, \
+                                                     prefetch_use_cpu_id)
+#else
+#define BUILD_GHB_PREFETCHER(t, comp, b) BUILD_CACHE_PANIC("GHB Prefetcher")
+#endif
+
+#if defined(USE_TAGGED)
+#define BUILD_NULL_PREFETCHER(t, comp, b) pf = new  \
+                TaggedPrefetcher<CacheTags<t, comp>, b>(prefetcher_size, \
+                                                        !prefetch_past_page, \
+                                                        prefetch_serial_squash, \
+                                                        prefetch_cache_check_push, \
+                                                        prefetch_data_accesses_only, \
+                                                        prefetch_latency, \
+                                                        prefetch_degree)
+#else
+#define BUILD_NULL_PREFETCHER(t, comp, b) BUILD_CACHE_PANIC("NULL Prefetcher (uses Tagged)")
+#endif
+
+CREATE_SIM_OBJECT(BaseCache)
+{
+    string name = getInstanceName();
+    int numSets = size / (assoc * block_size);
+    string pf_policy = prefetch_policy;
+    if (subblock_size == 0) {  // 0 means "use the full block size"
+        subblock_size = block_size;
+    }
+
+    // Build BaseCache param object
+    BaseCache::Params base_params(addr_range, latency,
+                                  block_size, max_miss_count);
+
+    // Check the prefetcher policy against the miss/access stream flags
+    if (pf_policy == "none" && (prefetch_miss || prefetch_access)) {
+        panic("With no prefetcher, you shouldn't prefetch from"
+              " either miss or access stream\n");
+    }
+    if ((pf_policy == "tagged" || pf_policy == "stride" ||
+         pf_policy == "ghb") && !(prefetch_miss || prefetch_access)) {
+        warn("With this prefetcher you should chose a prefetch"
+             " stream (miss or access)\nNo Prefetching will occur\n");
+    }
+    if ((pf_policy == "tagged" || pf_policy == "stride" ||
+         pf_policy == "ghb") && prefetch_miss && prefetch_access) {
+        panic("Can't do prefetches from both miss and access"
+              " stream\n");
+    }
+    if (pf_policy != "tagged" && pf_policy != "stride" &&
+        pf_policy != "ghb"    && pf_policy != "none") {
+        panic("Unrecognized form of a prefetcher: %s, try using "
+              "['none','stride','tagged','ghb']\n", pf_policy);
+    }
+
+#if defined(USE_CACHE_IIC)
+    // Build IIC params
+    IIC::Params iic_params;
+    iic_params.size = size;
+    iic_params.numSets = numSets;
+    iic_params.blkSize = block_size;
+    iic_params.assoc = assoc;
+    iic_params.hashDelay = hash_delay;
+    iic_params.hitLatency = latency;
+    iic_params.rp = repl;
+    iic_params.subblockSize = subblock_size;
+#else
+    const void *repl = NULL;  // lets BUILD_CACHES' "repl == NULL" test compile without IIC
+#endif
+
+    if (mshrs == 1 /*|| out_bus->doEvents() == false*/) {
+        BlockingBuffer *mq = new BlockingBuffer(true);
+        BUILD_COHERENCE(BlockingBuffer);
+    } else {
+        MissQueue *mq = new MissQueue(mshrs, tgts_per_mshr, write_buffers,
+                                      true, prefetch_miss);
+        BUILD_COHERENCE(MissQueue);
+    }
+    return NULL;  // fallback; each BUILD_CACHE expansion above returns the new cache
+}
+
+REGISTER_SIM_OBJECT("BaseCache", BaseCache)
+
+
+#endif //DOXYGEN_SHOULD_SKIP_THIS
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
new file mode 100644
index 000000000..a447ae3d5
--- /dev/null
+++ b/src/mem/cache/cache_impl.hh
@@ -0,0 +1,660 @@
+/*
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Erik Hallnor
+ *          Dave Greene
+ *          Nathan Binkert
+ */
+
+/**
+ * @file
+ * Cache definitions.
+ */
+
+#include <assert.h>
+#include <math.h>
+
+#include <cassert>
+#include <iostream>
+#include <string>
+
+#include "sim/host.hh"
+#include "base/misc.hh"
+#include "cpu/smt.hh"
+
+#include "mem/cache/cache.hh"
+#include "mem/cache/cache_blk.hh"
+#include "mem/cache/miss/mshr.hh"
+#include "mem/cache/prefetch/prefetcher.hh"
+
+#include "sim/sim_events.hh" // for SimExitEvent
+
+using namespace std;
+
+/**
+ * Timing-mode entry point: CPU-side packets go to access(); memory-side
+ * packets are handled as responses or, failing that, snooped.
+ */
+template<class TagStore, class Buffering, class Coherence>
+bool
+Cache<TagStore,Buffering,Coherence>::
+doTimingAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide)
+{
+    if (isCpuSide) {
+        access(pkt);
+    } else if (pkt->isResponse()) {
+        handleResponse(pkt);
+    } else {
+        snoop(pkt);
+    }
+    return true; //Deal with blocking....
+}
+
+/**
+ * Atomic-mode entry point. CPU-side packets are probed (with update) and
+ * unconditionally marked successful; memory-side packets are treated as
+ * responses or snoop probes. The returned latency is always hitLatency.
+ */
+template<class TagStore, class Buffering, class Coherence>
+Tick
+Cache<TagStore,Buffering,Coherence>::
+doAtomicAccess(Packet *pkt, bool isCpuSide)
+{
+    if (isCpuSide) {
+        probe(pkt, true);
+        //TEMP ALWAYS SUCCES FOR NOW
+        pkt->result = Packet::Success;
+    } else if (pkt->isResponse()) {
+        handleResponse(pkt);
+    } else {
+        snoopProbe(pkt, true);
+    }
+    return hitLatency; //Fix this timing info
+}
+
+/**
+ * Functional-mode entry point: CPU-side packets are probed (thread context
+ * forced to 0,0) and marked successful; others are responses or snoops.
+ */
+template<class TagStore, class Buffering, class Coherence>
+void
+Cache<TagStore,Buffering,Coherence>::
+doFunctionalAccess(Packet *pkt, bool isCpuSide)
+{
+    if (isCpuSide) {
+        //TEMP USE CPU?THREAD 0 0
+        pkt->req->setThreadContext(0,0);
+        probe(pkt, true);
+        //TEMP ALWAYS SUCCESFUL FOR NOW
+        pkt->result = Packet::Success;
+    } else if (pkt->isResponse()) {
+        handleResponse(pkt);
+    } else {
+        snoopProbe(pkt, true);
+    }
+}
+
+template<class TagStore, class Buffering, class Coherence>
+void
+Cache<TagStore,Buffering,Coherence>::
+recvStatusChange(Port::Status status, bool isCpuSide)
+{
+    // Port status changes are currently ignored by the cache.
+}
+
+
+template<class TagStore, class Buffering, class Coherence>
+Cache<TagStore,Buffering,Coherence>::
+Cache(const std::string &_name,
+      Cache<TagStore,Buffering,Coherence>::Params &params)
+    : BaseCache(_name, params.baseParams),
+      prefetchAccess(params.prefetchAccess),
+      tags(params.tags), missQueue(params.missQueue),
+      coherence(params.coherence), prefetcher(params.prefetcher),
+      doCopy(params.doCopy), blockOnCopy(params.blockOnCopy)
+{
+//FIX BUS POINTERS: every cache is marked top level until params.in is back
+//    if (params.in == NULL) {
+        topLevelCache = true;
+//    }
+//PLEASE FIX THIS, BUS SIZES NOT BEING USED: a literal 1 is passed instead
+        tags->setCache(this, blkSize, 1/*params.out->width, params.out->clockRate*/);
+    tags->setPrefetcher(prefetcher);
+    missQueue->setCache(this);
+    missQueue->setPrefetcher(prefetcher);
+    coherence->setCache(this);
+    prefetcher->setCache(this);
+    prefetcher->setTags(tags);
+    prefetcher->setBuffer(missQueue);
+#if 0
+    invalidatePkt = new Packet;
+    invalidatePkt->cmd = Packet::InvalidateReq;
+#endif // NOTE(review): snoop() dereferences invalidatePkt; confirm it is set elsewhere
+}
+
+/** Register the base-class stats, then each component's under name(). */
+template<class TagStore, class Buffering, class Coherence>
+void Cache<TagStore,Buffering,Coherence>::regStats()
+{
+    BaseCache::regStats();
+    tags->regStats(name());
+    missQueue->regStats(name());
+    coherence->regStats(name());
+    prefetcher->regStats(name());
+}
+
+/**
+ * Timing access from the CPU side: looks the packet up in the tags, does a
+ * fast write-allocate when allowed, queues writebacks, then either
+ * responds to a hit or passes the miss to the miss queue.
+ * @return Always true. @todo return MA_HIT / MA_CACHE_MISS style values.
+ */
+template<class TagStore, class Buffering, class Coherence>
+bool
+Cache<TagStore,Buffering,Coherence>::access(PacketPtr &pkt)
+{
+//@todo Add back in MemDebug Calls: MemDebug::cacheAccess(pkt);
+    BlkType *blk = NULL;
+    PacketList writebacks;
+    int size = blkSize;
+    int lat = hitLatency;
+    if (prefetchAccess) {
+        //We are determining prefetches on access stream, call prefetcher
+        prefetcher->handleMiss(pkt, curTick);
+    }
+    if (!pkt->req->isUncacheable()) {
+        if (pkt->isInvalidate() && !pkt->isRead() && !pkt->isWrite()) {
+            //Upgrade or Invalidate
+            //Look into what happens if two slave caches on bus
+            DPRINTF(Cache, "%s %d %x ? blk_addr: %x\n", pkt->cmdString(),
+                    pkt->req->getAsid(), pkt->getAddr() & (((ULL(1))<<48)-1),
+                    pkt->getAddr() & ~((Addr)blkSize - 1));
+            //@todo Should this return latency have the hit latency in it?
+//          respond(pkt,curTick+lat);
+            pkt->flags |= SATISFIED;
+            return true;
+        }
+        blk = tags->handleAccess(pkt, lat, writebacks);
+    } else {
+        size = pkt->getSize();
+    }
+    // If this is a block size write/hint (WH64) allocate the block here
+    // if the coherence protocol allows it.
+    /** @todo make the fast write alloc (wh64) work with coherence. */
+    /** @todo Do we want to do fast writes for writebacks as well? */
+    if (!blk && pkt->getSize() >= blkSize && coherence->allowFastWrites() &&
+        (pkt->cmd == Packet::WriteReq || pkt->cmd == Packet::WriteInvalidateReq) ) {
+        // not outstanding misses, can do this
+        MSHR* outstanding_miss = missQueue->findMSHR(pkt->getAddr(), pkt->req->getAsid());
+        if (pkt->cmd == Packet::WriteInvalidateReq || !outstanding_miss) {
+            if (outstanding_miss) {
+                warn("WriteInv doing a fastallocate"
+                     " with an outstanding miss to the same address\n");
+            }
+            blk = tags->handleFill(NULL, pkt, BlkValid | BlkWritable,
+                                   writebacks);
+            ++fastWrites;
+        }
+    }
+    while (!writebacks.empty()) {
+        missQueue->doWriteback(writebacks.front());
+        writebacks.pop_front();
+    }
+    DPRINTF(Cache, "%s %d %x %s blk_addr: %x pc %x\n", pkt->cmdString(),
+            pkt->req->getAsid(), pkt->getAddr() & (((ULL(1))<<48)-1), (blk) ? "hit" : "miss",
+            pkt->getAddr() & ~((Addr)blkSize - 1), pkt->req->getPC());
+    if (blk) {
+        // Hit
+        hits[pkt->cmdToIndex()][pkt->req->getThreadNum()]++;
+        // clear dirty bit if write through
+        if (pkt->needsResponse())
+            respond(pkt, curTick+lat);
+        return true;
+    }
+
+    // Miss
+    if (!pkt->req->isUncacheable()) {
+        misses[pkt->cmdToIndex()][pkt->req->getThreadNum()]++;
+        /** @todo Move miss count code into BaseCache */
+        if (missCount) {
+            --missCount;
+            if (missCount == 0)
+                new SimLoopExitEvent(curTick, "A cache reached the maximum miss count");
+        }
+    }
+    missQueue->handleMiss(pkt, size, curTick + hitLatency);
+    return true;
+}
+
+
+/** Get the next miss-queue packet, setting its coherence bus command. */
+template<class TagStore, class Buffering, class Coherence>
+Packet *
+Cache<TagStore,Buffering,Coherence>::getPacket()
+{
+    Packet *nextPkt = missQueue->getPacket();
+    if (nextPkt && !nextPkt->req->isUncacheable()) {
+        if (nextPkt->cmd == Packet::HardPFReq)
+            misses[Packet::HardPFReq][nextPkt->req->getThreadNum()]++;
+        BlkType *blk = tags->findBlock(nextPkt);
+        Packet::Command busCmd =
+            coherence->getBusCmd(nextPkt->cmd, (blk)? blk->status : 0);
+        missQueue->setBusCmd(nextPkt, busCmd);
+    }
+
+    assert(!doMasterRequest() || missQueue->havePending());
+    assert(!nextPkt || nextPkt->time <= curTick);
+    return nextPkt;
+}
+
+/** Bus-send outcome: mark in service on success, else restore the command. */
+template<class TagStore, class Buffering, class Coherence>
+void
+Cache<TagStore,Buffering,Coherence>::sendResult(PacketPtr &pkt, bool success)
+{
+    if (!success) {
+        if (pkt && !pkt->req->isUncacheable())
+            missQueue->restoreOrigCmd(pkt);
+        return;
+    }
+    missQueue->markInService(pkt);
+    if (pkt->cmd == Packet::UpgradeReq) //Temp Hack for UPGRADES
+        handleResponse(pkt);
+}
+
+/** Handle a response carrying senderState: fill the block when asked to,
+ *  queue any writebacks from the fill, then notify the miss queue. */
+template<class TagStore, class Buffering, class Coherence>
+void
+Cache<TagStore,Buffering,Coherence>::handleResponse(Packet * &pkt)
+{
+    if (pkt->senderState) {
+//      MemDebug::cacheResponse(pkt);
+        DPRINTF(Cache, "Handling reponse to %x, blk addr: %x\n",pkt->getAddr(),
+                pkt->getAddr() & (((ULL(1))<<48)-1));
+        if (pkt->isCacheFill() && !pkt->isNoAllocate()) {
+            BlkType *blk = tags->findBlock(pkt);
+            CacheBlk::State old_state = (blk) ? blk->status : 0;
+            PacketList writebacks;
+            blk = tags->handleFill(blk, (MSHR*)pkt->senderState,
+                    coherence->getNewState(pkt,old_state), writebacks);
+            while (!writebacks.empty()) {
+                missQueue->doWriteback(writebacks.front());
+                writebacks.pop_front(); // drain, or this loop never ends
+            }
+        }
+        missQueue->handleResponse(pkt, curTick + hitLatency);
+    }
+}
+
+/** Temporarily move the block at (addr, asid) out of the tags into an MSHR. */
+template<class TagStore, class Buffering, class Coherence>
+void
+Cache<TagStore,Buffering,Coherence>::pseudoFill(Addr addr, int asid)
+{
+    int accLat;
+    PacketList evicted;
+    MSHR *holder = missQueue->allocateTargetList(addr, asid);
+    // Read the data into the MSHR's packet
+    BlkType *blk = tags->handleAccess(holder->pkt, accLat, evicted, false);
+    assert(evicted.empty());
+    assert(holder->pkt->flags & SATISFIED);
+    // order is unused on non-pending blocks, so it can hold the block state
+    holder->order = blk->status;
+    // temporarily remove the block from the cache.
+    tags->invalidateBlk(addr, asid);
+}
+
+/** Temporarily move the block read by mshr's packet out of the tags. */
+template<class TagStore, class Buffering, class Coherence>
+void
+Cache<TagStore,Buffering,Coherence>::pseudoFill(MSHR *mshr)
+{
+    assert(mshr->pkt->cmd == Packet::ReadReq);
+    int accLat;
+    PacketList evicted;
+    // Read the data into the mshr
+    BlkType *blk = tags->handleAccess(mshr->pkt, accLat, evicted, false);
+    assert(evicted.empty());
+    assert(mshr->pkt->flags & SATISFIED);
+    // order is unused on non-pending blocks, so it can hold the block state
+    mshr->order = blk->status;
+    // temporarily remove the block from the cache.
+    tags->invalidateBlk(mshr->pkt->getAddr(), mshr->pkt->req->getAsid());
+}
+
+
+template<class TagStore, class Buffering, class Coherence>
+Packet *
+Cache<TagStore,Buffering,Coherence>::getCoherencePacket()
+{
+    return coherence->getPacket(); // simply forwards to the coherence object
+}
+
+
+/**
+ * Timing-mode snoop of a bus request. A top-level cache with a coherence
+ * protocol first checks in-service MSHRs (NACK, or append an invalidate)
+ * and buffered writebacks (which can supply read data); anything not
+ * handled there goes to the coherence protocol and the tag store.
+ */
+template<class TagStore, class Buffering, class Coherence>
+void
+Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt)
+{
+    Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
+    BlkType *blk = tags->findBlock(pkt);
+    MSHR *mshr = missQueue->findMSHR(blk_addr, pkt->req->getAsid());
+    if (isTopLevel() && coherence->hasProtocol()) { //@todo Move this into handle bus req
+        //If we find an mshr, and it is in service, we need to NACK or invalidate
+        if (mshr) {
+            if (mshr->inService) {
+                if ((mshr->pkt->isInvalidate() || !mshr->pkt->isCacheFill())
+                    && (pkt->cmd != Packet::InvalidateReq && pkt->cmd != Packet::WriteInvalidateReq)) {
+                    //If the outstanding request was an invalidate (upgrade,readex,..)
+                    //Then we need to NACK the request until we get the data
+                    //Also NACK if the outstanding request is not a cachefill (writeback)
+                    pkt->flags |= NACKED_LINE;
+                    return;
+                } else {
+                    //The supplier will be someone else, because we are waiting for
+                    //the data.  This should cause this cache to be forced to go to
+                    //the shared state, not the exclusive even though the shared line
+                    //won't be asserted.  But for now we will just invlidate ourselves
+                    //and allow the other cache to go into the exclusive state.
+                    //@todo Make it so a read to a pending read doesn't invalidate.
+                    //@todo Make it so that a read to a pending read can't be exclusive now.
+                    //NOTE(review): invalidatePkt is only created under "#if 0" in the
+                    //constructor; confirm it is initialized before this path runs.
+                    //Set the address so find match works
+                    invalidatePkt->addrOverride(pkt->getAddr());
+                    //Append the invalidate on
+                    missQueue->addTarget(mshr,invalidatePkt);
+                    DPRINTF(Cache, "Appending Invalidate to blk_addr: %x\n", pkt->getAddr() & (((ULL(1))<<48)-1));
+                    return;
+                }
+            }
+        }
+        //We also need to check the writeback buffers and handle those
+        std::vector<MSHR *> writebacks;
+        if (missQueue->findWrites(blk_addr, pkt->req->getAsid(), writebacks)) {
+            DPRINTF(Cache, "Snoop hit in writeback to blk_addr: %x\n", pkt->getAddr() & (((ULL(1))<<48)-1));
+            //Look through writebacks for any non-uncachable writes, use that
+            for (unsigned i = 0; i < writebacks.size(); i++) {
+                mshr = writebacks[i];
+                if (!mshr->pkt->req->isUncacheable()) {
+                    if (pkt->isRead()) {
+                        //Only Upgrades don't get here
+                        //Supply the data
+                        pkt->flags |= SATISFIED;
+                        //If we are in an exclusive protocol, make it ask again
+                        //to get write permissions (upgrade), signal shared
+                        pkt->flags |= SHARED_LINE;
+                        //Byte offset of the request within the block
+                        Addr offset = pkt->getAddr() & (blkSize - 1);
+                        assert(offset < blkSize);
+                        assert(pkt->getSize() <= blkSize);
+                        assert(offset + pkt->getSize() <=blkSize);
+                        memcpy(pkt->getPtr<uint8_t>(), mshr->pkt->getPtr<uint8_t>() + offset, pkt->getSize());
+
+                        respondToSnoop(pkt);
+                    }
+
+                    if (pkt->isInvalidate()) {
+                        //This must be an upgrade or other cache will take ownership
+                        missQueue->markInService(mshr->pkt);
+                    }
+                    return;
+                }
+            }
+        }
+    }
+    CacheBlk::State new_state = 0;
+    bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state);
+    if (satisfy) {
+        tags->handleSnoop(blk, new_state, pkt);
+        respondToSnoop(pkt);
+        return;
+    }
+    tags->handleSnoop(blk, new_state);
+}
+
+template<class TagStore, class Buffering, class Coherence>
+void
+Cache<TagStore,Buffering,Coherence>::snoopResponse(Packet * &pkt)
+{
+    //Handle the response to a snoop; only a NACKED packet does anything here
+    if (pkt->flags & NACKED_LINE) {
+        //Need to mark it as not in service, and retry for bus
+        assert(0); //Yeah, we saw a NACK come through
+
+        //For now this should never get called: we return false when we see a
+        //NACK instead, which lets the bus_blocked mechanism handle the retry.
+        //For now it retries in just 2 cycles; need to figure out how to change
+        //that. Eventually we will want success to come in as a parameter too,
+        //and we need to handle what happens on a successful return of the
+        //sendAddr function.
+    }
+}
+
+template<class TagStore, class Buffering, class Coherence>
+void
+Cache<TagStore,Buffering,Coherence>::invalidateBlk(Addr addr, int asid)
+{
+    tags->invalidateBlk(addr,asid); // simply forwards to the tag store
+}
+
+
+/**
+ * Atomic/functional probe of the cache.
+ * With update set, a miss fetches the block atomically from the memory
+ * side and fills it; without update, the packet is sent functionally to
+ * the memory side and then reconciled with overlapping buffered writes.
+ * @return A latency on the miss paths. NOTE(review): the hit path returns
+ * curTick + lat, which looks like an absolute tick; confirm what callers
+ * expect.
+ * @todo Fix to not assume write allocate
+ */
+template<class TagStore, class Buffering, class Coherence>
+Tick
+Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update)
+{
+//    MemDebug::cacheProbe(pkt);
+    if (!pkt->req->isUncacheable()) {
+        if (pkt->isInvalidate() && !pkt->isRead()
+            && !pkt->isWrite()) {
+            //Upgrade or Invalidate, satisfy it, don't forward
+            DPRINTF(Cache, "%s %d %x ? blk_addr: %x\n", pkt->cmdString(),
+                    pkt->req->getAsid(), pkt->getAddr() & (((ULL(1))<<48)-1),
+                    pkt->getAddr() & ~((Addr)blkSize - 1));
+            pkt->flags |= SATISFIED;
+            return 0;
+        }
+    }
+
+    PacketList writebacks;
+    int lat;
+    BlkType *blk = tags->handleAccess(pkt, lat, writebacks, update);
+
+    if (!blk) {
+        // Need to check for outstanding misses and writes
+        Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
+
+        // There can only be one matching outstanding miss.
+        MSHR* mshr = missQueue->findMSHR(blk_addr, pkt->req->getAsid());
+
+        // There can be many matching outstanding writes.
+        vector<MSHR*> writes;
+        missQueue->findWrites(blk_addr, pkt->req->getAsid(), writes);
+
+        if (!update) {
+            memSidePort->sendFunctional(pkt);
+            // Check for data in MSHR and writebuffer.
+            if (mshr) {
+                warn("Found outstanding miss on an non-update probe");
+                MSHR::TargetList *targets = mshr->getTargetList();
+                MSHR::TargetList::iterator i = targets->begin();
+                MSHR::TargetList::iterator end = targets->end();
+                for (; i != end; ++i) {
+                    Packet * target = *i;
+                    // If the target contains data, and it overlaps the
+                    // probed request, need to update data
+                    if (target->isWrite() && target->intersect(pkt)) {
+                        uint8_t* pkt_data;
+                        uint8_t* write_data;
+                        int data_size;
+                        if (target->getAddr() < pkt->getAddr()) {
+                            int offset = pkt->getAddr() - target->getAddr();
+                            pkt_data = pkt->getPtr<uint8_t>();
+                            write_data = target->getPtr<uint8_t>() + offset;
+                            data_size = target->getSize() - offset;
+                            assert(data_size > 0);
+                            if (data_size > pkt->getSize())
+                                data_size = pkt->getSize();
+                        } else {
+                            int offset = target->getAddr() - pkt->getAddr();
+                            pkt_data = pkt->getPtr<uint8_t>() + offset;
+                            write_data = target->getPtr<uint8_t>();
+                            data_size = pkt->getSize() - offset;
+                            assert(data_size > 0);
+                            if (data_size > target->getSize())
+                                data_size = target->getSize();
+                        }
+                        // Writes update the buffered copy; reads take from it.
+                        if (pkt->isWrite()) {
+                            memcpy(write_data, pkt_data, data_size);
+                        } else {
+                            memcpy(pkt_data, write_data, data_size);
+                        }
+                    }
+                }
+            }
+            for (int i = 0; i < writes.size(); ++i) {
+                Packet * write = writes[i]->pkt;
+                if (write->intersect(pkt)) {
+                    warn("Found outstanding write on an non-update probe");
+                    uint8_t* pkt_data;
+                    uint8_t* write_data;
+                    int data_size;
+                    if (write->getAddr() < pkt->getAddr()) {
+                        int offset = pkt->getAddr() - write->getAddr();
+                        pkt_data = pkt->getPtr<uint8_t>();
+                        write_data = write->getPtr<uint8_t>() + offset;
+                        data_size = write->getSize() - offset;
+                        assert(data_size > 0);
+                        if (data_size > pkt->getSize())
+                            data_size = pkt->getSize();
+                    } else {
+                        int offset = write->getAddr() - pkt->getAddr();
+                        pkt_data = pkt->getPtr<uint8_t>() + offset;
+                        write_data = write->getPtr<uint8_t>();
+                        data_size = pkt->getSize() - offset;
+                        assert(data_size > 0);
+                        if (data_size > write->getSize())
+                            data_size = write->getSize();
+                    }
+                    // Writes update the buffered copy; reads take from it.
+                    if (pkt->isWrite()) {
+                        memcpy(write_data, pkt_data, data_size);
+                    } else {
+                        memcpy(pkt_data, write_data, data_size);
+                    }
+                }
+            }
+            return 0;
+        } else {
+            // update the cache state and statistics
+            if (mshr || !writes.empty()){
+                // Can't handle it, return request unsatisfied.
+                return 0;
+            }
+            if (!pkt->req->isUncacheable()) {
+                // Fetch the cache block to fill
+                BlkType *blk = tags->findBlock(pkt);
+                Packet::Command temp_cmd = coherence->getBusCmd(pkt->cmd,
+                                                   (blk)? blk->status : 0);
+                Packet * busPkt = new Packet(pkt->req,temp_cmd, -1, blkSize);
+                busPkt->allocate();
+                busPkt->time = curTick;
+                lat = memSidePort->sendAtomic(busPkt);
+/*              if (!(busPkt->flags & SATISFIED)) {
+                    // blocked at a higher level, just return
+                    return 0;
+                }
+*/
+                misses[pkt->cmdToIndex()][pkt->req->getThreadNum()]++;
+                CacheBlk::State old_state = (blk) ? blk->status : 0;
+                tags->handleFill(blk, busPkt,
+                                 coherence->getNewState(busPkt, old_state),
+                                 writebacks, pkt);
+                // Handle writebacks if needed
+                while (!writebacks.empty()){
+                    memSidePort->sendAtomic(writebacks.front());
+                    writebacks.pop_front();
+                }
+                return lat + hitLatency;
+            } else {
+                return memSidePort->sendAtomic(pkt);
+            }
+        }
+    } else {
+        // There was a cache hit.
+        // Handle writebacks if needed
+        while (!writebacks.empty()){
+            memSidePort->sendAtomic(writebacks.front());
+            writebacks.pop_front();
+        }
+
+        if (update) {
+            hits[pkt->cmdToIndex()][pkt->req->getThreadNum()]++;
+        } else if (pkt->isWrite()) {
+            // Still need to change data in all locations.
+            return memSidePort->sendAtomic(pkt);
+        }
+        return curTick + lat;
+    }
+    fatal("Probe not handled.\n");
+    return 0;
+}
+
+/** Atomic snoop: let the coherence protocol decide, then update the tags. */
+template<class TagStore, class Buffering, class Coherence>
+Tick
+Cache<TagStore,Buffering,Coherence>::snoopProbe(PacketPtr &pkt, bool update)
+{
+    Addr lineAddr = pkt->getAddr() & ~(Addr(blkSize-1));
+    BlkType *line = tags->findBlock(pkt);
+    MSHR *pending = missQueue->findMSHR(lineAddr, pkt->req->getAsid());
+    CacheBlk::State nextState = 0;
+    if (!coherence->handleBusRequest(pkt,line,pending, nextState)) {
+        tags->handleSnoop(line, nextState);
+        return 0;
+    }
+    tags->handleSnoop(line, nextState, pkt);
+    return hitLatency;
+}
+
diff --git a/src/mem/cache/coherence/coherence_protocol.cc b/src/mem/cache/coherence/coherence_protocol.cc
new file mode 100644
index 000000000..bcf3ce9c5
--- /dev/null
+++ b/src/mem/cache/coherence/coherence_protocol.cc
@@ -0,0 +1,567 @@
+/*
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Erik Hallnor
+ *          Steve Reinhardt
+ *          Ron Dreslinski
+ */
+
+/**
+ * @file
+ * Definitions of CoherenceProtocol.
+ */
+
+#include <string>
+
+#include "base/misc.hh"
+#include "mem/cache/miss/mshr.hh"
+#include "mem/cache/cache.hh"
+#include "mem/cache/coherence/coherence_protocol.hh"
+#include "sim/builder.hh"
+
+using namespace std;
+
+// Default entry: InvalidCmd bus command, newState -1, snoop handler that panics.
+CoherenceProtocol::StateTransition::StateTransition()
+    : busCmd(Packet::InvalidCmd), newState(-1), snoopFunc(invalidTransition)
+{
+}
+
+
+void
+CoherenceProtocol::regStats()
+{
+    // Every possible (state, command) pair is counted in the
+    // requestCount and snoopCount arrays, but most pairs are invalid,
+    // so only the interesting ones are given a name and description.
+
+    requestCount[Invalid][Packet::ReadReq]
+        .name(name() + ".read_invalid")
+        .desc("read misses to invalid blocks")
+        ;
+
+    requestCount[Invalid][Packet::WriteReq]
+        .name(name() + ".write_invalid")
+        .desc("write misses to invalid blocks")
+        ;
+
+    requestCount[Invalid][Packet::SoftPFReq]
+        .name(name() + ".swpf_invalid")
+        .desc("soft prefetch misses to invalid blocks")
+        ;
+
+    requestCount[Invalid][Packet::HardPFReq]
+        .name(name() + ".hwpf_invalid")
+        .desc("hard prefetch misses to invalid blocks")
+        ;
+
+    requestCount[Shared][Packet::WriteReq]
+        .name(name() + ".write_shared")
+        .desc("write misses to shared blocks")
+        ;
+
+    requestCount[Owned][Packet::WriteReq]
+        .name(name() + ".write_owned")
+        .desc("write misses to owned blocks")
+        ;
+
+    snoopCount[Shared][Packet::ReadReq]
+        .name(name() + ".snoop_read_shared")
+        .desc("read snoops on shared blocks")
+        ;
+
+    snoopCount[Shared][Packet::ReadExReq]
+        .name(name() + ".snoop_readex_shared")
+        .desc("readEx snoops on shared blocks")
+        ;
+
+    snoopCount[Shared][Packet::UpgradeReq]
+        .name(name() + ".snoop_upgrade_shared")
+        .desc("upgrade snoops on shared blocks")
+        ;
+
+    snoopCount[Modified][Packet::ReadReq]
+        .name(name() + ".snoop_read_modified")
+        .desc("read snoops on modified blocks")
+        ;
+
+    snoopCount[Modified][Packet::ReadExReq]
+        .name(name() + ".snoop_readex_modified")
+        .desc("readEx snoops on modified blocks")
+        ;
+
+    snoopCount[Owned][Packet::ReadReq]
+        .name(name() + ".snoop_read_owned")
+        .desc("read snoops on owned blocks")
+        ;
+
+    snoopCount[Owned][Packet::ReadExReq]
+        .name(name() + ".snoop_readex_owned")
+        .desc("readEx snoops on owned blocks")
+        ;
+
+    snoopCount[Owned][Packet::UpgradeReq]
+        .name(name() + ".snoop_upgrade_owned")
+        .desc("upgrade snoops on owned blocks")
+        ;
+
+    snoopCount[Exclusive][Packet::ReadReq]
+        .name(name() + ".snoop_read_exclusive")
+        .desc("read snoops on exclusive blocks")
+        ;
+
+    snoopCount[Exclusive][Packet::ReadExReq]
+        .name(name() + ".snoop_readex_exclusive")
+        .desc("readEx snoops on exclusive blocks")
+        ;
+
+    snoopCount[Shared][Packet::InvalidateReq]
+        .name(name() + ".snoop_inv_shared")
+        .desc("Invalidate snoops on shared blocks")
+        ;
+
+    snoopCount[Owned][Packet::InvalidateReq]
+        .name(name() + ".snoop_inv_owned")
+        .desc("Invalidate snoops on owned blocks")
+        ;
+
+    snoopCount[Exclusive][Packet::InvalidateReq]
+        .name(name() + ".snoop_inv_exclusive")
+        .desc("Invalidate snoops on exclusive blocks")
+        ;
+
+    snoopCount[Modified][Packet::InvalidateReq]
+        .name(name() + ".snoop_inv_modified")
+        .desc("Invalidate snoops on modified blocks")
+        ;
+
+    snoopCount[Invalid][Packet::InvalidateReq]
+        .name(name() + ".snoop_inv_invalid")
+        .desc("Invalidate snoops on invalid blocks")
+        ;
+
+    snoopCount[Shared][Packet::WriteInvalidateReq]
+        .name(name() + ".snoop_writeinv_shared")
+        .desc("WriteInvalidate snoops on shared blocks")
+        ;
+
+    snoopCount[Owned][Packet::WriteInvalidateReq]
+        .name(name() + ".snoop_writeinv_owned")
+        .desc("WriteInvalidate snoops on owned blocks")
+        ;
+
+    snoopCount[Exclusive][Packet::WriteInvalidateReq]
+        .name(name() + ".snoop_writeinv_exclusive")
+        .desc("WriteInvalidate snoops on exclusive blocks")
+        ;
+
+    snoopCount[Modified][Packet::WriteInvalidateReq]
+        .name(name() + ".snoop_writeinv_modified")
+        .desc("WriteInvalidate snoops on modified blocks")
+        ;
+
+    snoopCount[Invalid][Packet::WriteInvalidateReq]
+        .name(name() + ".snoop_writeinv_invalid")
+        .desc("WriteInvalidate snoops on invalid blocks")
+        ;
+}
+
+// Snoop handler: move the block to Invalid; returns false (not satisfied here).
+bool
+CoherenceProtocol::invalidateTrans(BaseCache *cache, Packet * &pkt,
+                                   CacheBlk *blk, MSHR *mshr,
+                                   CacheBlk::State & new_state)
+{
+    // Clear the coherence bits (Invalid is 0) but keep all other status bits.
+    new_state = (blk->status & ~stateMask) | Invalid;
+    return false;
+}
+
+
+bool
+CoherenceProtocol::supplyTrans(BaseCache *cache, Packet * &pkt,
+                               CacheBlk *blk, MSHR *mshr,
+                               CacheBlk::State & new_state)
+{
+    // Report the snoop as satisfied by this cache. new_state is left
+    // untouched here; the supplyAnd*Trans wrappers set it before calling.
+    return true;
+}
+
+
+bool
+CoherenceProtocol::supplyAndGotoSharedTrans(BaseCache *cache, Packet * &pkt,
+                                            CacheBlk *blk, MSHR *mshr,
+                                            CacheBlk::State & new_state)
+{
+    // Drop to Shared (other status bits kept), flag the line as shared, supply.
+    new_state = (blk->status & ~stateMask) | Shared;
+    pkt->flags |= SHARED_LINE;
+    return supplyTrans(cache, pkt, blk, mshr, new_state);
+}
+
+
+bool
+CoherenceProtocol::supplyAndGotoOwnedTrans(BaseCache *cache, Packet * &pkt,
+                                           CacheBlk *blk, MSHR *mshr,
+                                           CacheBlk::State & new_state)
+{
+    // Move to Owned (other status bits kept), flag the line as shared, supply.
+    new_state = (blk->status & ~stateMask) | Owned;
+    pkt->flags |= SHARED_LINE;
+    return supplyTrans(cache, pkt, blk, mshr, new_state);
+}
+
+
+bool
+CoherenceProtocol::supplyAndInvalidateTrans(BaseCache *cache, Packet * &pkt,
+                                            CacheBlk *blk, MSHR *mshr,
+                                            CacheBlk::State & new_state)
+{
+    // Give up the block (coherence bits cleared) and supply its data.
+    new_state = (blk->status & ~stateMask) | Invalid;
+    return supplyTrans(cache, pkt, blk, mshr, new_state);
+}
+
+bool
+CoherenceProtocol::assertShared(BaseCache *cache, Packet * &pkt,
+                                CacheBlk *blk, MSHR *mshr,
+                                CacheBlk::State & new_state)
+{
+    // Go to Shared and flag the line as shared; returns false (not supplied).
+    new_state = (blk->status & ~stateMask) | Shared;
+    pkt->flags |= SHARED_LINE;
+    return false;
+}
+
+CoherenceProtocol::CoherenceProtocol(const string &name,
+                                     const string &protocol,
+                                     const bool doUpgrades)
+    : SimObject(name)
+{
+    if ((protocol == "mosi" || protocol == "moesi") && !doUpgrades) {
+        cerr << "CoherenceProtocol: ownership protocols require upgrade transactions"
+             << " (write miss on owned block generates ReadExcl, which will clobber dirty block)"
+             << endl;
+        fatal("");
+    }
+    // Bus command/response pair used for writes to Shared or Owned blocks.
+    Packet::Command writeToSharedCmd = doUpgrades ? Packet::UpgradeReq : Packet::ReadExReq;
+    Packet::Command writeToSharedResp = doUpgrades ? Packet::UpgradeResp : Packet::ReadExResp;
+
+//@todo add in hardware prefetch to this list
+    if (protocol == "msi") {
+        // incoming requests: specify outgoing bus request
+        transitionTable[Invalid][Packet::ReadReq].onRequest(Packet::ReadReq);
+        transitionTable[Invalid][Packet::WriteReq].onRequest(Packet::ReadExReq);
+        transitionTable[Shared][Packet::WriteReq].onRequest(writeToSharedCmd);
+        //Prefetching causes a read
+        transitionTable[Invalid][Packet::SoftPFReq].onRequest(Packet::ReadReq);
+        transitionTable[Invalid][Packet::HardPFReq].onRequest(Packet::ReadReq);
+
+        // on response to given request: specify new state
+        transitionTable[Invalid][Packet::ReadResp].onResponse(Shared);
+        transitionTable[Invalid][Packet::ReadExResp].onResponse(Modified);
+        transitionTable[Shared][writeToSharedResp].onResponse(Modified);
+
+        // bus snoop transition functions
+        transitionTable[Invalid][Packet::ReadReq].onSnoop(nullTransition);
+        transitionTable[Invalid][Packet::ReadExReq].onSnoop(nullTransition);
+        transitionTable[Shared][Packet::ReadReq].onSnoop(nullTransition);
+        transitionTable[Shared][Packet::ReadExReq].onSnoop(invalidateTrans);
+        transitionTable[Modified][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans);
+        transitionTable[Modified][Packet::ReadReq].onSnoop(supplyAndGotoSharedTrans);
+        //Transitions on seeing a DMA (writeInv(samelevel) or DMAInv)
+        transitionTable[Invalid][Packet::InvalidateReq].onSnoop(invalidateTrans);
+        transitionTable[Shared][Packet::InvalidateReq].onSnoop(invalidateTrans);
+        transitionTable[Modified][Packet::InvalidateReq].onSnoop(invalidateTrans);
+        transitionTable[Invalid][Packet::WriteInvalidateReq].onSnoop(invalidateTrans);
+        transitionTable[Shared][Packet::WriteInvalidateReq].onSnoop(invalidateTrans);
+        transitionTable[Modified][Packet::WriteInvalidateReq].onSnoop(invalidateTrans);
+
+        if (doUpgrades) {
+            transitionTable[Invalid][Packet::UpgradeReq].onSnoop(nullTransition);
+            transitionTable[Shared][Packet::UpgradeReq].onSnoop(invalidateTrans);
+        }
+    }
+
+    else if(protocol == "mesi") {
+        // incoming requests: specify outgoing bus request
+        transitionTable[Invalid][Packet::ReadReq].onRequest(Packet::ReadReq);
+        transitionTable[Invalid][Packet::WriteReq].onRequest(Packet::ReadExReq);
+        transitionTable[Shared][Packet::WriteReq].onRequest(writeToSharedCmd);
+        //Prefetching causes a read
+        transitionTable[Invalid][Packet::SoftPFReq].onRequest(Packet::ReadReq);
+        transitionTable[Invalid][Packet::HardPFReq].onRequest(Packet::ReadReq);
+
+        // on response to given request: specify new state
+        transitionTable[Invalid][Packet::ReadResp].onResponse(Exclusive);
+        //It will move into shared if the shared line is asserted in the
+        //getNewState function
+        transitionTable[Invalid][Packet::ReadExResp].onResponse(Modified);
+        transitionTable[Shared][writeToSharedResp].onResponse(Modified);
+
+        // bus snoop transition functions
+        transitionTable[Invalid][Packet::ReadReq].onSnoop(nullTransition);
+        transitionTable[Invalid][Packet::ReadExReq].onSnoop(nullTransition);
+        transitionTable[Shared][Packet::ReadReq].onSnoop(assertShared);
+        transitionTable[Shared][Packet::ReadExReq].onSnoop(invalidateTrans);
+        transitionTable[Exclusive][Packet::ReadReq].onSnoop(assertShared);
+        transitionTable[Exclusive][Packet::ReadExReq].onSnoop(invalidateTrans);
+        transitionTable[Modified][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans);
+        transitionTable[Modified][Packet::ReadReq].onSnoop(supplyAndGotoSharedTrans);
+        //Transitions on seeing a DMA (writeInv(samelevel) or DMAInv)
+        transitionTable[Invalid][Packet::InvalidateReq].onSnoop(invalidateTrans);
+        transitionTable[Shared][Packet::InvalidateReq].onSnoop(invalidateTrans);
+        transitionTable[Modified][Packet::InvalidateReq].onSnoop(invalidateTrans);
+        transitionTable[Exclusive][Packet::InvalidateReq].onSnoop(invalidateTrans);
+        transitionTable[Invalid][Packet::WriteInvalidateReq].onSnoop(invalidateTrans);
+        transitionTable[Shared][Packet::WriteInvalidateReq].onSnoop(invalidateTrans);
+        transitionTable[Modified][Packet::WriteInvalidateReq].onSnoop(invalidateTrans);
+        transitionTable[Exclusive][Packet::WriteInvalidateReq].onSnoop(invalidateTrans);
+
+        if (doUpgrades) {
+            transitionTable[Invalid][Packet::UpgradeReq].onSnoop(nullTransition);
+            transitionTable[Shared][Packet::UpgradeReq].onSnoop(invalidateTrans);
+        }
+    }
+
+    else if(protocol == "mosi") {
+        // incoming requests: specify outgoing bus request
+        transitionTable[Invalid][Packet::ReadReq].onRequest(Packet::ReadReq);
+        transitionTable[Invalid][Packet::WriteReq].onRequest(Packet::ReadExReq);
+        transitionTable[Shared][Packet::WriteReq].onRequest(writeToSharedCmd);
+        transitionTable[Owned][Packet::WriteReq].onRequest(writeToSharedCmd);
+        //Prefetching causes a read
+        transitionTable[Invalid][Packet::SoftPFReq].onRequest(Packet::ReadReq);
+        transitionTable[Invalid][Packet::HardPFReq].onRequest(Packet::ReadReq);
+
+        // on response to given request: specify new state
+        transitionTable[Invalid][Packet::ReadResp].onResponse(Shared);
+        transitionTable[Invalid][Packet::ReadExResp].onResponse(Modified);
+        transitionTable[Shared][writeToSharedResp].onResponse(Modified);
+        transitionTable[Owned][writeToSharedResp].onResponse(Modified);
+
+        // bus snoop transition functions
+        transitionTable[Invalid][Packet::ReadReq].onSnoop(nullTransition);
+        transitionTable[Invalid][Packet::ReadExReq].onSnoop(nullTransition);
+        transitionTable[Invalid][Packet::UpgradeReq].onSnoop(nullTransition);
+        transitionTable[Shared][Packet::ReadReq].onSnoop(assertShared);
+        transitionTable[Shared][Packet::ReadExReq].onSnoop(invalidateTrans);
+        transitionTable[Shared][Packet::UpgradeReq].onSnoop(invalidateTrans);
+        transitionTable[Modified][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans);
+        transitionTable[Modified][Packet::ReadReq].onSnoop(supplyAndGotoOwnedTrans);
+        transitionTable[Owned][Packet::ReadReq].onSnoop(supplyAndGotoOwnedTrans);
+        transitionTable[Owned][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans);
+        transitionTable[Owned][Packet::UpgradeReq].onSnoop(invalidateTrans);
+        //Transitions on seeing a DMA (writeInv(samelevel) or DMAInv)
+        transitionTable[Invalid][Packet::InvalidateReq].onSnoop(invalidateTrans);
+        transitionTable[Shared][Packet::InvalidateReq].onSnoop(invalidateTrans);
+        transitionTable[Modified][Packet::InvalidateReq].onSnoop(invalidateTrans);
+        transitionTable[Owned][Packet::InvalidateReq].onSnoop(invalidateTrans);
+        transitionTable[Invalid][Packet::WriteInvalidateReq].onSnoop(invalidateTrans);
+        transitionTable[Shared][Packet::WriteInvalidateReq].onSnoop(invalidateTrans);
+        transitionTable[Modified][Packet::WriteInvalidateReq].onSnoop(invalidateTrans);
+        transitionTable[Owned][Packet::WriteInvalidateReq].onSnoop(invalidateTrans);
+    }
+
+    else if(protocol == "moesi") {
+        // incoming requests: specify outgoing bus request
+        transitionTable[Invalid][Packet::ReadReq].onRequest(Packet::ReadReq);
+        transitionTable[Invalid][Packet::WriteReq].onRequest(Packet::ReadExReq);
+        transitionTable[Shared][Packet::WriteReq].onRequest(writeToSharedCmd);
+        transitionTable[Owned][Packet::WriteReq].onRequest(writeToSharedCmd);
+        //Prefetching causes a read
+        transitionTable[Invalid][Packet::SoftPFReq].onRequest(Packet::ReadReq);
+        transitionTable[Invalid][Packet::HardPFReq].onRequest(Packet::ReadReq);
+
+        // on response to given request: specify new state
+        transitionTable[Invalid][Packet::ReadResp].onResponse(Exclusive);
+        //It will move into shared if the shared line is asserted in the
+        //getNewState function
+        transitionTable[Invalid][Packet::ReadExResp].onResponse(Modified);
+        transitionTable[Shared][writeToSharedResp].onResponse(Modified);
+        transitionTable[Owned][writeToSharedResp].onResponse(Modified);
+
+        // bus snoop transition functions
+        transitionTable[Invalid][Packet::ReadReq].onSnoop(nullTransition);
+        transitionTable[Invalid][Packet::ReadExReq].onSnoop(nullTransition);
+        transitionTable[Invalid][Packet::UpgradeReq].onSnoop(nullTransition);
+        transitionTable[Shared][Packet::ReadReq].onSnoop(assertShared);
+        transitionTable[Shared][Packet::ReadExReq].onSnoop(invalidateTrans);
+        transitionTable[Shared][Packet::UpgradeReq].onSnoop(invalidateTrans);
+        transitionTable[Exclusive][Packet::ReadReq].onSnoop(assertShared);
+        transitionTable[Exclusive][Packet::ReadExReq].onSnoop(invalidateTrans);
+        transitionTable[Modified][Packet::ReadReq].onSnoop(supplyAndGotoOwnedTrans);
+        transitionTable[Modified][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans);
+        transitionTable[Owned][Packet::ReadReq].onSnoop(supplyAndGotoOwnedTrans);
+        transitionTable[Owned][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans);
+        transitionTable[Owned][Packet::UpgradeReq].onSnoop(invalidateTrans);
+        //Transitions on seeing a DMA (writeInv(samelevel) or DMAInv)
+        transitionTable[Invalid][Packet::InvalidateReq].onSnoop(invalidateTrans);
+        transitionTable[Shared][Packet::InvalidateReq].onSnoop(invalidateTrans);
+        transitionTable[Exclusive][Packet::InvalidateReq].onSnoop(invalidateTrans);
+        transitionTable[Modified][Packet::InvalidateReq].onSnoop(invalidateTrans);
+        transitionTable[Owned][Packet::InvalidateReq].onSnoop(invalidateTrans);
+        transitionTable[Invalid][Packet::WriteInvalidateReq].onSnoop(invalidateTrans);
+        transitionTable[Shared][Packet::WriteInvalidateReq].onSnoop(invalidateTrans);
+        transitionTable[Exclusive][Packet::WriteInvalidateReq].onSnoop(invalidateTrans);
+        transitionTable[Modified][Packet::WriteInvalidateReq].onSnoop(invalidateTrans);
+        transitionTable[Owned][Packet::WriteInvalidateReq].onSnoop(invalidateTrans);
+    }
+
+    else {
+        cerr << "CoherenceProtocol: unrecognized protocol " << protocol
+             <<  endl;
+        fatal("");
+    }
+}
+
+
+Packet::Command
+CoherenceProtocol::getBusCmd(Packet::Command cmdIn, CacheBlk::State state,
+                             MSHR *mshr)
+{
+    // Only the coherence bits of the block status select the table row.
+    const CacheBlk::State row = state & stateMask;
+    const int col = static_cast<int>(cmdIn);
+    assert(0 <= row && row <= stateMax);
+    assert(0 <= col && col < NUM_MEM_CMDS);
+
+    // An entry the protocol never registered still holds InvalidCmd.
+    const StateTransition &entry = transitionTable[row][col];
+    assert(entry.busCmd != Packet::InvalidCmd);
+
+    // Count the request after the validity check, as the table lookup did.
+    ++requestCount[row][col];
+    return entry.busCmd;
+}
+
+
+CacheBlk::State
+CoherenceProtocol::getNewState(Packet * &pkt, CacheBlk::State oldState)
+{
+    const CacheBlk::State row = oldState & stateMask;
+    const int col = pkt->cmdToIndex();
+
+    assert(0 <= row && row <= stateMax);
+    assert(0 <= col && col < NUM_MEM_CMDS);
+
+    CacheBlk::State next = transitionTable[row][col].newState;
+
+    // A response that would give Exclusive gives Shared instead when the
+    // shared line was asserted on the packet.
+    if (next == Exclusive && (pkt->flags & SHARED_LINE))
+        next = Shared;
+
+    // Entries never given a response state still hold the default -1.
+    assert(next != -1);
+
+    // Keep every status bit outside the coherence mask and merge in the
+    // new coherence state.
+    return (oldState & ~stateMask) | next;
+}
+
+
+bool
+CoherenceProtocol::handleBusRequest(BaseCache *cache, Packet * &pkt,
+                                    CacheBlk *blk, MSHR *mshr,
+                                    CacheBlk::State & new_state)
+{
+    // Without a block there is nothing to snoop; new_state is not modified.
+    if (!blk) {
+        return false;
+    }
+
+    const CacheBlk::State row = blk->status & stateMask;
+    const int col = pkt->cmdToIndex();
+
+    assert(0 <= row && row <= stateMax);
+    assert(0 <= col && col < NUM_MEM_CMDS);
+
+    // An MSHR for this block is tolerated only while it is not in service
+    // (a relaxation of the earlier requirement that there be no MSHR).
+    assert(!mshr || mshr->inService == 0);
+
+    // The snoop is counted before dispatch, so it is recorded even when
+    // the entry is unregistered and invalidTransition() panics.
+    ++snoopCount[row][col];
+
+    // Dispatch to the protocol's handler, which may update new_state and
+    // reports whether this cache should satisfy the request.
+    SnoopFuncType handler = transitionTable[row][col].snoopFunc;
+    return handler(cache, pkt, blk, mshr, new_state);
+}
+
+bool
+CoherenceProtocol::nullTransition(BaseCache *cache, Packet * &pkt,
+                                  CacheBlk *blk, MSHR *mshr,
+                                  CacheBlk::State & new_state)
+{
+    // No transition: report the block's current status unchanged, if any.
+    if (blk)
+        new_state = blk->status;
+    return false;
+}
+
+// Default snoop handler for table entries no protocol registered: panics.
+bool
+CoherenceProtocol::invalidTransition(BaseCache *cache, Packet * &pkt,
+                                     CacheBlk *blk, MSHR *mshr,
+                                     CacheBlk::State & new_state)
+{
+    panic("Invalid transition");
+    return false;
+}
+
+#ifndef DOXYGEN_SHOULD_SKIP_THIS
+
+BEGIN_DECLARE_SIM_OBJECT_PARAMS(CoherenceProtocol)
+
+    Param<string> protocol;
+    Param<bool> do_upgrades;
+
+END_DECLARE_SIM_OBJECT_PARAMS(CoherenceProtocol)
+
+// protocol is given no default here; do_upgrades is given a default of true.
+BEGIN_INIT_SIM_OBJECT_PARAMS(CoherenceProtocol)
+
+    INIT_PARAM(protocol, "name of coherence protocol"),
+    INIT_PARAM_DFLT(do_upgrades, "use upgrade transactions?", true)
+
+END_INIT_SIM_OBJECT_PARAMS(CoherenceProtocol)
+
+// Construct the object from its instance name and the two parameters.
+CREATE_SIM_OBJECT(CoherenceProtocol)
+{
+    return new CoherenceProtocol(getInstanceName(), protocol,
+                                 do_upgrades);
+}
+
+REGISTER_SIM_OBJECT("CoherenceProtocol", CoherenceProtocol)
+
+#endif // DOXYGEN_SHOULD_SKIP_THIS
diff --git a/src/mem/cache/coherence/coherence_protocol.hh b/src/mem/cache/coherence/coherence_protocol.hh
new file mode 100644
index 000000000..21351ace4
--- /dev/null
+++ b/src/mem/cache/coherence/coherence_protocol.hh
@@ -0,0 +1,263 @@
+/*
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Erik Hallnor
+ *          Ron Dreslinski
+ *          Steve Reinhardt
+ */
+
+/**
+ * @file
+ * Declaration of CoherenceProtocol, a basic coherence policy.
+ */
+#ifndef __COHERENCE_PROTOCOL_HH__
+#define __COHERENCE_PROTOCOL_HH__
+
+#include <string>
+
+#include "sim/sim_object.hh"
+#include "mem/packet.hh"
+#include "mem/cache/cache_blk.hh"
+#include "base/statistics.hh"
+
+class BaseCache;
+class MSHR;
+
+/**
+ * A simple coherence policy for the memory hierarchy. Currently implements
+ * MSI, MESI, MOSI, and MOESI protocols.
+ */
+class CoherenceProtocol : public SimObject
+{
+  public:
+    /**
+     * Construct and initialize this policy.
+     * @param name The name of this policy.
+     * @param protocol The string representation of the protocol to use.
+     * @param doUpgrades True if bus upgrades should be used.
+     */
+    CoherenceProtocol(const std::string &name, const std::string &protocol,
+                      const bool doUpgrades);
+
+    /**
+     * Destructor.
+     */
+    virtual ~CoherenceProtocol() {};
+
+    /**
+     * Register statistics
+     */
+    virtual void regStats();
+
+    /**
+     * Get the proper bus command for the given command and status.
+     * @param cmd The request's command.
+     * @param status The current state of the cache block.
+     * @param mshr The MSHR matching the request (not read by getBusCmd).
+     * @return The proper bus command, as determined by the protocol.
+     */
+    Packet::Command getBusCmd(Packet::Command cmd, CacheBlk::State status,
+                     MSHR *mshr = NULL);
+
+    /**
+     * Return the proper state given the current state and the bus response.
+     * @param pkt The bus response.
+     * @param oldState The current block state.
+     * @return The new state.
+     */
+    CacheBlk::State getNewState(Packet * &pkt,
+                                CacheBlk::State oldState);
+
+    /**
+     * Handle snooped bus requests.
+     * @param cache The cache that snooped the request.
+     * @param pkt The snooped bus request.
+     * @param blk The cache block corresponding to the request, if any.
+     * @param mshr The MSHR corresponding to the request, if any.
+     * @param new_state The new coherence state of the block.
+     * @return True if the request should be satisfied locally.
+     */
+    bool handleBusRequest(BaseCache *cache, Packet * &pkt, CacheBlk *blk,
+                          MSHR *mshr, CacheBlk::State &new_state);
+
+  protected:
+    /** Snoop function type. */
+    typedef bool (*SnoopFuncType)(BaseCache *, Packet *&, CacheBlk *,
+                                  MSHR *, CacheBlk::State&);
+
+    //
+    // Standard snoop transition functions
+    //
+
+    /**
+     * Do nothing transition.
+     */
+    static bool nullTransition(BaseCache *, Packet *&, CacheBlk *,
+                               MSHR *, CacheBlk::State&);
+
+    /**
+     * Invalid transition, basically panic.
+     */
+    static bool invalidTransition(BaseCache *, Packet *&, CacheBlk *,
+                                  MSHR *, CacheBlk::State&);
+
+    /**
+     * Invalidate block, move to Invalid state.
+     */
+    static bool invalidateTrans(BaseCache *, Packet *&, CacheBlk *,
+                                MSHR *, CacheBlk::State&);
+
+    /**
+     * Supply data, no state transition.
+     */
+    static bool supplyTrans(BaseCache *, Packet *&, CacheBlk *,
+                            MSHR *, CacheBlk::State&);
+
+    /**
+     * Supply data and go to Shared state.
+     */
+    static bool supplyAndGotoSharedTrans(BaseCache *, Packet *&, CacheBlk *,
+                                         MSHR *, CacheBlk::State&);
+
+    /**
+     * Supply data and go to Owned state.
+     */
+    static bool supplyAndGotoOwnedTrans(BaseCache *, Packet *&, CacheBlk *,
+                                        MSHR *, CacheBlk::State&);
+
+    /**
+     * Invalidate block, supply data, and go to Invalid state.
+     */
+    static bool supplyAndInvalidateTrans(BaseCache *, Packet *&, CacheBlk *,
+                                         MSHR *, CacheBlk::State&);
+
+    /**
+     * Assert the shared line for a block that is shared/exclusive.
+     */
+    static bool assertShared(BaseCache *, Packet *&, CacheBlk *,
+                                         MSHR *, CacheBlk::State&);
+
+    /**
+     * Definition of protocol state transitions.
+     */
+    class StateTransition
+    {
+        friend class CoherenceProtocol;
+
+        /** The bus command of this transition. */
+        Packet::Command busCmd;
+        /** The state to transition to. */
+        int newState;
+        /** The snoop function for this transition. */
+        SnoopFuncType snoopFunc;
+
+        /**
+         * Constructor, defaults to invalid transition.
+         */
+        StateTransition();
+
+        /**
+         * Initialize bus command.
+         * @param cmd The bus command to use.
+         */
+        void onRequest(Packet::Command cmd)
+        {
+            busCmd = cmd;
+        }
+
+        /**
+         * Set the transition state.
+         * @param s The new state.
+         */
+        void onResponse(CacheBlk::State s)
+        {
+            newState = s;
+        }
+
+        /**
+         * Initialize the snoop function.
+         * @param f The new snoop function.
+         */
+        void onSnoop(SnoopFuncType f)
+        {
+            snoopFunc = f;
+        }
+    };
+
+    friend class CoherenceProtocol::StateTransition;
+
+    /** Mask to select status bits relevant to coherence protocol. */
+    const static CacheBlk::State
+        stateMask = BlkValid | BlkWritable | BlkDirty;
+
+    /** The Modified (M) state. */
+    const static CacheBlk::State
+        Modified = BlkValid | BlkWritable | BlkDirty;
+    /** The Owned (O) state. */
+    const static CacheBlk::State
+        Owned = BlkValid | BlkDirty;
+    /** The Exclusive (E) state. */
+    const static CacheBlk::State
+        Exclusive = BlkValid | BlkWritable;
+    /** The Shared (S) state. */
+    const static CacheBlk::State
+        Shared = BlkValid;
+    /** The Invalid (I) state. */
+    const static CacheBlk::State
+        Invalid = 0;
+
+    /**
+     * Maximum state encoding value (used to size transition lookup
+     * table).  Could be more than number of states, depends on
+     * encoding of status bits.
+     */
+    const static int stateMax = stateMask;
+
+    /**
+     * The table of all possible transitions, organized by starting state and
+     * request command.
+     */
+    StateTransition transitionTable[stateMax+1][NUM_MEM_CMDS];
+
+    /**
+     * @addtogroup CoherenceStatistics
+     * @{
+     */
+    /**
+     * State accesses from parent cache.
+     */
+    Stats::Scalar<> requestCount[stateMax+1][NUM_MEM_CMDS];
+    /**
+     * State accesses from snooped requests.
+     */
+    Stats::Scalar<> snoopCount[stateMax+1][NUM_MEM_CMDS];
+    /**
+     * @}
+     */
+};
+
+#endif // __COHERENCE_PROTOCOL_HH__
diff --git a/src/mem/cache/coherence/simple_coherence.hh b/src/mem/cache/coherence/simple_coherence.hh
new file mode 100644
index 000000000..ca9d18beb
--- /dev/null
+++ b/src/mem/cache/coherence/simple_coherence.hh
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2003-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Erik Hallnor
+ *          Ron Dreslinski
+ */
+
+/**
+ * @file
+ * Declaration of a simple coherence policy.
+ */
+
+#ifndef __SIMPLE_COHERENCE_HH__
+#define __SIMPLE_COHERENCE_HH__
+
+#include <string>
+
+#include "mem/packet.hh"
+#include "mem/cache/cache_blk.hh"
+#include "mem/cache/miss/mshr_queue.hh"
+#include "mem/cache/coherence/coherence_protocol.hh"
+
+class BaseCache;
+
+/**
+ * A simple MP coherence policy. This policy assumes an atomic bus and only one
+ * level of cache.
+ */
+class SimpleCoherence
+{
+  protected:
+    /** Pointer to the parent cache; NULL until setCache() is called. */
+    BaseCache *cache;
+    /** Pointer to the coherence protocol. */
+    CoherenceProtocol *protocol;
+
+  public:
+    /**
+     * Construct and initialize this coherence policy.
+     * @param _protocol The coherence protocol to use.
+     */
+    SimpleCoherence(CoherenceProtocol *_protocol)
+        : cache(NULL), protocol(_protocol)
+    {
+    }
+
+    /**
+     * Set the pointer to the parent cache.
+     * @param _cache The parent cache.
+     */
+    void setCache(BaseCache *_cache)
+    {
+        cache = _cache;
+    }
+
+    /**
+     * Register statistics; this policy has none, so this is a no-op.
+     * @param name The name to prepend to stat descriptions.
+     */
+    void regStats(const std::string &name)
+    {
+    }
+
+    /**
+     * This policy does not forward invalidates, return NULL.
+     * @return NULL.
+     */
+    Packet * getPacket()
+    {
+        return NULL;
+    }
+
+    /**
+     * Return the proper state given the current state and the bus response.
+     * @param pkt The bus response.
+     * @param current The current block state.
+     * @return The new state.
+     */
+    CacheBlk::State getNewState(Packet * &pkt, CacheBlk::State current)
+    {
+        return protocol->getNewState(pkt, current);
+    }
+
+    /**
+     * Handle snooped bus requests.
+     * @param pkt The snooped bus request.
+     * @param blk The cache block corresponding to the request, if any.
+     * @param mshr The MSHR corresponding to the request, if any.
+     * @param new_state Set to the block's new state; untouched if blk is NULL.
+     * @return The protocol's result; false for a NULL blk or a writeback.
+     */
+    bool handleBusRequest(Packet * &pkt, CacheBlk *blk, MSHR *mshr,
+                          CacheBlk::State &new_state)
+    {
+        // No assert(mshr == NULL): an MSHR may exist, just not in service.
+        if (blk != NULL)
+        {
+            if (pkt->cmd != Packet::Writeback) {
+                return protocol->handleBusRequest(cache, pkt, blk, mshr,
+                                              new_state);
+            }
+            else { //It is a writeback, must be ownership protocol, just keep state
+                new_state = blk->status;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Get the proper bus command for the given command and status.
+     * @param cmd The request's command.
+     * @param state The current state of the cache block.
+     * @return Writeback for writebacks, else the protocol's bus command.
+     */
+    Packet::Command getBusCmd(Packet::Command &cmd, CacheBlk::State state)
+    {
+        if (cmd == Packet::Writeback) return Packet::Writeback;
+        return protocol->getBusCmd(cmd, state);
+    }
+
+    /**
+     * Fast cache writes are not supported by this policy; always false.
+     */
+    bool allowFastWrites() { return false; }
+    /** Always true: this policy delegates to a CoherenceProtocol. */
+    bool hasProtocol() { return true; }
+};
+
+#endif //__SIMPLE_COHERENCE_HH__
+
+
+
+
+
+
+
+
diff --git a/src/mem/cache/coherence/uni_coherence.cc b/src/mem/cache/coherence/uni_coherence.cc
new file mode 100644
index 000000000..5ab706269
--- /dev/null
+++ b/src/mem/cache/coherence/uni_coherence.cc
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2003-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Erik Hallnor
+ */
+
+#include "mem/cache/coherence/uni_coherence.hh"
+#include "mem/cache/base_cache.hh"
+
+#include "base/trace.hh"
+
+using namespace std;
+
+UniCoherence::UniCoherence()
+    : cshrs(50), cache(0) // cache stays null until setCache() is called
+{
+}
+
+Packet *
+UniCoherence::getPacket()
+{
+    bool unblock = cshrs.isFull(); // sampled before the entry is serviced
+    Packet* pkt = cshrs.getReq(); // NOTE(review): used below with no NULL check
+    cshrs.markInService((MSHR*)pkt->senderState);
+    if (!cshrs.havePending()) {
+        cache->clearSlaveRequest(Request_Coherence);
+    }
+    if (unblock) {
+        // CSHRs are always used as buffers, so one should have been freed
+        assert(!cshrs.isFull());
+        cache->clearBlocked(Blocked_Coherence);
+    }
+    return pkt;
+}
+
+/** Snoop handler: invalidates clear the state, others keep blk's status.
+ * @todo add support for returning slave requests, not doing them here.
+ */
+bool
+UniCoherence::handleBusRequest(Packet * &pkt, CacheBlk *blk, MSHR *mshr,
+                               CacheBlk::State &new_state)
+{
+    new_state = 0; // default result; only the non-invalidate path changes it
+    if (pkt->isInvalidate()) {
+        DPRINTF(Cache, "snoop inval on blk %x (blk ptr %x)\n",
+                pkt->getAddr(), blk);
+        if (!cache->isTopLevel()) {
+            // Forward to other caches: queue a new InvalidateReq in cshrs
+            Packet * tmp = new Packet(pkt->req, Packet::InvalidateReq, -1);
+            cshrs.allocate(tmp);
+            cache->setSlaveRequest(Request_Coherence, curTick);
+            if (cshrs.isFull()) {
+                cache->setBlockedForSnoop(Blocked_Coherence);
+            }
+        }
+    } else {
+        if (blk) {
+            new_state = blk->status;
+        }
+    }
+    return false; // this policy always returns false here
+}
diff --git a/src/mem/cache/coherence/uni_coherence.hh b/src/mem/cache/coherence/uni_coherence.hh
new file mode 100644
index 000000000..764bf6276
--- /dev/null
+++ b/src/mem/cache/coherence/uni_coherence.hh
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2003-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Erik Hallnor
+ */
+
+#ifndef __UNI_COHERENCE_HH__
+#define __UNI_COHERENCE_HH__
+
+#include "base/trace.hh"
+#include "base/misc.hh"
+#include "mem/cache/cache_blk.hh"
+#include "mem/cache/miss/mshr_queue.hh"
+#include "mem/packet.hh"
+
+class BaseCache;
+
+class UniCoherence
+{
+  protected:
+    /** Buffers to hold forwarded invalidates. */
+    MSHRQueue cshrs;
+    /** Pointer to the parent cache; assigned by setCache(). */
+    BaseCache *cache;
+
+  public:
+    /**
+     * Construct and initialize this coherence policy.
+     */
+    UniCoherence();
+
+    /**
+     * Set the pointer to the parent cache.
+     * @param _cache The parent cache.
+     */
+    void setCache(BaseCache *_cache)
+    {
+        cache = _cache;
+    }
+
+    /**
+     * Register statistics; this policy has none, so this is a no-op.
+     * @param name The name to prepend to stat descriptions.
+     */
+    void regStats(const std::string &name)
+    {
+    }
+
+    /**
+     * Return ReadReq unless the command is a writeback.
+     * @param cmd The request's command.
+     * @param state The current block state; only used to warn on prefetches.
+     * @return Writeback for writebacks, ReadReq for everything else.
+     * @todo Make changes so writebacks don't get here.
+     */
+    Packet::Command getBusCmd(Packet::Command &cmd, CacheBlk::State state)
+    {
+        if (cmd == Packet::HardPFReq && state)
+            warn("Trying to issue a prefetch to a block we already have\n");
+        if (cmd == Packet::Writeback)
+            return Packet::Writeback;
+        return Packet::ReadReq;
+    }
+
+    /**
+     * Every response yields a valid and writable block.
+     * @param pkt The bus response.
+     * @param current The current block state (not consulted).
+     * @return BlkValid | BlkWritable, plus BlkHWPrefetched for HardPFReq.
+     */
+    CacheBlk::State getNewState(Packet * &pkt, CacheBlk::State current)
+    {
+        if (pkt->senderState) //Blocking Buffers don't get mshrs
+        {
+            if (((MSHR *)(pkt->senderState))->originalCmd == Packet::HardPFReq) {
+                DPRINTF(HWPrefetch, "Marking a hardware prefetch as such in the state\n");
+                return BlkHWPrefetched | BlkValid | BlkWritable;
+            }
+            else {
+                return BlkValid | BlkWritable;
+            }
+        }
+        //@todo What about prefetching with blocking buffers
+        else
+            return BlkValid | BlkWritable;
+    }
+    /**
+     * Return outstanding invalidate to forward.
+     * @return The next invalidate to forward to lower levels of cache.
+     */
+    Packet * getPacket();
+
+    /**
+     * Handle snooped bus requests.
+     * @param pkt The snooped bus request.
+     * @param blk The cache block corresponding to the request, if any.
+     * @param mshr The MSHR corresponding to the request, if any.
+     * @param new_state The new coherence state of the block.
+     * @return True if the request should be satisfied locally.
+     */
+    bool handleBusRequest(Packet * &pkt, CacheBlk *blk, MSHR *mshr,
+                          CacheBlk::State &new_state);
+
+    /**
+     * Return true if this coherence policy can handle fast cache writes.
+     */
+    bool allowFastWrites() { return true; }
+    /** Always false: this policy holds no CoherenceProtocol. */
+    bool hasProtocol() { return false; }
+};
+
+#endif //__UNI_COHERENCE_HH__
diff --git a/src/mem/cache/miss/blocking_buffer.cc b/src/mem/cache/miss/blocking_buffer.cc
new file mode 100644
index 000000000..10d53b109
--- /dev/null
+++ b/src/mem/cache/miss/blocking_buffer.cc
@@ -0,0 +1,245 @@
+/*
+ * Copyright (c) 2003-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Erik Hallnor
+ */
+
+/**
+ * @file
+ * Definitions of a simple buffer for a blocking cache.
+ */
+
+#include "cpu/smt.hh" //for maxThreadsPerCPU
+#include "mem/cache/base_cache.hh"
+#include "mem/cache/miss/blocking_buffer.hh"
+#include "mem/cache/prefetch/base_prefetcher.hh"
+#include "sim/eventq.hh" // for Event declaration.
+#include "mem/request.hh"
+
+using namespace TheISA;
+
+/** Register the writebacks vector under the name "<name>.writebacks".
+ * @todo Move writebacks into shared BaseBuffer class.
+ */
+void
+BlockingBuffer::regStats(const std::string &name)
+{
+    using namespace Stats;
+    writebacks
+        .init(maxThreadsPerCPU) // one entry per possible thread
+        .name(name + ".writebacks")
+        .desc("number of writebacks")
+        .flags(total)
+        ;
+}
+
+void
+BlockingBuffer::setCache(BaseCache *_cache)
+{
+    cache = _cache;                  // back pointer to the parent cache
+    blkSize = cache->getBlockSize(); // dereferences it: must not be NULL
+}
+
+void
+BlockingBuffer::setPrefetcher(BasePrefetcher *_prefetcher)
+{
+    prefetcher = _prefetcher; // stored only; nothing else in this file reads it
+}
+void
+BlockingBuffer::handleMiss(Packet * &pkt, int blk_size, Tick time)
+{
+    Addr blk_addr = pkt->getAddr() & ~(Addr)(blk_size - 1); // block-aligned if blk_size is a power of two
+    if (pkt->isWrite() && (pkt->req->isUncacheable() || !writeAllocate ||
+                               !pkt->needsResponse())) {
+        if (!pkt->needsResponse()) {
+            wb.allocateAsBuffer(pkt);
+        } else {
+            wb.allocate(pkt->cmd, blk_addr, pkt->req->getAsid(), blk_size, pkt);
+        }
+        // NOTE(review): copies blk_size bytes, not pkt->getSize() -- confirm.
+        memcpy(wb.pkt->getPtr<uint8_t>(), pkt->getPtr<uint8_t>(), blk_size);
+        // The write sits in the wb buffer: block the cache, request the bus.
+        cache->setBlocked(Blocked_NoWBBuffers);
+        cache->setMasterRequest(Request_WB, time);
+        return;
+    }
+    // Every other miss is placed in the single miss buffer.
+    if (!pkt->needsResponse()) {
+        miss.allocateAsBuffer(pkt);
+    } else {
+        miss.allocate(pkt->cmd, blk_addr, pkt->req->getAsid(), blk_size, pkt);
+    }
+    if (!pkt->req->isUncacheable()) {
+        miss.pkt->flags |= CACHE_LINE_FILL;
+    }
+    cache->setBlocked(Blocked_NoMSHRs);
+    cache->setMasterRequest(Request_MSHR, time);
+}
+
+Packet *
+BlockingBuffer::getPacket()
+{
+    if (miss.pkt && !miss.inService) {
+        return miss.pkt; // a miss not yet in service is returned first
+    }
+    return wb.pkt; // no inService check here, unlike the miss buffer
+}
+
+void
+BlockingBuffer::setBusCmd(Packet * &pkt, Packet::Command cmd)
+{
+    MSHR *mshr = (MSHR*) pkt->senderState;
+    mshr->originalCmd = pkt->cmd; // saved so restoreOrigCmd() can undo this
+    if (pkt->isCacheFill()) // only cache fills take the new command
+        pkt->cmdOverride(cmd);
+}
+
+void
+BlockingBuffer::restoreOrigCmd(Packet * &pkt)
+{
+    pkt->cmdOverride(((MSHR*)(pkt->senderState))->originalCmd); // undoes setBusCmd()
+}
+
+void
+BlockingBuffer::markInService(Packet * &pkt)
+{
+    if (!pkt->isCacheFill() && pkt->isWrite()) {
+        // Forwarding a write/writeback, so the command needs no change;
+        // such a packet must belong to the wb buffer (asserted below).
+        assert((MSHR*)pkt->senderState == &wb);
+        cache->clearMasterRequest(Request_WB);
+        if (!pkt->needsResponse()) { // no reply will come: free the buffer now
+            assert(wb.getNumTargets() == 0);
+            wb.deallocate();
+            cache->clearBlocked(Blocked_NoWBBuffers);
+        } else {
+            wb.inService = true;
+        }
+    } else {
+        assert((MSHR*)pkt->senderState == &miss);
+        cache->clearMasterRequest(Request_MSHR);
+        if (!pkt->needsResponse()) { // no reply will come: free the buffer now
+            assert(miss.getNumTargets() == 0);
+            miss.deallocate();
+            cache->clearBlocked(Blocked_NoMSHRs);
+        } else {
+            // Mark in service; handleResponse() deallocates it later.
+            miss.inService = true;
+        }
+    }
+}
+
+void
+BlockingBuffer::handleResponse(Packet * &pkt, Tick time)
+{
+    if (pkt->isCacheFill()) {
+        // targets were handled in the cache tags
+        assert((MSHR*)pkt->senderState == &miss);
+        miss.deallocate();
+        cache->clearBlocked(Blocked_NoMSHRs);
+    } else {
+        if (((MSHR*)(pkt->senderState))->hasTargets()) {
+            // Should only have 1 target if we had any
+            assert(((MSHR*)(pkt->senderState))->getNumTargets() == 1);
+            Packet * target = ((MSHR*)(pkt->senderState))->getTarget();
+            ((MSHR*)(pkt->senderState))->popTarget();
+            if (pkt->isRead()) { // copy the returned data into the target
+                memcpy(target->getPtr<uint8_t>(), pkt->getPtr<uint8_t>(), target->getSize());
+            }
+            cache->respond(target, time);
+            assert(!((MSHR*)(pkt->senderState))->hasTargets());
+        }
+        // Release the buffer the response belongs to and unblock the cache.
+        if (pkt->isWrite()) {
+            assert(((MSHR*)(pkt->senderState)) == &wb);
+            wb.deallocate();
+            cache->clearBlocked(Blocked_NoWBBuffers);
+        } else {
+            miss.deallocate();
+            cache->clearBlocked(Blocked_NoMSHRs);
+        }
+    }
+}
+
+void
+BlockingBuffer::squash(int threadNum)
+{
+    if (miss.threadNum == threadNum) {
+        Packet * target = miss.getTarget(); // NOTE(review): assumes a target exists -- confirm
+        miss.popTarget();
+        assert(target->req->getThreadNum() == threadNum);
+        target = NULL; // the popped packet is dropped here, not deleted
+        assert(!miss.hasTargets());
+        miss.ntargets=0;
+        if (!miss.inService) { // not in service yet: free the buffer right away
+            miss.deallocate();
+            cache->clearBlocked(Blocked_NoMSHRs);
+            cache->clearMasterRequest(Request_MSHR);
+        }
+    }
+}
+
+void
+BlockingBuffer::doWriteback(Addr addr, int asid,
+                            int size, uint8_t *data, bool compressed)
+{
+    // Generate request; note that asid is not used in this function.
+    Request * req = new Request(addr, size, 0);
+    Packet * pkt = new Packet(req, Packet::Writeback, -1);
+    pkt->allocate();
+    if (data) { // data may be NULL, in which case nothing is copied
+        memcpy(pkt->getPtr<uint8_t>(), data, size);
+    }
+
+    if (compressed) {
+        pkt->flags |= COMPRESSED;
+    }
+
+    ///All writebacks charged to same thread @todo figure this out
+    writebacks[pkt->req->getThreadNum()]++;
+    // Park the packet in the wb buffer, request the bus, block the cache.
+    wb.allocateAsBuffer(pkt);
+    cache->setMasterRequest(Request_WB, curTick);
+    cache->setBlocked(Blocked_NoWBBuffers);
+}
+
+
+
+void
+BlockingBuffer::doWriteback(Packet * &pkt)
+{
+    writebacks[pkt->req->getThreadNum()]++; // counted against the packet's thread
+
+    wb.allocateAsBuffer(pkt);
+
+    // Since allocate as buffer copies the request,
+    // need to copy data here.
+        memcpy(wb.pkt->getPtr<uint8_t>(), pkt->getPtr<uint8_t>(), pkt->getSize());
+    // The wb buffer is now occupied: block the cache and request the bus.
+    cache->setBlocked(Blocked_NoWBBuffers);
+    cache->setMasterRequest(Request_WB, curTick);
+}
diff --git a/src/mem/cache/miss/blocking_buffer.hh b/src/mem/cache/miss/blocking_buffer.hh
new file mode 100644
index 000000000..39a06a377
--- /dev/null
+++ b/src/mem/cache/miss/blocking_buffer.hh
@@ -0,0 +1,256 @@
+/*
+ * Copyright (c) 2003-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Erik Hallnor
+ */
+
+/**
+ * @file
+ * Declaration of a simple buffer for a blocking cache.
+ */
+
+#ifndef __BLOCKING_BUFFER_HH__
+#define __BLOCKING_BUFFER_HH__
+
+#include <vector>
+
+#include "mem/cache/miss/mshr.hh"
+#include "base/statistics.hh"
+
+class BaseCache;
+class BasePrefetcher;
+
+/**
+ * Miss and writeback storage for a blocking cache.
+ */
+class BlockingBuffer
+{
+protected:
+    /** Miss storage. */
+    MSHR miss;
+    /** WB storage. */
+    MSHR wb;
+
+    //Params
+
+    /** Allocate on write misses. */
+    const bool writeAllocate;
+
+    /** Pointer to the parent cache; NULL until setCache() is called. */
+    BaseCache* cache;
+    /** Pointer to the prefetcher; NULL until setPrefetcher() is called. */
+    BasePrefetcher* prefetcher;
+
+    /** Block size of the parent cache; 0 until setCache() is called. */
+    int blkSize;
+
+    // Statistics
+    /**
+     * @addtogroup CacheStatistics
+     * @{
+     */
+    /** Number of blocks written back per thread. */
+    Stats::Vector<> writebacks;
+
+    /**
+     * @}
+     */
+
+public:
+    /**
+     * Builds and initializes this buffer.
+     * @param write_allocate If true, treat write misses the same as reads.
+     */
+    BlockingBuffer(bool write_allocate)
+        : writeAllocate(write_allocate), cache(NULL), prefetcher(NULL),
+          blkSize(0)
+    {}
+
+    /**
+     * Register statistics for this object.
+     * @param name The name of the parent cache.
+     */
+    void regStats(const std::string &name);
+
+    /**
+     * Called by the parent cache to set the back pointer.
+     * @param _cache A pointer to the parent cache.
+     */
+    void setCache(BaseCache *_cache);
+    /** Set the prefetcher pointer. */
+    void setPrefetcher(BasePrefetcher *_prefetcher);
+
+    /**
+     * Handle a cache miss properly. Requests the bus and marks the cache as
+     * blocked.
+     * @param pkt The request that missed in the cache.
+     * @param blk_size The block size of the cache.
+     * @param time The time the miss is detected.
+     */
+    void handleMiss(Packet * &pkt, int blk_size, Tick time);
+
+    /**
+     * Unimplemented here (calls fatal()); would fetch a block for a target.
+     * @param addr The address to fetch.
+     * @param asid The address space of the address.
+     * @param blk_size The block size of the cache.
+     * @param time The time the miss is detected.
+     * @param target The target for the fetch.
+     */
+    MSHR* fetchBlock(Addr addr, int asid, int blk_size, Tick time,
+                     Packet * &target)
+    {
+        fatal("Unimplemented");
+    }
+
+    /**
+     * Selects an outstanding request to service.
+     * @return The request to service, NULL if none found.
+     */
+    Packet * getPacket();
+
+    /**
+     * Set the command to the given bus command.
+     * @param pkt The request to update.
+     * @param cmd The bus command to use.
+     */
+    void setBusCmd(Packet * &pkt, Packet::Command cmd);
+
+    /**
+     * Restore the original command in case of a bus transmission error.
+     * @param pkt The request to reset.
+     */
+    void restoreOrigCmd(Packet * &pkt);
+
+    /**
+     * Marks a request as in service (sent on the bus). This can have side
+     * effect since storage for no response commands is deallocated once they
+     * are successfully sent.
+     * @param pkt The request that was sent on the bus.
+     */
+    void markInService(Packet * &pkt);
+
+    /**
+     * Frees the resources of the request and unblock the cache.
+     * @param pkt The request that has been satisfied.
+     * @param time The time when the request is satisfied.
+     */
+    void handleResponse(Packet * &pkt, Tick time);
+
+    /**
+     * Removes all outstanding requests for a given thread number. If a request
+     * has been sent to the bus, this function removes all of its targets.
+     * @param threadNum The thread number of the requests to squash.
+     */
+    void squash(int threadNum);
+
+    /**
+     * Return the current number of outstanding misses.
+     * @return the number of targets held by the miss buffer.
+     */
+    int getMisses()
+    {
+        return miss.getNumTargets();
+    }
+
+    /**
+     * Searches for the supplied address in the miss "queue".
+     * @param addr The address to look for.
+     * @param asid The address space id (not consulted).
+     * @return A pointer to miss if it matches, NULL otherwise.
+     */
+    MSHR* findMSHR(Addr addr, int asid)
+    {
+        if (miss.addr == addr && miss.pkt)
+            return &miss;
+        return NULL;
+    }
+
+    /**
+     * Searches for the supplied address in the write buffer.
+     * @param addr The address to look for.
+     * @param asid The address space id (not consulted).
+     * @param writes List of pointers to the matching writes.
+     * @return True if there is a matching write.
+     */
+    bool findWrites(Addr addr, int asid, std::vector<MSHR*>& writes)
+    {
+        if (wb.addr == addr && wb.pkt) {
+            writes.push_back(&wb);
+            return true;
+        }
+        return false;
+    }
+
+
+
+    /**
+     * Perform a writeback of dirty data to the given address.
+     * @param addr The address to write to.
+     * @param asid The address space id.
+     * @param size The number of bytes to write.
+     * @param data The data to write, can be NULL.
+     * @param compressed True if the data is compressed.
+     */
+    void doWriteback(Addr addr, int asid,
+                     int size, uint8_t *data, bool compressed);
+
+    /**
+     * Perform a writeback request.
+     * @param pkt The writeback request.
+     */
+    void doWriteback(Packet * &pkt);
+
+    /**
+     * Returns true if there are outstanding requests.
+     * @return True if either buffer is not marked in service.
+     */
+    bool havePending()
+    {
+        return !miss.inService || !wb.inService;
+    }
+
+    /**
+     * Not supported by a blocking buffer; calling it is a fatal error.
+     * @param mshr The mshr to add a target to.
+     * @param pkt The target to add.
+     */
+    void addTarget(MSHR *mshr, Packet * &pkt)
+    {
+        fatal("Shouldn't call this on a blocking buffer.");
+    }
+
+    /**
+     * Dummy implementation; calling it is a fatal error.
+     */
+    MSHR* allocateTargetList(Addr addr, int asid)
+    {
+        fatal("Unimplemented");
+    }
+};
+
+#endif // __BLOCKING_BUFFER_HH__
diff --git a/src/mem/cache/miss/miss_queue.cc b/src/mem/cache/miss/miss_queue.cc
new file mode 100644
index 000000000..4a3dc1062
--- /dev/null
+++ b/src/mem/cache/miss/miss_queue.cc
@@ -0,0 +1,757 @@
+/*
+ * Copyright (c) 2003-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Erik Hallnor
+ *          Ron Dreslinski
+ */
+
+/**
+ * @file
+ * Miss and writeback queue definitions.
+ */
+
+#include "cpu/smt.hh" //for maxThreadsPerCPU
+#include "mem/cache/base_cache.hh"
+#include "mem/cache/miss/miss_queue.hh"
+#include "mem/cache/prefetch/base_prefetcher.hh"
+
+using namespace std;
+
+// simple constructor
+/**
+ * @todo Remove the extra entries (numMSHRs+1000) from the write buffer
+ * constructor once we handle stalling on writebacks due to compression writes.
+ */
+MissQueue::MissQueue(int numMSHRs, int numTargets, int write_buffers,
+                     bool write_allocate, bool prefetch_miss)
+    : mq(numMSHRs, 4), wb(write_buffers,numMSHRs+1000), numMSHR(numMSHRs),
+      numTarget(numTargets), writeBuffers(write_buffers),
+      writeAllocate(write_allocate), order(0), prefetchMiss(prefetch_miss)
+{   // cache and prefetcher are not set here; see setCache()/setPrefetcher().
+    noTargetMSHR = NULL;  // no MSHR has reached its target limit yet
+}
+
+void
+MissQueue::regStats(const string &name)
+{
+    // Registers every statistic below under the "<name>." prefix.
+    //@todo Fix command strings so this throwaway packet isn't necessary
+    Request temp_req((Addr) NULL, 4, 0);
+    Packet::Command temp_cmd = Packet::ReadReq;
+    Packet temp_pkt(&temp_req, temp_cmd, 0);  // only used for cmdIdxToString()
+    temp_pkt.allocate();
+    using namespace Stats;
+    writebacks
+        .init(maxThreadsPerCPU)
+        .name(name + ".writebacks")
+        .desc("number of writebacks")
+        .flags(total)
+        ;
+
+    // MSHR hit statistics
+    for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) {
+        Packet::Command cmd = (Packet::Command)access_idx;
+        const string &cstr = temp_pkt.cmdIdxToString(cmd);
+
+        mshr_hits[access_idx]
+            .init(maxThreadsPerCPU)
+            .name(name + "." + cstr + "_mshr_hits")
+            .desc("number of " + cstr + " MSHR hits")
+            .flags(total | nozero | nonan)
+            ;
+    }
+
+    demandMshrHits
+        .name(name + ".demand_mshr_hits")
+        .desc("number of demand (read+write) MSHR hits")
+        .flags(total)
+        ;
+    demandMshrHits = mshr_hits[Packet::ReadReq] + mshr_hits[Packet::WriteReq];
+
+    overallMshrHits
+        .name(name + ".overall_mshr_hits")
+        .desc("number of overall MSHR hits")
+        .flags(total)
+        ;
+    overallMshrHits = demandMshrHits + mshr_hits[Packet::SoftPFReq] +
+        mshr_hits[Packet::HardPFReq];
+
+    // MSHR miss statistics
+    for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) {
+        Packet::Command cmd = (Packet::Command)access_idx;
+        const string &cstr = temp_pkt.cmdIdxToString(cmd);
+
+        mshr_misses[access_idx]
+            .init(maxThreadsPerCPU)
+            .name(name + "." + cstr + "_mshr_misses")
+            .desc("number of " + cstr + " MSHR misses")
+            .flags(total | nozero | nonan)
+            ;
+    }
+
+    demandMshrMisses
+        .name(name + ".demand_mshr_misses")
+        .desc("number of demand (read+write) MSHR misses")
+        .flags(total)
+        ;
+    demandMshrMisses = mshr_misses[Packet::ReadReq] + mshr_misses[Packet::WriteReq];
+
+    overallMshrMisses
+        .name(name + ".overall_mshr_misses")
+        .desc("number of overall MSHR misses")
+        .flags(total)
+        ;
+    overallMshrMisses = demandMshrMisses + mshr_misses[Packet::SoftPFReq] +
+        mshr_misses[Packet::HardPFReq];
+
+    // MSHR miss latency statistics
+    for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) {
+        Packet::Command cmd = (Packet::Command)access_idx;
+        const string &cstr = temp_pkt.cmdIdxToString(cmd);
+
+        mshr_miss_latency[access_idx]
+            .init(maxThreadsPerCPU)
+            .name(name + "." + cstr + "_mshr_miss_latency")
+            .desc("number of " + cstr + " MSHR miss cycles")
+            .flags(total | nozero | nonan)
+            ;
+    }
+
+    demandMshrMissLatency
+        .name(name + ".demand_mshr_miss_latency")
+        .desc("number of demand (read+write) MSHR miss cycles")
+        .flags(total)
+        ;
+    demandMshrMissLatency = mshr_miss_latency[Packet::ReadReq]
+        + mshr_miss_latency[Packet::WriteReq];
+
+    overallMshrMissLatency
+        .name(name + ".overall_mshr_miss_latency")
+        .desc("number of overall MSHR miss cycles")
+        .flags(total)
+        ;
+    overallMshrMissLatency = demandMshrMissLatency +
+        mshr_miss_latency[Packet::SoftPFReq] + mshr_miss_latency[Packet::HardPFReq];
+
+    // MSHR uncacheable statistics
+    for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) {
+        Packet::Command cmd = (Packet::Command)access_idx;
+        const string &cstr = temp_pkt.cmdIdxToString(cmd);
+
+        mshr_uncacheable[access_idx]
+            .init(maxThreadsPerCPU)
+            .name(name + "." + cstr + "_mshr_uncacheable")
+            .desc("number of " + cstr + " MSHR uncacheable")
+            .flags(total | nozero | nonan)
+            ;
+    }
+
+    overallMshrUncacheable
+        .name(name + ".overall_mshr_uncacheable_misses")
+        .desc("number of overall MSHR uncacheable misses")
+        .flags(total)
+        ;
+    overallMshrUncacheable = mshr_uncacheable[Packet::ReadReq]
+        + mshr_uncacheable[Packet::WriteReq] + mshr_uncacheable[Packet::SoftPFReq]
+        + mshr_uncacheable[Packet::HardPFReq];
+
+    // MSHR uncacheable latency statistics
+    for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) {
+        Packet::Command cmd = (Packet::Command)access_idx;
+        const string &cstr = temp_pkt.cmdIdxToString(cmd);
+
+        mshr_uncacheable_lat[access_idx]
+            .init(maxThreadsPerCPU)
+            .name(name + "." + cstr + "_mshr_uncacheable_latency")
+            .desc("number of " + cstr + " MSHR uncacheable cycles")
+            .flags(total | nozero | nonan)
+            ;
+    }
+
+    overallMshrUncacheableLatency
+        .name(name + ".overall_mshr_uncacheable_latency")
+        .desc("number of overall MSHR uncacheable cycles")
+        .flags(total)
+        ;
+    overallMshrUncacheableLatency = mshr_uncacheable_lat[Packet::ReadReq]
+        + mshr_uncacheable_lat[Packet::WriteReq]
+        + mshr_uncacheable_lat[Packet::SoftPFReq]
+        + mshr_uncacheable_lat[Packet::HardPFReq];
+
+#if 0
+    // MSHR access formulas
+    for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) {
+        Packet::Command cmd = (Packet::Command)access_idx;
+        const string &cstr = temp_pkt.cmdIdxToString(cmd);
+
+        mshrAccesses[access_idx]
+            .name(name + "." + cstr + "_mshr_accesses")
+            .desc("number of " + cstr + " mshr accesses(hits+misses)")
+            .flags(total | nozero | nonan)
+            ;
+        mshrAccesses[access_idx] =
+            mshr_hits[access_idx] + mshr_misses[access_idx]
+            + mshr_uncacheable[access_idx];
+    }
+
+    demandMshrAccesses
+        .name(name + ".demand_mshr_accesses")
+        .desc("number of demand (read+write) mshr accesses")
+        .flags(total | nozero | nonan)
+        ;
+    demandMshrAccesses = demandMshrHits + demandMshrMisses;
+
+    overallMshrAccesses
+        .name(name + ".overall_mshr_accesses")
+        .desc("number of overall (read+write) mshr accesses")
+        .flags(total | nozero | nonan)
+        ;
+    overallMshrAccesses = overallMshrHits + overallMshrMisses
+        + overallMshrUncacheable;
+#endif
+
+    // MSHR miss rate formulas (these read cache->..., so setCache() must run first)
+    for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) {
+        Packet::Command cmd = (Packet::Command)access_idx;
+        const string &cstr = temp_pkt.cmdIdxToString(cmd);
+
+        mshrMissRate[access_idx]
+            .name(name + "." + cstr + "_mshr_miss_rate")
+            .desc("mshr miss rate for " + cstr + " accesses")
+            .flags(total | nozero | nonan)
+            ;
+
+        mshrMissRate[access_idx] =
+            mshr_misses[access_idx] / cache->accesses[access_idx];
+    }
+
+    demandMshrMissRate
+        .name(name + ".demand_mshr_miss_rate")
+        .desc("mshr miss rate for demand accesses")
+        .flags(total)
+        ;
+    demandMshrMissRate = demandMshrMisses / cache->demandAccesses;
+
+    overallMshrMissRate
+        .name(name + ".overall_mshr_miss_rate")
+        .desc("mshr miss rate for overall accesses")
+        .flags(total)
+        ;
+    overallMshrMissRate = overallMshrMisses / cache->overallAccesses;
+
+    // mshrMiss latency formulas
+    for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) {
+        Packet::Command cmd = (Packet::Command)access_idx;
+        const string &cstr = temp_pkt.cmdIdxToString(cmd);
+
+        avgMshrMissLatency[access_idx]
+            .name(name + "." + cstr + "_avg_mshr_miss_latency")
+            .desc("average " + cstr + " mshr miss latency")
+            .flags(total | nozero | nonan)
+            ;
+
+        avgMshrMissLatency[access_idx] =
+            mshr_miss_latency[access_idx] / mshr_misses[access_idx];
+    }
+
+    demandAvgMshrMissLatency
+        .name(name + ".demand_avg_mshr_miss_latency")
+        .desc("average demand mshr miss latency")
+        .flags(total)
+        ;
+    demandAvgMshrMissLatency = demandMshrMissLatency / demandMshrMisses;
+
+    overallAvgMshrMissLatency
+        .name(name + ".overall_avg_mshr_miss_latency")
+        .desc("average overall mshr miss latency")
+        .flags(total)
+        ;
+    overallAvgMshrMissLatency = overallMshrMissLatency / overallMshrMisses;
+
+    // mshrUncacheable latency formulas
+    for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) {
+        Packet::Command cmd = (Packet::Command)access_idx;
+        const string &cstr = temp_pkt.cmdIdxToString(cmd);
+
+        avgMshrUncacheableLatency[access_idx]
+            .name(name + "." + cstr + "_avg_mshr_uncacheable_latency")
+            .desc("average " + cstr + " mshr uncacheable latency")
+            .flags(total | nozero | nonan)
+            ;
+
+        avgMshrUncacheableLatency[access_idx] =
+            mshr_uncacheable_lat[access_idx] / mshr_uncacheable[access_idx];
+    }
+
+    overallAvgMshrUncacheableLatency
+        .name(name + ".overall_avg_mshr_uncacheable_latency")
+        .desc("average overall mshr uncacheable latency")
+        .flags(total)
+        ;
+    overallAvgMshrUncacheableLatency = overallMshrUncacheableLatency / overallMshrUncacheable;
+
+    mshr_cap_events
+        .init(maxThreadsPerCPU)
+        .name(name + ".mshr_cap_events")
+        .desc("number of times MSHR cap was activated")
+        .flags(total)
+        ;
+
+    //software prefetching stats
+    soft_prefetch_mshr_full
+        .init(maxThreadsPerCPU)
+        .name(name + ".soft_prefetch_mshr_full")
+        .desc("number of mshr full events for SW prefetching instructions")
+        .flags(total)
+        ;
+
+    mshr_no_allocate_misses
+        .name(name +".no_allocate_misses")
+        .desc("Number of misses that were no-allocate")
+        ;
+
+}
+
+void
+MissQueue::setCache(BaseCache *_cache)
+{
+    cache = _cache;                   // back pointer to the parent cache
+    blkSize = cache->getBlockSize();  // block size is taken from that cache
+}
+
+void
+MissQueue::setPrefetcher(BasePrefetcher *_prefetcher)
+{
+    prefetcher = _prefetcher;  // stored as-is; used by handleMiss()/getPacket()
+}
+
+MSHR*
+MissQueue::allocateMiss(Packet * &pkt, int size, Tick time)
+{
+    // `size` is not consulted: the entry is always allocated with blkSize.
+    MSHR* const entry = mq.allocate(pkt, blkSize);
+    entry->order = order++;
+    const bool cacheable = !pkt->req->isUncacheable();
+    if (cacheable)  // cacheable misses are tagged as cache line fills
+        entry->pkt->flags |= CACHE_LINE_FILL;
+    if (mq.isFull())
+        cache->setBlocked(Blocked_NoMSHRs);
+    // Request the bus for everything except hardware prefetches.
+    const bool hw_prefetch = (pkt->cmd == Packet::HardPFReq);
+    if (!hw_prefetch) {
+        cache->setMasterRequest(Request_MSHR, time);
+    }
+    return entry;
+}
+
+
+MSHR*
+MissQueue::allocateWrite(Packet * &pkt, int size, Tick time)
+{   // Buffers pkt in the write buffer; `size` is not read by the active code.
+    MSHR* mshr = wb.allocate(pkt,blkSize);
+    mshr->order = order++;
+
+// Compression support is compiled out for now; only the plain copy below runs.
+#if 0
+    if (pkt->isCompressed()) {
+        mshr->pkt->deleteData();
+        mshr->pkt->actualSize = pkt->actualSize;
+        mshr->pkt->data = new uint8_t[pkt->actualSize];
+        memcpy(mshr->pkt->data, pkt->data, pkt->actualSize);
+    } else {
+#endif
+        memcpy(mshr->pkt->getPtr<uint8_t>(), pkt->getPtr<uint8_t>(), pkt->getSize());
+    // (the "else {" above is compiled out, so there is no brace to close here)
+
+    if (wb.isFull()) {
+        cache->setBlocked(Blocked_NoWBBuffers);
+    }
+
+    cache->setMasterRequest(Request_WB, time);
+
+    return mshr;
+}
+
+
+/**
+ * @todo Remove SW prefetches on mshr hits.
+ */
+void
+MissQueue::handleMiss(Packet * &pkt, int blkSize, Tick time)
+{   // Merges pkt into a matching MSHR, else allocates a write-buffer or MSHR entry.
+//    if (!cache->isTopLevel())
+    if (prefetchMiss) prefetcher->handleMiss(pkt, time);
+    // NOTE(review): `size` is assigned below but never read in this function.
+    int size = blkSize;  // the blkSize parameter shadows the member of the same name
+    Addr blkAddr = pkt->getAddr() & ~(Addr)(blkSize-1);
+    MSHR* mshr = NULL;
+    if (!pkt->req->isUncacheable()) {
+        mshr = mq.findMatch(blkAddr, pkt->req->getAsid());
+        if (mshr) {
+            //@todo remove hw_pf here
+            mshr_hits[pkt->cmdToIndex()][pkt->req->getThreadNum()]++;
+            if (mshr->threadNum != pkt->req->getThreadNum()) {
+                mshr->threadNum = -1;  // targets now come from more than one thread
+            }
+            mq.allocateTarget(mshr, pkt);
+            if (mshr->pkt->isNoAllocate() && !pkt->isNoAllocate()) {
+                // An allocating request joined a no-allocate MSHR: drop the flag
+                mshr->pkt->flags &= ~NO_ALLOCATE;
+            }
+            if (mshr->getNumTargets() == numTarget) {
+                noTargetMSHR = mshr;  // target limit reached: block the cache
+                cache->setBlocked(Blocked_NoTargets);
+                mq.moveToFront(mshr);
+            }
+            return;
+        }
+        if (pkt->isNoAllocate()) {
+            // No-allocate misses go to a separate scalar counter
+            mshr_no_allocate_misses++;
+        }
+        else {
+            mshr_misses[pkt->cmdToIndex()][pkt->req->getThreadNum()]++;
+        }
+    } else {
+        // Count uncacheable accesses
+        mshr_uncacheable[pkt->cmdToIndex()][pkt->req->getThreadNum()]++;
+        size = pkt->getSize();
+    }
+    if (pkt->isWrite() && (pkt->req->isUncacheable() || !writeAllocate ||
+                               !pkt->needsResponse())) {
+        /**
+         * @todo Add write merging here.
+         */
+        mshr = allocateWrite(pkt, blkSize, time);
+        return;
+    }
+    // Everything else takes a new entry in the miss queue.
+    mshr = allocateMiss(pkt, blkSize, time);
+}
+
+MSHR*
+MissQueue::fetchBlock(Addr addr, int asid, int blk_size, Tick time,
+                      Packet * &target)
+{
+    const Addr aligned = addr & ~(Addr)(blk_size - 1);  // clear the low bits
+    assert(mq.findMatch(addr, asid) == NULL);
+    MSHR* const entry = mq.allocateFetch(aligned, asid, blk_size, target);
+    entry->order = order++;
+    entry->pkt->flags |= CACHE_LINE_FILL;
+    if (mq.isFull())
+        cache->setBlocked(Blocked_NoMSHRs);
+    // A fetch always requests the bus for the miss queue.
+    cache->setMasterRequest(Request_MSHR, time);
+    return entry;
+}
+
+Packet *
+MissQueue::getPacket()
+{   // Picks the next packet to send: a miss, a buffered write, or a prefetch.
+    Packet * pkt = mq.getReq();
+    if (((wb.isFull() && wb.inServiceMSHRs == 0) || !pkt ||
+         pkt->time > curTick) && wb.havePending()) {
+        pkt = wb.getReq();
+        // Look for a pending miss that conflicts with this write.
+        MSHR *mshr = mq.findPending(pkt);
+        if (mshr && mshr->order < ((MSHR*)(pkt->senderState))->order) {
+            // Service misses in order until conflict is cleared.
+            return mq.getReq();
+        }
+    }
+    if (pkt) {
+        MSHR* mshr = wb.findPending(pkt);
+        if (mshr /*&& mshr->order < pkt->senderState->order*/) {
+            // The only way this happens is if we are
+            // doing a write and we didn't have permissions
+            // then subsequently saw a writeback(owned got evicted)
+            // We need to make sure to perform the writeback first
+            // To preserve the dirty data, then we can issue the write
+            return wb.getReq();
+        }
+    }
+    else if (!mq.isFull()){
+        //If we have a miss queue slot, we can try a prefetch
+        pkt = prefetcher->getPacket();
+        if (pkt) {
+            //Update statistic on number of prefetches issued (hwpf_mshr_misses)
+            mshr_misses[pkt->cmdToIndex()][pkt->req->getThreadNum()]++;
+            //It will request the bus for the future, but should clear that immediately
+            allocateMiss(pkt, pkt->getSize(), curTick);
+            pkt = mq.getReq();
+            assert(pkt); //We should get back a req b/c we just put one in
+        }
+    }
+    return pkt;
+}
+
+void
+MissQueue::setBusCmd(Packet * &pkt, Packet::Command cmd)
+{
+    assert(pkt->senderState != 0);
+    // Remember the packet's own command so restoreOrigCmd() can put it back.
+    ((MSHR*)(pkt->senderState))->originalCmd = pkt->cmd;
+    if (!pkt->isCacheFill() && !pkt->isNoAllocate()) return;
+    pkt->cmd = cmd;
+}
+
+void
+MissQueue::restoreOrigCmd(Packet * &pkt)
+{   // Puts back the command saved in the packet's MSHR by setBusCmd().
+    pkt->cmd = ((MSHR*)(pkt->senderState))->originalCmd;
+}
+
+void
+MissQueue::markInService(Packet * &pkt)
+{   // Marks pkt's MSHR in service in wb or mq and lifts a resolved queue-full block.
+    assert(pkt->senderState != 0);
+    bool unblock = false;
+    BlockedCause cause = NUM_BLOCKED_CAUSES;
+
+    /**
+     * @todo Should include MSHRQueue pointer in MSHR to select the correct
+     * one.
+     */
+    if ((!pkt->isCacheFill() && pkt->isWrite())) {
+        // Forwarding a write/ writeback, don't need to change
+        // the command
+        unblock = wb.isFull();  // remember whether the buffer was full beforehand
+        wb.markInService((MSHR*)pkt->senderState);
+        if (!wb.havePending()){
+            cache->clearMasterRequest(Request_WB);
+        }
+        if (unblock) {
+            // Do we really unblock?
+            unblock = !wb.isFull();
+            cause = Blocked_NoWBBuffers;
+        }
+    } else {
+        unblock = mq.isFull();  // remember whether the queue was full beforehand
+        mq.markInService((MSHR*)pkt->senderState);
+        if (!mq.havePending()){
+            cache->clearMasterRequest(Request_MSHR);
+        }
+        if (((MSHR*)(pkt->senderState))->originalCmd == Packet::HardPFReq) {
+            DPRINTF(HWPrefetch, "%s:Marking a HW_PF in service\n",
+                    cache->name());
+            //Also clear pending if need be
+            if (!prefetcher->havePending())
+            {
+                cache->clearMasterRequest(Request_PF);
+            }
+        }
+        if (unblock) {
+            unblock = !mq.isFull();  // only unblock if it is no longer full
+            cause = Blocked_NoMSHRs;
+        }
+    }
+    if (unblock) {
+        cache->clearBlocked(cause);
+    }
+}
+
+
+void
+MissQueue::handleResponse(Packet * &pkt, Tick time)
+{
+    // Retires or re-queues the MSHR behind a response, answers leftover targets,
+    // and lifts each blocked cause the response resolves (tracked separately).
+    MSHR* mshr = (MSHR*)pkt->senderState;
+    if (((MSHR*)(pkt->senderState))->originalCmd == Packet::HardPFReq) {
+        DPRINTF(HWPrefetch, "%s:Handling the response to a HW_PF\n",
+                cache->name());
+    }
+#ifndef NDEBUG
+    int num_targets = mshr->getNumTargets();
+#endif
+
+    bool unblock = false;
+    bool unblock_target = false;
+    BlockedCause cause = NUM_BLOCKED_CAUSES;
+
+    if (pkt->isCacheFill() && !pkt->isNoAllocate()) {
+        mshr_miss_latency[mshr->originalCmd][pkt->req->getThreadNum()] +=
+            curTick - pkt->time;
+        // targets were handled in the cache tags
+        if (mshr == noTargetMSHR) {
+            // we always clear at least one target
+            unblock_target = true;
+            // NoTargets is cleared separately below so "cause" cannot mask it
+            noTargetMSHR = NULL;
+        }
+
+        if (mshr->hasTargets()) {
+            // Didn't satisfy all the targets, need to resend
+            Packet::Command cmd = mshr->getTarget()->cmd;
+            mq.markPending(mshr, cmd);
+            mshr->order = order++;
+            cache->setMasterRequest(Request_MSHR, time);
+        } else {
+            unblock = mq.isFull();
+            mq.deallocate(mshr);
+            if (unblock) {
+                unblock = !mq.isFull();
+                cause = Blocked_NoMSHRs;
+            }
+        }
+    } else {
+        if (pkt->req->isUncacheable()) {
+            mshr_uncacheable_lat[pkt->cmd][pkt->req->getThreadNum()] +=
+                curTick - pkt->time;
+        }
+        if (mshr->hasTargets() && pkt->req->isUncacheable()) {
+            // Should only have 1 target if we had any
+            assert(num_targets == 1);
+            Packet * target = mshr->getTarget();
+            mshr->popTarget();
+            if (pkt->isRead()) {
+                memcpy(target->getPtr<uint8_t>(), pkt->getPtr<uint8_t>(),
+                       target->getSize());
+            }
+            cache->respond(target, time);
+            assert(!mshr->hasTargets());
+        }
+        else if (mshr->hasTargets()) {
+            //Must be a no_allocate with possibly more than one target
+            assert(mshr->pkt->isNoAllocate());
+            while (mshr->hasTargets()) {
+                Packet * target = mshr->getTarget();
+                mshr->popTarget();
+                if (pkt->isRead()) {
+                    memcpy(target->getPtr<uint8_t>(), pkt->getPtr<uint8_t>(),
+                           target->getSize());
+                }
+                cache->respond(target, time);
+            }
+        }
+
+        if (pkt->isWrite()) {
+            // If the write buffer is full, we might unblock now
+            unblock = wb.isFull();
+            wb.deallocate(mshr);
+            if (unblock) {
+                // Did we really unblock?
+                unblock = !wb.isFull();
+                cause = Blocked_NoWBBuffers;
+            }
+        } else {
+            unblock = mq.isFull();
+            mq.deallocate(mshr);
+            if (unblock) {
+                unblock = !mq.isFull();
+                cause = Blocked_NoMSHRs;
+            }
+        }
+    }
+    if (unblock_target) cache->clearBlocked(Blocked_NoTargets);
+    if (unblock) cache->clearBlocked(cause);
+}
+
+void
+MissQueue::squash(int threadNum)
+{
+    // Forwards the squash of threadNum to the miss queue and lifts the blocked
+    // causes it resolves; each cause is tracked on its own so neither is lost.
+    const bool drop_target_block =
+        noTargetMSHR && noTargetMSHR->threadNum == threadNum;
+    if (drop_target_block) {
+        noTargetMSHR = NULL;
+    }
+    const bool was_full = mq.isFull();
+    mq.squash(threadNum);
+    if (!mq.havePending()) {
+        cache->clearMasterRequest(Request_MSHR);
+    }
+    // noTargetMSHR was reset above, so its block has to be released with it.
+    if (drop_target_block) {
+        cache->clearBlocked(Blocked_NoTargets);
+    }
+    if (was_full && !mq.isFull()) {
+        cache->clearBlocked(Blocked_NoMSHRs);
+    }
+
+}
+
+MSHR*
+MissQueue::findMSHR(Addr addr, int asid) const
+{   // Searches only the miss queue (mq), not the write buffer.
+    return mq.findMatch(addr,asid);
+}
+
+bool
+MissQueue::findWrites(Addr addr, int asid, vector<MSHR*> &writes) const
+{   // Searches only the write buffer (wb); matches are collected in `writes`.
+    return wb.findMatches(addr,asid,writes);
+}
+
+void
+MissQueue::doWriteback(Addr addr, int asid,
+                       int size, uint8_t *data, bool compressed)
+{
+    // Build a Writeback packet around a freshly allocated request.
+    Packet * wb_pkt = new Packet(new Request(addr, size, 0),
+                                 Packet::Writeback, -1);
+    wb_pkt->allocate();
+    // A NULL data pointer skips the copy.
+    if (data)
+        memcpy(wb_pkt->getPtr<uint8_t>(), data, size);
+    if (compressed)
+        wb_pkt->flags |= COMPRESSED;
+
+    ///All writebacks charged to same thread @todo figure this out
+    // (asid is not used anywhere in this function)
+    writebacks[wb_pkt->req->getThreadNum()]++;
+
+    // The size argument is passed as 0; allocateWrite() does not read it.
+    allocateWrite(wb_pkt, 0, curTick);
+}
+
+
+void
+MissQueue::doWriteback(Packet * &pkt)
+{   // Counts the writeback for the packet's thread, then buffers it in wb.
+    writebacks[pkt->req->getThreadNum()]++;
+    allocateWrite(pkt, 0, curTick);
+}
+
+
+MSHR*
+MissQueue::allocateTargetList(Addr addr, int asid)
+{
+    // The new entry is tagged as a cache line fill; no bus request is made here.
+    MSHR* const entry = mq.allocateTargetList(addr, asid, blkSize);
+    entry->pkt->flags |= CACHE_LINE_FILL;
+    if (mq.isFull())
+        cache->setBlocked(Blocked_NoMSHRs);
+    return entry;
+}
+
+bool
+MissQueue::havePending()
+{   // True if the miss queue, the write buffer or the prefetcher has work pending.
+    return mq.havePending() || wb.havePending() || prefetcher->havePending();
+}
diff --git a/src/mem/cache/miss/miss_queue.hh b/src/mem/cache/miss/miss_queue.hh
new file mode 100644
index 000000000..b88b7038c
--- /dev/null
+++ b/src/mem/cache/miss/miss_queue.hh
@@ -0,0 +1,349 @@
+/*
+ * Copyright (c) 2003-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Erik Hallnor
+ */
+
+/**
+ * @file
+ * Miss and writeback queue declarations.
+ */
+
+#ifndef __MISS_QUEUE_HH__
+#define __MISS_QUEUE_HH__
+
+#include <vector>
+
+#include "mem/cache/miss/mshr.hh"
+#include "mem/cache/miss/mshr_queue.hh"
+#include "base/statistics.hh"
+
+class BaseCache;
+class BasePrefetcher;
+/**
+ * Manages cache misses and writebacks. Contains MSHRs to store miss data
+ * and the writebuffer for writes/writebacks.
+ * @todo need to handle data on writes better (encapsulate).
+ * @todo need to make replacements/writebacks happen in Cache::access
+ */
+class MissQueue
+{
+  protected:
+    /** The MSHRs. */
+    MSHRQueue mq;
+    /** Write Buffer. */
+    MSHRQueue wb;
+
+    // PARAMETERS
+
+    /** The number of MSHRs in the miss queue. */
+    const int numMSHR;
+    /** The number of targets for each MSHR. */
+    const int numTarget;
+    /** The number of write buffers. */
+    const int writeBuffers;
+    /** True if the cache should allocate on a write miss. */
+    const bool writeAllocate;
+    /** Pointer to the parent cache. */
+    BaseCache* cache;
+
+    /** The Prefetcher */
+    BasePrefetcher *prefetcher;
+
+    /** The block size of the parent cache. */
+    int blkSize;
+
+    /** Increasing order number assigned to each incoming request. */
+    uint64_t order;
+
+    bool prefetchMiss;
+
+    // Statistics
+    /**
+     * @addtogroup CacheStatistics
+     * @{
+     */
+    /** Number of blocks written back per thread. */
+    Stats::Vector<> writebacks;
+
+    /** Number of misses that hit in the MSHRs per command and thread. */
+    Stats::Vector<> mshr_hits[NUM_MEM_CMDS];
+    /** Demand misses that hit in the MSHRs. */
+    Stats::Formula demandMshrHits;
+    /** Total number of misses that hit in the MSHRs. */
+    Stats::Formula overallMshrHits;
+
+    /** Number of misses that miss in the MSHRs, per command and thread. */
+    Stats::Vector<> mshr_misses[NUM_MEM_CMDS];
+    /** Demand misses that miss in the MSHRs. */
+    Stats::Formula demandMshrMisses;
+    /** Total number of misses that miss in the MSHRs. */
+    Stats::Formula overallMshrMisses;
+
+    /** Number of uncacheable accesses, per command and thread. */
+    Stats::Vector<> mshr_uncacheable[NUM_MEM_CMDS];
+    /** Total number of uncacheable accesses. */
+    Stats::Formula overallMshrUncacheable;
+
+    /** Total cycle latency of each MSHR miss, per command and thread. */
+    Stats::Vector<> mshr_miss_latency[NUM_MEM_CMDS];
+    /** Total cycle latency of demand MSHR misses. */
+    Stats::Formula demandMshrMissLatency;
+    /** Total cycle latency of overall MSHR misses. */
+    Stats::Formula overallMshrMissLatency;
+
+    /** Total cycle latency of uncacheable accesses, per command and thread. */
+    Stats::Vector<> mshr_uncacheable_lat[NUM_MEM_CMDS];
+    /** Total cycle latency of overall uncacheable accesses. */
+    Stats::Formula overallMshrUncacheableLatency;
+
+    /** The total number of MSHR accesses per command and thread. */
+    Stats::Formula mshrAccesses[NUM_MEM_CMDS];
+    /** The total number of demand MSHR accesses. */
+    Stats::Formula demandMshrAccesses;
+    /** The total number of MSHR accesses. */
+    Stats::Formula overallMshrAccesses;
+
+    /** The miss rate in the MSHRs per command and thread. */
+    Stats::Formula mshrMissRate[NUM_MEM_CMDS];
+    /** The demand miss rate in the MSHRs. */
+    Stats::Formula demandMshrMissRate;
+    /** The overall miss rate in the MSHRs. */
+    Stats::Formula overallMshrMissRate;
+
+    /** The average latency of an MSHR miss, per command and thread. */
+    Stats::Formula avgMshrMissLatency[NUM_MEM_CMDS];
+    /** The average latency of a demand MSHR miss. */
+    Stats::Formula demandAvgMshrMissLatency;
+    /** The average overall latency of an MSHR miss. */
+    Stats::Formula overallAvgMshrMissLatency;
+
+    /** The average latency of an uncacheable access, per command and thread. */
+    Stats::Formula avgMshrUncacheableLatency[NUM_MEM_CMDS];
+    /** The average overall latency of an uncacheable access. */
+    Stats::Formula overallAvgMshrUncacheableLatency;
+
+    /** The number of times a thread hit its MSHR cap. */
+    Stats::Vector<> mshr_cap_events;
+    /** The number of times software prefetches caused the MSHR to block. */
+    Stats::Vector<> soft_prefetch_mshr_full;
+
+    Stats::Scalar<> mshr_no_allocate_misses;
+
+    /**
+     * @}
+     */
+
+  private:
+    /** Pointer to the MSHR that has reached its target limit, if any. */
+    MSHR* noTargetMSHR;
+
+    /**
+     * Allocate a new MSHR to handle the provided miss.
+     * @param pkt The miss to buffer.
+     * @param size The number of bytes to fetch.
+     * @param time The time the miss occurs.
+     * @return A pointer to the new MSHR.
+     */
+    MSHR* allocateMiss(Packet * &pkt, int size, Tick time);
+
+    /**
+     * Allocate a new WriteBuffer to handle the provided write.
+     * @param pkt The write to handle.
+     * @param size The number of bytes to write.
+     * @param time The time the write occurs.
+     * @return A pointer to the new write buffer.
+     */
+    MSHR* allocateWrite(Packet * &pkt, int size, Tick time);
+
+  public:
+    /**
+     * Simple constructor. Initializes internal storage and sets parameters.
+     * @param numMSHRs The number of outstanding misses to handle.
+     * @param numTargets The number of outstanding targets to each miss.
+     * @param write_buffers The number of outstanding writes to handle.
+     * @param write_allocate If true, treat write misses the same as reads.
+     * @param prefetch_miss Prefetch-related flag (meaning not visible here).
+     */
+    MissQueue(int numMSHRs, int numTargets, int write_buffers,
+              bool write_allocate, bool prefetch_miss);
+
+    /**
+     * Deletes all allocated internal storage.
+     */
+    ~MissQueue();
+
+    /**
+     * Register statistics for this object.
+     * @param name The name of the parent cache.
+     */
+    void regStats(const std::string &name);
+
+    /**
+     * Called by the parent cache to set the back pointer.
+     * @param _cache A pointer to the parent cache.
+     */
+    void setCache(BaseCache *_cache);
+
+    void setPrefetcher(BasePrefetcher *_prefetcher);
+
+    /**
+     * Handle a cache miss properly. Either allocate an MSHR for the request,
+     * or forward it through the write buffer.
+     * @param pkt The request that missed in the cache.
+     * @param blk_size The block size of the cache.
+     * @param time The time the miss is detected.
+     */
+    void handleMiss(Packet * &pkt, int blk_size, Tick time);
+
+    /**
+     * Fetch the block for the given address and buffer the given target.
+     * @param addr The address to fetch.
+     * @param asid The address space of the address.
+     * @param blk_size The block size of the cache.
+     * @param time The time the miss is detected.
+     * @param target The target for the fetch.
+     */
+    MSHR* fetchBlock(Addr addr, int asid, int blk_size, Tick time,
+                     Packet * &target);
+
+    /**
+     * Selects an outstanding request to service.
+     * @return The request to service, NULL if none found.
+     */
+    Packet * getPacket();
+
+    /**
+     * Set the command to the given bus command.
+     * @param pkt The request to update.
+     * @param cmd The bus command to use.
+     */
+    void setBusCmd(Packet * &pkt, Packet::Command cmd);
+
+    /**
+     * Restore the original command in case of a bus transmission error.
+     * @param pkt The request to reset.
+     */
+    void restoreOrigCmd(Packet * &pkt);
+
+    /**
+     * Marks a request as in service (sent on the bus). This can have side
+     * effects, since storage for no-response commands is deallocated once
+     * they are successfully sent.
+     * @param pkt The request that was sent on the bus.
+     */
+    void markInService(Packet * &pkt);
+
+    /**
+     * Collect statistics and free resources of a satisfied request.
+     * @param pkt The request that has been satisfied.
+     * @param time The time when the request is satisfied.
+     */
+    void handleResponse(Packet * &pkt, Tick time);
+
+    /**
+     * Removes all outstanding requests for a given thread number. If a request
+     * has been sent to the bus, this function removes all of its targets.
+     * @param threadNum The thread number of the requests to squash.
+     */
+    void squash(int threadNum);
+
+    /**
+     * Return the outstanding-miss count, i.e. mq.getAllocatedTargets().
+     * @return The value reported by mq.getAllocatedTargets().
+     */
+    int getMisses()
+    {
+        return mq.getAllocatedTargets();
+    }
+
+    /**
+     * Searches for the supplied address in the miss queue.
+     * @param addr The address to look for.
+     * @param asid The address space id.
+     * @return The MSHR that contains the address, NULL if not found.
+     * @warning Currently only searches the miss queue. If non write allocate
+     * might need to search the write buffer for coherence.
+     */
+    MSHR* findMSHR(Addr addr, int asid) const;
+
+    /**
+     * Searches for the supplied address in the write buffer.
+     * @param addr The address to look for.
+     * @param asid The address space id.
+     * @param writes The list of writes that match the address.
+     * @return True if any writes are found
+     */
+    bool findWrites(Addr addr, int asid, std::vector<MSHR*>& writes) const;
+
+    /**
+     * Perform a writeback of dirty data to the given address.
+     *
+     * @param addr The address to write to.
+     * @param asid The address space id.
+     * @param size The number of bytes to write.
+     * @param data The data to write, can be NULL.
+     * @param compressed True if the data is compressed.
+     */
+    void doWriteback(Addr addr, int asid,
+                     int size, uint8_t *data, bool compressed);
+
+    /**
+     * Perform the given writeback request.
+     * @param pkt The writeback request.
+     */
+    void doWriteback(Packet * &pkt);
+
+    /**
+     * Returns true if there are outstanding requests.
+     * @return True if there are outstanding requests.
+     */
+    bool havePending();
+
+    /**
+     * Add a target to the given MSHR. This assumes it is in the miss queue.
+     * @param mshr The mshr to add a target to.
+     * @param pkt The target to add.
+     */
+    void addTarget(MSHR *mshr, Packet * &pkt)
+    {
+        mq.allocateTarget(mshr, pkt);
+    }
+
+    /**
+     * Allocate a MSHR to hold a list of targets to a block involved in a copy.
+     * If the block is marked done then the MSHR already holds the data to
+     * fill the block. Otherwise the block needs to be fetched.
+     * @param addr The address to buffer.
+     * @param asid The address space ID.
+     * @return A pointer to the allocated MSHR.
+     */
+    MSHR* allocateTargetList(Addr addr, int asid);
+
+};
+
+#endif //__MISS_QUEUE_HH__
diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc
new file mode 100644
index 000000000..db2f40c56
--- /dev/null
+++ b/src/mem/cache/miss/mshr.cc
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Erik Hallnor
+ *          Dave Greene
+ */
+
+/**
+ * @file
+ * Miss Status and Handling Register (MSHR) definitions.
+ */
+
+#include <assert.h>
+#include <string>
+#include <vector>
+
+#include "mem/cache/miss/mshr.hh"
+#include "sim/root.hh" // for curTick
+#include "sim/host.hh"
+#include "base/misc.hh"
+#include "mem/cache/cache.hh"
+
+using namespace std;
+
+MSHR::MSHR()
+    // Start with defined values: pkt and asid may be read before allocate().
+    : addr(0), asid(0), inService(false), threadNum(-1), pkt(NULL),
+      ntargets(0), order(0)
+{
+}
+
+/** Set up this MSHR for a miss on _addr and create its outgoing packet. */
+void
+MSHR::allocate(Packet::Command cmd, Addr _addr, int _asid, int size,
+               Packet * &target)
+{
+    addr = _addr;
+    // Record the address space id too; it used to be silently dropped.
+    asid = _asid;
+    if (target) {
+        //Have a request, just use it
+        pkt = new Packet(target->req, cmd, Packet::Broadcast, size);
+    } else {
+        //need a request first (Thread context??)
+        Request * req = new Request();
+        req->setPhys(addr, size, 0);
+        pkt = new Packet(req, cmd, Packet::Broadcast, size);
+    }
+    pkt->time = curTick;
+    pkt->allocate();
+    pkt->senderState = (Packet::SenderState *)this;
+
+    // Add the target only once pkt exists: allocateTarget() may rewrite
+    // pkt->cmd.
+    if (target)
+        allocateTarget(target);
+}
+
+// Since we aren't sure if data is being used, don't copy here.
+/** @todo When we have a "global" data flag, might want to copy data here. */
+void
+MSHR::allocateAsBuffer(Packet * &target)
+{
+    threadNum = target->req->getThreadNum();
+    asid = target->req->getAsid();
+    addr = target->getAddr();
+    // Build the buffering packet completely, then publish it through pkt.
+    Packet *buffer = new Packet(target->req, target->cmd, -1);
+    buffer->allocate();
+    buffer->time = curTick;
+    buffer->senderState = (Packet::SenderState*)this;
+    pkt = buffer;
+}
+
+void
+MSHR::deallocate()
+{
+    assert(targets.empty());
+    assert(ntargets == 0);
+    pkt = NULL;
+    inService = false;
+    // Reset portably: list iterators are not assignable from NULL.
+    allocIter = readyIter = Iterator();
+}
+
+/*
+ * Adds a target to an MSHR. When a read is already in service, a late
+ * invalidate is placed ahead of the first queued non-read target instead
+ * of at the tail.
+ */
+void
+MSHR::allocateTarget(Packet * &target)
+{
+    // Short-circuit order matters: pkt is only consulted once the MSHR is
+    // known to be in service.
+    const bool reorderInvalidate =
+        inService && pkt->isRead() && target->isInvalidate();
+
+    if (!reorderInvalidate) {
+        // Common case: targets are kept in arrival order.
+        targets.push_back(target);
+    } else {
+        //If we append an invalidate and we issued a read to the bus,
+        //but now have some pending writes, we need to move
+        //the invalidate to before the first non-read
+        TargetListIterator firstNonRead = targets.begin();
+        while (firstNonRead != targets.end() &&
+               (*firstNonRead)->isRead()) {
+            ++firstNonRead;
+        }
+
+        //Inserting at that position leaves the leading reads in their
+        //original order with the invalidate directly behind them.
+        targets.insert(firstNonRead, target);
+    }
+
+    ++ntargets;
+    assert(targets.size() == ntargets);
+    /**
+     * @todo really prioritize the target commands.
+     */
+
+    // A write that arrives before the request has been sent turns the
+    // outgoing packet into a write request.
+    const bool upgradeToWrite = !inService && target->isWrite();
+    if (upgradeToWrite) {
+        pkt->cmd = Packet::WriteReq;
+    }
+}
+
+
+
+/** Print this MSHR's state and its target list to cerr. */
+void
+MSHR::dump()
+{
+    ccprintf(cerr,
+             "inService: %d thread: %d\n"
+             "Addr: %x asid: %d ntargets %d\n"
+             "Targets:\n",
+             inService, threadNum, addr, asid, ntargets);
+
+    // Walk exactly ntargets entries; the list must not run out early.
+    TargetListIterator cur = targets.begin();
+    for (int idx = 0; idx < ntargets; ++idx, ++cur) {
+        assert(cur != targets.end());
+        Packet *entry = *cur;
+        ccprintf(cerr, "\t%d: Addr: %x cmd: %d\n",
+                 idx, entry->getAddr(), entry->cmdToIndex());
+    }
+    ccprintf(cerr, "\n");
+}
+
+MSHR::~MSHR()
+{
+    // Just clear it; no need to read pkt first (it may never have been set).
+    pkt = NULL;
+}
diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh
new file mode 100644
index 000000000..167aa26cd
--- /dev/null
+++ b/src/mem/cache/miss/mshr.hh
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Erik Hallnor
+ */
+
+/**
+ * @file
+ * Miss Status and Handling Register (MSHR) declaration.
+ */
+
+#ifndef __MSHR_HH__
+#define __MSHR_HH__
+
+#include "mem/packet.hh"
+#include <list>
+#include <deque>
+
+class MSHR;
+
+/**
+ * Miss Status and Handling Register. This class keeps all the information
+ * needed to handle a cache miss including a list of target requests.
+ */
+class MSHR {
+  public:
+    /** Defines the data structure of the MSHR target list. */
+    typedef std::list<Packet *> TargetList;
+    /** Target list iterator. */
+    typedef std::list<Packet *>::iterator TargetListIterator;
+    /** A list of MSHRs. */
+    typedef std::list<MSHR *> List;
+    /** MSHR list iterator. */
+    typedef List::iterator Iterator;
+    /** MSHR list const_iterator. */
+    typedef List::const_iterator ConstIterator;
+
+    /** Address of the miss. */
+    Addr addr;
+    /** Address space id of the miss. */
+    short asid;
+    /** True if the request has been sent to the bus. */
+    bool inService;
+    /** Thread number of the miss. */
+    int threadNum;
+    /** The request that is forwarded to the next level of the hierarchy. */
+    Packet * pkt;
+    /** The number of currently allocated targets. */
+    short ntargets;
+    /** The original requesting command. */
+    Packet::Command originalCmd;
+    /** Order number assigned by the miss queue. */
+    uint64_t order;
+
+    /**
+     * Pointer to this MSHR on the pending (ready) list.
+     * @sa MissQueue, MSHRQueue::pendingList
+     */
+    Iterator readyIter;
+    /**
+     * Pointer to this MSHR on the allocated list.
+     * @sa MissQueue, MSHRQueue::allocatedList
+     */
+    Iterator allocIter;
+
+private:
+    /** List of all requests that match the address */
+    TargetList targets;
+
+public:
+    /**
+     * Allocate a miss to this MSHR.
+     * @param cmd The requesting command.
+     * @param addr The address of the miss.
+     * @param asid The address space id of the miss.
+     * @param size The number of bytes to request.
+     * @param pkt  The original miss, may be NULL.
+     */
+    void allocate(Packet::Command cmd, Addr addr, int asid, int size,
+                  Packet * &pkt);
+
+    /**
+     * Allocate this MSHR as a buffer for the given request.
+     * @param target The memory request to buffer.
+     */
+    void allocateAsBuffer(Packet * &target);
+
+    /**
+     * Mark this MSHR as free. All targets must already have been removed.
+     */
+    void deallocate();
+
+    /**
+     * Add a request to the list of targets.
+     * @param target The target.
+     */
+    void allocateTarget(Packet * &target);
+
+    /** A simple constructor. */
+    MSHR();
+    /** A simple destructor. */
+    ~MSHR();
+
+    /**
+     * Returns the current number of allocated targets.
+     * @return The current number of allocated targets.
+     */
+    int getNumTargets() const
+    {
+        return(ntargets);
+    }
+
+    /**
+     * Returns a pointer to the target list.
+     * @return a pointer to the target list.
+     */
+    TargetList* getTargetList()
+    {
+        return &targets;
+    }
+
+    /**
+     * Returns a pointer to the first target. The list must not be empty.
+     * @return A pointer to the first target.
+     */
+    Packet * getTarget() const
+    {
+        return targets.front();
+    }
+
+    /**
+     * Pop first target. The target list must not be empty.
+     */
+    void popTarget()
+    {
+        --ntargets;
+        targets.pop_front();
+    }
+
+    /**
+     * Returns true if there are targets left.
+     * @return true if there are targets
+     */
+    bool hasTargets() const
+    {
+        return !targets.empty();
+    }
+
+    /**
+     * Prints the contents of this MSHR to stderr.
+     */
+    void dump();
+};
+
+#endif //__MSHR_HH__
diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc
new file mode 100644
index 000000000..6516a99f8
--- /dev/null
+++ b/src/mem/cache/miss/mshr_queue.cc
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) 2003-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Erik Hallnor
+ */
+
+/** @file
+ * Definition of the MSHRQueue.
+ */
+
+#include "mem/cache/miss/mshr_queue.hh"
+#include "sim/eventq.hh"
+
+using namespace std;
+
+MSHRQueue::MSHRQueue(int num_mshrs, int reserve)
+    : numMSHRs(num_mshrs + reserve - 1), numReserve(reserve),
+      allocated(0), inServiceMSHRs(0), allocatedTargets(0)
+{
+    // Every register starts out on the free list.
+    registers = new MSHR[numMSHRs];
+    MSHR *const last = registers + numMSHRs;
+    for (MSHR *entry = registers; entry != last; ++entry) {
+        freeList.push_back(entry);
+    }
+}
+
+MSHRQueue::~MSHRQueue()
+{
+    delete [] registers; // frees the MSHR array allocated by the constructor
+}
+
+MSHR*
+MSHRQueue::findMatch(Addr addr, int asid) const
+{
+    // Linear scan of the allocated list; asid is not consulted.
+    for (MSHR::ConstIterator it = allocatedList.begin();
+         it != allocatedList.end(); ++it) {
+        if ((*it)->addr == addr) {
+            return *it;
+        }
+    }
+    // No allocated MSHR has this address.
+    return NULL;
+}
+
+bool
+MSHRQueue::findMatches(Addr addr, int asid, vector<MSHR*>& matches) const
+{
+    // Need an empty vector
+    assert(matches.empty());
+
+    // Collect every allocated MSHR with this address; asid is not consulted.
+    bool found = false;
+    for (MSHR::ConstIterator it = allocatedList.begin();
+         it != allocatedList.end(); ++it) {
+        MSHR *candidate = *it;
+        if (candidate->addr != addr)
+            continue;
+        matches.push_back(candidate);
+        found = true;
+    }
+    return found;
+}
+
+// Returns the first pending MSHR whose address range overlaps pkt's range,
+// or NULL when nothing on the pending list overlaps.
+MSHR*
+MSHRQueue::findPending(Packet * &pkt) const
+{
+    for (MSHR::ConstIterator it = pendingList.begin();
+         it != pendingList.end(); ++it) {
+        MSHR *mshr = *it;
+        // Two ranges overlap when the one that starts lower extends past
+        // the start of the other.
+        const bool overlaps = (mshr->addr < pkt->getAddr())
+            ? (mshr->addr + mshr->pkt->getSize() > pkt->getAddr())
+            : (pkt->getAddr() + pkt->getSize() > mshr->addr);
+        if (overlaps) {
+            return mshr;
+        }
+
+        //need to check destination address for copies.
+        //TEMP NOT DOING COPIES
+#if 0
+        if (mshr->pkt->cmd == Copy) {
+            Addr dest = mshr->pkt->dest;
+            if (dest < pkt->addr) {
+                if (dest + mshr->pkt->size > pkt->addr) {
+                    return mshr;
+                }
+            } else {
+                if (pkt->addr + pkt->size > dest) {
+                    return mshr;
+                }
+            }
+        }
+#endif
+    }
+    return NULL;
+}
+
+MSHR*
+MSHRQueue::allocate(Packet * &pkt, int size)
+{
+    // @pre a free MSHR exists; front() on an empty list is undefined.
+    assert(!freeList.empty());
+    Addr aligned_addr = pkt->getAddr() & ~((Addr)size - 1);
+    MSHR *mshr = freeList.front();
+    assert(mshr->getNumTargets() == 0);
+    freeList.pop_front();
+    if (!pkt->needsResponse()) {
+        mshr->allocateAsBuffer(pkt);
+    } else {
+        assert(size !=0);
+        mshr->allocate(pkt->cmd, aligned_addr, pkt->req->getAsid(), size, pkt);
+        allocatedTargets += 1;
+    }
+    mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr);
+    mshr->readyIter = pendingList.insert(pendingList.end(), mshr);
+    allocated += 1;
+    return mshr;
+}
+
+MSHR*
+MSHRQueue::allocateFetch(Addr addr, int asid, int size, Packet * &target)
+{
+    assert(!freeList.empty()); // front() on an empty list is undefined
+    MSHR *mshr = freeList.front();
+    assert(mshr->getNumTargets() == 0);
+    freeList.pop_front();
+    mshr->allocate(Packet::ReadReq, addr, asid, size, target);
+    mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr);
+    mshr->readyIter = pendingList.insert(pendingList.end(), mshr);
+    allocated += 1;
+    return mshr;
+}
+
+MSHR*
+MSHRQueue::allocateTargetList(Addr addr, int asid, int size)
+{
+    MSHR *mshr = freeList.front();
+    assert(mshr->getNumTargets() == 0);
+    freeList.pop_front();
+    Packet * dummy = NULL; // MSHR::allocate() tests this for a real target
+    mshr->allocate(Packet::ReadReq, addr, asid, size, dummy);
+    mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr);
+    mshr->inService = true;
+    ++inServiceMSHRs;
+    ++allocated;
+    return mshr;
+}
+
+
+void
+MSHRQueue::deallocate(MSHR* mshr)
+{
+    deallocateOne(mshr); // same work; the returned iterator is discarded
+}
+
+MSHR::Iterator
+MSHRQueue::deallocateOne(MSHR* mshr)
+{
+    // Settle the counters and pending list before the MSHR itself is reset.
+    --allocated;
+    allocatedTargets -= mshr->getNumTargets();
+    if (mshr->inService)
+        --inServiceMSHRs;
+    else
+        pendingList.erase(mshr->readyIter);
+    MSHR::Iterator next = allocatedList.erase(mshr->allocIter);
+    freeList.push_front(mshr);
+    mshr->deallocate();
+    return next;
+}
+
+void
+MSHRQueue::moveToFront(MSHR *mshr)
+{
+    if (mshr->inService)
+        return; // in-service MSHRs are not on the pending list
+    assert(*(mshr->readyIter) == mshr);
+    pendingList.erase(mshr->readyIter);
+    mshr->readyIter = pendingList.insert(pendingList.begin(), mshr);
+}
+
+void
+MSHRQueue::markInService(MSHR* mshr)
+{
+    //assert(mshr == pendingList.front());
+    if (!mshr->pkt->needsResponse()) {
+        assert(mshr->getNumTargets() == 0);
+        deallocate(mshr);
+        return;
+    }
+    mshr->inService = true;
+    pendingList.erase(mshr->readyIter);
+    // A list iterator cannot portably be assigned NULL; reset it instead.
+    mshr->readyIter = MSHR::Iterator();
+    inServiceMSHRs += 1;
+}
+
+void
+MSHRQueue::markPending(MSHR* mshr, Packet::Command cmd)
+{
+    // Only an in-service MSHR may be re-queued. Checking the flag avoids
+    // comparing a list iterator against NULL, which is not portable.
+    assert(mshr->inService);
+    mshr->pkt->cmd = cmd;
+    mshr->pkt->flags &= ~SATISFIED;
+    mshr->inService = false;
+    --inServiceMSHRs;
+    /** @todo might want to add rerequests to front of pending list for
+     *  performance. */
+    mshr->readyIter = pendingList.insert(pendingList.end(), mshr);
+}
+
+void
+MSHRQueue::squash(int threadNum)
+{
+    MSHR::Iterator it = allocatedList.begin();
+    while (it != allocatedList.end()) {
+        MSHR *mshr = *it;
+        if (mshr->threadNum != threadNum) {
+            // Another thread's miss: leave it untouched.
+            ++it;
+            continue;
+        }
+
+        // Discard every target; each must belong to the squashed thread.
+        for (; mshr->hasTargets(); mshr->popTarget()) {
+            assert(mshr->getTarget()->req->getThreadNum() == threadNum);
+        }
+        assert(!mshr->hasTargets());
+        assert(mshr->ntargets==0);
+
+        if (mshr->inService) {
+            // Already sent to the bus: keep the MSHR, minus its targets.
+            //mshr->pkt->flags &= ~CACHE_LINE_FILL;
+            ++it;
+        } else {
+            it = deallocateOne(mshr);
+        }
+    }
+}
diff --git a/src/mem/cache/miss/mshr_queue.hh b/src/mem/cache/miss/mshr_queue.hh
new file mode 100644
index 000000000..a67f1b9a6
--- /dev/null
+++ b/src/mem/cache/miss/mshr_queue.hh
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2003-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Erik Hallnor
+ */
+
+/** @file
+ * Declaration of a structure to manage MSHRs.
+ */
+
+#ifndef __MSHR_QUEUE_HH__
+#define __MSHR_QUEUE_HH__
+
+#include <vector>
+#include "mem/cache/miss/mshr.hh"
+
+/**
+ * A class for maintaining a list of pending and allocated memory requests.
+ */
+class MSHRQueue {
+  private:
+    /**  MSHR storage. */
+    MSHR* registers;
+    /** Holds pointers to all allocated MSHRs. */
+    MSHR::List allocatedList;
+    /** Holds pointers to MSHRs that haven't been sent to the bus. */
+    MSHR::List pendingList;
+    /** Holds non allocated MSHRs. */
+    MSHR::List freeList;
+
+    // Parameters
+    /**
+     * The total number of MSHRs in this queue. This number is set as the
+     * number of MSHRs requested plus (numReserve - 1). This allows for
+     * the same number of effective MSHRs while still maintaining the reserve.
+     */
+    const int numMSHRs;
+
+    /**
+     * The number of MSHRs to hold in reserve. This is needed because copy
+     * operations can allocate up to 4 MSHRs at one time.
+     */
+    const int numReserve;
+
+  public:
+    /** The number of allocated MSHRs. */
+    int allocated;
+    /** The number of MSHRs that have been forwarded to the bus. */
+    int inServiceMSHRs;
+    /** The number of targets waiting for response. */
+    int allocatedTargets;
+
+    /**
+     * Create a queue with a given number of MSHRs.
+     * @param num_mshrs The number of MSHRs in this queue.
+     * @param reserve The minimum number of MSHRs needed to satisfy any access.
+     */
+    MSHRQueue(int num_mshrs, int reserve = 1);
+
+    /** Destructor */
+    ~MSHRQueue();
+
+    /**
+     * Find the first allocated MSHR that matches the provided address.
+     * @param addr The address to find.
+     * @param asid The address space id (not used by the current search).
+     * @return Pointer to the matching MSHR, null if not found.
+     */
+    MSHR* findMatch(Addr addr, int asid) const;
+
+    /**
+     * Find and return all the matching MSHRs in the provided vector.
+     * @param addr The address to find.
+     * @param asid The address space ID (not used by the current search).
+     * @param matches The vector to return pointers to the matching MSHRs.
+     * @return True if any matches are found, false otherwise.
+     * @todo Typedef the vector??
+     */
+    bool findMatches(Addr addr, int asid, std::vector<MSHR*>& matches) const;
+
+    /**
+     * Find any pending requests that overlap the given request.
+     * @param pkt The request to find.
+     * @return A pointer to the earliest matching MSHR.
+     */
+    MSHR* findPending(Packet * &pkt) const;
+
+    /**
+     * Allocates a new MSHR for the request and size. This places the request
+     * as the first target in the MSHR.
+     * @param pkt The request to handle.
+     * @param size The number in bytes to fetch from memory.
+     * @return A pointer to the MSHR allocated.
+     *
+     * @pre There are free MSHRs.
+     */
+    MSHR* allocate(Packet * &pkt, int size = 0);
+
+    /**
+     * Allocate a read request for the given address, and places the given
+     * target on the target list.
+     * @param addr The address to fetch.
+     * @param asid The address space for the fetch.
+     * @param size The number of bytes to request.
+     * @param target The first target for the request.
+     * @return Pointer to the new MSHR.
+     */
+    MSHR* allocateFetch(Addr addr, int asid, int size, Packet * &target);
+
+    /**
+     * Allocate a target list for the given address.
+     * @param addr The address to fetch.
+     * @param asid The address space for the fetch.
+     * @param size The number of bytes to request.
+     * @return Pointer to the new MSHR.
+     */
+    MSHR* allocateTargetList(Addr addr, int asid, int size);
+
+    /**
+     * Removes the given MSHR from the queue. This places the MSHR on the
+     * free list.
+     * @param mshr The MSHR to remove.
+     */
+    void deallocate(MSHR* mshr);
+
+    /**
+     * Allocates a target to the given MSHR. Used to keep track of the number
+     * of outstanding targets.
+     * @param mshr The MSHR to allocate the target to.
+     * @param pkt The target request.
+     */
+    void allocateTarget(MSHR* mshr, Packet * &pkt)
+    {
+        mshr->allocateTarget(pkt);
+        allocatedTargets += 1;
+    }
+
+    /**
+     * Remove a MSHR from the queue. Returns an iterator into the allocatedList
+     * for faster squash implementation.
+     * @param mshr The MSHR to remove.
+     * @return An iterator to the next entry in the allocatedList.
+     */
+    MSHR::Iterator deallocateOne(MSHR* mshr);
+
+    /**
+     * Moves the MSHR to the front of the pending list if it is not in service.
+     * @param mshr The mshr to move.
+     */
+    void moveToFront(MSHR *mshr);
+
+    /**
+     * Mark the given MSHR as in service. This removes the MSHR from the
+     * pendingList. Deallocates the MSHR if it does not expect a response.
+     * @param mshr The MSHR to mark in service.
+     */
+    void markInService(MSHR* mshr);
+
+    /**
+     * Mark an in service mshr as pending, used to resend a request.
+     * @param mshr The MSHR to resend.
+     * @param cmd The command to resend.
+     */
+    void markPending(MSHR* mshr, Packet::Command cmd);
+
+    /**
+     * Squash outstanding requests with the given thread number. If a request
+     * is in service, just squashes the targets.
+     * @param threadNum The thread to squash.
+     */
+    void squash(int threadNum);
+
+    /**
+     * Returns true if the pending list is not empty.
+     * @return True if there are outstanding requests.
+     */
+    bool havePending() const
+    {
+        return !pendingList.empty();
+    }
+
+    /**
+     * Returns true once more than (numMSHRs - numReserve) MSHRs are allocated.
+     * @return True if this queue is full.
+     */
+    bool isFull() const
+    {
+        return (allocated > numMSHRs - numReserve);
+    }
+
+    /**
+     * Returns the request at the head of the pendingList.
+     * @return The next request to service, NULL if the list is empty.
+     */
+    Packet * getReq() const
+    {
+        if (pendingList.empty()) {
+            return NULL;
+        }
+        MSHR* mshr = pendingList.front();
+        return mshr->pkt;
+    }
+
+    /**
+     * Returns the number of outstanding targets.
+     * @return the number of allocated targets.
+     */
+    int getAllocatedTargets() const
+    {
+        return allocatedTargets;
+    }
+
+};
+
+#endif //__MSHR_QUEUE_HH__
diff --git a/src/mem/cache/prefetch/base_prefetcher.cc b/src/mem/cache/prefetch/base_prefetcher.cc
new file mode 100644
index 000000000..897551989
--- /dev/null
+++ b/src/mem/cache/prefetch/base_prefetcher.cc
@@ -0,0 +1,249 @@
+/*
+ * Copyright (c) 2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Ron Dreslinski
+ */
+
+/**
+ * @file
+ * Hardware Prefetcher Definition.
+ */
+
+#include "base/trace.hh"
+#include "mem/cache/base_cache.hh"
+#include "mem/cache/prefetch/base_prefetcher.hh"
+#include "mem/request.hh"
+#include <list>
+
+BasePrefetcher::BasePrefetcher(int size, bool pageStop, bool serialSquash,
+                               bool cacheCheckPush, bool onlyData)
+    :size(size), cache(NULL), blkSize(0), // real values set by setCache()
+     pageStop(pageStop), serialSquash(serialSquash),
+     cacheCheckPush(cacheCheckPush), only_data(onlyData)
+{}
+
+void
+BasePrefetcher::setCache(BaseCache *_cache)
+{
+    blkSize = _cache->getBlockSize(); // kept for block-address masking
+    cache = _cache;
+}
+
+void
+BasePrefetcher::regStats(const std::string &name)
+{
+    pfIdentified
+        .name(name + ".prefetcher.num_hwpf_identified")
+        .desc("number of hwpf identified")
+        ;
+
+    pfMSHRHit
+        .name(name + ".prefetcher.num_hwpf_already_in_mshr")
+        .desc("number of hwpf that were already in mshr")
+        ;
+
+    pfCacheHit
+        .name(name + ".prefetcher.num_hwpf_already_in_cache")
+        .desc("number of hwpf that were already in the cache")
+        ;
+
+    pfBufferHit
+        .name(name + ".prefetcher.num_hwpf_already_in_prefetcher")
+        .desc("number of hwpf that were already in the prefetch queue")
+        ;
+
+    pfRemovedFull
+        .name(name + ".prefetcher.num_hwpf_evicted")
+        .desc("number of hwpf removed due to no buffer left")
+        ;
+
+    pfRemovedMSHR
+        .name(name + ".prefetcher.num_hwpf_removed_MSHR_hit")
+        .desc("number of hwpf removed because MSHR allocated")
+        ;
+
+    pfIssued
+        .name(name + ".prefetcher.num_hwpf_issued")
+        .desc("number of hwpf issued")
+        ;
+
+    pfSpanPage
+        .name(name + ".prefetcher.num_hwpf_span_page")
+        .desc("number of hwpf spanning a virtual page")
+        ;
+
+    pfSquashed
+        .name(name + ".prefetcher.num_hwpf_squashed_from_miss")
+        .desc("number of hwpf that got squashed due to a miss aborting calculation time")
+        ;
+}
+
+Packet *
+BasePrefetcher::getPacket()
+{
+    DPRINTF(HWPrefetch, "%s:Requesting a hw_pf to issue\n", cache->name());
+
+    if (pf.empty()) {
+        DPRINTF(HWPrefetch, "%s:No HW_PF found\n", cache->name());
+        return NULL;
+    }
+
+    // Pop candidates off the head until one is usable or none remain.
+    for (;;) {
+        Packet *candidate = pf.front();
+        pf.pop_front();
+        // If the cache was not consulted at insertion time, do it now.
+        // NOTE(review): a rejected candidate is dropped, never freed here.
+        const bool alreadyCached = !cacheCheckPush && inCache(candidate);
+        if (pf.empty())
+            cache->clearMasterRequest(Request_PF);
+        if (!alreadyCached) {
+            pfIssued++;
+            return candidate;
+        }
+        if (pf.empty())
+            return NULL; //None left, all were in cache
+    }
+}
+
+/**
+ * React to a miss: drop any queued prefetch for the missed block, squash
+ * queued prefetches stamped at or after time (if serialSquash), then queue
+ * the candidates produced by calculatePrefetch().
+ * @param pkt The packet that missed; uncacheable requests (and instruction
+ *            reads when only_data is set) are ignored.
+ * @param time Base time; new prefetches are stamped time + delay.
+ */
+void
+BasePrefetcher::handleMiss(Packet * &pkt, Tick time)
+{
+    if (!pkt->req->isUncacheable() && !(pkt->req->isInstRead() && only_data))
+    {
+        //Calculate the blk address
+        Addr blkAddr = pkt->getAddr() & ~(Addr)(blkSize-1);
+
+        //Check if miss is in pfq, if so remove it
+        std::list<Packet *>::iterator iter = inPrefetch(blkAddr);
+        if (iter != pf.end()) {
+            DPRINTF(HWPrefetch, "%s:Saw a miss to a queued prefetch, removing it\n", cache->name());
+            pfRemovedMSHR++;
+            pf.erase(iter);
+            if (pf.empty())
+                cache->clearMasterRequest(Request_PF);
+        }
+
+        //Remove anything in queue with a time at or after this miss.
+        //Everything is inserted in time order, so start from the back
+        //and work until pf.empty() or an earlier time is found.
+        //This emulates aborting the previous work on a new miss;
+        //needed for serial calculators like GHB.
+        if (serialSquash) {
+            //Test pf.back() instead of a saved iterator: pop_back()
+            //invalidates an iterator to the last element, and stepping
+            //back from end() of an empty list is undefined.
+            while (!pf.empty() && (pf.back()->time >= time)) {
+                pfSquashed++;
+                pf.pop_back();
+            }
+            if (pf.empty())
+                cache->clearMasterRequest(Request_PF);
+        }
+
+        std::list<Addr> addresses;
+        std::list<Tick> delays;
+        calculatePrefetch(pkt, addresses, delays);
+
+        //Assumes calculatePrefetch() supplies one delay per address, so
+        //the two lists are walked in lock step.
+        std::list<Addr>::iterator addr = addresses.begin();
+        std::list<Tick>::iterator delay = delays.begin();
+        for (; addr != addresses.end(); ++addr, ++delay)
+        {
+            DPRINTF(HWPrefetch, "%s:Found a pf canidate, inserting into prefetch queue\n", cache->name());
+            //temp calc this here...
+            pfIdentified++;
+            //create a prefetch memreq
+            Request * prefetchReq = new Request(*addr, blkSize, 0);
+            Packet * prefetch;
+            prefetch = new Packet(prefetchReq, Packet::HardPFReq, -1);
+            prefetch->allocate();
+            prefetch->req->setThreadContext(pkt->req->getCpuNum(),
+                                            pkt->req->getThreadNum());
+
+            prefetch->time = time + (*delay); //@todo ADD LATENCY HERE
+            //... initialize
+
+            //NOTE(review): the early exits below (and the evictions from
+            //pf) drop the packet without freeing it -- confirm ownership.
+
+            //Check if it is already in the cache
+            if (cacheCheckPush && inCache(prefetch)) {
+                continue;
+            }
+
+            //Check if it is already in the miss_queue
+            if (inMissQueue(prefetch->getAddr(), prefetch->req->getAsid())) {
+                continue;
+            }
+
+            //Check if it is already in the pf buffer
+            if (inPrefetch(prefetch->getAddr()) != pf.end()) {
+                pfBufferHit++;
+                continue;
+            }
+
+            //We just remove the head if we are full
+            if (pf.size() == size)
+            {
+                DPRINTF(HWPrefetch, "%s:Inserting into prefetch queue, it was full removing oldest\n", cache->name());
+                pfRemovedFull++;
+                pf.pop_front();
+            }
+
+            pf.push_back(prefetch);
+            prefetch->flags |= CACHE_LINE_FILL;
+
+            //Make sure to request the bus, with proper delay
+            cache->setMasterRequest(Request_PF, prefetch->time);
+        }
+    }
+}
+
+std::list<Packet *>::iterator
+BasePrefetcher::inPrefetch(Addr address)
+{
+    // At most one entry can match: handleMiss() checks before inserting.
+    const Addr blkMask = ~(Addr)(blkSize-1);
+    std::list<Packet *>::iterator cur = pf.begin();
+    while (cur != pf.end() && ((*cur)->getAddr() & blkMask) != address) {
+        ++cur;
+    }
+    // cur is either the matching entry or pf.end() when nothing matched.
+    return cur;
+}
+
+
diff --git a/src/mem/cache/prefetch/base_prefetcher.hh b/src/mem/cache/prefetch/base_prefetcher.hh
new file mode 100644
index 000000000..3e4fc89d1
--- /dev/null
+++ b/src/mem/cache/prefetch/base_prefetcher.hh
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Ron Dreslinski
+ */
+
+/**
+ * @file
+ * Hardware prefetcher base class declaration.
+ */
+
+#ifndef __MEM_CACHE_PREFETCH_BASE_PREFETCHER_HH__
+#define __MEM_CACHE_PREFETCH_BASE_PREFETCHER_HH__
+
+#include "mem/packet.hh"
+#include <list>
+
+class BaseCache;
+class BasePrefetcher
+{
+  protected:
+
+    /** The Prefetch Queue. */
+    std::list<Packet *> pf;
+
+    // PARAMETERS
+
+    /** The maximum number of entries in the Prefetch Queue. */
+    const int size;
+
+    /** Pointer to the parent cache. */
+    BaseCache* cache;
+
+    /** The block size of the parent cache. */
+    int blkSize;
+
+    /** Do we stop generating prefetches at a page boundary. */
+    bool pageStop;
+
+    /** Do we remove prefetches with later times than a new miss.*/
+    bool serialSquash;
+
+    /** Do we check if it is in the cache when inserting into buffer
+        (true), or when removing it to issue (false).*/
+    bool cacheCheckPush;
+
+    /** Do we prefetch on only data reads, or on inst reads as well. */
+    bool only_data;
+
+  public:
+    // Prefetch statistics; named and described in regStats().
+    Stats::Scalar<> pfIdentified;
+    Stats::Scalar<> pfMSHRHit;
+    Stats::Scalar<> pfCacheHit;
+    Stats::Scalar<> pfBufferHit;
+    Stats::Scalar<> pfRemovedFull;
+    Stats::Scalar<> pfRemovedMSHR;
+    Stats::Scalar<> pfIssued;
+    Stats::Scalar<> pfSpanPage;
+    Stats::Scalar<> pfSquashed;
+    /** Register the statistics under name + ".prefetcher.". */
+    void regStats(const std::string &name);
+
+  public:
+    BasePrefetcher(int numMSHRS, bool pageStop, bool serialSquash,
+                   bool cacheCheckPush, bool onlyData);
+
+    virtual ~BasePrefetcher() {}
+    /** Set the parent cache and record its block size. */
+    void setCache(BaseCache *_cache);
+    /** Update the prefetch queue in response to a miss. */
+    void handleMiss(Packet * &pkt, Tick time);
+    /** Pop the next prefetch to issue; NULL if none is available. */
+    Packet * getPacket();
+    /** @return True if the prefetch queue is not empty. */
+    bool havePending()
+    {
+        return !pf.empty();
+    }
+    /** Fill in the prefetch addresses, and their delays, for a miss. */
+    virtual void calculatePrefetch(Packet * &pkt,
+                                   std::list<Addr> &addresses,
+                                   std::list<Tick> &delays) = 0;
+    /** Used by this class to ask whether pkt is already in the cache. */
+    virtual bool inCache(Packet * &pkt) = 0;
+    /** Used to ask whether the address is already in the miss queue. */
+    virtual bool inMissQueue(Addr address, int asid) = 0;
+    /** Find the queued prefetch for a block address; pf.end() if none. */
+    std::list<Packet *>::iterator inPrefetch(Addr address);
+};
+
+
+#endif //__MEM_CACHE_PREFETCH_BASE_PREFETCHER_HH__
diff --git a/src/mem/cache/prefetch/ghb_prefetcher.cc b/src/mem/cache/prefetch/ghb_prefetcher.cc
new file mode 100644
index 000000000..247ec6e8b
--- /dev/null
+++ b/src/mem/cache/prefetch/ghb_prefetcher.cc
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Ron Dreslinski
+ *          Steve Reinhardt
+ */
+
+/**
+ * @file
+ * GHB Prefetcher template instantiations.
+ */
+
+#include "mem/cache/tags/cache_tags.hh"
+
+#include "mem/cache/tags/lru.hh"
+
+#include "base/compression/null_compression.hh"
+
+#include "mem/cache/miss/miss_queue.hh"
+#include "mem/cache/miss/blocking_buffer.hh"
+
+#include "mem/cache/prefetch/ghb_prefetcher.hh"
+
+// Template Instantiations
+#ifndef DOXYGEN_SHOULD_SKIP_THIS
+
+template class GHBPrefetcher<CacheTags<LRU,NullCompression>, MissQueue>;
+template class GHBPrefetcher<CacheTags<LRU,NullCompression>, BlockingBuffer>;
+
+#endif //DOXYGEN_SHOULD_SKIP_THIS
diff --git a/src/mem/cache/prefetch/ghb_prefetcher.hh b/src/mem/cache/prefetch/ghb_prefetcher.hh
new file mode 100644
index 000000000..c22b763d1
--- /dev/null
+++ b/src/mem/cache/prefetch/ghb_prefetcher.hh
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Ron Dreslinski
+ */
+
+/**
+ * @file
+ * Describes a ghb prefetcher based on template policies.
+ */
+
+#ifndef __MEM_CACHE_PREFETCH_GHB_PREFETCHER_HH__
+#define __MEM_CACHE_PREFETCH_GHB_PREFETCHER_HH__
+
+#include "base/misc.hh" // fatal, panic, and warn
+
+#include "mem/cache/prefetch/prefetcher.hh"
+
+/**
+ * A stride-detecting prefetcher. It remembers the last two miss addresses
+ * (per CPU when useCPUId is set) and, when two consecutive strides match,
+ * emits up to degree prefetch addresses along that stride, each with a
+ * delay of latency. TagStore and Buffering are forwarded to the
+ * Prefetcher base template. @sa Prefetcher
+ */
+template <class TagStore, class Buffering>
+class GHBPrefetcher : public Prefetcher<TagStore, Buffering>
+{
+  protected:
+
+    Buffering* mq;
+    TagStore* tags;
+
+    Addr second_last_miss_addr[64/*MAX_CPUS*/];
+    Addr last_miss_addr[64/*MAX_CPUS*/];
+
+    Tick latency;
+    int degree;
+    bool useCPUId;
+
+  public:
+
+    GHBPrefetcher(int size, bool pageStop, bool serialSquash,
+                  bool cacheCheckPush, bool onlyData,
+                  Tick latency, int degree, bool useCPUId)
+        :Prefetcher<TagStore, Buffering>(size, pageStop, serialSquash,
+                                         cacheCheckPush, onlyData),
+         latency(latency), degree(degree), useCPUId(useCPUId)
+    {
+        // Known starting history: strides never read indeterminate memory.
+        for (int i = 0; i < 64/*MAX_CPUS*/; i++)
+            second_last_miss_addr[i] = last_miss_addr[i] = 0;
+    }
+
+    ~GHBPrefetcher() {}
+
+    void calculatePrefetch(Packet * &pkt, std::list<Addr> &addresses,
+                           std::list<Tick> &delays)
+    {
+        Addr blkAddr = pkt->getAddr() & ~(Addr)(this->blkSize-1);
+        int cpuID = pkt->req->getCpuNum();
+        if (!useCPUId) cpuID = 0;
+
+        // Keep strides 64 bits wide: an int would truncate Addr deltas.
+        int64_t new_stride = blkAddr - last_miss_addr[cpuID];
+        int64_t old_stride = last_miss_addr[cpuID] -
+                             second_last_miss_addr[cpuID];
+
+        second_last_miss_addr[cpuID] = last_miss_addr[cpuID];
+        last_miss_addr[cpuID] = blkAddr;
+
+        if (new_stride == old_stride) {
+            for (int d=1; d <= degree; d++) {
+                Addr newAddr = blkAddr + d * new_stride;
+                if (this->pageStop &&
+                    (blkAddr & ~(TheISA::VMPageSize - 1)) !=
+                    (newAddr & ~(TheISA::VMPageSize - 1)))
+                {
+                    //Spanned the page, so now stop
+                    this->pfSpanPage += degree - d + 1;
+                    return;
+                }
+                addresses.push_back(newAddr);
+                delays.push_back(latency);
+            }
+        }
+    }
+};
+
+#endif // __MEM_CACHE_PREFETCH_GHB_PREFETCHER_HH__
diff --git a/src/mem/cache/prefetch/stride_prefetcher.cc b/src/mem/cache/prefetch/stride_prefetcher.cc
new file mode 100644
index 000000000..93a096468
--- /dev/null
+++ b/src/mem/cache/prefetch/stride_prefetcher.cc
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Ron Dreslinski
+ *          Steve Reinhardt
+ */
+
+/**
+ * @file
+ * Stride Prefetcher template instantiations.
+ */
+
+#include "mem/cache/tags/cache_tags.hh"
+
+#include "mem/cache/tags/lru.hh"
+
+#include "base/compression/null_compression.hh"
+
+#include "mem/cache/miss/miss_queue.hh"
+#include "mem/cache/miss/blocking_buffer.hh"
+
+#include "mem/cache/prefetch/stride_prefetcher.hh"
+
+// Template Instantiations
+#ifndef DOXYGEN_SHOULD_SKIP_THIS
+
+template class StridePrefetcher<CacheTags<LRU,NullCompression>, MissQueue>;
+template class StridePrefetcher<CacheTags<LRU,NullCompression>, BlockingBuffer>;
+
+#endif //DOXYGEN_SHOULD_SKIP_THIS
diff --git a/src/mem/cache/prefetch/stride_prefetcher.hh b/src/mem/cache/prefetch/stride_prefetcher.hh
new file mode 100644
index 000000000..4a8ee7de4
--- /dev/null
+++ b/src/mem/cache/prefetch/stride_prefetcher.hh
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Ron Dreslinski
+ */
+
+/**
+ * @file
+ * Describes a strided prefetcher based on template policies.
+ */
+
+#ifndef __MEM_CACHE_PREFETCH_STRIDE_PREFETCHER_HH__
+#define __MEM_CACHE_PREFETCH_STRIDE_PREFETCHER_HH__
+
+#include "base/misc.hh" // fatal, panic, and warn
+
+#include "mem/cache/prefetch/prefetcher.hh"
+
+/**
+ * A stride prefetcher skeleton built on the Prefetcher template. The
+ * per-CPU tables of strideEntry records are declared, but the table lookup
+ * in calculatePrefetch() is commented out, so no prefetch addresses are
+ * generated yet. TagStore and Buffering are forwarded to the base class.
+ * @sa Prefetcher
+ */
+template <class TagStore, class Buffering>
+class StridePrefetcher : public Prefetcher<TagStore, Buffering>
+{
+  protected:
+
+    Buffering* mq;
+    TagStore* tags;
+
+    class strideEntry
+    {
+      public:
+        Addr IAddr;
+        Addr MAddr;
+        int stride;
+        int64_t confidence;
+
+/*	bool operator < (strideEntry a,strideEntry b)
+        {
+            if (a.confidence == b.confidence) {
+                return true; //??????
+            }
+            else return a.confidence < b.confidence;
+            }*/
+    };
+    Addr* lastMissAddr[64/*MAX_CPUS*/];
+
+    std::list<strideEntry*> table[64/*MAX_CPUS*/];
+    Tick latency;
+    int degree;
+    bool useCPUId;
+
+
+  public:
+
+    StridePrefetcher(int size, bool pageStop, bool serialSquash,
+                     bool cacheCheckPush, bool onlyData,
+                     Tick latency, int degree, bool useCPUId)
+        :Prefetcher<TagStore, Buffering>(size, pageStop, serialSquash,
+                                         cacheCheckPush, onlyData),
+         latency(latency), degree(degree), useCPUId(useCPUId)
+    {
+    }
+
+    ~StridePrefetcher() {}
+
+    void calculatePrefetch(Packet * &pkt, std::list<Addr> &addresses,
+                           std::list<Tick> &delays)
+    {
+//	Addr blkAddr = pkt->paddr & ~(Addr)(this->blkSize-1);
+        int cpuID = pkt->req->getCpuNum();
+        if (!useCPUId) cpuID = 0;
+
+        /* Scan Table for IAddr Match */
+/*	std::list<strideEntry*>::iterator iter;
+        for (iter=table[cpuID].begin();
+             iter !=table[cpuID].end();
+             iter++) {
+            if ((*iter)->IAddr == pkt->pc) break;
+        }
+
+        if (iter != table[cpuID].end()) {
+            //Hit in table
+
+            int newStride = blkAddr - (*iter)->MAddr;
+            if (newStride == (*iter)->stride) {
+                (*iter)->confidence++;
+            }
+            else {
+                (*iter)->stride = newStride;
+                (*iter)->confidence--;
+            }
+
+            (*iter)->MAddr = blkAddr;
+
+            for (int d=1; d <= degree; d++) {
+                Addr newAddr = blkAddr + d * newStride;
+                if (this->pageStop &&
+                    (blkAddr & ~(TheISA::VMPageSize - 1)) !=
+                    (newAddr & ~(TheISA::VMPageSize - 1)))
+                {
+                    //Spanned the page, so now stop
+                    this->pfSpanPage += degree - d + 1;
+                    return;
+                }
+                else
+                {
+                    addresses.push_back(newAddr);
+                    delays.push_back(latency);
+                }
+            }
+        }
+        else {
+            //Miss in table
+            //Find lowest confidence and replace
+
+        }
+*/    }
+};
+
+#endif // __MEM_CACHE_PREFETCH_STRIDE_PREFETCHER_HH__
diff --git a/src/mem/cache/prefetch/tagged_prefetcher.hh b/src/mem/cache/prefetch/tagged_prefetcher.hh
new file mode 100644
index 000000000..17f500dd8
--- /dev/null
+++ b/src/mem/cache/prefetch/tagged_prefetcher.hh
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Ron Dreslinski
+ */
+
+/**
+ * @file
+ * Describes a tagged prefetcher based on template policies.
+ */
+
+#ifndef __MEM_CACHE_PREFETCH_TAGGED_PREFETCHER_HH__
+#define __MEM_CACHE_PREFETCH_TAGGED_PREFETCHER_HH__
+
+#include "mem/cache/prefetch/prefetcher.hh"
+
+/**
+ * A tagged prefetcher built on the Prefetcher template; TagStore and
+ * Buffering are forwarded to that base class. On a miss,
+ * calculatePrefetch() walks block-sized steps past the missed block, one
+ * step per unit of degree (see tagged_prefetcher_impl.hh).
+ * @sa Prefetcher
+ */
+template <class TagStore, class Buffering>
+class TaggedPrefetcher : public Prefetcher<TagStore, Buffering>
+{
+  protected:
+
+    Buffering* mq;
+    TagStore* tags;
+
+    Tick latency;
+    int degree;
+
+  public:
+
+    TaggedPrefetcher(int size, bool pageStop, bool serialSquash,
+                     bool cacheCheckPush, bool onlyData,
+                     Tick latency, int degree);
+
+    ~TaggedPrefetcher() {}
+
+    void calculatePrefetch(Packet * &pkt, std::list<Addr> &addresses,
+                           std::list<Tick> &delays);
+};
+
+#endif // __MEM_CACHE_PREFETCH_TAGGED_PREFETCHER_HH__
diff --git a/src/mem/cache/prefetch/tagged_prefetcher_impl.hh b/src/mem/cache/prefetch/tagged_prefetcher_impl.hh
index 7bdabbe14..db5c94820 100644
--- a/src/mem/cache/prefetch/tagged_prefetcher_impl.hh
+++ b/src/mem/cache/prefetch/tagged_prefetcher_impl.hh
@@ -49,10 +49,10 @@ TaggedPrefetcher(int size, bool pageStop, bool serialSquash,
 template <class TagStore, class Buffering>
 void
 TaggedPrefetcher<TagStore, Buffering>::
-calculatePrefetch(MemReqPtr &req, std::list<Addr> &addresses,
+calculatePrefetch(Packet * &pkt, std::list<Addr> &addresses,
                   std::list<Tick> &delays)
 {
-    Addr blkAddr = req->paddr & ~(Addr)(this->blkSize-1);
+    Addr blkAddr = pkt->getAddr() & ~(Addr)(this->blkSize-1);
 
     for (int d=1; d <= degree; d++) {
         Addr newAddr = blkAddr + d*(this->blkSize);
diff --git a/src/mem/cache/tags/base_tags.cc b/src/mem/cache/tags/base_tags.cc
new file mode 100644
index 000000000..153737300
--- /dev/null
+++ b/src/mem/cache/tags/base_tags.cc
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2003-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Erik Hallnor
+ *          Ron Dreslinski
+ */
+
+/**
+ * @file
+ * Definitions of BaseTags.
+ */
+
+#include "mem/cache/tags/base_tags.hh"
+
+#include "mem/cache/base_cache.hh"
+#include "cpu/smt.hh" //maxThreadsPerCPU
+#include "sim/sim_exit.hh"
+
+using namespace std;
+
+void
+BaseTags::setCache(BaseCache *_cache)
+{
+    objName = _cache->name();   // local copy of the name, returned by name()
+    cache = _cache;             // back pointer to the parent cache
+}
+
+void
+BaseTags::regStats(const string &name)
+{
+    using namespace Stats;
+    replacements
+        .init(maxThreadsPerCPU)     // one entry per thread (see cpu/smt.hh)
+        .name(name + ".replacements")
+        .desc("number of replacements")
+        .flags(total)
+        ;
+
+    tagsInUse
+        .name(name + ".tagsinuse")
+        .desc("Cycle average of tags in use")
+        ;
+
+    totalRefs
+        .name(name + ".total_refs")
+        .desc("Total number of references to valid blocks.")
+        ;
+
+    sampledRefs
+        .name(name + ".sampled_refs")
+        .desc("Sample count of references to valid blocks.")
+        ;
+
+    avgRefs
+        .name(name + ".avg_refs")
+        .desc("Average number of references to valid blocks.")
+        ;
+
+    avgRefs = totalRefs/sampledRefs;    // Formula stat: total / sampled refs
+
+    warmupCycle
+        .name(name + ".warmup_cycle")
+        .desc("Cycle when the warmup percentage was hit.")
+        ;
+
+    registerExitCallback(new BaseTagsCallback(this)); // runs cleanupRefs()
+}
diff --git a/src/mem/cache/tags/base_tags.hh b/src/mem/cache/tags/base_tags.hh
new file mode 100644
index 000000000..b7b0c7ef0
--- /dev/null
+++ b/src/mem/cache/tags/base_tags.hh
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2003-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Erik Hallnor
+ *          Ron Dreslinski
+ */
+
+/**
+ * @file
+ * Declaration of a common base class for cache tagstore objects.
+ */
+
+#ifndef __BASE_TAGS_HH__
+#define __BASE_TAGS_HH__
+
+#include <string>
+#include "base/statistics.hh"
+#include "base/callback.hh"
+
+class BaseCache;
+
+/**
+ * Common base class of cache tagstore objects: back pointer, name and stats.
+ */
+class BaseTags
+{
+  protected:
+    /** Pointer to the parent cache. */
+    BaseCache *cache;
+
+    /** Local copy of the parent cache name. Used for DPRINTF. */
+    std::string objName;
+
+    /**
+     * The number of tags that need to be touched to meet the warmup
+     * percentage. Not set here; derived tag stores such as FALRU assign it.
+     */
+    int warmupBound;
+    /** Marked true when the cache is warmed up; set by derived classes. */
+    bool warmedUp;
+
+    // Statistics
+    /**
+     * @addtogroup CacheStatistics
+     * @{
+     */
+
+    /** Number of replacements of valid blocks per thread. */
+    Stats::Vector<> replacements;
+    /** Per cycle average of the number of tags that hold valid data. */
+    Stats::Average<> tagsInUse;
+
+    /** The total number of references to a block before it is replaced. */
+    Stats::Scalar<> totalRefs;
+
+    /**
+     * The number of reference counts sampled. This is different from
+     * replacements because we sample all the valid blocks when the simulator
+     * exits.
+     */
+    Stats::Scalar<> sampledRefs;
+
+    /**
+     * Average number of references to a block before it was replaced.
+     * @todo This should change to an average stat once we have them.
+     */
+    Stats::Formula avgRefs;
+
+    /** The cycle that the warmup percentage was hit. */
+    Stats::Scalar<> warmupCycle;
+    /**
+     * @}
+     */
+
+  public:
+
+    /**
+     * Destructor.
+     */
+    virtual ~BaseTags() {}
+
+    /**
+     * Set the parent cache back pointer. Also copies the cache name to
+     * objName.
+     * @param _cache Pointer to parent cache.
+     */
+    void setCache(BaseCache *_cache);
+
+    /**
+     * Return the parent cache name.
+     * @return the parent cache name.
+     */
+    const std::string &name() const
+    {
+        return objName;
+    }
+
+    /**
+     * Register local statistics.
+     * @param name The name to precede each statistic name.
+     */
+    void regStats(const std::string &name);
+
+    /**
+     * Average in the reference count for valid blocks when the simulation
+     * exits. Empty by default; called from BaseTagsCallback::process().
+     */
+    virtual void cleanupRefs() {}
+};
+
+class BaseTagsCallback : public Callback   // registered in BaseTags::regStats
+{
+    BaseTags *tags;     // tag store whose cleanupRefs() gets invoked
+  public:
+    BaseTagsCallback(BaseTags *t) : tags(t) {}
+    virtual void process() { tags->cleanupRefs(); }
+};
+
+#endif //__BASE_TAGS_HH__
diff --git a/src/mem/cache/tags/fa_lru.cc b/src/mem/cache/tags/fa_lru.cc
new file mode 100644
index 000000000..82d2c410d
--- /dev/null
+++ b/src/mem/cache/tags/fa_lru.cc
@@ -0,0 +1,334 @@
+/*
+ * Copyright (c) 2003-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Erik Hallnor
+ */
+
+/**
+ * @file
+ * Definitions of a fully associative LRU tagstore.
+ */
+
+#include <sstream>
+
+#include <assert.h>
+
+#include "mem/cache/tags/fa_lru.hh"
+#include "base/intmath.hh"
+#include "base/misc.hh"
+
+using namespace std;
+
+FALRU::FALRU(int _blkSize, int _size, int hit_latency)
+    : blkSize(_blkSize), size(_size),
+      numBlks(size/blkSize), hitLatency(hit_latency)
+{
+    if (!isPowerOf2(blkSize))
+        fatal("cache block size (in bytes) `%d' must be a power of two",
+              blkSize);
+    if (!(hitLatency > 0))
+        fatal("Access latency in cycles must be at least one cycle");
+    if (!isPowerOf2(size))
+        fatal("Cache Size must be power of 2 for now");
+    if (size < (1 << 17))   // numCaches below would otherwise go negative
+        fatal("Cache Size must be at least 128K for the FA LRU tags");
+
+    // Track all cache sizes from 128K up by powers of 2 (none for 128K)
+    numCaches = floorLog2(size) - 17;
+    cacheMask = (1 << numCaches) - 1;
+    cacheBoundaries = (numCaches > 0) ? new FALRUBlk *[numCaches] : NULL;
+
+    warmedUp = false;
+    warmupBound = size/blkSize;
+
+    blks = new FALRUBlk[numBlks];
+    head = &(blks[0]);
+    tail = &(blks[numBlks-1]);
+
+    // The loop below skips the first and last block; set them up by hand.
+    head->prev = NULL;
+    head->next = &(blks[1]);
+    head->inCache = cacheMask;
+    head->isTouched = false;
+    tail->prev = &(blks[numBlks-2]);
+    tail->next = NULL;
+    tail->inCache = 0;
+    tail->isTouched = false;
+
+    int index = (1 << 17) / blkSize;
+    int j = 0;
+    int flags = cacheMask;
+    for (int i = 1; i < numBlks-1; i++) {
+        blks[i].inCache = flags;
+        if (i == index - 1){
+            cacheBoundaries[j] = &(blks[i]);
+            flags &= ~ (1<<j);
+            ++j;
+            index = index << 1;
+        }
+        blks[i].prev = &(blks[i-1]);
+        blks[i].next = &(blks[i+1]);
+        blks[i].isTouched = false;
+    }
+    assert(j == numCaches);
+    assert(index == numBlks);
+    //assert(check());
+}
+
+void
+FALRU::regStats(const string &name)
+{
+    using namespace Stats;
+    BaseTags::regStats(name);
+
+    // One entry per tracked size plus a final entry for the full cache.
+    const int numSizes = numCaches + 1;
+    hits.init(numSizes)
+        .name(name + ".falru_hits")
+        .desc("The number of hits in each cache size.");
+    misses.init(numSizes)
+        .name(name + ".falru_misses")
+        .desc("The number of misses in each cache size.");
+    accesses
+        .name(name + ".falru_accesses")
+        .desc("The number of accesses to the FA LRU cache.");
+
+    // Entry i stands for a cache of 128K << i; label it in K below 1M.
+    for (int i = 0; i < numSizes; ++i) {
+        stringstream label;
+        if (i >= 3) {
+            label << (1 << (i-3)) << "M";
+        } else {
+            label << (1 << (i+7)) << "K";
+        }
+        const string sizeName = label.str();
+
+        hits.subname(i, sizeName);
+        hits.subdesc(i, "Hits in a " + sizeName + " cache");
+        misses.subname(i, sizeName);
+        misses.subdesc(i, "Misses in a " + sizeName + " cache");
+    }
+}
+
+FALRUBlk *
+FALRU::hashLookup(Addr addr) const
+{
+    // A miss in the address hash is reported as a NULL block pointer.
+    const tagIterator entry = tagHash.find(addr);
+    if (entry == tagHash.end())
+        return NULL;
+    return entry->second;
+}
+
+bool
+FALRU::probe(int asid, Addr addr) const
+{
+    const Addr aligned = blkAlign(addr);
+    FALRUBlk* const entry = hashLookup(aligned);    // NULL on a hash miss
+    return entry != NULL && entry->tag == aligned && entry->isValid();
+}
+
+void
+FALRU::invalidateBlk(int asid, Addr addr)
+{
+    Addr blkAddr = blkAlign(addr);
+    FALRUBlk* blk = hashLookup(blkAddr);    // NULL when addr has no entry
+    if (blk) {
+        assert(blk->tag == blkAddr);
+        blk->status = 0;
+        blk->isTouched = false;
+        tagsInUse--;
+    }
+}
+
+FALRUBlk*
+FALRU::findBlock(Addr addr, int asid, int &lat, int *inCache)
+{
+    accesses++;
+    const Addr blkAddr = blkAlign(addr);
+    FALRUBlk* blk = hashLookup(blkAddr);
+    int residency = 0;  // inCache flags of the block before it is promoted
+    if (blk == NULL || !blk->isValid()) {
+        blk = NULL;     // absent or invalid: a miss at every size
+        for (int i = 0; i <= numCaches; ++i) {
+            misses[i]++;
+        }
+    } else {
+        assert(blk->tag == blkAddr);
+        residency = blk->inCache;
+        // Score each tracked smaller size by the block's residency bit.
+        for (int i = 0; i < numCaches; i++) {
+            if (residency & (1 << i)) {
+                hits[i]++;
+            } else {
+                misses[i]++;
+            }
+        }
+        hits[numCaches]++;      // the full-size cache always hits here
+        if (blk != head){
+            moveToHead(blk);
+        }
+    }
+    if (inCache) {
+        *inCache = residency;
+    }
+
+    lat = hitLatency;
+    //assert(check());
+    return blk;
+}
+
+FALRUBlk*
+FALRU::findBlock(Packet * &pkt, int &lat, int *inCache)
+{
+    // Same bookkeeping as findBlock(Addr, int, int &, int *), keyed by the
+    // packet's address.
+    const Addr blkAddr = blkAlign(pkt->getAddr());
+    accesses++;
+    FALRUBlk* blk = hashLookup(blkAddr);
+    int residency = 0;  // inCache flags of the block before it is promoted
+    if (blk == NULL || !blk->isValid()) {
+        blk = NULL;     // absent or invalid: a miss at every size
+        for (int i = 0; i <= numCaches; ++i) {
+            misses[i]++;
+        }
+    } else {
+        assert(blk->tag == blkAddr);
+        residency = blk->inCache;
+        // Score each tracked smaller size by the block's residency bit.
+        for (int i = 0; i < numCaches; i++) {
+            if (residency & (1 << i)) {
+                hits[i]++;
+            } else {
+                misses[i]++;
+            }
+        }
+        hits[numCaches]++;      // the full-size cache always hits here
+        if (blk != head){
+            moveToHead(blk);
+        }
+    }
+    if (inCache) {
+        *inCache = residency;
+    }
+
+    lat = hitLatency;
+    //assert(check());
+    return blk;
+}
+
+FALRUBlk*
+FALRU::findBlock(Addr addr, int asid) const
+{
+    // Pure lookup: no statistics are updated and nothing is promoted.
+    const Addr blkAddr = blkAlign(addr);
+    FALRUBlk* const blk = hashLookup(blkAddr);
+    if (blk == NULL || !blk->isValid()) {
+        return NULL;
+    }
+
+    assert(blk->tag == blkAddr);
+    return blk;
+}
+
+FALRUBlk*
+FALRU::findReplacement(Packet * &pkt, PacketList &writebacks,
+                       BlkList &compress_blocks)
+{
+    FALRUBlk * blk = tail;      // the LRU block is always the victim
+    assert(blk->inCache == 0);
+    moveToHead(blk);
+    if (hashLookup(blk->tag) == blk)    // a stale tag can map elsewhere
+        tagHash.erase(blk->tag);
+    tagHash[blkAlign(pkt->getAddr())] = blk;
+    if (blk->isValid()) {
+        replacements[0]++;
+    } else {
+        tagsInUse++;
+        blk->isTouched = true;
+        if (!warmedUp && tagsInUse.value() >= warmupBound) {
+            warmedUp = true;
+            warmupCycle = curTick;
+        }
+    }
+    return blk;
+}
+
+void
+FALRU::moveToHead(FALRUBlk *blk)
+{
+    int updateMask = blk->inCache ^ cacheMask;  // bits where blk differs
+    for (int i = 0; i < numCaches; i++){
+        if ((1<<i) & updateMask) {          // bit i differs from cacheMask
+            cacheBoundaries[i]->inCache &= ~(1<<i);   // old boundary leaves i
+            cacheBoundaries[i] = cacheBoundaries[i]->prev; // step toward head
+        } else if (cacheBoundaries[i] == blk) {     // blk itself was boundary
+            cacheBoundaries[i] = blk->prev;
+        }
+    }
+    blk->inCache = cacheMask;   // at the head blk carries every tracked bit
+    if (blk != head) {
+        if (blk == tail){       // unlink from the LRU end
+            assert(blk->next == NULL);
+            tail = blk->prev;
+            tail->next = NULL;
+        } else {                // unlink from the middle of the list
+            blk->prev->next = blk->next;
+            blk->next->prev = blk->prev;
+        }
+        blk->next = head;       // relink in front of the old head
+        blk->prev = NULL;
+        head->prev = blk;
+        head = blk;
+    }
+}
+
+bool
+FALRU::check()
+{
+    int bytesSeen = 0;              // bytes covered walking from the head
+    int nextBoundary = 1 << 17;     // first tracked size is 128K
+    int level = 0;
+    int expected = cacheMask;
+    for (FALRUBlk* cur = head; cur != NULL; cur = cur->next) {
+        bytesSeen += blkSize;
+        if (cur->inCache != expected) {
+            return false;
+        }
+        if (bytesSeen != nextBoundary || cur == tail) {
+            continue;
+        }
+        // cur closes the level-th tracked size, so it must be its boundary.
+        if (cacheBoundaries[level] != cur) {
+            return false;
+        }
+        expected &= ~(1 << level);
+        nextBoundary <<= 1;
+        ++level;
+    }
+    return true;
+}
diff --git a/src/mem/cache/tags/fa_lru.hh b/src/mem/cache/tags/fa_lru.hh
new file mode 100644
index 000000000..566e36c27
--- /dev/null
+++ b/src/mem/cache/tags/fa_lru.hh
@@ -0,0 +1,346 @@
+/*
+ * Copyright (c) 2003-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Erik Hallnor
+ */
+
+/**
+ * @file
+ * Declaration of a fully associative LRU tag store.
+ */
+
+#ifndef __FA_LRU_HH__
+#define __FA_LRU_HH__
+
+#include <list>
+
+#include "mem/cache/cache_blk.hh"
+#include "mem/packet.hh"
+#include "base/hashmap.hh"
+#include "mem/cache/tags/base_tags.hh"
+
+/**
+ * A fully associative cache block.
+ */
+class FALRUBlk : public CacheBlk
+{
+public:
+    /** The previous block in LRU order. */
+    FALRUBlk *prev;
+    /** The next block in LRU order. */
+    FALRUBlk *next;
+    /** Has this block been touched? */
+    bool isTouched;
+
+    /**
+     * A bit mask of the sizes of cache that this block is resident in.
+     * Each bit represents a power-of-2 cache size, starting at 128KB.
+     * If bit 0 is set, this block is in a 128KB cache
+     * If bit 2 is set, this block is in a 512KB cache, etc.
+     * There is one bit for each cache smaller than the full size (default
+     * 16MB).
+     */
+    int inCache;
+};
+
+/**
+ * A fully associative LRU cache. Keeps statistics for accesses to a number of
+ * cache sizes at once.
+ */
+class FALRU : public BaseTags
+{
+  public:
+    /** Typedef the block type used in this class. */
+    typedef FALRUBlk BlkType;
+    /** Typedef a list of pointers to the local block type. */
+    typedef std::list<FALRUBlk*> BlkList;
+  protected:
+    /** The block size of the cache. */
+    const int blkSize;
+    /** The size of the cache. */
+    const int size;
+    /** The number of blocks in the cache. */
+    const int numBlks; // calculated internally
+    /** The hit latency of the cache. */
+    const int hitLatency;
+
+    /** Array of pointers to blocks at the cache size boundaries. */
+    FALRUBlk **cacheBoundaries;
+    /** A mask for the FALRUBlk::inCache bits. */
+    int cacheMask;
+    /** The number of different size caches being tracked. */
+    int numCaches;
+
+    /** The cache blocks (new[]'d by the constructor, never freed here). */
+    FALRUBlk *blks;
+
+    /** The MRU block. */
+    FALRUBlk *head;
+    /** The LRU block. */
+    FALRUBlk *tail;
+
+    /** Hash table type mapping addresses to cache block pointers. */
+    typedef m5::hash_map<Addr, FALRUBlk *, m5::hash<Addr> > hash_t;
+    /** Iterator into the address hash table. */
+    typedef hash_t::const_iterator tagIterator;
+
+    /** The address hash table. */
+    hash_t tagHash;
+
+    /**
+     * Find the cache block for the given address.
+     * @param addr The address to find.
+     * @return The cache block of the address, if any.
+     */
+    FALRUBlk * hashLookup(Addr addr) const;
+
+    /**
+     * Move a cache block to the MRU position.
+     * @param blk The block to promote.
+     */
+    void moveToHead(FALRUBlk *blk);
+
+    /**
+     * Check to make sure all the cache boundaries are still where they should
+     * be. Used for debugging.
+     * @return True if everything is correct.
+     */
+    bool check();
+
+    /**
+     * @defgroup FALRUStats Fully Associative LRU specific statistics
+     * The FA lru stack lets us track multiple cache sizes at once. These
+     * statistics track the hits and misses for different cache sizes.
+     * @{
+     */
+
+    /** Hits in each cache size >= 128K. */
+    Stats::Vector<> hits;
+    /** Misses in each cache size >= 128K. */
+    Stats::Vector<> misses;
+    /** Total number of accesses. */
+    Stats::Scalar<> accesses;
+
+    /**
+     * @}
+     */
+
+public:
+    /**
+     * Construct and initialize this cache tagstore.
+     * @param blkSize The block size of the cache.
+     * @param size The size of the cache.
+     * @param hit_latency The hit latency of the cache.
+     */
+    FALRU(int blkSize, int size, int hit_latency);
+
+    /**
+     * Register the stats for this object.
+     * @param name The name to prepend to the stats name.
+     */
+    void regStats(const std::string &name);
+
+    /**
+     * Return true if the address is found in the cache.
+     * @param asid The address space ID.
+     * @param addr The address to look for.
+     * @return True if the address is in the cache.
+     */
+    bool probe(int asid, Addr addr) const;
+
+    /**
+     * Invalidate the cache block that contains the given addr.
+     * @param asid The address space ID.
+     * @param addr The address to invalidate.
+     */
+    void invalidateBlk(int asid, Addr addr);
+
+    /**
+     * Find the block in the cache and update the replacement data. Returns
+     * the access latency and the in cache flags as a side effect
+     * @param addr The address to look for.
+     * @param asid The address space ID.
+     * @param lat The latency of the access.
+     * @param inCache The FALRUBlk::inCache flags.
+     * @return Pointer to the cache block.
+     */
+    FALRUBlk* findBlock(Addr addr, int asid, int &lat, int *inCache = 0);
+
+    /**
+     * Find the block in the cache and update the replacement data. Returns
+     * the access latency and the in cache flags as a side effect
+     * @param pkt The packet whose block to find
+     * @param lat The latency of the access.
+     * @param inCache The FALRUBlk::inCache flags.
+     * @return Pointer to the cache block.
+     */
+    FALRUBlk* findBlock(Packet * &pkt, int &lat, int *inCache = 0);
+
+    /**
+     * Find the block in the cache, do not update the replacement data.
+     * @param addr The address to look for.
+     * @param asid The address space ID.
+     * @return Pointer to the cache block.
+     */
+    FALRUBlk* findBlock(Addr addr, int asid) const;
+
+    /**
+     * Find a replacement block for the address provided.
+     * @param pkt The packet to find a replacement candidate for.
+     * @param writebacks List for any writebacks to be performed.
+     * @param compress_blocks List of blocks to compress, for adaptive comp.
+     * @return The block to place the replacement in.
+     */
+    FALRUBlk* findReplacement(Packet * &pkt, PacketList & writebacks,
+                              BlkList &compress_blocks);
+
+    /**
+     * Return the hit latency of this cache.
+     * @return The hit latency.
+     */
+    int getHitLatency() const
+    {
+        return hitLatency;
+    }
+
+    /**
+     * Return the block size of this cache.
+     * @return The block size.
+     */
+    int getBlockSize() const
+    {
+        return blkSize;
+    }
+
+    /**
+     * Return the subblock size of this cache, always the block size.
+     * @return The block size.
+     */
+    int getSubBlockSize() const
+    {
+        return blkSize;
+    }
+
+    /**
+     * Align an address to the block size.
+     * @param addr the address to align.
+     * @return The aligned address.
+     */
+    Addr blkAlign(Addr addr) const
+    {
+        return (addr & ~(Addr)(blkSize-1));
+    }
+
+    /**
+     * Generate the tag from the address. For fully associative this is just
+     * the block address.
+     * @param addr The address to get the tag from.
+     * @param blk ignored here
+     * @return The tag.
+     */
+    Addr extractTag(Addr addr, FALRUBlk *blk) const
+    {
+        return blkAlign(addr);
+    }
+
+    /**
+     * Return the set of an address. Only one set in a fully associative cache.
+     * @param addr The address to get the set from.
+     * @return 0.
+     */
+    int extractSet(Addr addr) const
+    {
+        return 0;
+    }
+
+    /**
+     * Calculate the block offset of an address.
+     * @param addr the address to get the offset of.
+     * @return the block offset.
+     */
+    int extractBlkOffset(Addr addr) const
+    {
+        return (addr & (Addr)(blkSize-1));
+    }
+
+    /**
+     * Regenerate the block address from the tag and the set.
+     * @param tag The tag of the block.
+     * @param set The set the block belongs to.
+     * @return the block address.
+     */
+    Addr regenerateBlkAddr(Addr tag, int set) const
+    {
+        return (tag);
+    }
+
+    /**
+     * Read the data out of the internal storage of a cache block. FALRU
+     * currently doesn't support data storage.
+     * @param blk The cache block to read.
+     * @param data The buffer to read the data into.
+     * @note The body is empty, so the buffer is left untouched.
+     */
+    void readData(FALRUBlk *blk, uint8_t *data)
+    {
+    }
+
+    /**
+     * Write data into the internal storage of a cache block. FALRU
+     * currently doesn't support data storage.
+     * @param blk The cache block to be written.
+     * @param data The data to write.
+     * @param size The number of bytes to write.
+     * @param writebacks A list for any writebacks to be performed. May be
+     * needed when writing to a compressed block.
+     */
+    void writeData(FALRUBlk *blk, uint8_t *data, int size,
+                   PacketList &writebacks)
+    {
+    }
+
+    /**
+     * Unimplemented. Perform a cache block copy from block aligned addresses.
+     * @param source The block aligned source address.
+     * @param dest The block aligned destination address.
+     * @param asid The address space ID.
+     * @param writebacks List for any generated writeback requests.
+     */
+    void doCopy(Addr source, Addr dest, int asid, PacketList &writebacks)
+    {
+    }
+
+    /**
+     * Unimplemented.
+     */
+    void fixCopy(Packet * &pkt, PacketList &writebacks)
+    {
+    }
+
+};
+
+#endif
diff --git a/src/mem/cache/tags/iic.cc b/src/mem/cache/tags/iic.cc
new file mode 100644
index 000000000..847fabc88
--- /dev/null
+++ b/src/mem/cache/tags/iic.cc
@@ -0,0 +1,880 @@
+/*
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Erik Hallnor
+ */
+
+/**
+ * @file
+ * Definitions of the Indirect Index Cache tagstore.
+ */
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#include <math.h>
+
+#include "mem/cache/base_cache.hh"
+#include "mem/cache/tags/iic.hh"
+#include "base/intmath.hh"
+#include "sim/root.hh" // for curTick
+
+#include "base/trace.hh" // for DPRINTF
+
+
+using namespace std;
+
+/** Track the number of accesses to each cache set. */
+#define PROFILE_IIC 1
+/**
+ * Construct the IIC tag store: derive the geometry from the parameters,
+ * validate it, attach the replacement policy, and allocate the data
+ * reference counters, the data storage, the tag store and the hash sets.
+ * Tags below primaryBound are handed to the hash sets; the remaining tags
+ * are linked into the secondary free list.
+ * NOTE(review): the initializer list divides by params.subblockSize before
+ * any check in the body runs, so a zero subblock size is not caught by the
+ * fatal() calls -- confirm callers never pass zero.
+ * @param params The construction parameters.
+ */
+IIC::IIC(IIC::Params &params) :
+    hashSets(params.numSets), blkSize(params.blkSize), assoc(params.assoc),
+    hitLatency(params.hitLatency), subSize(params.subblockSize),
+    numSub(blkSize/subSize),
+    trivialSize((floorLog2(params.size/subSize)*numSub)/8),
+    tagShift(floorLog2(blkSize)), blkMask(blkSize - 1),
+    subShift(floorLog2(subSize)), subMask(numSub - 1),
+    hashDelay(params.hashDelay),
+    numBlocks(params.size/subSize), // one data block per subblock of capacity
+    numTags(hashSets * assoc + params.size/blkSize -1),
+    numSecondary(params.size/blkSize),
+    tagNull(numTags), // one past the last tag index; used as the null link
+    primaryBound(hashSets * assoc) // first tag index outside the hash sets
+{
+    int i;
+
+    // Check parameters
+    if (blkSize < 4 || !isPowerOf2(blkSize)) {
+        fatal("Block size must be at least 4 and a power of 2");
+    }
+    if (hashSets <= 0 || !isPowerOf2(hashSets)) {
+        fatal("# of hashsets must be non-zero and a power of 2");
+    }
+    if (assoc <= 0) {
+        fatal("associativity must be greater than zero");
+    }
+    if (hitLatency <= 0) {
+        fatal("access latency must be greater than zero");
+    }
+    if (numSub*subSize != blkSize) {
+        fatal("blocksize must be evenly divisible by subblock size");
+    }
+
+    // debug stuff
+    freeSecond = numSecondary;
+
+    warmedUp = false;
+    warmupBound = params.size/blkSize;
+
+    // Replacement Policy Initialization
+    repl = params.rp;
+    repl->setIIC(this);
+
+    //last_miss_time = 0
+
+    // allocate data reference counters
+    dataReferenceCount = new int[numBlocks];
+    memset(dataReferenceCount, 0, numBlocks*sizeof(int));
+
+    // Allocate storage for both internal data and block fast access data.
+    // We allocate it as one large chunk to reduce overhead and to make
+    // deletion easier.
+    int data_index = 0;
+    dataStore = new uint8_t[(numBlocks + numTags) * blkSize];
+    dataBlks = new uint8_t*[numBlocks];
+    for (i = 0; i < numBlocks; ++i) {
+        dataBlks[i] = &dataStore[data_index];
+        freeDataBlock(i); // every data block starts out on blkFreelist
+        data_index += subSize;
+    }
+
+    assert(data_index == numBlocks * subSize);
+
+    // allocate and init tag store
+    tagStore = new IICTag[numTags];
+
+    int blkIndex = 0;
+    // allocate and init sets
+    sets = new IICSet[hashSets];
+    for (i = 0; i < hashSets; ++i) {
+        sets[i].assoc = assoc;
+        sets[i].tags = new IICTag*[assoc];
+        sets[i].chain_ptr = tagNull; // no secondary chain yet
+
+        for (int j = 0; j < assoc; ++j) {
+            IICTag *tag = &tagStore[blkIndex++];
+            tag->chain_ptr = tagNull;
+            tag->data_ptr.resize(numSub);
+            tag->size = blkSize;
+            tag->trivialData = new uint8_t[trivialSize];
+            tag->numData = 0;
+            sets[i].tags[j] = tag;
+            tag->set = i;
+            tag->data = &dataStore[data_index]; // fast access data follows the subblocks
+            data_index += blkSize;
+        }
+    }
+
+    assert(blkIndex == primaryBound);
+
+    for (i = primaryBound; i < tagNull; i++) {
+        tagStore[i].chain_ptr = i+1; // link to the next free tag; the last one links to tagNull
+        //setup data ptrs to subblocks
+        tagStore[i].data_ptr.resize(numSub);
+        tagStore[i].size = blkSize;
+        tagStore[i].trivialData = new uint8_t[trivialSize];
+        tagStore[i].numData = 0;
+        tagStore[i].set = 0;
+        tagStore[i].data = &dataStore[data_index];
+        data_index += blkSize;
+    }
+    freelist = primaryBound; // head of the secondary free list
+}
+
+/**
+ * Release the storage allocated by the constructor.
+ */
+IIC::~IIC()
+{
+    delete [] dataReferenceCount;
+    // dataBlks only holds pointers into dataStore, but the pointer array
+    // itself is allocated with new[] in the constructor and must be freed.
+    delete [] dataBlks;
+    delete [] dataStore;
+    // NOTE(review): the per-set 'tags' arrays and the per-tag 'trivialData'
+    // buffers are also allocated with new[] in the constructor; confirm
+    // whether IICSet/IICTag release them, otherwise they still leak here.
+    delete [] tagStore;
+    delete [] sets;
+}
+
+/**
+ * Register the cache statistics: the base tag statistics, the hit/miss
+ * hash depth distributions, the per-set access distribution, the depth
+ * and hit/miss totals, and the replacement policy's own statistics.
+ * @param name Prefix prepended to every statistic name.
+ */
+void
+IIC::regStats(const string &name)
+{
+    using namespace Stats;
+
+    BaseTags::regStats(name);
+
+    hitHashDepth.init(0, 20, 1);
+    missHashDepth.init(0, 20, 1);
+    setAccess.init(0, hashSets, 1); // initialized regardless of PROFILE_IIC
+
+    /** IIC Statistics */
+    hitHashDepth
+        .name(name + ".hit_hash_depth_dist")
+        .desc("Dist. of Hash lookup depths")
+        .flags(pdf)
+        ;
+
+    missHashDepth
+        .name(name + ".miss_hash_depth_dist")
+        .desc("Dist. of Hash lookup depths") // NOTE(review): same text as the hit distribution
+        .flags(pdf)
+        ;
+
+    repl->regStats(name);
+
+    if (PROFILE_IIC) // macro defined as 1 near the top of this file
+        setAccess
+            .name(name + ".set_access_dist")
+            .desc("Dist. of Accesses across sets")
+            .flags(pdf)
+            ;
+
+    missDepthTotal
+        .name(name + ".miss_depth_total")
+        .desc("Total of miss depths")
+        ;
+
+    hashMiss
+        .name(name + ".hash_miss")
+        .desc("Total of misses in hash table")
+        ;
+
+    hitDepthTotal
+        .name(name + ".hit_depth_total")
+        .desc("Total of hit depths")
+        ;
+
+    hashHit
+        .name(name + ".hash_hit")
+        .desc("Total of hites in hash table") // NOTE(review): "hites" is a typo for "hits" in the emitted text
+        ;
+}
+
+/**
+ * Probe the cache for the presence of a block. Uses the two-argument
+ * lookup, which reports no latency.
+ * @param asid The address space ID.
+ * @param addr The address to look for.
+ * @return True if a matching block was found.
+ */
+bool
+IIC::probe(int asid, Addr addr) const
+{
+    const IICTag *found = findBlock(addr, asid);
+    return found != NULL;
+}
+/**
+ * Find the block for an address and report the lookup latency. The primary
+ * hash set is searched first, then the set's secondary chain; a tag found
+ * in the secondary chain is swapped into the last primary slot of the set.
+ * Updates the hit/miss depth statistics and, on a hit, moves the tag to
+ * the head of its set, marks it referenced and bumps its reference count.
+ * @param addr The address to look up.
+ * @param asid The address space ID.
+ * @param lat Set to the lookup latency (hash steps * hashDelay +
+ * hitLatency), or to the time until the block is ready if that is longer.
+ * @return Pointer to the matching tag, or NULL on a miss.
+ */
+IICTag*
+IIC::findBlock(Addr addr, int asid, int &lat)
+{
+    Addr tag = extractTag(addr);
+    unsigned set = hash(addr);
+    int set_lat;
+
+    unsigned long chain_ptr;
+
+    if (PROFILE_IIC)
+        setAccess.sample(set);
+
+    IICTag *tag_ptr = sets[set].findTag(asid, tag, chain_ptr);
+    set_lat = 1; // the primary lookup counts as one hash step
+    if (tag_ptr == NULL && chain_ptr != tagNull) {
+        int secondary_depth;
+        tag_ptr = secondaryChain(asid, tag, chain_ptr, &secondary_depth);
+        set_lat += secondary_depth;
+        // set depth for statistics fix this later!!! egh
+        sets[set].depth = set_lat;
+
+        if (tag_ptr != NULL) {
+            /* need to move tag into primary table */
+            // need to preserve chain: fix this egh
+            sets[set].tags[assoc-1]->chain_ptr = tag_ptr->chain_ptr;
+            tagSwap(tag_ptr - tagStore, sets[set].tags[assoc-1] - tagStore);
+            tag_ptr = sets[set].findTag(asid, tag, chain_ptr); // re-find: the swap moved the entry
+            assert(tag_ptr!=NULL);
+        }
+
+    }
+    set_lat = set_lat * hashDelay + hitLatency;
+    if (tag_ptr != NULL) {
+        // IIC replacement: if this is not the first element of
+        //   list, reorder
+        sets[set].moveToHead(tag_ptr);
+
+        hitHashDepth.sample(sets[set].depth); // NOTE(review): depth is only assigned in the secondary path above
+        hashHit++;
+        hitDepthTotal += sets[set].depth;
+        tag_ptr->status |= BlkReferenced;
+        lat = set_lat;
+        if (tag_ptr->whenReady > curTick && tag_ptr->whenReady - curTick > set_lat) {
+            lat = tag_ptr->whenReady - curTick; // block not ready yet: wait for it instead
+        }
+
+        tag_ptr->refCount += 1;
+    }
+    else {
+        // fall through: cache block not found, not a hit...
+        missHashDepth.sample(sets[set].depth);
+        hashMiss++;
+        missDepthTotal += sets[set].depth;
+        lat = set_lat;
+    }
+    return tag_ptr;
+}
+
+/**
+ * Find the block addressed by a packet and report the lookup latency.
+ * @param pkt The packet whose address and ASID identify the block.
+ * @param lat Set to the lookup latency.
+ * @return Pointer to the matching tag, or NULL on a miss.
+ */
+IICTag*
+IIC::findBlock(Packet * &pkt, int &lat)
+{
+    // The lookup is identical to the address/ASID overload, so delegate to
+    // it instead of maintaining a second copy of the same logic.
+    return findBlock(pkt->getAddr(), pkt->req->getAsid(), lat);
+}
+
+/**
+ * Find the block for an address without reporting a latency. Searches the
+ * primary hash set and, if needed, the set's secondary chain.
+ * @param addr The address to look up.
+ * @param asid The address space ID.
+ * @return Pointer to the matching tag, or NULL if none was found.
+ */
+IICTag*
+IIC::findBlock(Addr addr, int asid) const
+{
+    const Addr blk_tag = extractTag(addr);
+    const unsigned set_index = hash(addr);
+
+    // findTag fills in 'chain'; tagNull means there is no secondary chain.
+    unsigned long chain;
+    IICTag *match = sets[set_index].findTag(asid, blk_tag, chain);
+    if (match != NULL || chain == tagNull)
+        return match;
+
+    // The depth is required by secondaryChain but not needed here.
+    int depth_unused;
+    return secondaryChain(asid, blk_tag, chain, &depth_unused);
+}
+
+
+/**
+ * Allocate a tag and a full line's worth of data subblocks for the block
+ * addressed by a packet, evicting other blocks if necessary.
+ * @param pkt The packet whose address selects the hash set.
+ * @param writebacks List that receives any writebacks caused by evictions.
+ * @param compress_blocks Receives the uncompressed blocks reported by the
+ * replacement policy's doAdvance().
+ * @return Pointer to the newly allocated tag.
+ */
+IICTag*
+IIC::findReplacement(Packet * &pkt, PacketList &writebacks,
+                     BlkList &compress_blocks)
+{
+    DPRINTF(IIC, "Finding Replacement for %x\n", pkt->getAddr());
+    unsigned set = hash(pkt->getAddr());
+
+    // Get enough subblocks for a full cache line. They are gathered before
+    // the tag is chosen, so hold them in a scratch vector that cleans up
+    // after itself instead of a raw new[]/delete[] pair.
+    vector<unsigned long> tmp_data(numSub);
+    for (int i = 0; i < numSub; ++i) {
+        tmp_data[i] = getFreeDataBlock(writebacks);
+        assert(dataReferenceCount[tmp_data[i]]==0);
+    }
+
+    IICTag *tag_ptr = getFreeTag(set, writebacks);
+
+    tag_ptr->set = set;
+    for (int i = 0; i < numSub; ++i) {
+        tag_ptr->data_ptr[i] = tmp_data[i];
+        dataReferenceCount[tag_ptr->data_ptr[i]]++;
+    }
+    tag_ptr->numData = numSub;
+    assert(tag_ptr - tagStore < primaryBound); // make sure it is in primary
+    tag_ptr->chain_ptr = tagNull;
+    sets[set].moveToHead(tag_ptr);
+
+    // Let the replacement policy advance and collect the blocks it reports
+    // that are not yet compressed.
+    list<unsigned long> tag_indexes;
+    repl->doAdvance(tag_indexes);
+    while (!tag_indexes.empty()) {
+        if (!tagStore[tag_indexes.front()].isCompressed()) {
+            compress_blocks.push_back(&tagStore[tag_indexes.front()]);
+        }
+        tag_indexes.pop_front();
+    }
+
+    tag_ptr->re = (void*)repl->add(tag_ptr-tagStore);
+
+    return tag_ptr;
+}
+/**
+ * Evict the block selected by the replacement policy: update the
+ * replacement statistics, queue a writeback packet if the block is
+ * modified, release each of its data subblocks whose reference count drops
+ * to zero, and free its tag.
+ * @param writebacks List that receives the writeback packet, if any.
+ */
+void
+IIC::freeReplacementBlock(PacketList & writebacks)
+{
+    IICTag *tag_ptr;
+    unsigned long data_ptr;
+    /* consult replacement policy */
+    tag_ptr = &tagStore[repl->getRepl()];
+    assert(tag_ptr->isValid());
+
+    DPRINTF(Cache, "Replacing %x in IIC: %s\n",
+            regenerateBlkAddr(tag_ptr->tag,0),
+            tag_ptr->isModified() ? "writeback" : "clean");
+    /* write back replaced block data */
+    if (tag_ptr && (tag_ptr->isValid())) { // tag_ptr is an element address, so only isValid() can fail
+        replacements[0]++;
+        totalRefs += tag_ptr->refCount;
+        ++sampledRefs;
+        tag_ptr->refCount = 0;
+
+        if (tag_ptr->isModified()) {
+/*	    Packet * writeback =
+                buildWritebackReq(regenerateBlkAddr(tag_ptr->tag, 0),
+                                  tag_ptr->req->asid, tag_ptr->xc, blkSize,
+                                  tag_ptr->data,
+                                  tag_ptr->size);
+*/
+            Request *writebackReq = new Request(regenerateBlkAddr(tag_ptr->tag, 0),
+                                           blkSize, 0);
+            Packet *writeback = new Packet(writebackReq, Packet::Writeback, -1);
+            writeback->allocate();
+            memcpy(writeback->getPtr<uint8_t>(), tag_ptr->data, blkSize); // copy the block's fast access data
+
+            writebacks.push_back(writeback);
+        }
+    }
+
+    // free the data blocks
+    for (int i = 0; i < tag_ptr->numData; ++i) {
+        data_ptr = tag_ptr->data_ptr[i];
+        assert(dataReferenceCount[data_ptr]>0);
+        if (--dataReferenceCount[data_ptr] == 0) {
+            freeDataBlock(data_ptr);
+        }
+    }
+    freeTag(tag_ptr);
+}
+
+/**
+ * Take a data subblock off the free list, evicting blocks until one is
+ * available.
+ * @param writebacks List that receives any writebacks caused by evictions.
+ * @return Index of the free data subblock.
+ */
+unsigned long
+IIC::getFreeDataBlock(PacketList & writebacks)
+{
+    /* find data block */
+    while (blkFreelist.empty()) {
+        freeReplacementBlock(writebacks);
+    }
+
+    unsigned long data_ptr = blkFreelist.front();
+    blkFreelist.pop_front();
+    DPRINTF(IICMore,"Found free data at %d\n",data_ptr);
+    return data_ptr;
+}
+
+
+/**
+ * Obtain a free tag in the primary table for a hash set. If the set has no
+ * free tag, the tag in the set's last slot is spilled to a tag taken from
+ * the secondary free list (evicting blocks until one is available), and
+ * the spilled tag is pushed onto the head of the set's secondary chain.
+ * Also updates tagsInUse and the warm-up bookkeeping.
+ * @param set The hash set that needs a tag.
+ * @param writebacks List that receives any writebacks caused by evictions.
+ * @return Pointer to the free tag.
+ */
+IICTag*
+IIC::getFreeTag(int set, PacketList & writebacks)
+{
+    unsigned long tag_index;
+    IICTag *tag_ptr;
+    // Add new tag
+    tag_ptr = sets[set].findFree();
+    // if no free in primary, and secondary exists
+    if (!tag_ptr && numSecondary) { // NOTE(review): tag_ptr stays NULL when numSecondary is 0
+        // need to spill a tag into secondary storage
+        while (freelist == tagNull) {
+            // get replacements until one is in secondary
+            freeReplacementBlock(writebacks);
+        }
+
+        tag_index = freelist; // pop the head of the secondary free list
+        freelist = tagStore[freelist].chain_ptr;
+        freeSecond--;
+
+        assert(tag_index != tagNull);
+        tagSwap(tag_index, sets[set].tags[assoc-1] - tagStore);
+        tagStore[tag_index].chain_ptr = sets[set].chain_ptr; // spilled tag becomes the chain head
+        sets[set].chain_ptr = tag_index;
+
+        tag_ptr = sets[set].tags[assoc-1];
+    }
+    DPRINTF(IICMore,"Found free tag at %d\n",tag_ptr - tagStore);
+    tagsInUse++;
+    if (!warmedUp && tagsInUse.value() >= warmupBound) {
+        warmedUp = true;
+        warmupCycle = curTick;
+    }
+
+    return tag_ptr;
+}
+/**
+ * Release a tag. The tag's status, data count and replacement entry are
+ * cleared. A tag in secondary storage is unlinked from its set's chain (or
+ * swapped with its successor) and a secondary slot is returned to the free
+ * list. A tag in the primary table is swapped with the head of its set's
+ * secondary chain, if the set has one, and moved to the tail of the set.
+ * Does nothing when tag_ptr is NULL.
+ * @param tag_ptr The tag to free.
+ */
+void
+IIC::freeTag(IICTag *tag_ptr)
+{
+    unsigned long tag_index, tmp_index;
+    // Fix tag_ptr
+    if (tag_ptr) {
+        // we have a tag to clear
+        DPRINTF(IICMore,"Freeing Tag for %x\n",
+                regenerateBlkAddr(tag_ptr->tag,0));
+        tagsInUse--;
+        tag_ptr->status = 0;
+        tag_ptr->numData = 0;
+        tag_ptr->re = NULL;
+        tag_index = tag_ptr - tagStore;
+        if (tag_index >= primaryBound) {
+            // tag_ptr points to secondary store
+            assert(tag_index < tagNull); // remove this?? egh
+            if (tag_ptr->chain_ptr == tagNull) {
+                // need to fix chain list
+                unsigned tmp_set = hash(tag_ptr->tag << tagShift); // shift the tag back up so hash() can take it as an address
+                if (sets[tmp_set].chain_ptr == tag_index) {
+                    sets[tmp_set].chain_ptr = tagNull;
+                } else {
+                    tmp_index = sets[tmp_set].chain_ptr;
+                    while (tmp_index != tagNull
+                           && tagStore[tmp_index].chain_ptr != tag_index) {
+                        tmp_index = tagStore[tmp_index].chain_ptr;
+                    }
+                    assert(tmp_index != tagNull);
+                    tagStore[tmp_index].chain_ptr = tagNull; // predecessor becomes the end of the chain
+                }
+                tag_ptr->chain_ptr = freelist;
+                freelist = tag_index;
+                freeSecond++;
+            } else {
+                // copy next chained entry to this tag location
+                tmp_index = tag_ptr->chain_ptr;
+                tagSwap(tmp_index, tag_index);
+                tagStore[tmp_index].chain_ptr = freelist; // the successor's old slot goes on the free list
+                freelist = tmp_index;
+                freeSecond++;
+            }
+        } else {
+            // tag_ptr in primary hash table
+            assert(tag_index < primaryBound);
+            tag_ptr->status = 0;
+            unsigned tmp_set = hash(tag_ptr->tag << tagShift);
+            if (sets[tmp_set].chain_ptr != tagNull) { // collapse chain
+                tmp_index = sets[tmp_set].chain_ptr;
+                tagSwap(tag_index, tmp_index);
+                tagStore[tmp_index].chain_ptr = freelist;
+                freelist = tmp_index;
+                freeSecond++;
+                sets[tmp_set].chain_ptr = tag_ptr->chain_ptr; // tag_ptr now holds the former chain head
+                sets[tmp_set].moveToTail(tag_ptr);
+            }
+        }
+    }
+}
+/**
+ * Put a data subblock back on the front of the free list. The block's
+ * reference count must already be zero.
+ * @param data_ptr Index of the data subblock to free.
+ */
+void
+IIC::freeDataBlock(unsigned long data_ptr)
+{
+    assert(dataReferenceCount[data_ptr] == 0);
+    DPRINTF(IICMore, "Freeing data at %d\n", data_ptr);
+    blkFreelist.push_front(data_ptr);
+}
+
+/** Use a simple modulo hash. */
+#define SIMPLE_HASH 0
+
+/**
+ * Map an address onto a hash set. With SIMPLE_HASH disabled, the low
+ * log2(hashSets) bits of the tag are XORed with the next log2(hashSets)
+ * bits.
+ * @param addr The address to hash.
+ * @return The index of the hash set.
+ */
+unsigned
+IIC::hash(Addr addr) const {
+#if SIMPLE_HASH
+    return extractTag(addr) % hashSets;
+#else
+    // hashSets is a power of 2 (enforced by the constructor), so a mask
+    // selects the low bits of the tag.
+    const Addr tag = extractTag(addr);
+    const Addr mask = hashSets-1;
+    // Integer log2 is exact; truncating ::log(hashSets)/::log(2) depends on
+    // floating-point rounding and is recomputed on every lookup.
+    const int set_bits = floorLog2(hashSets);
+    const Addr x = tag & mask;
+    const Addr y = (tag >> set_bits) & mask;
+    assert (x < hashSets && y < hashSets);
+    return x ^ y;
+#endif
+}
+
+
+/**
+ * Move a tag to the front (index 0) of the set, shifting the tags that
+ * were ahead of it back by one position.
+ * @param tag The tag to move; it must already be in the set.
+ */
+void
+IICSet::moveToHead(IICTag *tag)
+{
+    // Already at the head: nothing to reorder.
+    if (tags[0] == tag)
+        return;
+
+    // Locate the slot currently holding the tag.
+    int pos = 1;
+    while (pos < assoc && tags[pos] != tag)
+        ++pos;
+    assert(pos < assoc);
+
+    // Slide everything in front of it back by one, then place it first.
+    for (; pos > 0; --pos)
+        tags[pos] = tags[pos - 1];
+    tags[0] = tag;
+}
+
+/**
+ * Move a tag to the back (index assoc-1) of the set, shifting the tags
+ * that were behind it forward by one position.
+ * @param tag The tag to move; it must already be in the set.
+ */
+void
+IICSet::moveToTail(IICTag *tag)
+{
+    const int last = assoc - 1;
+
+    // Already at the tail: nothing to reorder.
+    if (tags[last] == tag)
+        return;
+
+    // Locate the slot currently holding the tag, scanning from the back.
+    int pos = last - 1;
+    while (pos >= 0 && tags[pos] != tag)
+        --pos;
+    assert(pos >= 0);
+
+    // Slide everything behind it forward by one, then place it last.
+    for (; pos < last; ++pos)
+        tags[pos] = tags[pos + 1];
+    tags[last] = tag;
+}
+/**
+ * Exchange the contents of two tag store entries through a temporary and,
+ * for each entry that is valid after the exchange, tell the replacement
+ * policy the entry's old and new index via fixTag().
+ * @param index1 Index of the first tag.
+ * @param index2 Index of the second tag.
+ */
+void
+IIC::tagSwap(unsigned long index1, unsigned long index2)
+{
+    DPRINTF(IIC,"Swapping tag[%d]=%x for tag[%d]=%x\n",index1,
+            tagStore[index1].tag<<tagShift, index2,
+            tagStore[index2].tag<<tagShift);
+    IICTag tmp_tag;
+    tmp_tag = tagStore[index1]; // uses IICTag's assignment operator, whatever it copies
+    tagStore[index1] = tagStore[index2];
+    tagStore[index2] = tmp_tag;
+    if (tagStore[index1].isValid())
+        repl->fixTag(tagStore[index1].re, index2, index1); // this entry moved from index2 to index1
+    if (tagStore[index2].isValid())
+        repl->fixTag(tagStore[index2].re, index1, index2); // this entry moved from index1 to index2
+}
+
+
+/**
+ * Walk a secondary chain looking for a valid tag that matches the given
+ * tag and ASID.
+ * @param asid The address space ID to match.
+ * @param tag The tag to match.
+ * @param chain_ptr Index of the first chain entry to examine.
+ * @param _depth Set to the number of entries skipped before the match, or
+ * to the length of the chain when nothing matches.
+ * @return Pointer to the matching tag, or NULL if none was found.
+ */
+IICTag *
+IIC::secondaryChain(int asid, Addr tag, unsigned long chain_ptr,
+                    int *_depth) const
+{
+    int hops = 0;
+    for (unsigned long cur = chain_ptr; cur != tagNull;
+         cur = tagStore[cur].chain_ptr, ++hops) {
+        DPRINTF(IIC,"Searching secondary at %d for %x\n", cur,
+                tag<<tagShift);
+        IICTag *entry = &tagStore[cur];
+        if (entry->tag == tag && entry->asid == asid && entry->isValid()) {
+            *_depth = hops;
+            return entry;
+        }
+    }
+    *_depth = hops;
+    return NULL;
+}
+
+/**
+ * Placeholder for decompressing a block; no data is touched yet.
+ * @param index Index of the tag in the tag store.
+ */
+void
+IIC::decompressBlock(unsigned long index)
+{
+    if (!tagStore[index].isCompressed())
+        return;
+
+    // decompress the data here.
+}
+
+/**
+ * Placeholder for compressing a block; no data is touched yet.
+ * @param index Index of the tag in the tag store.
+ */
+void
+IIC::compressBlock(unsigned long index)
+{
+    if (tagStore[index].isCompressed())
+        return;
+
+    // Compress the data here.
+}
+
+/**
+ * Invalidate the block for an address, if present: drop its references to
+ * its data subblocks (freeing those that reach zero), remove its
+ * replacement entry and free its tag.
+ * @param asid The address space ID.
+ * @param addr The address of the block to invalidate.
+ */
+void
+IIC::invalidateBlk(int asid, Addr addr)
+{
+    IICTag *victim = findBlock(addr, asid);
+    if (!victim)
+        return;
+
+    for (int sub = 0; sub < victim->numData; ++sub) {
+        // Release this tag's reference; recycle the subblock if it was
+        // the last one.
+        if (--dataReferenceCount[victim->data_ptr[sub]] == 0)
+            freeDataBlock(victim->data_ptr[sub]);
+    }
+    repl->removeEntry(victim->re);
+    freeTag(victim);
+}
+
+/**
+ * Copy a block's stored data into a buffer. Blocks no larger than
+ * trivialSize are read from the tag's trivial data; larger blocks are
+ * gathered subblock by subblock.
+ * @param blk The block to read.
+ * @param data The buffer that receives blk->size bytes.
+ */
+void
+IIC::readData(IICTag *blk, uint8_t *data){
+//    assert(cache->doData());
+    assert(blk->size <= trivialSize || blk->numData > 0);
+    int remaining = blk->size;
+
+    if (!(remaining > trivialSize)) {
+        // Small enough to live in the tag itself.
+        memcpy(data,blk->trivialData,remaining);
+        return;
+    }
+
+    uint8_t *dest = data;
+    for (int sub = 0; sub < blk->numData; ++sub) {
+        // The final subblock may be only partially used.
+        memcpy(dest,
+               &(dataBlks[blk->data_ptr[sub]][0]),
+               (remaining>subSize)?subSize:remaining);
+        remaining -= subSize;
+        dest += subSize;
+    }
+}
+/**
+ * Store data in a block. The number of data subblocks the block holds is
+ * grown or shrunk to fit the new size (zero subblocks when the data fits
+ * in trivialSize bytes), then the data is copied either into the subblocks
+ * or into the tag's trivial data.
+ * @param blk The block to write.
+ * @param write_data The data to store.
+ * @param size The number of bytes to store.
+ * @param writebacks List that receives any writebacks caused by evictions
+ * made while allocating subblocks.
+ */
+void
+IIC::writeData(IICTag *blk, uint8_t *write_data, int size,
+               PacketList & writebacks){
+//    assert(cache->doData());
+    assert(size < blkSize || !blk->isCompressed());
+    DPRINTF(IIC, "Writing %d bytes to %x\n", size,
+            blk->tag<<tagShift);
+    // Find the number of subblocks needed, (round up)
+    int num_subs = (size + (subSize -1))/subSize;
+    if (size <= trivialSize) {
+        num_subs = 0; // data will be kept in trivialData instead
+    }
+    assert(num_subs <= numSub);
+    if (num_subs > blk->numData) {
+        // need to allocate more data blocks
+        for (int i = blk->numData; i < num_subs; ++i){
+            blk->data_ptr[i] = getFreeDataBlock(writebacks); // may evict other blocks
+            dataReferenceCount[blk->data_ptr[i]] += 1;
+        }
+    } else if (num_subs < blk->numData){
+        // can free data blocks
+        for (int i=num_subs; i < blk->numData; ++i){
+            // decrement reference count and compare to zero
+            /**
+             * @todo
+             * Make this work with copying.
+             */
+            if (--dataReferenceCount[blk->data_ptr[i]] == 0) {
+                freeDataBlock(blk->data_ptr[i]);
+            }
+        }
+    }
+
+    blk->numData = num_subs;
+    blk->size = size;
+    assert(size <= trivialSize || blk->numData > 0);
+    if (size > trivialSize){
+        for (int i = 0; i < blk->numData; ++i){
+            memcpy(&dataBlks[blk->data_ptr[i]][0], write_data + i*subSize,
+                   (size>subSize)?subSize:size); // last subblock may be partial
+            size -= subSize;
+        }
+    } else {
+        memcpy(blk->trivialData,write_data,size);
+    }
+}
+
+
+/**
+ * Block copy. Currently a no-op: the whole implementation is disabled with
+ * "#if 0", so none of the arguments are used.
+ * @todo This code can break if the src is evicted to get a tag for the dest.
+ * @param source The source address.
+ * @param dest The destination address.
+ * @param asid The address space ID.
+ * @param writebacks List for any generated writebacks.
+ */
+void
+IIC::doCopy(Addr source, Addr dest, int asid, PacketList &writebacks)
+{
+// Copy is unsupported for now.
+#if 0
+    IICTag *dest_tag = findBlock(dest, asid);
+
+    if (dest_tag) {
+        for (int i = 0; i < dest_tag->numData; ++i) {
+            if (--dataReferenceCount[dest_tag->data_ptr[i]] == 0) {
+                freeDataBlock(dest_tag->data_ptr[i]);
+            }
+        }
+        // Reset replacement entry
+    } else {
+        dest_tag = getFreeTag(hash(dest), writebacks);
+        dest_tag->re = (void*) repl->add(dest_tag - tagStore);
+        dest_tag->set = hash(dest);
+        dest_tag->tag = extractTag(dest);
+        dest_tag->asid = asid;
+        dest_tag->status = BlkValid | BlkWritable;
+    }
+    // Find the source tag here since it might move if we need to find a
+    // tag for the destination.
+    IICTag *src_tag = findBlock(source, asid);
+    assert(src_tag);
+    assert(!cache->doData() || src_tag->size <= trivialSize
+           || src_tag->numData > 0);
+    // point dest to source data and inc counter
+    for (int i = 0; i < src_tag->numData; ++i) {
+        dest_tag->data_ptr[i] = src_tag->data_ptr[i];
+        ++dataReferenceCount[dest_tag->data_ptr[i]];
+    }
+
+    // Maintain fast access data.
+    memcpy(dest_tag->data, src_tag->data, blkSize);
+
+    dest_tag->xc = src_tag->xc;
+    dest_tag->size = src_tag->size;
+    dest_tag->numData = src_tag->numData;
+    if (src_tag->numData == 0) {
+        // Data is stored in the trivial data, just copy it.
+        memcpy(dest_tag->trivialData, src_tag->trivialData, src_tag->size);
+    }
+
+    dest_tag->status |= BlkDirty;
+    if (dest_tag->size < blkSize) {
+        dest_tag->status |= BlkCompressed;
+    } else {
+        dest_tag->status &= ~BlkCompressed;
+    }
+#endif
+}
+/**
+ * Fix up a block that shares data after a copy. Currently a no-op: the
+ * whole implementation is disabled with "#if 0", so neither argument is
+ * used.
+ * @param pkt The packet addressing the block.
+ * @param writebacks List for any generated writebacks.
+ */
+void
+IIC::fixCopy(Packet * &pkt, PacketList &writebacks)
+{
+#if 0
+    // if reference counter is greater than 1, do copy
+    // else do write
+    Addr blk_addr = blkAlign(pkt->getAddr);
+    IICTag* blk = findBlock(blk_addr, pkt->req->getAsid());
+
+    if (blk->numData > 0 && dataReferenceCount[blk->data_ptr[0]] != 1) {
+        // copy the data
+        // Mark the block as referenced so it doesn't get replaced.
+        blk->status |= BlkReferenced;
+        for (int i = 0; i < blk->numData; ++i){
+            unsigned long new_data = getFreeDataBlock(writebacks);
+            // Need to refresh pointer
+            /**
+             * @todo Remove this refetch once we change IIC to pointer based
+             */
+            blk = findBlock(blk_addr, pkt->req->getAsid());
+            assert(blk);
+            if (cache->doData()) {
+                memcpy(&(dataBlks[new_data][0]),
+                       &(dataBlks[blk->data_ptr[i]][0]),
+                       subSize);
+            }
+            dataReferenceCount[blk->data_ptr[i]]--;
+            dataReferenceCount[new_data]++;
+            blk->data_ptr[i] = new_data;
+        }
+    }
+#endif
+}
+
+/**
+ * Fold the reference counts of all valid tags into the totalRefs and
+ * sampledRefs statistics.
+ */
+void
+IIC::cleanupRefs()
+{
+    const IICTag *const end = tagStore + numTags;
+    for (const IICTag *entry = tagStore; entry < end; ++entry) {
+        // Invalid tags contribute nothing.
+        if (!entry->isValid())
+            continue;
+        totalRefs += entry->refCount;
+        ++sampledRefs;
+    }
+}
diff --git a/src/mem/cache/tags/iic.hh b/src/mem/cache/tags/iic.hh
new file mode 100644
index 000000000..6628f7e7a
--- /dev/null
+++ b/src/mem/cache/tags/iic.hh
@@ -0,0 +1,574 @@
+/*
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Erik Hallnor
+ */
+
+/**
+ * @file
+ * Declaration of the Indirect Index Cache (IIC) tags store.
+ */
+
+#ifndef __IIC_HH__
+#define __IIC_HH__
+
+#include <list>
+#include <vector>
+
+#include "mem/cache/cache_blk.hh"
+#include "mem/cache/tags/repl/repl.hh"
+#include "mem/packet.hh"
+#include "base/statistics.hh"
+#include "mem/cache/tags/base_tags.hh"
+
+class BaseCache; // Forward declaration
+
+/**
+ * IIC cache blk.
+ */
+class IICTag : public CacheBlk
+{
+  public:
+    /**
+     * Copy the contents of the given IICTag into this one.
+     * @param rhs The tag to copy.
+     * @return const reference to this tag.
+     */
+    const IICTag& operator=(const IICTag& rhs)
+    {
+        CacheBlk::operator=(rhs);
+        chain_ptr = rhs.chain_ptr;
+        re = rhs.re;
+        set = rhs.set;
+        trivialData = rhs.trivialData;
+        numData = rhs.numData;
+        data_ptr.clear();
+        for (int i = 0; i < rhs.numData; ++i) {
+            data_ptr.push_back(rhs.data_ptr[i]);
+        }
+        return *this;
+    }
+
+    /** Hash chain pointer into secondary store. */
+    unsigned long chain_ptr;
+    /** Data array pointers for each subblock. */
+    std::vector<unsigned long> data_ptr;
+    /** Replacement Entry pointer. */
+    void *re;
+    /**
+     * An array to store small compressed data. Conceptually the same size
+     * as the unused data array pointers.
+     */
+    uint8_t *trivialData;
+    /**
+     * The number of allocated subblocks.
+     */
+    int numData;
+};
+
+/**
+ * A hash set for the IIC primary lookup table.
+ */
+class IICSet{
+  public:
+    /** The associativity of the primary table. */
+    int assoc;
+
+    /** The number of hash chains followed when finding the last block. */
+    int depth;
+    /** The current number of blocks on the chain. */
+    int size;
+
+    /** Tag pointer into the secondary tag storage. */
+    unsigned long chain_ptr;
+
+    /** The LRU list of the primary table. MRU is at 0 index. */
+    IICTag ** tags;
+
+    /**
+     * Find the addr in this set, return the chain pointer to the secondary if
+     * it isn't found.
+     * @param asid The address space ID.
+     * @param tag The address to find.
+     * @param chain_ptr The chain pointer to start the search of the secondary
+     * @return Pointer to the tag, NULL if not found.
+     */
+    IICTag* findTag(int asid, Addr tag, unsigned long &chain_ptr)
+    {
+        depth = 1;
+        for (int i = 0; i < assoc; ++i) {
+            if (tags[i]->tag == tag && tags[i]->isValid()) {
+                return tags[i];
+            }
+        }
+        chain_ptr = this->chain_ptr;
+        return 0;
+    }
+
+    /**
+     * Find an unused tag in this set.
+     * @return Pointer to the unused tag, NULL if none are free.
+     */
+    IICTag* findFree()
+    {
+        for (int i = 0; i < assoc; ++i) {
+            if (!tags[i]->isValid()) {
+                return tags[i];
+            }
+        }
+        return 0;
+    }
+
+    /**
+     * Move a tag to the head of the LRU list
+     * @param tag The tag to move.
+     */
+    void moveToHead(IICTag *tag);
+
+    /**
+     * Move a tag to the tail (LRU) of the LRU list
+     * @param tag The tag to move.
+     */
+    void moveToTail(IICTag *tag);
+};
+
+/**
+ * The IIC tag store. This is a hardware-realizable, fully-associative tag
+ * store that uses software replacement, e.g. Gen.
+ */
+class IIC : public BaseTags
+{
+  public:
+    /** Typedef of the block type used in this class. */
+    typedef IICTag BlkType;
+    /** Typedef for list of pointers to the local block type. */
+    typedef std::list<IICTag*> BlkList;
+  protected:
+    /** The number of sets in the primary table. */
+    const int hashSets;
+    /** The block size in bytes. */
+    const int blkSize;
+    /** The associativity of the primary table. */
+    const int assoc;
+    /** The base hit latency. */
+    const int hitLatency;
+    /** The subblock size, used for compression. */
+    const int subSize;
+
+    /** The number of subblocks */
+    const int numSub;
+    /** The number of bytes used by data pointers */
+    const int trivialSize;
+
+    /** The amount to shift address to get the tag. */
+    const int tagShift;
+    /** The mask to get block offset bits. */
+    const unsigned blkMask;
+
+    /** The amount to shift to get the subblock number. */
+    const int subShift;
+    /** The mask to get the correct subblock number. */
+    const unsigned subMask;
+
+    /** The latency of a hash lookup. */
+    const int hashDelay;
+    /** The number of data blocks. */
+    const int numBlocks;
+    /** The total number of tags in primary and secondary. */
+    const int numTags;
+    /** The number of tags in the secondary tag store. */
+    const int numSecondary;
+
+    /** The Null tag pointer. */
+    const int tagNull;
+    /** The last tag in the primary table. */
+    const int primaryBound;
+
+    /** All of the tags */
+    IICTag *tagStore;
+    /**
+     * Pointer to the head of the secondary freelist (maintained with chain
+     * pointers).
+     */
+    unsigned long freelist;
+    /**
+     * The data block freelist.
+     */
+    std::list<unsigned long> blkFreelist;
+
+    /** The primary table. */
+    IICSet *sets;
+
+    /** The replacement policy. */
+    Repl *repl;
+
+    /** An array of data reference counters. */
+    int *dataReferenceCount;
+
+    /** The data blocks. */
+    uint8_t *dataStore;
+
+    /** Storage for the fast access data of each cache block. */
+    uint8_t **dataBlks;
+
+    /**
+     * Count of the current number of free secondary tags.
+     * Used for debugging.
+     */
+    int freeSecond;
+
+    // IIC Statistics
+    /**
+     * @addtogroup IICStatistics IIC Statistics
+     * @{
+     */
+
+    /** Hash hit depth of cache hits. */
+    Stats::Distribution<> hitHashDepth;
+    /** Hash depth for cache misses. */
+    Stats::Distribution<> missHashDepth;
+    /** Count of accesses to each hash set. */
+    Stats::Distribution<> setAccess;
+
+    /** The total hash depth for every miss. */
+    Stats::Scalar<> missDepthTotal;
+    /** The total hash depth for all hits. */
+    Stats::Scalar<> hitDepthTotal;
+    /** The number of hash misses. */
+    Stats::Scalar<> hashMiss;
+    /** The number of hash hits. */
+    Stats::Scalar<> hashHit;
+    /** @} */
+
+  public:
+    /**
+     * Collection of parameters for the IIC.
+     */
+    class Params {
+      public:
+        /** The size in bytes of the cache. */
+        int size;
+        /** The number of sets in the primary table. */
+        int numSets;
+        /** The block size in bytes. */
+        int blkSize;
+        /** The associativity of the primary table. */
+        int assoc;
+        /** The number of cycles for each hash lookup. */
+        int hashDelay;
+        /** The number of cycles to read the data. */
+        int hitLatency;
+        /** The replacement policy. */
+        Repl *rp;
+        /** The subblock size in bytes. */
+        int subblockSize;
+    };
+
+    /**
+     * Construct and initialize this tag store.
+     * @param params The IIC parameters.
+     * @todo
+     * Should make a way to have fewer tags in the primary than blks in the
+     * cache. Also should be able to specify number of secondary blks.
+     */
+    IIC(Params &params);
+
+    /**
+     * Destructor.
+     */
+    virtual ~IIC();
+
+    /**
+     * Register the statistics.
+     * @param name The name to prepend to the statistic descriptions.
+     */
+    void regStats(const std::string &name);
+
+    /**
+     * Regenerate the block address from the tag.
+     * @param tag The tag of the block.
+     * @param set Not needed for the iic.
+     * @return The block address.
+     */
+    Addr regenerateBlkAddr(Addr tag, int set) {
+        return (((Addr)tag << tagShift));
+    }
+
+    /**
+     * Return the block size.
+     * @return The block size.
+     */
+    int getBlockSize()
+    {
+        return blkSize;
+    }
+
+    /**
+     * Return the subblock size.
+     * @return The subblock size.
+     */
+    int getSubBlockSize()
+    {
+        return subSize;
+    }
+
+    /**
+     * Return the hit latency.
+     * @return the hit latency.
+     */
+    int getHitLatency() const
+    {
+        return hitLatency;
+    }
+
+    /**
+     * Generate the tag from the address.
+     * @param addr The address to get a tag for.
+     * @param blk Ignored here.
+     * @return the tag.
+     */
+    Addr extractTag(Addr addr, IICTag *blk) const
+    {
+        return (addr >> tagShift);
+    }
+
+     /**
+     * Generate the tag from the address.
+     * @param addr The address to get a tag for.
+     * @return the tag.
+     */
+    Addr extractTag(Addr addr) const
+    {
+        return (addr >> tagShift);
+    }
+
+   /**
+     * Return the set, always 0 for IIC.
+     * @return 0.
+     */
+    int extractSet(Addr addr) const
+    {
+        return 0;
+    }
+
+    /**
+     * Get the block offset of an address.
+     * @param addr The address to get the offset of.
+     * @return the block offset of the address.
+     */
+    int extractBlkOffset(Addr addr) const
+    {
+        return (addr & blkMask);
+    }
+
+    /**
+     * Align an address to the block size.
+     * @param addr the address to align.
+     * @return The block address.
+     */
+    Addr blkAlign(Addr addr) const
+    {
+        return (addr & ~(Addr)blkMask);
+    }
+
+    /**
+     * Check for the address in the tagstore.
+     * @param asid The address space ID.
+     * @param addr The address to find.
+     * @return true if it is found.
+     */
+    bool probe(int asid, Addr addr) const;
+
+    /**
+     * Swap the position of two tags.
+     * @param index1 The first tag location.
+     * @param index2 The second tag location.
+     */
+    void tagSwap(unsigned long index1, unsigned long index2);
+
+    /**
+     * Clear the reference bit of the tag and return its old value.
+     * @param index The pointer of the tag to manipulate.
+     * @return The previous state of the reference bit.
+     */
+    bool clearRef(unsigned long index)
+    {
+        bool tmp = tagStore[index].isReferenced();
+        tagStore[index].status &= ~BlkReferenced;
+        return tmp;
+    }
+
+    /**
+     * Decompress a block if it is compressed.
+     * @param index The tag store index for the block to uncompress.
+     */
+    void decompressBlock(unsigned long index);
+
+    /**
+     * Try and compress a block if it is not already compressed.
+     * @param index The tag store index for the block to compress.
+     */
+    void compressBlock(unsigned long index);
+
+    /**
+     * Invalidate the block containing the address.
+     * @param asid The address space ID.
+     * @param addr The address to invalidate.
+     */
+    void invalidateBlk(int asid, Addr addr);
+
+    /**
+     * Find the block and update the replacement data. This call also returns
+     * the access latency as a side effect.
+     * @param addr The address to find.
+     * @param asid The address space ID.
+     * @param lat The access latency.
+     * @return A pointer to the block found, if any.
+     */
+    IICTag* findBlock(Addr addr, int asid, int &lat);
+
+    /**
+     * Find the block and update the replacement data. This call also returns
+     * the access latency as a side effect.
+     * @param pkt The packet whose block to find.
+     * @param lat The access latency.
+     * @return A pointer to the block found, if any.
+     */
+    IICTag* findBlock(Packet * &pkt, int &lat);
+
+    /**
+     * Find the block, do not update the replacement data.
+     * @param addr The address to find.
+     * @param asid The address space ID.
+     * @return A pointer to the block found, if any.
+     */
+    IICTag* findBlock(Addr addr, int asid) const;
+
+    /**
+     * Find a replacement block for the address provided.
+     * @param pkt The packet to find a replacement candidate for.
+     * @param writebacks List for any writebacks to be performed.
+     * @param compress_blocks List of blocks to compress, for adaptive comp.
+     * @return The block to place the replacement in.
+     */
+    IICTag* findReplacement(Packet * &pkt, PacketList &writebacks,
+                            BlkList &compress_blocks);
+
+    /**
+     * Read the data from the internal storage of the given cache block.
+     * @param blk The block to read the data from.
+     * @param data The buffer to read the data into; the function itself
+     * returns nothing.
+     */
+    void readData(IICTag *blk, uint8_t *data);
+
+    /**
+     * Write the data into the internal storage of the given cache block.
+     * @param blk The block to write to.
+     * @param data The data to write.
+     * @param size The number of bytes to write.
+     * @param writebacks A list for any writebacks to be performed. May be
+     * needed when writing to a compressed block.
+     */
+    void writeData(IICTag *blk, uint8_t *data, int size,
+                   PacketList & writebacks);
+
+    /**
+     * Perform a block aligned copy from the source address to the destination.
+     * @param source The block-aligned source address.
+     * @param dest The block-aligned destination address.
+     * @param asid The address space ID.
+     * @param writebacks List for any generated writeback requests.
+     */
+    void doCopy(Addr source, Addr dest, int asid, PacketList &writebacks);
+
+    /**
+     * If a block is currently marked copy on write, copy it before writing.
+     * @param pkt The write request packet.
+     * @param writebacks List for any generated writeback requests.
+     */
+    void fixCopy(Packet * &pkt, PacketList &writebacks);
+
+    /**
+     * Called at end of simulation to complete average block reference stats.
+     */
+    virtual void cleanupRefs();
+private:
+    /**
+     * Return the hash of the address.
+     * @param addr The address to hash.
+     * @return the hash of the address.
+     */
+    unsigned hash(Addr addr) const;
+
+    /**
+     * Search for a block in the secondary tag store. Returns the number of
+     * hash lookups as a side effect.
+     * @param asid The address space ID.
+     * @param tag The tag to match.
+     * @param chain_ptr The first entry to search.
+     * @param depth The number of hash lookups made while searching.
+     * @return A pointer to the block if found.
+     */
+    IICTag *secondaryChain(int asid, Addr tag, unsigned long chain_ptr,
+                            int *depth) const;
+
+    /**
+     * Free the resources associated with the next replacement block.
+     * @param writebacks A list of any writebacks to perform.
+     */
+    void freeReplacementBlock(PacketList & writebacks);
+
+    /**
+     * Return the pointer to a free data block.
+     * @param writebacks A list of any writebacks to perform.
+     * @return A pointer to a free data block.
+     */
+    unsigned long getFreeDataBlock(PacketList & writebacks);
+
+    /**
+     * Get a free tag in the given hash set.
+     * @param set The hash set to search.
+     * @param writebacks A list of any writebacks to perform.
+     * @return a pointer to a free tag.
+     */
+    IICTag* getFreeTag(int set, PacketList & writebacks);
+
+    /**
+     * Free the resources associated with the given tag.
+     * @param tag_ptr The tag to free.
+     */
+    void freeTag(IICTag *tag_ptr);
+
+    /**
+     * Mark the given data block as being available.
+     * @param data_ptr The data block to free.
+     */
+    void freeDataBlock(unsigned long data_ptr);
+};
+#endif // __IIC_HH__
+
diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc
new file mode 100644
index 000000000..556025a3a
--- /dev/null
+++ b/src/mem/cache/tags/lru.cc
@@ -0,0 +1,310 @@
+/*
+ * Copyright (c) 2003-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Erik Hallnor
+ */
+
+/**
+ * @file
+ * Definitions of LRU tag store.
+ */
+
+#include <string>
+
+#include "mem/cache/base_cache.hh"
+#include "base/intmath.hh"
+#include "mem/cache/tags/lru.hh"
+#include "sim/root.hh"
+
+using namespace std;
+
+LRUBlk*
+CacheSet::findBlk(int asid, Addr tag) const
+{
+    for (int i = 0; i < assoc; ++i) {
+        if (blks[i]->tag == tag && blks[i]->isValid()) {
+            return blks[i];
+        }
+    }
+    return 0;
+}
+
+
+void
+CacheSet::moveToHead(LRUBlk *blk)
+{
+    // nothing to do if blk is already head
+    if (blks[0] == blk)
+        return;
+
+    // write 'next' block into blks[i], moving up from MRU toward LRU
+    // until we overwrite the block we moved to head.
+
+    // start by setting up to write 'blk' into blks[0]
+    int i = 0;
+    LRUBlk *next = blk;
+
+    do {
+        assert(i < assoc);
+        // swap blks[i] and next
+        LRUBlk *tmp = blks[i];
+        blks[i] = next;
+        next = tmp;
+        ++i;
+    } while (next != blk);
+}
+
+
+// create and initialize a LRU/MRU cache structure
+LRU::LRU(int _numSets, int _blkSize, int _assoc, int _hit_latency) :
+    numSets(_numSets), blkSize(_blkSize), assoc(_assoc), hitLatency(_hit_latency)
+{
+    // Check parameters
+    if (blkSize < 4 || !isPowerOf2(blkSize)) {
+        fatal("Block size must be at least 4 and a power of 2");
+    }
+    if (numSets <= 0 || !isPowerOf2(numSets)) {
+        fatal("# of sets must be non-zero and a power of 2");
+    }
+    if (assoc <= 0) {
+        fatal("associativity must be greater than zero");
+    }
+    if (hitLatency <= 0) {
+        fatal("access latency must be greater than zero");
+    }
+
+    LRUBlk  *blk;
+    int i, j, blkIndex;
+
+    blkMask = blkSize - 1;
+    setShift = floorLog2(blkSize);
+    setMask = numSets - 1;
+    tagShift = setShift + floorLog2(numSets);
+    warmedUp = false;
+    /** @todo Make warmup percentage a parameter. */
+    warmupBound = numSets * assoc;
+
+    sets = new CacheSet[numSets];
+    blks = new LRUBlk[numSets * assoc];
+    // allocate data storage in one big chunk
+    dataBlks = new uint8_t[numSets*assoc*blkSize];
+
+    blkIndex = 0;	// index into blks array
+    for (i = 0; i < numSets; ++i) {
+        sets[i].assoc = assoc;
+
+        sets[i].blks = new LRUBlk*[assoc];
+
+        // link in the data blocks
+        for (j = 0; j < assoc; ++j) {
+            // locate next cache block
+            blk = &blks[blkIndex];
+            blk->data = &dataBlks[blkSize*blkIndex];
+            ++blkIndex;
+
+            // invalidate new cache block
+            blk->status = 0;
+
+            //EGH Fix Me : do we need to initialize blk?
+
+            // Setting the tag to j is just to prevent long chains in the hash
+            // table; won't matter because the block is invalid
+            blk->tag = j;
+            blk->whenReady = 0;
+            blk->asid = -1;
+            blk->isTouched = false;
+            blk->size = blkSize;
+            sets[i].blks[j]=blk;
+            blk->set = i;
+        }
+    }
+}
+
+LRU::~LRU()
+{
+    delete [] dataBlks;
+    delete [] blks;
+    delete [] sets;
+}
+
+// probe cache for presence of given block.
+bool
+LRU::probe(int asid, Addr addr) const
+{
+    //  return(findBlock(Read, addr, asid) != 0);
+    Addr tag = extractTag(addr);
+    unsigned myset = extractSet(addr);
+
+    LRUBlk *blk = sets[myset].findBlk(asid, tag);
+
+    return (blk != NULL);	// true if in cache
+}
+
+LRUBlk*
+LRU::findBlock(Addr addr, int asid, int &lat)
+{
+    Addr tag = extractTag(addr);
+    unsigned set = extractSet(addr);
+    LRUBlk *blk = sets[set].findBlk(asid, tag);
+    lat = hitLatency;
+    if (blk != NULL) {
+        // move this block to head of the MRU list
+        sets[set].moveToHead(blk);
+        if (blk->whenReady > curTick
+            && blk->whenReady - curTick > hitLatency) {
+            lat = blk->whenReady - curTick;
+        }
+        blk->refCount += 1;
+    }
+
+    return blk;
+}
+
+LRUBlk*
+LRU::findBlock(Packet * &pkt, int &lat)
+{
+    Addr addr = pkt->getAddr();
+    int asid = 0;//pkt->req->getAsid();
+
+    Addr tag = extractTag(addr);
+    unsigned set = extractSet(addr);
+    LRUBlk *blk = sets[set].findBlk(asid, tag);
+    lat = hitLatency;
+    if (blk != NULL) {
+        // move this block to head of the MRU list
+        sets[set].moveToHead(blk);
+        if (blk->whenReady > curTick
+            && blk->whenReady - curTick > hitLatency) {
+            lat = blk->whenReady - curTick;
+        }
+        blk->refCount += 1;
+    }
+
+    return blk;
+}
+
+LRUBlk*
+LRU::findBlock(Addr addr, int asid) const
+{
+    Addr tag = extractTag(addr);
+    unsigned set = extractSet(addr);
+    LRUBlk *blk = sets[set].findBlk(asid, tag);
+    return blk;
+}
+
+LRUBlk*
+LRU::findReplacement(Packet * &pkt, PacketList &writebacks,
+                     BlkList &compress_blocks)
+{
+    unsigned set = extractSet(pkt->getAddr());
+    // grab a replacement candidate
+    LRUBlk *blk = sets[set].blks[assoc-1];
+    sets[set].moveToHead(blk);
+    if (blk->isValid()) {
+        replacements[0]++;
+        totalRefs += blk->refCount;
+        ++sampledRefs;
+        blk->refCount = 0;
+    } else if (!blk->isTouched) {
+        tagsInUse++;
+        blk->isTouched = true;
+        if (!warmedUp && tagsInUse.value() >= warmupBound) {
+            warmedUp = true;
+            warmupCycle = curTick;
+        }
+    }
+
+    return blk;
+}
+
+void
+LRU::invalidateBlk(int asid, Addr addr)
+{
+    LRUBlk *blk = findBlock(addr, asid);
+    if (blk) {
+        blk->status = 0;
+        blk->isTouched = false;
+        tagsInUse--;
+    }
+}
+
+void
+LRU::doCopy(Addr source, Addr dest, int asid, PacketList &writebacks)
+{
+    assert(source == blkAlign(source));
+    assert(dest == blkAlign(dest));
+    LRUBlk *source_blk = findBlock(source, asid);
+    assert(source_blk);
+    LRUBlk *dest_blk = findBlock(dest, asid);
+    if (dest_blk == NULL) {
+        // Need to do a replacement
+        Request *search = new Request(dest,1,0);
+        Packet * pkt = new Packet(search, Packet::ReadReq, -1);
+        BlkList dummy_list;
+        dest_blk = findReplacement(pkt, writebacks, dummy_list);
+        if (dest_blk->isValid() && dest_blk->isModified()) {
+            // Need to writeback data.
+/*	    pkt = buildWritebackReq(regenerateBlkAddr(dest_blk->tag,
+                                                      dest_blk->set),
+                                    dest_blk->req->asid,
+                                    dest_blk->xc,
+                                    blkSize,
+                                    dest_blk->data,
+                                    dest_blk->size);
+*/
+            Request *writebackReq = new Request(regenerateBlkAddr(dest_blk->tag,
+                                                                  dest_blk->set),
+                                                blkSize, 0);
+            Packet *writeback = new Packet(writebackReq, Packet::Writeback, -1);
+            writeback->allocate();
+            memcpy(writeback->getPtr<uint8_t>(),dest_blk->data, blkSize);
+            writebacks.push_back(writeback);
+        }
+        dest_blk->tag = extractTag(dest);
+        dest_blk->asid = asid;
+        delete search;
+        delete pkt;
+    }
+    /**
+     * @todo Can't assume the status once we have coherence on copies.
+     */
+
+    // Set this block as readable, writeable, and dirty.
+    dest_blk->status = 7;
+    memcpy(dest_blk->data, source_blk->data, blkSize);
+}
+
+void
+LRU::cleanupRefs()
+{
+    for (int i = 0; i < numSets*assoc; ++i) {
+        if (blks[i].isValid()) {
+            totalRefs += blks[i].refCount;
+            ++sampledRefs;
+        }
+    }
+}
diff --git a/src/mem/cache/tags/lru.hh b/src/mem/cache/tags/lru.hh
new file mode 100644
index 000000000..437244660
--- /dev/null
+++ b/src/mem/cache/tags/lru.hh
@@ -0,0 +1,327 @@
+/*
+ * Copyright (c) 2003-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Erik Hallnor
+ */
+
+/**
+ * @file
+ * Declaration of a LRU tag store.
+ */
+
+#ifndef __LRU_HH__
+#define __LRU_HH__
+
+#include <list>
+
+#include "mem/cache/cache_blk.hh" // base class
+#include "mem/packet.hh" // for inlined functions
+#include <assert.h>
+#include "mem/cache/tags/base_tags.hh"
+
+class BaseCache;
+
+/**
+ * LRU cache block.
+ */
+class LRUBlk : public CacheBlk {
+  public:
+    /** Has this block been touched? Used to aid calculation of warmup time. */
+    bool isTouched;
+};
+
+/**
+ * An associative set of cache blocks.
+ */
+class CacheSet
+{
+  public:
+    /** The associativity of this set. */
+    int assoc;
+
+    /** Cache blocks in this set, maintained in LRU order 0 = MRU. */
+    LRUBlk **blks;
+
+    /**
+     * Find a block matching the tag in this set.
+     * @param asid The address space ID.
+     * @param tag The Tag to find.
+     * @return Pointer to the block if found.
+     */
+    LRUBlk* findBlk(int asid, Addr tag) const;
+
+    /**
+     * Move the given block to the head of the list.
+     * @param blk The block to move.
+     */
+    void moveToHead(LRUBlk *blk);
+};
+
+/**
+ * A LRU cache tag store.
+ */
+class LRU : public BaseTags
+{
+  public:
+    /** Typedef the block type used in this tag store. */
+    typedef LRUBlk BlkType;
+    /** Typedef for a list of pointers to the local block class. */
+    typedef std::list<LRUBlk*> BlkList;
+  protected:
+    /** The number of sets in the cache. */
+    const int numSets;
+    /** The number of bytes in a block. */
+    const int blkSize;
+    /** The associativity of the cache. */
+    const int assoc;
+    /** The hit latency. */
+    const int hitLatency;
+
+    /** The cache sets. */
+    CacheSet *sets;
+
+    /** The cache blocks. */
+    LRUBlk *blks;
+    /** The data blocks, 1 per cache block. */
+    uint8_t *dataBlks;
+
+    /** The amount to shift the address to get the set. */
+    int setShift;
+    /** The amount to shift the address to get the tag. */
+    int tagShift;
+    /** Mask out all bits that aren't part of the set index. */
+    unsigned setMask;
+    /** Mask out all bits that aren't part of the block offset. */
+    unsigned blkMask;
+
+public:
+    /**
+     * Construct and initialize this tag store.
+     * @param _numSets The number of sets in the cache.
+     * @param _blkSize The number of bytes in a block.
+     * @param _assoc The associativity of the cache.
+     * @param _hit_latency The latency in cycles for a hit.
+     */
+    LRU(int _numSets, int _blkSize,	int _assoc, int _hit_latency);
+
+    /**
+     * Destructor
+     */
+    virtual ~LRU();
+
+    /**
+     * Return the block size.
+     * @return the block size.
+     */
+    int getBlockSize()
+    {
+        return blkSize;
+    }
+
+    /**
+     * Return the subblock size. In the case of LRU it is always the block
+     * size.
+     * @return The block size.
+     */
+    int getSubBlockSize()
+    {
+        return blkSize;
+    }
+
+    /**
+     * Search for the address in the cache.
+     * @param asid The address space ID.
+     * @param addr The address to find.
+     * @return True if the address is in the cache.
+     */
+    bool probe(int asid, Addr addr) const;
+
+    /**
+     * Invalidate the block containing the given address.
+     * @param asid The address space ID.
+     * @param addr The address to invalidate.
+     */
+    void invalidateBlk(int asid, Addr addr);
+
+    /**
+     * Finds the given address in the cache and update replacement data.
+     * Returns the access latency as a side effect.
+     * @param pkt The packet whose block to find.
+     * @param lat The access latency.
+     * @return Pointer to the cache block if found.
+     */
+    LRUBlk* findBlock(Packet * &pkt, int &lat);
+
+    /**
+     * Finds the given address in the cache and update replacement data.
+     * Returns the access latency as a side effect.
+     * @param addr The address to find.
+     * @param asid The address space ID.
+     * @param lat The access latency.
+     * @return Pointer to the cache block if found.
+     */
+    LRUBlk* findBlock(Addr addr, int asid, int &lat);
+
+    /**
+     * Finds the given address in the cache, do not update replacement data.
+     * @param addr The address to find.
+     * @param asid The address space ID.
+     * @return Pointer to the cache block if found.
+     */
+    LRUBlk* findBlock(Addr addr, int asid) const;
+
+    /**
+     * Find a replacement block for the address provided.
+     * @param pkt The packet to find a replacement candidate for.
+     * @param writebacks List for any writebacks to be performed.
+     * @param compress_blocks List of blocks to compress, for adaptive comp.
+     * @return The block to place the replacement in.
+     */
+    LRUBlk* findReplacement(Packet * &pkt, PacketList &writebacks,
+                            BlkList &compress_blocks);
+
+    /**
+     * Generate the tag from the given address.
+     * @param addr The address to get the tag from.
+     * @return The tag of the address.
+     */
+    Addr extractTag(Addr addr) const
+    {
+        return (addr >> tagShift);
+    }
+
+   /**
+     * Generate the tag from the given address.
+     * @param addr The address to get the tag from.
+     * @param blk Ignored.
+     * @return The tag of the address.
+     */
+    Addr extractTag(Addr addr, LRUBlk *blk) const
+    {
+        return (addr >> tagShift);
+    }
+
+    /**
+     * Calculate the set index from the address.
+     * @param addr The address to get the set from.
+     * @return The set index of the address.
+     */
+    int extractSet(Addr addr) const
+    {
+        return ((addr >> setShift) & setMask);
+    }
+
+    /**
+     * Get the block offset from an address.
+     * @param addr The address to get the offset of.
+     * @return The block offset.
+     */
+    int extractBlkOffset(Addr addr) const
+    {
+        return (addr & blkMask);
+    }
+
+    /**
+     * Align an address to the block size.
+     * @param addr the address to align.
+     * @return The block address.
+     */
+    Addr blkAlign(Addr addr) const
+    {
+        return (addr & ~(Addr)blkMask);
+    }
+
+    /**
+     * Regenerate the block address from the tag.
+     * @param tag The tag of the block.
+     * @param set The set of the block.
+     * @return The block address.
+     */
+    Addr regenerateBlkAddr(Addr tag, unsigned set) const
+    {
+        return ((tag << tagShift) | ((Addr)set << setShift));
+    }
+
+    /**
+     * Return the hit latency.
+     * @return the hit latency.
+     */
+    int getHitLatency() const
+    {
+        return hitLatency;
+    }
+
+    /**
+     * Read the data out of the internal storage of the given cache block.
+     * @param blk The cache block to read.
+     * @param data The buffer to read the data into.
+     * @note Nothing is returned; the block's data is copied into @p data.
+     */
+    void readData(LRUBlk *blk, uint8_t *data)
+    {
+        memcpy(data, blk->data, blk->size);
+    }
+
+    /**
+     * Write data into the internal storage of the given cache block. Since
+     * LRU does not store data differently, this just needs to update the size.
+     * @param blk The cache block to write.
+     * @param data The data to write.
+     * @param size The number of bytes to write.
+     * @param writebacks A list for any writebacks to be performed. May be
+     * needed when writing to a compressed block.
+     */
+    void writeData(LRUBlk *blk, uint8_t *data, int size,
+                   PacketList & writebacks)
+    {
+        assert(size <= blkSize);
+        blk->size = size;
+    }
+
+    /**
+     * Perform a block aligned copy from the source address to the destination.
+     * @param source The block-aligned source address.
+     * @param dest The block-aligned destination address.
+     * @param asid The address space ID.
+     * @param writebacks List for any generated writeback requests.
+     */
+    void doCopy(Addr source, Addr dest, int asid, PacketList &writebacks);
+
+    /**
+     * No impl.
+     */
+    void fixCopy(Packet * &pkt, PacketList &writebacks)
+    {
+    }
+
+    /**
+     * Called at end of simulation to complete average block reference stats.
+     */
+    virtual void cleanupRefs();
+};
+
+#endif
diff --git a/src/mem/cache/tags/repl/gen.cc b/src/mem/cache/tags/repl/gen.cc
new file mode 100644
index 000000000..ec1c2aaf3
--- /dev/null
+++ b/src/mem/cache/tags/repl/gen.cc
@@ -0,0 +1,277 @@
+/*
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Erik Hallnor
+ *          Steve Reinhardt
+ */
+
+/**
+ * @file
+ * Definitions of the Generational replacement policy.
+ */
+
+#include <string>
+
+#include "base/misc.hh"
+#include "mem/cache/tags/iic.hh"
+#include "mem/cache/tags/repl/gen.hh"
+#include "sim/builder.hh"
+#include "sim/host.hh"
+
+using namespace std;
+
+GenRepl::GenRepl(const string &_name,
+                 int _num_pools,
+                 int _fresh_res,
+                 int _pool_res) // fix this, should be set by cache
+    : Repl(_name),
+      pools(new GenPool[_num_pools+1]), // normal pools plus the fresh pool
+      num_pools(_num_pools),
+      fresh_res(_fresh_res),
+      pool_res(_pool_res),
+      num_entries(0),
+      num_pool_entries(0),
+      misses(0)
+{
+}
+
+GenRepl::~GenRepl()
+{
+    delete [] pools; // runs ~GenPool on each pool, which frees queued entries
+}
+
+unsigned long
+GenRepl::getRepl()
+{
+    unsigned long tmp;
+    GenReplEntry *re;
+    int i;
+    // Victims come only from the normal pools [0, num_pools), lowest first.
+    if (!(num_pool_entries>0)) {
+        fatal("No blks available to replace");
+    }
+    num_entries--;
+    num_pool_entries--;
+    for (i = 0; i < num_pools; i++) {
+        while ((re = pools[i].pop())) {
+            // Entries invalidated through removeEntry() stay queued until
+            // they are popped; discard them and keep looking.
+            if (!re->valid) {
+                delete re;
+                continue;
+            }
+            if (iic->clearRef(re->tag_ptr)) {
+                pools[(((i+1)== num_pools)? i :i+1)].push(re, misses);
+            }
+            else {
+                tmp = re->tag_ptr;
+                delete re;
+
+                repl_pool.sample(i);
+
+                return tmp;
+            }
+        }
+    }
+    fatal("No replacement found");
+    return 0xffffffff;
+}
+
+unsigned long *
+GenRepl::getNRepl(int n)
+{
+    unsigned long *tmp;
+    GenReplEntry *re;
+    int i;
+    if (!(num_pool_entries>(n-1))) {
+        fatal("Not enough blks available to replace");
+    }
+    num_entries -= n;
+    num_pool_entries -= n;
+    tmp = new unsigned long[n]; /* array of tag pointers, returned to caller */
+    int blk_index = 0;
+    for (i = 0; i < num_pools && blk_index < n; i++) {
+        while (blk_index < n && (re = pools[i].pop())) {
+            // Remove invalidated entries
+            if (!re->valid) {
+                delete re;
+                continue;
+            }
+            if (iic->clearRef(re->tag_ptr)) {
+                pools[(((i+1)== num_pools)? i :i+1)].push(re, misses);
+            }
+            else {
+                tmp[blk_index] = re->tag_ptr;
+                blk_index++;
+                delete re;
+                repl_pool.sample(i);
+            }
+        }
+    }
+    if (blk_index >= n)
+        return tmp;
+    /* normal pools exhausted; the fresh pool (pools[num_pools]) is not searched */
+
+    fatal("No N  replacements found");
+    return NULL;
+}
+
+void
+GenRepl::doAdvance(std::list<unsigned long> &demoted)
+{
+    int i;
+    // Clamp so num_pools < 2 cannot index pools[-1] when demoting fresh entries.
+    const int fresh_demote = (num_pools/2 > 0) ? num_pools/2-1 : 0;
+    GenReplEntry *re;
+    misses++;
+    for (i=0; i<num_pools; i++) {
+        while (misses-pools[i].oldest > pool_res && (re = pools[i].pop())!=NULL) {
+            if (iic->clearRef(re->tag_ptr)) {
+                pools[(((i+1)== num_pools)? i :i+1)].push(re, misses);
+                /** @todo Not really demoted, but use it for now. */
+                demoted.push_back(re->tag_ptr);
+                advance_pool.sample(i);
+            }
+            else {
+                pools[(((i-1)<0)?i:i-1)].push(re, misses);
+                demoted.push_back(re->tag_ptr);
+                demote_pool.sample(i);
+            }
+        }
+    }
+    while (misses-pools[num_pools].oldest > fresh_res
+          && (re = pools[num_pools].pop())!=NULL) {
+        num_pool_entries++;
+        if (iic->clearRef(re->tag_ptr)) {
+            pools[num_pools/2].push(re, misses);
+            /** @todo Not really demoted, but use it for now. */
+            demoted.push_back(re->tag_ptr);
+            advance_pool.sample(num_pools);
+        }
+        else {
+            pools[fresh_demote].push(re, misses);
+            demoted.push_back(re->tag_ptr);
+            demote_pool.sample(num_pools);
+        }
+    }
+}
+
+void*
+GenRepl::add(unsigned long tag_index)
+{
+    GenReplEntry *entry = new GenReplEntry;
+    entry->valid = true;
+    entry->tag_ptr = tag_index;
+    ++num_entries;
+    pools[num_pools].push(entry, misses); // new tags start in the fresh pool
+    return entry;
+}
+
+void
+GenRepl::regStats(const string name)
+{
+    using namespace Stats;
+
+    /** GEN statistics: one distribution per kind of pool transition. */
+    repl_pool
+        .init(0, 16, 1)
+        .name(name + ".repl_pool_dist")
+        .desc("Dist. of Repl. across pools")
+        .flags(pdf)
+        ;
+
+    advance_pool
+        .init(0, 16, 1)
+        .name(name + ".advance_pool_dist")
+        .desc("Dist. of advances across pools")
+        .flags(pdf)
+        ;
+
+    demote_pool
+        .init(0, 16, 1)
+        .name(name + ".demote_pool_dist")
+        .desc("Dist. of demotions across pools")
+        .flags(pdf)
+        ;
+}
+
+int
+GenRepl::fixTag(void* _re, unsigned long old_index, unsigned long new_index)
+{
+    GenReplEntry *entry = static_cast<GenReplEntry*>(_re);
+    assert(entry->valid);
+    if (entry->tag_ptr != old_index) {
+        fatal("Repl entry: tag ptrs do not match");
+        return 0;
+    }
+    entry->tag_ptr = new_index;
+    return 1;
+}
+
+bool
+GenRepl::findTagPtr(unsigned long index)
+{
+    // Scan every pool, including the fresh pool at pools[num_pools].
+    for (int p = 0; p <= num_pools; ++p) {
+        const list<GenReplEntry*> &entries = pools[p].entries;
+        list<GenReplEntry*>::const_iterator it = entries.begin();
+        for (; it != entries.end(); ++it) {
+            if ((*it)->valid && (*it)->tag_ptr == index)
+                return true;
+        }
+    }
+    return false;
+}
+
+#ifndef DOXYGEN_SHOULD_SKIP_THIS
+
+BEGIN_DECLARE_SIM_OBJECT_PARAMS(GenRepl)
+
+    Param<int> num_pools;
+    Param<int> fresh_res;
+    Param<int> pool_res;
+
+END_DECLARE_SIM_OBJECT_PARAMS(GenRepl)
+
+
+BEGIN_INIT_SIM_OBJECT_PARAMS(GenRepl)
+
+    INIT_PARAM(num_pools, "number of replacement pools"),
+    INIT_PARAM(fresh_res, "time to stay in the fresh pool"),
+    INIT_PARAM(pool_res, "time to stay in the normal pools")
+
+END_INIT_SIM_OBJECT_PARAMS(GenRepl)
+
+
+CREATE_SIM_OBJECT(GenRepl)
+{
+    return new GenRepl(getInstanceName(), num_pools, fresh_res, pool_res);
+}
+
+REGISTER_SIM_OBJECT("GenRepl", GenRepl)
+
+#endif // DOXYGEN_SHOULD_SKIP_THIS
diff --git a/src/mem/cache/tags/repl/gen.hh b/src/mem/cache/tags/repl/gen.hh
new file mode 100644
index 000000000..c1ceb3f4e
--- /dev/null
+++ b/src/mem/cache/tags/repl/gen.hh
@@ -0,0 +1,247 @@
+/*
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Erik Hallnor
+ */
+
+/**
+ * @file
+ * Declarations of generational replacement policy
+ */
+
+#ifndef __GEN_HH__
+#define __GEN_HH__
+
+#include <list>
+
+#include "base/statistics.hh"
+#include "mem/cache/tags/repl/repl.hh"
+
+/**
+ * Generational Replacement entry.
+ */
+class GenReplEntry
+{
+  public:
+    /** Valid flag, used to quickly invalidate bogus entries. */
+    bool valid;
+    /** The difference between this entry and the previous in the pool. */
+    int delta;
+    /** Pointer to the corresponding tag in the IIC. */
+    unsigned long tag_ptr;
+};
+
+/**
+ * Generational replacement pool
+ */
+class GenPool
+{
+  public:
+    /** The time the last entry was added. */
+    Tick newest;
+    /** The time the oldest entry was added. */
+    Tick oldest;
+    /** List of the replacement entries in this pool. */
+    std::list<GenReplEntry*> entries;
+
+    /** The number of entries in this pool. */
+    int size;
+
+    /**
+     * Simple constructor.
+     */
+    GenPool() {
+        newest = 0;
+        oldest = 0;
+        size = 0;
+    }
+
+    /**
+     * Add an entry to this pool.
+     * @param re The entry to add.
+     * @param now The current time.
+     */
+    void push(GenReplEntry *re, Tick now) {
+        ++size;
+        if (!entries.empty()) {
+            re->delta = now - newest;
+            newest = now;
+        } else {
+            re->delta = 0;
+            newest = oldest = now;
+        }
+        entries.push_back(re);
+    }
+
+    /**
+     * Remove an entry from the pool.
+     * @return The entry at the front of the list.
+     */
+    GenReplEntry* pop() {
+        GenReplEntry *tmp = NULL;
+        if (!entries.empty()) {
+            --size;
+            tmp = entries.front();
+            entries.pop_front();
+            oldest += tmp->delta;
+        }
+        return tmp;
+    }
+
+    /**
+     * Return the entry at the front of the list without removing it.
+     * @return The front entry, or NULL if the pool is empty.
+     */
+    GenReplEntry* top() {
+        return entries.empty() ? NULL : entries.front();
+    }
+
+    /**
+     * Destructor.
+     */
+    ~GenPool() {
+        while (!entries.empty()) {
+            GenReplEntry *tmp = entries.front();
+            entries.pop_front();
+            delete tmp;
+        }
+    }
+};
+
+/**
+ * Generational replacement policy for use with the IIC.
+ * @todo update to use STL and for efficiency
+ */
+class GenRepl : public Repl
+{
+  public:
+    /** The array of pools. */
+    GenPool *pools;
+    /** The number of pools. */
+    int num_pools;
+    /** The amount of time to stay in the fresh pool. */
+    int fresh_res;
+    /** The amount of time to stay in the normal pools. */
+    int pool_res;
+    /** The number of entries added and not yet handed out for replacement. */
+    int num_entries;
+    /** The number of entries currently in the normal (non-fresh) pools. */
+    int num_pool_entries;
+    /** The number of misses. Used as the internal time. */
+    Tick misses;
+
+    // Statistics
+
+    /**
+     * @addtogroup CacheStatistics
+     * @{
+     */
+    /** The number of replacements from each pool. */
+    Stats::Distribution<> repl_pool;
+    /** The number of advances out of each pool. */
+    Stats::Distribution<> advance_pool;
+    /** The number of demotions from each pool. */
+    Stats::Distribution<> demote_pool;
+    /**
+     * @}
+     */
+
+    /**
+     * Constructs and initializes this replacement policy.
+     * @param name The name of the policy.
+     * @param num_pools The number of pools to use.
+     * @param fresh_res The amount of time to wait in the fresh pool.
+     * @param pool_res The amount of time to wait in the normal pools.
+     */
+    GenRepl(const std::string &name, int num_pools,
+            int fresh_res, int pool_res);
+
+    /**
+     * Destructor.
+     */
+    ~GenRepl();
+
+    /**
+     * Returns the tag pointer of the cache block to replace.
+     * @return The tag to replace.
+     */
+    virtual unsigned long getRepl();
+
+    /**
+     * Return an array of N tag pointers to replace.
+     * @param n The number of tag pointer to return.
+     * @return An array of tag pointers to replace.
+     */
+    virtual unsigned long *getNRepl(int n);
+
+    /**
+     * Update replacement data
+     */
+    virtual void doAdvance(std::list<unsigned long> &demoted);
+
+    /**
+     * Add a tag to the replacement policy and return a pointer to the
+     * replacement entry.
+     * @param tag_index The tag to add.
+     * @return The replacement entry.
+     */
+    virtual void* add(unsigned long tag_index);
+
+    /**
+     * Register statistics.
+     * @param name The name to prepend to statistic descriptions.
+     */
+    virtual void regStats(const std::string name);
+
+    /**
+     * Update the tag pointer when the tag moves.
+     * @param re The replacement entry of the tag.
+     * @param old_index The old tag pointer.
+     * @param new_index The new tag pointer.
+     * @return 1 if successful, 0 otherwise.
+     */
+    virtual int fixTag(void *re, unsigned long old_index,
+                       unsigned long new_index);
+
+    /**
+     * Remove this entry from the replacement policy.
+     * @param re The replacement entry to remove
+     */
+    virtual void removeEntry(void *re)
+    {
+        ((GenReplEntry*)re)->valid = false;
+    }
+
+  protected:
+    /**
+     * Debug function to verify that there is only one repl entry per tag.
+     * @param index The tag index to check.
+     */
+    bool findTagPtr(unsigned long index);
+};
+
+#endif /* __GEN_HH__ */
diff --git a/src/mem/cache/tags/repl/repl.cc b/src/mem/cache/tags/repl/repl.cc
new file mode 100644
index 000000000..ce781eb9f
--- /dev/null
+++ b/src/mem/cache/tags/repl/repl.cc
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Erik Hallnor
+ *          Nathan Binkert
+ */
+
+/**
+ * Definitions of the base replacement class.
+ */
+
+#include "sim/param.hh"
+#include "mem/cache/tags/repl/repl.hh"
+
+#ifndef DOXYGEN_SHOULD_SKIP_THIS
+
+DEFINE_SIM_OBJECT_CLASS_NAME("Repl", Repl)
+
+#endif //DOXYGEN_SHOULD_SKIP_THIS
diff --git a/src/mem/cache/tags/repl/repl.hh b/src/mem/cache/tags/repl/repl.hh
new file mode 100644
index 000000000..7c289a5c1
--- /dev/null
+++ b/src/mem/cache/tags/repl/repl.hh
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Erik Hallnor
+ *          Steve Reinhardt
+ *          Nathan Binkert
+ */
+
+/**
+ * @file
+ * Declaration of a base replacement policy class.
+ */
+
+#ifndef __REPL_HH__
+#define __REPL_HH__
+
+#include <string>
+#include <list>
+
+#include "cpu/smt.hh"
+#include "sim/host.hh"
+#include "sim/sim_object.hh"
+
+
+class IIC;
+
+/**
+ * A pure virtual base class that defines the interface of a replacement
+ * policy.
+ */
+class Repl : public SimObject
+{
+ public:
+    /** Pointer to the IIC using this policy. */
+    IIC *iic;
+
+    /**
+     * Construct and initialize this policy.
+     * @param name The instance name of this policy.
+     */
+    Repl (const std::string &name)
+        : SimObject(name)
+    {
+        iic = NULL;
+    }
+
+    /**
+     * Set the back pointer to the IIC.
+     * @param iic_ptr Pointer to the IIC.
+     */
+    void setIIC(IIC *iic_ptr)
+    {
+        iic = iic_ptr;
+    }
+
+    /**
+     * Returns the tag pointer of the cache block to replace.
+     * @return The tag to replace.
+     */
+    virtual unsigned long getRepl() = 0;
+
+    /**
+     * Return an array of N tag pointers to replace.
+     * @param n The number of tag pointer to return.
+     * @return An array of tag pointers to replace.
+     */
+    virtual unsigned long  *getNRepl(int n) = 0;
+
+    /**
+     * Update replacement data
+     */
+    virtual void doAdvance(std::list<unsigned long> &demoted) = 0;
+
+     /**
+     * Add a tag to the replacement policy and return a pointer to the
+     * replacement entry.
+     * @param tag_index The tag to add.
+     * @return The replacement entry.
+     */
+    virtual void* add(unsigned long tag_index) = 0;
+
+    /**
+     * Register statistics.
+     * @param name The name to prepend to statistic descriptions.
+     */
+    virtual void regStats(const std::string name) = 0;
+
+    /**
+     * Update the tag pointer when the tag moves.
+     * @param re The replacement entry of the tag.
+     * @param old_index The old tag pointer.
+     * @param new_index The new tag pointer.
+     * @return 1 if successful, 0 otherwise.
+     */
+    virtual int fixTag(void *re, unsigned long old_index,
+                       unsigned long new_index) = 0;
+
+    /**
+     * Remove this entry from the replacement policy.
+     * @param re The replacement entry to remove
+     */
+    virtual void removeEntry(void *re) = 0;
+};
+
+#endif /* __REPL_HH__ */
diff --git a/src/mem/cache/tags/split.cc b/src/mem/cache/tags/split.cc
new file mode 100644
index 000000000..bf23fb8cb
--- /dev/null
+++ b/src/mem/cache/tags/split.cc
@@ -0,0 +1,478 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Lisa Hsu
+ */
+
+/**
+ * @file
+ * Definitions of split cache tag store.
+ */
+
+#include <string>
+#include <iostream>
+#include <fstream>
+
+#include "base/cprintf.hh"
+#include "base/intmath.hh"
+#include "base/output.hh"
+#include "base/trace.hh"
+#include "mem/cache/base_cache.hh"
+#include "mem/cache/tags/split.hh"
+#include "mem/cache/tags/split_lifo.hh"
+#include "mem/cache/tags/split_lru.hh"
+
+
+using namespace std;
+using namespace TheISA;
+
+// Create a partitioned tag store: a primary LRU plus an optional 2nd partition.
+Split::Split(int _numSets, int _blkSize, int total_ways, int LRU1_assoc,
+             bool _lifo, bool _two_queue, int _hit_latency) :
+    numSets(_numSets), blkSize(_blkSize), lifo(_lifo), hitLatency(_hit_latency)
+{
+    DPRINTF(Split, "new split cache!!\n");
+
+    DPRINTF(Split, "lru has %d numSets, %d blkSize, %d assoc, and %d hit_latency\n",
+            numSets, blkSize, LRU1_assoc, hitLatency);
+    // Primary LRU partition; always created (1 is presumably its part id).
+    lru = new SplitLRU(_numSets, _blkSize, LRU1_assoc, _hit_latency, 1);
+    // The remaining ways, if any, form a second partition (LIFO or LRU).
+    if (total_ways - LRU1_assoc == 0) {
+        lifo_net = NULL;
+        lru_net = NULL;
+    } else {
+        if (lifo) {
+            DPRINTF(Split, "Other partition is a LIFO with size %d in bytes. it gets %d ways\n",
+                    (total_ways - LRU1_assoc)*_numSets*_blkSize, (total_ways - LRU1_assoc));
+            lifo_net = new SplitLIFO(_blkSize, (total_ways - LRU1_assoc)*_numSets*_blkSize,
+                                     (total_ways - LRU1_assoc), _hit_latency, _two_queue, 2);
+            lru_net = NULL;
+        }
+        else {
+            DPRINTF(Split, "other LRU gets %d ways\n", total_ways - LRU1_assoc);
+            lru_net = new SplitLRU(_numSets, _blkSize, total_ways - LRU1_assoc, _hit_latency, 2);
+            lifo_net = NULL;
+        }
+    }
+    // Block-offset mask; assumes blkSize is a power of 2 (not checked here).
+    blkMask = blkSize - 1;
+
+    if (!isPowerOf2(total_ways))
+        warn("total cache ways/columns %d should be power of 2",
+             total_ways);
+    // Warm-up bookkeeping: the bound is the total number of blocks.
+    warmedUp = false;
+    /** @todo Make warmup percentage a parameter. */
+    warmupBound = numSets * total_ways;
+
+}
+
+Split::~Split()
+{
+    // The constructor allocates at most one secondary partition and sets
+    // the other pointer to NULL, so deleting both is safe.
+    delete lru;
+    delete lifo_net;
+    delete lru_net;
+}
+
+void
+Split::regStats(const string &name)
+{
+    using namespace Stats;
+
+    BaseTags::regStats(name);
+    // Distribution setup; arguments are presumably (min, max, bucket size).
+    usedEvictDist.init(0,3000,40);
+    unusedEvictDist.init(0,3000,40);
+    useByCPUCycleDist.init(0,35,1);
+
+    nic_repl
+        .name(name + ".nic_repl")
+        .desc("number of replacements in the nic partition")
+        .precision(0)
+        ;
+
+    cpu_repl
+        .name(name + ".cpu_repl")
+        .desc("number of replacements in the cpu partition")
+        .precision(0)
+        ;
+    // Each partition's tag store registers its own stats under a sub-name.
+    lru->regStats(name + ".lru");
+
+    if (lifo && lifo_net) {
+        lifo_net->regStats(name + ".lifo_net");
+    } else if (lru_net) {
+        lru_net->regStats(name + ".lru_net");
+    }
+
+    nicUsedWhenEvicted
+        .name(name + ".nicUsedWhenEvicted")
+        .desc("number of NIC blks that were used before evicted")
+        ;
+
+    nicUsedTotLatency
+        .name(name + ".nicUsedTotLatency")
+        .desc("total cycles before eviction of used NIC blks")
+        ;
+
+    nicUsedTotEvicted
+        .name(name + ".nicUsedTotEvicted")
+        .desc("total number of used NIC blks evicted")
+        ;
+    // Derived stat (Stats::Formula): total latency / number evicted.
+    nicUsedAvgLatency
+        .name(name + ".nicUsedAvgLatency")
+        .desc("avg number of cycles a used NIC blk is in cache")
+        .precision(0)
+        ;
+    nicUsedAvgLatency = nicUsedTotLatency / nicUsedTotEvicted;
+
+    usedEvictDist
+        .name(name + ".usedEvictDist")
+        .desc("distribution of used NIC blk eviction times")
+        .flags(pdf | cdf)
+        ;
+
+    nicUnusedWhenEvicted
+        .name(name + ".nicUnusedWhenEvicted")
+        .desc("number of NIC blks that were unused when evicted")
+        ;
+
+    nicUnusedTotLatency
+        .name(name + ".nicUnusedTotLatency")
+        .desc("total cycles before eviction of unused NIC blks")
+        ;
+
+    nicUnusedTotEvicted
+        .name(name + ".nicUnusedTotEvicted")
+        .desc("total number of unused NIC blks evicted")
+        ;
+
+    nicUnusedAvgLatency
+        .name(name + ".nicUnusedAvgLatency")
+        .desc("avg number of cycles an unused NIC blk is in cache")
+        .precision(0)
+        ;
+    nicUnusedAvgLatency = nicUnusedTotLatency / nicUnusedTotEvicted;
+
+    unusedEvictDist
+        .name(name + ".unusedEvictDist")
+        .desc("distribution of unused NIC blk eviction times")
+        .flags(pdf | cdf)
+        ;
+
+    nicUseByCPUCycleTotal
+        .name(name + ".nicUseByCPUCycleTotal")
+        .desc("total latency of NIC blks til usage time")
+        ;
+
+    nicBlksUsedByCPU
+        .name(name + ".nicBlksUsedByCPU")
+        .desc("total number of NIC blks used")
+        ;
+
+    nicAvgUsageByCPULatency
+        .name(name + ".nicAvgUsageByCPULatency")
+        .desc("average number of cycles before a NIC blk that is used gets used")
+        .precision(0)
+        ;
+    nicAvgUsageByCPULatency = nicUseByCPUCycleTotal / nicBlksUsedByCPU;
+
+    useByCPUCycleDist
+        .name(name + ".useByCPUCycleDist")
+        .desc("the distribution of cycle time in cache before NIC blk is used")
+        .flags(pdf | cdf)
+        ;
+
+    cpuUsedBlks
+        .name(name + ".cpuUsedBlks")
+        .desc("number of cpu blks that were used before evicted")
+        ;
+
+    cpuUnusedBlks
+        .name(name + ".cpuUnusedBlks")
+        .desc("number of cpu blks that were unused before evicted")
+        ;
+    // Combines the used and unused NIC totals into one overall average.
+    nicAvgLatency
+        .name(name + ".nicAvgLatency")
+        .desc("avg number of cycles a NIC blk is in cache before evicted")
+        .precision(0)
+        ;
+    nicAvgLatency = (nicUnusedTotLatency + nicUsedTotLatency) /
+        (nicUnusedTotEvicted + nicUsedTotEvicted);
+
+    NR_CP_hits
+        .name(name + ".NR_CP_hits")
+        .desc("NIC requests hitting in CPU Partition")
+        ;
+
+    NR_NP_hits
+        .name(name + ".NR_NP_hits")
+        .desc("NIC requests hitting in NIC Partition")
+        ;
+
+    CR_CP_hits
+        .name(name + ".CR_CP_hits")
+        .desc("CPU requests hitting in CPU partition")
+        ;
+
+    CR_NP_hits
+        .name(name + ".CR_NP_hits")
+        .desc("CPU requests hitting in NIC partition")
+        ;
+
+}
+
+// Check both partitions for the block; the CPU partition is consulted first.
+bool
+Split::probe(int asid, Addr addr) const
+{
+    if (lru->probe(asid, addr))
+        return true;
+    // Fall back to whichever secondary (NIC) partition exists, if any.
+    if (lifo && lifo_net)
+        return lifo_net->probe(asid, addr);
+    if (lru_net)
+        return lru_net->probe(asid, addr);
+
+    return false;
+}
+
+SplitBlk*
+Split::findBlock(Packet * &pkt, int &lat)
+{
+    // Track per-block reference counts for the memory footprint dump.
+    Addr aligned = blkAlign(pkt->getAddr());
+    // Only NIC packets create an entry; any packet bumps an existing one.
+    if (memHash.count(aligned)) {
+        memHash[aligned]++;
+    } else if (pkt->nic_pkt()) {
+        memHash[aligned] = 1;
+    }
+    // Look in the CPU partition first, then in the NIC partition, if any.
+    SplitBlk *blk = lru->findBlock(pkt->getAddr(), pkt->req->getAsid(), lat);
+    if (blk) {
+        if (pkt->nic_pkt()) {
+            NR_CP_hits++;
+        } else {
+            CR_CP_hits++;
+        }
+    } else {
+        if (lifo && lifo_net) {
+            blk = lifo_net->findBlock(pkt->getAddr(), pkt->req->getAsid(), lat);
+
+        } else if (lru_net) {
+            blk = lru_net->findBlock(pkt->getAddr(), pkt->req->getAsid(), lat);
+        }
+        if (blk) {
+            if (pkt->nic_pkt()) {
+                NR_NP_hits++;
+            } else {
+                CR_NP_hits++;
+            }
+        }
+    }
+    // On a hit, record how long a NIC block waited for its first CPU use.
+    if (blk) {
+        Tick latency = curTick - blk->ts;
+        if (blk->isNIC) {
+            if (!blk->isUsed && !pkt->nic_pkt()) {
+                    useByCPUCycleDist.sample(latency);
+                    nicUseByCPUCycleTotal += latency;
+                    nicBlksUsedByCPU++;
+            }
+        }
+        blk->isUsed = true;
+
+        if (pkt->nic_pkt()) {
+            DPRINTF(Split, "found block in partition %d\n", blk->part);
+        }
+    }
+    return blk;
+}
+
+SplitBlk*
+Split::findBlock(Addr addr, int asid, int &lat)
+{
+    // A hit in the CPU partition ends the search immediately.
+    SplitBlk *hit = lru->findBlock(addr, asid, lat);
+    if (hit)
+        return hit;
+    // Otherwise consult the secondary partition, when one is configured.
+    if (lifo && lifo_net)
+        return lifo_net->findBlock(addr, asid, lat);
+    if (lru_net)
+        return lru_net->findBlock(addr, asid, lat);
+    return NULL;
+}
+
+SplitBlk*
+Split::findBlock(Addr addr, int asid) const
+{
+    // Try the CPU partition first, then the secondary partition, if any.
+    SplitBlk *found = lru->findBlock(addr, asid);
+    if (found)
+        return found;
+
+    if (lifo && lifo_net)
+        return lifo_net->findBlock(addr, asid);
+    if (lru_net)
+        return lru_net->findBlock(addr, asid);
+    return NULL;
+}
+
+SplitBlk*
+Split::findReplacement(Packet * &pkt, PacketList &writebacks,
+                     BlkList &compress_blocks)
+{
+    SplitBlk *blk;
+    // NIC fills go to the secondary partition; CPU fills to the primary LRU.
+    if (pkt->nic_pkt()) {
+        DPRINTF(Split, "finding a replacement for nic_req\n");
+        nic_repl++;
+        if (lifo && lifo_net)
+            blk = lifo_net->findReplacement(pkt, writebacks,
+                                             compress_blocks);
+        else if (lru_net)
+            blk = lru_net->findReplacement(pkt, writebacks,
+                                            compress_blocks);
+        // in this case, this is an LRU only cache, it's non partitioned
+        else
+            blk = lru->findReplacement(pkt, writebacks, compress_blocks);
+    } else {
+        DPRINTF(Split, "finding replacement for cpu_req\n");
+        blk = lru->findReplacement(pkt, writebacks,
+                                    compress_blocks);
+        cpu_repl++;
+    }
+    // Account for the block being evicted: its age and whether it was used.
+    Tick latency = curTick - blk->ts;
+    if (blk->isNIC) {
+        if (blk->isUsed) {
+            nicUsedWhenEvicted++;
+            usedEvictDist.sample(latency);
+            nicUsedTotLatency += latency;
+            nicUsedTotEvicted++;
+        } else {
+            nicUnusedWhenEvicted++;
+            unusedEvictDist.sample(latency);
+            nicUnusedTotLatency += latency;
+            nicUnusedTotEvicted++;
+        }
+    } else {
+        if (blk->isUsed) {
+            cpuUsedBlks++;
+        } else {
+            cpuUnusedBlks++;
+        }
+    }
+
+    // blk attributes for the new blk coming IN
+    blk->ts = curTick;
+    blk->isNIC = (pkt->nic_pkt()) ? true : false;
+    // NOTE(review): isUsed is not reset here; confirm the partition does it.
+    return blk;
+}
+
+void
+Split::invalidateBlk(int asid, Addr addr)
+{
+    // Look in the CPU partition first, then in the NIC partition (if any).
+    SplitBlk *victim = lru->findBlock(addr, asid);
+    if (!victim && lifo && lifo_net)
+        victim = lifo_net->findBlock(addr, asid);
+    else if (!victim && lru_net)
+        victim = lru_net->findBlock(addr, asid);
+
+    // Nothing to do when the address is not cached in either partition.
+    if (!victim)
+        return;
+
+    victim->status = 0;
+    victim->isTouched = false;
+    tagsInUse--;
+}
+
+void
+Split::doCopy(Addr source, Addr dest, int asid, PacketList &writebacks)
+{
+    // Use the CPU partition if it holds source, else the NIC partition.
+    if (lru->probe(asid, source)) {
+        lru->doCopy(source, dest, asid, writebacks);
+    } else if (lifo && lifo_net) {
+        lifo_net->doCopy(source, dest, asid, writebacks);
+    } else if (lru_net) {
+        lru_net->doCopy(source, dest, asid, writebacks);
+    }
+}
+
+void
+Split::cleanupRefs()
+{
+    lru->cleanupRefs();
+    if (lifo && lifo_net) {
+        lifo_net->cleanupRefs();
+    } else if (lru_net) {
+        lru_net->cleanupRefs();
+    }
+
+    // Dump the per-block reference counts gathered by findBlock(). This is
+    // not really cleanup work, but it is a convenient end-of-run hook.
+    ofstream footprint(simout.resolve("memory_footprint.txt").c_str(),
+                       ios::trunc);
+
+    for (memIter it = memHash.begin(); it != memHash.end(); ++it)
+        ccprintf(footprint, "%8x\t%d\n", it->first, it->second);
+}
+
+Addr
+Split::regenerateBlkAddr(Addr tag, int set) const
+{
+    // A LIFO partition, when present, takes precedence over the primary LRU.
+    if (!lifo_net)
+        return lru->regenerateBlkAddr(tag, set);
+    return lifo_net->regenerateBlkAddr(tag, set);
+}
+
+Addr
+Split::extractTag(Addr addr, SplitBlk *blk) const
+{
+    // Only blocks marked as partition 2 use the secondary tag store.
+    if (blk->part != 2)
+        return lru->extractTag(addr);
+
+    if (lifo_net)
+        return lifo_net->extractTag(addr);
+    if (lru_net)
+        return lru_net->extractTag(addr);
+    panic("this shouldn't happen");
+}
+
diff --git a/src/mem/cache/tags/split.hh b/src/mem/cache/tags/split.hh
new file mode 100644
index 000000000..5e0340269
--- /dev/null
+++ b/src/mem/cache/tags/split.hh
@@ -0,0 +1,335 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Lisa Hsu
+ */
+
+/**
+ * @file
+ * Declaration of a split/partitioned tag store.
+ */
+
+#ifndef __SPLIT_HH__
+#define __SPLIT_HH__
+
+#include <list>
+
+#include "mem/cache/cache_blk.hh" // base class
+#include "mem/cache/tags/split_blk.hh"
+#include "mem/packet.hh" // for inlined functions
+#include <assert.h>
+#include "mem/cache/tags/base_tags.hh"
+#include "base/hashmap.hh"
+
+class BaseCache;
+class SplitLRU;
+class SplitLIFO;
+
+/**
+ * A split (partitioned) cache tag store.
+ */
+class Split : public BaseTags
+{
+  public:
+    /** Typedef the block type used in this tag store. */
+    typedef SplitBlk BlkType;
+    /** Typedef for a list of pointers to the local block class. */
+    typedef std::list<SplitBlk*> BlkList;
+  protected:
+    /** The number of sets in the cache. */
+    const int numSets;
+    /** The number of bytes in a block. */
+    const int blkSize;
+    /** Whether the 2nd partition (for the nic) is LIFO or not */
+    const bool lifo;
+    /** The hit latency. */
+    const int hitLatency;
+
+    Addr blkMask;
+
+    /** Number of NIC requests that hit in the NIC partition */
+    Stats::Scalar<> NR_NP_hits;
+    /** Number of NIC requests that hit in the CPU partition */
+    Stats::Scalar<> NR_CP_hits;
+    /** Number of CPU requests that hit in the NIC partition */
+    Stats::Scalar<> CR_NP_hits;
+    /** Number of CPU requests that hit in the CPU partition */
+    Stats::Scalar<> CR_CP_hits;
+    /** The number of nic replacements (i.e. misses) */
+    Stats::Scalar<> nic_repl;
+    /** The number of cpu replacements (i.e. misses) */
+    Stats::Scalar<> cpu_repl;
+
+    //For latency studies
+    /** the number of NIC blks that were used before evicted */
+    Stats::Scalar<> nicUsedWhenEvicted;
+    /** the total latency of used NIC blocks in the cache */
+    Stats::Scalar<> nicUsedTotLatency;
+    /** the total number of used NIC blocks evicted */
+    Stats::Scalar<> nicUsedTotEvicted;
+    /** the average number of cycles a used NIC blk is in the cache */
+    Stats::Formula nicUsedAvgLatency;
+    /** the Distribution of used NIC blk eviction times */
+    Stats::Distribution<> usedEvictDist;
+
+    /** the number of NIC blks that were unused before evicted */
+    Stats::Scalar<> nicUnusedWhenEvicted;
+    /** the total latency of unused NIC blks in the cache */
+    Stats::Scalar<> nicUnusedTotLatency;
+    /** the total number of unused NIC blocks evicted */
+    Stats::Scalar<> nicUnusedTotEvicted;
+    /** the average number of cycles an unused NIC blk is in the cache */
+    Stats::Formula nicUnusedAvgLatency;
+    /** the Distribution of unused NIC blk eviction times */
+    Stats::Distribution<> unusedEvictDist;
+
+    /** The total latency of NIC blocks to 1st usage time by CPU */
+    Stats::Scalar<> nicUseByCPUCycleTotal;
+    /** The total number of NIC blocks used */
+    Stats::Scalar<> nicBlksUsedByCPU;
+    /** the average number of cycles before a NIC blk that is used gets used by CPU */
+    Stats::Formula nicAvgUsageByCPULatency;
+    /** the Distribution of cycles time before a NIC blk is used by CPU*/
+    Stats::Distribution<> useByCPUCycleDist;
+
+    /** the number of CPU blks that were used before evicted */
+    Stats::Scalar<> cpuUsedBlks;
+    /** the number of CPU blks that were unused before evicted */
+    Stats::Scalar<> cpuUnusedBlks;
+
+    /** the avg number of cycles before a NIC blk is evicted */
+    Stats::Formula nicAvgLatency;
+
+    typedef m5::hash_map<Addr, int, m5::hash<Addr> > hash_t;
+    typedef hash_t::const_iterator memIter;
+    hash_t memHash;
+
+
+  private:
+    SplitLRU *lru;
+    SplitLRU *lru_net;
+    SplitLIFO *lifo_net;
+
+  public:
+    /**
+     * Construct and initialize this tag store.
+     * @param _numSets The number of sets in the cache.
+     * @param _blkSize The number of bytes in a block.
+     * @param total_ways The total number of ways across all partitions.
+     * @param _hit_latency The latency in cycles for a hit.
+     */
+    Split(int _numSets, int _blkSize, int total_ways, int LRU1_assoc,
+          bool _lifo, bool _two_queue, int _hit_latency);
+
+    /**
+     * Destructor
+     */
+    virtual ~Split();
+
+    /**
+     * Register the stats for this object
+     * @param name The name to prepend to the stats name.
+     */
+    void regStats(const std::string &name);
+
+    /**
+     * Return the block size.
+     * @return the block size.
+     */
+    int getBlockSize()
+    {
+        return blkSize;
+    }
+
+    /**
+     * Return the subblock size. In the case of Split it is always the block
+     * size.
+     * @return The block size.
+     */
+    int getSubBlockSize()
+    {
+        return blkSize;
+    }
+
+    /**
+     * Search for the address in the cache.
+     * @param asid The address space ID.
+     * @param addr The address to find.
+     * @return True if the address is in the cache.
+     */
+    bool probe(int asid, Addr addr) const;
+
+    /**
+     * Invalidate the block containing the given address.
+     * @param asid The address space ID.
+     * @param addr The address to invalidate.
+     */
+    void invalidateBlk(int asid, Addr addr);
+
+    /**
+     * Finds the given address in the cache and update replacement data.
+     * Returns the access latency as a side effect.
+     * @param addr The address to find.
+     * @param asid The address space ID.
+     * @param lat The access latency.
+     * @return Pointer to the cache block if found.
+     */
+    SplitBlk* findBlock(Addr addr, int asid, int &lat);
+
+    /**
+     * Finds the given address in the cache and update replacement data.
+     * Returns the access latency as a side effect.
+     * @param pkt The packet whose block to find.
+     * @param lat The access latency.
+     * @return Pointer to the cache block if found.
+     */
+    SplitBlk* findBlock(Packet * &pkt, int &lat);
+
+    /**
+     * Finds the given address in the cache, do not update replacement data.
+     * @param addr The address to find.
+     * @param asid The address space ID.
+     * @return Pointer to the cache block if found.
+     */
+    SplitBlk* findBlock(Addr addr, int asid) const;
+
+    /**
+     * Find a replacement block for the address provided.
+     * @param pkt The packet to find a replacement candidate for.
+     * @param writebacks List for any writebacks to be performed.
+     * @param compress_blocks List of blocks to compress, for adaptive comp.
+     * @return The block to place the replacement in.
+     */
+    SplitBlk* findReplacement(Packet * &pkt, PacketList &writebacks,
+                            BlkList &compress_blocks);
+
+
+    /**
+     * Generate the tag from the given address.
+     * @param addr The address to get the tag from.
+     * @param blk The block to find the partition it's in
+     * @return The tag of the address.
+     */
+    Addr extractTag(Addr addr, SplitBlk *blk) const;
+
+    /**
+     * Calculate the set index from the address.
+     * @param addr The address to get the set from.
+     * @return The set index of the address.
+     */
+    int extractSet(Addr addr) const
+    {
+        panic("should never call this!\n");
+    }
+
+    /**
+     * Get the block offset from an address.
+     * @param addr The address to get the offset of.
+     * @return The block offset.
+     */
+    int extractBlkOffset(Addr addr) const
+    {
+        return (addr & blkMask);
+    }
+
+    /**
+     * Align an address to the block size.
+     * @param addr the address to align.
+     * @return The block address.
+     */
+    Addr blkAlign(Addr addr) const
+    {
+        return (addr & ~(Addr) (blkMask));
+    }
+
+    /**
+     * Regenerate the block address from the tag.
+     * @param tag The tag of the block.
+     * @param set The set of the block.
+     * @return The block address.
+     */
+    Addr regenerateBlkAddr(Addr tag, int set) const;
+
+    /**
+     * Return the hit latency.
+     * @return the hit latency.
+     */
+    int getHitLatency() const
+    {
+        return hitLatency;
+    }
+
+    /**
+     * Read the data out of the internal storage of the given cache block.
+     * @param blk The cache block to read.
+     * @param data The buffer to read the data into.
+     * @note Nothing is returned; blk->size bytes are copied into data.
+     */
+    void readData(SplitBlk *blk, uint8_t *data)
+    {
+        memcpy(data, blk->data, blk->size);
+    }
+
+    /**
+     * Write data into the internal storage of the given cache block. Since
+     * Split does not store data differently, this just needs to update the size.
+     * @param blk The cache block to write.
+     * @param data The data to write.
+     * @param size The number of bytes to write.
+     * @param writebacks A list for any writebacks to be performed. May be
+     * needed when writing to a compressed block.
+     */
+    void writeData(SplitBlk *blk, uint8_t *data, int size,
+                   PacketList & writebacks)
+    {
+        assert(size <= blkSize);
+        blk->size = size;
+    }
+
+    /**
+     * Perform a block aligned copy from the source address to the destination.
+     * @param source The block-aligned source address.
+     * @param dest The block-aligned destination address.
+     * @param asid The address space ID.
+     * @param writebacks List for any generated writeback requests.
+     */
+    void doCopy(Addr source, Addr dest, int asid, PacketList &writebacks);
+
+    /**
+     * No impl.
+     */
+    void fixCopy(Packet * &pkt, PacketList &writebacks)
+    {
+    }
+
+    /**
+     * Called at end of simulation to complete average block reference stats.
+     */
+    virtual void cleanupRefs();
+};
+
+#endif
diff --git a/src/mem/cache/tags/split_blk.hh b/src/mem/cache/tags/split_blk.hh
new file mode 100644
index 000000000..f38516180
--- /dev/null
+++ b/src/mem/cache/tags/split_blk.hh
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Lisa Hsu
+ */
+
+/**
+ * @file
+ * Declaration of partitioned tag store cache block class.
+ */
+
+#ifndef __SPLIT_BLK_HH__
+#define __SPLIT_BLK_HH__
+
+#include "mem/cache/cache_blk.hh" // base class
+
+/**
+ * Cache block used by the split (partitioned) tag stores.
+ */
+class SplitBlk : public CacheBlk {
+  public:
+    /** Set once the block has been touched; helps compute warmup time. */
+    bool isTouched;
+    /** Set once the block is used after being brought in (LIFO partition). */
+    bool isUsed;
+    /** True when the block was requested by the NIC. */
+    bool isNIC;
+    /** Tick at which this block arrived in the cache. */
+    Tick ts;
+    /** Neighbor in the LIFO partition that was brought in earlier. */
+    SplitBlk *prev;
+    /** Neighbor in the LIFO partition that was brought in later. */
+    SplitBlk *next;
+    /** Identifies the partition this block lives in. */
+    int part;
+
+    SplitBlk()
+        : isTouched(false), isUsed(false), isNIC(false),
+          ts(0), prev(NULL), next(NULL), part(0)
+    {}
+};
+
+#endif
+
diff --git a/src/mem/cache/tags/split_lifo.cc b/src/mem/cache/tags/split_lifo.cc
new file mode 100644
index 000000000..f6493fdd2
--- /dev/null
+++ b/src/mem/cache/tags/split_lifo.cc
@@ -0,0 +1,407 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Lisa Hsu
+ */
+
+/**
+ * @file
+ * Definitions of LIFO tag store usable in a partitioned cache.
+ */
+
+#include <string>
+
+#include "mem/cache/base_cache.hh"
+#include "base/intmath.hh"
+#include "mem/cache/tags/split_lifo.hh"
+#include "sim/root.hh"
+#include "base/trace.hh"
+
+using namespace std;
+
+/**
+ * Walk this set's list from firstIn along the next links and return the
+ * first valid block whose tag matches.
+ * @param asid The address space ID (not consulted by the search).
+ * @param tag The tag to look for.
+ * @return The matching valid block, or NULL when there is none.
+ */
+SplitBlk*
+LIFOSet::findBlk(int asid, Addr tag) const
+{
+    for (SplitBlk *cur = firstIn; cur; cur = cur->next) {
+        if (cur->tag != tag || !cur->isValid())
+            continue;
+        return cur;
+    }
+    return NULL;
+}
+
+/**
+ * Unlink a block from its current position in this set's list and relink
+ * it at the lastIn end.
+ * @param blk The block to move; it must already be linked into this set.
+ */
+void
+LIFOSet::moveToLastIn(SplitBlk *blk)
+{
+    // Already at the lastIn end: nothing to relink.
+    if (blk == lastIn)
+        return;
+
+    if (blk == firstIn) {
+        // blk is the firstIn end, so its successor becomes the new firstIn.
+        // Leaving firstIn on blk would make every later walk from firstIn
+        // stop after one block, because blk->next is cleared below.
+        firstIn = blk->next;
+        firstIn->prev = NULL;
+    } else {
+        // Interior block: splice its neighbours together.
+        blk->prev->next = blk->next;
+        blk->next->prev = blk->prev;
+    }
+
+    // Append after the current lastIn and make blk the new lastIn.
+    blk->next = NULL;
+    blk->prev = lastIn;
+    lastIn->next = blk;
+    lastIn = blk;
+}
+
+/**
+ * Unlink a block from its current position in this set's list and relink
+ * it at the firstIn end.
+ * @param blk The block to move; it must already be linked into this set.
+ */
+void
+LIFOSet::moveToFirstIn(SplitBlk *blk)
+{
+    // Already at the firstIn end: nothing to relink.
+    if (blk == firstIn)
+        return;
+
+    if (blk == lastIn) {
+        // blk is the lastIn end, so its predecessor becomes the new lastIn.
+        // Leaving lastIn on blk would make lastIn alias the new firstIn.
+        lastIn = blk->prev;
+        lastIn->next = NULL;
+    } else {
+        // Interior block: splice its neighbours together.
+        blk->next->prev = blk->prev;
+        blk->prev->next = blk->next;
+    }
+
+    // Insert before the current firstIn and make blk the new firstIn.
+    blk->prev = NULL;
+    blk->next = firstIn;
+    firstIn->prev = blk;
+    firstIn = blk;
+}
+
+/**
+ * Create and initialize a LIFO tag store for one cache partition.
+ * @param _blkSize The number of bytes in a block; must be a power of 2.
+ * @param _size The size of this partition in bytes.
+ * @param _ways The number of ways (blocks per set); must be non-zero.
+ * @param _hit_latency The latency in cycles for a hit; must be positive.
+ * @param two_Queue Whether to use the "2 queue" replacement variant.
+ * @param _part The identifier stamped on every block of this partition.
+ */
+SplitLIFO::SplitLIFO(int _blkSize, int _size, int _ways, int _hit_latency, bool two_Queue, int _part) :
+    blkSize(_blkSize), size(_size),
+    // These initializers run before the parameter checks in the body, so
+    // they must not divide by a zero block size or way count themselves.
+    numBlks(_blkSize > 0 ? _size/_blkSize : 0),
+    numSets((_blkSize > 0 && _ways > 0) ? (_size/_ways)/_blkSize : 0),
+    ways(_ways),
+    hitLatency(_hit_latency), twoQueue(two_Queue), part(_part)
+{
+    if (blkSize <= 0 || !isPowerOf2(blkSize))
+        fatal("cache block size (in bytes) must be a power of 2");
+    if (!(hitLatency > 0))
+        fatal("access latency in cycles must be at least one cycle");
+    if (_ways <= 0)
+        fatal("if instantiating a splitLIFO, needs non-zero size!");
+    // setMask and tagShift below are only meaningful for a power-of-2 set
+    // count; anything else makes distinct addresses alias the same tag/set.
+    if (numSets <= 0 || !isPowerOf2(numSets))
+        fatal("# of sets must be non-zero and a power of 2");
+
+    setShift = floorLog2(blkSize);
+    blkMask = blkSize - 1;
+    setMask = numSets - 1;
+    tagShift = setShift + floorLog2(numSets);
+
+    warmedUp = false;
+    /** @todo Make warmup percentage a parameter. */
+    warmupBound = size/blkSize;
+
+    // allocate the blocks, the sets and one contiguous data buffer
+    blks = new SplitBlk[numBlks];
+    sets = new LIFOSet[numSets];
+    dataBlks = new uint8_t[size];
+
+    int blkIndex = 0;
+    for (int i = 0; i < numSets; ++i) {
+        LIFOSet &set = sets[i];
+        set.ways = ways;
+        // Each set owns 'ways' consecutive blocks: the lowest-indexed one
+        // starts at the lastIn end, the highest-indexed at the firstIn end.
+        set.lastIn = &blks[blkIndex];
+        set.firstIn = &blks[blkIndex + ways - 1];
+
+        for (int j = 0; j < ways; ++j, ++blkIndex) {
+            SplitBlk *blk = &blks[blkIndex];
+            // next leads toward lastIn (lower index), prev toward firstIn
+            // (higher index); the two ends are NULL-terminated. With a
+            // single way both links are NULL.
+            blk->next = (j == 0) ? NULL : &blks[blkIndex - 1];
+            blk->prev = (j == ways - 1) ? NULL : &blks[blkIndex + 1];
+            blk->data = &dataBlks[blkSize*blkIndex];
+            blk->size = blkSize;
+            blk->part = part;
+            blk->set = i;
+        }
+    }
+    assert(blkIndex == numBlks);
+}
+
+/**
+ * Release the block array, the set array and the data buffer that the
+ * constructor allocated.
+ */
+SplitLIFO::~SplitLIFO()
+{
+    delete [] blks;
+    delete [] sets;
+    delete [] dataBlks;
+}
+
+/**
+ * Register the base tag-store statistics and this partition's hit, miss
+ * and invalidation counters.
+ * @param name The name each statistic is prefixed with.
+ */
+void
+SplitLIFO::regStats(const std::string &name)
+{
+    BaseTags::regStats(name);
+
+    const std::string prefix = name + ".";
+
+    hits.name(prefix + "hits")
+        .desc("number of hits on this partition")
+        .precision(0);
+
+    misses.name(prefix + "misses")
+        .desc("number of misses in this partition")
+        .precision(0);
+
+    invalidations.name(prefix + "invalidations")
+        .desc("number of invalidations in this partition")
+        .precision(0);
+}
+
+/**
+ * Check whether the block containing addr is present, without touching
+ * any replacement state.
+ * @param asid The address space ID.
+ * @param addr The address to look up.
+ * @return True if a valid block with a matching tag is in the set.
+ */
+bool
+SplitLIFO::probe(int asid, Addr addr) const
+{
+    const unsigned setIdx = extractSet(addr);
+    return sets[setIdx].findBlk(asid, extractTag(addr)) != NULL;
+}
+
+/**
+ * Look up addr and, on a hit, update the hit counter, the block's
+ * reference count and its position in the set's list.
+ * @param addr The address to find.
+ * @param asid The address space ID.
+ * @param lat Receives the access latency: the hit latency, or the time
+ * until the block is ready when that is longer.
+ * @return The block on a hit, NULL on a miss.
+ */
+SplitBlk*
+SplitLIFO::findBlock(Addr addr, int asid, int &lat)
+{
+    Addr tag = extractTag(addr);
+    unsigned set = extractSet(addr);
+
+    lat = hitLatency;
+
+    SplitBlk *hit = sets[set].findBlk(asid, tag);
+    if (hit == NULL)
+        return NULL;
+
+    DPRINTF(Split, "Found LIFO blk %#x in set %d, with tag %#x\n",
+            addr, set, tag);
+    hits++;
+
+    // A block that is still in flight extends the latency past hitLatency.
+    if (hit->whenReady > curTick && hit->whenReady - curTick > hitLatency)
+        lat = hit->whenReady - curTick;
+    hit->refCount +=1;
+
+    if (!twoQueue) {
+        sets[set].moveToLastIn(hit);
+    } else {
+        hit->isUsed = true;
+        sets[set].moveToFirstIn(hit);
+    }
+
+    return hit;
+}
+
+/**
+ * Look up the block addressed by a packet and update replacement data.
+ *
+ * Forwards to the address-based overload so that a hit gets the same
+ * treatment on both paths: hit counter, reference count, latency
+ * adjustment for a block that is not ready yet, and list reordering.
+ * @param pkt The packet whose block to find.
+ * @param lat Receives the access latency.
+ * @return The block on a hit, NULL on a miss.
+ */
+SplitBlk*
+SplitLIFO::findBlock(Packet * &pkt, int &lat)
+{
+    return findBlock(pkt->getAddr(), pkt->req->getAsid(), lat);
+}
+
+/**
+ * Look up addr without updating any replacement data or statistics.
+ * @param addr The address to find.
+ * @param asid The address space ID.
+ * @return The block on a hit, NULL on a miss.
+ */
+SplitBlk*
+SplitLIFO::findBlock(Addr addr, int asid) const
+{
+    const unsigned setIdx = extractSet(addr);
+    return sets[setIdx].findBlk(asid, extractTag(addr));
+}
+
+/**
+ * Choose the block that will hold the line requested by pkt and update the
+ * replacement, warmup and miss statistics.
+ *
+ * Selection order:
+ *  - in two-queue mode, a used firstIn block is recycled to the lastIn end;
+ *  - while the set is still filling, the next never-filled way is taken;
+ *  - otherwise the lastIn block is replaced.
+ * @param pkt The packet to find a replacement candidate for.
+ * @param writebacks List for any writebacks (not used by this tag store).
+ * @param compress_blocks List of blocks to compress (not used here).
+ * @return The block to place the new line in.
+ */
+SplitBlk*
+SplitLIFO::findReplacement(Packet * &pkt, PacketList &writebacks,
+                           BlkList &compress_blocks)
+{
+    unsigned set = extractSet(pkt->getAddr());
+    LIFOSet &lifo = sets[set];
+
+    SplitBlk *blk;
+    if (twoQueue && lifo.firstIn->isUsed) {
+        blk = lifo.firstIn;
+        blk->isUsed = false;
+        lifo.moveToLastIn(blk);
+    } else if (lifo.withValue == ways) {
+        // the set has been completely filled: replace the last block in
+        blk = lifo.lastIn;
+    } else {
+        // Still filling. The constructor gives set 'set' the consecutive
+        // blocks blks[set*ways .. set*ways + ways - 1]; hand them out from
+        // the highest index (the initial firstIn) downward. The counter
+        // must be advanced on the set itself, and the slot must be indexed
+        // from the set's base rather than from firstIn, otherwise the
+        // fill never ends and the chosen block lies outside the set.
+        ++lifo.withValue;
+        blk = &blks[set * ways + (ways - lifo.withValue)];
+    }
+
+    DPRINTF(Split, "just assigned %#x addr into LIFO, replacing %#x status %#x\n",
+            pkt->getAddr(), regenerateBlkAddr(blk->tag, set), blk->status);
+    if (blk->isValid()) {
+        // evicting live data: fold its reference count into the stats
+        replacements[0]++;
+        totalRefs += blk->refCount;
+        ++sampledRefs;
+        blk->refCount = 0;
+    } else {
+        tagsInUse++;
+        blk->isTouched = true;
+        if (!warmedUp && tagsInUse.value() >= warmupBound) {
+            warmedUp = true;
+            warmupCycle = curTick;
+        }
+    }
+
+    misses++;
+
+    return blk;
+}
+
+/**
+ * Invalidate the block containing addr, if it is present.
+ * @param asid The address space ID.
+ * @param addr The address to invalidate.
+ */
+void
+SplitLIFO::invalidateBlk(int asid, Addr addr)
+{
+    SplitBlk *victim = findBlock(addr, asid);
+    if (!victim)
+        return;
+
+    victim->status = 0;
+    victim->isTouched = false;
+    tagsInUse--;
+    invalidations++;
+}
+
+/**
+ * Block-aligned copy from source to dest. Currently a no-op: the whole
+ * implementation below is compiled out with "#if 0".
+ * @param source The block-aligned source address.
+ * @param dest The block-aligned destination address.
+ * @param asid The address space ID.
+ * @param writebacks List for any generated writeback requests.
+ */
+void
+SplitLIFO::doCopy(Addr source, Addr dest, int asid, PacketList &writebacks)
+{
+// Copy is unsupported for now; the disabled code is kept for reference.
+#if 0
+    assert(source == blkAlign(source));
+    assert(dest == blkAlign(dest));
+    SplitBlk *source_blk = findBlock(source, asid);
+    assert(source_blk);
+    SplitBlk *dest_blk = findBlock(dest, asid);
+    if (dest_blk == NULL) {
+        // Need to do a replacement
+        Packet * pkt = new Packet();
+        pkt->paddr = dest;
+        BlkList dummy_list;
+        dest_blk = findReplacement(pkt, writebacks, dummy_list);
+        if (dest_blk->isValid() && dest_blk->isModified()) {
+            // Need to writeback data.
+            pkt = buildWritebackReq(regenerateBlkAddr(dest_blk->tag,
+                                                      dest_blk->set),
+                                    dest_blk->req->asid,
+                                    dest_blk->xc,
+                                    blkSize,
+                                    (cache->doData())?dest_blk->data:0,
+                                    dest_blk->size);
+            writebacks.push_back(pkt);
+        }
+        dest_blk->tag = extractTag(dest);
+        dest_blk->req->asid = asid;
+        /**
+         * @todo Do we need to pass in the execution context, or can we
+         * assume its the same?
+         */
+        assert(source_blk->xc);
+        dest_blk->xc = source_blk->xc;
+    }
+    /**
+     * @todo Can't assume the status once we have coherence on copies.
+     */
+
+    // Set this block as readable, writeable, and dirty.
+    dest_blk->status = 7;
+    if (cache->doData()) {
+        memcpy(dest_blk->data, source_blk->data, blkSize);
+    }
+#endif
+}
+
+/**
+ * Fold the reference count of every block that is still valid into the
+ * average-reference statistics.
+ */
+void
+SplitLIFO::cleanupRefs()
+{
+    int idx = 0;
+    while (idx < numBlks) {
+        SplitBlk &blk = blks[idx++];
+        if (!blk.isValid())
+            continue;
+        totalRefs += blk.refCount;
+        ++sampledRefs;
+    }
+}
diff --git a/src/mem/cache/tags/split_lifo.hh b/src/mem/cache/tags/split_lifo.hh
new file mode 100644
index 000000000..dfcaa0b67
--- /dev/null
+++ b/src/mem/cache/tags/split_lifo.hh
@@ -0,0 +1,350 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Lisa Hsu
+ */
+
+/**
+ * @file
+ * Declaration of a LIFO tag store usable in a partitioned cache.
+ */
+
+#ifndef __SPLIT_LIFO_HH__
+#define __SPLIT_LIFO_HH__
+
+#include <list>
+
+#include "mem/cache/cache_blk.hh" // base class
+#include "mem/cache/tags/split_blk.hh"
+#include "mem/packet.hh" // for inlined functions
+#include "base/hashmap.hh"
+#include <assert.h>
+#include "mem/cache/tags/base_tags.hh"
+
+class BaseCache;
+
+/**
+ * One set of a LIFO tag store. The set's blocks are chained through their
+ * prev/next pointers into a doubly linked list whose two ends are firstIn
+ * and lastIn.
+ */
+class LIFOSet {
+  public:
+    /** the number of blocks in this set */
+    int ways;
+
+    /** The two ends of this set's block list. Following next from firstIn
+        reaches lastIn; firstIn has no prev and lastIn has no next. */
+    SplitBlk *lastIn;
+    SplitBlk *firstIn;
+
+    /** has the initial "filling" of this set finished? i.e., have you had
+     * 'ways' number of compulsory misses in this set yet? if withValue == ways,
+     * then yes.  withValue is meant to be the number of blocks in the set that have
+     * gone through their first compulsory miss.
+     */
+    int withValue;
+
+    /**
+     * Find a valid block matching the tag in this set.
+     * @param asid The address space ID.
+     * @param tag the Tag you are looking for
+     * @return Pointer to the block, if found, NULL otherwise
+     */
+    SplitBlk* findBlk(int asid, Addr tag) const;
+
+    /** Relink blk at the lastIn end of the list. */
+    void moveToLastIn(SplitBlk *blk);
+    /** Relink blk at the firstIn end of the list. */
+    void moveToFirstIn(SplitBlk *blk);
+
+    /** Build an empty set; the owning tag store fills in ways and links. */
+    LIFOSet()
+        : ways(-1), lastIn(NULL), firstIn(NULL), withValue(0)
+    {}
+};
+
+/**
+ * A LIFO cache tag store, usable as one partition of a partitioned cache.
+ */
+class SplitLIFO : public BaseTags
+{
+  public:
+    /** Typedef the block type used in this tag store. */
+    typedef SplitBlk BlkType;
+    /** Typedef for a list of pointers to the local block class. */
+    typedef std::list<SplitBlk*> BlkList;
+  protected:
+    /** The number of bytes in a block. */
+    const int blkSize;
+    /** The size of the cache in bytes. */
+    const int size;
+    /** The number of blocks in the cache. */
+    const int numBlks;
+    /** The number of sets in the cache. */
+    const int numSets;
+    /** The number of ways in the cache. */
+    const int ways;
+    /** The hit latency. */
+    const int hitLatency;
+    /** Whether this is a "2 queue" replacement.
+        @sa LIFOSet::moveToLastIn @sa LIFOSet::moveToFirstIn */
+    const bool twoQueue;
+    /** Indicator for which partition this is. */
+    const int part;
+
+    /** The cache blocks. */
+    SplitBlk *blks;
+    /** The cache sets. */
+    LIFOSet *sets;
+    /** The data blocks, 1 per cache block. */
+    uint8_t *dataBlks;
+
+    /** The amount to shift the address to get the set. */
+    int setShift;
+    /** The amount to shift the address to get the tag. */
+    int tagShift;
+    /** Mask out all bits that aren't part of the set index. */
+    unsigned setMask;
+    /** Mask out all bits that aren't part of the block offset. */
+    unsigned blkMask;
+
+
+    /** The number of hits in this partition. */
+    Stats::Scalar<> hits;
+    /** The number of blocks brought into this partition (i.e. misses). */
+    Stats::Scalar<> misses;
+    /** The number of invalidations in this partition. */
+    Stats::Scalar<> invalidations;
+
+public:
+    /**
+     * Construct and initialize this tag store.
+     * @param _blkSize The number of bytes in a block.
+     * @param _size The size of the cache in bytes.
+     * @param _ways The number of ways in the cache.
+     * @param _hit_latency The latency in cycles for a hit.
+     * @param twoQueue Whether to use the "2 queue" replacement.
+     * @param _part The indicator for which partition this is.
+     */
+    SplitLIFO(int _blkSize, int _size, int _ways, int _hit_latency, bool twoQueue, int _part);
+
+    /**
+     * Destructor
+     */
+    virtual ~SplitLIFO();
+
+    /**
+     * Register the statistics for this object
+     * @param name The name to precede the stat
+     */
+    void regStats(const std::string &name);
+
+    /**
+     * Return the block size.
+     * @return the block size.
+     */
+    int getBlockSize()
+    {
+        return blkSize;
+    }
+
+    /**
+     * Return the subblock size. In the case of LIFO it is always the block
+     * size.
+     * @return The block size.
+     */
+    int getSubBlockSize()
+    {
+        return blkSize;
+    }
+
+    /**
+     * Search for the address in the cache.
+     * @param asid The address space ID.
+     * @param addr The address to find.
+     * @return True if the address is in the cache.
+     */
+    bool probe(int asid, Addr addr) const;
+
+    /**
+     * Invalidate the block containing the given address.
+     * @param asid The address space ID.
+     * @param addr The address to invalidate.
+     */
+    void invalidateBlk(int asid, Addr addr);
+
+    /**
+     * Finds the given address in the cache and update replacement data.
+     * Returns the access latency as a side effect.
+     * @param addr The address to find.
+     * @param asid The address space ID.
+     * @param lat The access latency.
+     * @return Pointer to the cache block if found.
+     */
+    SplitBlk* findBlock(Addr addr, int asid, int &lat);
+
+    /**
+     * Finds the given address in the cache and update replacement data.
+     * Returns the access latency as a side effect.
+     * @param pkt The packet whose block to find.
+     * @param lat The access latency.
+     * @return Pointer to the cache block if found.
+     */
+    SplitBlk* findBlock(Packet * &pkt, int &lat);
+
+    /**
+     * Finds the given address in the cache, do not update replacement data.
+     * @param addr The address to find.
+     * @param asid The address space ID.
+     * @return Pointer to the cache block if found.
+     */
+    SplitBlk* findBlock(Addr addr, int asid) const;
+
+    /**
+     * Find a replacement block for the address provided.
+     * @param pkt The packet to find a replacement candidate for.
+     * @param writebacks List for any writebacks to be performed.
+     * @param compress_blocks List of blocks to compress, for adaptive comp.
+     * @return The block to place the replacement in.
+     */
+    SplitBlk* findReplacement(Packet * &pkt, PacketList &writebacks,
+                            BlkList &compress_blocks);
+
+    /**
+     * Generate the tag from the given address.
+     * @param addr The address to get the tag from.
+     * @return The tag of the address.
+     */
+    Addr extractTag(Addr addr) const
+    {
+        return (addr >> tagShift);
+    }
+
+    /**
+     * Generate the tag from the given address.
+     * @param addr The address to get the tag from.
+     * @param blk Ignored
+     * @return The tag of the address.
+     */
+    Addr extractTag(Addr addr, SplitBlk *blk) const
+    {
+        return (addr >> tagShift);
+    }
+
+    /**
+     * Calculate the set index from the address.
+     * @param addr The address to get the set from.
+     * @return The set index of the address.
+     */
+    int extractSet(Addr addr) const
+    {
+        return ((addr >> setShift) & setMask);
+    }
+
+    /**
+     * Get the block offset from an address.
+     * @param addr The address to get the offset of.
+     * @return The block offset.
+     */
+    int extractBlkOffset(Addr addr) const
+    {
+        return (addr & blkMask);
+    }
+
+    /**
+     * Align an address to the block size.
+     * @param addr the address to align.
+     * @return The block address.
+     */
+    Addr blkAlign(Addr addr) const
+    {
+        return (addr & ~(Addr)blkMask);
+    }
+
+    /**
+     * Regenerate the block address from the tag.
+     * @param tag The tag of the block.
+     * @param set The set of the block.
+     * @return The block address.
+     */
+    Addr regenerateBlkAddr(Addr tag, unsigned set) const
+    {
+        return ((tag << tagShift) | ((Addr)set << setShift));
+    }
+
+    /**
+     * Return the hit latency.
+     * @return the hit latency.
+     */
+    int getHitLatency() const
+    {
+        return hitLatency;
+    }
+
+    /**
+     * Read the data out of the internal storage of the given cache block.
+     * Copies blk->size bytes, so the buffer must be at least that large.
+     * @param blk The cache block to read.
+     * @param data The buffer to read the data into.
+     */
+    void readData(SplitBlk *blk, uint8_t *data)
+    {
+        memcpy(data, blk->data, blk->size);
+    }
+
+    /**
+     * Record a write to the given cache block. This only updates the
+     * block's size; the bytes in data are not copied by this function.
+     * @param blk The cache block to write.
+     * @param data The data to write (not read here).
+     * @param size The number of bytes to write; at most the block size.
+     * @param writebacks A list for any writebacks to be performed. May be
+     * needed when writing to a compressed block.
+     */
+    void writeData(SplitBlk *blk, uint8_t *data, int size,
+                   PacketList & writebacks)
+    {
+        assert(size <= blkSize);
+        blk->size = size;
+    }
+
+    /**
+     * Perform a block aligned copy from the source address to the destination.
+     * @param source The block-aligned source address.
+     * @param dest The block-aligned destination address.
+     * @param asid The address space ID.
+     * @param writebacks List for any generated writeback requests.
+     */
+    void doCopy(Addr source, Addr dest, int asid, PacketList &writebacks);
+
+    /**
+     * No impl.
+     */
+    void fixCopy(Packet * &pkt, PacketList &writebacks)
+    {
+    }
+
+    /**
+     * Called at end of simulation to complete average block reference stats.
+     */
+    virtual void cleanupRefs();
+};
+
+#endif
diff --git a/src/mem/cache/tags/split_lru.cc b/src/mem/cache/tags/split_lru.cc
new file mode 100644
index 000000000..7fc742001
--- /dev/null
+++ b/src/mem/cache/tags/split_lru.cc
@@ -0,0 +1,333 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Lisa Hsu
+ */
+
+/**
+ * @file
+ * Definitions of LRU tag store for a partitioned cache.
+ */
+
+#include <string>
+
+#include "mem/cache/base_cache.hh"
+#include "base/intmath.hh"
+#include "mem/cache/tags/split_lru.hh"
+#include "sim/root.hh"
+
+using namespace std;
+
+/**
+ * Scan the ways of this set in order and return the first valid block
+ * whose tag matches.
+ * @param asid The address space ID (not consulted by the search).
+ * @param tag The tag to look for.
+ * @return The matching valid block, or 0 when there is none.
+ */
+SplitBlk*
+SplitCacheSet::findBlk(int asid, Addr tag) const
+{
+    int way = 0;
+    while (way < assoc) {
+        SplitBlk *cand = blks[way++];
+        if (cand->tag == tag && cand->isValid())
+            return cand;
+    }
+    return 0;
+}
+
+
+/**
+ * Move a block to slot 0 of this set, shifting every block that was ahead
+ * of it one slot toward the tail.
+ * @param blk The block to promote; it must be a member of this set.
+ */
+void
+SplitCacheSet::moveToHead(SplitBlk *blk)
+{
+    // nothing to do if blk is already head
+    if (blks[0] == blk)
+        return;
+
+    // locate blk's current slot, checking that we stay inside the set
+    int pos = 0;
+    for (;;) {
+        assert(pos < assoc);
+        if (blks[pos] == blk)
+            break;
+        ++pos;
+    }
+
+    // slide slots [0, pos) down by one, then drop blk into slot 0
+    for (int k = pos; k > 0; --k)
+        blks[k] = blks[k - 1];
+    blks[0] = blk;
+}
+
+
+/**
+ * Create and initialize an LRU tag store for one cache partition.
+ * @param _numSets The number of sets; must be a non-zero power of 2.
+ * @param _blkSize The bytes per block; at least 4 and a power of 2.
+ * @param _assoc The associativity; must be greater than zero.
+ * @param _hit_latency The latency in cycles for a hit; must be positive.
+ * @param _part The identifier stamped on every block of this partition.
+ */
+SplitLRU::SplitLRU(int _numSets, int _blkSize, int _assoc, int _hit_latency, int _part) :
+    numSets(_numSets), blkSize(_blkSize), assoc(_assoc), hitLatency(_hit_latency), part(_part)
+{
+    // Check parameters
+    if (blkSize < 4 || !isPowerOf2(blkSize))
+        fatal("Block size must be at least 4 and a power of 2");
+    if (numSets <= 0 || !isPowerOf2(numSets))
+        fatal("# of sets must be non-zero and a power of 2");
+    if (assoc <= 0)
+        fatal("associativity must be greater than zero");
+    if (hitLatency <= 0)
+        fatal("access latency must be greater than zero");
+
+    // address decomposition: | tag | set index | block offset |
+    blkMask = blkSize - 1;
+    setShift = floorLog2(blkSize);
+    setMask = numSets - 1;
+    tagShift = setShift + floorLog2(numSets);
+    warmedUp = false;
+    /** @todo Make warmup percentage a parameter. */
+    warmupBound = numSets * assoc;
+
+    sets = new SplitCacheSet[numSets];
+    blks = new SplitBlk[numSets * assoc];
+    // allocate data storage in one big chunk
+    dataBlks = new uint8_t[numSets*assoc*blkSize];
+
+    for (int setIdx = 0; setIdx < numSets; ++setIdx) {
+        SplitCacheSet &set = sets[setIdx];
+        set.assoc = assoc;
+        set.blks = new SplitBlk*[assoc];
+
+        for (int way = 0; way < assoc; ++way) {
+            // blocks are handed out consecutively, set after set
+            const int blkIndex = setIdx * assoc + way;
+            SplitBlk *blk = &blks[blkIndex];
+            blk->data = &dataBlks[blkSize*blkIndex];
+
+            // start out invalid; the tag value is irrelevant while the
+            // block is invalid, it is just set to the way number
+            blk->status = 0;
+            blk->tag = way;
+            blk->whenReady = 0;
+            blk->asid = -1;
+            blk->isTouched = false;
+            blk->size = blkSize;
+            blk->set = setIdx;
+            blk->part = part;
+
+            set.blks[way] = blk;
+        }
+    }
+}
+
+/**
+ * Release everything the constructor allocated: the per-set block-pointer
+ * arrays, the data buffer, the blocks and the sets.
+ */
+SplitLRU::~SplitLRU()
+{
+    // The constructor allocates one SplitBlk* array per set; free them
+    // before the sets themselves go away, otherwise they leak.
+    // NOTE(review): SplitCacheSet's declaration is outside this view; this
+    // assumes it has no destructor that already frees blks -- confirm.
+    for (int i = 0; i < numSets; ++i)
+        delete [] sets[i].blks;
+
+    delete [] dataBlks;
+    delete [] blks;
+    delete [] sets;
+}
+
+/**
+ * Register the base tag-store statistics and this partition's hit and
+ * miss counters.
+ * @param name The name each statistic is prefixed with.
+ */
+void
+SplitLRU::regStats(const std::string &name)
+{
+    BaseTags::regStats(name);
+
+    const std::string prefix = name + ".";
+
+    hits.name(prefix + "hits")
+        .desc("number of hits on this partition")
+        .precision(0);
+
+    misses.name(prefix + "misses")
+        .desc("number of misses in this partition")
+        .precision(0);
+}
+
+/**
+ * Check whether the block containing addr is present, without touching
+ * any replacement state.
+ * @param asid The address space ID.
+ * @param addr The address to look up.
+ * @return True if a valid block with a matching tag is in the set.
+ */
+bool
+SplitLRU::probe(int asid, Addr addr) const
+{
+    const unsigned setIdx = extractSet(addr);
+    return sets[setIdx].findBlk(asid, extractTag(addr)) != NULL;
+}
+
+/**
+ * Look up addr and, on a hit, move the block to the head of its set and
+ * update its reference count and the hit counter.
+ * @param addr The address to find.
+ * @param asid The address space ID.
+ * @param lat Receives the access latency: the hit latency, or the time
+ * until the block is ready when that is longer.
+ * @return The block on a hit, NULL on a miss.
+ */
+SplitBlk*
+SplitLRU::findBlock(Addr addr, int asid, int &lat)
+{
+    const unsigned set = extractSet(addr);
+    SplitBlk *hit = sets[set].findBlk(asid, extractTag(addr));
+
+    lat = hitLatency;
+    if (hit == NULL)
+        return hit;
+
+    // promote the block within its set
+    sets[set].moveToHead(hit);
+    const bool notReady = hit->whenReady > curTick &&
+        hit->whenReady - curTick > hitLatency;
+    if (notReady)
+        lat = hit->whenReady - curTick;
+    hit->refCount += 1;
+    hits++;
+
+    return hit;
+}
+
+/**
+ * Look up the block addressed by a packet and update replacement data.
+ * The work is identical to the address-based overload, so this forwards
+ * the packet's address and address space ID to it.
+ * @param pkt The packet whose block to find.
+ * @param lat Receives the access latency.
+ * @return The block on a hit, NULL on a miss.
+ */
+SplitBlk*
+SplitLRU::findBlock(Packet * &pkt, int &lat)
+{
+    Addr addr = pkt->getAddr();
+    int asid = pkt->req->getAsid();
+
+    return findBlock(addr, asid, lat);
+}
+
+/**
+ * Look up addr without updating any replacement data or statistics.
+ * @param addr The address to find.
+ * @param asid The address space ID.
+ * @return The block on a hit, NULL on a miss.
+ */
+SplitBlk*
+SplitLRU::findBlock(Addr addr, int asid) const
+{
+    const unsigned setIdx = extractSet(addr);
+    return sets[setIdx].findBlk(asid, extractTag(addr));
+}
+
+/**
+ * Take the block in the last slot of the set as the replacement victim,
+ * promote it to the head, and update the replacement, warmup and miss
+ * statistics.
+ * @param pkt The packet to find a replacement candidate for.
+ * @param writebacks List for any writebacks (not used by this tag store).
+ * @param compress_blocks List of blocks to compress (not used here).
+ * @return The block to place the new line in.
+ */
+SplitBlk*
+SplitLRU::findReplacement(Packet * &pkt, PacketList &writebacks,
+                     BlkList &compress_blocks)
+{
+    const unsigned setIdx = extractSet(pkt->getAddr());
+    SplitCacheSet &target = sets[setIdx];
+
+    // the victim is whatever sits in the set's last slot
+    SplitBlk *victim = target.blks[assoc-1];
+    target.moveToHead(victim);
+
+    if (!victim->isValid()) {
+        if (!victim->isTouched) {
+            // first use of this block: count it toward warmup
+            tagsInUse++;
+            victim->isTouched = true;
+            if (!warmedUp && tagsInUse.value() >= warmupBound) {
+                warmedUp = true;
+                warmupCycle = curTick;
+            }
+        }
+    } else {
+        // evicting live data: fold its reference count into the stats
+        replacements[0]++;
+        totalRefs += victim->refCount;
+        ++sampledRefs;
+        victim->refCount = 0;
+    }
+
+    misses++;
+
+    return victim;
+}
+
+/**
+ * Invalidate the block containing addr, if it is present.
+ * @param asid The address space ID.
+ * @param addr The address to invalidate.
+ */
+void
+SplitLRU::invalidateBlk(int asid, Addr addr)
+{
+    SplitBlk *victim = findBlock(addr, asid);
+    if (!victim)
+        return;
+
+    victim->status = 0;
+    victim->isTouched = false;
+    tagsInUse--;
+}
+
+/**
+ * Block-aligned copy from source to dest. Currently a no-op: the whole
+ * implementation below is compiled out with "#if 0".
+ * @param source The block-aligned source address.
+ * @param dest The block-aligned destination address.
+ * @param asid The address space ID.
+ * @param writebacks List for any generated writeback requests.
+ */
+void
+SplitLRU::doCopy(Addr source, Addr dest, int asid, PacketList &writebacks)
+{
+// Copy is not supported for now; the disabled code is kept for reference.
+#if 0
+    assert(source == blkAlign(source));
+    assert(dest == blkAlign(dest));
+    SplitBlk *source_blk = findBlock(source, asid);
+    assert(source_blk);
+    SplitBlk *dest_blk = findBlock(dest, asid);
+    if (dest_blk == NULL) {
+        // Need to do a replacement
+        Packet * pkt = new Packet();
+        pkt->paddr = dest;
+        BlkList dummy_list;
+        dest_blk = findReplacement(pkt, writebacks, dummy_list);
+        if (dest_blk->isValid() && dest_blk->isModified()) {
+            // Need to writeback data.
+            pkt = buildWritebackReq(regenerateBlkAddr(dest_blk->tag,
+                                                      dest_blk->set),
+                                    dest_blk->req->asid,
+                                    dest_blk->xc,
+                                    blkSize,
+                                    (cache->doData())?dest_blk->data:0,
+                                    dest_blk->size);
+            writebacks.push_back(pkt);
+        }
+        dest_blk->tag = extractTag(dest);
+        dest_blk->req->asid = asid;
+        /**
+         * @todo Do we need to pass in the execution context, or can we
+         * assume its the same?
+         */
+        assert(source_blk->xc);
+        dest_blk->xc = source_blk->xc;
+    }
+    /**
+     * @todo Can't assume the status once we have coherence on copies.
+     */
+
+    // Set this block as readable, writeable, and dirty.
+    dest_blk->status = 7;
+    if (cache->doData()) {
+        memcpy(dest_blk->data, source_blk->data, blkSize);
+    }
+#endif
+}
+
+/**
+ * Fold the reference count of every block that is still valid into the
+ * average-reference statistics.
+ */
+void
+SplitLRU::cleanupRefs()
+{
+    int idx = 0;
+    while (idx < numSets*assoc) {
+        SplitBlk &blk = blks[idx++];
+        if (!blk.isValid())
+            continue;
+        totalRefs += blk.refCount;
+        ++sampledRefs;
+    }
+}
diff --git a/src/mem/cache/tags/split_lru.hh b/src/mem/cache/tags/split_lru.hh
new file mode 100644
index 000000000..03886b1d8
--- /dev/null
+++ b/src/mem/cache/tags/split_lru.hh
@@ -0,0 +1,333 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Lisa Hsu
+ */
+
+/**
+ * @file
+ * Declaration of a LRU tag store for a partitioned cache.
+ */
+
+#ifndef __SPLIT_LRU_HH__
+#define __SPLIT_LRU_HH__
+
+#include <list>
+
+#include "mem/cache/cache_blk.hh" // base class
+#include "mem/cache/tags/split_blk.hh"
+#include "mem/packet.hh" // for inlined functions
+#include <assert.h>
+#include "mem/cache/tags/base_tags.hh"
+
+class BaseCache;
+
+/**
+ * An associative set of cache blocks.
+ */
+
+class SplitCacheSet
+{
+  public:
+    /** The associativity of this set. */
+    int assoc;
+
+    /** Cache blocks in this set, maintained in LRU order 0 = MRU. */
+    SplitBlk **blks;
+
+    /**
+     * Find a block matching the tag in this set.
+     * @param asid The address space ID.
+     * @param tag The Tag to find.
+     * @return Pointer to the block if found.
+     */
+    SplitBlk* findBlk(int asid, Addr tag) const;
+
+    /**
+     * Move the given block to the head of the list.
+     * @param blk The block to move.
+     */
+    void moveToHead(SplitBlk *blk);
+};
+
+/**
+ * A LRU cache tag store.
+ */
+class SplitLRU : public BaseTags
+{
+  public:
+    /** Typedef the block type used in this tag store. */
+    typedef SplitBlk BlkType;
+    /** Typedef for a list of pointers to the local block class. */
+    typedef std::list<SplitBlk*> BlkList;
+  protected:
+    /** The number of sets in the cache. */
+    const int numSets;
+    /** The number of bytes in a block. */
+    const int blkSize;
+    /** The associativity of the cache. */
+    const int assoc;
+    /** The hit latency. */
+    const int hitLatency;
+    /** indicator for which partition this is */
+    const int part;
+
+    /** The cache sets. */
+    SplitCacheSet *sets;
+
+    /** The cache blocks. */
+    SplitBlk *blks;
+    /** The data blocks, 1 per cache block. */
+    uint8_t *dataBlks;
+
+    /** The amount to shift the address to get the set. */
+    int setShift;
+    /** The amount to shift the address to get the tag. */
+    int tagShift;
+    /** Mask out all bits that aren't part of the set index. */
+    unsigned setMask;
+    /** Mask out all bits that aren't part of the block offset. */
+    unsigned blkMask;
+
+    /** number of hits in this partition */
+    Stats::Scalar<> hits;
+    /** number of blocks brought into this partition (i.e. misses) */
+    Stats::Scalar<> misses;
+
+public:
+    /**
+     * Construct and initialize this tag store.
+     * @param _numSets The number of sets in the cache.
+     * @param _blkSize The number of bytes in a block.
+     * @param _assoc The associativity of the cache.
+     * @param _hit_latency The latency in cycles for a hit.
+     */
+    SplitLRU(int _numSets, int _blkSize, int _assoc, int _hit_latency, int _part);
+
+    /**
+     * Destructor
+     */
+    virtual ~SplitLRU();
+
+    /**
+     * Register the statistics for this object
+     * @param name The name to precede the stat
+     */
+    void regStats(const std::string &name);
+
+    /**
+     * Return the block size.
+     * @return the block size.
+     */
+    int getBlockSize()
+    {
+        return blkSize;
+    }
+
+    /**
+     * Return the subblock size. In the case of LRU it is always the block
+     * size.
+     * @return The block size.
+     */
+    int getSubBlockSize()
+    {
+        return blkSize;
+    }
+
+    /**
+     * Search for the address in the cache.
+     * @param asid The address space ID.
+     * @param addr The address to find.
+     * @return True if the address is in the cache.
+     */
+    bool probe(int asid, Addr addr) const;
+
+    /**
+     * Invalidate the block containing the given address.
+     * @param asid The address space ID.
+     * @param addr The address to invalidate.
+     */
+    void invalidateBlk(int asid, Addr addr);
+
+    /**
+     * Finds the given address in the cache and update replacement data.
+     * Returns the access latency as a side effect.
+     * @param addr The address to find.
+     * @param asid The address space ID.
+     * @param lat The access latency.
+     * @return Pointer to the cache block if found.
+     */
+    SplitBlk* findBlock(Addr addr, int asid, int &lat);
+
+    /**
+     * Finds the given address in the cache and update replacement data.
+     * Returns the access latency as a side effect.
+     * @param pkt The packet whose block to find.
+     * @param lat The access latency.
+     * @return Pointer to the cache block if found.
+     */
+    SplitBlk* findBlock(Packet * &pkt, int &lat);
+
+    /**
+     * Finds the given address in the cache, do not update replacement data.
+     * @param addr The address to find.
+     * @param asid The address space ID.
+     * @return Pointer to the cache block if found.
+     */
+    SplitBlk* findBlock(Addr addr, int asid) const;
+
+    /**
+     * Find a replacement block for the address provided.
+     * @param pkt The packet to find a replacement candidate for.
+     * @param writebacks List for any writebacks to be performed.
+     * @param compress_blocks List of blocks to compress, for adaptive comp.
+     * @return The block to place the replacement in.
+     */
+    SplitBlk* findReplacement(Packet * &pkt, PacketList &writebacks,
+                            BlkList &compress_blocks);
+
+    /**
+     * Generate the tag from the given address.
+     * @param addr The address to get the tag from.
+     * @return The tag of the address.
+     */
+    Addr extractTag(Addr addr) const
+    {
+        return (addr >> tagShift);
+    }
+
+    /**
+     * Generate the tag from the given address.
+     * @param addr The address to get the tag from.
+     * @param blk Ignored.
+     * @return The tag of the address.
+     */
+    Addr extractTag(Addr addr, SplitBlk *blk) const
+    {
+        return (addr >> tagShift);
+    }
+
+    /**
+     * Calculate the set index from the address.
+     * @param addr The address to get the set from.
+     * @return The set index of the address.
+     */
+    int extractSet(Addr addr) const
+    {
+        return ((addr >> setShift) & setMask);
+    }
+
+    /**
+     * Get the block offset from an address.
+     * @param addr The address to get the offset of.
+     * @return The block offset.
+     */
+    int extractBlkOffset(Addr addr) const
+    {
+        return (addr & blkMask);
+    }
+
+    /**
+     * Align an address to the block size.
+     * @param addr the address to align.
+     * @return The block address.
+     */
+    Addr blkAlign(Addr addr) const
+    {
+        return (addr & ~(Addr)blkMask);
+    }
+
+    /**
+     * Regenerate the block address from the tag.
+     * @param tag The tag of the block.
+     * @param set The set of the block.
+     * @return The block address.
+     */
+    Addr regenerateBlkAddr(Addr tag, unsigned set) const
+    {
+        return ((tag << tagShift) | ((Addr)set << setShift));
+    }
+
+    /**
+     * Return the hit latency.
+     * @return the hit latency.
+     */
+    int getHitLatency() const
+    {
+        return hitLatency;
+    }
+
+    /**
+     * Read the data out of the internal storage of the given cache block.
+     * @param blk The cache block to read.
+     * @param data The buffer to read the data into.
+     * @return The cache block's data.
+     */
+    void readData(SplitBlk *blk, uint8_t *data)
+    {
+        memcpy(data, blk->data, blk->size);
+    }
+
+    /**
+     * Write data into the internal storage of the given cache block. Since
+     * LRU does not store data differently, this just needs to update the size.
+     * @param blk The cache block to write.
+     * @param data The data to write.
+     * @param size The number of bytes to write.
+     * @param writebacks A list for any writebacks to be performed. May be
+     * needed when writing to a compressed block.
+     */
+    void writeData(SplitBlk *blk, uint8_t *data, int size,
+                   PacketList & writebacks)
+    {
+        assert(size <= blkSize);
+        blk->size = size;
+    }
+
+    /**
+     * Perform a block aligned copy from the source address to the destination.
+     * @param source The block-aligned source address.
+     * @param dest The block-aligned destination address.
+     * @param asid The address space ID.
+     * @param writebacks List for any generated writeback requests.
+     */
+    void doCopy(Addr source, Addr dest, int asid, PacketList &writebacks);
+
+    /**
+     * No impl.
+     */
+    void fixCopy(Packet * &pkt, PacketList &writebacks)
+    {
+    }
+
+    /**
+     * Called at end of simulation to complete average block reference stats.
+     */
+    virtual void cleanupRefs();
+};
+
+#endif
diff --git a/src/mem/config/cache.hh b/src/mem/config/cache.hh
new file mode 100644
index 000000000..24da04021
--- /dev/null
+++ b/src/mem/config/cache.hh
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Nathan Binkert
+ */
+
+/**
+ * @file
+ * Central location to configure which cache types we want to build
+ * into the simulator.  In the future, this should probably be
+ * autogenerated by some sort of configuration script.
+ */
+#define USE_CACHE_LRU 1
+#define USE_CACHE_FALRU 1
+// #define USE_CACHE_SPLIT 1
+// #define USE_CACHE_SPLIT_LIFO 1
+#define USE_CACHE_IIC 1
+
diff --git a/src/mem/mem_object.hh b/src/mem/mem_object.hh
index ac547619d..c81ea03d8 100644
--- a/src/mem/mem_object.hh
+++ b/src/mem/mem_object.hh
@@ -50,7 +50,7 @@ class MemObject : public SimObject
 
   public:
     /** Additional function to return the Port of a memory object. */
-    virtual Port *getPort(const std::string &if_name) = 0;
+    virtual Port *getPort(const std::string &if_name, int idx = -1) = 0;
 };
 
 #endif //__MEM_MEM_OBJECT_HH__
diff --git a/src/mem/packet.cc b/src/mem/packet.cc
index 56dd2bdfa..91298df8c 100644
--- a/src/mem/packet.cc
+++ b/src/mem/packet.cc
@@ -57,6 +57,19 @@ Packet::cmdString() const
     }
 }
 
+const std::string &
+Packet::cmdIdxToString(Packet::Command idx)
+{
+    switch (idx) {
+      case ReadReq:         return ReadReqString;
+      case WriteReq:        return WriteReqString;
+      case WriteReqNoAck:   return WriteReqNoAckString;
+      case ReadResp:        return ReadRespString;
+      case WriteResp:       return WriteRespString;
+      default:              return OtherCmdString;
+    }
+}
+
 /** delete the data pointed to in the data pointer. Ok to call to matter how
  * data was allocted. */
 void
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index 403039d96..534db0077 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -41,10 +41,24 @@
 #include "mem/request.hh"
 #include "arch/isa_traits.hh"
 #include "sim/root.hh"
+#include <list>
 
 struct Packet;
 typedef Packet* PacketPtr;
 typedef uint8_t* PacketDataPtr;
+typedef std::list<PacketPtr> PacketList;
+
+//Coherence Flags (parenthesized so they expand safely inside expressions)
+#define NACKED_LINE (1 << 0)
+#define SATISFIED (1 << 1)
+#define SHARED_LINE (1 << 2)
+#define CACHE_LINE_FILL (1 << 3)
+#define COMPRESSED (1 << 4)
+#define NO_ALLOCATE (1 << 5)
+
+//For statistics we need the max number of command indices.  Commands are
+//ORs of attribute bits up to 1 << 8, so size it at 1 << 9.  @todo fix later
+#define NUM_MEM_CMDS (1 << 9)
 
 /**
  * A Packet is used to encapsulate a transfer between two objects in
@@ -55,6 +69,10 @@ typedef uint8_t* PacketDataPtr;
  */
 class Packet
 {
+  public:
+    /** Temporary FLAGS field until cache gets working, this should be in coherence/sender state. */
+    uint64_t flags;
+
   private:
    /** A pointer to the data being transfered.  It can be differnt
     *    sizes at each level of the heirarchy so it belongs in the
@@ -100,8 +118,12 @@ class Packet
     /** Is the 'src' field valid? */
     bool srcValid;
 
+
   public:
 
+    /** Used to calculate latencies for each packet.*/
+    Tick time;
+
     /** The special destination address indicating that the packet
      *   should be routed based on its address. */
     static const short Broadcast = -1;
@@ -149,30 +171,58 @@ class Packet
         IsRequest	= 1 << 4,
         IsResponse 	= 1 << 5,
         NeedsResponse	= 1 << 6,
+        IsSWPrefetch    = 1 << 7,
+        IsHWPrefetch    = 1 << 8
     };
 
   public:
     /** List of all commands associated with a packet. */
     enum Command
     {
+        InvalidCmd      = 0,
         ReadReq		= IsRead  | IsRequest | NeedsResponse,
         WriteReq	= IsWrite | IsRequest | NeedsResponse,
         WriteReqNoAck	= IsWrite | IsRequest,
-        ReadResp	= IsRead  | IsResponse,
-        WriteResp	= IsWrite | IsResponse
+        ReadResp	= IsRead  | IsResponse | NeedsResponse,
+        WriteResp	= IsWrite | IsResponse | NeedsResponse,
+        Writeback       = IsWrite | IsRequest,
+        SoftPFReq       = IsRead  | IsRequest | IsSWPrefetch | NeedsResponse,
+        HardPFReq       = IsRead  | IsRequest | IsHWPrefetch | NeedsResponse,
+        SoftPFResp      = IsRead  | IsResponse | IsSWPrefetch | NeedsResponse,
+        HardPFResp      = IsRead  | IsResponse | IsHWPrefetch | NeedsResponse,
+        InvalidateReq   = IsInvalidate | IsRequest,
+        WriteInvalidateReq = IsWrite | IsInvalidate | IsRequest,
+        UpgradeReq      = IsInvalidate | IsRequest | NeedsResponse,
+        UpgradeResp     = IsInvalidate | IsResponse | NeedsResponse,
+        ReadExReq       = IsRead | IsInvalidate | IsRequest | NeedsResponse,
+        ReadExResp      = IsRead | IsInvalidate | IsResponse | NeedsResponse
     };
 
     /** Return the string name of the cmd field (for debugging and
      *   tracing). */
     const std::string &cmdString() const;
 
+    /** Return the string name of the cmd given by idx. */
+    const std::string &cmdIdxToString(Command idx);
+
+    /** Return the index of this command. */
+    inline int cmdToIndex() const { return (int) cmd; }
+
     /** The command field of the packet. */
     Command cmd;
 
     bool isRead() 	 { return (cmd & IsRead)  != 0; }
+    bool isWrite()       { return (cmd & IsWrite) != 0; }
     bool isRequest()	 { return (cmd & IsRequest)  != 0; }
     bool isResponse()	 { return (cmd & IsResponse) != 0; }
     bool needsResponse() { return (cmd & NeedsResponse) != 0; }
+    bool isInvalidate()  { return (cmd & IsInvalidate) != 0; } // tests the IsInvalidate bit
+
+    bool isCacheFill() { return (flags & CACHE_LINE_FILL) != 0; }
+    bool isNoAllocate() { return (flags & NO_ALLOCATE) != 0; }
+    bool isCompressed() { return (flags & COMPRESSED) != 0; }
+
+    bool nic_pkt() { assert("Unimplemented\n" && 0); return false; } // return keeps NDEBUG builds defined
 
     /** Possible results of a packet's request. */
     enum Result
@@ -197,6 +247,10 @@ class Packet
 
     Addr getAddr() const { assert(addrSizeValid); return addr; }
     int getSize() const { assert(addrSizeValid); return size; }
+    Addr getOffset(int blkSize) const { return addr & (Addr)(blkSize - 1); }
+
+    void addrOverride(Addr newAddr) { assert(addrSizeValid); addr = newAddr; }
+    void cmdOverride(Command newCmd) { cmd = newCmd; }
 
     /** Constructor.  Note that a Request object must be constructed
      *   first, but the Requests's physical address and size fields
@@ -210,6 +264,21 @@ class Packet
            req(_req), coherence(NULL), senderState(NULL), cmd(_cmd),
            result(Unknown)
     {
+        flags = 0;
+    }
+
+    /** Alternate constructor if you are trying to create a packet with
+     *  a request that is for a whole block, not the address from the req.
+     *  this allows for overriding the size/addr of the req.*/
+    Packet(Request *_req, Command _cmd, short _dest, int _blkSize)
+        :  data(NULL), staticData(false), dynamicData(false), arrayData(false),
+           addr(_req->paddr & ~(_blkSize - 1)), size(_blkSize),
+           dest(_dest),
+           addrSizeValid(_req->validPaddr), srcValid(false),
+           req(_req), coherence(NULL), senderState(NULL), cmd(_cmd),
+           result(Unknown)
+    {
+        flags = 0;
     }
 
     /** Destructor. */
@@ -242,8 +311,9 @@ class Packet
      *   should not be called. */
     void makeTimingResponse() {
         assert(needsResponse());
+        assert(isRequest());
         int icmd = (int)cmd;
-        icmd &= ~(IsRequest | NeedsResponse);
+        icmd &= ~(IsRequest);
         icmd |= IsResponse;
         cmd = (Command)icmd;
         dest = src;
diff --git a/src/mem/page_table.cc b/src/mem/page_table.cc
index b5cecc7da..a34a0393a 100644
--- a/src/mem/page_table.cc
+++ b/src/mem/page_table.cc
@@ -54,6 +54,9 @@ PageTable::PageTable(System *_system, Addr _pageSize)
       system(_system)
 {
     assert(isPowerOf2(pageSize));
+    pTableCache[0].vaddr = 0;
+    pTableCache[1].vaddr = 0;
+    pTableCache[2].vaddr = 0;
 }
 
 PageTable::~PageTable()
@@ -95,7 +98,7 @@ PageTable::allocate(Addr vaddr, int size)
     assert(pageOffset(vaddr) == 0);
 
     for (; size > 0; size -= pageSize, vaddr += pageSize) {
-        std::map<Addr,Addr>::iterator iter = pTable.find(vaddr);
+        m5::hash_map<Addr,Addr>::iterator iter = pTable.find(vaddr);
 
         if (iter != pTable.end()) {
             // already mapped
@@ -103,6 +106,12 @@ PageTable::allocate(Addr vaddr, int size)
         }
 
         pTable[vaddr] = system->new_page();
+        pTableCache[2].paddr = pTableCache[1].paddr;
+        pTableCache[2].vaddr = pTableCache[1].vaddr;
+        pTableCache[1].paddr = pTableCache[0].paddr;
+        pTableCache[1].vaddr = pTableCache[0].vaddr;
+        pTableCache[0].paddr = pTable[vaddr];
+        pTableCache[0].vaddr = vaddr;
     }
 }
 
@@ -112,7 +121,22 @@ bool
 PageTable::translate(Addr vaddr, Addr &paddr)
 {
     Addr page_addr = pageAlign(vaddr);
-    std::map<Addr,Addr>::iterator iter = pTable.find(page_addr);
+    paddr = 0;
+
+    if (pTableCache[0].vaddr == vaddr) {
+        paddr = pTableCache[0].paddr;
+        return true;
+    }
+    if (pTableCache[1].vaddr == vaddr) {
+        paddr = pTableCache[1].paddr;
+        return true;
+    }
+    if (pTableCache[2].vaddr == vaddr) {
+        paddr = pTableCache[2].paddr;
+        return true;
+    }
+
+    m5::hash_map<Addr,Addr>::iterator iter = pTable.find(page_addr);
 
     if (iter == pTable.end()) {
         return false;
@@ -130,7 +154,7 @@ PageTable::translate(RequestPtr &req)
     assert(pageAlign(req->getVaddr() + req->getSize() - 1)
            == pageAlign(req->getVaddr()));
     if (!translate(req->getVaddr(), paddr)) {
-        return genMachineCheckFault();
+        return genPageTableFault(req->getVaddr());
     }
     req->setPaddr(paddr);
     return page_check(req->getPaddr(), req->getSize());
diff --git a/src/mem/page_table.hh b/src/mem/page_table.hh
index f7212d423..494c0ce9a 100644
--- a/src/mem/page_table.hh
+++ b/src/mem/page_table.hh
@@ -37,9 +37,9 @@
 #define __PAGE_TABLE__
 
 #include <string>
-#include <map>
 
 #include "arch/isa_traits.hh"
+#include "base/hashmap.hh"
 #include "base/trace.hh"
 #include "mem/request.hh"
 #include "mem/packet.hh"
@@ -53,7 +53,14 @@ class System;
 class PageTable
 {
   protected:
-    std::map<Addr,Addr> pTable;
+    m5::hash_map<Addr,Addr> pTable;
+
+    struct cacheElement {
+        Addr paddr;
+        Addr vaddr;
+    } ;
+
+    struct cacheElement pTableCache[3];
 
     const Addr pageSize;
     const Addr offsetMask;
diff --git a/src/mem/physical.cc b/src/mem/physical.cc
index fb31fb4a3..2d66602ab 100644
--- a/src/mem/physical.cc
+++ b/src/mem/physical.cc
@@ -173,9 +173,9 @@ PhysicalMemory::doFunctionalAccess(Packet *pkt)
 }
 
 Port *
-PhysicalMemory::getPort(const std::string &if_name)
+PhysicalMemory::getPort(const std::string &if_name, int idx)
 {
-    if (if_name == "") {
+    if (if_name == "port" && idx == -1) {
         if (port != NULL)
            panic("PhysicalMemory::getPort: additional port requested to memory!");
         port = new MemoryPort(name() + "-port", this);
diff --git a/src/mem/physical.hh b/src/mem/physical.hh
index 88ea543da..50fa75ed3 100644
--- a/src/mem/physical.hh
+++ b/src/mem/physical.hh
@@ -108,7 +108,7 @@ class PhysicalMemory : public MemObject
   public:
     int deviceBlockSize();
     void getAddressRanges(AddrRangeList &resp, AddrRangeList &snoop);
-    virtual Port *getPort(const std::string &if_name);
+    virtual Port *getPort(const std::string &if_name, int idx = -1);
     void virtual init();
 
     // fast back-door memory access for vtophys(), remote gdb, etc.
diff --git a/src/mem/port.hh b/src/mem/port.hh
index 2edad095e..17b1f4a00 100644
--- a/src/mem/port.hh
+++ b/src/mem/port.hh
@@ -74,7 +74,7 @@ class Port
   private:
 
     /** Descriptive name (for DPRINTF output) */
-    const std::string portName;
+    mutable std::string portName;
 
     /** A pointer to the peer port.  Ports always come in pairs, that way they
         can use a standardized interface to communicate between different
@@ -83,6 +83,10 @@ class Port
 
   public:
 
+    Port()
+        : peer(NULL)
+    { }
+
     /**
      * Constructor.
      *
@@ -105,6 +109,9 @@ class Port
         RangeChange
     };
 
+    void setName(const std::string &name)
+    { portName = name; }
+
     /** Function to set the pointer for the peer port.
         @todo should be called by the configuration stuff (python).
     */
diff --git a/src/mem/request.hh b/src/mem/request.hh
index af1d6d8a8..a1524f807 100644
--- a/src/mem/request.hh
+++ b/src/mem/request.hh
@@ -44,6 +44,7 @@ class Request;
 
 typedef Request* RequestPtr;
 
+
 /** The request is a Load locked/store conditional. */
 const unsigned LOCKED		= 0x001;
 /** The virtual address is also the physical address. */
@@ -62,6 +63,8 @@ const unsigned PF_EXCLUSIVE	= 0x100;
 const unsigned EVICT_NEXT	= 0x200;
 /** The request should ignore unaligned access faults */
 const unsigned NO_ALIGN_FAULT   = 0x400;
+/** The request was an instruction read. */
+const unsigned INST_READ        = 0x800;
 
 class Request
 {
@@ -224,6 +227,11 @@ class Request
     /** Accessor function for pc.*/
     Addr getPC() { assert(validPC); return pc; }
 
+    /** Accessor Function to Check Cacheability. */
+    bool isUncacheable() { return getFlags() & UNCACHEABLE; }
+
+    bool isInstRead() { return getFlags() & INST_READ; }
+
     friend class Packet;
 };
 
diff --git a/src/python/SConscript b/src/python/SConscript
index 7b0f591eb..c9e713199 100644
--- a/src/python/SConscript
+++ b/src/python/SConscript
@@ -75,24 +75,35 @@ def addPkg(pkgdir):
 # build_env flags.
 def MakeDefinesPyFile(target, source, env):
     f = file(str(target[0]), 'w')
-    print >>f, "m5_build_env = ",
-    print >>f, source[0]
+    print >>f, "m5_build_env = ", source[0]
     f.close()
 
 optionDict = dict([(opt, env[opt]) for opt in env.ExportOptions])
 env.Command('m5/defines.py', Value(optionDict), MakeDefinesPyFile)
 
+def MakeInfoPyFile(target, source, env):
+    f = file(str(target[0]), 'w')
+    for src in source:
+        data = ''.join(file(src.srcnode().abspath, 'r').xreadlines())
+        print >>f, "%s = %s" % (src, repr(data))
+    f.close()
+
+env.Command('m5/info.py',
+            [ '#/AUTHORS', '#/LICENSE', '#/README', '#/RELEASE_NOTES' ],
+            MakeInfoPyFile)
+
 # Now specify the packages & files for the zip archive.
 addPkg('m5')
 pyzip_files.append('m5/defines.py')
+pyzip_files.append('m5/info.py')
 pyzip_files.append(join(env['ROOT'], 'util/pbs/jobfile.py'))
 
-env.Command(['swig/main_wrap.cc', 'm5/main.py'],
-            'swig/main.i',
+env.Command(['swig/cc_main_wrap.cc', 'm5/cc_main.py'],
+            'swig/cc_main.i',
             '$SWIG $SWIGFLAGS -outdir ${TARGETS[1].dir} '
             '-o ${TARGETS[0]} $SOURCES')
 
-pyzip_dep_files.append('m5/main.py')
+pyzip_dep_files.append('m5/cc_main.py')
 
 # Action function to build the zip archive.  Uses the PyZipFile module
 # included in the standard Python library.
diff --git a/src/python/m5/__init__.py b/src/python/m5/__init__.py
index 60a61d66e..3d0e3defa 100644
--- a/src/python/m5/__init__.py
+++ b/src/python/m5/__init__.py
@@ -27,14 +27,14 @@
 # Authors: Nathan Binkert
 #          Steve Reinhardt
 
-import sys, os, time, atexit, optparse
+import atexit, os, sys
 
 # import the SWIG-wrapped main C++ functions
-import main
+import cc_main
 # import a few SWIG-wrapped items (those that are likely to be used
 # directly by user scripts) completely into this module for
 # convenience
-from main import simulate, SimLoopExitEvent
+from cc_main import simulate, SimLoopExitEvent
 
 # import the m5 compile options
 import defines
@@ -57,20 +57,6 @@ def AddToPath(path):
     # so place the new dir right after that.
     sys.path.insert(1, path)
 
-
-# Callback to set trace flags.  Not necessarily the best way to do
-# things in the long run (particularly if we change how these global
-# options are handled).
-def setTraceFlags(option, opt_str, value, parser):
-    objects.Trace.flags = value
-
-# Standard optparse options.  Need to be explicitly included by the
-# user script when it calls optparse.OptionParser().
-standardOptions = [
-    optparse.make_option("--traceflags", type="string", action="callback",
-                         callback=setTraceFlags)
-    ]
-
 # make a SmartDict out of the build options for our local use
 import smartdict
 build_env = smartdict.SmartDict()
@@ -80,16 +66,26 @@ build_env.update(defines.m5_build_env)
 env = smartdict.SmartDict()
 env.update(os.environ)
 
+# Function to provide to C++ so it can look up instances based on paths
+def resolveSimObject(name):
+    obj = config.instanceDict[name]
+    return obj.getCCObject()
+
+from main import options, arguments, main
+
 # The final hook to generate .ini files.  Called from the user script
 # once the config is built.
 def instantiate(root):
     config.ticks_per_sec = float(root.clock.frequency)
     # ugly temporary hack to get output to config.ini
-    sys.stdout = file('config.ini', 'w')
+    sys.stdout = file(os.path.join(options.outdir, 'config.ini'), 'w')
     root.print_ini()
     sys.stdout.close() # close config.ini
     sys.stdout = sys.__stdout__ # restore to original
-    main.initialize()  # load config.ini into C++ and process it
+    cc_main.loadIniFile(resolveSimObject)  # load config.ini into C++
+    root.createCCObject()
+    root.connectPorts()
+    cc_main.finalInit()
     noDot = True # temporary until we fix dot
     if not noDot:
        dot = pydot.Dot()
@@ -103,12 +99,105 @@ def instantiate(root):
 
 # Export curTick to user script.
 def curTick():
-    return main.cvar.curTick
+    return cc_main.cvar.curTick
 
 # register our C++ exit callback function with Python
-atexit.register(main.doExitCleanup)
+atexit.register(cc_main.doExitCleanup)
 
 # This import allows user scripts to reference 'm5.objects.Foo' after
 # just doing an 'import m5' (without an 'import m5.objects').  May not
 # matter since most scripts will probably 'from m5.objects import *'.
 import objects
+
+# This loops until all objects have been fully drained.
+def doDrain(root):
+    all_drained = drain(root)
+    while (not all_drained):
+        all_drained = drain(root)
+
+# Tries to drain all objects.  Draining might not be completed unless
+# all objects return that they are drained on the first call.  This is
+# because as objects drain they may cause other objects to no longer
+# be drained.
+def drain(root):
+    all_drained = False
+    drain_event = cc_main.createCountedDrain()
+    unready_objects = root.startDrain(drain_event, True)
+    # If we've got some objects that can't drain immediately, then simulate
+    if unready_objects > 0:
+        drain_event.setCount(unready_objects)
+        simulate()
+    else:
+        all_drained = True
+    cc_main.cleanupCountedDrain(drain_event)
+    return all_drained
+
+def resume(root):
+    root.resume()
+
+def checkpoint(root, dir):
+    if not isinstance(root, objects.Root):
+        raise TypeError, "Object is not a root object. Checkpoint must be called on a root object."
+    doDrain(root)
+    print "Writing checkpoint"
+    cc_main.serializeAll(dir)
+    resume(root)
+
+def restoreCheckpoint(root, dir):
+    print "Restoring from checkpoint"
+    cc_main.unserializeAll(dir)
+    resume(root)
+
+def changeToAtomic(system):
+    # Drain the objects, switch memory mode to atomic, then resume them.
+    if not isinstance(system, objects.Root) and not isinstance(system, objects.System):
+        raise TypeError, "changeToAtomic() must be called on a root or system object."
+    doDrain(system)
+    print "Changing memory mode to atomic"
+    system.changeTiming(cc_main.SimObject.Atomic)
+    resume(system)
+
+def changeToTiming(system):
+    # Drain the objects, switch memory mode to timing, then resume them.
+    if not isinstance(system, objects.Root) and not isinstance(system, objects.System):
+        raise TypeError, "changeToTiming() must be called on a root or system object."
+    doDrain(system)
+    print "Changing memory mode to timing"
+    system.changeTiming(cc_main.SimObject.Timing)
+    resume(system)
+
+# Switch execution from each old CPU to its replacement.  cpuList is a
+# list of (oldCPU, newCPU) tuples; the old CPUs are drained first.
+def switchCpus(cpuList):
+    if not isinstance(cpuList, list):
+        raise RuntimeError, "Must pass a list to this function"
+    for i in cpuList:
+        if not isinstance(i, tuple):
+            raise RuntimeError, "List must have tuples of (oldCPU,newCPU)"
+
+    [old_cpus, new_cpus] = zip(*cpuList)
+
+    for cpu in old_cpus:
+        if not isinstance(cpu, objects.BaseCPU):
+            raise TypeError, "%s is not of type BaseCPU" % (cpu,)
+    for cpu in new_cpus:
+        if not isinstance(cpu, objects.BaseCPU):
+            raise TypeError, "%s is not of type BaseCPU" % (cpu,)
+
+    # Drain all of the individual CPUs
+    drain_event = cc_main.createCountedDrain()
+    unready_cpus = 0
+    for old_cpu in old_cpus:
+        unready_cpus += old_cpu.startDrain(drain_event, False)
+    # If we've got some objects that can't drain immediately, then simulate
+    if unready_cpus > 0:
+        drain_event.setCount(unready_cpus)
+        simulate()
+    cc_main.cleanupCountedDrain(drain_event)
+    # Now all of the CPUs are ready to be switched out
+    for old_cpu in old_cpus:
+        old_cpu._ccObject.switchOut()
+    print "Switching CPUs"
+    for old_cpu, new_cpu in zip(old_cpus, new_cpus):
+        new_cpu.takeOverFrom(old_cpu)
+        new_cpu._ccObject.resume()
diff --git a/src/python/m5/attrdict.py b/src/python/m5/attrdict.py
new file mode 100644
index 000000000..4ee7f1b8c
--- /dev/null
+++ b/src/python/m5/attrdict.py
@@ -0,0 +1,61 @@
+# Copyright (c) 2006 The Regents of The University of Michigan
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Nathan Binkert
+
+__all__ = [ 'attrdict' ]
+
+class attrdict(dict):  # dict whose keys can also be read/set/deleted as attributes
+    def __getattr__(self, attr):  # Python calls this only after normal lookup fails
+        if attr in self:
+            return self.__getitem__(attr)
+        return super(attrdict, self).__getattribute__(attr)
+    # Names already visible via dir(self) are set as real attributes; others become keys.
+    def __setattr__(self, attr, value):
+        if attr in dir(self):
+            return super(attrdict, self).__setattr__(attr, value)
+        return self.__setitem__(attr, value)
+    # Keys are removed as items; anything else is deleted as a real attribute.
+    def __delattr__(self, attr):
+        if attr in self:
+            return self.__delitem__(attr)
+        return super(attrdict, self).__delattr__(attr)
+
+if __name__ == '__main__':  # ad-hoc smoke test: prints results, asserts nothing
+    x = attrdict()
+    x.y = 1  # 'y' is not in dir(x), so __setattr__ stores it as a key
+    x['z'] = 2
+    print x['y'], x.y
+    print x['z'], x.z
+    print dir(x)
+    print x
+
+    print
+
+    del x['y']
+    del x.z  # 'z' is a key, so __delattr__ removes the item
+    print dir(x)
+    print(x)
diff --git a/src/python/m5/config.py b/src/python/m5/config.py
index 97e13c900..8eed28dcc 100644
--- a/src/python/m5/config.py
+++ b/src/python/m5/config.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2004-2005 The Regents of The University of Michigan
+# Copyright (c) 2004-2006 The Regents of The University of Michigan
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -27,10 +27,10 @@
 # Authors: Steve Reinhardt
 #          Nathan Binkert
 
-import os, re, sys, types, inspect
+import os, re, sys, types, inspect, copy
 
 import m5
-from m5 import panic
+from m5 import panic, cc_main
 from convert import *
 from multidict import multidict
 
@@ -84,65 +84,22 @@ class Singleton(type):
 #
 # Once a set of Python objects have been instantiated in a hierarchy,
 # calling 'instantiate(obj)' (where obj is the root of the hierarchy)
-# will generate a .ini file.  See simple-4cpu.py for an example
-# (corresponding to m5-test/simple-4cpu.ini).
+# will generate a .ini file.
 #
 #####################################################################
 
-#####################################################################
-#
-# ConfigNode/SimObject classes
-#
-# The Python class hierarchy rooted by ConfigNode (which is the base
-# class of SimObject, which in turn is the base class of all other M5
-# SimObject classes) has special attribute behavior.  In general, an
-# object in this hierarchy has three categories of attribute-like
-# things:
-#
-# 1. Regular Python methods and variables.  These must start with an
-# underscore to be treated normally.
-#
-# 2. SimObject parameters.  These values are stored as normal Python
-# attributes, but all assignments to these attributes are checked
-# against the pre-defined set of parameters stored in the class's
-# _params dictionary.  Assignments to attributes that do not
-# correspond to predefined parameters, or that are not of the correct
-# type, incur runtime errors.
+# dict to look up SimObjects based on path
+instanceDict = {}
+
+#############################
 #
-# 3. Hierarchy children.  The child nodes of a ConfigNode are stored
-# in the node's _children dictionary, but can be accessed using the
-# Python attribute dot-notation (just as they are printed out by the
-# simulator).  Children cannot be created using attribute assigment;
-# they must be added by specifying the parent node in the child's
-# constructor or using the '+=' operator.
-
-# The SimObject parameters are the most complex, for a few reasons.
-# First, both parameter descriptions and parameter values are
-# inherited.  Thus parameter description lookup must go up the
-# inheritance chain like normal attribute lookup, but this behavior
-# must be explicitly coded since the lookup occurs in each class's
-# _params attribute.  Second, because parameter values can be set
-# on SimObject classes (to implement default values), the parameter
-# checking behavior must be enforced on class attribute assignments as
-# well as instance attribute assignments.  Finally, because we allow
-# class specialization via inheritance (e.g., see the L1Cache class in
-# the simple-4cpu.py example), we must do parameter checking even on
-# class instantiation.  To provide all these features, we use a
-# metaclass to define most of the SimObject parameter behavior for
-# this class hierarchy.
+# Utility methods
 #
-#####################################################################
+#############################
 
 def isSimObject(value):
     return isinstance(value, SimObject)
 
-def isSimObjectClass(value):
-    try:
-        return issubclass(value, SimObject)
-    except TypeError:
-        # happens if value is not a class at all
-        return False
-
 def isSimObjectSequence(value):
     if not isinstance(value, (list, tuple)) or len(value) == 0:
         return False
@@ -153,22 +110,9 @@ def isSimObjectSequence(value):
 
     return True
 
-def isSimObjectClassSequence(value):
-    if not isinstance(value, (list, tuple)) or len(value) == 0:
-        return False
-
-    for val in value:
-        if not isNullPointer(val) and not isSimObjectClass(val):
-            return False
-
-    return True
-
 def isSimObjectOrSequence(value):
     return isSimObject(value) or isSimObjectSequence(value)
 
-def isSimObjectClassOrSequence(value):
-    return isSimObjectClass(value) or isSimObjectClassSequence(value)
-
 def isNullPointer(value):
     return isinstance(value, NullSimObject)
 
@@ -188,40 +132,36 @@ def applyOrMap(objOrSeq, meth, *args, **kwargs):
         return [applyMethod(o, meth, *args, **kwargs) for o in objOrSeq]
 
 
-# The metaclass for ConfigNode (and thus for everything that derives
-# from ConfigNode, including SimObject).  This class controls how new
-# classes that derive from ConfigNode are instantiated, and provides
-# inherited class behavior (just like a class controls how instances
-# of that class are instantiated, and provides inherited instance
-# behavior).
+# The metaclass for SimObject.  This class controls how new classes
+# that derive from SimObject are instantiated, and provides inherited
+# class behavior (just like a class controls how instances of that
+# class are instantiated, and provides inherited instance behavior).
 class MetaSimObject(type):
     # Attributes that can be set only at initialization time
     init_keywords = { 'abstract' : types.BooleanType,
                       'type' : types.StringType }
     # Attributes that can be set any time
-    keywords = { 'check' : types.FunctionType,
-                 'children' : types.ListType }
+    keywords = { 'check' : types.FunctionType }
 
     # __new__ is called before __init__, and is where the statements
     # in the body of the class definition get loaded into the class's
-    # __dict__.  We intercept this to filter out parameter assignments
+    # __dict__.  We intercept this to filter out parameter & port assignments
     # and only allow "private" attributes to be passed to the base
     # __new__ (starting with underscore).
     def __new__(mcls, name, bases, dict):
-        if dict.has_key('_init_dict'):
-            # must have been called from makeSubclass() rather than
-            # via Python class declaration; bypass filtering process.
-            cls_dict = dict
-        else:
-            # Copy "private" attributes (including special methods
-            # such as __new__) to the official dict.  Everything else
-            # goes in _init_dict to be filtered in __init__.
-            cls_dict = {}
-            for key,val in dict.items():
-                if key.startswith('_'):
-                    cls_dict[key] = val
-                    del dict[key]
-            cls_dict['_init_dict'] = dict
+        # Copy "private" attributes, functions, and classes to the
+        # official dict.  Everything else goes in _init_dict to be
+        # filtered in __init__.
+        cls_dict = {}
+        value_dict = {}
+        for key,val in dict.items():
+            if key.startswith('_') or isinstance(val, (types.FunctionType,
+                                                       types.TypeType)):
+                cls_dict[key] = val
+            else:
+                # must be a param/port setting
+                value_dict[key] = val
+        cls_dict['_value_dict'] = value_dict
         return super(MetaSimObject, mcls).__new__(mcls, name, bases, cls_dict)
 
     # subclass initialization
@@ -231,10 +171,15 @@ class MetaSimObject(type):
         super(MetaSimObject, cls).__init__(name, bases, dict)
 
         # initialize required attributes
-        cls._params = multidict()
-        cls._values = multidict()
-        cls._instantiated = False # really instantiated or subclassed
-        cls._anon_subclass_counter = 0
+
+        # class-only attributes
+        cls._params = multidict() # param descriptions
+        cls._ports = multidict()  # port descriptions
+
+        # class or instance attributes
+        cls._values = multidict()   # param values
+        cls._port_map = multidict() # port bindings
+        cls._instantiated = False # really instantiated, cloned, or subclassed
 
         # We don't support multiple inheritance.  If you want to, you
         # must fix multidict to deal with it properly.
@@ -243,22 +188,34 @@ class MetaSimObject(type):
 
         base = bases[0]
 
-        # the only time the following is not true is when we define
-        # the SimObject class itself
+        # Set up general inheritance via multidicts.  A subclass will
+        # inherit all its settings from the base class.  The only time
+        # the following is not true is when we define the SimObject
+        # class itself (in which case the multidicts have no parent).
         if isinstance(base, MetaSimObject):
             cls._params.parent = base._params
+            cls._ports.parent = base._ports
             cls._values.parent = base._values
+            cls._port_map.parent = base._port_map
+            # mark base as having been subclassed
             base._instantiated = True
 
-        # now process the _init_dict items
-        for key,val in cls._init_dict.items():
-            if isinstance(val, (types.FunctionType, types.TypeType)):
-                type.__setattr__(cls, key, val)
-
+        # Now process the _value_dict items.  They could be defining
+        # new (or overriding existing) parameters or ports, setting
+        # class keywords (e.g., 'abstract'), or setting parameter
+        # values or port bindings.  The first 3 can only be set when
+        # the class is defined, so we handle them here.  The others
+        # can be set later too, so just emulate that by calling
+        # setattr().
+        for key,val in cls._value_dict.items():
             # param descriptions
-            elif isinstance(val, ParamDesc):
+            if isinstance(val, ParamDesc):
                 cls._new_param(key, val)
 
+            # port objects
+            elif isinstance(val, Port):
+                cls._ports[key] = val
+
             # init-time-only keywords
             elif cls.init_keywords.has_key(key):
                 cls._set_keyword(key, val, cls.init_keywords[key])
@@ -267,27 +224,6 @@ class MetaSimObject(type):
             else:
                 setattr(cls, key, val)
 
-        # Pull the deep-copy memoization dict out of the class dict if
-        # it's there...
-        memo = cls.__dict__.get('_memo', {})
-
-        # Handle SimObject values
-        for key,val in cls._values.iteritems():
-            # SimObject instances need to be promoted to classes.
-            # Existing classes should not have any instance values, so
-            # these can only occur at the lowest level dict (the
-            # parameters just being set in this class definition).
-            if isSimObjectOrSequence(val):
-                assert(val == cls._values.local[key])
-                cls._values[key] = applyOrMap(val, 'makeClass', memo)
-            # SimObject classes need to be subclassed so that
-            # parameters that get set at this level only affect this
-            # level and derivatives.
-            elif isSimObjectClassOrSequence(val):
-                assert(not cls._values.local.has_key(key))
-                cls._values[key] = applyOrMap(val, 'makeSubclass', {}, memo)
-
-
     def _set_keyword(cls, keyword, val, kwtype):
         if not isinstance(val, kwtype):
             raise TypeError, 'keyword %s has bad type %s (expecting %s)' % \
@@ -313,15 +249,19 @@ class MetaSimObject(type):
             cls._set_keyword(attr, value, cls.keywords[attr])
             return
 
-        # must be SimObject param
-        param = cls._params.get(attr, None)
-        if param:
-            # It's ok: set attribute by delegating to 'object' class.
-            if isSimObjectOrSequence(value) and cls._instantiated:
-                raise AttributeError, \
-                  "Cannot set SimObject parameter '%s' after\n" \
+        if cls._ports.has_key(attr):
+            self._ports[attr].connect(self, attr, value)
+            return
+
+        if isSimObjectOrSequence(value) and cls._instantiated:
+            raise RuntimeError, \
+                  "cannot set SimObject parameter '%s' after\n" \
                   "    class %s has been instantiated or subclassed" \
                   % (attr, cls.__name__)
+
+        # check for param
+        param = cls._params.get(attr, None)
+        if param:
             try:
                 cls._values[attr] = param.convert(value)
             except Exception, e:
@@ -329,12 +269,12 @@ class MetaSimObject(type):
                       (e, cls.__name__, attr, value)
                 e.args = (msg, )
                 raise
-        # I would love to get rid of this
         elif isSimObjectOrSequence(value):
-           cls._values[attr] = value
+            # if RHS is a SimObject, it's an implicit child assignment
+            cls._values[attr] = value
         else:
             raise AttributeError, \
-                  "Class %s has no parameter %s" % (cls.__name__, attr)
+                  "Class %s has no parameter \'%s\'" % (cls.__name__, attr)
 
     def __getattr__(cls, attr):
         if cls._values.has_key(attr):
@@ -343,23 +283,7 @@ class MetaSimObject(type):
         raise AttributeError, \
               "object '%s' has no attribute '%s'" % (cls.__name__, attr)
 
-    # Create a subclass of this class.  Basically a function interface
-    # to the standard Python class definition mechanism, primarily for
-    # internal use.  'memo' dict param supports "deep copy" (really
-    # "deep subclass") operations... within a given operation,
-    # multiple references to a class should result in a single
-    # subclass object with multiple references to it (as opposed to
-    # mutiple unique subclasses).
-    def makeSubclass(cls, init_dict, memo = {}):
-        subcls = memo.get(cls)
-        if not subcls:
-            name = cls.__name__ + '_' + str(cls._anon_subclass_counter)
-            cls._anon_subclass_counter += 1
-            subcls = MetaSimObject(name, (cls,),
-                                   { '_init_dict': init_dict, '_memo': memo })
-        return subcls
-
-# The ConfigNode class is the root of the special hierarchy.  Most of
+# The SimObject class is the root of the special hierarchy.  Most of
 # the code in this class deals with the configuration hierarchy itself
 # (parent/child node relationships).
 class SimObject(object):
@@ -367,82 +291,79 @@ class SimObject(object):
     # get this metaclass.
     __metaclass__ = MetaSimObject
 
-    # __new__ operator allocates new instances of the class.  We
-    # override it here just to support "deep instantiation" operation
-    # via the _memo dict.  When recursively instantiating an object
-    # hierarchy we want to make sure that each class is instantiated
-    # only once, and that if there are multiple references to the same
-    # original class, we end up with the corresponding instantiated
-    # references all pointing to the same instance.
-    def __new__(cls, _memo = None, **kwargs):
-        if _memo is not None and _memo.has_key(cls):
-            # return previously instantiated object
-            assert(len(kwargs) == 0)
-            return _memo[cls]
-        else:
-            # Need a new one... if it needs to be memoized, this will
-            # happen in __init__.  We defer the insertion until then
-            # so __init__ can use the memo dict to tell whether or not
-            # to perform the initialization.
-            return super(SimObject, cls).__new__(cls, **kwargs)
-
-    # Initialize new instance previously allocated by __new__.  For
-    # objects with SimObject-valued params, we need to recursively
-    # instantiate the classes represented by those param values as
-    # well (in a consistent "deep copy"-style fashion; see comment
-    # above).
-    def __init__(self, _memo = None, **kwargs):
-        if _memo is not None:
-            # We're inside a "deep instantiation"
-            assert(isinstance(_memo, dict))
-            assert(len(kwargs) == 0)
-            if _memo.has_key(self.__class__):
-                # __new__ returned an existing, already initialized
-                # instance, so there's nothing to do here
-                assert(_memo[self.__class__] == self)
-                return
-            # no pre-existing object, so remember this one here
-            _memo[self.__class__] = self
-        else:
-            # This is a new top-level instantiation... don't memoize
-            # this objcet, but prepare to memoize any recursively
-            # instantiated objects.
-            _memo = {}
-
-        self.__class__._instantiated = True
+    # Initialize new instance.  For objects with SimObject-valued
+    # children, we need to recursively clone the classes represented
+    # by those param values as well in a consistent "deep copy"-style
+    # fashion.  That is, we want to make sure that each instance is
+    # cloned only once, and that if there are multiple references to
+    # the same original object, we end up with the corresponding
+    # cloned references all pointing to the same cloned instance.
+    def __init__(self, **kwargs):
+        ancestor = kwargs.get('_ancestor')
+        memo_dict = kwargs.get('_memo')
+        if memo_dict is None:
+            # prepare to memoize any recursively instantiated objects
+            memo_dict = {}
+        elif ancestor:
+            # memoize me now to avoid problems with recursive calls
+            memo_dict[ancestor] = self
+
+        if not ancestor:
+            ancestor = self.__class__
+        ancestor._instantiated = True
 
+        # initialize required attributes
+        self._parent = None
         self._children = {}
+        self._ccObject = None  # pointer to C++ object
+        self._instantiated = False # really "cloned"
+
         # Inherit parameter values from class using multidict so
         # individual value settings can be overridden.
-        self._values = multidict(self.__class__._values)
-        # For SimObject-valued parameters, the class should have
-        # classes (not instances) for the values.  We need to
-        # instantiate these classes rather than just inheriting the
-        # class object.
-        for key,val in self.__class__._values.iteritems():
-            if isSimObjectClass(val):
-                setattr(self, key, val(_memo))
-            elif isSimObjectClassSequence(val) and len(val):
-                setattr(self, key, [ v(_memo) for v in val ])
+        self._values = multidict(ancestor._values)
+        # clone SimObject-valued parameters
+        for key,val in ancestor._values.iteritems():
+            if isSimObject(val):
+                setattr(self, key, val(_memo=memo_dict))
+            elif isSimObjectSequence(val) and len(val):
+                setattr(self, key, [ v(_memo=memo_dict) for v in val ])
+        # clone port references.  no need to use a multidict here
+        # since we will be creating new references for all ports.
+        self._port_map = {}
+        for key,val in ancestor._port_map.iteritems():
+            self._port_map[key] = applyOrMap(val, 'clone', memo_dict)
         # apply attribute assignments from keyword args, if any
         for key,val in kwargs.iteritems():
             setattr(self, key, val)
 
-    # Use this instance as a template to create a new class.
-    def makeClass(self, memo = {}):
-        cls = memo.get(self)
-        if not cls:
-            cls =  self.__class__.makeSubclass(self._values.local)
-            memo[self] = cls
-        return cls
-
-    # Direct instantiation of instances (cloning) is no longer
-    # allowed; must generate class from instance first.
+    # "Clone" the current instance by creating another instance of
+    # this instance's class, but that inherits its parameter values
+    # and port mappings from the current instance.  If we're in a
+    # "deep copy" recursive clone, check the _memo dict to see if
+    # we've already cloned this instance.
     def __call__(self, **kwargs):
-        raise TypeError, "cannot instantiate SimObject; "\
-              "use makeClass() to make class first"
+        memo_dict = kwargs.get('_memo')
+        if memo_dict is None:
+            # no memo_dict: must be top-level clone operation.
+            # this is only allowed at the root of a hierarchy
+            if self._parent:
+                raise RuntimeError, "attempt to clone object %s " \
+                      "not at the root of a tree (parent = %s)" \
+                      % (self, self._parent)
+            # create a new dict and use that.
+            memo_dict = {}
+            kwargs['_memo'] = memo_dict
+        elif memo_dict.has_key(self):
+            # clone already done & memoized
+            return memo_dict[self]
+        return self.__class__(_ancestor = self, **kwargs)
 
     def __getattr__(self, attr):
+        if self._ports.has_key(attr):
+            # return reference that can be assigned to another port
+            # via __setattr__
+            return self._ports[attr].makeRef(self, attr)
+
         if self._values.has_key(attr):
             return self._values[attr]
 
@@ -457,10 +378,19 @@ class SimObject(object):
             object.__setattr__(self, attr, value)
             return
 
+        if self._ports.has_key(attr):
+            # set up port connection
+            self._ports[attr].connect(self, attr, value)
+            return
+
+        if isSimObjectOrSequence(value) and self._instantiated:
+            raise RuntimeError, \
+                  "cannot set SimObject parameter '%s' after\n" \
+                  "    instance been cloned %s" % (attr, `self`)
+
         # must be SimObject param
         param = self._params.get(attr, None)
         if param:
-            # It's ok: set attribute by delegating to 'object' class.
             try:
                 value = param.convert(value)
             except Exception, e:
@@ -468,7 +398,6 @@ class SimObject(object):
                       (e, self.__class__.__name__, attr, value)
                 e.args = (msg, )
                 raise
-        # I would love to get rid of this
         elif isSimObjectOrSequence(value):
             pass
         else:
@@ -507,13 +436,13 @@ class SimObject(object):
         self._children[name] = value
 
     def set_path(self, parent, name):
-        if not hasattr(self, '_parent'):
+        if not self._parent:
             self._parent = parent
             self._name = name
             parent.add_child(name, self)
 
     def path(self):
-        if not hasattr(self, '_parent'):
+        if not self._parent:
             return 'root'
         ppath = self._parent.path()
         if ppath == 'root':
@@ -554,6 +483,8 @@ class SimObject(object):
     def print_ini(self):
         print '[' + self.path() + ']'	# .ini section header
 
+        instanceDict[self.path()] = self
+
         if hasattr(self, 'type') and not isinstance(self, ParamContext):
             print 'type=%s' % self.type
 
@@ -585,6 +516,59 @@ class SimObject(object):
         for child in child_names:
             self._children[child].print_ini()
 
+    # Call C++ to create C++ object corresponding to this object and
+    # (recursively) all its children
+    def createCCObject(self):
+        self.getCCObject() # force creation
+        for child in self._children.itervalues():  # then recurse into each child
+            child.createCCObject()
+
+    # Get C++ object corresponding to this object, calling C++ if
+    # necessary to construct it.  Does *not* recursively create
+    # children.
+    def getCCObject(self):
+        if not self._ccObject:  # still None (as set in __init__) until first created
+            self._ccObject = -1 # flag to catch cycles in recursion
+            self._ccObject = cc_main.createSimObject(self.path())
+        elif self._ccObject == -1:  # -1 is truthy, so a re-entrant call lands here
+            raise RuntimeError, "%s: recursive call to getCCObject()" \
+                  % self.path()
+        return self._ccObject
+
+    # Create C++ port connections corresponding to the connections in
+    # _port_map (& recursively for all children)
+    def connectPorts(self):
+        for portRef in self._port_map.itervalues():
+            applyOrMap(portRef, 'ccConnect')  # entry is one PortRef or a list of them
+        for child in self._children.itervalues():
+            child.connectPorts()
+
+    def startDrain(self, drain_event, recursive):  # returns the summed drain() results
+        count = 0
+        # ParamContexts don't serialize
+        if isinstance(self, SimObject) and not isinstance(self, ParamContext):
+            count += self._ccObject.drain(drain_event)
+        if recursive:  # when false, children are not visited at all
+            for child in self._children.itervalues():
+                count += child.startDrain(drain_event, True)
+        return count
+
+    def resume(self):  # unlike startDrain, always recurses into every child
+        if isinstance(self, SimObject) and not isinstance(self, ParamContext):
+            self._ccObject.resume()  # ParamContext objects are skipped
+        for child in self._children.itervalues():
+            child.resume()
+
+    def changeTiming(self, mode):  # applies mode to every System in this subtree
+        if isinstance(self, System):  # NOTE(review): confirm 'System' is in scope here
+            self._ccObject.setMemoryMode(mode)
+        for child in self._children.itervalues():
+            child.changeTiming(mode)
+
+    def takeOverFrom(self, old_cpu):  # pass old_cpu's C++ object to ours
+        cpu_ptr = cc_main.convertToBaseCPUPtr(old_cpu._ccObject)
+        self._ccObject.takeOverFrom(cpu_ptr)
+
     # generate output file for 'dot' to display as a pretty graph.
     # this code is currently broken.
     def outputDot(self, dot):
@@ -675,9 +659,9 @@ class BaseProxy(object):
 
         if self._search_up:
             while not done:
-                try: obj = obj._parent
-                except: break
-
+                obj = obj._parent
+                if not obj:
+                    break
                 result, done = self.find(obj)
 
         if not done:
@@ -793,16 +777,16 @@ Self = ProxyFactory(search_self = True, search_up = False)
 #
 # Parameter description classes
 #
-# The _params dictionary in each class maps parameter names to
-# either a Param or a VectorParam object.  These objects contain the
+# The _params dictionary in each class maps parameter names to either
+# a Param or a VectorParam object.  These objects contain the
 # parameter description string, the parameter type, and the default
-# value (loaded from the PARAM section of the .odesc files).  The
-# _convert() method on these objects is used to force whatever value
-# is assigned to the parameter to the appropriate type.
+# value (if any).  The convert() method on these objects is used to
+# force whatever value is assigned to the parameter to the appropriate
+# type.
 #
 # Note that the default values are loaded into the class's attribute
 # space when the parameter dictionary is initialized (in
-# MetaConfigNode._setparams()); after that point they aren't used.
+# MetaSimObject._new_param()); after that point they aren't used.
 #
 #####################################################################
 
@@ -1419,6 +1403,107 @@ MaxAddr = Addr.max
 MaxTick = Tick.max
 AllMemory = AddrRange(0, MaxAddr)
 
+
+#####################################################################
+#
+# Port objects
+#
+# Ports are used to interconnect objects in the memory system.
+#
+#####################################################################
+
+# Port reference: encapsulates a reference to a particular port on a
+# particular SimObject.
+class PortRef(object):
+    def __init__(self, simobj, name, isVec):
+        assert(isSimObject(simobj))
+        self.simobj = simobj
+        self.name = name
+        self.index = -1    # slot in a vector port's list; assigned by setPeer()
+        self.isVec = isVec # is this a vector port?
+        self.peer = None   # not associated with another port yet
+        self.ccConnected = False # C++ port connection done?
+
+    # Set peer port reference.  Called via __setattr__ as a result of
+    # a port assignment, e.g., "obj1.port1 = obj2.port2".
+    def setPeer(self, other):
+        if self.isVec:
+            curMap = self.simobj._port_map.get(self.name, [])
+            self.index = len(curMap)
+            curMap.append(other)
+        else:
+            curMap = self.simobj._port_map.get(self.name)
+            if curMap and not self.isVec:  # isVec is already false in this branch
+                print "warning: overwriting port", self.simobj, self.name
+            curMap = other
+        self.simobj._port_map[self.name] = curMap  # map holds the *peer* ref(s)
+        self.peer = other
+
+    def clone(self, memo):  # memo is forwarded to the SimObject clone call
+        newRef = copy.copy(self)  # shallow copy; simobj and peer are re-pointed below
+        assert(isSimObject(newRef.simobj))
+        newRef.simobj = newRef.simobj(_memo=memo)
+        # Tricky: if I'm the *second* PortRef in the pair to be
+        # cloned, then my peer is still in the middle of its clone
+        # method, and thus hasn't returned to its owner's
+        # SimObject.__init__ to get installed in _port_map.  As a
+        # result I have no way of finding the *new* peer object.  So I
+        # mark myself as "waiting" for my peer, and I let the *first*
+        # PortRef clone call set up both peer pointers after I return.
+        newPeer = newRef.simobj._port_map.get(self.name)
+        if newPeer:
+            if self.isVec:
+                assert(self.index != -1)
+                newPeer = newPeer[self.index]
+            # other guy is all set up except for his peer pointer
+            assert(newPeer.peer == -1) # peer must be waiting for handshake
+            newPeer.peer = newRef
+            newRef.peer = newPeer
+        else:
+            # other guy is in clone; just wait for him to do the work
+            newRef.peer = -1 # mark as waiting for handshake
+        return newRef
+
+    # Call C++ to create corresponding port connection between C++ objects
+    def ccConnect(self):
+        if self.ccConnected: # already done this
+            return
+        peer = self.peer
+        cc_main.connectPorts(self.simobj.getCCObject(), self.name, self.index,
+                             peer.simobj.getCCObject(), peer.name, peer.index)
+        self.ccConnected = True
+        peer.ccConnected = True  # so the peer's own ccConnect() returns early
+
+# Port description object.  Like a ParamDesc object, this represents a
+# logical port in the SimObject class, not a particular port on a
+# SimObject instance.  The latter are represented by PortRef objects.
+class Port(object):
+    def __init__(self, desc):
+        self.desc = desc
+        self.isVec = False  # VectorPort overrides this with True
+
+    # Generate a PortRef for this port on the given SimObject with the
+    # given name
+    def makeRef(self, simobj, name):
+        return PortRef(simobj, name, self.isVec)
+
+    # Connect an instance of this port (on the given SimObject with
+    # the given name) with the port described by the supplied PortRef
+    def connect(self, simobj, name, ref):
+        if not isinstance(ref, PortRef):
+            raise TypeError, \
+                  "assigning non-port reference to port '%s'" % name
+        myRef = self.makeRef(simobj, name)
+        myRef.setPeer(ref)  # each side records the other as its peer
+        ref.setPeer(myRef)
+
+# VectorPort description object.  Like Port, but represents a vector
+# of connections (e.g., as on a Bus).
+class VectorPort(Port):
+    def __init__(self, desc):
+        Port.__init__(self, desc)
+        self.isVec = True  # makeRef() hands this flag to every PortRef it builds
+
 #####################################################################
 
 # __all__ defines the list of symbols that get exported when
@@ -1436,5 +1521,6 @@ __all__ = ['SimObject', 'ParamContext', 'Param', 'VectorParam',
            'NetworkBandwidth', 'MemoryBandwidth',
            'Range', 'AddrRange', 'MaxAddr', 'MaxTick', 'AllMemory',
            'Null', 'NULL',
-           'NextEthernetAddr']
+           'NextEthernetAddr',
+           'Port', 'VectorPort']
 
diff --git a/src/python/m5/main.py b/src/python/m5/main.py
new file mode 100644
index 000000000..afe73d94c
--- /dev/null
+++ b/src/python/m5/main.py
@@ -0,0 +1,321 @@
+# Copyright (c) 2005 The Regents of The University of Michigan
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Nathan Binkert
+
+import code, optparse, os, socket, sys
+from datetime import datetime
+from attrdict import attrdict
+
+try:
+    import info
+except ImportError:
+    info = None
+
+__all__ = [ 'options', 'arguments', 'main' ]
+
+usage="%prog [m5 options] script.py [script options]"
+version="%prog 2.0"
+brief_copyright='''
+Copyright (c) 2001-2006
+The Regents of The University of Michigan
+All Rights Reserved
+'''
+
+# there's only one option parsing done, so make it global and add some
+# helper functions to make it work well.
+parser = optparse.OptionParser(usage=usage, version=version,
+                               description=brief_copyright,
+                               formatter=optparse.TitledHelpFormatter())
+parser.disable_interspersed_args()
+
+# current option group
+group = None
+
+def set_group(*args, **kwargs):
+    '''make a new option group current, or clear it when called bare'''
+    global group
+    if args or kwargs:
+        group = parser.add_option_group(*args, **kwargs)
+    else:
+        group = None
+
+class splitter(object):
+    '''optparse callback that extends option.dest with value.split(split)'''
+    def __init__(self, split): self.split = split
+    def __call__(self, option, opt_str, value, parser):
+        getattr(parser.values, option.dest).extend(value.split(self.split))
+
+def add_option(*args, **kwargs):
+    '''add an option to the current option group, or to the global
+    parser if no group is set'''
+    # action='append' together with split=SEP collects values into a
+    # list, and each argument may itself be a SEP-separated list
+
+    if kwargs.get('action', None) == 'append' and 'split' in kwargs:
+        split = kwargs.pop('split')
+        kwargs['default'] = []
+        kwargs['type'] = 'string'
+        kwargs['action'] = 'callback'
+        kwargs['callback'] = splitter(split)
+
+    if group:
+        return group.add_option(*args, **kwargs)
+
+    return parser.add_option(*args, **kwargs)
+
+def bool_option(name, default, help):
+    '''add a boolean option called --name and --no-name.
+    Display help depending on which is the default'''
+
+    # only the flag that changes the default gets visible help text
+    if default:
+        thelp, fhelp = optparse.SUPPRESS_HELP, help
+    else:
+        thelp, fhelp = help, optparse.SUPPRESS_HELP
+
+    # --name stores True (optparse derives its dest from the flag);
+    # --no-name stores False into that same destination
+    add_option('--%s' % name, action="store_true", default=default,
+               help=thelp)
+    add_option('--no-%s' % name, action="store_false",
+               dest=name.replace('-', '_'), help=fhelp)
+
+# Help options
+add_option('-A', "--authors", action="store_true", default=False,
+    help="Show author information")
+add_option('-C', "--copyright", action="store_true", default=False,
+    help="Show full copyright information")
+add_option('-R', "--readme", action="store_true", default=False,
+    help="Show the readme")
+add_option('-N', "--release-notes", action="store_true", default=False,
+    help="Show the release notes")
+
+# Options for configuring the base simulator
+add_option('-d', "--outdir", metavar="DIR", default=".",
+    help="Set the output directory to DIR [Default: %default]")
+add_option('-i', "--interactive", action="store_true", default=False,
+    help="Invoke the interactive interpreter after running the script")
+add_option("--pdb", action="store_true", default=False,
+    help="Invoke the python debugger before running the script")
+add_option('-p', "--path", metavar="PATH[:PATH]", action='append', split=':',
+    help="Prepend PATH to the system path when invoking the script")
+add_option('-q', "--quiet", action="count", default=0,
+    help="Reduce verbosity")
+add_option('-v', "--verbose", action="count", default=0,
+    help="Increase verbosity")
+
+# Statistics options
+set_group("Statistics Options")
+add_option("--stats-file", metavar="FILE", default="m5stats.txt",
+    help="Sets the output file for statistics [Default: %default]")
+
+# Debugging options
+set_group("Debugging Options")
+add_option("--debug-break", metavar="TIME[,TIME]", action='append', split=',',
+    help="Cycle to create a breakpoint")
+
+# Tracing options
+set_group("Trace Options")
+add_option("--trace-flags", metavar="FLAG[,FLAG]", action='append', split=',',
+    help="Sets the flags for tracing")
+add_option("--trace-start", metavar="TIME", default='0s',
+    help="Start tracing at TIME (must have units)")
+add_option("--trace-file", metavar="FILE", default="cout",
+    help="Sets the output file for tracing [Default: %default]")
+add_option("--trace-circlebuf", metavar="SIZE", type="int", default=0,
+    help="If SIZE is non-zero, turn on the circular buffer with SIZE lines")
+add_option("--no-trace-circlebuf", action="store_const", const=0,
+    dest='trace_circlebuf', help=optparse.SUPPRESS_HELP)
+bool_option("trace-dumponexit", default=False,
+    help="Dump trace buffer on exit")
+add_option("--trace-ignore", metavar="EXPR", action='append', split=':',
+    help="Ignore EXPR sim objects")
+
+# Execution Trace options
+set_group("Execution Trace Options")
+bool_option("speculative", default=True,
+    help="Don't capture speculative instructions")
+bool_option("print-cycle", default=True,
+    help="Don't print cycle numbers in trace output")
+bool_option("print-symbol", default=True,
+    help="Disable PC symbols in trace output")
+bool_option("print-opclass", default=True,
+    help="Don't print op class type in trace output")
+bool_option("print-thread", default=True,
+    help="Don't print thread number in trace output")
+bool_option("print-effaddr", default=True,
+    help="Don't print effective address in trace output")
+bool_option("print-data", default=True,
+    help="Don't print result data in trace output")
+bool_option("print-iregs", default=False,
+    help="Print integer registers in trace output")
+bool_option("print-fetch-seq", default=False,
+    help="Print fetch sequence numbers in trace output")
+bool_option("print-cpseq", default=False,
+    help="Print correct path sequence numbers in trace output")
+
+options = attrdict()
+arguments = []
+
+def usage(exitcode=None):
+    '''print the option help, then exit if an exit code was supplied'''
+    parser.print_help()
+    if exitcode is not None: sys.exit(exitcode)
+
+def parse_args():
+    '''parse the command line into the global options and arguments'''
+    parsed,args = parser.parse_args()
+    opts = attrdict(vars(parsed))
+
+    # asking for both verbose and quiet is contradictory
+    if opts.verbose > 0 and opts.quiet > 0:
+        usage(2)
+
+    # fold both counters into 'verbose': 0 is the default, negative
+    # means quieter and positive means more verbose
+    opts.verbose = opts.verbose - opts.quiet
+    del opts.quiet
+
+    options.update(opts)
+    arguments.extend(args)
+    return opts,args
+
+def main():
+    '''Entry point: parse the m5 options, copy them onto the
+    simulator's parameter objects and run the user's script.'''
+    import cc_main
+
+    parse_args()
+
+    # informational options: (requested, heading, attribute of info)
+    requests = [
+        (options.copyright, None, 'LICENSE'),
+        (options.authors, 'Author information:', 'AUTHORS'),
+        (options.readme, 'Readme:', 'README'),
+        (options.release_notes, 'Release Notes:', 'RELEASE_NOTES'),
+    ]
+    done = False
+    for requested, heading, attr in requests:
+        if not requested:
+            continue
+        done = True
+        # 'info' is None when its import failed at the top of this
+        # file; say so instead of dying with an AttributeError
+        if info is None:
+            print >>sys.stderr, "m5: the 'info' module is not available"
+            sys.exit(1)
+        if heading is not None:
+            print heading
+            print
+        print getattr(info, attr)
+        print
+
+    if done:
+        sys.exit(0)
+
+    if options.verbose >= 0:
+        print "M5 Simulator System"
+        print brief_copyright
+        print
+        print "M5 compiled %s" % cc_main.cvar.compileDate
+        print "M5 started %s" % datetime.now().ctime()
+        print "M5 executing on %s" % socket.gethostname()
+
+    # check to make sure we can find the listed script
+    if not arguments or not os.path.isfile(arguments[0]):
+        usage(2)
+
+    # tell C++ about output directory
+    cc_main.setOutputDir(options.outdir)
+
+    # update the system path with elements from the -p option
+    sys.path[0:0] = options.path
+
+    import objects
+
+    # set stats options
+    objects.Statistics.text_file = options.stats_file
+
+    # set debugging options
+    objects.Debug.break_cycles = options.debug_break
+
+    # set tracing options
+    objects.Trace.flags = options.trace_flags
+    objects.Trace.start = options.trace_start
+    objects.Trace.file = options.trace_file
+    objects.Trace.bufsize = options.trace_circlebuf
+    objects.Trace.dump_on_exit = options.trace_dumponexit
+    objects.Trace.ignore = options.trace_ignore
+
+    # set execution trace options
+    objects.ExecutionTrace.speculative = options.speculative
+    objects.ExecutionTrace.print_cycle = options.print_cycle
+    objects.ExecutionTrace.pc_symbol = options.print_symbol
+    objects.ExecutionTrace.print_opclass = options.print_opclass
+    objects.ExecutionTrace.print_thread = options.print_thread
+    objects.ExecutionTrace.print_effaddr = options.print_effaddr
+    objects.ExecutionTrace.print_data = options.print_data
+    objects.ExecutionTrace.print_iregs = options.print_iregs
+    objects.ExecutionTrace.print_fetchseq = options.print_fetch_seq
+    objects.ExecutionTrace.print_cpseq = options.print_cpseq
+
+    sys.argv = arguments
+    sys.path = [ os.path.dirname(sys.argv[0]) ] + sys.path
+
+    scope = { '__file__' : sys.argv[0] }
+
+    # we want readline if we're doing anything interactive
+    if options.interactive or options.pdb:
+        exec("import readline", scope)
+
+    # if pdb was requested, execfile the thing under pdb, otherwise,
+    # just do the execfile normally
+    if options.pdb:
+        from pdb import Pdb
+        debugger = Pdb()
+        # %r emits a properly escaped string literal, so script paths
+        # containing quotes or backslashes still form valid code
+        debugger.run('execfile(%r)' % sys.argv[0], scope)
+    else:
+        execfile(sys.argv[0], scope)
+
+    # once the script is done
+    if options.interactive:
+        interact = code.InteractiveConsole(scope)
+        interact.interact("M5 Interactive Console")
+
+if __name__ == '__main__':
+    # run directly: parse the command line and pretty-print the result
+    from pprint import pprint
+    parse_args()
+
+    print 'opts:'
+    pprint(options, indent=4)
+    print
+
+    print 'args:'
+    pprint(arguments, indent=4)
diff --git a/src/python/m5/objects/BaseCPU.py b/src/python/m5/objects/BaseCPU.py
index 2e78578df..5bf98be9c 100644
--- a/src/python/m5/objects/BaseCPU.py
+++ b/src/python/m5/objects/BaseCPU.py
@@ -6,10 +6,10 @@ class BaseCPU(SimObject):
     abstract = True
     mem = Param.MemObject("memory")
 
+    system = Param.System(Parent.any, "system object")
     if build_env['FULL_SYSTEM']:
         dtb = Param.AlphaDTB("Data TLB")
         itb = Param.AlphaITB("Instruction TLB")
-        system = Param.System(Parent.any, "system object")
         cpu_id = Param.Int(-1, "CPU identifier")
     else:
         workload = VectorParam.Process("processes to run")
diff --git a/src/python/m5/objects/BaseCache.py b/src/python/m5/objects/BaseCache.py
index 33f44759b..497b2b038 100644
--- a/src/python/m5/objects/BaseCache.py
+++ b/src/python/m5/objects/BaseCache.py
@@ -1,29 +1,26 @@
 from m5.config import *
-from BaseMem import BaseMem
+from MemObject import MemObject
 
 class Prefetch(Enum): vals = ['none', 'tagged', 'stride', 'ghb']
 
-class BaseCache(BaseMem):
+class BaseCache(MemObject):
     type = 'BaseCache'
     adaptive_compression = Param.Bool(False,
         "Use an adaptive compression scheme")
     assoc = Param.Int("associativity")
     block_size = Param.Int("block size in bytes")
+    latency = Param.Int("Latency")
     compressed_bus = Param.Bool(False,
         "This cache connects to a compressed memory")
     compression_latency = Param.Latency('0ns',
         "Latency in cycles of compression algorithm")
     do_copy = Param.Bool(False, "perform fast copies in the cache")
     hash_delay = Param.Int(1, "time in cycles of hash access")
-    in_bus = Param.Bus(NULL, "incoming bus object")
     lifo = Param.Bool(False,
         "whether this NIC partition should use LIFO repl. policy")
     max_miss_count = Param.Counter(0,
         "number of misses to handle before calling exit")
-    mem_trace = Param.MemTraceWriter(NULL,
-                                     "memory trace writer to record accesses")
     mshrs = Param.Int("number of MSHRs (max outstanding requests)")
-    out_bus = Param.Bus("outgoing bus object")
     prioritizeRequests = Param.Bool(False,
         "always service demand misses first")
     protocol = Param.CoherenceProtocol(NULL, "coherence protocol to use")
@@ -63,3 +60,6 @@ class BaseCache(BaseMem):
          "Use the CPU ID to seperate calculations of prefetches")
     prefetch_data_accesses_only = Param.Bool(False,
          "Only prefetch on data not on instruction accesses")
+    hit_latency = Param.Int(1,"Hit Latency of the cache")
+    cpu_side = Port("Port on side closer to CPU")
+    mem_side = Port("Port on side closer to MEM")
diff --git a/src/python/m5/objects/Bridge.py b/src/python/m5/objects/Bridge.py
index 880535755..c9e673afb 100644
--- a/src/python/m5/objects/Bridge.py
+++ b/src/python/m5/objects/Bridge.py
@@ -3,6 +3,8 @@ from MemObject import MemObject
 
 class Bridge(MemObject):
     type = 'Bridge'
+    side_a = Port('Side A port')
+    side_b = Port('Side B port')
     queue_size_a = Param.Int(16, "The number of requests to buffer")
     queue_size_b = Param.Int(16, "The number of requests to buffer")
     delay = Param.Latency('0ns', "The latency of this bridge")
diff --git a/src/python/m5/objects/Bus.py b/src/python/m5/objects/Bus.py
index c37dab438..e0278e6c3 100644
--- a/src/python/m5/objects/Bus.py
+++ b/src/python/m5/objects/Bus.py
@@ -3,4 +3,6 @@ from MemObject import MemObject
 
 class Bus(MemObject):
     type = 'Bus'
+    port = VectorPort("vector port for connecting devices")
+    default = Port("Default port for requests that aren't handeled by a device.")
     bus_id = Param.Int(0, "blah")
diff --git a/src/python/m5/objects/Device.py b/src/python/m5/objects/Device.py
index 7798f5f04..222f750da 100644
--- a/src/python/m5/objects/Device.py
+++ b/src/python/m5/objects/Device.py
@@ -4,6 +4,7 @@ from MemObject import MemObject
 class PioDevice(MemObject):
     type = 'PioDevice'
     abstract = True
+    pio = Port("Programmed I/O port")
     platform = Param.Platform(Parent.any, "Platform this device is part of")
     system = Param.System(Parent.any, "System this device is part of")
 
@@ -16,3 +17,4 @@ class BasicPioDevice(PioDevice):
 class DmaDevice(PioDevice):
     type = 'DmaDevice'
     abstract = True
+    dma = Port("DMA port")
diff --git a/src/python/m5/objects/FuncUnit.py b/src/python/m5/objects/FuncUnit.py
new file mode 100644
index 000000000..f61590ae9
--- /dev/null
+++ b/src/python/m5/objects/FuncUnit.py
@@ -0,0 +1,17 @@
+from m5.config import *
+
+class OpType(Enum):
+    vals = ['(null)', 'IntAlu', 'IntMult', 'IntDiv', 'FloatAdd',
+            'FloatCmp', 'FloatCvt', 'FloatMult', 'FloatDiv', 'FloatSqrt',
+            'MemRead', 'MemWrite', 'IprAccess', 'InstPrefetch']
+
+class OpDesc(SimObject):
+    type = 'OpDesc'
+    issueLat = Param.Int(1, "cycles until another can be issued")
+    opClass = Param.OpType("type of operation")
+    opLat = Param.Int(1, "cycles until result is available")
+
+class FUDesc(SimObject):
+    type = 'FUDesc'
+    count = Param.Int("number of these FU's available")
+    opList = VectorParam.OpDesc("operation classes for this FU type")
diff --git a/src/python/m5/objects/AlphaFullCPU.py b/src/python/m5/objects/O3CPU.py
index 2988305d3..d6bc454ad 100644
--- a/src/python/m5/objects/AlphaFullCPU.py
+++ b/src/python/m5/objects/O3CPU.py
@@ -2,14 +2,16 @@ from m5 import build_env
 from m5.config import *
 from BaseCPU import BaseCPU
 
-class DerivAlphaFullCPU(BaseCPU):
-    type = 'DerivAlphaFullCPU'
+class DerivO3CPU(BaseCPU):
+    type = 'DerivO3CPU'
     activity = Param.Unsigned("Initial count")
     numThreads = Param.Unsigned("number of HW thread contexts")
 
     checker = Param.BaseCPU(NULL, "checker")
 
     cachePorts = Param.Unsigned("Cache Ports")
+    icache_port = Port("Instruction Port")
+    dcache_port = Port("Data Port")
 
     decodeToFetchDelay = Param.Unsigned("Decode to fetch delay")
     renameToFetchDelay = Param.Unsigned("Rename to fetch delay")
@@ -37,12 +39,10 @@ class DerivAlphaFullCPU(BaseCPU):
                "Issue/Execute/Writeback delay")
     issueToExecuteDelay = Param.Unsigned("Issue to execute delay (internal "
               "to the IEW stage)")
+    dispatchWidth = Param.Unsigned("Dispatch width")
     issueWidth = Param.Unsigned("Issue width")
-    executeWidth = Param.Unsigned("Execute width")
-    executeIntWidth = Param.Unsigned("Integer execute width")
-    executeFloatWidth = Param.Unsigned("Floating point execute width")
-    executeBranchWidth = Param.Unsigned("Branch execute width")
-    executeMemoryWidth = Param.Unsigned("Memory execute width")
+    wbWidth = Param.Unsigned("Writeback width")
+    wbDepth = Param.Unsigned("Writeback depth")
     fuPool = Param.FUPool(NULL, "Functional Unit pool")
 
     iewToCommitDelay = Param.Unsigned("Issue/Execute/Writeback to commit "
@@ -53,6 +53,9 @@ class DerivAlphaFullCPU(BaseCPU):
     trapLatency = Param.Tick("Trap latency")
     fetchTrapLatency = Param.Tick("Fetch trap latency")
 
+    backComSize = Param.Unsigned("Time buffer size for backwards communication")
+    forwardComSize = Param.Unsigned("Time buffer size for forward communication")
+
     predType = Param.String("Branch predictor type ('local', 'tournament')")
     localPredictorSize = Param.Unsigned("Size of local predictor")
     localCtrBits = Param.Unsigned("Bits per counter")
diff --git a/src/python/m5/objects/OzoneCPU.py b/src/python/m5/objects/OzoneCPU.py
index f2d9aea84..88fb63c74 100644
--- a/src/python/m5/objects/OzoneCPU.py
+++ b/src/python/m5/objects/OzoneCPU.py
@@ -7,11 +7,11 @@ class DerivOzoneCPU(BaseCPU):
 
     numThreads = Param.Unsigned("number of HW thread contexts")
 
-    if not build_env['FULL_SYSTEM']:
-        mem = Param.FunctionalMemory(NULL, "memory")
-
     checker = Param.BaseCPU("Checker CPU")
 
+    icache_port = Port("Instruction Port")
+    dcache_port = Port("Data Port")
+
     width = Param.Unsigned("Width")
     frontEndWidth = Param.Unsigned("Front end width")
     backEndWidth = Param.Unsigned("Back end width")
diff --git a/src/python/m5/objects/Pci.py b/src/python/m5/objects/Pci.py
index 9e1e91b13..29014bb37 100644
--- a/src/python/m5/objects/Pci.py
+++ b/src/python/m5/objects/Pci.py
@@ -1,5 +1,5 @@
 from m5.config import *
-from Device import BasicPioDevice, DmaDevice
+from Device import BasicPioDevice, DmaDevice, PioDevice
 
 class PciConfigData(SimObject):
     type = 'PciConfigData'
@@ -38,18 +38,22 @@ class PciConfigData(SimObject):
     MaximumLatency = Param.UInt8(0x00, "Maximum Latency")
     MinimumGrant = Param.UInt8(0x00, "Minimum Grant")
 
-class PciConfigAll(BasicPioDevice):
+class PciConfigAll(PioDevice):
     type = 'PciConfigAll'
+    pio_latency = Param.Tick(1, "Programmed IO latency in simticks")
+    bus = Param.UInt8(0x00, "PCI bus to act as config space for")
+    size = Param.MemorySize32('16MB', "Size of config space")
+
 
 class PciDevice(DmaDevice):
     type = 'PciDevice'
     abstract = True
+    config = Port("PCI configuration space port")
     pci_bus = Param.Int("PCI bus")
     pci_dev = Param.Int("PCI device number")
     pci_func = Param.Int("PCI function code")
     pio_latency = Param.Tick(1, "Programmed IO latency in simticks")
     configdata = Param.PciConfigData(Parent.any, "PCI Config data")
-    configspace = Param.PciConfigAll(Parent.any, "PCI Configspace")
 
 class PciFake(PciDevice):
     type = 'PciFake'
diff --git a/src/python/m5/objects/PhysicalMemory.py b/src/python/m5/objects/PhysicalMemory.py
index bed90d555..9cc7510a2 100644
--- a/src/python/m5/objects/PhysicalMemory.py
+++ b/src/python/m5/objects/PhysicalMemory.py
@@ -3,6 +3,7 @@ from MemObject import *
 
 class PhysicalMemory(MemObject):
     type = 'PhysicalMemory'
+    port = Port("the access port")
     range = Param.AddrRange("Device Address")
     file = Param.String('', "memory mapped file")
     latency = Param.Latency(Parent.clock, "latency of an access")
diff --git a/src/python/m5/objects/System.py b/src/python/m5/objects/System.py
index 9a1e1d690..386f39277 100644
--- a/src/python/m5/objects/System.py
+++ b/src/python/m5/objects/System.py
@@ -1,9 +1,12 @@
 from m5 import build_env
 from m5.config import *
 
+class MemoryMode(Enum): vals = ['invalid', 'atomic', 'timing']
+
 class System(SimObject):
     type = 'System'
     physmem = Param.PhysicalMemory(Parent.any, "phsyical memory")
+    mem_mode = Param.MemoryMode('atomic', "The mode the memory system is in")
     if build_env['FULL_SYSTEM']:
         boot_cpu_frequency = Param.Frequency(Self.cpu[0].clock.frequency,
                                              "boot processor frequency")
diff --git a/src/sim/builder.cc b/src/sim/builder.cc
index 121275c83..9074cc899 100644
--- a/src/sim/builder.cc
+++ b/src/sim/builder.cc
@@ -33,17 +33,14 @@
 #include "base/inifile.hh"
 #include "base/misc.hh"
 #include "sim/builder.hh"
-#include "sim/configfile.hh"
-#include "sim/config_node.hh"
 #include "sim/host.hh"
 #include "sim/sim_object.hh"
 #include "sim/root.hh"
 
 using namespace std;
 
-SimObjectBuilder::SimObjectBuilder(ConfigNode *_configNode)
-    : ParamContext(_configNode->getPath(), NoAutoInit),
-      configNode(_configNode)
+SimObjectBuilder::SimObjectBuilder(const std::string &_iniSection)
+    : ParamContext(_iniSection, NoAutoInit)
 {
 }
 
@@ -78,8 +75,7 @@ SimObjectBuilder::parseParams(IniFile &iniFile)
 void
 SimObjectBuilder::printErrorProlog(ostream &os)
 {
-    ccprintf(os, "Error creating object '%s' of type '%s':\n",
-             iniSection, configNode->getType());
+    ccprintf(os, "Error creating object '%s':\n", iniSection);
 }
 
 
@@ -112,9 +108,13 @@ SimObjectClass::SimObjectClass(const string &className, CreateFunc createFunc)
 //
 //
 SimObject *
-SimObjectClass::createObject(IniFile &configDB, ConfigNode *configNode)
+SimObjectClass::createObject(IniFile &configDB, const std::string &iniSection)
 {
-    const string &type = configNode->getType();
+    string type;
+    if (!configDB.find(iniSection, "type", type)) {
+        // no C++ type associated with this object
+        return NULL;
+    }
 
     // look up className to get appropriate createFunc
     if (classMap->find(type) == classMap->end())
@@ -125,7 +125,7 @@ SimObjectClass::createObject(IniFile &configDB, ConfigNode *configNode)
 
     // call createFunc with config hierarchy node to get object
     // builder instance (context with parameters for object creation)
-    SimObjectBuilder *objectBuilder = (*createFunc)(configNode);
+    SimObjectBuilder *objectBuilder = (*createFunc)(iniSection);
 
     assert(objectBuilder != NULL);
 
@@ -166,7 +166,7 @@ SimObjectClass::describeAllClasses(ostream &os)
         os << "[" << className << "]\n";
 
         // create dummy object builder just to instantiate parameters
-        SimObjectBuilder *objectBuilder = (*createFunc)(NULL);
+        SimObjectBuilder *objectBuilder = (*createFunc)("");
 
         // now get the object builder to describe ite params
         objectBuilder->describeParams(os);
diff --git a/src/sim/builder.hh b/src/sim/builder.hh
index 8d0846155..2997fe5c3 100644
--- a/src/sim/builder.hh
+++ b/src/sim/builder.hh
@@ -55,14 +55,8 @@ class SimObject;
 //
 class SimObjectBuilder : public ParamContext
 {
-  private:
-    // The corresponding node in the configuration hierarchy.
-    // (optional: may be null if the created object is not in the
-    // hierarchy)
-    ConfigNode *configNode;
-
   public:
-    SimObjectBuilder(ConfigNode *_configNode);
+    SimObjectBuilder(const std::string &_iniSection);
 
     virtual ~SimObjectBuilder();
 
@@ -77,9 +71,6 @@ class SimObjectBuilder : public ParamContext
     // configuration hierarchy node label and position)
     virtual const std::string &getInstanceName() { return iniSection; }
 
-    // return the configuration hierarchy node for this context.
-    virtual ConfigNode *getConfigNode() { return configNode; }
-
     // Create the actual SimObject corresponding to the parameter
     // values in this context.  This function is overridden in derived
     // classes to call a specific constructor for a particular
@@ -125,7 +116,7 @@ class SimObjectClass
     // for the object (specified by the second string argument), and
     // an optional config hierarchy node (specified by the third
     // argument).  A pointer to the new SimObjectBuilder is returned.
-    typedef SimObjectBuilder *(*CreateFunc)(ConfigNode *configNode);
+    typedef SimObjectBuilder *(*CreateFunc)(const std::string &iniSection);
 
     static std::map<std::string,CreateFunc> *classMap;
 
@@ -137,7 +128,8 @@ class SimObjectClass
 
     // create SimObject given name of class and pointer to
     // configuration hierarchy node
-    static SimObject *createObject(IniFile &configDB, ConfigNode *configNode);
+    static SimObject *createObject(IniFile &configDB,
+                                   const std::string &iniSection);
 
     // print descriptions of all parameters registered with all
     // SimObject classes
@@ -156,15 +148,15 @@ class OBJ_CLASS##Builder : public SimObjectBuilder		\
 
 #define END_DECLARE_SIM_OBJECT_PARAMS(OBJ_CLASS)		\
                                                                 \
-    OBJ_CLASS##Builder(ConfigNode *configNode);			\
+    OBJ_CLASS##Builder(const std::string &iniSection);          \
     virtual ~OBJ_CLASS##Builder() {}				\
                                                                 \
     OBJ_CLASS *create();					\
 };
 
 #define BEGIN_INIT_SIM_OBJECT_PARAMS(OBJ_CLASS)			\
-OBJ_CLASS##Builder::OBJ_CLASS##Builder(ConfigNode *configNode)	\
-    : SimObjectBuilder(configNode),
+    OBJ_CLASS##Builder::OBJ_CLASS##Builder(const std::string &iSec) \
+    : SimObjectBuilder(iSec),
 
 
 #define END_INIT_SIM_OBJECT_PARAMS(OBJ_CLASS)			\
@@ -176,9 +168,9 @@ OBJ_CLASS *OBJ_CLASS##Builder::create()
 
 #define REGISTER_SIM_OBJECT(CLASS_NAME, OBJ_CLASS)		\
 SimObjectBuilder *						\
-new##OBJ_CLASS##Builder(ConfigNode *configNode)			\
+new##OBJ_CLASS##Builder(const std::string &iniSection)          \
 {								\
-    return new OBJ_CLASS##Builder(configNode);			\
+    return new OBJ_CLASS##Builder(iniSection);			\
 }								\
                                                                 \
 SimObjectClass the##OBJ_CLASS##Class(CLASS_NAME,		\
diff --git a/src/sim/byteswap.hh b/src/sim/byteswap.hh
index a3138a25e..f1f244150 100644
--- a/src/sim/byteswap.hh
+++ b/src/sim/byteswap.hh
@@ -25,7 +25,8 @@
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
- * Authors: Gabe Black
+ * Authors: Ali Saidi
+ *          Nathan Binkert
  */
 
 //The purpose of this file is to provide endainness conversion utility
@@ -35,6 +36,7 @@
 #ifndef __SIM_BYTE_SWAP_HH__
 #define __SIM_BYTE_SWAP_HH__
 
+#include "base/misc.hh"
 #include "sim/host.hh"
 
 // This lets us figure out what the byte order of the host system is
@@ -48,6 +50,10 @@
 #include <machine/endian.h>
 #endif
 
+#if defined(__APPLE__)
+#include <libkern/OSByteOrder.h>
+#endif
+
 //These functions actually perform the swapping for parameters
 //of various bit lengths
 static inline uint64_t
@@ -55,6 +61,8 @@ swap_byte64(uint64_t x)
 {
 #if defined(linux)
     return bswap_64(x);
+#elif defined(__APPLE__)
+    return OSSwapInt64(x);
 #else
     return  (uint64_t)((((uint64_t)(x) & 0xff) << 56) |
             ((uint64_t)(x) & 0xff00ULL) << 40 |
@@ -72,6 +80,8 @@ swap_byte32(uint32_t x)
 {
 #if defined(linux)
     return bswap_32(x);
+#elif defined(__APPLE__)
+    return OSSwapInt32(x);
 #else
     return  (uint32_t)(((uint32_t)(x) & 0xff) << 24 |
             ((uint32_t)(x) & 0xff00) << 8 | ((uint32_t)(x) & 0xff0000) >> 8 |
@@ -84,31 +94,31 @@ swap_byte16(uint16_t x)
 {
 #if defined(linux)
     return bswap_16(x);
+#elif defined(__APPLE__)
+    return OSSwapInt16(x);
 #else
     return (uint16_t)(((uint16_t)(x) & 0xff) << 8 |
                       ((uint16_t)(x) & 0xff00) >> 8);
 #endif
 }
 
-//This lets the compiler figure out how to call the swap_byte functions above
-//for different data types.
-static inline uint64_t swap_byte(uint64_t x) {return swap_byte64(x);}
-static inline int64_t swap_byte(int64_t x) {return swap_byte64((uint64_t)x);}
-static inline uint32_t swap_byte(uint32_t x) {return swap_byte32(x);}
-static inline int32_t swap_byte(int32_t x) {return swap_byte32((uint32_t)x);}
-//This is to prevent the following two functions from compiling on
-//64bit machines. It won't detect everything, so it should be changed.
-#ifndef __x86_64__
-static inline long swap_byte(long x) {return swap_byte32((long)x);}
-static inline unsigned long swap_byte(unsigned long x)
-                                { return swap_byte32((unsigned long)x);}
-#endif
-static inline uint16_t swap_byte(uint16_t x) {return swap_byte32(x);}
-static inline int16_t swap_byte(int16_t x) {return swap_byte16((uint16_t)x);}
-static inline uint8_t swap_byte(uint8_t x) {return x;}
-static inline int8_t swap_byte(int8_t x) {return x;}
-static inline double swap_byte(double x) {return swap_byte64((uint64_t)x);}
-static inline float swap_byte(float x) {return swap_byte32((uint32_t)x);}
+// Generic front end to the swap_byteNN() functions above: the
+// branch is picked by sizeof(T), which is a compile-time constant.
+// NOTE(review): the C-style casts convert by value, so a float or
+// double argument is not bit-swapped -- confirm callers pass integers.
+template <typename T>
+static inline T swap_byte(T x) {
+    if (sizeof(T) == 8)
+        return swap_byte64((uint64_t)x);
+    else if (sizeof(T) == 4)
+        return swap_byte32((uint32_t)x);
+    else if (sizeof(T) == 2)
+        return swap_byte16((uint16_t)x);
+    else if (sizeof(T) == 1)
+        return x;
+    else
+        panic("Can't byte-swap values larger than 64 bits");
+}
 
 //The conversion functions with fixed endianness on both ends don't need to
 //be in a namespace
diff --git a/src/sim/debug.cc b/src/sim/debug.cc
index b82219f7d..be9566836 100644
--- a/src/sim/debug.cc
+++ b/src/sim/debug.cc
@@ -127,12 +127,12 @@ DebugContext::checkParams()
 // handy function to schedule DebugBreakEvent on main event queue
 // (callable from debugger)
 //
-extern "C" void sched_break_cycle(Tick when)
+void sched_break_cycle(Tick when)
 {
     new DebugBreakEvent(&mainEventQueue, when);
 }
 
-extern "C" void eventq_dump()
+void eventq_dump()
 {
     mainEventQueue.dump();
 }
diff --git a/src/sim/faults.hh b/src/sim/faults.hh
index 23385c649..00264d8fc 100644
--- a/src/sim/faults.hh
+++ b/src/sim/faults.hh
@@ -54,11 +54,7 @@ class FaultBase : public RefCounted
 {
   public:
     virtual FaultName name() = 0;
-#if FULL_SYSTEM
     virtual void invoke(ThreadContext * tc);
-#else
-    virtual void invoke(ThreadContext * tc);
-#endif
 //    template<typename T>
 //    bool isA() {return dynamic_cast<T *>(this);}
     virtual bool isMachineCheckFault() {return false;}
diff --git a/src/sim/main.cc b/src/sim/main.cc
index 741926056..d0725ab37 100644
--- a/src/sim/main.cc
+++ b/src/sim/main.cc
@@ -41,7 +41,7 @@
 #include <libgen.h>
 #include <stdlib.h>
 #include <signal.h>
-#include <unistd.h>
+#include <getopt.h>
 
 #include <list>
 #include <string>
@@ -57,10 +57,12 @@
 #include "base/time.hh"
 #include "cpu/base.hh"
 #include "cpu/smt.hh"
+#include "mem/mem_object.hh"
+#include "mem/port.hh"
 #include "sim/async.hh"
 #include "sim/builder.hh"
-#include "sim/configfile.hh"
 #include "sim/host.hh"
+#include "sim/serialize.hh"
 #include "sim/sim_events.hh"
 #include "sim/sim_exit.hh"
 #include "sim/sim_object.hh"
@@ -113,40 +115,11 @@ abortHandler(int sigtype)
 #endif
 }
 
-
-const char *briefCopyright =
-"Copyright (c) 2001-2006\n"
-"The Regents of The University of Michigan\n"
-"All Rights Reserved\n";
-
-/// Print welcome message.
-void
-sayHello(ostream &out)
-{
-    extern const char *compileDate;     // from date.cc
-
-    ccprintf(out, "M5 Simulator System\n");
-    // display copyright
-    ccprintf(out, "%s\n", briefCopyright);
-    ccprintf(out, "M5 compiled %d\n", compileDate);
-    ccprintf(out, "M5 started %s\n", Time::start);
-
-    char *host = getenv("HOSTNAME");
-    if (!host)
-        host = getenv("HOST");
-
-    if (host)
-        ccprintf(out, "M5 executing on %s\n", host);
-}
-
-
-extern "C" { void init_main(); }
+extern "C" { void init_cc_main(); }
 
 int
 main(int argc, char **argv)
 {
-    sayHello(cerr);
-
     signal(SIGFPE, SIG_IGN);		// may occur on misspeculated paths
     signal(SIGTRAP, SIG_IGN);
     signal(SIGUSR1, dumpStatsHandler);		// dump intermediate stats
@@ -157,119 +130,145 @@ main(int argc, char **argv)
     Py_SetProgramName(argv[0]);
 
     // default path to m5 python code is the currently executing
-    // file... Python ZipImporter will find embedded zip archive
-    char *pythonpath = argv[0];
-
-    bool interactive = false;
-    bool getopt_done = false;
-    do {
-        switch (getopt(argc, argv, "+p:i")) {
-            // -p <path> prepends <path> to PYTHONPATH instead of
-            // using built-in zip archive.  Useful when
-            // developing/debugging changes to built-in Python
-            // libraries, as the new Python can be tested without
-            // building a new m5 binary.
-          case 'p':
-            pythonpath = optarg;
-            break;
-
-            // -i forces entry into interactive mode after the
-            // supplied script is executed (just like the -i option to
-            // the Python interpreter).
-          case 'i':
-            interactive = true;
-            break;
-
-          case -1:
-            getopt_done = true;
-            break;
-
-          default:
-            fatal("Unrecognized option %c\n", optopt);
-        }
-    } while (!getopt_done);
-
-    // Fix up argc & argv to hide arguments we just processed.
-    // getopt() sets optind to the index of the first non-processed
-    // argv element.
-    argc -= optind;
-    argv += optind;
-
-    // Set up PYTHONPATH to make sure the m5 module is found
-    string newpath(pythonpath);
+    // file... Python ZipImporter will find embedded zip archive.
+    // The M5_ARCHIVE environment variable can be used to override this.
+    char *m5_archive = getenv("M5_ARCHIVE");
+    string pythonpath = m5_archive ? m5_archive : argv[0];
 
     char *oldpath = getenv("PYTHONPATH");
     if (oldpath != NULL) {
-        newpath += ":";
-        newpath += oldpath;
+        pythonpath += ":";
+        pythonpath += oldpath;
     }
 
-    if (setenv("PYTHONPATH", newpath.c_str(), true) == -1)
+    if (setenv("PYTHONPATH", pythonpath.c_str(), true) == -1)
         fatal("setenv: %s\n", strerror(errno));
 
     // initialize embedded Python interpreter
     Py_Initialize();
     PySys_SetArgv(argc, argv);
 
-    // initialize SWIG 'main' module
-    init_main();
+    // initialize SWIG 'cc_main' module
+    init_cc_main();
 
-    if (argc > 0) {
-        // extra arg(s): first is script file, remaining ones are args
-        // to script file
-        char *filename = argv[0];
-        FILE *fp = fopen(filename, "r");
-        if (!fp) {
-            fatal("cannot open file '%s'\n", filename);
-        }
+    PyRun_SimpleString("import m5");
+    PyRun_SimpleString("m5.main()");
 
-        PyRun_AnyFile(fp, filename);
-    } else {
-        // no script file argument... force interactive prompt
-        interactive = true;
-    }
+    // clean up Python interpreter.
+    Py_Finalize();
+}
+
+
+void
+setOutputDir(const string &dir)
+{
+    simout.setDirectory(dir);
+}
+
+
+IniFile inifile;
+
+SimObject *
+createSimObject(const string &name)
+{
+    return SimObjectClass::createObject(inifile, name);
+}
 
-    if (interactive) {
-        // The following code to import readline was copied from Python
-        // 2.4.3's Modules/main.c.
-        // Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006
-        // Python Software Foundation; All Rights Reserved
-        // We should only enable this if we're actually using an
-        // interactive prompt.
-        PyObject *v;
-        v = PyImport_ImportModule("readline");
-        if (v == NULL)
-            PyErr_Clear();
-        else
-            Py_DECREF(v);
-
-        PyRun_InteractiveLoop(stdin, "stdin");
+
+/**
+ * Pointer to the Python function that maps names to SimObjects.
+ */
+PyObject *resolveFunc = NULL;
+
+/**
+ * Convert a pointer to the Python object that SWIG wraps around a C++
+ * SimObject pointer back to the actual C++ pointer.  See main.i.
+ */
+extern "C" SimObject *convertSwigSimObjectPtr(PyObject *);
+
+
+SimObject *
+resolveSimObject(const string &name)
+{
+    PyObject *pyPtr = PyEval_CallFunction(resolveFunc, "(s)", name.c_str());
+    if (pyPtr == NULL) {
+        PyErr_Print();
+        panic("resolveSimObject: failure on call to Python for %s", name);
     }
 
-    // clean up Python intepreter.
-    Py_Finalize();
+    SimObject *simObj = convertSwigSimObjectPtr(pyPtr);
+    if (simObj == NULL)
+        panic("resolveSimObject: failure on pointer conversion for %s", name);
+
+    return simObj;
 }
 
 
-/// Initialize C++ configuration.  Exported to Python via SWIG; invoked
-/// from m5.instantiate().
+/**
+ * Load config.ini into C++ database.  Exported to Python via SWIG;
+ * invoked from m5.instantiate().
+ */
 void
-initialize()
+loadIniFile(PyObject *_resolveFunc)
 {
+    resolveFunc = _resolveFunc;
     configStream = simout.find("config.out");
 
     // The configuration database is now complete; start processing it.
-    IniFile inifile;
     inifile.load("config.ini");
 
     // Initialize statistics database
     Stats::InitSimStats();
+}
 
-    // Now process the configuration hierarchy and create the SimObjects.
-    ConfigHierarchy configHierarchy(inifile);
-    configHierarchy.build();
-    configHierarchy.createSimObjects();
 
+/**
+ * Look up a MemObject port (helper for connectPorts()); tolerates NULL so.
+ */
+Port *
+lookupPort(SimObject *so, const std::string &name, int i)
+{
+    MemObject *mo = dynamic_cast<MemObject *>(so);
+    if (mo == NULL) {
+        warn("error casting SimObject %s to MemObject",
+             so ? so->name() : std::string("(null)"));
+        return NULL;
+    }
+    Port *p = mo->getPort(name, i);
+    if (p == NULL)
+        warn("error looking up port %s on object %s", name, so->name());
+    return p;
+}
+
+
+/**
+ * Wire two MemObject ports together as peers.  Called from Python via
+ * SWIG; returns 1 on success, 0 if either port lookup fails.
+ */
+int
+connectPorts(SimObject *o1, const std::string &name1, int i1,
+             SimObject *o2, const std::string &name2, int i2)
+{
+    Port *first = lookupPort(o1, name1, i1);
+    Port *second = lookupPort(o2, name2, i2);
+
+    if (!first || !second) {
+        warn("connectPorts: port lookup error");
+        return 0;
+    }
+    // Peering is set in both directions.
+    first->setPeer(second);
+    second->setPeer(first);
+    return 1;
+}
+
+/**
+ * Do final initialization steps after object construction but before
+ * start of simulation.
+ */
+void
+finalInit()
+{
     // Parse and check all non-config-hierarchy parameters.
     ParamContext::parseAllContexts(inifile);
     ParamContext::checkAllContexts();
@@ -277,28 +276,16 @@ initialize()
     // Echo all parameter settings to stats file as well.
     ParamContext::showAllContexts(*configStream);
 
-    // Any objects that can't connect themselves until after construction should
-    // do so now
-    SimObject::connectAll();
-
     // Do a second pass to finish initializing the sim objects
     SimObject::initAll();
 
     // Restore checkpointed state, if any.
+#if 0
     configHierarchy.unserializeSimObjects();
-
-    // Done processing the configuration database.
-    // Check for unreferenced entries.
-    if (inifile.printUnreferenced())
-        panic("unreferenced sections/entries in the intermediate ini file");
+#endif
 
     SimObject::regAllStats();
 
-    // uncomment the following to get PC-based execution-time profile
-#ifdef DO_PROFILE
-    init_profile((char *)&_init, (char *)&_fini);
-#endif
-
     // Check to make sure that the stats package is properly initialized
     Stats::check();
 
@@ -393,6 +380,37 @@ simulate(Tick num_cycles = -1)
     // not reached... only exit is return on SimLoopExitEvent
 }
 
+Event *
+createCountedDrain()
+{
+    return new CountedDrainEvent();
+}
+
+void
+cleanupCountedDrain(Event *counted_drain)
+{
+    CountedDrainEvent *event =
+        dynamic_cast<CountedDrainEvent *>(counted_drain);
+    if (event == NULL) {
+        fatal("Called cleanupCountedDrain() on an event that was not "
+              "a CountedDrainEvent.");
+    }
+    assert(event->getCount() == 0);
+    delete event;
+}
+
+void
+serializeAll(const std::string &cpt_dir)
+{
+    Serializable::serializeAll(cpt_dir);
+}
+
+void
+unserializeAll(const std::string &cpt_dir)
+{
+    Serializable::unserializeAll(cpt_dir);
+}
+
 /**
  * Queue of C++ callbacks to invoke on simulator exit.
  */
@@ -407,6 +425,16 @@ registerExitCallback(Callback *callback)
     exitCallbacks.add(callback);
 }
 
+// Downcast a SimObject to BaseCPU; warns and returns NULL on mismatch.
+BaseCPU *
+convertToBaseCPUPtr(SimObject *obj)
+{
+    BaseCPU *cpu = dynamic_cast<BaseCPU *>(obj);
+    if (!cpu)
+        warn("Casting to BaseCPU pointer failed");
+    return cpu;
+}
+
 /**
  * Do C++ simulator exit processing.  Exported to SWIG to be invoked
  * when simulator terminates via Python's atexit mechanism.
diff --git a/src/sim/param.cc b/src/sim/param.cc
index 7f648b8e1..b1c50946b 100644
--- a/src/sim/param.cc
+++ b/src/sim/param.cc
@@ -39,8 +39,6 @@
 #include "base/range.hh"
 #include "base/str.hh"
 #include "base/trace.hh"
-#include "sim/config_node.hh"
-#include "sim/configfile.hh"
 #include "sim/param.hh"
 #include "sim/sim_object.hh"
 
@@ -521,7 +519,9 @@ parseSimObjectParam(ParamContext *context, const string &s, SimObject *&value)
         obj = NULL;
     }
     else {
-        obj = context->resolveSimObject(s);
+        // defined in main.cc
+        extern SimObject *resolveSimObject(const string &);
+        obj = resolveSimObject(s);
 
         if (obj == NULL)
             return false;
@@ -696,22 +696,6 @@ ParamContext::printErrorProlog(ostream &os)
 }
 
 //
-// Resolve an object name to a SimObject pointer.  The object will be
-// created as a side-effect if necessary.  If the name contains a
-// colon (e.g., "iq:IQ"), then the object is local (invisible to
-// outside this context).  If there is no colon, the name needs to be
-// resolved through the configuration hierarchy (only possible for
-// SimObjectBuilder objects, which return non-NULL for configNode()).
-//
-SimObject *
-ParamContext::resolveSimObject(const string &name)
-{
-    ConfigNode *n = getConfigNode();
-    return n ? n->resolveSimObject(name) : NULL;
-}
-
-
-//
 // static method: call parseParams() on all registered contexts
 //
 void
diff --git a/src/sim/param.hh b/src/sim/param.hh
index 49db17df9..1bc55c125 100644
--- a/src/sim/param.hh
+++ b/src/sim/param.hh
@@ -36,10 +36,10 @@
 #include <string>
 #include <vector>
 
-#include "sim/configfile.hh"
 #include "sim/startup.hh"
 
 // forward decls
+class IniFile;
 class BaseParam;
 class SimObject;
 
@@ -132,18 +132,10 @@ class ParamContext : protected StartupCallback
     // print context information for parameter error
     virtual void printErrorProlog(std::ostream &);
 
-    // resolve a SimObject name in this context to an object pointer.
-    virtual SimObject *resolveSimObject(const std::string &name);
-
     // generate the name for this instance of this context (used as a
     // prefix to create unique names in resolveSimObject()
     virtual const std::string &getInstanceName() { return iniSection; }
 
-    // return the configuration hierarchy node for this context.  Bare
-    // ParamContext objects have no corresponding node, so the default
-    // implementation returns NULL.
-    virtual ConfigNode *getConfigNode() { return NULL; }
-
     // Parse all parameters registered with all ParamContext objects.
     static void parseAllContexts(IniFile &iniFile);
 
diff --git a/src/sim/process.cc b/src/sim/process.cc
index 5080c3ac1..f989300a3 100644
--- a/src/sim/process.cc
+++ b/src/sim/process.cc
@@ -326,11 +326,10 @@ LiveProcess::argsInit(int intSize, int pageSize)
     // set bottom of stack
     stack_min = stack_base - space_needed;
     // align it
-    stack_min &= ~(intSize-1);
+    stack_min = roundDown(stack_min, pageSize);
     stack_size = stack_base - stack_min;
     // map memory
-    pTable->allocate(roundDown(stack_min, pageSize),
-                     roundUp(stack_size, pageSize));
+    pTable->allocate(stack_min, roundUp(stack_size, pageSize));
 
     // map out initial stack contents
     Addr argv_array_base = stack_min + intSize; // room for argc
@@ -359,7 +358,10 @@ LiveProcess::argsInit(int intSize, int pageSize)
     Addr prog_entry = objFile->entryPoint();
     threadContexts[0]->setPC(prog_entry);
     threadContexts[0]->setNextPC(prog_entry + sizeof(MachInst));
+
+#if THE_ISA != ALPHA_ISA //e.g. MIPS or Sparc
     threadContexts[0]->setNextNPC(prog_entry + (2 * sizeof(MachInst)));
+#endif
 
     num_processes++;
 }
diff --git a/src/sim/pseudo_inst.cc b/src/sim/pseudo_inst.cc
index ae52cdd41..fcf0b957a 100644
--- a/src/sim/pseudo_inst.cc
+++ b/src/sim/pseudo_inst.cc
@@ -37,7 +37,6 @@
 #include "sim/pseudo_inst.hh"
 #include "arch/vtophys.hh"
 #include "cpu/base.hh"
-#include "cpu/sampler/sampler.hh"
 #include "cpu/thread_context.hh"
 #include "cpu/quiesce_event.hh"
 #include "kern/kernel_stats.hh"
@@ -52,8 +51,6 @@
 
 using namespace std;
 
-extern Sampler *SampCPU;
-
 using namespace Stats;
 using namespace TheISA;
 
@@ -209,12 +206,7 @@ namespace AlphaPseudo
     {
         if (!doCheckpointInsts)
             return;
-
-
-        Tick when = curTick + delay * Clock::Int::ns;
-        Tick repeat = period * Clock::Int::ns;
-
-        Checkpoint::setup(when, repeat);
+        exitSimLoop("checkpoint");
     }
 
     uint64_t
@@ -286,7 +278,6 @@ namespace AlphaPseudo
 
     void switchcpu(ThreadContext *tc)
     {
-        if (SampCPU)
-            SampCPU->switchCPUs();
+        exitSimLoop("switchcpu");
     }
 }
diff --git a/src/sim/serialize.cc b/src/sim/serialize.cc
index 5270802d1..6a1d084b7 100644
--- a/src/sim/serialize.cc
+++ b/src/sim/serialize.cc
@@ -25,7 +25,8 @@
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
- * Authors: Erik Hallnor
+ * Authors: Nathan Binkert
+ *          Erik Hallnor
  *          Steve Reinhardt
  */
 
@@ -44,7 +45,6 @@
 #include "base/output.hh"
 #include "base/str.hh"
 #include "base/trace.hh"
-#include "sim/config_node.hh"
 #include "sim/eventq.hh"
 #include "sim/param.hh"
 #include "sim/serialize.hh"
@@ -231,8 +231,9 @@ Globals::unserialize(Checkpoint *cp)
 }
 
 void
-Serializable::serializeAll()
+Serializable::serializeAll(const std::string &cpt_dir)
 {
+    setCheckpointDir(cpt_dir);
     string dir = Checkpoint::dir();
     if (mkdir(dir.c_str(), 0775) == -1 && errno != EEXIST)
             fatal("couldn't mkdir %s\n", dir);
@@ -244,56 +245,42 @@ Serializable::serializeAll()
 
     globals.serialize(outstream);
     SimObject::serializeAll(outstream);
-
-    assert(Serializable::ckptPrevCount + 1 == Serializable::ckptCount);
-    Serializable::ckptPrevCount++;
-    if (ckptMaxCount && ++ckptCount >= ckptMaxCount)
-        exitSimLoop(curTick + 1, "Maximum number of checkpoints dropped");
-
 }
 
-
 void
-Serializable::unserializeGlobals(Checkpoint *cp)
+Serializable::unserializeAll(const std::string &cpt_dir)
 {
-    globals.unserialize(cp);
-}
-
-
-class SerializeEvent : public Event
-{
-  protected:
-    Tick repeat;
+    setCheckpointDir(cpt_dir);
+    string dir = Checkpoint::dir();
+    string cpt_file = dir + Checkpoint::baseFilename;
+    string section = "";
 
-  public:
-    SerializeEvent(Tick _when, Tick _repeat);
-    virtual void process();
-    virtual void serialize(std::ostream &os)
-    {
-        panic("Cannot serialize the SerializeEvent");
-    }
+    DPRINTFR(Config, "Loading checkpoint dir '%s'\n",
+             dir);
+    Checkpoint *cp = new Checkpoint(dir, section);
+    unserializeGlobals(cp);
 
-};
-
-SerializeEvent::SerializeEvent(Tick _when, Tick _repeat)
-    : Event(&mainEventQueue, Serialize_Pri), repeat(_repeat)
-{
-    setFlags(AutoDelete);
-    schedule(_when);
+    SimObject::unserializeAll(cp);
 }
 
 void
-SerializeEvent::process()
+Serializable::unserializeGlobals(Checkpoint *cp)
 {
-    Serializable::serializeAll();
-    if (repeat)
-        schedule(curTick + repeat);
+    globals.unserialize(cp);
 }
 
 const char *Checkpoint::baseFilename = "m5.cpt";
 
 static string checkpointDirBase;
 
+void
+setCheckpointDir(const std::string &name)
+{
+    checkpointDirBase = name;   // empty name stays empty: no last char
+    if (!name.empty() && name[name.size() - 1] != '/')
+        checkpointDirBase += "/";
+}
+
 string
 Checkpoint::dir()
 {
@@ -304,75 +291,11 @@ Checkpoint::dir()
 }
 
 void
-Checkpoint::setup(Tick when, Tick period)
+debug_serialize(const std::string &cpt_dir)
 {
-    new SerializeEvent(when, period);
+    Serializable::serializeAll(cpt_dir);
 }
 
-class SerializeParamContext : public ParamContext
-{
-  private:
-    SerializeEvent *event;
-
-  public:
-    SerializeParamContext(const string &section);
-    ~SerializeParamContext();
-    void checkParams();
-};
-
-SerializeParamContext serialParams("serialize");
-
-Param<string> serialize_dir(&serialParams, "dir",
-                            "dir to stick checkpoint in "
-                            "(sprintf format with cycle #)");
-
-Param<Counter> serialize_cycle(&serialParams,
-                                "cycle",
-                                "cycle to serialize",
-                                0);
-
-Param<Counter> serialize_period(&serialParams,
-                                "period",
-                                "period to repeat serializations",
-                                0);
-
-Param<int> serialize_count(&serialParams, "count",
-                           "maximum number of checkpoints to drop");
-
-SerializeParamContext::SerializeParamContext(const string &section)
-    : ParamContext(section), event(NULL)
-{ }
-
-SerializeParamContext::~SerializeParamContext()
-{
-}
-
-void
-SerializeParamContext::checkParams()
-{
-    checkpointDirBase = simout.resolve(serialize_dir);
-
-    // guarantee that directory ends with a '/'
-    if (checkpointDirBase[checkpointDirBase.size() - 1] != '/')
-        checkpointDirBase += "/";
-
-    if (serialize_cycle > 0)
-        Checkpoint::setup(serialize_cycle, serialize_period);
-
-    Serializable::ckptMaxCount = serialize_count;
-}
-
-void
-debug_serialize()
-{
-    Serializable::serializeAll();
-}
-
-void
-debug_serialize(Tick when)
-{
-    new SerializeEvent(when, 0);
-}
 
 ////////////////////////////////////////////////////////////////////////
 //
@@ -442,9 +365,8 @@ Serializable::create(Checkpoint *cp, const std::string &section)
 }
 
 
-Checkpoint::Checkpoint(const std::string &cpt_dir, const std::string &path,
-                       const ConfigNode *_configNode)
-    : db(new IniFile), basePath(path), configNode(_configNode), cptDir(cpt_dir)
+Checkpoint::Checkpoint(const std::string &cpt_dir, const std::string &path)
+    : db(new IniFile), basePath(path), cptDir(cpt_dir)
 {
     string filename = cpt_dir + "/" + Checkpoint::baseFilename;
     if (!db->load(filename)) {
@@ -470,9 +392,6 @@ Checkpoint::findObj(const std::string &section, const std::string &entry,
     if (!db->find(section, entry, path))
         return false;
 
-    if ((value = configNode->resolveSimObject(path)) != NULL)
-        return true;
-
     if ((value = objMap[path]) != NULL)
         return true;
 
diff --git a/src/sim/serialize.hh b/src/sim/serialize.hh
index 1eb721cf4..880fb0785 100644
--- a/src/sim/serialize.hh
+++ b/src/sim/serialize.hh
@@ -25,7 +25,8 @@
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
- * Authors: Erik Hallnor
+ * Authors: Nathan Binkert
+ *          Erik Hallnor
  *          Steve Reinhardt
  */
 
@@ -42,8 +43,8 @@
 #include <map>
 
 #include "sim/host.hh"
-#include "sim/configfile.hh"
 
+class IniFile;
 class Serializable;
 class Checkpoint;
 
@@ -125,7 +126,8 @@ class Serializable
     static int ckptCount;
     static int ckptMaxCount;
     static int ckptPrevCount;
-    static void serializeAll();
+    static void serializeAll(const std::string &cpt_dir);
+    static void unserializeAll(const std::string &cpt_dir);
     static void unserializeGlobals(Checkpoint *cp);
 };
 
@@ -177,7 +179,7 @@ class SerializableClass
     // an optional config hierarchy node (specified by the third
     // argument).  A pointer to the new SerializableBuilder is returned.
     typedef Serializable *(*CreateFunc)(Checkpoint *cp,
-                                         const std::string &section);
+                                        const std::string &section);
 
     static std::map<std::string,CreateFunc> *classMap;
 
@@ -191,7 +193,7 @@ class SerializableClass
     // create Serializable given name of class and pointer to
     // configuration hierarchy node
     static Serializable *createObject(Checkpoint *cp,
-                                       const std::string &section);
+                                      const std::string &section);
 };
 
 //
@@ -203,18 +205,19 @@ class SerializableClass
 SerializableClass the##OBJ_CLASS##Class(CLASS_NAME,			   \
                                          OBJ_CLASS::createForUnserialize);
 
+void
+setCheckpointDir(const std::string &name);
+
 class Checkpoint
 {
   private:
 
     IniFile *db;
     const std::string basePath;
-    const ConfigNode *configNode;
     std::map<std::string, Serializable*> objMap;
 
   public:
-    Checkpoint(const std::string &cpt_dir, const std::string &path,
-               const ConfigNode *_configNode);
+    Checkpoint(const std::string &cpt_dir, const std::string &path);
 
     const std::string cptDir;
 
@@ -238,9 +241,6 @@ class Checkpoint
 
     // Filename for base checkpoint file within directory.
     static const char *baseFilename;
-
-    // Set up a checkpoint creation event or series of events.
-    static void setup(Tick when, Tick period = 0);
 };
 
 #endif // __SERIALIZE_HH__
diff --git a/src/sim/sim_events.cc b/src/sim/sim_events.cc
index b7901832d..d9e8bdeaa 100644
--- a/src/sim/sim_events.cc
+++ b/src/sim/sim_events.cc
@@ -78,6 +78,14 @@ exitSimLoop(const std::string &message, int exit_code)
     exitSimLoop(curTick, message, exit_code);
 }
 
+void
+CountedDrainEvent::process()
+{
+    if (--count == 0) {
+        exitSimLoop("Finished drain");
+    }
+}
+
 //
 // constructor: automatically schedules at specified time
 //
diff --git a/src/sim/sim_events.hh b/src/sim/sim_events.hh
index 4f305ad38..3c4a9dd05 100644
--- a/src/sim/sim_events.hh
+++ b/src/sim/sim_events.hh
@@ -44,6 +44,11 @@ class SimLoopExitEvent : public Event
     int code;
 
   public:
+    // Default constructor, only really used for derived classes; code is
+    // zeroed (matching the other constructor's default), not left unset.
+    SimLoopExitEvent() : Event(&mainEventQueue, Sim_Exit_Pri), code(0)
+    { }
+
     SimLoopExitEvent(Tick _when, const std::string &_cause, int c = 0)
         : Event(&mainEventQueue, Sim_Exit_Pri), cause(_cause),
           code(c)
@@ -62,6 +67,22 @@ class SimLoopExitEvent : public Event
     virtual const char *description();
 };
 
+class CountedDrainEvent : public SimLoopExitEvent
+{
+  private:
+    // Count of how many objects have not yet drained
+    int count;
+  public:
+    CountedDrainEvent()
+        : count(0)
+    { }
+    void process();
+
+    void setCount(int _count) { count = _count; }
+    // Read-only accessor, hence const-qualified.
+    int getCount() const { return count; }
+};
+
 //
 // Event class to terminate simulation after 'n' related events have
 // occurred using a shared counter: used to terminate when *all*
diff --git a/src/sim/sim_object.cc b/src/sim/sim_object.cc
index 117ca9325..d12b06b7a 100644
--- a/src/sim/sim_object.cc
+++ b/src/sim/sim_object.cc
@@ -37,8 +37,6 @@
 #include "base/misc.hh"
 #include "base/trace.hh"
 #include "base/stats/events.hh"
-#include "base/serializer.hh"
-#include "sim/configfile.hh"
 #include "sim/host.hh"
 #include "sim/sim_object.hh"
 #include "sim/stats.hh"
@@ -74,6 +72,7 @@ SimObject::SimObject(Params *p)
 
     doRecordEvent = !Stats::event_ignore.match(name());
     simObjectList.push_back(this);
+    state = Running;
 }
 
 //
@@ -89,6 +88,7 @@ SimObject::SimObject(const string &_name)
 
     doRecordEvent = !Stats::event_ignore.match(name());
     simObjectList.push_back(this);
+    state = Running;
 }
 
 void
@@ -220,6 +220,24 @@ SimObject::serializeAll(ostream &os)
    }
 }
 
+void
+SimObject::unserializeAll(Checkpoint *cp)
+{
+    SimObjectList::reverse_iterator ri = simObjectList.rbegin();
+    SimObjectList::reverse_iterator rend = simObjectList.rend();
+
+    for (; ri != rend; ++ri) {
+        SimObject *obj = *ri;
+        DPRINTFR(Config, "Unserializing '%s'\n",
+                 obj->name());
+        if(cp->sectionExists(obj->name()))
+            obj->unserialize(cp, obj->name());
+        else
+            warn("Not unserializing '%s': no section found in checkpoint.\n",
+                 obj->name());
+   }
+}
+
 #ifdef DEBUG
 //
 // static function: flag which objects should have the debugger break
@@ -237,7 +255,6 @@ SimObject::debugObjectBreak(const string &objs)
    }
 }
 
-extern "C"
 void
 debugObjectBreak(const char *objs)
 {
@@ -252,10 +269,35 @@ SimObject::recordEvent(const std::string &stat)
         Stats::recordEvent(stat);
 }
 
+unsigned int
+SimObject::drain(Event *drain_event)
+{
+    state = Drained;
+    return 0;
+}
+
+void
+SimObject::resume()
+{
+    state = Running;
+}
+
+void
+SimObject::setMemoryMode(State new_mode)
+{
+    panic("setMemoryMode() should only be called on systems");
+}
+
+void
+SimObject::switchOut()
+{
+    panic("Unimplemented!");
+}
+
 void
-SimObject::drain(Serializer *serializer)
+SimObject::takeOverFrom(BaseCPU *cpu)
 {
-    serializer->signalDrained();
+    panic("Unimplemented!");
 }
 
 DEFINE_SIM_OBJECT_CLASS_NAME("SimObject", SimObject)
diff --git a/src/sim/sim_object.hh b/src/sim/sim_object.hh
index 84e9376a0..38f2bdd23 100644
--- a/src/sim/sim_object.hh
+++ b/src/sim/sim_object.hh
@@ -44,7 +44,8 @@
 #include "sim/serialize.hh"
 #include "sim/startup.hh"
 
-class Serializer;
+class BaseCPU;
+class Event;
 
 /*
  * Abstract superclass for simulation objects.  Represents things that
@@ -58,15 +59,25 @@ class SimObject : public Serializable, protected StartupCallback
         std::string name;
     };
 
+    enum State {
+        Running,
+        Draining,
+        Drained
+    };
+  private:
+    State state;
+
   protected:
     Params *_params;
 
+    void changeState(State new_state) { state = new_state; }
+
   public:
     const Params *params() const { return _params; }
 
-  private:
-    friend class Serializer;
+    State getState() { return state; }
 
+  private:
     typedef std::vector<SimObject *> SimObjectList;
 
     // list of all instantiated simulation objects
@@ -100,13 +111,18 @@ class SimObject : public Serializable, protected StartupCallback
 
     // static: call nameOut() & serialize() on all SimObjects
     static void serializeAll(std::ostream &);
+    static void unserializeAll(Checkpoint *cp);
 
     // Methods to drain objects in order to take checkpoints
     // Or switch from timing -> atomic memory model
-    virtual void drain(Serializer *serializer);
-    virtual void resume() { return;} ;
-    virtual void serializationComplete()
-    { assert(0 && "Unimplemented"); };
+    // Drain returns 0 if the simobject can drain immediately or
+    // the number of times the drain_event's process function will be called
+    // before the object will be done draining. Normally this should be 1
+    virtual unsigned int drain(Event *drain_event);
+    virtual void resume();
+    virtual void setMemoryMode(State new_mode);
+    virtual void switchOut();
+    virtual void takeOverFrom(BaseCPU *cpu);
 
 #ifdef DEBUG
   public:
diff --git a/src/sim/stat_control.cc b/src/sim/stat_control.cc
index f7fc03d74..041830ab7 100644
--- a/src/sim/stat_control.cc
+++ b/src/sim/stat_control.cc
@@ -221,8 +221,7 @@ SetupEvent(int flags, Tick when, Tick repeat)
 
 /* namespace Stats */ }
 
-extern "C" void
-debugDumpStats()
+void debugDumpStats()
 {
     Stats::DumpNow();
 }
diff --git a/src/sim/syscall_emul.cc b/src/sim/syscall_emul.cc
index 848b6f869..e72890612 100644
--- a/src/sim/syscall_emul.cc
+++ b/src/sim/syscall_emul.cc
@@ -27,7 +27,6 @@
  *
  * Authors: Steve Reinhardt
  *          Ali Saidi
- *          Korey Sewell
  */
 
 #include <fcntl.h>
@@ -92,7 +91,9 @@ SyscallReturn
 exitFunc(SyscallDesc *desc, int callnum, Process *process,
          ThreadContext *tc)
 {
-    exitSimLoop("target called exit()", tc->getSyscallArg(0) & 0xff);
+    if (tc->exit()) {
+        exitSimLoop("target called exit()", tc->getSyscallArg(0) & 0xff);
+    }
 
     return 1;
 }
diff --git a/src/sim/syscall_emul.hh b/src/sim/syscall_emul.hh
index f027dbf24..a3ff006ef 100644
--- a/src/sim/syscall_emul.hh
+++ b/src/sim/syscall_emul.hh
@@ -27,14 +27,20 @@
  *
  * Authors: Steve Reinhardt
  *          Kevin Lim
- *          Korey Sewell
  */
 
 #ifndef __SIM_SYSCALL_EMUL_HH__
 #define __SIM_SYSCALL_EMUL_HH__
 
-#define BSD_HOST (defined(__APPLE__) || defined(__OpenBSD__) || \
-                  defined(__FreeBSD__))
+// Set to 1 on hosts where fstat64Func/lstat64Func fall back to plain
+// fstat()/lstat().  The test is evaluated here because a macro that
+// expands to defined(...) inside #if has undefined behavior.
+#if defined(__APPLE__) || defined(__OpenBSD__) || \
+    defined(__FreeBSD__) || defined(__CYGWIN__)
+#define NO_STAT64 1
+#else
+#define NO_STAT64 0
+#endif
 
 ///
 /// @file syscall_emul.hh
@@ -507,7 +513,7 @@ fstat64Func(SyscallDesc *desc, int callnum, Process *process,
         return -EBADF;
     }
 
-#if BSD_HOST
+#if NO_STAT64
     struct stat  hostBuf;
     int result = fstat(process->sim_fd(fd), &hostBuf);
 #else
@@ -557,7 +563,7 @@ lstat64Func(SyscallDesc *desc, int callnum, Process *process,
     if (!tc->getMemPort()->tryReadString(path, tc->getSyscallArg(0)))
       return -EFAULT;
 
-#if BSD_HOST
+#if NO_STAT64
     struct stat hostBuf;
     int result = lstat(path.c_str(), &hostBuf);
 #else
diff --git a/src/sim/system.cc b/src/sim/system.cc
index b3c7870fd..ad70b9b03 100644
--- a/src/sim/system.cc
+++ b/src/sim/system.cc
@@ -63,7 +63,7 @@ System::System(Params *p)
 #else
       page_ptr(0),
 #endif
-      _params(p)
+      memoryMode(p->mem_mode), _params(p)
 {
     // add self to global system list
     systemList.push_back(this);
@@ -119,8 +119,6 @@ System::System(Params *p)
     DPRINTF(Loader, "Kernel end   = %#x\n", kernelEnd);
     DPRINTF(Loader, "Kernel entry = %#x\n", kernelEntry);
     DPRINTF(Loader, "Kernel loaded...\n");
-
-    kernelBinning = new Kernel::Binning(this);
 #endif // FULL_SYSTEM
 
     // increment the number of running systms
@@ -145,6 +143,14 @@ int rgdb_wait = -1;
 
 #endif // FULL_SYSTEM
 
+// Set the memory mode; asserts that getState() reports Drained beforehand.
+void
+System::setMemoryMode(MemoryMode mode)
+{
+    assert(getState() == Drained);
+    memoryMode = mode;
+}
+
 int
 System::registerThreadContext(ThreadContext *tc, int id)
 {
@@ -245,13 +251,15 @@ System::printSystems()
     }
 }
 
-extern "C"
 void
 printSystems()
 {
     System::printSystems();
 }
 
+const char *System::MemoryModeStrings[3] = {"invalid", "atomic",
+    "timing"};
+
 #if FULL_SYSTEM
 
 // In full system mode, only derived classes (e.g. AlphaLinuxSystem)
@@ -264,12 +272,15 @@ DEFINE_SIM_OBJECT_CLASS_NAME("System", System)
 BEGIN_DECLARE_SIM_OBJECT_PARAMS(System)
 
     SimObjectParam<PhysicalMemory *> physmem;
+    SimpleEnumParam<System::MemoryMode> mem_mode;
 
 END_DECLARE_SIM_OBJECT_PARAMS(System)
 
 BEGIN_INIT_SIM_OBJECT_PARAMS(System)
 
-    INIT_PARAM(physmem, "physical memory")
+    INIT_PARAM(physmem, "physical memory"),
+    INIT_ENUM_PARAM(mem_mode, "Memory Mode, (1=atomic, 2=timing)",
+            System::MemoryModeStrings)
 
 END_INIT_SIM_OBJECT_PARAMS(System)
 
@@ -278,6 +289,7 @@ CREATE_SIM_OBJECT(System)
     System::Params *p = new System::Params;
     p->name = getInstanceName();
     p->physmem = physmem;
+    p->mem_mode = mem_mode;
     return new System(p);
 }
 
diff --git a/src/sim/system.hh b/src/sim/system.hh
index 059dc92dc..a1b53c2eb 100644
--- a/src/sim/system.hh
+++ b/src/sim/system.hh
@@ -61,6 +61,23 @@ class RemoteGDB;
 class System : public SimObject
 {
   public:
+    enum MemoryMode {
+        Invalid=0,
+        Atomic,
+        Timing
+    };
+
+    static const char *MemoryModeStrings[3];
+
+    /** @return the current memory mode; asserts that it is not Invalid. */
+    MemoryMode getMemoryMode() const { assert(memoryMode); return memoryMode; }
+
+    /** Change the memory mode of the system. This should only be called from
+     * the Python code.
+     * @param mode Mode to change to (atomic/timing)
+     */
+    void setMemoryMode(MemoryMode mode);
+
     PhysicalMemory *physmem;
     PCEventQueue pcEventQueue;
 
@@ -108,6 +125,8 @@ class System : public SimObject
 
   protected:
 
+    MemoryMode memoryMode;
+
 #if FULL_SYSTEM
     /**
      * Fix up an address used to match PCs for hooking simulator
@@ -153,6 +172,7 @@ class System : public SimObject
     {
         std::string name;
         PhysicalMemory *physmem;
+        MemoryMode mem_mode;
 
 #if FULL_SYSTEM
         Tick boot_cpu_frequency;
diff --git a/util/rundiff b/util/rundiff
index 533f448b1..c34bb53a3 100755
--- a/util/rundiff
+++ b/util/rundiff
@@ -39,7 +39,7 @@
 # "filename" is a pipe (|).  Thus to compare the instruction traces
 # from two versions of m5 (m5a and m5b), you can do this:
 #
-# rundiff 'm5a --trace:flags=InstExec |' 'm5b --trace:flags=InstExec |'
+# rundiff 'm5a --traceflags=InstExec |' 'm5b --traceflags=InstExec |'
 #
 
 use strict;
diff --git a/util/tracediff b/util/tracediff
index f11431293..a7efc260d 100755
--- a/util/tracediff
+++ b/util/tracediff
@@ -1,5 +1,5 @@
 #! /usr/bin/env perl
-# Copyright (c) 2003-2005 The Regents of The University of Michigan
+# Copyright (c) 2003-2006 The Regents of The University of Michigan
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -36,7 +36,7 @@
 # If you want to pass different arguments to the two instances of m5,
 # you can embed them in the simulator arguments like this:
 #
-# % tracediff "m5.opt --foo.bar=1" "m5.opt --foo.bar=2" [common args]
+# % tracediff "m5.opt --option1" "m5.opt --option2" [common args]
 #
 
 if (@ARGV < 2) {
author	Gabe Black <gblack@eecs.umich.edu>	2006-07-18 18:23:23 -0400
committer	Gabe Black <gblack@eecs.umich.edu>	2006-07-18 18:23:23 -0400
commit	44974a4462e019cfc5c65d20ad620faa9bc7f8cf (patch)
tree	94f25a8a565021f97cbf6f28a37accdf157bbafc
parent	15a8f050605919579e81b6abb98a0b596334216d (diff)
parent	fe9e851e8c0a52ee412350036c94cc61c9b8dc04 (diff)
download	gem5-44974a4462e019cfc5c65d20ad620faa9bc7f8cf.tar.xz