130 files changed, 1604 insertions, 1481 deletions
diff --git a/SConstruct b/SConstruct
index 50089700a..dac4d137c 100644
--- a/SConstruct
+++ b/SConstruct
@@ -347,7 +347,10 @@ sticky_opts.AddOptions(
     ('CC', 'C compiler', os.environ.get('CC', env['CC'])),
     ('CXX', 'C++ compiler', os.environ.get('CXX', env['CXX'])),
     BoolOption('BATCH', 'Use batch pool for build and tests', False),
-    ('BATCH_CMD', 'Batch pool submission command name', 'qdo')
+    ('BATCH_CMD', 'Batch pool submission command name', 'qdo'),
+    ('PYTHONHOME',
+     'Override the default PYTHONHOME for this system (use with caution)',
+     '%s:%s' % (sys.prefix, sys.exec_prefix))
     )
 
 # Non-sticky options only apply to the current build.
@@ -359,7 +362,7 @@ nonsticky_opts.AddOptions(
 # These options get exported to #defines in config/*.hh (see src/SConscript).
 env.ExportOptions = ['FULL_SYSTEM', 'ALPHA_TLASER', 'USE_FENV', \
                      'USE_MYSQL', 'NO_FAST_ALLOC', 'SS_COMPATIBLE_FP', \
-                     'USE_CHECKER']
+                     'USE_CHECKER', 'PYTHONHOME']
 
 # Define a handy 'no-op' action
 def no_action(target, source, env):
@@ -399,8 +402,13 @@ def config_emitter(target, source, env):
     option = str(target[0])
     # True target is config header file
     target = os.path.join('config', option.lower() + '.hh')
-    # Force value to 0/1 even if it's a Python bool
-    val = int(eval(str(env[option])))
+    val = env[option]
+    if isinstance(val, bool):
+        # Force value to 0/1
+        val = int(val)
+    elif isinstance(val, str):
+        val = '"' + val + '"'
+        
     # Sources are option name & value (packaged in SCons Value nodes)
     return ([target], [Value(option), Value(val)])
 
diff --git a/configs/common/Benchmarks.py b/configs/common/Benchmarks.py
index bb1ac1ab5..1f272517a 100644
--- a/configs/common/Benchmarks.py
+++ b/configs/common/Benchmarks.py
@@ -28,7 +28,7 @@
 
 from SysPaths import *
 
-class Machine:
+class SysConfig:
     def __init__(self, script=None, mem=None, disk=None):
         self.scriptname = script
         self.diskname = disk
@@ -52,54 +52,54 @@ class Machine:
         else:
             return env.get('LINUX_IMAGE', disk('linux-latest.img'))
 
-#Benchmarks are defined as a key in a dict which is a list of Machines
+# Each benchmark is a key in the Benchmarks dict; its value is a list of SysConfigs
 # The first defined machine is the test system, the others are driving systems
-# Currently there is only support for 1 or 2 machines
 
-Benchmarks = {}
-Benchmarks['PovrayBench']       = [Machine('povray-bench.rcS', '512MB', 'povray.img')]
-Benchmarks['PovrayAutumn']      = [Machine('povray-autumn.rcS', '512MB', 'povray.img')]
-Benchmarks['NetperfStream']     = [Machine('netperf-stream-client.rcS'),
-                                   Machine('netperf-server.rcS')]
-Benchmarks['NetperfStreamNT']   = [Machine('netperf-stream-nt-client.rcS'),
-                                   Machine('netperf-server.rcS')]
-Benchmarks['NetperfMaerts']     = [Machine('netperf-maerts-client.rcS'),
-                                   Machine('netperf-server.rcS')]
-Benchmarks['SurgeStandard']     = [Machine('surge-server.rcS', '512MB'),
-                                   Machine('surge-client.rcS', '256MB')]
-Benchmarks['SurgeSpecweb']      = [Machine('spec-surge-server.rcS', '512MB'),
-                                   Machine('spec-surge-client.rcS', '256MB')]
-Benchmarks['Nhfsstone']         = [Machine('nfs-server-nhfsstone.rcS', '512MB'),
-                                   Machine('nfs-client-nhfsstone.rcS')]
-Benchmarks['Nfs']               = [Machine('nfs-server.rcS', '900MB'),
-                                   Machine('nfs-client-dbench.rcS')]
-Benchmarks['NfsTcp']            = [Machine('nfs-server.rcS', '900MB'),
-                                   Machine('nfs-client-tcp.rcS')]
-Benchmarks['IScsiInitiator']    = [Machine('iscsi-client.rcS', '512MB'),
-                                   Machine('iscsi-server.rcS', '512MB')]
-Benchmarks['IScsiTarget']       = [Machine('iscsi-server.rcS', '512MB'),
-                                   Machine('iscsi-client.rcS', '512MB')]
-Benchmarks['Validation']        = [Machine('iscsi-server.rcS', '512MB'),
-                                   Machine('iscsi-client.rcS', '512MB')]
-Benchmarks['Ping']              = [Machine('ping-server.rcS',),
-                                   Machine('ping-client.rcS')]
+Benchmarks = {
+    'PovrayBench':  [SysConfig('povray-bench.rcS', '512MB', 'povray.img')],
+    'PovrayAutumn': [SysConfig('povray-autumn.rcS', '512MB', 'povray.img')],
 
+    'NetperfStream':	[SysConfig('netperf-stream-client.rcS'),
+                         SysConfig('netperf-server.rcS')],
+    'NetperfStreamNT':	[SysConfig('netperf-stream-nt-client.rcS'),
+                         SysConfig('netperf-server.rcS')],
+    'NetperfMaerts':	[SysConfig('netperf-maerts-client.rcS'),
+                         SysConfig('netperf-server.rcS')],
+    'SurgeStandard':	[SysConfig('surge-server.rcS', '512MB'),
+                         SysConfig('surge-client.rcS', '256MB')],
+    'SurgeSpecweb':	[SysConfig('spec-surge-server.rcS', '512MB'),
+                         SysConfig('spec-surge-client.rcS', '256MB')],
+    'Nhfsstone':	[SysConfig('nfs-server-nhfsstone.rcS', '512MB'),
+                         SysConfig('nfs-client-nhfsstone.rcS')],
+    'Nfs':		[SysConfig('nfs-server.rcS', '900MB'),
+                         SysConfig('nfs-client-dbench.rcS')],
+    'NfsTcp':		[SysConfig('nfs-server.rcS', '900MB'),
+                         SysConfig('nfs-client-tcp.rcS')],
+    'IScsiInitiator':	[SysConfig('iscsi-client.rcS', '512MB'),
+                         SysConfig('iscsi-server.rcS', '512MB')],
+    'IScsiTarget':	[SysConfig('iscsi-server.rcS', '512MB'),
+                         SysConfig('iscsi-client.rcS', '512MB')],
+    'Validation':	[SysConfig('iscsi-server.rcS', '512MB'),
+                         SysConfig('iscsi-client.rcS', '512MB')],
+    'Ping':		[SysConfig('ping-server.rcS',),
+                         SysConfig('ping-client.rcS')],
 
-Benchmarks['ValAccDelay']	= [Machine('devtime.rcS', '512MB')]
-Benchmarks['ValAccDelay2']	= [Machine('devtimewmr.rcS', '512MB')]
-Benchmarks['ValMemLat']         = [Machine('micro_memlat.rcS', '512MB')]
-Benchmarks['ValMemLat2MB']	= [Machine('micro_memlat2mb.rcS', '512MB')]
-Benchmarks['ValMemLat8MB']	= [Machine('micro_memlat8mb.rcS', '512MB')]
-Benchmarks['ValMemLat']         = [Machine('micro_memlat8.rcS', '512MB')]
-Benchmarks['ValTlbLat']         = [Machine('micro_tlblat.rcS', '512MB')]
-Benchmarks['ValSysLat']         = [Machine('micro_syscall.rcS', '512MB')]
-Benchmarks['ValCtxLat']         = [Machine('micro_ctx.rcS', '512MB')]
-Benchmarks['ValStream']         = [Machine('micro_stream.rcS', '512MB')]
-Benchmarks['ValStreamScale']	= [Machine('micro_streamscale.rcS', '512MB')]
-Benchmarks['ValStreamCopy']	= [Machine('micro_streamcopy.rcS', '512MB')]
+    'ValAccDelay':	[SysConfig('devtime.rcS', '512MB')],
+    'ValAccDelay2':	[SysConfig('devtimewmr.rcS', '512MB')],
+    'ValMemLat':	[SysConfig('micro_memlat.rcS', '512MB')],  # NOTE(review): overridden by the duplicate 'ValMemLat' key below
+    'ValMemLat2MB':	[SysConfig('micro_memlat2mb.rcS', '512MB')],
+    'ValMemLat8MB':	[SysConfig('micro_memlat8mb.rcS', '512MB')],
+    'ValMemLat':	[SysConfig('micro_memlat8.rcS', '512MB')],  # NOTE(review): duplicate key; in a dict literal this later entry wins -- confirm the intended name
+    'ValTlbLat':	[SysConfig('micro_tlblat.rcS', '512MB')],
+    'ValSysLat':	[SysConfig('micro_syscall.rcS', '512MB')],
+    'ValCtxLat':	[SysConfig('micro_ctx.rcS', '512MB')],
+    'ValStream':	[SysConfig('micro_stream.rcS', '512MB')],
+    'ValStreamScale':	[SysConfig('micro_streamscale.rcS', '512MB')],
+    'ValStreamCopy':	[SysConfig('micro_streamcopy.rcS', '512MB')],
 
-
-Benchmarks['bnAn']              = [Machine('/z/saidi/work/m5.newmem.head/configs/boot/bn-app.rcS', '128MB', '/z/saidi/work/bottleneck/bnimg.img')]
+    'bnAn': [SysConfig('/z/saidi/work/m5.newmem.head/configs/boot/bn-app.rcS',
+                       '128MB', '/z/saidi/work/bottleneck/bnimg.img')]
+}
 
 benchs = Benchmarks.keys()
 benchs.sort()
diff --git a/configs/common/FSConfig.py b/configs/common/FSConfig.py
index 470dc8867..05888b10b 100644
--- a/configs/common/FSConfig.py
+++ b/configs/common/FSConfig.py
@@ -49,7 +49,7 @@ def makeLinuxAlphaSystem(mem_mode, mdesc = None):
     self = LinuxAlphaSystem()
     if not mdesc:
         # generic system
-        mdesc = Machine()
+        mdesc = SysConfig()
     self.readfile = mdesc.script()
     self.iobus = Bus(bus_id=0)
     self.membus = Bus(bus_id=1)
diff --git a/configs/example/fs.py b/configs/example/fs.py
index 460fb68fb..a5b8772af 100644
--- a/configs/example/fs.py
+++ b/configs/example/fs.py
@@ -42,6 +42,8 @@ parser = optparse.OptionParser()
 
 parser.add_option("-d", "--detailed", action="store_true")
 parser.add_option("-t", "--timing", action="store_true")
+parser.add_option("-n", "--num_cpus", type="int", default=1)
+parser.add_option("--caches", action="store_true")
 parser.add_option("-m", "--maxtick", type="int")
 parser.add_option("--maxtime", type="float")
 parser.add_option("--dual", action="store_true",
@@ -64,53 +66,63 @@ if args:
     print "Error: script doesn't take any positional arguments"
     sys.exit(1)
 
+class MyCache(BaseCache):
+    assoc = 2
+    block_size = 64
+    latency = 1
+    mshrs = 10
+    tgts_per_mshr = 5
+    protocol = CoherenceProtocol(protocol='moesi')
+
+# client system CPU is always simple... note this is an assignment of
+# a class, not an instance.
+ClientCPUClass = AtomicSimpleCPU
+client_mem_mode = 'atomic'
+
 if options.detailed:
-    cpu = DerivO3CPU()
-    cpu2 = DerivO3CPU()
-    mem_mode = 'timing'
+    ServerCPUClass = DerivO3CPU
+    server_mem_mode = 'timing'
 elif options.timing:
-    cpu = TimingSimpleCPU()
-    cpu2 = TimingSimpleCPU()
-    mem_mode = 'timing'
+    ServerCPUClass = TimingSimpleCPU
+    server_mem_mode = 'timing'
 else:
-    cpu = AtomicSimpleCPU()
-    cpu2 = AtomicSimpleCPU()
-    mem_mode = 'atomic'
+    ServerCPUClass = AtomicSimpleCPU
+    server_mem_mode = 'atomic'
 
-cpu.clock = '2GHz'
-cpu2.clock = '2GHz'
-cpu.cpu_id = 0
-cpu2.cpu_id = 0
+ServerCPUClass.clock = '2GHz'
+ClientCPUClass.clock = '2GHz'
 
 if options.benchmark:
-    if options.benchmark not in Benchmarks:
+    try:
+        bm = Benchmarks[options.benchmark]
+    except KeyError:
         print "Error benchmark %s has not been defined." % options.benchmark
         print "Valid benchmarks are: %s" % DefinedBenchmarks
         sys.exit(1)
-
-    bm = Benchmarks[options.benchmark]
 else:
     if options.dual:
-        bm = [Machine(), Machine()]
+        bm = [SysConfig(), SysConfig()]
     else:
-        bm = [Machine()]
+        bm = [SysConfig()]
+
+server_sys = makeLinuxAlphaSystem(server_mem_mode, bm[0])
+np = options.num_cpus
+server_sys.cpu = [ServerCPUClass(cpu_id=i) for i in xrange(np)]
+for i in xrange(np):
+    if options.caches:
+        server_sys.cpu[i].addPrivateSplitL1Caches(MyCache(size = '32kB'),
+                                                  MyCache(size = '64kB'))
+    server_sys.cpu[i].connectMemPorts(server_sys.membus)
+    server_sys.cpu[i].mem = server_sys.physmem
 
 if len(bm) == 2:
-    s1 = makeLinuxAlphaSystem(mem_mode, bm[0])
-    s1.cpu = cpu
-    cpu.connectMemPorts(s1.membus)
-    cpu.mem = s1.physmem
-    s2 = makeLinuxAlphaSystem(mem_mode, bm[1])
-    s2.cpu = cpu2
-    cpu2.connectMemPorts(s2.membus)
-    cpu2.mem = s2.physmem
-    root = makeDualRoot(s1, s2, options.etherdump)
+    client_sys = makeLinuxAlphaSystem(client_mem_mode, bm[1])
+    client_sys.cpu = ClientCPUClass(cpu_id=0)
+    client_sys.cpu.connectMemPorts(client_sys.membus)
+    client_sys.cpu.mem = client_sys.physmem
+    root = makeDualRoot(server_sys, client_sys, options.etherdump)
 elif len(bm) == 1:
-    root = Root(clock = '1THz',
-                system = makeLinuxAlphaSystem(mem_mode, bm[0]))
-    root.system.cpu = cpu
-    cpu.connectMemPorts(root.system.membus)
-    cpu.mem = root.system.physmem
+    root = Root(clock = '1THz', system = server_sys)
 else:
     print "Error I don't know how to create more than 2 systems."
     sys.exit(1)
diff --git a/configs/example/memtest.py b/configs/example/memtest.py
new file mode 100644
index 000000000..141ecfd8e
--- /dev/null
+++ b/configs/example/memtest.py
@@ -0,0 +1,138 @@
+# Copyright (c) 2006 The Regents of The University of Michigan
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Ron Dreslinski
+
+import m5
+from m5.objects import *
+import os, optparse, sys
+m5.AddToPath('../common')
+
+parser = optparse.OptionParser()
+
+parser.add_option("--caches", action="store_true")
+parser.add_option("-t", "--timing", action="store_true")
+parser.add_option("-m", "--maxtick", type="int")
+parser.add_option("-l", "--maxloads", default = "1000000000000", type="int")
+parser.add_option("-n", "--numtesters", default = "8", type="int")
+parser.add_option("-p", "--protocol",
+                  default="moesi",
+                  help="The coherence protocol to use for the L1'a (i.e. MOESI, MOSI)")
+
+(options, args) = parser.parse_args()
+
+if args:
+     print "Error: script doesn't take any positional arguments"
+     sys.exit(1)
+
+# --------------------
+# Base L1 Cache
+# ====================
+
+class L1(BaseCache):
+    latency = 1
+    block_size = 64
+    mshrs = 12
+    tgts_per_mshr = 8
+    protocol = CoherenceProtocol(protocol=options.protocol)
+
+# ----------------------
+# Base L2 Cache
+# ----------------------
+
+class L2(BaseCache):
+    block_size = 64
+    latency = 10
+    mshrs = 92
+    tgts_per_mshr = 16
+    write_buffers = 8
+
+# At most 8 testers are supported with the false sharing method.
+if options.numtesters > 8:
+    print "Error: NUmber of testers limited to 8 because of false sharing"
+    sys.exit(1)
+
+# Testers issue atomic accesses unless --timing was requested; every other
+# MemTest parameter is the same in both modes.
+cpus = [ MemTest(atomic = not options.timing,
+                 max_loads = options.maxloads,
+                 percent_functional = 50,
+                 percent_uncacheable = 10,
+                 progress_interval = 1000)
+         for i in xrange(options.numtesters) ]
+# system simulated
+system = System(cpu = cpus, funcmem = PhysicalMemory(),
+                physmem = PhysicalMemory(latency = "50ps"), membus = Bus(clock="500GHz", width=16))
+
+# l2cache & bus
+if options.caches:
+    system.toL2Bus = Bus(clock="500GHz", width=16)
+    system.l2c = L2(size='64kB', assoc=8)
+    system.l2c.cpu_side = system.toL2Bus.port
+
+    # connect l2c to membus
+    system.l2c.mem_side = system.membus.port
+
+which_port = 0
+# add L1 caches
+for cpu in cpus:
+    if options.caches:
+         cpu.l1c = L1(size = '32kB', assoc = 4)
+         cpu.test = cpu.l1c.cpu_side
+         cpu.l1c.mem_side = system.toL2Bus.port
+    else:
+         cpu.test = system.membus.port
+    if  which_port == 0:
+         system.funcmem.port = cpu.functional
+         which_port = 1
+    else:
+         system.funcmem.functional = cpu.functional
+
+
+# connect memory to membus
+system.physmem.port = system.membus.port
+
+
+# -----------------------
+# run simulation
+# -----------------------
+
+root = Root( system = system )
+if options.timing:
+    root.system.mem_mode = 'timing'
+else:
+    root.system.mem_mode = 'atomic'
+
+# instantiate configuration
+m5.instantiate(root)
+
+# simulate until program terminates
+if options.maxtick:
+    exit_event = m5.simulate(options.maxtick)
+else:
+    exit_event = m5.simulate()
+
+print 'Exiting @ tick', m5.curTick(), 'because', exit_event.getCause()
diff --git a/src/arch/alpha/isa/main.isa b/src/arch/alpha/isa/main.isa
index 1270bf8d8..2024b1117 100644
--- a/src/arch/alpha/isa/main.isa
+++ b/src/arch/alpha/isa/main.isa
@@ -45,20 +45,22 @@ output header {{
 #include <iostream>
 #include <iomanip>
 
+#include "arch/alpha/faults.hh"
 #include "config/ss_compatible_fp.hh"
 #include "cpu/static_inst.hh"
-#include "arch/alpha/faults.hh"
 #include "mem/request.hh"  // some constructors use MemReq flags
+#include "mem/packet.hh"
 }};
 
 output decoder {{
+#include <cmath>
+
 #include "base/cprintf.hh"
 #include "base/fenv.hh"
 #include "base/loader/symtab.hh"
 #include "config/ss_compatible_fp.hh"
 #include "cpu/thread_context.hh"  // for Jump::branchTarget()
-
-#include <math.h>
+#include "mem/packet.hh"
 
 using namespace AlphaISA;
 }};
@@ -73,8 +75,9 @@ output exec {{
 #include "config/ss_compatible_fp.hh"
 #include "cpu/base.hh"
 #include "cpu/exetrace.hh"
+#include "mem/packet.hh"
+#include "mem/packet_access.hh"
 #include "sim/sim_exit.hh"
-#include "mem/packet_impl.hh"
 
 using namespace AlphaISA;
 }};
diff --git a/src/arch/alpha/isa/mem.isa b/src/arch/alpha/isa/mem.isa
index fe69c36a5..02291ed6b 100644
--- a/src/arch/alpha/isa/mem.isa
+++ b/src/arch/alpha/isa/mem.isa
@@ -186,7 +186,7 @@ def template InitiateAccDeclare {{
 
 
 def template CompleteAccDeclare {{
-    Fault completeAcc(Packet *, %(CPU_exec_context)s *,
+    Fault completeAcc(PacketPtr, %(CPU_exec_context)s *,
                       Trace::InstRecord *) const;
 }};
 
@@ -313,7 +313,7 @@ def template LoadInitiateAcc {{
 
 
 def template LoadCompleteAcc {{
-    Fault %(class_name)s::completeAcc(Packet *pkt,
+    Fault %(class_name)s::completeAcc(PacketPtr pkt,
                                       %(CPU_exec_context)s *xc,
                                       Trace::InstRecord *traceData) const
     {
@@ -437,7 +437,7 @@ def template StoreInitiateAcc {{
 
 
 def template StoreCompleteAcc {{
-    Fault %(class_name)s::completeAcc(Packet *pkt,
+    Fault %(class_name)s::completeAcc(PacketPtr pkt,
                                       %(CPU_exec_context)s *xc,
                                       Trace::InstRecord *traceData) const
     {
@@ -460,7 +460,7 @@ def template StoreCompleteAcc {{
 
 
 def template StoreCondCompleteAcc {{
-    Fault %(class_name)s::completeAcc(Packet *pkt,
+    Fault %(class_name)s::completeAcc(PacketPtr pkt,
                                       %(CPU_exec_context)s *xc,
                                       Trace::InstRecord *traceData) const
     {
@@ -535,7 +535,7 @@ def template MiscInitiateAcc {{
 
 
 def template MiscCompleteAcc {{
-    Fault %(class_name)s::completeAcc(Packet *pkt,
+    Fault %(class_name)s::completeAcc(PacketPtr pkt,
                                       %(CPU_exec_context)s *xc,
                                       Trace::InstRecord *traceData) const
     {
diff --git a/src/arch/mips/isa/formats/mem.isa b/src/arch/mips/isa/formats/mem.isa
index d6b0c2938..e786cfbe2 100644
--- a/src/arch/mips/isa/formats/mem.isa
+++ b/src/arch/mips/isa/formats/mem.isa
@@ -162,7 +162,7 @@ def template InitiateAccDeclare {{
 
 
 def template CompleteAccDeclare {{
-    Fault completeAcc(Packet *, %(CPU_exec_context)s *, Trace::InstRecord *) const;
+    Fault completeAcc(PacketPtr, %(CPU_exec_context)s *, Trace::InstRecord *) const;
 }};
 
 
@@ -288,7 +288,7 @@ def template LoadInitiateAcc {{
 
 
 def template LoadCompleteAcc {{
-    Fault %(class_name)s::completeAcc(Packet *pkt,
+    Fault %(class_name)s::completeAcc(PacketPtr pkt,
                                       %(CPU_exec_context)s *xc,
                                       Trace::InstRecord *traceData) const
     {
@@ -412,7 +412,7 @@ def template StoreInitiateAcc {{
 
 
 def template StoreCompleteAcc {{
-    Fault %(class_name)s::completeAcc(Packet *pkt,
+    Fault %(class_name)s::completeAcc(PacketPtr pkt,
                                       %(CPU_exec_context)s *xc,
                                       Trace::InstRecord *traceData) const
     {
@@ -434,7 +434,7 @@ def template StoreCompleteAcc {{
 }};
 
 def template StoreCondCompleteAcc {{
-    Fault %(class_name)s::completeAcc(Packet *pkt,
+    Fault %(class_name)s::completeAcc(PacketPtr pkt,
                                       %(CPU_exec_context)s *xc,
                                       Trace::InstRecord *traceData) const
     {
@@ -509,7 +509,7 @@ def template MiscInitiateAcc {{
 
 
 def template MiscCompleteAcc {{
-    Fault %(class_name)s::completeAcc(Packet *pkt,
+    Fault %(class_name)s::completeAcc(PacketPtr pkt,
                                       %(CPU_exec_context)s *xc,
                                       Trace::InstRecord *traceData) const
     {
diff --git a/src/arch/mips/isa/includes.isa b/src/arch/mips/isa/includes.isa
index f58c8adaa..274fdcaa1 100644
--- a/src/arch/mips/isa/includes.isa
+++ b/src/arch/mips/isa/includes.isa
@@ -38,23 +38,25 @@ output header {{
 #include <iostream>
 #include <iomanip>
 
-#include "cpu/static_inst.hh"
 #include "arch/mips/isa_traits.hh"
+#include "cpu/static_inst.hh"
+#include "mem/packet.hh"
 }};
 
 output decoder {{
+#include <cmath>
+#if defined(linux)
+#include <fenv.h>
+#endif
+
+#include "arch/mips/faults.hh"
+#include "arch/mips/isa_traits.hh"
+#include "arch/mips/isa_traits.hh"
 #include "arch/mips/isa_traits.hh"
+#include "arch/mips/utility.hh"
 #include "base/cprintf.hh"
 #include "base/loader/symtab.hh"
 #include "cpu/thread_context.hh"
-#include "arch/mips/faults.hh"
-#include "arch/mips/isa_traits.hh"
-#include "arch/mips/utility.hh"
-
-#include <math.h>
-#if defined(linux)
-#include <fenv.h>
-#endif
 
 using namespace MipsISA;
 }};
@@ -74,8 +76,9 @@ output exec {{
 #endif
 #include "cpu/base.hh"
 #include "cpu/exetrace.hh"
+#include "mem/packet.hh"
+#include "mem/packet_access.hh"
 #include "sim/sim_exit.hh"
-#include "mem/packet_impl.hh"
 
 using namespace MipsISA;
 }};
diff --git a/src/base/traceflags.py b/src/base/traceflags.py
index 757c9e7b7..298d22c2b 100644
--- a/src/base/traceflags.py
+++ b/src/base/traceflags.py
@@ -182,7 +182,8 @@ compoundFlagMap = {
     'EthernetNoData' : [ 'Ethernet', 'EthernetPIO', 'EthernetDesc', 'EthernetIntr', 'EthernetSM', 'EthernetCksum' ],
     'IdeAll' : [ 'IdeCtrl', 'IdeDisk' ],
     'O3CPUAll' : [ 'Fetch', 'Decode', 'Rename', 'IEW', 'Commit', 'IQ', 'ROB', 'FreeList', 'RenameMap', 'LSQ', 'LSQUnit', 'StoreSet', 'MemDepUnit', 'DynInst', 'FullCPU', 'O3CPU', 'Activity','Scoreboard','Writeback'],
-    'OzoneCPUAll' : [ 'BE', 'FE', 'IBE', 'OzoneLSQ', 'OzoneCPU']
+    'OzoneCPUAll' : [ 'BE', 'FE', 'IBE', 'OzoneLSQ', 'OzoneCPU'],
+    'All' : baseFlags
 }
 
 #############################################################
diff --git a/src/cpu/checker/cpu.cc b/src/cpu/checker/cpu.cc
index f6d56eef6..9cb6b032e 100644
--- a/src/cpu/checker/cpu.cc
+++ b/src/cpu/checker/cpu.cc
@@ -34,10 +34,8 @@
 #include "cpu/base.hh"
 #include "cpu/checker/cpu.hh"
 #include "cpu/simple_thread.hh"
-#include "cpu/thread_context.hh"
 #include "cpu/static_inst.hh"
-#include "mem/packet_impl.hh"
-#include "sim/byteswap.hh"
+#include "cpu/thread_context.hh"
 
 #if FULL_SYSTEM
 #include "arch/vtophys.hh"
@@ -171,7 +169,7 @@ CheckerCPU::read(Addr addr, T &data, unsigned flags)
     // translate to physical address
     translateDataReadReq(memReq);
 
-    Packet *pkt = new Packet(memReq, Packet::ReadReq, Packet::Broadcast);
+    PacketPtr pkt = new Packet(memReq, Packet::ReadReq, Packet::Broadcast);
 
     pkt->dataStatic(&data);
 
@@ -258,7 +256,7 @@ CheckerCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
         T inst_data;
 /*
         // This code would work if the LSQ allowed for snooping.
-        Packet *pkt = new Packet(memReq, Packet::ReadReq, Packet::Broadcast);
+        PacketPtr pkt = new Packet(memReq, Packet::ReadReq, Packet::Broadcast);
         pkt.dataStatic(&inst_data);
 
         dcachePort->sendFunctional(pkt);
diff --git a/src/cpu/checker/cpu_impl.hh b/src/cpu/checker/cpu_impl.hh
index ad4f2c560..36c7349e6 100644
--- a/src/cpu/checker/cpu_impl.hh
+++ b/src/cpu/checker/cpu_impl.hh
@@ -37,8 +37,6 @@
 #include "cpu/simple_thread.hh"
 #include "cpu/thread_context.hh"
 #include "cpu/static_inst.hh"
-#include "mem/packet_impl.hh"
-#include "sim/byteswap.hh"
 #include "sim/sim_object.hh"
 #include "sim/stats.hh"
 
@@ -183,7 +181,7 @@ Checker<DynInstPtr>::verify(DynInstPtr &completed_inst)
         }
 
         if (fault == NoFault) {
-            Packet *pkt = new Packet(memReq, Packet::ReadReq,
+            PacketPtr pkt = new Packet(memReq, Packet::ReadReq,
                                      Packet::Broadcast);
 
             pkt->dataStatic(&machInst);
diff --git a/src/cpu/memtest/memtest.cc b/src/cpu/memtest/memtest.cc
index 024cd7e41..91e073cf0 100644
--- a/src/cpu/memtest/memtest.cc
+++ b/src/cpu/memtest/memtest.cc
@@ -38,42 +38,42 @@
 
 #include "base/misc.hh"
 #include "base/statistics.hh"
-//#include "cpu/simple_thread.hh"
 #include "cpu/memtest/memtest.hh"
+//#include "cpu/simple_thread.hh"
 //#include "mem/cache/base_cache.hh"
+#include "mem/mem_object.hh"
+#include "mem/port.hh"
+#include "mem/packet.hh"
 //#include "mem/physical.hh"
+#include "mem/request.hh"
 #include "sim/builder.hh"
 #include "sim/sim_events.hh"
 #include "sim/stats.hh"
-#include "mem/packet.hh"
-#include "mem/request.hh"
-#include "mem/port.hh"
-#include "mem/mem_object.hh"
 
 using namespace std;
 
 int TESTER_ALLOCATOR=0;
 
 bool
-MemTest::CpuPort::recvTiming(Packet *pkt)
+MemTest::CpuPort::recvTiming(PacketPtr pkt)
 {
     memtest->completeRequest(pkt);
     return true;
 }
 
 Tick
-MemTest::CpuPort::recvAtomic(Packet *pkt)
+MemTest::CpuPort::recvAtomic(PacketPtr pkt)
 {
     panic("MemTest doesn't expect recvAtomic callback!");
     return curTick;
 }
 
 void
-MemTest::CpuPort::recvFunctional(Packet *pkt)
+MemTest::CpuPort::recvFunctional(PacketPtr pkt)
 {
     //Do nothing if we see one come through
-    if (curTick != 0)//Supress warning durring initialization
-        warn("Functional Writes not implemented in MemTester\n");
+//    if (curTick != 0)//Suppress warning during initialization
+//        warn("Functional Writes not implemented in MemTester\n");
     //Need to find any response values that intersect and update
     return;
 }
@@ -94,7 +94,7 @@ MemTest::CpuPort::recvRetry()
 }
 
 void
-MemTest::sendPkt(Packet *pkt) {
+MemTest::sendPkt(PacketPtr pkt) {
     if (atomic) {
         cachePort.sendAtomic(pkt);
         pkt->makeAtomicResponse();
@@ -113,7 +113,7 @@ MemTest::MemTest(const string &name,
 //		 PhysicalMemory *check_mem,
                  unsigned _memorySize,
                  unsigned _percentReads,
-//		 unsigned _percentCopies,
+                 unsigned _percentFunctional,
                  unsigned _percentUncacheable,
                  unsigned _progressInterval,
                  unsigned _percentSourceUnaligned,
@@ -130,7 +130,7 @@ MemTest::MemTest(const string &name,
 //      checkMem(check_mem),
       size(_memorySize),
       percentReads(_percentReads),
-//      percentCopies(_percentCopies),
+      percentFunctional(_percentFunctional),
       percentUncacheable(_percentUncacheable),
       progressInterval(_progressInterval),
       nextProgressMessage(_progressInterval),
@@ -204,7 +204,7 @@ printData(ostream &os, uint8_t *data, int nbytes)
 }
 
 void
-MemTest::completeRequest(Packet *pkt)
+MemTest::completeRequest(PacketPtr pkt)
 {
     MemTestSenderState *state =
         dynamic_cast<MemTestSenderState *>(pkt->senderState);
@@ -345,8 +345,8 @@ MemTest::tick()
     } else {
         paddr = ((base) ? baseAddr1 : baseAddr2) + offset;
     }
-    //bool probe = (random() % 2 == 1) && !req->isUncacheable();
-    bool probe = false;
+    bool probe = (random() % 100 < percentFunctional) && !(flags & UNCACHEABLE);
+    //bool probe = false;
 
     paddr &= ~((1 << access_size) - 1);
     req->setPhys(paddr, 1 << access_size, flags);
@@ -381,13 +381,14 @@ MemTest::tick()
                  << dec << curTick << endl;
         }
 
-        Packet *pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast);
+        PacketPtr pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast);
         pkt->dataDynamicArray(new uint8_t[req->getSize()]);
         MemTestSenderState *state = new MemTestSenderState(result);
         pkt->senderState = state;
 
         if (probe) {
             cachePort.sendFunctional(pkt);
+            pkt->makeAtomicResponse();
             completeRequest(pkt);
         } else {
 //	    req->completionEvent = new MemCompleteEvent(req, result, this);
@@ -420,7 +421,7 @@ MemTest::tick()
                  << dec << curTick << endl;
         }
 */
-        Packet *pkt = new Packet(req, Packet::WriteReq, Packet::Broadcast);
+        PacketPtr pkt = new Packet(req, Packet::WriteReq, Packet::Broadcast);
         uint8_t *pkt_data = new uint8_t[req->getSize()];
         pkt->dataDynamicArray(pkt_data);
         memcpy(pkt_data, &data, req->getSize());
@@ -431,6 +432,7 @@ MemTest::tick()
 
         if (probe) {
             cachePort.sendFunctional(pkt);
+            pkt->makeAtomicResponse();
             completeRequest(pkt);
         } else {
 //	    req->completionEvent = new MemCompleteEvent(req, NULL, this);
@@ -499,7 +501,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(MemTest)
 //    SimObjectParam<PhysicalMemory *> check_mem;
     Param<unsigned> memory_size;
     Param<unsigned> percent_reads;
-//    Param<unsigned> percent_copies;
+    Param<unsigned> percent_functional;
     Param<unsigned> percent_uncacheable;
     Param<unsigned> progress_interval;
     Param<unsigned> percent_source_unaligned;
@@ -518,7 +520,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(MemTest)
 //    INIT_PARAM(check_mem, "check memory"),
     INIT_PARAM(memory_size, "memory size"),
     INIT_PARAM(percent_reads, "target read percentage"),
-//    INIT_PARAM(percent_copies, "target copy percentage"),
+    INIT_PARAM(percent_functional, "percentage of access that are functional"),
     INIT_PARAM(percent_uncacheable, "target uncacheable percentage"),
     INIT_PARAM(progress_interval, "progress report interval (in accesses)"),
     INIT_PARAM(percent_source_unaligned,
@@ -535,7 +537,7 @@ END_INIT_SIM_OBJECT_PARAMS(MemTest)
 CREATE_SIM_OBJECT(MemTest)
 {
     return new MemTest(getInstanceName(), /*cache->getInterface(),*/ /*main_mem,*/
-                       /*check_mem,*/ memory_size, percent_reads, /*percent_copies,*/
+                       /*check_mem,*/ memory_size, percent_reads, percent_functional,
                        percent_uncacheable, progress_interval,
                        percent_source_unaligned, percent_dest_unaligned,
                        trace_addr, max_loads, atomic);
diff --git a/src/cpu/memtest/memtest.hh b/src/cpu/memtest/memtest.hh
index 5de41f0d8..edde4a3b2 100644
--- a/src/cpu/memtest/memtest.hh
+++ b/src/cpu/memtest/memtest.hh
@@ -55,7 +55,7 @@ class MemTest : public MemObject
 //	    PhysicalMemory *check_mem,
             unsigned _memorySize,
             unsigned _percentReads,
-//	    unsigned _percentCopies,
+            unsigned _percentFunctional,
             unsigned _percentUncacheable,
             unsigned _progressInterval,
             unsigned _percentSourceUnaligned,
@@ -102,11 +102,11 @@ class MemTest : public MemObject
 
       protected:
 
-        virtual bool recvTiming(Packet *pkt);
+        virtual bool recvTiming(PacketPtr pkt);
 
-        virtual Tick recvAtomic(Packet *pkt);
+        virtual Tick recvAtomic(PacketPtr pkt);
 
-        virtual void recvFunctional(Packet *pkt);
+        virtual void recvFunctional(PacketPtr pkt);
 
         virtual void recvStatusChange(Status status);
 
@@ -133,7 +133,7 @@ class MemTest : public MemObject
     };
 
 //    Request *dataReq;
-    Packet  *retryPkt;
+    PacketPtr retryPkt;
 //    MemInterface *cacheInterface;
 //    PhysicalMemory *mainMem;
 //    PhysicalMemory *checkMem;
@@ -144,7 +144,7 @@ class MemTest : public MemObject
     unsigned size;		// size of testing memory region
 
     unsigned percentReads;	// target percentage of read accesses
-//    unsigned percentCopies;	// target percentage of copy accesses
+    unsigned percentFunctional;	// target percentage of functional accesses
     unsigned percentUncacheable;
 
     int id;
@@ -184,9 +184,9 @@ class MemTest : public MemObject
     Stats::Scalar<> numCopiesStat;
 
     // called by MemCompleteEvent::process()
-    void completeRequest(Packet *pkt);
+    void completeRequest(PacketPtr pkt);
 
-    void sendPkt(Packet *pkt);
+    void sendPkt(PacketPtr pkt);
 
     void doRetry();
 
diff --git a/src/cpu/o3/alpha/dyn_inst.hh b/src/cpu/o3/alpha/dyn_inst.hh
index 9dee610b6..294aadde8 100644
--- a/src/cpu/o3/alpha/dyn_inst.hh
+++ b/src/cpu/o3/alpha/dyn_inst.hh
@@ -86,7 +86,7 @@ class AlphaDynInst : public BaseDynInst<Impl>
     Fault initiateAcc();
 
     /** Completes the access.  Only valid for memory operations. */
-    Fault completeAcc(Packet *pkt);
+    Fault completeAcc(PacketPtr pkt);
 
   private:
     /** Initializes variables. */
diff --git a/src/cpu/o3/alpha/dyn_inst_impl.hh b/src/cpu/o3/alpha/dyn_inst_impl.hh
index 2d1b4b309..b273a7b9b 100644
--- a/src/cpu/o3/alpha/dyn_inst_impl.hh
+++ b/src/cpu/o3/alpha/dyn_inst_impl.hh
@@ -100,7 +100,7 @@ AlphaDynInst<Impl>::initiateAcc()
 
 template <class Impl>
 Fault
-AlphaDynInst<Impl>::completeAcc(Packet *pkt)
+AlphaDynInst<Impl>::completeAcc(PacketPtr pkt)
 {
     this->fault = this->staticInst->completeAcc(pkt, this, this->traceData);
 
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 4c9a8e91f..367508288 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -850,9 +850,6 @@ template <class Impl>
 void
 FullO3CPU<Impl>::resume()
 {
-#if FULL_SYSTEM
-    assert(system->getMemoryMode() == System::Timing);
-#endif
     fetch.resume();
     decode.resume();
     rename.resume();
@@ -864,6 +861,10 @@ FullO3CPU<Impl>::resume()
     if (_status == SwitchedOut || _status == Idle)
         return;
 
+#if FULL_SYSTEM
+    assert(system->getMemoryMode() == System::Timing);
+#endif
+
     if (!tickEvent.scheduled())
         tickEvent.schedule(curTick);
     _status = Running;
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index 280bf0e71..5555bff85 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -36,7 +36,7 @@
 #include "base/statistics.hh"
 #include "base/timebuf.hh"
 #include "cpu/pc_event.hh"
-#include "mem/packet_impl.hh"
+#include "mem/packet.hh"
 #include "mem/port.hh"
 #include "sim/eventq.hh"
 
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index 147c670de..e7bf83b20 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -78,9 +78,12 @@ DefaultFetch<Impl>::IcachePort::recvStatusChange(Status status)
 
 template<class Impl>
 bool
-DefaultFetch<Impl>::IcachePort::recvTiming(Packet *pkt)
+DefaultFetch<Impl>::IcachePort::recvTiming(PacketPtr pkt)
 {
-    fetch->processCacheCompletion(pkt);
+    if (pkt->isResponse()) {
+        fetch->processCacheCompletion(pkt);
+    }
+    // Otherwise we snooped a coherence request: nothing to do, just return.
     return true;
 }
 
diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh
index 7b7d1eb8e..317e23b14 100644
--- a/src/cpu/o3/lsq_impl.hh
+++ b/src/cpu/o3/lsq_impl.hh
@@ -63,7 +63,14 @@ template <class Impl>
 bool
 LSQ<Impl>::DcachePort::recvTiming(PacketPtr pkt)
 {
-    lsq->thread[pkt->req->getThreadNum()].completeDataAccess(pkt);
+    if (pkt->isResponse()) {
+        lsq->thread[pkt->req->getThreadNum()].completeDataAccess(pkt);
+    }
+    else {
+        // Snooped coherence request: the LSQ is not updated for these; warn.
+        warn("Received a coherence request (Invalidate?), O3CPU doesn't "
+             "update LSQ for these\n");
+    }
     return true;
 }
 
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index 11a02e7c7..1b207fdbc 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -40,7 +40,7 @@
 #include "config/full_system.hh"
 #include "base/hashmap.hh"
 #include "cpu/inst_seq.hh"
-#include "mem/packet_impl.hh"
+#include "mem/packet.hh"
 #include "mem/port.hh"
 
 /**
@@ -219,7 +219,7 @@ class LSQUnit {
     void writeback(DynInstPtr &inst, PacketPtr pkt);
 
     /** Handles completing the send of a store to memory. */
-    void storePostSend(Packet *pkt);
+    void storePostSend(PacketPtr pkt);
 
     /** Completes the store at the specified index. */
     void completeStore(int store_idx);
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 3f9db912f..d940d7cb3 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -763,7 +763,7 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
 
 template <class Impl>
 void
-LSQUnit<Impl>::storePostSend(Packet *pkt)
+LSQUnit<Impl>::storePostSend(PacketPtr pkt)
 {
     if (isStalled() &&
         storeQueue[storeWBIdx].inst->seqNum == stallingStoreIsn) {
diff --git a/src/cpu/o3/mips/dyn_inst.hh b/src/cpu/o3/mips/dyn_inst.hh
index 06bdfcec4..aa30bfa1e 100755
--- a/src/cpu/o3/mips/dyn_inst.hh
+++ b/src/cpu/o3/mips/dyn_inst.hh
@@ -87,7 +87,7 @@ class MipsDynInst : public BaseDynInst<Impl>
     Fault initiateAcc();
 
     /** Completes the access.  Only valid for memory operations. */
-    Fault completeAcc(Packet *pkt);
+    Fault completeAcc(PacketPtr pkt);
 
   private:
     /** Initializes variables. */
diff --git a/src/cpu/o3/mips/dyn_inst_impl.hh b/src/cpu/o3/mips/dyn_inst_impl.hh
index 57dec1ccf..5bc01b9b3 100755
--- a/src/cpu/o3/mips/dyn_inst_impl.hh
+++ b/src/cpu/o3/mips/dyn_inst_impl.hh
@@ -100,7 +100,7 @@ MipsDynInst<Impl>::initiateAcc()
 
 template <class Impl>
 Fault
-MipsDynInst<Impl>::completeAcc(Packet *pkt)
+MipsDynInst<Impl>::completeAcc(PacketPtr pkt)
 {
     this->fault = this->staticInst->completeAcc(pkt, this, this->traceData);
 
diff --git a/src/cpu/ozone/dyn_inst.hh b/src/cpu/ozone/dyn_inst.hh
index 75ac464ec..e7390626e 100644
--- a/src/cpu/ozone/dyn_inst.hh
+++ b/src/cpu/ozone/dyn_inst.hh
@@ -133,7 +133,7 @@ class OzoneDynInst : public BaseDynInst<Impl>
 
     Fault initiateAcc();
 
-    Fault completeAcc(Packet *pkt);
+    Fault completeAcc(PacketPtr pkt);
 
     // The register accessor methods provide the index of the
     // instruction's operand (e.g., 0 or 1), not the architectural
diff --git a/src/cpu/ozone/dyn_inst_impl.hh b/src/cpu/ozone/dyn_inst_impl.hh
index db1460eba..0a1e1c139 100644
--- a/src/cpu/ozone/dyn_inst_impl.hh
+++ b/src/cpu/ozone/dyn_inst_impl.hh
@@ -108,7 +108,7 @@ OzoneDynInst<Impl>::initiateAcc()
 
 template <class Impl>
 Fault
-OzoneDynInst<Impl>::completeAcc(Packet *pkt)
+OzoneDynInst<Impl>::completeAcc(PacketPtr pkt)
 {
     this->fault = this->staticInst->completeAcc(pkt, this, this->traceData);
 
diff --git a/src/cpu/ozone/front_end.hh b/src/cpu/ozone/front_end.hh
index 59cf9785c..2bdca35b9 100644
--- a/src/cpu/ozone/front_end.hh
+++ b/src/cpu/ozone/front_end.hh
@@ -129,7 +129,7 @@ class FrontEnd
                 const bool is_branch = false, const bool branch_taken = false);
     DynInstPtr getInst();
 
-    void processCacheCompletion(Packet *pkt);
+    void processCacheCompletion(PacketPtr pkt);
 
     void addFreeRegs(int num_freed);
 
diff --git a/src/cpu/ozone/front_end_impl.hh b/src/cpu/ozone/front_end_impl.hh
index 9eff8619d..36e87ec9c 100644
--- a/src/cpu/ozone/front_end_impl.hh
+++ b/src/cpu/ozone/front_end_impl.hh
@@ -74,7 +74,7 @@ FrontEnd<Impl>::IcachePort::recvStatusChange(Status status)
 
 template<class Impl>
 bool
-FrontEnd<Impl>::IcachePort::recvTiming(Packet *pkt)
+FrontEnd<Impl>::IcachePort::recvTiming(PacketPtr pkt)
 {
     fe->processCacheCompletion(pkt);
     return true;
diff --git a/src/cpu/ozone/lw_lsq.hh b/src/cpu/ozone/lw_lsq.hh
index 9b93ce74f..8307da521 100644
--- a/src/cpu/ozone/lw_lsq.hh
+++ b/src/cpu/ozone/lw_lsq.hh
@@ -222,7 +222,7 @@ class OzoneLWLSQ {
     void writeback(DynInstPtr &inst, PacketPtr pkt);
 
     /** Handles completing the send of a store to memory. */
-    void storePostSend(Packet *pkt, DynInstPtr &inst);
+    void storePostSend(PacketPtr pkt, DynInstPtr &inst);
 
     /** Completes the store at the specified index. */
     void completeStore(DynInstPtr &inst);
diff --git a/src/cpu/ozone/lw_lsq_impl.hh b/src/cpu/ozone/lw_lsq_impl.hh
index e523712da..1f3f18502 100644
--- a/src/cpu/ozone/lw_lsq_impl.hh
+++ b/src/cpu/ozone/lw_lsq_impl.hh
@@ -832,7 +832,7 @@ OzoneLWLSQ<Impl>::dumpInsts()
 
 template <class Impl>
 void
-OzoneLWLSQ<Impl>::storePostSend(Packet *pkt, DynInstPtr &inst)
+OzoneLWLSQ<Impl>::storePostSend(PacketPtr pkt, DynInstPtr &inst)
 {
     if (isStalled() &&
         inst->seqNum == stallingStoreIsn) {
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index fe421ae6c..38a8ba097 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -32,7 +32,8 @@
 #include "arch/utility.hh"
 #include "cpu/exetrace.hh"
 #include "cpu/simple/atomic.hh"
-#include "mem/packet_impl.hh"
+#include "mem/packet.hh"
+#include "mem/packet_access.hh"
 #include "sim/builder.hh"
 #include "sim/system.hh"
 
@@ -92,21 +93,21 @@ AtomicSimpleCPU::init()
 }
 
 bool
-AtomicSimpleCPU::CpuPort::recvTiming(Packet *pkt)
+AtomicSimpleCPU::CpuPort::recvTiming(PacketPtr pkt)
 {
     panic("AtomicSimpleCPU doesn't expect recvTiming callback!");
     return true;
 }
 
 Tick
-AtomicSimpleCPU::CpuPort::recvAtomic(Packet *pkt)
+AtomicSimpleCPU::CpuPort::recvAtomic(PacketPtr pkt)
 {
-    panic("AtomicSimpleCPU doesn't expect recvAtomic callback!");
+    // A snooped coherence request can arrive here; ignore it and return.
     return curTick;
 }
 
 void
-AtomicSimpleCPU::CpuPort::recvFunctional(Packet *pkt)
+AtomicSimpleCPU::CpuPort::recvFunctional(PacketPtr pkt)
 {
     //No internal storage to update, just return
     return;
@@ -259,7 +260,7 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags)
 {
     // use the CPU's statically allocated read request and packet objects
     Request *req = data_read_req;
-    Packet  *pkt = data_read_pkt;
+    PacketPtr pkt = data_read_pkt;
 
     req->setVirt(0, addr, sizeof(T), flags, thread->readPC());
 
@@ -341,7 +342,7 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
 {
     // use the CPU's statically allocated write request and packet objects
     Request *req = data_write_req;
-    Packet  *pkt = data_write_pkt;
+    PacketPtr pkt = data_write_pkt;
 
     req->setVirt(0, addr, sizeof(T), flags, thread->readPC());
 
diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh
index 52afd76ef..0edca9369 100644
--- a/src/cpu/simple/atomic.hh
+++ b/src/cpu/simple/atomic.hh
@@ -92,11 +92,11 @@ class AtomicSimpleCPU : public BaseSimpleCPU
 
       protected:
 
-        virtual bool recvTiming(Packet *pkt);
+        virtual bool recvTiming(PacketPtr pkt);
 
-        virtual Tick recvAtomic(Packet *pkt);
+        virtual Tick recvAtomic(PacketPtr pkt);
 
-        virtual void recvFunctional(Packet *pkt);
+        virtual void recvFunctional(PacketPtr pkt);
 
         virtual void recvStatusChange(Status status);
 
@@ -110,12 +110,12 @@ class AtomicSimpleCPU : public BaseSimpleCPU
     CpuPort icachePort;
     CpuPort dcachePort;
 
-    Request *ifetch_req;
-    Packet  *ifetch_pkt;
-    Request *data_read_req;
-    Packet  *data_read_pkt;
-    Request *data_write_req;
-    Packet  *data_write_pkt;
+    Request  *ifetch_req;
+    PacketPtr ifetch_pkt;
+    Request  *data_read_req;
+    PacketPtr data_read_pkt;
+    Request  *data_write_req;
+    PacketPtr data_write_pkt;
 
     bool dcache_access;
     Tick dcache_latency;
diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc
index 769e400df..cbb3980cb 100644
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -47,7 +47,7 @@
 #include "cpu/static_inst.hh"
 #include "cpu/thread_context.hh"
 #include "kern/kernel_stats.hh"
-#include "mem/packet_impl.hh"
+#include "mem/packet.hh"
 #include "sim/builder.hh"
 #include "sim/byteswap.hh"
 #include "sim/debug.hh"
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index ad5c0e5d6..97df0e5d5 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -32,7 +32,8 @@
 #include "arch/utility.hh"
 #include "cpu/exetrace.hh"
 #include "cpu/simple/timing.hh"
-#include "mem/packet_impl.hh"
+#include "mem/packet.hh"
+#include "mem/packet_access.hh"
 #include "sim/builder.hh"
 #include "sim/system.hh"
 
@@ -65,14 +66,14 @@ TimingSimpleCPU::init()
 }
 
 Tick
-TimingSimpleCPU::CpuPort::recvAtomic(Packet *pkt)
+TimingSimpleCPU::CpuPort::recvAtomic(PacketPtr pkt)
 {
     panic("TimingSimpleCPU doesn't expect recvAtomic callback!");
     return curTick;
 }
 
 void
-TimingSimpleCPU::CpuPort::recvFunctional(Packet *pkt)
+TimingSimpleCPU::CpuPort::recvFunctional(PacketPtr pkt)
 {
     //No internal storage to update, jusst return
     return;
@@ -89,7 +90,7 @@ TimingSimpleCPU::CpuPort::recvStatusChange(Status status)
 
 
 void
-TimingSimpleCPU::CpuPort::TickEvent::schedule(Packet *_pkt, Tick t)
+TimingSimpleCPU::CpuPort::TickEvent::schedule(PacketPtr _pkt, Tick t)
 {
     pkt = _pkt;
     Event::schedule(t);
@@ -268,7 +269,7 @@ TimingSimpleCPU::read(Addr addr, T &data, unsigned flags)
 
     // Now do the access.
     if (fault == NoFault) {
-        Packet *pkt =
+        PacketPtr pkt =
             new Packet(req, Packet::ReadReq, Packet::Broadcast);
         pkt->dataDynamic<T>(new T);
 
@@ -470,7 +471,7 @@ TimingSimpleCPU::advanceInst(Fault fault)
 
 
 void
-TimingSimpleCPU::completeIfetch(Packet *pkt)
+TimingSimpleCPU::completeIfetch(PacketPtr pkt)
 {
     // received a response from the icache: execute the received
     // instruction
@@ -526,19 +527,25 @@ TimingSimpleCPU::IcachePort::ITickEvent::process()
 }
 
 bool
-TimingSimpleCPU::IcachePort::recvTiming(Packet *pkt)
+TimingSimpleCPU::IcachePort::recvTiming(PacketPtr pkt)
 {
-    // delay processing of returned data until next CPU clock edge
-    Tick time = pkt->req->getTime();
-    while (time < curTick)
-        time += lat;
+    if (pkt->isResponse()) {
+        // response: wait for the first CPU clock edge at or after curTick
+        Tick time = pkt->req->getTime();
+        while (time < curTick)
+            time += lat;
 
-    if (time == curTick)
-        cpu->completeIfetch(pkt);
-    else
-        tickEvent.schedule(pkt, time);
+        if (time == curTick)
+            cpu->completeIfetch(pkt);
+        else
+            tickEvent.schedule(pkt, time);
 
-    return true;
+        return true;
+    }
+    else {
+        // not a response: snooped coherence request, nothing to do
+        return true;
+    }
 }
 
 void
@@ -548,7 +555,7 @@ TimingSimpleCPU::IcachePort::recvRetry()
     // waiting to transmit
     assert(cpu->ifetch_pkt != NULL);
     assert(cpu->_status == IcacheRetry);
-    Packet *tmp = cpu->ifetch_pkt;
+    PacketPtr tmp = cpu->ifetch_pkt;
     if (sendTiming(tmp)) {
         cpu->_status = IcacheWaitResponse;
         cpu->ifetch_pkt = NULL;
@@ -556,7 +563,7 @@ TimingSimpleCPU::IcachePort::recvRetry()
 }
 
 void
-TimingSimpleCPU::completeDataAccess(Packet *pkt)
+TimingSimpleCPU::completeDataAccess(PacketPtr pkt)
 {
     // received a response from the dcache: complete the load or store
     // instruction
@@ -598,19 +605,25 @@ TimingSimpleCPU::completeDrain()
 }
 
 bool
-TimingSimpleCPU::DcachePort::recvTiming(Packet *pkt)
+TimingSimpleCPU::DcachePort::recvTiming(PacketPtr pkt)
 {
-    // delay processing of returned data until next CPU clock edge
-    Tick time = pkt->req->getTime();
-    while (time < curTick)
-        time += lat;
+    if (pkt->isResponse()) {
+        // response: wait for the first CPU clock edge at or after curTick
+        Tick time = pkt->req->getTime();
+        while (time < curTick)
+            time += lat;
 
-    if (time == curTick)
-        cpu->completeDataAccess(pkt);
-    else
-        tickEvent.schedule(pkt, time);
+        if (time == curTick)
+            cpu->completeDataAccess(pkt);
+        else
+            tickEvent.schedule(pkt, time);
 
-    return true;
+        return true;
+    }
+    else {
+        // not a response: snooped coherence request, nothing to do
+        return true;
+    }
 }
 
 void
@@ -626,7 +639,7 @@ TimingSimpleCPU::DcachePort::recvRetry()
     // waiting to transmit
     assert(cpu->dcache_pkt != NULL);
     assert(cpu->_status == DcacheRetry);
-    Packet *tmp = cpu->dcache_pkt;
+    PacketPtr tmp = cpu->dcache_pkt;
     if (sendTiming(tmp)) {
         cpu->_status = DcacheWaitResponse;
         // memory system takes ownership of packet
diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh
index 988ddeded..577e13e40 100644
--- a/src/cpu/simple/timing.hh
+++ b/src/cpu/simple/timing.hh
@@ -84,9 +84,9 @@ class TimingSimpleCPU : public BaseSimpleCPU
 
       protected:
 
-        virtual Tick recvAtomic(Packet *pkt);
+        virtual Tick recvAtomic(PacketPtr pkt);
 
-        virtual void recvFunctional(Packet *pkt);
+        virtual void recvFunctional(PacketPtr pkt);
 
         virtual void recvStatusChange(Status status);
 
@@ -96,13 +96,13 @@ class TimingSimpleCPU : public BaseSimpleCPU
 
         struct TickEvent : public Event
         {
-            Packet *pkt;
+            PacketPtr pkt;
             TimingSimpleCPU *cpu;
 
             TickEvent(TimingSimpleCPU *_cpu)
                 :Event(&mainEventQueue), cpu(_cpu) {}
             const char *description() { return "Timing CPU clock event"; }
-            void schedule(Packet *_pkt, Tick t);
+            void schedule(PacketPtr _pkt, Tick t);
         };
 
     };
@@ -117,7 +117,7 @@ class TimingSimpleCPU : public BaseSimpleCPU
 
       protected:
 
-        virtual bool recvTiming(Packet *pkt);
+        virtual bool recvTiming(PacketPtr pkt);
 
         virtual void recvRetry();
 
@@ -144,7 +144,7 @@ class TimingSimpleCPU : public BaseSimpleCPU
 
       protected:
 
-        virtual bool recvTiming(Packet *pkt);
+        virtual bool recvTiming(PacketPtr pkt);
 
         virtual void recvRetry();
 
@@ -163,8 +163,8 @@ class TimingSimpleCPU : public BaseSimpleCPU
     IcachePort icachePort;
     DcachePort dcachePort;
 
-    Packet *ifetch_pkt;
-    Packet *dcache_pkt;
+    PacketPtr ifetch_pkt;
+    PacketPtr dcache_pkt;
 
     int cpu_id;
     Tick previousTick;
@@ -192,8 +192,8 @@ class TimingSimpleCPU : public BaseSimpleCPU
     Fault write(T data, Addr addr, unsigned flags, uint64_t *res);
 
     void fetch();
-    void completeIfetch(Packet *);
-    void completeDataAccess(Packet *);
+    void completeIfetch(PacketPtr );
+    void completeDataAccess(PacketPtr );
     void advanceInst(Fault fault);
   private:
     void completeDrain();
diff --git a/src/dev/alpha_console.cc b/src/dev/alpha_console.cc
index 181bbf934..40868de51 100644
--- a/src/dev/alpha_console.cc
+++ b/src/dev/alpha_console.cc
@@ -48,6 +48,8 @@
 #include "dev/platform.hh"
 #include "dev/simconsole.hh"
 #include "dev/simple_disk.hh"
+#include "mem/packet.hh"
+#include "mem/packet_access.hh"
 #include "mem/physical.hh"
 #include "sim/builder.hh"
 #include "sim/sim_object.hh"
@@ -92,7 +94,7 @@ AlphaConsole::startup()
 }
 
 Tick
-AlphaConsole::read(Packet *pkt)
+AlphaConsole::read(PacketPtr pkt)
 {
 
     /** XXX Do we want to push the addr munging to a bus brige or something? So
@@ -193,7 +195,7 @@ AlphaConsole::read(Packet *pkt)
 }
 
 Tick
-AlphaConsole::write(Packet *pkt)
+AlphaConsole::write(PacketPtr pkt)
 {
     assert(pkt->result == Packet::Unknown);
     assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
diff --git a/src/dev/alpha_console.hh b/src/dev/alpha_console.hh
index b6360d40f..7d6d1e679 100644
--- a/src/dev/alpha_console.hh
+++ b/src/dev/alpha_console.hh
@@ -118,8 +118,8 @@ class AlphaConsole : public BasicPioDevice
     /**
      * memory mapped reads and writes
      */
-    virtual Tick read(Packet *pkt);
-    virtual Tick write(Packet *pkt);
+    virtual Tick read(PacketPtr pkt);
+    virtual Tick write(PacketPtr pkt);
 
     /**
      * standard serialization routines for checkpointing
diff --git a/src/dev/baddev.cc b/src/dev/baddev.cc
index e4297be19..1bab93492 100644
--- a/src/dev/baddev.cc
+++ b/src/dev/baddev.cc
@@ -53,13 +53,13 @@ BadDevice::BadDevice(Params *p)
 }
 
 Tick
-BadDevice::read(Packet *pkt)
+BadDevice::read(PacketPtr pkt)
 {
     panic("Device %s not imlpmented\n", devname);
 }
 
 Tick
-BadDevice::write(Packet *pkt)
+BadDevice::write(PacketPtr pkt)
 {
     panic("Device %s not imlpmented\n", devname);
 }
diff --git a/src/dev/baddev.hh b/src/dev/baddev.hh
index d7d778af4..040fff8e5 100644
--- a/src/dev/baddev.hh
+++ b/src/dev/baddev.hh
@@ -67,8 +67,8 @@ class BadDevice : public BasicPioDevice
       */
     BadDevice(Params *p);
 
-    virtual Tick read(Packet *pkt);
-    virtual Tick write(Packet *pkt);
+    virtual Tick read(PacketPtr pkt);
+    virtual Tick write(PacketPtr pkt);
 };
 
 #endif // __DEV_BADDEV_HH__
diff --git a/src/dev/i8254xGBe.cc b/src/dev/i8254xGBe.cc
index 77c731899..943e6881f 100644
--- a/src/dev/i8254xGBe.cc
+++ b/src/dev/i8254xGBe.cc
@@ -47,7 +47,7 @@ IGbE::IGbE(Params *p)
 
 
 Tick
-IGbE::writeConfig(Packet *pkt)
+IGbE::writeConfig(PacketPtr pkt)
 {
     int offset = pkt->getAddr() & PCI_CONFIG_SIZE;
     if (offset < PCI_DEVICE_SPECIFIC)
@@ -63,7 +63,7 @@ IGbE::writeConfig(Packet *pkt)
 }
 
 Tick
-IGbE::read(Packet *pkt)
+IGbE::read(PacketPtr pkt)
 {
     int bar;
     Addr daddr;
@@ -88,7 +88,7 @@ IGbE::read(Packet *pkt)
 }
 
 Tick
-IGbE::write(Packet *pkt)
+IGbE::write(PacketPtr pkt)
 {
     int bar;
     Addr daddr;
diff --git a/src/dev/i8254xGBe.hh b/src/dev/i8254xGBe.hh
index 88931eb6d..161d8befc 100644
--- a/src/dev/i8254xGBe.hh
+++ b/src/dev/i8254xGBe.hh
@@ -59,10 +59,10 @@ class IGbE : public PciDev
     IGbE(Params *params);
     ~IGbE() {;}
 
-    virtual Tick read(Packet *pkt);
-    virtual Tick write(Packet *pkt);
+    virtual Tick read(PacketPtr pkt);
+    virtual Tick write(PacketPtr pkt);
 
-    virtual Tick writeConfig(Packet *pkt);
+    virtual Tick writeConfig(PacketPtr pkt);
 
     bool ethRxPkt(EthPacketPtr packet);
     void ethTxDone();
diff --git a/src/dev/ide_ctrl.cc b/src/dev/ide_ctrl.cc
index 8007fda5e..1afad529f 100644
--- a/src/dev/ide_ctrl.cc
+++ b/src/dev/ide_ctrl.cc
@@ -43,6 +43,7 @@
 #include "dev/pcireg.h"
 #include "dev/platform.hh"
 #include "mem/packet.hh"
+#include "mem/packet_access.hh"
 #include "sim/builder.hh"
 #include "sim/sim_object.hh"
 #include "sim/byteswap.hh"
@@ -228,7 +229,7 @@ IdeController::setDmaComplete(IdeDisk *disk)
 ////
 
 Tick
-IdeController::readConfig(Packet *pkt)
+IdeController::readConfig(PacketPtr pkt)
 {
     int offset = pkt->getAddr() & PCI_CONFIG_SIZE;
     if (offset < PCI_DEVICE_SPECIFIC)
@@ -301,7 +302,7 @@ IdeController::readConfig(Packet *pkt)
 
 
 Tick
-IdeController::writeConfig(Packet *pkt)
+IdeController::writeConfig(PacketPtr pkt)
 {
     int offset = pkt->getAddr() & PCI_CONFIG_SIZE;
     if (offset < PCI_DEVICE_SPECIFIC) {
@@ -408,7 +409,7 @@ IdeController::writeConfig(Packet *pkt)
 
 
 Tick
-IdeController::read(Packet *pkt)
+IdeController::read(PacketPtr pkt)
 {
     Addr offset;
     IdeChannel channel;
@@ -494,7 +495,7 @@ IdeController::read(Packet *pkt)
 }
 
 Tick
-IdeController::write(Packet *pkt)
+IdeController::write(PacketPtr pkt)
 {
     Addr offset;
     IdeChannel channel;
diff --git a/src/dev/ide_ctrl.hh b/src/dev/ide_ctrl.hh
index 5842d322e..291446d1c 100644
--- a/src/dev/ide_ctrl.hh
+++ b/src/dev/ide_ctrl.hh
@@ -204,8 +204,8 @@ class IdeController : public PciDev
     IdeController(Params *p);
     ~IdeController();
 
-    virtual Tick writeConfig(Packet *pkt);
-    virtual Tick readConfig(Packet *pkt);
+    virtual Tick writeConfig(PacketPtr pkt);
+    virtual Tick readConfig(PacketPtr pkt);
 
     void setDmaComplete(IdeDisk *disk);
 
@@ -214,14 +214,14 @@ class IdeController : public PciDev
      * @param pkt Packet describing what is to be read
      * @return The amount of time to complete this request
      */
-    virtual Tick read(Packet *pkt);
+    virtual Tick read(PacketPtr pkt);
 
     /**
      * Write a done field for a given target.
      * @param pkt Packet describing what is to be written
      * @return The amount of time to complete this request
      */
-    virtual Tick write(Packet *pkt);
+    virtual Tick write(PacketPtr pkt);
 
     /**
      * Serialize this object to the given output stream.
diff --git a/src/dev/io_device.cc b/src/dev/io_device.cc
index 408d8de3e..9671d77cc 100644
--- a/src/dev/io_device.cc
+++ b/src/dev/io_device.cc
@@ -42,7 +42,7 @@ PioPort::PioPort(PioDevice *dev, System *s, std::string pname)
 
 
 Tick
-PioPort::recvAtomic(Packet *pkt)
+PioPort::recvAtomic(PacketPtr pkt)
 {
     return pkt->isRead() ? device->read(pkt) : device->write(pkt);
 }
@@ -97,7 +97,7 @@ DmaPort::DmaPort(DmaDevice *dev, System *s)
 { }
 
 bool
-DmaPort::recvTiming(Packet *pkt)
+DmaPort::recvTiming(PacketPtr pkt)
 {
 
 
@@ -165,7 +165,7 @@ DmaPort::drain(Event *de)
 void
 DmaPort::recvRetry()
 {
-    Packet* pkt = transmitList.front();
+    PacketPtr pkt = transmitList.front();
     bool result = true;
     while (result && transmitList.size()) {
         DPRINTF(DMA, "Retry on  Packet %#x with senderState: %#x\n",
@@ -194,7 +194,7 @@ DmaPort::dmaAction(Packet::Command cmd, Addr addr, int size, Event *event,
     for (ChunkGenerator gen(addr, size, peerBlockSize());
          !gen.done(); gen.next()) {
             Request *req = new Request(gen.addr(), gen.size(), 0);
-            Packet *pkt = new Packet(req, cmd, Packet::Broadcast);
+            PacketPtr pkt = new Packet(req, cmd, Packet::Broadcast);
 
             // Increment the data pointer on a write
             if (data)
@@ -211,7 +211,7 @@ DmaPort::dmaAction(Packet::Command cmd, Addr addr, int size, Event *event,
 
 
 void
-DmaPort::sendDma(Packet *pkt, bool front)
+DmaPort::sendDma(PacketPtr pkt, bool front)
 {
     // some kind of selction between access methods
     // more work is going to have to be done to make
diff --git a/src/dev/io_device.hh b/src/dev/io_device.hh
index df4f494cb..aa242d170 100644
--- a/src/dev/io_device.hh
+++ b/src/dev/io_device.hh
@@ -33,9 +33,9 @@
 #define __DEV_IO_DEVICE_HH__
 
 #include "mem/mem_object.hh"
-#include "mem/packet_impl.hh"
-#include "sim/sim_object.hh"
+#include "mem/packet.hh"
 #include "mem/tport.hh"
+#include "sim/sim_object.hh"
 
 class Event;
 class Platform;
@@ -56,7 +56,7 @@ class PioPort : public SimpleTimingPort
     /** The device that this port serves. */
     PioDevice *device;
 
-    virtual Tick recvAtomic(Packet *pkt);
+    virtual Tick recvAtomic(PacketPtr pkt);
 
     virtual void getDeviceAddressRanges(AddrRangeList &resp,
                                         AddrRangeList &snoop);
@@ -91,7 +91,7 @@ class DmaPort : public Port
     };
 
     DmaDevice *device;
-    std::list<Packet*> transmitList;
+    std::list<PacketPtr> transmitList;
 
     /** The system that device/port are in. This is used to select which mode
      * we are currently operating in. */
@@ -107,10 +107,10 @@ class DmaPort : public Port
      * here.*/
     Event *drainEvent;
 
-    virtual bool recvTiming(Packet *pkt);
-    virtual Tick recvAtomic(Packet *pkt)
+    virtual bool recvTiming(PacketPtr pkt);
+    virtual Tick recvAtomic(PacketPtr pkt)
     { panic("dma port shouldn't be used for pio access."); }
-    virtual void recvFunctional(Packet *pkt)
+    virtual void recvFunctional(PacketPtr pkt)
     { panic("dma port shouldn't be used for pio access."); }
 
     virtual void recvStatusChange(Status status)
@@ -122,7 +122,7 @@ class DmaPort : public Port
                                         AddrRangeList &snoop)
     { resp.clear(); snoop.clear(); }
 
-    void sendDma(Packet *pkt, bool front = false);
+    void sendDma(PacketPtr pkt, bool front = false);
 
   public:
     DmaPort(DmaDevice *dev, System *s);
@@ -163,14 +163,14 @@ class PioDevice : public MemObject
      * @param pkt Packet describing this request
      * @return number of ticks it took to complete
      */
-    virtual Tick read(Packet *pkt) = 0;
+    virtual Tick read(PacketPtr pkt) = 0;
 
     /** Pure virtual function that the device must implement. Called when a
      * write command is recieved by the port.
      * @param pkt Packet describing this request
      * @return number of ticks it took to complete
      */
-    virtual Tick write(Packet *pkt) = 0;
+    virtual Tick write(PacketPtr pkt) = 0;
 
   public:
     /** Params struct which is extended through each device based on
@@ -256,7 +256,7 @@ class DmaDevice : public PioDevice
     virtual ~DmaDevice();
 
     void dmaWrite(Addr addr, int size, Event *event, uint8_t *data)
-    { dmaPort->dmaAction(Packet::WriteReq, addr, size, event, data) ; }
+    { dmaPort->dmaAction(Packet::WriteInvalidateReq, addr, size, event, data) ; }
 
     void dmaRead(Addr addr, int size, Event *event, uint8_t *data = NULL)
     { dmaPort->dmaAction(Packet::ReadReq, addr, size, event, data); }
diff --git a/src/dev/isa_fake.cc b/src/dev/isa_fake.cc
index 9622f015c..4f1771ff9 100644
--- a/src/dev/isa_fake.cc
+++ b/src/dev/isa_fake.cc
@@ -40,6 +40,7 @@
 #include "base/trace.hh"
 #include "dev/isa_fake.hh"
 #include "mem/packet.hh"
+#include "mem/packet_access.hh"
 #include "sim/builder.hh"
 #include "sim/system.hh"
 
@@ -52,7 +53,7 @@ IsaFake::IsaFake(Params *p)
 }
 
 Tick
-IsaFake::read(Packet *pkt)
+IsaFake::read(PacketPtr pkt)
 {
     assert(pkt->result == Packet::Unknown);
     assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
@@ -79,7 +80,7 @@ IsaFake::read(Packet *pkt)
 }
 
 Tick
-IsaFake::write(Packet *pkt)
+IsaFake::write(PacketPtr pkt)
 {
     DPRINTF(Tsunami, "write - va=%#x size=%d \n", pkt->getAddr(), pkt->getSize());
     pkt->result = Packet::Success;
diff --git a/src/dev/isa_fake.hh b/src/dev/isa_fake.hh
index 5166882f8..366061c25 100644
--- a/src/dev/isa_fake.hh
+++ b/src/dev/isa_fake.hh
@@ -36,9 +36,10 @@
 #ifndef __ISA_FAKE_HH__
 #define __ISA_FAKE_HH__
 
-#include "dev/tsunami.hh"
 #include "base/range.hh"
 #include "dev/io_device.hh"
+#include "dev/tsunami.hh"
+#include "mem/packet.hh"
 
 /**
  * IsaFake is a device that returns -1 on all reads and
@@ -68,14 +69,14 @@ class IsaFake : public BasicPioDevice
      * @param pkt The memory request.
      * @param data Where to put the data.
      */
-    virtual Tick read(Packet *pkt);
+    virtual Tick read(PacketPtr pkt);
 
     /**
      * All writes are simply ignored.
      * @param pkt The memory request.
      * @param data the data to not write.
      */
-    virtual Tick write(Packet *pkt);
+    virtual Tick write(PacketPtr pkt);
 };
 
 #endif // __TSUNAMI_FAKE_HH__
diff --git a/src/dev/ns_gige.cc b/src/dev/ns_gige.cc
index 704afcf7d..19c553d87 100644
--- a/src/dev/ns_gige.cc
+++ b/src/dev/ns_gige.cc
@@ -43,6 +43,7 @@
 #include "dev/ns_gige.hh"
 #include "dev/pciconfigall.hh"
 #include "mem/packet.hh"
+#include "mem/packet_access.hh"
 #include "sim/builder.hh"
 #include "sim/debug.hh"
 #include "sim/host.hh"
@@ -466,7 +467,7 @@ NSGigE::regStats()
  * This is to write to the PCI general configuration registers
  */
 Tick
-NSGigE::writeConfig(Packet *pkt)
+NSGigE::writeConfig(PacketPtr pkt)
 {
     int offset = pkt->getAddr() & PCI_CONFIG_SIZE;
     if (offset < PCI_DEVICE_SPECIFIC)
@@ -494,7 +495,7 @@ NSGigE::writeConfig(Packet *pkt)
  * spec sheet
  */
 Tick
-NSGigE::read(Packet *pkt)
+NSGigE::read(PacketPtr pkt)
 {
     assert(ioEnable);
 
@@ -718,7 +719,7 @@ NSGigE::read(Packet *pkt)
 }
 
 Tick
-NSGigE::write(Packet *pkt)
+NSGigE::write(PacketPtr pkt)
 {
     assert(ioEnable);
 
diff --git a/src/dev/ns_gige.hh b/src/dev/ns_gige.hh
index 080c0b1f3..a40fe3b29 100644
--- a/src/dev/ns_gige.hh
+++ b/src/dev/ns_gige.hh
@@ -375,10 +375,10 @@ class NSGigE : public PciDev
     ~NSGigE();
     const Params *params() const { return (const Params *)_params; }
 
-    virtual Tick writeConfig(Packet *pkt);
+    virtual Tick writeConfig(PacketPtr pkt);
 
-    virtual Tick read(Packet *pkt);
-    virtual Tick write(Packet *pkt);
+    virtual Tick read(PacketPtr pkt);
+    virtual Tick write(PacketPtr pkt);
 
     bool cpuIntrPending() const;
     void cpuIntrAck() { cpuIntrClear(); }
diff --git a/src/dev/pciconfigall.cc b/src/dev/pciconfigall.cc
index 68013eab8..39c8f0fa0 100644
--- a/src/dev/pciconfigall.cc
+++ b/src/dev/pciconfigall.cc
@@ -38,6 +38,7 @@
 #include "dev/pcireg.h"
 #include "dev/platform.hh"
 #include "mem/packet.hh"
+#include "mem/packet_access.hh"
 #include "sim/builder.hh"
 #include "sim/system.hh"
 
@@ -51,7 +52,7 @@ PciConfigAll::PciConfigAll(Params *p)
 
 
 Tick
-PciConfigAll::read(Packet *pkt)
+PciConfigAll::read(PacketPtr pkt)
 {
     assert(pkt->result == Packet::Unknown);
 
@@ -78,7 +79,7 @@ PciConfigAll::read(Packet *pkt)
 }
 
 Tick
-PciConfigAll::write(Packet *pkt)
+PciConfigAll::write(PacketPtr pkt)
 {
     assert(pkt->result == Packet::Unknown);
     panic("Attempting to write to config space on non-existant device\n");
diff --git a/src/dev/pciconfigall.hh b/src/dev/pciconfigall.hh
index 07eaf8112..720a2f005 100644
--- a/src/dev/pciconfigall.hh
+++ b/src/dev/pciconfigall.hh
@@ -73,7 +73,7 @@ class PciConfigAll : public PioDevice
      * @param pkt Contains information about the read operation
      * @return Amount of time to do the read
      */
-    virtual Tick read(Packet *pkt);
+    virtual Tick read(PacketPtr pkt);
 
     /**
      * Write to PCI config spcae. If the device does not exit the simulator
@@ -83,7 +83,7 @@ class PciConfigAll : public PioDevice
      * @return Amount of time to do the read
      */
 
-    virtual Tick write(Packet *pkt);
+    virtual Tick write(PacketPtr pkt);
 
     void addressRanges(AddrRangeList &range_list);
 
diff --git a/src/dev/pcidev.cc b/src/dev/pcidev.cc
index b16ddb31a..8c0d03817 100644
--- a/src/dev/pcidev.cc
+++ b/src/dev/pcidev.cc
@@ -47,6 +47,7 @@
 #include "dev/pcidev.hh"
 #include "dev/tsunamireg.h"
 #include "mem/packet.hh"
+#include "mem/packet_access.hh"
 #include "sim/builder.hh"
 #include "sim/byteswap.hh"
 #include "sim/param.hh"
@@ -65,7 +66,7 @@ PciDev::PciConfigPort::PciConfigPort(PciDev *dev, int busid, int devid,
 
 
 Tick
-PciDev::PciConfigPort::recvAtomic(Packet *pkt)
+PciDev::PciConfigPort::recvAtomic(PacketPtr pkt)
 {
     assert(pkt->result == Packet::Unknown);
     assert(pkt->getAddr() >= configAddr &&
@@ -122,7 +123,7 @@ PciDev::drain(Event *de)
 }
 
 Tick
-PciDev::readConfig(Packet *pkt)
+PciDev::readConfig(PacketPtr pkt)
 {
     int offset = pkt->getAddr() & PCI_CONFIG_SIZE;
     if (offset >= PCI_DEVICE_SPECIFIC)
@@ -171,7 +172,7 @@ PciDev::addressRanges(AddrRangeList &range_list)
 }
 
 Tick
-PciDev::writeConfig(Packet *pkt)
+PciDev::writeConfig(PacketPtr pkt)
 {
     int offset = pkt->getAddr() & PCI_CONFIG_SIZE;
     if (offset >= PCI_DEVICE_SPECIFIC)
diff --git a/src/dev/pcidev.hh b/src/dev/pcidev.hh
index 22dd6296e..fbfdbb65c 100644
--- a/src/dev/pcidev.hh
+++ b/src/dev/pcidev.hh
@@ -40,6 +40,7 @@
 #include "dev/io_device.hh"
 #include "dev/pcireg.h"
 #include "dev/platform.hh"
+#include "sim/byteswap.hh"
 
 #define BAR_IO_MASK 0x3
 #define BAR_MEM_MASK 0xF
@@ -83,7 +84,7 @@ class PciDev : public DmaDevice
       protected:
         PciDev *device;
 
-        virtual Tick recvAtomic(Packet *pkt);
+        virtual Tick recvAtomic(PacketPtr pkt);
 
         virtual void getDeviceAddressRanges(AddrRangeList &resp,
                                             AddrRangeList &snoop);
@@ -201,7 +202,7 @@ class PciDev : public DmaDevice
      * for normal operations that it does not need to override.
      * @param pkt packet containing the write the offset into config space
      */
-    virtual Tick writeConfig(Packet *pkt);
+    virtual Tick writeConfig(PacketPtr pkt);
 
 
     /**
@@ -210,7 +211,7 @@ class PciDev : public DmaDevice
      * for normal operations that it does not need to override.
      * @param pkt packet containing the write the offset into config space
      */
-    virtual Tick readConfig(Packet *pkt);
+    virtual Tick readConfig(PacketPtr pkt);
 
   public:
     Addr pciToDma(Addr pciAddr) const
diff --git a/src/dev/sinic.cc b/src/dev/sinic.cc
index 61fb3aa24..f61c161aa 100644
--- a/src/dev/sinic.cc
+++ b/src/dev/sinic.cc
@@ -38,6 +38,7 @@
 #include "dev/etherlink.hh"
 #include "dev/sinic.hh"
 #include "mem/packet.hh"
+#include "mem/packet_access.hh"
 #include "sim/builder.hh"
 #include "sim/debug.hh"
 #include "sim/eventq.hh"
@@ -313,7 +314,7 @@ Device::prepareWrite(int cpu, int index)
  * I/O read of device register
  */
 Tick
-Device::read(Packet *pkt)
+Device::read(PacketPtr pkt)
 {
     assert(config.command & PCI_CMD_MSE);
     assert(pkt->getAddr() >= BARAddrs[0] && pkt->getSize() < BARSize[0]);
@@ -400,7 +401,7 @@ Device::iprRead(Addr daddr, int cpu, uint64_t &result)
  * I/O write of device register
  */
 Tick
-Device::write(Packet *pkt)
+Device::write(PacketPtr pkt)
 {
     assert(config.command & PCI_CMD_MSE);
     assert(pkt->getAddr() >= BARAddrs[0] && pkt->getSize() < BARSize[0]);
diff --git a/src/dev/sinic.hh b/src/dev/sinic.hh
index eece4ba6b..3904c2a8a 100644
--- a/src/dev/sinic.hh
+++ b/src/dev/sinic.hh
@@ -264,8 +264,8 @@ class Device : public Base
  * Memory Interface
  */
   public:
-    virtual Tick read(Packet *pkt);
-    virtual Tick write(Packet *pkt);
+    virtual Tick read(PacketPtr pkt);
+    virtual Tick write(PacketPtr pkt);
     virtual void resume();
 
     void prepareIO(int cpu, int index);
diff --git a/src/dev/tsunami_cchip.cc b/src/dev/tsunami_cchip.cc
index 3feb7439f..74a68566c 100644
--- a/src/dev/tsunami_cchip.cc
+++ b/src/dev/tsunami_cchip.cc
@@ -39,12 +39,14 @@
 
 #include "arch/alpha/ev5.hh"
 #include "base/trace.hh"
+#include "cpu/intr_control.hh"
+#include "cpu/thread_context.hh"
+#include "dev/tsunami.hh"
 #include "dev/tsunami_cchip.hh"
 #include "dev/tsunamireg.h"
-#include "dev/tsunami.hh"
+#include "mem/packet.hh"
+#include "mem/packet_access.hh"
 #include "mem/port.hh"
-#include "cpu/thread_context.hh"
-#include "cpu/intr_control.hh"
 #include "sim/builder.hh"
 #include "sim/system.hh"
 
@@ -72,7 +74,7 @@ TsunamiCChip::TsunamiCChip(Params *p)
 }
 
 Tick
-TsunamiCChip::read(Packet *pkt)
+TsunamiCChip::read(PacketPtr pkt)
 {
     DPRINTF(Tsunami, "read  va=%#x size=%d\n", pkt->getAddr(), pkt->getSize());
 
@@ -182,7 +184,7 @@ TsunamiCChip::read(Packet *pkt)
 }
 
 Tick
-TsunamiCChip::write(Packet *pkt)
+TsunamiCChip::write(PacketPtr pkt)
 {
     assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
     Addr daddr = pkt->getAddr() - pioAddr;
diff --git a/src/dev/tsunami_cchip.hh b/src/dev/tsunami_cchip.hh
index ef43f621f..297a94129 100644
--- a/src/dev/tsunami_cchip.hh
+++ b/src/dev/tsunami_cchip.hh
@@ -94,9 +94,9 @@ class TsunamiCChip : public BasicPioDevice
      */
     TsunamiCChip(Params *p);
 
-    virtual Tick read(Packet *pkt);
+    virtual Tick read(PacketPtr pkt);
 
-    virtual Tick write(Packet *pkt);
+    virtual Tick write(PacketPtr pkt);
 
     /**
      * post an RTC interrupt to the CPU
diff --git a/src/dev/tsunami_io.cc b/src/dev/tsunami_io.cc
index e3da10eb5..73af6c2ef 100644
--- a/src/dev/tsunami_io.cc
+++ b/src/dev/tsunami_io.cc
@@ -47,6 +47,8 @@
 #include "dev/tsunami.hh"
 #include "dev/tsunami_io.hh"
 #include "dev/tsunamireg.h"
+#include "mem/packet.hh"
+#include "mem/packet_access.hh"
 #include "mem/port.hh"
 #include "sim/builder.hh"
 #include "sim/system.hh"
@@ -446,7 +448,7 @@ TsunamiIO::frequency() const
 }
 
 Tick
-TsunamiIO::read(Packet *pkt)
+TsunamiIO::read(PacketPtr pkt)
 {
     assert(pkt->result == Packet::Unknown);
     assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
@@ -512,7 +514,7 @@ TsunamiIO::read(Packet *pkt)
 }
 
 Tick
-TsunamiIO::write(Packet *pkt)
+TsunamiIO::write(PacketPtr pkt)
 {
     assert(pkt->result == Packet::Unknown);
     assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
diff --git a/src/dev/tsunami_io.hh b/src/dev/tsunami_io.hh
index 9084a1be8..5ea3628c1 100644
--- a/src/dev/tsunami_io.hh
+++ b/src/dev/tsunami_io.hh
@@ -328,8 +328,8 @@ class TsunamiIO : public BasicPioDevice
      */
     TsunamiIO(Params *p);
 
-    virtual Tick read(Packet *pkt);
-    virtual Tick write(Packet *pkt);
+    virtual Tick read(PacketPtr pkt);
+    virtual Tick write(PacketPtr pkt);
 
     /**
      * Post an PIC interrupt to the CPU via the CChip
diff --git a/src/dev/tsunami_pchip.cc b/src/dev/tsunami_pchip.cc
index 8a542b9b0..549db1a50 100644
--- a/src/dev/tsunami_pchip.cc
+++ b/src/dev/tsunami_pchip.cc
@@ -42,6 +42,7 @@
 #include "dev/tsunamireg.h"
 #include "dev/tsunami.hh"
 #include "mem/packet.hh"
+#include "mem/packet_access.hh"
 #include "sim/builder.hh"
 #include "sim/system.hh"
 
@@ -68,7 +69,7 @@ TsunamiPChip::TsunamiPChip(Params *p)
 }
 
 Tick
-TsunamiPChip::read(Packet *pkt)
+TsunamiPChip::read(PacketPtr pkt)
 {
     assert(pkt->result == Packet::Unknown);
     assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
@@ -150,7 +151,7 @@ TsunamiPChip::read(Packet *pkt)
 }
 
 Tick
-TsunamiPChip::write(Packet *pkt)
+TsunamiPChip::write(PacketPtr pkt)
 {
     assert(pkt->result == Packet::Unknown);
     assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
diff --git a/src/dev/tsunami_pchip.hh b/src/dev/tsunami_pchip.hh
index 2c97a1fea..d0a9c3157 100644
--- a/src/dev/tsunami_pchip.hh
+++ b/src/dev/tsunami_pchip.hh
@@ -85,8 +85,8 @@ class TsunamiPChip : public BasicPioDevice
 
     Addr calcConfigAddr(int bus, int dev, int func);
 
-    virtual Tick read(Packet *pkt);
-    virtual Tick write(Packet *pkt);
+    virtual Tick read(PacketPtr pkt);
+    virtual Tick write(PacketPtr pkt);
 
     /**
      * Serialize this object to the given output stream.
diff --git a/src/dev/uart8250.cc b/src/dev/uart8250.cc
index b92527b5a..9051a26a2 100644
--- a/src/dev/uart8250.cc
+++ b/src/dev/uart8250.cc
@@ -42,6 +42,8 @@
 #include "dev/simconsole.hh"
 #include "dev/uart8250.hh"
 #include "dev/platform.hh"
+#include "mem/packet.hh"
+#include "mem/packet_access.hh"
 #include "sim/builder.hh"
 
 using namespace std;
@@ -110,7 +112,7 @@ Uart8250::Uart8250(Params *p)
 }
 
 Tick
-Uart8250::read(Packet *pkt)
+Uart8250::read(PacketPtr pkt)
 {
     assert(pkt->result == Packet::Unknown);
     assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
@@ -192,7 +194,7 @@ Uart8250::read(Packet *pkt)
 }
 
 Tick
-Uart8250::write(Packet *pkt)
+Uart8250::write(PacketPtr pkt)
 {
 
     assert(pkt->result == Packet::Unknown);
diff --git a/src/dev/uart8250.hh b/src/dev/uart8250.hh
index 44e90132f..2e768216a 100644
--- a/src/dev/uart8250.hh
+++ b/src/dev/uart8250.hh
@@ -82,8 +82,8 @@ class Uart8250 : public Uart
   public:
     Uart8250(Params *p);
 
-    virtual Tick read(Packet *pkt);
-    virtual Tick write(Packet *pkt);
+    virtual Tick read(PacketPtr pkt);
+    virtual Tick write(PacketPtr pkt);
     virtual void addressRanges(AddrRangeList &range_list);
 
 
diff --git a/src/mem/bridge.cc b/src/mem/bridge.cc
index 9c14e7ee2..38dcfd2e8 100644
--- a/src/mem/bridge.cc
+++ b/src/mem/bridge.cc
@@ -89,7 +89,7 @@ Bridge::init()
 /** Function called by the port when the bus is receiving a Timing
  * transaction.*/
 bool
-Bridge::BridgePort::recvTiming(Packet *pkt)
+Bridge::BridgePort::recvTiming(PacketPtr pkt)
 {
     DPRINTF(BusBridge, "recvTiming: src %d dest %d addr 0x%x\n",
             pkt->getSrc(), pkt->getDest(), pkt->getAddr());
@@ -99,7 +99,7 @@ Bridge::BridgePort::recvTiming(Packet *pkt)
 
 
 bool
-Bridge::BridgePort::queueForSendTiming(Packet *pkt)
+Bridge::BridgePort::queueForSendTiming(PacketPtr pkt)
 {
     if (queueFull())
         return false;
@@ -148,11 +148,12 @@ Bridge::BridgePort::trySend()
 
     assert(buf->ready <= curTick);
 
-    Packet *pkt = buf->pkt;
+    PacketPtr pkt = buf->pkt;
 
     DPRINTF(BusBridge, "trySend: origSrc %d dest %d addr 0x%x\n",
             buf->origSrc, pkt->getDest(), pkt->getAddr());
 
+    pkt->flags &= ~SNOOP_COMMIT; //Clear it if it was set
     if (sendTiming(pkt)) {
         // send successful
         sendQueue.pop_front();
@@ -197,7 +198,7 @@ Bridge::BridgePort::recvRetry()
 /** Function called by the port when the bus is receiving a Atomic
  * transaction.*/
 Tick
-Bridge::BridgePort::recvAtomic(Packet *pkt)
+Bridge::BridgePort::recvAtomic(PacketPtr pkt)
 {
     return otherPort->sendAtomic(pkt) + delay;
 }
@@ -205,7 +206,7 @@ Bridge::BridgePort::recvAtomic(Packet *pkt)
 /** Function called by the port when the bus is receiving a Functional
  * transaction.*/
 void
-Bridge::BridgePort::recvFunctional(Packet *pkt)
+Bridge::BridgePort::recvFunctional(PacketPtr pkt)
 {
     std::list<PacketBuffer*>::iterator i;
     bool pktContinue = true;
diff --git a/src/mem/bridge.hh b/src/mem/bridge.hh
index 2ab9799c7..f7d0d12d0 100644
--- a/src/mem/bridge.hh
+++ b/src/mem/bridge.hh
@@ -70,12 +70,12 @@ class Bridge : public MemObject
 
           public:
             Tick ready;
-            Packet *pkt;
+            PacketPtr pkt;
             Packet::SenderState *origSenderState;
             short origSrc;
             bool expectResponse;
 
-            PacketBuffer(Packet *_pkt, Tick t)
+            PacketBuffer(PacketPtr _pkt, Tick t)
                 : ready(t), pkt(_pkt),
                   origSenderState(_pkt->senderState), origSrc(_pkt->getSrc()),
                   expectResponse(_pkt->needsResponse())
@@ -84,7 +84,7 @@ class Bridge : public MemObject
                     pkt->senderState = this;
             }
 
-            void fixResponse(Packet *pkt)
+            void fixResponse(PacketPtr pkt)
             {
                 assert(pkt->senderState == this);
                 pkt->setDest(origSrc);
@@ -109,7 +109,7 @@ class Bridge : public MemObject
          */
         bool queueFull() { return (sendQueue.size() == queueLimit); }
 
-        bool queueForSendTiming(Packet *pkt);
+        bool queueForSendTiming(PacketPtr pkt);
 
         void finishSend(PacketBuffer *buf);
 
@@ -146,7 +146,7 @@ class Bridge : public MemObject
 
         /** When receiving a timing request from the peer port,
             pass it to the bridge. */
-        virtual bool recvTiming(Packet *pkt);
+        virtual bool recvTiming(PacketPtr pkt);
 
         /** When receiving a retry request from the peer port,
             pass it to the bridge. */
@@ -154,11 +154,11 @@ class Bridge : public MemObject
 
         /** When receiving a Atomic requestfrom the peer port,
             pass it to the bridge. */
-        virtual Tick recvAtomic(Packet *pkt);
+        virtual Tick recvAtomic(PacketPtr pkt);
 
         /** When receiving a Functional request from the peer port,
             pass it to the bridge. */
-        virtual void recvFunctional(Packet *pkt);
+        virtual void recvFunctional(PacketPtr pkt);
 
         /** When receiving a status changefrom the peer port,
             pass it to the bridge. */
diff --git a/src/mem/bus.cc b/src/mem/bus.cc
index b11b6de58..86a148f87 100644
--- a/src/mem/bus.cc
+++ b/src/mem/bus.cc
@@ -138,7 +138,7 @@ void Bus::occupyBus(PacketPtr pkt)
 /** Function called by the port when the bus is receiving a Timing
  * transaction.*/
 bool
-Bus::recvTiming(Packet *pkt)
+Bus::recvTiming(PacketPtr pkt)
 {
     Port *port;
     DPRINTF(Bus, "recvTiming: packet src %d dest %d addr 0x%x cmd %s\n",
@@ -160,9 +160,12 @@ Bus::recvTiming(Packet *pkt)
     short dest = pkt->getDest();
     if (dest == Packet::Broadcast) {
         if (timingSnoop(pkt)) {
+            bool success;
+
             pkt->flags |= SNOOP_COMMIT;
-            bool success = timingSnoop(pkt);
+            success = timingSnoop(pkt);
             assert(success);
+
             if (pkt->flags & SATISFIED) {
                 //Cache-Cache transfer occuring
                 if (inRetry) {
@@ -298,7 +301,7 @@ Bus::findSnoopPorts(Addr addr, int id)
 }
 
 Tick
-Bus::atomicSnoop(Packet *pkt)
+Bus::atomicSnoop(PacketPtr pkt)
 {
     std::vector<int> ports = findSnoopPorts(pkt->getAddr(), pkt->getSrc());
     Tick response_time = 0;
@@ -316,7 +319,7 @@ Bus::atomicSnoop(Packet *pkt)
 }
 
 void
-Bus::functionalSnoop(Packet *pkt)
+Bus::functionalSnoop(PacketPtr pkt)
 {
     std::vector<int> ports = findSnoopPorts(pkt->getAddr(), pkt->getSrc());
 
@@ -328,7 +331,7 @@ Bus::functionalSnoop(Packet *pkt)
 }
 
 bool
-Bus::timingSnoop(Packet *pkt)
+Bus::timingSnoop(PacketPtr pkt)
 {
     std::vector<int> ports = findSnoopPorts(pkt->getAddr(), pkt->getSrc());
     bool success = true;
@@ -346,7 +349,7 @@ Bus::timingSnoop(Packet *pkt)
 /** Function called by the port when the bus is receiving a Atomic
  * transaction.*/
 Tick
-Bus::recvAtomic(Packet *pkt)
+Bus::recvAtomic(PacketPtr pkt)
 {
     DPRINTF(Bus, "recvAtomic: packet src %d dest %d addr 0x%x cmd %s\n",
             pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString());
@@ -361,7 +364,7 @@ Bus::recvAtomic(Packet *pkt)
 /** Function called by the port when the bus is receiving a Functional
  * transaction.*/
 void
-Bus::recvFunctional(Packet *pkt)
+Bus::recvFunctional(PacketPtr pkt)
 {
     DPRINTF(Bus, "recvFunctional: packet src %d dest %d addr 0x%x cmd %s\n",
             pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString());
diff --git a/src/mem/bus.hh b/src/mem/bus.hh
index 509b8cf9b..9fb33b7c3 100644
--- a/src/mem/bus.hh
+++ b/src/mem/bus.hh
@@ -71,15 +71,15 @@ class Bus : public MemObject
 
     /** Function called by the port when the bus is recieving a Timing
       transaction.*/
-    bool recvTiming(Packet *pkt);
+    bool recvTiming(PacketPtr pkt);
 
     /** Function called by the port when the bus is recieving a Atomic
       transaction.*/
-    Tick recvAtomic(Packet *pkt);
+    Tick recvAtomic(PacketPtr pkt);
 
     /** Function called by the port when the bus is recieving a Functional
         transaction.*/
-    void recvFunctional(Packet *pkt);
+    void recvFunctional(PacketPtr pkt);
 
     /** Timing function called by port when it is once again able to process
      * requests. */
@@ -107,16 +107,16 @@ class Bus : public MemObject
     std::vector<int> findSnoopPorts(Addr addr, int id);
 
     /** Snoop all relevant ports atomicly. */
-    Tick atomicSnoop(Packet *pkt);
+    Tick atomicSnoop(PacketPtr pkt);
 
     /** Snoop all relevant ports functionally. */
-    void functionalSnoop(Packet *pkt);
+    void functionalSnoop(PacketPtr pkt);
 
     /** Call snoop on caches, be sure to set SNOOP_COMMIT bit if you want
      * the snoop to happen
      * @return True if succeds.
      */
-    bool timingSnoop(Packet *pkt);
+    bool timingSnoop(PacketPtr pkt);
 
     /** Process address range request.
      * @param resp addresses that we can respond to
@@ -157,17 +157,17 @@ class Bus : public MemObject
 
         /** When reciving a timing request from the peer port (at id),
             pass it to the bus. */
-        virtual bool recvTiming(Packet *pkt)
+        virtual bool recvTiming(PacketPtr pkt)
         { pkt->setSrc(id); return bus->recvTiming(pkt); }
 
         /** When reciving a Atomic requestfrom the peer port (at id),
             pass it to the bus. */
-        virtual Tick recvAtomic(Packet *pkt)
+        virtual Tick recvAtomic(PacketPtr pkt)
         { pkt->setSrc(id); return bus->recvAtomic(pkt); }
 
         /** When reciving a Functional requestfrom the peer port (at id),
             pass it to the bus. */
-        virtual void recvFunctional(Packet *pkt)
+        virtual void recvFunctional(PacketPtr pkt)
         { pkt->setSrc(id); bus->recvFunctional(pkt); }
 
         /** When reciving a status changefrom the peer port (at id),
diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc
index 3f7a52fab..0694aae6e 100644
--- a/src/mem/cache/base_cache.cc
+++ b/src/mem/cache/base_cache.cc
@@ -33,9 +33,10 @@
  * Definition of BaseCache functions.
  */
 
-#include "mem/cache/base_cache.hh"
-#include "cpu/smt.hh"
 #include "cpu/base.hh"
+#include "cpu/smt.hh"
+#include "mem/cache/base_cache.hh"
+#include "mem/cache/miss/mshr.hh"
 
 using namespace std;
 
@@ -44,7 +45,6 @@ BaseCache::CachePort::CachePort(const std::string &_name, BaseCache *_cache,
     : Port(_name), cache(_cache), isCpuSide(_isCpuSide)
 {
     blocked = false;
-    cshrRetry = NULL;
     waitingOnRetry = false;
     //Start ports at null if more than one is created we should panic
     //cpuSidePort = NULL;
@@ -71,7 +71,7 @@ BaseCache::CachePort::deviceBlockSize()
 }
 
 bool
-BaseCache::CachePort::recvTiming(Packet *pkt)
+BaseCache::CachePort::recvTiming(PacketPtr pkt)
 {
     if (isCpuSide
         && !pkt->req->isUncacheable()
@@ -99,48 +99,23 @@ BaseCache::CachePort::recvTiming(Packet *pkt)
 }
 
 Tick
-BaseCache::CachePort::recvAtomic(Packet *pkt)
+BaseCache::CachePort::recvAtomic(PacketPtr pkt)
 {
     return cache->doAtomicAccess(pkt, isCpuSide);
 }
 
 void
-BaseCache::CachePort::recvFunctional(Packet *pkt)
+BaseCache::CachePort::recvFunctional(PacketPtr pkt)
 {
     //Check storage here first
-    list<Packet *>::iterator i = drainList.begin();
-    list<Packet *>::iterator end = drainList.end();
+    list<PacketPtr>::iterator i = drainList.begin();
+    list<PacketPtr>::iterator end = drainList.end();
     for (; i != end; ++i) {
-        Packet * target = *i;
+        PacketPtr target = *i;
         // If the target contains data, and it overlaps the
         // probed request, need to update data
         if (target->intersect(pkt)) {
-            uint8_t* pkt_data;
-            uint8_t* write_data;
-            int data_size;
-            if (target->getAddr() < pkt->getAddr()) {
-                int offset = pkt->getAddr() - target->getAddr();
-                            pkt_data = pkt->getPtr<uint8_t>();
-                            write_data = target->getPtr<uint8_t>() + offset;
-                            data_size = target->getSize() - offset;
-                            assert(data_size > 0);
-                            if (data_size > pkt->getSize())
-                                data_size = pkt->getSize();
-            } else {
-                int offset = target->getAddr() - pkt->getAddr();
-                pkt_data = pkt->getPtr<uint8_t>() + offset;
-                write_data = target->getPtr<uint8_t>();
-                data_size = pkt->getSize() - offset;
-                assert(data_size > pkt->getSize());
-                if (data_size > target->getSize())
-                    data_size = target->getSize();
-            }
-
-            if (pkt->isWrite()) {
-                memcpy(pkt_data, write_data, data_size);
-            } else {
-                memcpy(write_data, pkt_data, data_size);
-            }
+            fixPacket(pkt, target);
         }
     }
     cache->doFunctionalAccess(pkt, isCpuSide);
@@ -149,7 +124,7 @@ BaseCache::CachePort::recvFunctional(Packet *pkt)
 void
 BaseCache::CachePort::recvRetry()
 {
-    Packet *pkt;
+    PacketPtr pkt;
     assert(waitingOnRetry);
     if (!drainList.empty()) {
         DPRINTF(CachePort, "%s attempting to send a retry for response\n", name());
@@ -179,12 +154,23 @@ BaseCache::CachePort::recvRetry()
             return;
         }
         pkt = cache->getPacket();
-        MSHR* mshr = (MSHR*)pkt->senderState;
+        MSHR* mshr = (MSHR*) pkt->senderState;
+        //Copy the packet, it may be modified/destroyed elsewhere
+        PacketPtr copyPkt = new Packet(*pkt);
+        copyPkt->dataStatic<uint8_t>(pkt->getPtr<uint8_t>());
+        mshr->pkt = copyPkt;
+
         bool success = sendTiming(pkt);
         DPRINTF(Cache, "Address %x was %s in sending the timing request\n",
                 pkt->getAddr(), success ? "succesful" : "unsuccesful");
-        cache->sendResult(pkt, mshr, success);
+
         waitingOnRetry = !success;
+        if (waitingOnRetry) {
+            DPRINTF(CachePort, "%s now waiting on a retry\n", name());
+        }
+
+        cache->sendResult(pkt, mshr, success);
+
         if (success && cache->doMasterRequest())
         {
             DPRINTF(CachePort, "%s has more requests\n", name());
@@ -195,20 +181,20 @@ BaseCache::CachePort::recvRetry()
     }
     else
     {
-        assert(cshrRetry);
+        assert(cache->doSlaveRequest());
         //pkt = cache->getCoherencePacket();
         //We save the packet, no reordering on CSHRS
-        pkt = cshrRetry;
+        pkt = cache->getCoherencePacket();
+        MSHR* cshr = (MSHR*)pkt->senderState;
         bool success = sendTiming(pkt);
+        cache->sendCoherenceResult(pkt, cshr, success);
         waitingOnRetry = !success;
-        if (success)
+        if (success && cache->doSlaveRequest())
         {
-            if (cache->doSlaveRequest()) {
-                //Still more to issue, rerequest in 1 cycle
-                BaseCache::CacheEvent * reqCpu = new BaseCache::CacheEvent(this);
-                reqCpu->schedule(curTick + 1);
-            }
-            cshrRetry = NULL;
+            DPRINTF(CachePort, "%s has more requests\n", name());
+            //Still more to issue, rerequest in 1 cycle
+            BaseCache::CacheEvent * reqCpu = new BaseCache::CacheEvent(this);
+            reqCpu->schedule(curTick + 1);
         }
     }
     if (waitingOnRetry) DPRINTF(CachePort, "%s STILL Waiting on retry\n", name());
@@ -246,7 +232,7 @@ BaseCache::CacheEvent::CacheEvent(CachePort *_cachePort)
     pkt = NULL;
 }
 
-BaseCache::CacheEvent::CacheEvent(CachePort *_cachePort, Packet *_pkt)
+BaseCache::CacheEvent::CacheEvent(CachePort *_cachePort, PacketPtr _pkt)
     : Event(&mainEventQueue, CPU_Tick_Pri), cachePort(_cachePort), pkt(_pkt)
 {
     this->setFlags(AutoDelete);
@@ -289,15 +275,25 @@ BaseCache::CacheEvent::process()
 
             pkt = cachePort->cache->getPacket();
             MSHR* mshr = (MSHR*) pkt->senderState;
+            //Copy the packet, it may be modified/destroyed elsewhere
+            PacketPtr copyPkt = new Packet(*pkt);
+            copyPkt->dataStatic<uint8_t>(pkt->getPtr<uint8_t>());
+            mshr->pkt = copyPkt;
+
             bool success = cachePort->sendTiming(pkt);
             DPRINTF(Cache, "Address %x was %s in sending the timing request\n",
                     pkt->getAddr(), success ? "succesful" : "unsuccesful");
-            cachePort->cache->sendResult(pkt, mshr, success);
+
             cachePort->waitingOnRetry = !success;
-            if (cachePort->waitingOnRetry) DPRINTF(CachePort, "%s now waiting on a retry\n", cachePort->name());
+            if (cachePort->waitingOnRetry) {
+                DPRINTF(CachePort, "%s now waiting on a retry\n", cachePort->name());
+            }
+
+            cachePort->cache->sendResult(pkt, mshr, success);
             if (success && cachePort->cache->doMasterRequest())
             {
-                DPRINTF(CachePort, "%s still more MSHR requests to send\n", cachePort->name());
+                DPRINTF(CachePort, "%s still more MSHR requests to send\n",
+                        cachePort->name());
                 //Still more to issue, rerequest in 1 cycle
                 pkt = NULL;
                 this->schedule(curTick+1);
@@ -306,27 +302,21 @@ BaseCache::CacheEvent::process()
         else
         {
             //CSHR
-            if (!cachePort->cshrRetry) {
-                assert(cachePort->cache->doSlaveRequest());
-                pkt = cachePort->cache->getCoherencePacket();
-            }
-            else {
-                pkt = cachePort->cshrRetry;
-            }
+            assert(cachePort->cache->doSlaveRequest());
+            pkt = cachePort->cache->getCoherencePacket();
+            MSHR* cshr = (MSHR*) pkt->senderState;
             bool success = cachePort->sendTiming(pkt);
-            if (!success) {
-                //Need to send on a retry
-                cachePort->cshrRetry = pkt;
-                cachePort->waitingOnRetry = true;
-            }
-            else
+            cachePort->cache->sendCoherenceResult(pkt, cshr, success);
+            cachePort->waitingOnRetry = !success;
+            if (cachePort->waitingOnRetry)
+                DPRINTF(CachePort, "%s now waiting on a retry\n", cachePort->name());
+            if (success && cachePort->cache->doSlaveRequest())
             {
-                cachePort->cshrRetry = NULL;
-                if (cachePort->cache->doSlaveRequest()) {
-                    //Still more to issue, rerequest in 1 cycle
-                    pkt = NULL;
-                    this->schedule(curTick+1);
-                }
+                DPRINTF(CachePort, "%s still more CSHR requests to send\n",
+                        cachePort->name());
+                //Still more to issue, rerequest in 1 cycle
+                pkt = NULL;
+                this->schedule(curTick+1);
             }
         }
         return;
diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh
index 455e13d9c..565280aef 100644
--- a/src/mem/cache/base_cache.hh
+++ b/src/mem/cache/base_cache.hh
@@ -58,7 +58,6 @@ enum BlockedCause{
     Blocked_NoTargets,
     Blocked_NoWBBuffers,
     Blocked_Coherence,
-    Blocked_Copy,
     NUM_BLOCKED_CAUSES
 };
 
@@ -86,11 +85,11 @@ class BaseCache : public MemObject
         CachePort(const std::string &_name, BaseCache *_cache, bool _isCpuSide);
 
       protected:
-        virtual bool recvTiming(Packet *pkt);
+        virtual bool recvTiming(PacketPtr pkt);
 
-        virtual Tick recvAtomic(Packet *pkt);
+        virtual Tick recvAtomic(PacketPtr pkt);
 
-        virtual void recvFunctional(Packet *pkt);
+        virtual void recvFunctional(PacketPtr pkt);
 
         virtual void recvStatusChange(Status status);
 
@@ -114,24 +113,25 @@ class BaseCache : public MemObject
 
         bool waitingOnRetry;
 
-        std::list<Packet *> drainList;
+        std::list<PacketPtr> drainList;
 
-        Packet *cshrRetry;
     };
 
     struct CacheEvent : public Event
     {
         CachePort *cachePort;
-        Packet *pkt;
+        PacketPtr pkt;
 
         CacheEvent(CachePort *_cachePort);
-        CacheEvent(CachePort *_cachePort, Packet *_pkt);
+        CacheEvent(CachePort *_cachePort, PacketPtr _pkt);
         void process();
         const char *description();
     };
 
-  protected:
+  public: //Made public so coherence can get at it.
     CachePort *cpuSidePort;
+
+  protected:
     CachePort *memSidePort;
 
     bool snoopRangesSent;
@@ -141,17 +141,17 @@ class BaseCache : public MemObject
 
   private:
     //To be defined in cache_impl.hh not in base class
-    virtual bool doTimingAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide)
+    virtual bool doTimingAccess(PacketPtr pkt, CachePort *cachePort, bool isCpuSide)
     {
         fatal("No implementation");
     }
 
-    virtual Tick doAtomicAccess(Packet *pkt, bool isCpuSide)
+    virtual Tick doAtomicAccess(PacketPtr pkt, bool isCpuSide)
     {
         fatal("No implementation");
     }
 
-    virtual void doFunctionalAccess(Packet *pkt, bool isCpuSide)
+    virtual void doFunctionalAccess(PacketPtr pkt, bool isCpuSide)
     {
         fatal("No implementation");
     }
@@ -172,17 +172,23 @@ class BaseCache : public MemObject
         }
     }
 
-    virtual Packet *getPacket()
+    virtual PacketPtr getPacket()
     {
         fatal("No implementation");
     }
 
-    virtual Packet *getCoherencePacket()
+    virtual PacketPtr getCoherencePacket()
     {
         fatal("No implementation");
     }
 
-    virtual void sendResult(Packet* &pkt, MSHR* mshr, bool success)
+    virtual void sendResult(PacketPtr &pkt, MSHR* mshr, bool success)
+    {
+
+        fatal("No implementation");
+    }
+
+    virtual void sendCoherenceResult(PacketPtr &pkt, MSHR* mshr, bool success)
     {
 
         fatal("No implementation");
@@ -489,10 +495,13 @@ class BaseCache : public MemObject
      */
     void setSlaveRequest(RequestCause cause, Tick time)
     {
+        if (!doSlaveRequest() && !cpuSidePort->waitingOnRetry)
+        {
+            BaseCache::CacheEvent * reqCpu = new BaseCache::CacheEvent(cpuSidePort);
+            reqCpu->schedule(time);
+        }
         uint8_t flag = 1<<cause;
         slaveRequests |= flag;
-        assert("Implement\n" && 0);
-//	si->pktuest(time);
     }
 
     /**
@@ -510,15 +519,18 @@ class BaseCache : public MemObject
      * @param pkt The request being responded to.
      * @param time The time the response is ready.
      */
-    void respond(Packet *pkt, Tick time)
+    void respond(PacketPtr pkt, Tick time)
     {
         if (pkt->needsResponse()) {
             CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt);
             reqCpu->schedule(time);
         }
         else {
-            if (pkt->cmd == Packet::Writeback) delete pkt->req;
-            delete pkt;
+            if (pkt->cmd != Packet::UpgradeReq)
+            {
+                delete pkt->req;
+                delete pkt;
+            }
         }
     }
 
@@ -527,7 +539,7 @@ class BaseCache : public MemObject
      * @param pkt The request to respond to.
      * @param time The time the response is ready.
      */
-    void respondToMiss(Packet *pkt, Tick time)
+    void respondToMiss(PacketPtr pkt, Tick time)
     {
         if (!pkt->req->isUncacheable()) {
             missLatency[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] += time - pkt->time;
@@ -537,8 +549,11 @@ class BaseCache : public MemObject
             reqCpu->schedule(time);
         }
         else {
-            if (pkt->cmd == Packet::Writeback) delete pkt->req;
-            delete pkt;
+            if (pkt->cmd != Packet::UpgradeReq)
+            {
+                delete pkt->req;
+                delete pkt;
+            }
         }
     }
 
@@ -546,7 +561,7 @@ class BaseCache : public MemObject
      * Suppliess the data if cache to cache transfers are enabled.
      * @param pkt The bus transaction to fulfill.
      */
-    void respondToSnoop(Packet *pkt, Tick time)
+    void respondToSnoop(PacketPtr pkt, Tick time)
     {
         assert (pkt->needsResponse());
         CacheEvent *reqMem = new CacheEvent(memSidePort, pkt);
diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh
index 41b270030..1f3b087bb 100644
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@@ -75,12 +75,6 @@ class Cache : public BaseCache
     /** Prefetcher */
     Prefetcher<TagStore, Buffering> *prefetcher;
 
-    /** Do fast copies in this cache. */
-    bool doCopy;
-
-    /** Block on a delayed copy. */
-    bool blockOnCopy;
-
     /**
      * The clock ratio of the outgoing bus.
      * Used for calculating critical word first.
@@ -102,21 +96,9 @@ class Cache : public BaseCache
       * A permanent mem req to always be used to cause invalidations.
       * Used to append to target list, to cause an invalidation.
       */
-    Packet * invalidatePkt;
+    PacketPtr invalidatePkt;
     Request *invalidateReq;
 
-    /**
-     * Temporarily move a block into a MSHR.
-     * @todo Remove this when LSQ/SB are fixed and implemented in memtest.
-     */
-    void pseudoFill(Addr addr);
-
-    /**
-     * Temporarily move a block into an existing MSHR.
-     * @todo Remove this when LSQ/SB are fixed and implemented in memtest.
-     */
-    void pseudoFill(MSHR *mshr);
-
   public:
 
     class Params
@@ -125,19 +107,17 @@ class Cache : public BaseCache
         TagStore *tags;
         Buffering *missQueue;
         Coherence *coherence;
-        bool doCopy;
-        bool blockOnCopy;
         BaseCache::Params baseParams;
         Prefetcher<TagStore, Buffering> *prefetcher;
         bool prefetchAccess;
         int hitLatency;
 
         Params(TagStore *_tags, Buffering *mq, Coherence *coh,
-               bool do_copy, BaseCache::Params params,
+               BaseCache::Params params,
                Prefetcher<TagStore, Buffering> *_prefetcher,
                bool prefetch_access, int hit_latency)
-            : tags(_tags), missQueue(mq), coherence(coh), doCopy(do_copy),
-              blockOnCopy(false), baseParams(params),
+            : tags(_tags), missQueue(mq), coherence(coh),
+              baseParams(params),
               prefetcher(_prefetcher), prefetchAccess(prefetch_access),
               hitLatency(hit_latency)
         {
@@ -147,12 +127,12 @@ class Cache : public BaseCache
     /** Instantiates a basic cache object. */
     Cache(const std::string &_name, Params &params);
 
-    virtual bool doTimingAccess(Packet *pkt, CachePort *cachePort,
+    virtual bool doTimingAccess(PacketPtr pkt, CachePort *cachePort,
                         bool isCpuSide);
 
-    virtual Tick doAtomicAccess(Packet *pkt, bool isCpuSide);
+    virtual Tick doAtomicAccess(PacketPtr pkt, bool isCpuSide);
 
-    virtual void doFunctionalAccess(Packet *pkt, bool isCpuSide);
+    virtual void doFunctionalAccess(PacketPtr pkt, bool isCpuSide);
 
     virtual void recvStatusChange(Port::Status status, bool isCpuSide);
 
@@ -163,55 +143,47 @@ class Cache : public BaseCache
      * @param pkt The request to perform.
      * @return The result of the access.
      */
-    bool access(Packet * &pkt);
+    bool access(PacketPtr &pkt);
 
     /**
      * Selects a request to send on the bus.
      * @return The memory request to service.
      */
-    virtual Packet * getPacket();
+    virtual PacketPtr getPacket();
 
     /**
      * Was the request was sent successfully?
      * @param pkt The request.
      * @param success True if the request was sent successfully.
      */
-    virtual void sendResult(Packet * &pkt, MSHR* mshr, bool success);
+    virtual void sendResult(PacketPtr &pkt, MSHR* mshr, bool success);
 
     /**
-     * Handles a response (cache line fill/write ack) from the bus.
-     * @param pkt The request being responded to.
-     */
-    void handleResponse(Packet * &pkt);
-
-    /**
-     * Start handling a copy transaction.
-     * @param pkt The copy request to perform.
+     * Was the CSHR request sent successfully?
+     * @param pkt The request.
+     * @param success True if the request was sent successfully.
      */
-    void startCopy(Packet * &pkt);
+    virtual void sendCoherenceResult(PacketPtr &pkt, MSHR* cshr, bool success);
 
     /**
-     * Handle a delayed copy transaction.
-     * @param pkt The delayed copy request to continue.
-     * @param addr The address being responded to.
-     * @param blk The block of the current response.
-     * @param mshr The mshr being handled.
+     * Handles a response (cache line fill/write ack) from the bus.
+     * @param pkt The request being responded to.
      */
-    void handleCopy(Packet * &pkt, Addr addr, BlkType *blk, MSHR *mshr);
+    void handleResponse(PacketPtr &pkt);
 
     /**
      * Selects a coherence message to forward to lower levels of the hierarchy.
      * @return The coherence message to forward.
      */
-    virtual Packet * getCoherencePacket();
+    virtual PacketPtr getCoherencePacket();
 
     /**
      * Snoops bus transactions to maintain coherence.
      * @param pkt The current bus transaction.
      */
-    void snoop(Packet * &pkt);
+    void snoop(PacketPtr &pkt);
 
-    void snoopResponse(Packet * &pkt);
+    void snoopResponse(PacketPtr &pkt);
 
     /**
      * Invalidates the block containing address if found.
@@ -252,7 +224,7 @@ class Cache : public BaseCache
      * request.
      * @return The estimated completion time.
      */
-    Tick probe(Packet * &pkt, bool update, CachePort * otherSidePort);
+    Tick probe(PacketPtr &pkt, bool update, CachePort * otherSidePort);
 
     /**
      * Snoop for the provided request in the cache and return the estimated
@@ -263,7 +235,7 @@ class Cache : public BaseCache
      * request.
      * @return The estimated completion time.
      */
-    Tick snoopProbe(Packet * &pkt);
+    Tick snoopProbe(PacketPtr &pkt);
 };
 
 #endif // __CACHE_HH__
diff --git a/src/mem/cache/cache_blk.hh b/src/mem/cache/cache_blk.hh
index 078c82d82..7b999e4b1 100644
--- a/src/mem/cache/cache_blk.hh
+++ b/src/mem/cache/cache_blk.hh
@@ -35,8 +35,11 @@
 #ifndef __CACHE_BLK_HH__
 #define __CACHE_BLK_HH__
 
+#include <list>
+
 #include "sim/root.hh"		// for Tick
 #include "arch/isa_traits.hh"	// for Addr
+#include "mem/request.hh"
 
 /**
  * Cache block status bit assignments
@@ -96,6 +99,35 @@ class CacheBlk
     /** Number of references to this block since it was brought in. */
     int refCount;
 
+  protected:
+    /**
+     * Represents that the indicated thread context has a "lock" on
+     * the block, in the LL/SC sense.
+     */
+    class Lock {
+      public:
+        int cpuNum;	// locking CPU
+        int threadNum;	// locking thread ID within CPU
+
+        // check for matching execution context
+        bool matchesContext(Request *req)
+        {
+            return (cpuNum == req->getCpuNum() &&
+                    threadNum == req->getThreadNum());
+        }
+
+        Lock(Request *req)
+            : cpuNum(req->getCpuNum()), threadNum(req->getThreadNum())
+        {
+        }
+    };
+
+    /** List of thread contexts that have performed a load-locked (LL)
+     * on the block since the last store. */
+    std::list<Lock> lockList;
+
+  public:
+
     CacheBlk()
         : asid(-1), tag(0), data(0) ,size(0), status(0), whenReady(0),
           set(-1), refCount(0)
@@ -175,7 +207,58 @@ class CacheBlk
         return (status & BlkHWPrefetched) != 0;
     }
 
+    /**
+     * Track the fact that a load locked was issued to the block.  If
+     * multiple LLs get issued from the same context we could have
+     * redundant records on the list, but that's OK, as they'll all
+     * get blown away at the next store.
+     */
+    void trackLoadLocked(Request *req)
+    {
+        assert(req->isLocked());
+        lockList.push_front(Lock(req));
+    }
+
+    /**
+     * Clear the list of valid load locks.  Should be called whenever
+     * block is written to or invalidated.
+     */
+    void clearLoadLocks() { lockList.clear(); }
 
+    /**
+     * Handle interaction of load-locked operations and stores.
+     * @return True if write should proceed, false otherwise.  Returns
+     * false only in the case of a failed store conditional.
+     */
+    bool checkWrite(Request *req)
+    {
+        if (req->isLocked()) {
+            // it's a store conditional... have to check for matching
+            // load locked.
+            bool success = false;
+
+            for (std::list<Lock>::iterator i = lockList.begin();
+                 i != lockList.end(); ++i)
+            {
+                if (i->matchesContext(req)) {
+                    // it's a store conditional, and as far as the memory
+                    // system can tell, the requesting context's lock is
+                    // still valid.
+                    success = true;
+                    break;
+                }
+            }
+
+            req->setScResult(success ? 1 : 0);
+            clearLoadLocks();
+            return success;
+        } else {
+            // for *all* stores (conditional or otherwise) we have to
+            // clear the list of load-locks as they're all invalid now.
+            clearLoadLocks();
+            return true;
+        }
+    }
 };
 
 #endif //__CACHE_BLK_HH__
diff --git a/src/mem/cache/cache_builder.cc b/src/mem/cache/cache_builder.cc
index 05a149a1c..03646ec2a 100644
--- a/src/mem/cache/cache_builder.cc
+++ b/src/mem/cache/cache_builder.cc
@@ -113,7 +113,6 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(BaseCache)
     Param<bool> prioritizeRequests;
 //    SimObjectParam<Bus *> in_bus;
 //    SimObjectParam<Bus *> out_bus;
-    Param<bool> do_copy;
     SimObjectParam<CoherenceProtocol *> protocol;
     Param<Addr> trace_addr;
     Param<int> hash_delay;
@@ -163,7 +162,6 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(BaseCache)
 /*    INIT_PARAM_DFLT(in_bus, "incoming bus object", NULL),
     INIT_PARAM(out_bus, "outgoing bus object"),
 */
-    INIT_PARAM_DFLT(do_copy, "perform fast copies in the cache", false),
     INIT_PARAM_DFLT(protocol, "coherence protocol to use in the cache", NULL),
     INIT_PARAM_DFLT(trace_addr, "address to trace", 0),
 
@@ -228,7 +226,7 @@ END_INIT_SIM_OBJECT_PARAMS(BaseCache)
              BUILD_NULL_PREFETCHER(t, comp, b); \
         } \
         Cache<CacheTags<t, comp>, b, c>::Params params(tagStore, mq, coh, \
-                                                       do_copy, base_params, \
+                                                       base_params, \
                                                        /*in_bus, out_bus,*/ pf,  \
                                                        prefetch_access, hit_latency); \
         Cache<CacheTags<t, comp>, b, c> *retval =			\
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 9db79b843..66a9ee554 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -56,7 +56,7 @@
 template<class TagStore, class Buffering, class Coherence>
 bool
 Cache<TagStore,Buffering,Coherence>::
-doTimingAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide)
+doTimingAccess(PacketPtr pkt, CachePort *cachePort, bool isCpuSide)
 {
     if (isCpuSide)
     {
@@ -82,15 +82,10 @@ doTimingAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide)
 template<class TagStore, class Buffering, class Coherence>
 Tick
 Cache<TagStore,Buffering,Coherence>::
-doAtomicAccess(Packet *pkt, bool isCpuSide)
+doAtomicAccess(PacketPtr pkt, bool isCpuSide)
 {
     if (isCpuSide)
     {
-        //Temporary solution to LL/SC
-        if (pkt->isWrite() && (pkt->req->isLocked())) {
-            pkt->req->setScResult(1);
-        }
-
         probe(pkt, true, NULL);
         //TEMP ALWAYS SUCCES FOR NOW
         pkt->result = Packet::Success;
@@ -109,18 +104,13 @@ doAtomicAccess(Packet *pkt, bool isCpuSide)
 template<class TagStore, class Buffering, class Coherence>
 void
 Cache<TagStore,Buffering,Coherence>::
-doFunctionalAccess(Packet *pkt, bool isCpuSide)
+doFunctionalAccess(PacketPtr pkt, bool isCpuSide)
 {
     if (isCpuSide)
     {
         //TEMP USE CPU?THREAD 0 0
         pkt->req->setThreadContext(0,0);
 
-        //Temporary solution to LL/SC
-        if (pkt->isWrite() && (pkt->req->isLocked())) {
-            assert("Can't handle LL/SC on functional path\n");
-        }
-
         probe(pkt, false, memSidePort);
         //TEMP ALWAYS SUCCESFUL FOR NOW
         pkt->result = Packet::Success;
@@ -148,7 +138,6 @@ Cache(const std::string &_name,
       prefetchAccess(params.prefetchAccess),
       tags(params.tags), missQueue(params.missQueue),
       coherence(params.coherence), prefetcher(params.prefetcher),
-      doCopy(params.doCopy), blockOnCopy(params.blockOnCopy),
       hitLatency(params.hitLatency)
 {
     tags->setCache(this);
@@ -198,7 +187,8 @@ Cache<TagStore,Buffering,Coherence>::access(PacketPtr &pkt)
     /** @todo make the fast write alloc (wh64) work with coherence. */
     /** @todo Do we want to do fast writes for writebacks as well? */
     if (!blk && pkt->getSize() >= blkSize && coherence->allowFastWrites() &&
-        (pkt->cmd == Packet::WriteReq || pkt->cmd == Packet::WriteInvalidateReq) ) {
+        (pkt->cmd == Packet::WriteReq
+         || pkt->cmd == Packet::WriteInvalidateReq) ) {
         // not outstanding misses, can do this
         MSHR* outstanding_miss = missQueue->findMSHR(pkt->getAddr());
         if (pkt->cmd == Packet::WriteInvalidateReq || !outstanding_miss) {
@@ -248,14 +238,15 @@ Cache<TagStore,Buffering,Coherence>::access(PacketPtr &pkt)
 
 
 template<class TagStore, class Buffering, class Coherence>
-Packet *
+PacketPtr
 Cache<TagStore,Buffering,Coherence>::getPacket()
 {
     assert(missQueue->havePending());
-    Packet * pkt = missQueue->getPacket();
+    PacketPtr pkt = missQueue->getPacket();
     if (pkt) {
         if (!pkt->req->isUncacheable()) {
-            if (pkt->cmd == Packet::HardPFReq) misses[Packet::HardPFReq][0/*pkt->req->getThreadNum()*/]++;
+            if (pkt->cmd == Packet::HardPFReq)
+                misses[Packet::HardPFReq][0/*pkt->req->getThreadNum()*/]++;
             BlkType *blk = tags->findBlock(pkt);
             Packet::Command cmd = coherence->getBusCmd(pkt->cmd,
                                               (blk)? blk->status : 0);
@@ -270,19 +261,29 @@ Cache<TagStore,Buffering,Coherence>::getPacket()
 
 template<class TagStore, class Buffering, class Coherence>
 void
-Cache<TagStore,Buffering,Coherence>::sendResult(PacketPtr &pkt, MSHR* mshr, bool success)
+Cache<TagStore,Buffering,Coherence>::sendResult(PacketPtr &pkt, MSHR* mshr,
+                                                bool success)
 {
-    if (success && !(pkt->flags & NACKED_LINE)) {
-        missQueue->markInService(pkt, mshr);
+    if (success && !(pkt && (pkt->flags & NACKED_LINE))) {
+        if (!mshr->pkt->needsResponse()
+            && !(mshr->pkt->cmd == Packet::UpgradeReq)
+            && (pkt && (pkt->flags & SATISFIED))) {
+            //Writeback, clean up the non copy version of the packet
+            delete pkt;
+        }
+        missQueue->markInService(mshr->pkt, mshr);
         //Temp Hack for UPGRADES
-        if (pkt->cmd == Packet::UpgradeReq) {
+        if (mshr->pkt && mshr->pkt->cmd == Packet::UpgradeReq) {
+            assert(pkt);  //Upgrades need to be fixed
             pkt->flags &= ~CACHE_LINE_FILL;
             BlkType *blk = tags->findBlock(pkt);
             CacheBlk::State old_state = (blk) ? blk->status : 0;
             CacheBlk::State new_state = coherence->getNewState(pkt,old_state);
             if (old_state != new_state)
-                DPRINTF(Cache, "Block for blk addr %x moving from state %i to %i\n",
-                        pkt->getAddr() & (((ULL(1))<<48)-1), old_state, new_state);
+                DPRINTF(Cache, "Block for blk addr %x moving from "
+                        "state %i to %i\n",
+                        pkt->getAddr() & (((ULL(1))<<48)-1),
+                        old_state, new_state);
             //Set the state on the upgrade
             memcpy(pkt->getPtr<uint8_t>(), blk->data, blkSize);
             PacketList writebacks;
@@ -294,19 +295,28 @@ Cache<TagStore,Buffering,Coherence>::sendResult(PacketPtr &pkt, MSHR* mshr, bool
         pkt->flags &= ~NACKED_LINE;
         pkt->flags &= ~SATISFIED;
         pkt->flags &= ~SNOOP_COMMIT;
+
+//Remove copy from mshr
+        delete mshr->pkt;
+        mshr->pkt = pkt;
+
         missQueue->restoreOrigCmd(pkt);
     }
 }
 
 template<class TagStore, class Buffering, class Coherence>
 void
-Cache<TagStore,Buffering,Coherence>::handleResponse(Packet * &pkt)
+Cache<TagStore,Buffering,Coherence>::handleResponse(PacketPtr &pkt)
 {
     BlkType *blk = NULL;
     if (pkt->senderState) {
+        //Delete temp copy in MSHR, restore it.
+        delete ((MSHR*)pkt->senderState)->pkt;
+        ((MSHR*)pkt->senderState)->pkt = pkt;
         if (pkt->result == Packet::Nacked) {
             //pkt->reinitFromRequest();
-            warn("NACKs from devices not connected to the same bus not implemented\n");
+            warn("NACKs from devices not connected to the same bus "
+                 "not implemented\n");
             return;
         }
         if (pkt->result == Packet::BadAddress) {
@@ -322,8 +332,10 @@ Cache<TagStore,Buffering,Coherence>::handleResponse(Packet * &pkt)
             PacketList writebacks;
             CacheBlk::State new_state = coherence->getNewState(pkt,old_state);
             if (old_state != new_state)
-                DPRINTF(Cache, "Block for blk addr %x moving from state %i to %i\n",
-                        pkt->getAddr() & (((ULL(1))<<48)-1), old_state, new_state);
+                DPRINTF(Cache, "Block for blk addr %x moving from "
+                        "state %i to %i\n",
+                        pkt->getAddr() & (((ULL(1))<<48)-1),
+                        old_state, new_state);
             blk = tags->handleFill(blk, (MSHR*)pkt->senderState,
                                    new_state, writebacks, pkt);
             while (!writebacks.empty()) {
@@ -336,87 +348,70 @@ Cache<TagStore,Buffering,Coherence>::handleResponse(Packet * &pkt)
 }
 
 template<class TagStore, class Buffering, class Coherence>
-void
-Cache<TagStore,Buffering,Coherence>::pseudoFill(Addr addr)
+PacketPtr
+Cache<TagStore,Buffering,Coherence>::getCoherencePacket()
 {
-    // Need to temporarily move this blk into MSHRs
-    MSHR *mshr = missQueue->allocateTargetList(addr);
-    int lat;
-    PacketList dummy;
-    // Read the data into the mshr
-    BlkType *blk = tags->handleAccess(mshr->pkt, lat, dummy, false);
-    assert(dummy.empty());
-    assert(mshr->pkt->flags & SATISFIED);
-    // can overload order since it isn't used on non pending blocks
-    mshr->order = blk->status;
-    // temporarily remove the block from the cache.
-    tags->invalidateBlk(addr);
+    return coherence->getPacket();
 }
 
 template<class TagStore, class Buffering, class Coherence>
 void
-Cache<TagStore,Buffering,Coherence>::pseudoFill(MSHR *mshr)
+Cache<TagStore,Buffering,Coherence>::sendCoherenceResult(PacketPtr &pkt,
+                                                         MSHR *cshr,
+                                                         bool success)
 {
-    // Need to temporarily move this blk into MSHRs
-    assert(mshr->pkt->cmd == Packet::ReadReq);
-    int lat;
-    PacketList dummy;
-    // Read the data into the mshr
-    BlkType *blk = tags->handleAccess(mshr->pkt, lat, dummy, false);
-    assert(dummy.empty());
-    assert(mshr->pkt->flags & SATISFIED);
-    // can overload order since it isn't used on non pending blocks
-    mshr->order = blk->status;
-    // temporarily remove the block from the cache.
-    tags->invalidateBlk(mshr->pkt->getAddr());
-}
-
-
-template<class TagStore, class Buffering, class Coherence>
-Packet *
-Cache<TagStore,Buffering,Coherence>::getCoherencePacket()
-{
-    return coherence->getPacket();
+    coherence->sendResult(pkt, cshr, success);
 }
 
 
 template<class TagStore, class Buffering, class Coherence>
 void
-Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt)
+Cache<TagStore,Buffering,Coherence>::snoop(PacketPtr &pkt)
 {
     if (pkt->req->isUncacheable()) {
         //Can't get a hit on an uncacheable address
         //Revisit this for multi level coherence
         return;
     }
+
+    //Send a timing (true) invalidate up if the protocol calls for it
+    coherence->propogateInvalidate(pkt, true);
+
     Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
     BlkType *blk = tags->findBlock(pkt);
     MSHR *mshr = missQueue->findMSHR(blk_addr);
-    if (coherence->hasProtocol()) { //@todo Move this into handle bus req
-        //If we find an mshr, and it is in service, we need to NACK or invalidate
+    if (coherence->hasProtocol() || pkt->isInvalidate()) {
+        //@todo Move this into handle bus req
+        //If we find an mshr, and it is in service, we need to NACK or
+        //invalidate
         if (mshr) {
             if (mshr->inService) {
                 if ((mshr->pkt->isInvalidate() || !mshr->pkt->isCacheFill())
-                    && (pkt->cmd != Packet::InvalidateReq && pkt->cmd != Packet::WriteInvalidateReq)) {
-                    //If the outstanding request was an invalidate (upgrade,readex,..)
-                    //Then we need to ACK the request until we get the data
-                    //Also NACK if the outstanding request is not a cachefill (writeback)
+                    && (pkt->cmd != Packet::InvalidateReq
+                        && pkt->cmd != Packet::WriteInvalidateReq)) {
+                    //If the outstanding request was an invalidate
+                    //(upgrade,readex,..), then we need to NACK the request
+                    //until we get the data.  Also NACK if the outstanding
+                    //request is not a cachefill (writeback).
                     assert(!(pkt->flags & SATISFIED));
                     pkt->flags |= SATISFIED;
                     pkt->flags |= NACKED_LINE;
                     ///@todo NACK's from other levels
-                    //warn("NACKs from devices not connected to the same bus not implemented\n");
+                    //warn("NACKs from devices not connected to the same bus "
+                    //"not implemented\n");
                     //respondToSnoop(pkt, curTick + hitLatency);
                     return;
                 }
                 else {
-                    //The supplier will be someone else, because we are waiting for
-                    //the data.  This should cause this cache to be forced to go to
-                    //the shared state, not the exclusive even though the shared line
-                    //won't be asserted.  But for now we will just invlidate ourselves
-                    //and allow the other cache to go into the exclusive state.
-                    //@todo Make it so a read to a pending read doesn't invalidate.
-                    //@todo Make it so that a read to a pending read can't be exclusive now.
+                    //The supplier will be someone else, because we are
+                    //waiting for the data.  This should cause this cache to
+                    //be forced to go to the shared state, not the exclusive
+                    //even though the shared line won't be asserted.  But for
+                    //now we will just invalidate ourselves and allow the other
+                    //cache to go into the exclusive state.  @todo Make it so
+                    //a read to a pending read doesn't invalidate.  @todo Make
+                    //it so that a read to a pending read can't be exclusive
+                    //now.
 
                     //Set the address so find match works
                     //panic("Don't have invalidates yet\n");
@@ -424,7 +419,8 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt)
 
                     //Append the invalidate on
                     missQueue->addTarget(mshr,invalidatePkt);
-                    DPRINTF(Cache, "Appending Invalidate to blk_addr: %x\n", pkt->getAddr() & (((ULL(1))<<48)-1));
+                    DPRINTF(Cache, "Appending Invalidate to blk_addr: %x\n",
+                            pkt->getAddr() & (((ULL(1))<<48)-1));
                     return;
                 }
             }
@@ -432,7 +428,8 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt)
         //We also need to check the writeback buffers and handle those
         std::vector<MSHR *> writebacks;
         if (missQueue->findWrites(blk_addr, writebacks)) {
-            DPRINTF(Cache, "Snoop hit in writeback to blk_addr: %x\n", pkt->getAddr() & (((ULL(1))<<48)-1));
+            DPRINTF(Cache, "Snoop hit in writeback to blk_addr: %x\n",
+                    pkt->getAddr() & (((ULL(1))<<48)-1));
 
             //Look through writebacks for any non-uncachable writes, use that
             for (int i=0; i<writebacks.size(); i++) {
@@ -460,7 +457,8 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt)
                     }
 
                     if (pkt->isInvalidate()) {
-                        //This must be an upgrade or other cache will take ownership
+                        //This must be an upgrade or other cache will take
+                        //ownership
                         missQueue->markInService(mshr->pkt, mshr);
                     }
                     return;
@@ -471,34 +469,36 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt)
     CacheBlk::State new_state;
     bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state);
     if (satisfy) {
-        DPRINTF(Cache, "Cache snooped a %s request for addr %x and now supplying data,"
-                "new state is %i\n",
+        DPRINTF(Cache, "Cache snooped a %s request for addr %x and "
+                "now supplying data, new state is %i\n",
                 pkt->cmdString(), blk_addr, new_state);
 
         tags->handleSnoop(blk, new_state, pkt);
         respondToSnoop(pkt, curTick + hitLatency);
         return;
     }
-    if (blk) DPRINTF(Cache, "Cache snooped a %s request for addr %x, new state is %i\n",
-                     pkt->cmdString(), blk_addr, new_state);
+    if (blk)
+        DPRINTF(Cache, "Cache snooped a %s request for addr %x, "
+                "new state is %i\n", pkt->cmdString(), blk_addr, new_state);
     tags->handleSnoop(blk, new_state);
 }
 
 template<class TagStore, class Buffering, class Coherence>
 void
-Cache<TagStore,Buffering,Coherence>::snoopResponse(Packet * &pkt)
+Cache<TagStore,Buffering,Coherence>::snoopResponse(PacketPtr &pkt)
 {
     //Need to handle the response, if NACKED
     if (pkt->flags & NACKED_LINE) {
         //Need to mark it as not in service, and retry for bus
         assert(0); //Yeah, we saw a NACK come through
 
-        //For now this should never get called, we return false when we see a NACK
-        //instead, by doing this we allow the bus_blocked mechanism to handle the retry
-        //For now it retrys in just 2 cycles, need to figure out how to change that
-        //Eventually we will want to also have success come in as a parameter
-        //Need to make sure that we handle the functionality that happens on successufl
-        //return of the sendAddr function
+        //For now this should never get called; we return false when we see a
+        //NACK instead. By doing this we allow the bus_blocked mechanism to
+        //handle the retry. For now it retries in just 2 cycles; need to
+        //figure out how to change that. Eventually we will want to also have
+        //success come in as a parameter. Need to make sure that we handle the
+        //functionality that happens on successful return of the sendAddr
+        //function.
     }
 }
 
@@ -515,7 +515,8 @@ Cache<TagStore,Buffering,Coherence>::invalidateBlk(Addr addr)
  */
 template<class TagStore, class Buffering, class Coherence>
 Tick
-Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update, CachePort* otherSidePort)
+Cache<TagStore,Buffering,Coherence>::probe(PacketPtr &pkt, bool update,
+                                           CachePort* otherSidePort)
 {
 //    MemDebug::cacheProbe(pkt);
     if (!pkt->req->isUncacheable()) {
@@ -530,6 +531,13 @@ Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update, CachePort
         }
     }
 
+    if (!update && (pkt->isWrite() || (otherSidePort == cpuSidePort))) {
+        // Still need to change data in all locations.
+        otherSidePort->sendFunctional(pkt);
+        if (pkt->isRead() && pkt->result == Packet::Success)
+            return 0;
+    }
+
     PacketList writebacks;
     int lat;
     BlkType *blk = tags->handleAccess(pkt, lat, writebacks, update);
@@ -538,157 +546,111 @@ Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update, CachePort
             pkt->getAddr() & (((ULL(1))<<48)-1), (blk) ? "hit" : "miss",
             pkt->getAddr() & ~((Addr)blkSize - 1));
 
-    if (!blk) {
-        // Need to check for outstanding misses and writes
-        Addr blk_addr = pkt->getAddr() & ~(blkSize - 1);
-
-        // There can only be one matching outstanding miss.
-        MSHR* mshr = missQueue->findMSHR(blk_addr);
-
-        // There can be many matching outstanding writes.
-        std::vector<MSHR*> writes;
-        missQueue->findWrites(blk_addr, writes);
-
-        if (!update) {
-                otherSidePort->sendFunctional(pkt);
-
-            // Check for data in MSHR and writebuffer.
-            if (mshr) {
-                warn("Found outstanding miss on an non-update probe");
-                MSHR::TargetList *targets = mshr->getTargetList();
-                MSHR::TargetList::iterator i = targets->begin();
-                MSHR::TargetList::iterator end = targets->end();
-                for (; i != end; ++i) {
-                    Packet * target = *i;
-                    // If the target contains data, and it overlaps the
-                    // probed request, need to update data
-                    if (target->isWrite() && target->intersect(pkt)) {
-                        uint8_t* pkt_data;
-                        uint8_t* write_data;
-                        int data_size;
-                        if (target->getAddr() < pkt->getAddr()) {
-                            int offset = pkt->getAddr() - target->getAddr();
-                            pkt_data = pkt->getPtr<uint8_t>();
-                            write_data = target->getPtr<uint8_t>() + offset;
-                            data_size = target->getSize() - offset;
-                            assert(data_size > 0);
-                            if (data_size > pkt->getSize())
-                                data_size = pkt->getSize();
-                        } else {
-                            int offset = target->getAddr() - pkt->getAddr();
-                            pkt_data = pkt->getPtr<uint8_t>() + offset;
-                            write_data = target->getPtr<uint8_t>();
-                            data_size = pkt->getSize() - offset;
-                            assert(data_size > pkt->getSize());
-                            if (data_size > target->getSize())
-                                data_size = target->getSize();
-                        }
-
-                        if (pkt->isWrite()) {
-                            memcpy(pkt_data, write_data, data_size);
-                        } else {
-                            memcpy(write_data, pkt_data, data_size);
-                        }
-                    }
-                }
-            }
-            for (int i = 0; i < writes.size(); ++i) {
-                Packet * write = writes[i]->pkt;
-                if (write->intersect(pkt)) {
-                    warn("Found outstanding write on an non-update probe");
-                    uint8_t* pkt_data;
-                    uint8_t* write_data;
-                    int data_size;
-                    if (write->getAddr() < pkt->getAddr()) {
-                        int offset = pkt->getAddr() - write->getAddr();
-                        pkt_data = pkt->getPtr<uint8_t>();
-                        write_data = write->getPtr<uint8_t>() + offset;
-                        data_size = write->getSize() - offset;
-                        assert(data_size > 0);
-                        if (data_size > pkt->getSize())
-                            data_size = pkt->getSize();
-                    } else {
-                        int offset = write->getAddr() - pkt->getAddr();
-                        pkt_data = pkt->getPtr<uint8_t>() + offset;
-                        write_data = write->getPtr<uint8_t>();
-                        data_size = pkt->getSize() - offset;
-                        assert(data_size > pkt->getSize());
-                        if (data_size > write->getSize())
-                            data_size = write->getSize();
-                    }
 
-                    if (pkt->isWrite()) {
-                        memcpy(pkt_data, write_data, data_size);
-                    } else {
-                        memcpy(write_data, pkt_data, data_size);
-                    }
+    // Need to check for outstanding misses and writes
+    Addr blk_addr = pkt->getAddr() & ~(blkSize - 1);
+
+    // There can only be one matching outstanding miss.
+    MSHR* mshr = missQueue->findMSHR(blk_addr);
 
+    // There can be many matching outstanding writes.
+    std::vector<MSHR*> writes;
+    missQueue->findWrites(blk_addr, writes);
+
+    if (!update) {
+        // Check for data in MSHR and writebuffer.
+        if (mshr) {
+            MSHR::TargetList *targets = mshr->getTargetList();
+            MSHR::TargetList::iterator i = targets->begin();
+            MSHR::TargetList::iterator end = targets->end();
+            for (; i != end; ++i) {
+                PacketPtr target = *i;
+                // If the target contains data, and it overlaps the
+                // probed request, need to update data
+                if (target->intersect(pkt)) {
+                    fixPacket(pkt, target);
                 }
             }
-            return 0;
-        } else {
-            // update the cache state and statistics
-            if (mshr || !writes.empty()){
-                // Can't handle it, return pktuest unsatisfied.
-                panic("Atomic access ran into outstanding MSHR's or WB's!");
+        }
+        for (int i = 0; i < writes.size(); ++i) {
+            PacketPtr write = writes[i]->pkt;
+            if (write->intersect(pkt)) {
+                fixPacket(pkt, write);
             }
-            if (!pkt->req->isUncacheable()) {
+        }
+        if (pkt->isRead()
+            && pkt->result != Packet::Success
+            && otherSidePort == memSidePort) {
+            otherSidePort->sendFunctional(pkt);
+            assert(pkt->result == Packet::Success);
+        }
+        return 0;
+    } else if (!blk) {
+        // update the cache state and statistics
+        if (mshr || !writes.empty()){
+            // Can't handle it, return request unsatisfied.
+            panic("Atomic access ran into outstanding MSHR's or WB's!");
+        }
+        if (!pkt->req->isUncacheable()) {
                 // Fetch the cache block to fill
-                BlkType *blk = tags->findBlock(pkt);
-                Packet::Command temp_cmd = coherence->getBusCmd(pkt->cmd,
-                                                   (blk)? blk->status : 0);
+            BlkType *blk = tags->findBlock(pkt);
+            Packet::Command temp_cmd = coherence->getBusCmd(pkt->cmd,
+                                                            (blk)? blk->status : 0);
 
-                Packet * busPkt = new Packet(pkt->req,temp_cmd, -1, blkSize);
+            PacketPtr busPkt = new Packet(pkt->req,temp_cmd, -1, blkSize);
 
-                busPkt->allocate();
+            busPkt->allocate();
 
-                busPkt->time = curTick;
+            busPkt->time = curTick;
 
-                DPRINTF(Cache, "Sending a atomic %s for %x blk_addr: %x\n",
-                        busPkt->cmdString(),
-                        busPkt->getAddr() & (((ULL(1))<<48)-1),
-                        busPkt->getAddr() & ~((Addr)blkSize - 1));
+            DPRINTF(Cache, "Sending a atomic %s for %x blk_addr: %x\n",
+                    busPkt->cmdString(),
+                    busPkt->getAddr() & (((ULL(1))<<48)-1),
+                    busPkt->getAddr() & ~((Addr)blkSize - 1));
 
-                lat = memSidePort->sendAtomic(busPkt);
+            lat = memSidePort->sendAtomic(busPkt);
 
-                //Be sure to flip the response to a request for coherence
-                if (busPkt->needsResponse()) {
-                    busPkt->makeAtomicResponse();
-                }
+            //Be sure to flip the response to a request for coherence
+            if (busPkt->needsResponse()) {
+                busPkt->makeAtomicResponse();
+            }
 
 /*		if (!(busPkt->flags & SATISFIED)) {
-                    // blocked at a higher level, just return
-                    return 0;
-                }
+// blocked at a higher level, just return
+return 0;
+}
 
 */		misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
 
-                CacheBlk::State old_state = (blk) ? blk->status : 0;
-                CacheBlk::State new_state = coherence->getNewState(busPkt, old_state);
-                    DPRINTF(Cache, "Receive response:%s for blk addr %x in state %i\n",
-                            busPkt->cmdString(),
-                            busPkt->getAddr() & (((ULL(1))<<48)-1), old_state);
-                if (old_state != new_state)
-                    DPRINTF(Cache, "Block for blk addr %x moving from state %i to %i\n",
-                            busPkt->getAddr() & (((ULL(1))<<48)-1), old_state, new_state);
-
-                tags->handleFill(blk, busPkt,
-                                 new_state,
-                                 writebacks, pkt);
-                //Free the packet
-                delete busPkt;
-
-                // Handle writebacks if needed
-                while (!writebacks.empty()){
-                    Packet *wbPkt = writebacks.front();
-                    memSidePort->sendAtomic(wbPkt);
-                    writebacks.pop_front();
-                    delete wbPkt;
-                }
-                return lat + hitLatency;
-            } else {
-                return memSidePort->sendAtomic(pkt);
+            CacheBlk::State old_state = (blk) ? blk->status : 0;
+            CacheBlk::State new_state =
+                coherence->getNewState(busPkt, old_state);
+            DPRINTF(Cache,
+                        "Receive response:%s for blk addr %x in state %i\n",
+                    busPkt->cmdString(),
+                    busPkt->getAddr() & (((ULL(1))<<48)-1), old_state);
+            if (old_state != new_state)
+                    DPRINTF(Cache, "Block for blk addr %x moving from "
+                            "state %i to %i\n",
+                            busPkt->getAddr() & (((ULL(1))<<48)-1),
+                            old_state, new_state);
+
+            tags->handleFill(blk, busPkt,
+                             new_state,
+                             writebacks, pkt);
+            //Free the packet
+            delete busPkt;
+
+            // Handle writebacks if needed
+            while (!writebacks.empty()){
+                PacketPtr wbPkt = writebacks.front();
+                memSidePort->sendAtomic(wbPkt);
+                writebacks.pop_front();
+                delete wbPkt;
             }
+                return lat + hitLatency;
+        } else {
+            return memSidePort->sendAtomic(pkt);
         }
     } else {
         // There was a cache hit.
@@ -698,12 +660,8 @@ Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update, CachePort
             writebacks.pop_front();
         }
 
-        if (update) {
-            hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
-        } else if (pkt->isWrite()) {
-            // Still need to change data in all locations.
-            otherSidePort->sendFunctional(pkt);
-        }
+        hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
+
         return hitLatency;
     }
     fatal("Probe not handled.\n");
@@ -714,22 +672,27 @@ template<class TagStore, class Buffering, class Coherence>
 Tick
 Cache<TagStore,Buffering,Coherence>::snoopProbe(PacketPtr &pkt)
 {
-        Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
-        BlkType *blk = tags->findBlock(pkt);
-        MSHR *mshr = missQueue->findMSHR(blk_addr);
-        CacheBlk::State new_state = 0;
-        bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state);
-        if (satisfy) {
-            DPRINTF(Cache, "Cache snooped a %s request for addr %x and now supplying data,"
-                    "new state is %i\n",
-                    pkt->cmdString(), blk_addr, new_state);
+    //Send an atomic (false) invalidate up if the protocol calls for it
+    coherence->propogateInvalidate(pkt, false);
+
+    Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
+    BlkType *blk = tags->findBlock(pkt);
+    MSHR *mshr = missQueue->findMSHR(blk_addr);
+    CacheBlk::State new_state = 0;
+    bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state);
+    if (satisfy) {
+        DPRINTF(Cache, "Cache snooped a %s request for addr %x and "
+                "now supplying data, new state is %i\n",
+                pkt->cmdString(), blk_addr, new_state);
 
             tags->handleSnoop(blk, new_state, pkt);
             return hitLatency;
-        }
-        if (blk) DPRINTF(Cache, "Cache snooped a %s request for addr %x, new state is %i\n",
-                     pkt->cmdString(), blk_addr, new_state);
-        tags->handleSnoop(blk, new_state);
-        return 0;
+    }
+    if (blk)
+        DPRINTF(Cache, "Cache snooped a %s request for addr %x, "
+                "new state is %i\n",
+                    pkt->cmdString(), blk_addr, new_state);
+    tags->handleSnoop(blk, new_state);
+    return 0;
 }
 
diff --git a/src/mem/cache/coherence/coherence_protocol.cc b/src/mem/cache/coherence/coherence_protocol.cc
index e28dda3dc..52beb0880 100644
--- a/src/mem/cache/coherence/coherence_protocol.cc
+++ b/src/mem/cache/coherence/coherence_protocol.cc
@@ -192,7 +192,7 @@ CoherenceProtocol::regStats()
 
 
 bool
-CoherenceProtocol::invalidateTrans(BaseCache *cache, Packet * &pkt,
+CoherenceProtocol::invalidateTrans(BaseCache *cache, PacketPtr &pkt,
                                    CacheBlk *blk, MSHR *mshr,
                                    CacheBlk::State & new_state)
 {
@@ -203,7 +203,7 @@ CoherenceProtocol::invalidateTrans(BaseCache *cache, Packet * &pkt,
 
 
 bool
-CoherenceProtocol::supplyTrans(BaseCache *cache, Packet * &pkt,
+CoherenceProtocol::supplyTrans(BaseCache *cache, PacketPtr &pkt,
                                CacheBlk *blk,
                                MSHR *mshr,
                                CacheBlk::State & new_state
@@ -214,7 +214,7 @@ CoherenceProtocol::supplyTrans(BaseCache *cache, Packet * &pkt,
 
 
 bool
-CoherenceProtocol::supplyAndGotoSharedTrans(BaseCache *cache, Packet * &pkt,
+CoherenceProtocol::supplyAndGotoSharedTrans(BaseCache *cache, PacketPtr &pkt,
                                             CacheBlk *blk,
                                             MSHR *mshr,
                                             CacheBlk::State & new_state)
@@ -226,7 +226,7 @@ CoherenceProtocol::supplyAndGotoSharedTrans(BaseCache *cache, Packet * &pkt,
 
 
 bool
-CoherenceProtocol::supplyAndGotoOwnedTrans(BaseCache *cache, Packet * &pkt,
+CoherenceProtocol::supplyAndGotoOwnedTrans(BaseCache *cache, PacketPtr &pkt,
                                            CacheBlk *blk,
                                            MSHR *mshr,
                                            CacheBlk::State & new_state)
@@ -238,7 +238,7 @@ CoherenceProtocol::supplyAndGotoOwnedTrans(BaseCache *cache, Packet * &pkt,
 
 
 bool
-CoherenceProtocol::supplyAndInvalidateTrans(BaseCache *cache, Packet * &pkt,
+CoherenceProtocol::supplyAndInvalidateTrans(BaseCache *cache, PacketPtr &pkt,
                                             CacheBlk *blk,
                                             MSHR *mshr,
                                             CacheBlk::State & new_state)
@@ -248,7 +248,7 @@ CoherenceProtocol::supplyAndInvalidateTrans(BaseCache *cache, Packet * &pkt,
 }
 
 bool
-CoherenceProtocol::assertShared(BaseCache *cache, Packet * &pkt,
+CoherenceProtocol::assertShared(BaseCache *cache, PacketPtr &pkt,
                                             CacheBlk *blk,
                                             MSHR *mshr,
                                             CacheBlk::State & new_state)
@@ -463,7 +463,7 @@ CoherenceProtocol::getBusCmd(Packet::Command cmdIn, CacheBlk::State state,
 
 
 CacheBlk::State
-CoherenceProtocol::getNewState(Packet * &pkt, CacheBlk::State oldState)
+CoherenceProtocol::getNewState(PacketPtr &pkt, CacheBlk::State oldState)
 {
     CacheBlk::State state = oldState & stateMask;
     int cmd_idx = pkt->cmdToIndex();
@@ -488,7 +488,7 @@ CoherenceProtocol::getNewState(Packet * &pkt, CacheBlk::State oldState)
 
 
 bool
-CoherenceProtocol::handleBusRequest(BaseCache *cache, Packet * &pkt,
+CoherenceProtocol::handleBusRequest(BaseCache *cache, PacketPtr &pkt,
                                     CacheBlk *blk,
                                     MSHR *mshr,
                                     CacheBlk::State & new_state)
@@ -518,7 +518,7 @@ CoherenceProtocol::handleBusRequest(BaseCache *cache, Packet * &pkt,
 }
 
 bool
-CoherenceProtocol::nullTransition(BaseCache *cache, Packet * &pkt,
+CoherenceProtocol::nullTransition(BaseCache *cache, PacketPtr &pkt,
                                   CacheBlk *blk, MSHR *mshr,
                                   CacheBlk::State & new_state)
 {
@@ -530,7 +530,7 @@ CoherenceProtocol::nullTransition(BaseCache *cache, Packet * &pkt,
 
 
 bool
-CoherenceProtocol::invalidTransition(BaseCache *cache, Packet * &pkt,
+CoherenceProtocol::invalidTransition(BaseCache *cache, PacketPtr &pkt,
                                      CacheBlk *blk, MSHR *mshr,
                                      CacheBlk::State & new_state)
 {
diff --git a/src/mem/cache/coherence/coherence_protocol.hh b/src/mem/cache/coherence/coherence_protocol.hh
index b5d7d80aa..b30fb053b 100644
--- a/src/mem/cache/coherence/coherence_protocol.hh
+++ b/src/mem/cache/coherence/coherence_protocol.hh
@@ -89,7 +89,7 @@ class CoherenceProtocol : public SimObject
      * @param oldState The current block state.
      * @return The new state.
      */
-    CacheBlk::State getNewState(Packet * &pkt,
+    CacheBlk::State getNewState(PacketPtr &pkt,
                                 CacheBlk::State oldState);
 
     /**
@@ -101,12 +101,12 @@ class CoherenceProtocol : public SimObject
      * @param new_state The new coherence state of the block.
      * @return True if the request should be satisfied locally.
      */
-    bool handleBusRequest(BaseCache *cache, Packet * &pkt, CacheBlk *blk,
+    bool handleBusRequest(BaseCache *cache, PacketPtr &pkt, CacheBlk *blk,
                           MSHR *mshr, CacheBlk::State &new_state);
 
   protected:
     /** Snoop function type. */
-    typedef bool (*SnoopFuncType)(BaseCache *, Packet *&, CacheBlk *,
+    typedef bool (*SnoopFuncType)(BaseCache *, PacketPtr &, CacheBlk *,
                                   MSHR *, CacheBlk::State&);
 
     //
@@ -116,49 +116,49 @@ class CoherenceProtocol : public SimObject
     /**
      * Do nothing transition.
      */
-    static bool nullTransition(BaseCache *, Packet *&, CacheBlk *,
+    static bool nullTransition(BaseCache *, PacketPtr &, CacheBlk *,
                                MSHR *, CacheBlk::State&);
 
     /**
      * Invalid transition, basically panic.
      */
-    static bool invalidTransition(BaseCache *, Packet *&, CacheBlk *,
+    static bool invalidTransition(BaseCache *, PacketPtr &, CacheBlk *,
                                   MSHR *, CacheBlk::State&);
 
     /**
      * Invalidate block, move to Invalid state.
      */
-    static bool invalidateTrans(BaseCache *, Packet *&, CacheBlk *,
+    static bool invalidateTrans(BaseCache *, PacketPtr &, CacheBlk *,
                                 MSHR *, CacheBlk::State&);
 
     /**
      * Supply data, no state transition.
      */
-    static bool supplyTrans(BaseCache *, Packet *&, CacheBlk *,
+    static bool supplyTrans(BaseCache *, PacketPtr &, CacheBlk *,
                             MSHR *, CacheBlk::State&);
 
     /**
      * Supply data and go to Shared state.
      */
-    static bool supplyAndGotoSharedTrans(BaseCache *, Packet *&, CacheBlk *,
+    static bool supplyAndGotoSharedTrans(BaseCache *, PacketPtr &, CacheBlk *,
                                          MSHR *, CacheBlk::State&);
 
     /**
      * Supply data and go to Owned state.
      */
-    static bool supplyAndGotoOwnedTrans(BaseCache *, Packet *&, CacheBlk *,
+    static bool supplyAndGotoOwnedTrans(BaseCache *, PacketPtr &, CacheBlk *,
                                         MSHR *, CacheBlk::State&);
 
     /**
      * Invalidate block, supply data, and go to Invalid state.
      */
-    static bool supplyAndInvalidateTrans(BaseCache *, Packet *&, CacheBlk *,
+    static bool supplyAndInvalidateTrans(BaseCache *, PacketPtr &, CacheBlk *,
                                          MSHR *, CacheBlk::State&);
 
     /**
      * Assert the shared line for a block that is shared/exclusive.
      */
-    static bool assertShared(BaseCache *, Packet *&, CacheBlk *,
+    static bool assertShared(BaseCache *, PacketPtr &, CacheBlk *,
                                          MSHR *, CacheBlk::State&);
 
     /**
diff --git a/src/mem/cache/coherence/simple_coherence.hh b/src/mem/cache/coherence/simple_coherence.hh
index 71d8f36f4..5316e64b9 100644
--- a/src/mem/cache/coherence/simple_coherence.hh
+++ b/src/mem/cache/coherence/simple_coherence.hh
@@ -89,18 +89,30 @@ class SimpleCoherence
      * This policy does not forward invalidates, return NULL.
      * @return NULL.
      */
-    Packet * getPacket()
+    PacketPtr getPacket()
     {
         return NULL;
     }
 
     /**
+     * Was the CSHR request sent successfully?
+     * @param pkt The request.
+     * @param success True if the request was sent successfully.
+     */
+    void sendResult(PacketPtr &pkt, MSHR* cshr, bool success)
+    {
+        //Don't do coherence
+        return;
+    }
+
+
+    /**
      * Return the proper state given the current state and the bus response.
      * @param pkt The bus response.
      * @param current The current block state.
      * @return The new state.
      */
-    CacheBlk::State getNewState(Packet * &pkt, CacheBlk::State current)
+    CacheBlk::State getNewState(PacketPtr &pkt, CacheBlk::State current)
     {
         return protocol->getNewState(pkt, current);
     }
@@ -112,7 +124,7 @@ class SimpleCoherence
      * @param mshr The MSHR corresponding to the request, if any.
      * @param new_state Return the new state for the block.
      */
-    bool handleBusRequest(Packet * &pkt, CacheBlk *blk, MSHR *mshr,
+    bool handleBusRequest(PacketPtr &pkt, CacheBlk *blk, MSHR *mshr,
                           CacheBlk::State &new_state)
     {
 //	assert(mshr == NULL);
@@ -148,6 +160,12 @@ class SimpleCoherence
     bool allowFastWrites() { return false; }
 
     bool hasProtocol() { return true; }
+
+    void propogateInvalidate(PacketPtr pkt, bool isTiming)
+    {
+        //For now we do nothing, assumes simple coherence is top level of cache
+        return;
+    }
 };
 
 #endif //__SIMPLE_COHERENCE_HH__
diff --git a/src/mem/cache/coherence/uni_coherence.cc b/src/mem/cache/coherence/uni_coherence.cc
index 0efe393f9..19230e35b 100644
--- a/src/mem/cache/coherence/uni_coherence.cc
+++ b/src/mem/cache/coherence/uni_coherence.cc
@@ -40,45 +40,67 @@ UniCoherence::UniCoherence()
 {
 }
 
-Packet *
+PacketPtr
 UniCoherence::getPacket()
 {
-    bool unblock = cshrs.isFull();
-    Packet* pkt = cshrs.getReq();
-    cshrs.markInService((MSHR*)pkt->senderState);
-    if (!cshrs.havePending()) {
-        cache->clearSlaveRequest(Request_Coherence);
-    }
-    if (unblock) {
-        //since CSHRs are always used as buffers, should always get rid of one
-        assert(!cshrs.isFull());
-        cache->clearBlocked(Blocked_Coherence);
-    }
+    PacketPtr pkt = cshrs.getReq();
     return pkt;
 }
 
+void
+UniCoherence::sendResult(PacketPtr &pkt, MSHR* cshr, bool success)
+{
+    if (success)
+    {
+        bool unblock = cshrs.isFull();
+//        cshrs.markInService(cshr);
+        cshrs.deallocate(cshr);
+        if (!cshrs.havePending()) {
+            cache->clearSlaveRequest(Request_Coherence);
+        }
+        if (unblock) {
+            //since CSHRs are always used as buffers, should always get rid of one
+            assert(!cshrs.isFull());
+            cache->clearBlocked(Blocked_Coherence);
+        }
+    }
+}
+
+
 /**
  * @todo add support for returning slave requests, not doing them here.
  */
 bool
-UniCoherence::handleBusRequest(Packet * &pkt, CacheBlk *blk, MSHR *mshr,
+UniCoherence::handleBusRequest(PacketPtr &pkt, CacheBlk *blk, MSHR *mshr,
                                CacheBlk::State &new_state)
 {
     new_state = 0;
     if (pkt->isInvalidate()) {
-        DPRINTF(Cache, "snoop inval on blk %x (blk ptr %x)\n",
-                pkt->getAddr(), blk);
-        // Forward to other caches
-        Packet * tmp = new Packet(pkt->req, Packet::InvalidateReq, -1);
-        cshrs.allocate(tmp);
-        cache->setSlaveRequest(Request_Coherence, curTick);
-        if (cshrs.isFull()) {
-            cache->setBlockedForSnoop(Blocked_Coherence);
+            DPRINTF(Cache, "snoop inval on blk %x (blk ptr %x)\n",
+                    pkt->getAddr(), blk);
+    }
+    else if (blk) {
+        new_state = blk->status;
+    }
+    return false;
+}
+
+void
+UniCoherence::propogateInvalidate(PacketPtr pkt, bool isTiming)
+{
+    if (pkt->isInvalidate()) {
+        if (isTiming) {
+            // Forward to other caches
+            PacketPtr tmp = new Packet(pkt->req, Packet::InvalidateReq, -1);
+            cshrs.allocate(tmp);
+            cache->setSlaveRequest(Request_Coherence, curTick);
+            if (cshrs.isFull())
+                cache->setBlockedForSnoop(Blocked_Coherence);
         }
-    } else {
-        if (blk) {
-            new_state = blk->status;
+        else {
+            PacketPtr tmp = new Packet(pkt->req, Packet::InvalidateReq, -1);
+            cache->cpuSidePort->sendAtomic(tmp);
+            delete tmp;
         }
     }
-    return false;
 }
diff --git a/src/mem/cache/coherence/uni_coherence.hh b/src/mem/cache/coherence/uni_coherence.hh
index 27b6c7fb5..44c752088 100644
--- a/src/mem/cache/coherence/uni_coherence.hh
+++ b/src/mem/cache/coherence/uni_coherence.hh
@@ -92,7 +92,7 @@ class UniCoherence
      * @param current The current block state.
      * @return The new state.
      */
-    CacheBlk::State getNewState(Packet * &pkt, CacheBlk::State current)
+    CacheBlk::State getNewState(PacketPtr &pkt, CacheBlk::State current)
     {
         if (pkt->senderState) //Blocking Buffers don't get mshrs
         {
@@ -108,11 +108,19 @@ class UniCoherence
         else
             return BlkValid | BlkWritable;
     }
+
     /**
      * Return outstanding invalidate to forward.
      * @return The next invalidate to forward to lower levels of cache.
      */
-    Packet * getPacket();
+    PacketPtr getPacket();
+
+    /**
+     * Was the CSHR request sent successfully?
+     * @param pkt The request.
+     * @param success True if the request was sent successfully.
+     */
+    void sendResult(PacketPtr &pkt, MSHR* cshr, bool success);
 
     /**
      * Handle snooped bus requests.
@@ -122,7 +130,7 @@ class UniCoherence
      * @param new_state The new coherence state of the block.
      * @return True if the request should be satisfied locally.
      */
-    bool handleBusRequest(Packet * &pkt, CacheBlk *blk, MSHR *mshr,
+    bool handleBusRequest(PacketPtr &pkt, CacheBlk *blk, MSHR *mshr,
                           CacheBlk::State &new_state);
 
     /**
@@ -131,6 +139,8 @@ class UniCoherence
     bool allowFastWrites() { return true; }
 
     bool hasProtocol() { return false; }
+
+    void propogateInvalidate(PacketPtr pkt, bool isTiming);
 };
 
 #endif //__UNI_COHERENCE_HH__
diff --git a/src/mem/cache/miss/blocking_buffer.cc b/src/mem/cache/miss/blocking_buffer.cc
index f7aacff89..bf741e547 100644
--- a/src/mem/cache/miss/blocking_buffer.cc
+++ b/src/mem/cache/miss/blocking_buffer.cc
@@ -68,7 +68,7 @@ BlockingBuffer::setPrefetcher(BasePrefetcher *_prefetcher)
     prefetcher = _prefetcher;
 }
 void
-BlockingBuffer::handleMiss(Packet * &pkt, int blk_size, Tick time)
+BlockingBuffer::handleMiss(PacketPtr &pkt, int blk_size, Tick time)
 {
     Addr blk_addr = pkt->getAddr() & ~(Addr)(blk_size - 1);
     if (pkt->isWrite() && (pkt->req->isUncacheable() || !writeAllocate ||
@@ -98,7 +98,7 @@ BlockingBuffer::handleMiss(Packet * &pkt, int blk_size, Tick time)
     cache->setMasterRequest(Request_MSHR, time);
 }
 
-Packet *
+PacketPtr
 BlockingBuffer::getPacket()
 {
     if (miss.pkt && !miss.inService) {
@@ -108,7 +108,7 @@ BlockingBuffer::getPacket()
 }
 
 void
-BlockingBuffer::setBusCmd(Packet * &pkt, Packet::Command cmd)
+BlockingBuffer::setBusCmd(PacketPtr &pkt, Packet::Command cmd)
 {
     MSHR *mshr = (MSHR*) pkt->senderState;
     mshr->originalCmd = pkt->cmd;
@@ -117,13 +117,13 @@ BlockingBuffer::setBusCmd(Packet * &pkt, Packet::Command cmd)
 }
 
 void
-BlockingBuffer::restoreOrigCmd(Packet * &pkt)
+BlockingBuffer::restoreOrigCmd(PacketPtr &pkt)
 {
     pkt->cmdOverride(((MSHR*)(pkt->senderState))->originalCmd);
 }
 
 void
-BlockingBuffer::markInService(Packet * &pkt, MSHR* mshr)
+BlockingBuffer::markInService(PacketPtr &pkt, MSHR* mshr)
 {
     if (!pkt->isCacheFill() && pkt->isWrite()) {
         // Forwarding a write/ writeback, don't need to change
@@ -152,7 +152,7 @@ BlockingBuffer::markInService(Packet * &pkt, MSHR* mshr)
 }
 
 void
-BlockingBuffer::handleResponse(Packet * &pkt, Tick time)
+BlockingBuffer::handleResponse(PacketPtr &pkt, Tick time)
 {
     if (pkt->isCacheFill()) {
         // targets were handled in the cache tags
@@ -163,7 +163,7 @@ BlockingBuffer::handleResponse(Packet * &pkt, Tick time)
         if (((MSHR*)(pkt->senderState))->hasTargets()) {
             // Should only have 1 target if we had any
             assert(((MSHR*)(pkt->senderState))->getNumTargets() == 1);
-            Packet * target = ((MSHR*)(pkt->senderState))->getTarget();
+            PacketPtr target = ((MSHR*)(pkt->senderState))->getTarget();
             ((MSHR*)(pkt->senderState))->popTarget();
             if (pkt->isRead()) {
                 memcpy(target->getPtr<uint8_t>(), pkt->getPtr<uint8_t>(), target->getSize());
@@ -187,7 +187,7 @@ void
 BlockingBuffer::squash(int threadNum)
 {
     if (miss.threadNum == threadNum) {
-        Packet * target = miss.getTarget();
+        PacketPtr target = miss.getTarget();
         miss.popTarget();
         assert(0/*target->req->getThreadNum()*/ == threadNum);
         target = NULL;
@@ -207,7 +207,7 @@ BlockingBuffer::doWriteback(Addr addr,
 {
     // Generate request
     Request * req = new Request(addr, size, 0);
-    Packet * pkt = new Packet(req, Packet::Writeback, -1);
+    PacketPtr pkt = new Packet(req, Packet::Writeback, -1);
     pkt->allocate();
     if (data) {
         memcpy(pkt->getPtr<uint8_t>(), data, size);
@@ -228,7 +228,7 @@ BlockingBuffer::doWriteback(Addr addr,
 
 
 void
-BlockingBuffer::doWriteback(Packet * &pkt)
+BlockingBuffer::doWriteback(PacketPtr &pkt)
 {
     writebacks[0/*pkt->req->getThreadNum()*/]++;
 
diff --git a/src/mem/cache/miss/blocking_buffer.hh b/src/mem/cache/miss/blocking_buffer.hh
index f7069696c..4408cfc4f 100644
--- a/src/mem/cache/miss/blocking_buffer.hh
+++ b/src/mem/cache/miss/blocking_buffer.hh
@@ -111,7 +111,7 @@ public:
      * @param blk_size The block size of the cache.
      * @param time The time the miss is detected.
      */
-    void handleMiss(Packet * &pkt, int blk_size, Tick time);
+    void handleMiss(PacketPtr &pkt, int blk_size, Tick time);
 
     /**
      * Fetch the block for the given address and buffer the given target.
@@ -122,7 +122,7 @@ public:
      * @param target The target for the fetch.
      */
     MSHR* fetchBlock(Addr addr, int blk_size, Tick time,
-                     Packet * &target)
+                     PacketPtr &target)
     {
         fatal("Unimplemented");
     }
@@ -131,20 +131,20 @@ public:
      * Selects a outstanding pktuest to service.
      * @return The pktuest to service, NULL if none found.
      */
-    Packet * getPacket();
+    PacketPtr getPacket();
 
     /**
      * Set the command to the given bus command.
      * @param pkt The request to update.
      * @param cmd The bus command to use.
      */
-    void setBusCmd(Packet * &pkt, Packet::Command cmd);
+    void setBusCmd(PacketPtr &pkt, Packet::Command cmd);
 
     /**
      * Restore the original command in case of a bus transmission error.
      * @param pkt The request to reset.
      */
-    void restoreOrigCmd(Packet * &pkt);
+    void restoreOrigCmd(PacketPtr &pkt);
 
     /**
      * Marks a pktuest as in service (sent on the bus). This can have side
@@ -152,14 +152,14 @@ public:
      * are successfully sent.
      * @param pkt The request that was sent on the bus.
      */
-    void markInService(Packet * &pkt, MSHR* mshr);
+    void markInService(PacketPtr &pkt, MSHR* mshr);
 
     /**
      * Frees the resources of the pktuest and unblock the cache.
      * @param pkt The request that has been satisfied.
      * @param time The time when the pktuest is satisfied.
      */
-    void handleResponse(Packet * &pkt, Tick time);
+    void handleResponse(PacketPtr &pkt, Tick time);
 
     /**
      * Removes all outstanding pktuests for a given thread number. If a request
@@ -223,7 +223,7 @@ public:
      * Perform a writeback pktuest.
      * @param pkt The writeback request.
      */
-    void doWriteback(Packet * &pkt);
+    void doWriteback(PacketPtr &pkt);
 
     /**
      * Returns true if there are outstanding pktuests.
@@ -239,7 +239,7 @@ public:
      * @param mshr The mshr to add a target to.
      * @param pkt The target to add.
      */
-    void addTarget(MSHR *mshr, Packet * &pkt)
+    void addTarget(MSHR *mshr, PacketPtr &pkt)
     {
         fatal("Shouldn't call this on a blocking buffer.");
     }
diff --git a/src/mem/cache/miss/miss_queue.cc b/src/mem/cache/miss/miss_queue.cc
index c23b542f5..fe467a8ea 100644
--- a/src/mem/cache/miss/miss_queue.cc
+++ b/src/mem/cache/miss/miss_queue.cc
@@ -350,7 +350,7 @@ MissQueue::setPrefetcher(BasePrefetcher *_prefetcher)
 }
 
 MSHR*
-MissQueue::allocateMiss(Packet * &pkt, int size, Tick time)
+MissQueue::allocateMiss(PacketPtr &pkt, int size, Tick time)
 {
     MSHR* mshr = mq.allocate(pkt, size);
     mshr->order = order++;
@@ -370,7 +370,7 @@ MissQueue::allocateMiss(Packet * &pkt, int size, Tick time)
 
 
 MSHR*
-MissQueue::allocateWrite(Packet * &pkt, int size, Tick time)
+MissQueue::allocateWrite(PacketPtr &pkt, int size, Tick time)
 {
     MSHR* mshr = wb.allocate(pkt,size);
     mshr->order = order++;
@@ -401,7 +401,7 @@ MissQueue::allocateWrite(Packet * &pkt, int size, Tick time)
  * @todo Remove SW prefetches on mshr hits.
  */
 void
-MissQueue::handleMiss(Packet * &pkt, int blkSize, Tick time)
+MissQueue::handleMiss(PacketPtr &pkt, int blkSize, Tick time)
 {
 //    if (!cache->isTopLevel())
     if (prefetchMiss) prefetcher->handleMiss(pkt, time);
@@ -455,7 +455,7 @@ MissQueue::handleMiss(Packet * &pkt, int blkSize, Tick time)
 
 MSHR*
 MissQueue::fetchBlock(Addr addr, int blk_size, Tick time,
-                      Packet * &target)
+                      PacketPtr &target)
 {
     Addr blkAddr = addr & ~(Addr)(blk_size - 1);
     assert(mq.findMatch(addr) == NULL);
@@ -469,10 +469,10 @@ MissQueue::fetchBlock(Addr addr, int blk_size, Tick time,
     return mshr;
 }
 
-Packet *
+PacketPtr
 MissQueue::getPacket()
 {
-    Packet * pkt = mq.getReq();
+    PacketPtr pkt = mq.getReq();
     if (((wb.isFull() && wb.inServiceMSHRs == 0) || !pkt ||
          pkt->time > curTick) && wb.havePending()) {
         pkt = wb.getReq();
@@ -510,7 +510,7 @@ MissQueue::getPacket()
 }
 
 void
-MissQueue::setBusCmd(Packet * &pkt, Packet::Command cmd)
+MissQueue::setBusCmd(PacketPtr &pkt, Packet::Command cmd)
 {
     assert(pkt->senderState != 0);
     MSHR * mshr = (MSHR*)pkt->senderState;
@@ -528,13 +528,13 @@ MissQueue::setBusCmd(Packet * &pkt, Packet::Command cmd)
 }
 
 void
-MissQueue::restoreOrigCmd(Packet * &pkt)
+MissQueue::restoreOrigCmd(PacketPtr &pkt)
 {
     pkt->cmd = ((MSHR*)(pkt->senderState))->originalCmd;
 }
 
 void
-MissQueue::markInService(Packet * &pkt, MSHR* mshr)
+MissQueue::markInService(PacketPtr &pkt, MSHR* mshr)
 {
     bool unblock = false;
     BlockedCause cause = NUM_BLOCKED_CAUSES;
@@ -583,7 +583,7 @@ MissQueue::markInService(Packet * &pkt, MSHR* mshr)
 
 
 void
-MissQueue::handleResponse(Packet * &pkt, Tick time)
+MissQueue::handleResponse(PacketPtr &pkt, Tick time)
 {
     MSHR* mshr = (MSHR*)pkt->senderState;
     if (((MSHR*)(pkt->senderState))->originalCmd == Packet::HardPFReq) {
@@ -632,7 +632,7 @@ MissQueue::handleResponse(Packet * &pkt, Tick time)
         if (mshr->hasTargets() && pkt->req->isUncacheable()) {
             // Should only have 1 target if we had any
             assert(num_targets == 1);
-            Packet * target = mshr->getTarget();
+            PacketPtr target = mshr->getTarget();
             mshr->popTarget();
             if (pkt->isRead()) {
                 memcpy(target->getPtr<uint8_t>(), pkt->getPtr<uint8_t>(),
@@ -645,7 +645,7 @@ MissQueue::handleResponse(Packet * &pkt, Tick time)
             //Must be a no_allocate with possibly more than one target
             assert(mshr->pkt->isNoAllocate());
             while (mshr->hasTargets()) {
-                Packet * target = mshr->getTarget();
+                PacketPtr target = mshr->getTarget();
                 mshr->popTarget();
                 if (pkt->isRead()) {
                     memcpy(target->getPtr<uint8_t>(), pkt->getPtr<uint8_t>(),
@@ -721,7 +721,7 @@ MissQueue::doWriteback(Addr addr,
 {
     // Generate request
     Request * req = new Request(addr, size, 0);
-    Packet * pkt = new Packet(req, Packet::Writeback, -1);
+    PacketPtr pkt = new Packet(req, Packet::Writeback, -1);
     pkt->allocate();
     if (data) {
         memcpy(pkt->getPtr<uint8_t>(), data, size);
@@ -739,7 +739,7 @@ MissQueue::doWriteback(Addr addr,
 
 
 void
-MissQueue::doWriteback(Packet * &pkt)
+MissQueue::doWriteback(PacketPtr &pkt)
 {
     writebacks[0/*pkt->req->getThreadNum()*/]++;
     allocateWrite(pkt, 0, curTick);
diff --git a/src/mem/cache/miss/miss_queue.hh b/src/mem/cache/miss/miss_queue.hh
index 179638d2b..2e04802fb 100644
--- a/src/mem/cache/miss/miss_queue.hh
+++ b/src/mem/cache/miss/miss_queue.hh
@@ -169,7 +169,7 @@ class MissQueue
      * @param time The time the miss occurs.
      * @return A pointer to the new MSHR.
      */
-    MSHR* allocateMiss(Packet * &pkt, int size, Tick time);
+    MSHR* allocateMiss(PacketPtr &pkt, int size, Tick time);
 
     /**
      * Allocate a new WriteBuffer to handle the provided write.
@@ -178,7 +178,7 @@ class MissQueue
      * @param time The time the write occurs.
      * @return A pointer to the new write buffer.
      */
-    MSHR* allocateWrite(Packet * &pkt, int size, Tick time);
+    MSHR* allocateWrite(PacketPtr &pkt, int size, Tick time);
 
   public:
     /**
@@ -218,7 +218,7 @@ class MissQueue
      * @param blk_size The block size of the cache.
      * @param time The time the miss is detected.
      */
-    void handleMiss(Packet * &pkt, int blk_size, Tick time);
+    void handleMiss(PacketPtr &pkt, int blk_size, Tick time);
 
     /**
      * Fetch the block for the given address and buffer the given target.
@@ -229,26 +229,26 @@ class MissQueue
      * @param target The target for the fetch.
      */
     MSHR* fetchBlock(Addr addr, int blk_size, Tick time,
-                     Packet * &target);
+                     PacketPtr &target);
 
     /**
      * Selects a outstanding pktuest to service.
      * @return The pktuest to service, NULL if none found.
      */
-    Packet * getPacket();
+    PacketPtr getPacket();
 
     /**
      * Set the command to the given bus command.
      * @param pkt The request to update.
      * @param cmd The bus command to use.
      */
-    void setBusCmd(Packet * &pkt, Packet::Command cmd);
+    void setBusCmd(PacketPtr &pkt, Packet::Command cmd);
 
     /**
      * Restore the original command in case of a bus transmission error.
      * @param pkt The request to reset.
      */
-    void restoreOrigCmd(Packet * &pkt);
+    void restoreOrigCmd(PacketPtr &pkt);
 
     /**
      * Marks a pktuest as in service (sent on the bus). This can have side
@@ -256,14 +256,14 @@ class MissQueue
      * are successfully sent.
      * @param pkt The request that was sent on the bus.
      */
-    void markInService(Packet * &pkt, MSHR* mshr);
+    void markInService(PacketPtr &pkt, MSHR* mshr);
 
     /**
      * Collect statistics and free resources of a satisfied pktuest.
      * @param pkt The request that has been satisfied.
      * @param time The time when the pktuest is satisfied.
      */
-    void handleResponse(Packet * &pkt, Tick time);
+    void handleResponse(PacketPtr &pkt, Tick time);
 
     /**
      * Removes all outstanding pktuests for a given thread number. If a request
@@ -316,7 +316,7 @@ class MissQueue
      * Perform the given writeback pktuest.
      * @param pkt The writeback request.
      */
-    void doWriteback(Packet * &pkt);
+    void doWriteback(PacketPtr &pkt);
 
     /**
      * Returns true if there are outstanding pktuests.
@@ -329,7 +329,7 @@ class MissQueue
      * @param mshr The mshr to add a target to.
      * @param pkt The target to add.
      */
-    void addTarget(MSHR *mshr, Packet * &pkt)
+    void addTarget(MSHR *mshr, PacketPtr &pkt)
     {
         mq.allocateTarget(mshr, pkt);
     }
diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc
index 455798f15..fc520b4b4 100644
--- a/src/mem/cache/miss/mshr.cc
+++ b/src/mem/cache/miss/mshr.cc
@@ -55,7 +55,7 @@ MSHR::MSHR()
 
 void
 MSHR::allocate(Packet::Command cmd, Addr _addr, int size,
-               Packet * &target)
+               PacketPtr &target)
 {
     addr = _addr;
     if (target)
@@ -85,7 +85,7 @@ MSHR::allocate(Packet::Command cmd, Addr _addr, int size,
  * @todo When we have a "global" data flag, might want to copy data here.
  */
 void
-MSHR::allocateAsBuffer(Packet * &target)
+MSHR::allocateAsBuffer(PacketPtr &target)
 {
     addr = target->getAddr();
     threadNum = 0/*target->req->getThreadNum()*/;
@@ -111,13 +111,13 @@ MSHR::deallocate()
  * Adds a target to an MSHR
  */
 void
-MSHR::allocateTarget(Packet * &target)
+MSHR::allocateTarget(PacketPtr &target)
 {
     //If we append an invalidate and we issued a read to the bus,
     //but now have some pending writes, we need to move
     //the invalidate to before the first non-read
     if (inService && pkt->isRead() && target->isInvalidate()) {
-        std::list<Packet *> temp;
+        std::list<PacketPtr> temp;
 
         while (!targets.empty()) {
             if (!targets.front()->isRead()) break;
diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh
index 028259b35..d92aa8a85 100644
--- a/src/mem/cache/miss/mshr.hh
+++ b/src/mem/cache/miss/mshr.hh
@@ -49,9 +49,9 @@ class MSHR;
 class MSHR {
   public:
     /** Defines the Data structure of the MSHR targetlist. */
-    typedef std::list<Packet *> TargetList;
+    typedef std::list<PacketPtr> TargetList;
     /** Target list iterator. */
-    typedef std::list<Packet *>::iterator TargetListIterator;
+    typedef std::list<PacketPtr>::iterator TargetListIterator;
     /** A list of MSHRs. */
     typedef std::list<MSHR *> List;
     /** MSHR list iterator. */
@@ -68,7 +68,7 @@ class MSHR {
     /** Thread number of the miss. */
     int threadNum;
     /** The pktuest that is forwarded to the next level of the hierarchy. */
-    Packet * pkt;
+    PacketPtr pkt;
     /** The number of currently allocated targets. */
     short ntargets;
     /** The original pktuesting command. */
@@ -101,13 +101,13 @@ public:
      * @param pkt  The original miss.
      */
     void allocate(Packet::Command cmd, Addr addr, int size,
-                  Packet * &pkt);
+                  PacketPtr &pkt);
 
     /**
      * Allocate this MSHR as a buffer for the given pktuest.
      * @param target The memory pktuest to buffer.
      */
-    void allocateAsBuffer(Packet * &target);
+    void allocateAsBuffer(PacketPtr &target);
 
     /**
      * Mark this MSHR as free.
@@ -118,7 +118,7 @@ public:
      * Add a pktuest to the list of targets.
      * @param target The target.
      */
-    void allocateTarget(Packet * &target);
+    void allocateTarget(PacketPtr &target);
 
     /** A simple constructor. */
     MSHR();
@@ -147,7 +147,7 @@ public:
      * Returns a reference to the first target.
      * @return A pointer to the first target.
      */
-    Packet * getTarget()
+    PacketPtr getTarget()
     {
         return targets.front();
     }
diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc
index 1876a8987..d3a7a7933 100644
--- a/src/mem/cache/miss/mshr_queue.cc
+++ b/src/mem/cache/miss/mshr_queue.cc
@@ -88,7 +88,7 @@ MSHRQueue::findMatches(Addr addr, vector<MSHR*>& matches) const
 }
 
 MSHR*
-MSHRQueue::findPending(Packet * &pkt) const
+MSHRQueue::findPending(PacketPtr &pkt) const
 {
     MSHR::ConstIterator i = pendingList.begin();
     MSHR::ConstIterator end = pendingList.end();
@@ -103,29 +103,12 @@ MSHRQueue::findPending(Packet * &pkt) const
                 return mshr;
             }
         }
-
-        //need to check destination address for copies.
-        //TEMP NOT DOING COPIES
-#if 0
-        if (mshr->pkt->cmd == Copy) {
-            Addr dest = mshr->pkt->dest;
-            if (dest < pkt->addr) {
-                if (dest + mshr->pkt->size > pkt->addr) {
-                    return mshr;
-                }
-            } else {
-                if (pkt->addr + pkt->size > dest) {
-                    return mshr;
-                }
-            }
-        }
-#endif
     }
     return NULL;
 }
 
 MSHR*
-MSHRQueue::allocate(Packet * &pkt, int size)
+MSHRQueue::allocate(PacketPtr &pkt, int size)
 {
     Addr aligned_addr = pkt->getAddr() & ~((Addr)size - 1);
     assert(!freeList.empty());
@@ -148,7 +131,7 @@ MSHRQueue::allocate(Packet * &pkt, int size)
 }
 
 MSHR*
-MSHRQueue::allocateFetch(Addr addr, int size, Packet * &target)
+MSHRQueue::allocateFetch(Addr addr, int size, PacketPtr &target)
 {
     MSHR *mshr = freeList.front();
     assert(mshr->getNumTargets() == 0);
@@ -167,7 +150,7 @@ MSHRQueue::allocateTargetList(Addr addr, int size)
     MSHR *mshr = freeList.front();
     assert(mshr->getNumTargets() == 0);
     freeList.pop_front();
-    Packet * dummy;
+    PacketPtr dummy;
     mshr->allocate(Packet::ReadReq, addr, size, dummy);
     mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr);
     mshr->inService = true;
@@ -213,7 +196,7 @@ void
 MSHRQueue::markInService(MSHR* mshr)
 {
     //assert(mshr == pendingList.front());
-    if (!(mshr->pkt->needsResponse() || mshr->pkt->cmd == Packet::UpgradeReq)) {
+    if (!mshr->pkt->needsResponse() && !(mshr->pkt->cmd == Packet::UpgradeReq)) {
         assert(mshr->getNumTargets() == 0);
         if ((mshr->pkt->flags & SATISFIED) && (mshr->pkt->cmd == Packet::Writeback)) {
             //Writeback hit, so delete it
@@ -254,7 +237,7 @@ MSHRQueue::squash(int threadNum)
         MSHR *mshr = *i;
         if (mshr->threadNum == threadNum) {
             while (mshr->hasTargets()) {
-                Packet * target = mshr->getTarget();
+                PacketPtr target = mshr->getTarget();
                 mshr->popTarget();
 
                 assert(0/*target->req->getThreadNum()*/ == threadNum);
diff --git a/src/mem/cache/miss/mshr_queue.hh b/src/mem/cache/miss/mshr_queue.hh
index ea5f101b7..30397d9a0 100644
--- a/src/mem/cache/miss/mshr_queue.hh
+++ b/src/mem/cache/miss/mshr_queue.hh
@@ -107,7 +107,7 @@ class MSHRQueue {
      * @param pkt The request to find.
      * @return A pointer to the earliest matching MSHR.
      */
-    MSHR* findPending(Packet * &pkt) const;
+    MSHR* findPending(PacketPtr &pkt) const;
 
     /**
      * Allocates a new MSHR for the pktuest and size. This places the request
@@ -118,7 +118,7 @@ class MSHRQueue {
      *
      * @pre There are free MSHRs.
      */
-    MSHR* allocate(Packet * &pkt, int size = 0);
+    MSHR* allocate(PacketPtr &pkt, int size = 0);
 
     /**
      * Allocate a read pktuest for the given address, and places the given
@@ -129,7 +129,7 @@ class MSHRQueue {
      * @param target The first target for the pktuest.
      * @return Pointer to the new MSHR.
      */
-    MSHR* allocateFetch(Addr addr, int size, Packet * &target);
+    MSHR* allocateFetch(Addr addr, int size, PacketPtr &target);
 
     /**
      * Allocate a target list for the given address.
@@ -153,7 +153,7 @@ class MSHRQueue {
      * @param mshr The MSHR to allocate the target to.
      * @param pkt The target request.
      */
-    void allocateTarget(MSHR* mshr, Packet * &pkt)
+    void allocateTarget(MSHR* mshr, PacketPtr &pkt)
     {
         mshr->allocateTarget(pkt);
         allocatedTargets += 1;
@@ -216,7 +216,7 @@ class MSHRQueue {
      * Returns the pktuest at the head of the pendingList.
      * @return The next pktuest to service.
      */
-    Packet * getReq() const
+    PacketPtr getReq() const
     {
         if (pendingList.empty()) {
             return NULL;
diff --git a/src/mem/cache/prefetch/base_prefetcher.cc b/src/mem/cache/prefetch/base_prefetcher.cc
index 5e50c48bd..a1388fad6 100644
--- a/src/mem/cache/prefetch/base_prefetcher.cc
+++ b/src/mem/cache/prefetch/base_prefetcher.cc
@@ -102,7 +102,7 @@ BasePrefetcher::regStats(const std::string &name)
         ;
 }
 
-Packet *
+PacketPtr
 BasePrefetcher::getPacket()
 {
     DPRINTF(HWPrefetch, "%s:Requesting a hw_pf to issue\n", cache->name());
@@ -112,7 +112,7 @@ BasePrefetcher::getPacket()
         return NULL;
     }
 
-    Packet * pkt;
+    PacketPtr pkt;
     bool keepTrying = false;
     do {
         pkt = *pf.begin();
@@ -131,7 +131,7 @@ BasePrefetcher::getPacket()
 }
 
 void
-BasePrefetcher::handleMiss(Packet * &pkt, Tick time)
+BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time)
 {
     if (!pkt->req->isUncacheable() && !(pkt->req->isInstRead() && only_data))
     {
@@ -139,7 +139,7 @@ BasePrefetcher::handleMiss(Packet * &pkt, Tick time)
         Addr blkAddr = pkt->getAddr() & ~(Addr)(blkSize-1);
 
         //Check if miss is in pfq, if so remove it
-        std::list<Packet *>::iterator iter = inPrefetch(blkAddr);
+        std::list<PacketPtr>::iterator iter = inPrefetch(blkAddr);
         if (iter != pf.end()) {
             DPRINTF(HWPrefetch, "%s:Saw a miss to a queued prefetch, removing it\n", cache->name());
             pfRemovedMSHR++;
@@ -179,7 +179,7 @@ BasePrefetcher::handleMiss(Packet * &pkt, Tick time)
             pfIdentified++;
             //create a prefetch memreq
             Request * prefetchReq = new Request(*addr, blkSize, 0);
-            Packet * prefetch;
+            PacketPtr prefetch;
             prefetch = new Packet(prefetchReq, Packet::HardPFReq, -1);
             prefetch->allocate();
             prefetch->req->setThreadContext(pkt->req->getCpuNum(),
@@ -233,11 +233,11 @@ BasePrefetcher::handleMiss(Packet * &pkt, Tick time)
     }
 }
 
-std::list<Packet *>::iterator
+std::list<PacketPtr>::iterator
 BasePrefetcher::inPrefetch(Addr address)
 {
     //Guaranteed to only be one match, we always check before inserting
-    std::list<Packet *>::iterator iter;
+    std::list<PacketPtr>::iterator iter;
     for (iter=pf.begin(); iter != pf.end(); iter++) {
         if (((*iter)->getAddr() & ~(Addr)(blkSize-1)) == address) {
             return iter;
diff --git a/src/mem/cache/prefetch/base_prefetcher.hh b/src/mem/cache/prefetch/base_prefetcher.hh
index d7ea41961..781d3ab09 100644
--- a/src/mem/cache/prefetch/base_prefetcher.hh
+++ b/src/mem/cache/prefetch/base_prefetcher.hh
@@ -45,7 +45,7 @@ class BasePrefetcher
   protected:
 
     /** The Prefetch Queue. */
-    std::list<Packet *> pf;
+    std::list<PacketPtr> pf;
 
     // PARAMETERS
 
@@ -93,24 +93,24 @@ class BasePrefetcher
 
     void setCache(BaseCache *_cache);
 
-    void handleMiss(Packet * &pkt, Tick time);
+    void handleMiss(PacketPtr &pkt, Tick time);
 
-    Packet * getPacket();
+    PacketPtr getPacket();
 
     bool havePending()
     {
         return !pf.empty();
     }
 
-    virtual void calculatePrefetch(Packet * &pkt,
+    virtual void calculatePrefetch(PacketPtr &pkt,
                                    std::list<Addr> &addresses,
                                    std::list<Tick> &delays) = 0;
 
-    virtual bool inCache(Packet * &pkt) = 0;
+    virtual bool inCache(PacketPtr &pkt) = 0;
 
     virtual bool inMissQueue(Addr address) = 0;
 
-    std::list<Packet *>::iterator inPrefetch(Addr address);
+    std::list<PacketPtr>::iterator inPrefetch(Addr address);
 };
 
 
diff --git a/src/mem/cache/prefetch/ghb_prefetcher.hh b/src/mem/cache/prefetch/ghb_prefetcher.hh
index c22b763d1..14f5747df 100644
--- a/src/mem/cache/prefetch/ghb_prefetcher.hh
+++ b/src/mem/cache/prefetch/ghb_prefetcher.hh
@@ -75,7 +75,7 @@ class GHBPrefetcher : public Prefetcher<TagStore, Buffering>
 
     ~GHBPrefetcher() {}
 
-    void calculatePrefetch(Packet * &pkt, std::list<Addr> &addresses,
+    void calculatePrefetch(PacketPtr &pkt, std::list<Addr> &addresses,
                            std::list<Tick> &delays)
     {
         Addr blkAddr = pkt->getAddr() & ~(Addr)(this->blkSize-1);
diff --git a/src/mem/cache/prefetch/stride_prefetcher.hh b/src/mem/cache/prefetch/stride_prefetcher.hh
index 4a8ee7de4..d6fb8ab66 100644
--- a/src/mem/cache/prefetch/stride_prefetcher.hh
+++ b/src/mem/cache/prefetch/stride_prefetcher.hh
@@ -92,7 +92,7 @@ class StridePrefetcher : public Prefetcher<TagStore, Buffering>
 
     ~StridePrefetcher() {}
 
-    void calculatePrefetch(Packet * &pkt, std::list<Addr> &addresses,
+    void calculatePrefetch(PacketPtr &pkt, std::list<Addr> &addresses,
                            std::list<Tick> &delays)
     {
 //	Addr blkAddr = pkt->paddr & ~(Addr)(this->blkSize-1);
diff --git a/src/mem/cache/prefetch/tagged_prefetcher.hh b/src/mem/cache/prefetch/tagged_prefetcher.hh
index 17f500dd8..b61e57dcc 100644
--- a/src/mem/cache/prefetch/tagged_prefetcher.hh
+++ b/src/mem/cache/prefetch/tagged_prefetcher.hh
@@ -64,7 +64,7 @@ class TaggedPrefetcher : public Prefetcher<TagStore, Buffering>
 
     ~TaggedPrefetcher() {}
 
-    void calculatePrefetch(Packet * &pkt, std::list<Addr> &addresses,
+    void calculatePrefetch(PacketPtr &pkt, std::list<Addr> &addresses,
                            std::list<Tick> &delays);
 };
 
diff --git a/src/mem/cache/prefetch/tagged_prefetcher_impl.hh b/src/mem/cache/prefetch/tagged_prefetcher_impl.hh
index e554b3cec..a18de4571 100644
--- a/src/mem/cache/prefetch/tagged_prefetcher_impl.hh
+++ b/src/mem/cache/prefetch/tagged_prefetcher_impl.hh
@@ -50,7 +50,7 @@ TaggedPrefetcher(int size, bool pageStop, bool serialSquash,
 template <class TagStore, class Buffering>
 void
 TaggedPrefetcher<TagStore, Buffering>::
-calculatePrefetch(Packet * &pkt, std::list<Addr> &addresses,
+calculatePrefetch(PacketPtr &pkt, std::list<Addr> &addresses,
                   std::list<Tick> &delays)
 {
     Addr blkAddr = pkt->getAddr() & ~(Addr)(this->blkSize-1);
diff --git a/src/mem/cache/tags/fa_lru.cc b/src/mem/cache/tags/fa_lru.cc
index 784ba1311..a58ddaff8 100644
--- a/src/mem/cache/tags/fa_lru.cc
+++ b/src/mem/cache/tags/fa_lru.cc
@@ -203,7 +203,7 @@ FALRU::findBlock(Addr addr, int &lat, int *inCache)
 }
 
 FALRUBlk*
-FALRU::findBlock(Packet * &pkt, int &lat, int *inCache)
+FALRU::findBlock(PacketPtr &pkt, int &lat, int *inCache)
 {
     Addr addr = pkt->getAddr();
 
@@ -256,7 +256,7 @@ FALRU::findBlock(Addr addr) const
 }
 
 FALRUBlk*
-FALRU::findReplacement(Packet * &pkt, PacketList &writebacks,
+FALRU::findReplacement(PacketPtr &pkt, PacketList &writebacks,
                        BlkList &compress_blocks)
 {
     FALRUBlk * blk = tail;
diff --git a/src/mem/cache/tags/fa_lru.hh b/src/mem/cache/tags/fa_lru.hh
index f9d4d7109..2db89d603 100644
--- a/src/mem/cache/tags/fa_lru.hh
+++ b/src/mem/cache/tags/fa_lru.hh
@@ -198,7 +198,7 @@ public:
      * @param inCache The FALRUBlk::inCache flags.
      * @return Pointer to the cache block.
      */
-    FALRUBlk* findBlock(Packet * &pkt, int &lat, int *inCache = 0);
+    FALRUBlk* findBlock(PacketPtr &pkt, int &lat, int *inCache = 0);
 
     /**
      * Find the block in the cache, do not update the replacement data.
@@ -215,7 +215,7 @@ public:
      * @param compress_blocks List of blocks to compress, for adaptive comp.
      * @return The block to place the replacement in.
      */
-    FALRUBlk* findReplacement(Packet * &pkt, PacketList & writebacks,
+    FALRUBlk* findReplacement(PacketPtr &pkt, PacketList & writebacks,
                               BlkList &compress_blocks);
 
     /**
@@ -322,25 +322,6 @@ public:
                    PacketList &writebacks)
     {
     }
-
-    /**
-     * Unimplemented. Perform a cache block copy from block aligned addresses.
-     * @param source The block aligned source address.
-     * @param dest The block aligned destination adddress.
-     * @param asid The address space ID.
-     * @param writebacks List for any generated writeback pktuests.
-     */
-    void doCopy(Addr source, Addr dest, PacketList &writebacks)
-    {
-    }
-
-    /**
-     * Unimplemented.
-     */
-    void fixCopy(Packet * &pkt, PacketList &writebacks)
-    {
-    }
-
 };
 
 #endif
diff --git a/src/mem/cache/tags/iic.cc b/src/mem/cache/tags/iic.cc
index 1377c8613..f4e870659 100644
--- a/src/mem/cache/tags/iic.cc
+++ b/src/mem/cache/tags/iic.cc
@@ -285,7 +285,7 @@ IIC::findBlock(Addr addr, int &lat)
 }
 
 IICTag*
-IIC::findBlock(Packet * &pkt, int &lat)
+IIC::findBlock(PacketPtr &pkt, int &lat)
 {
     Addr addr = pkt->getAddr();
 
@@ -362,7 +362,7 @@ IIC::findBlock(Addr addr) const
 
 
 IICTag*
-IIC::findReplacement(Packet * &pkt, PacketList &writebacks,
+IIC::findReplacement(PacketPtr &pkt, PacketList &writebacks,
                      BlkList &compress_blocks)
 {
     DPRINTF(IIC, "Finding Replacement for %x\n", pkt->getAddr());
@@ -423,7 +423,7 @@ IIC::freeReplacementBlock(PacketList & writebacks)
         tag_ptr->refCount = 0;
 
         if (tag_ptr->isModified()) {
-/*	    Packet * writeback =
+/*	    PacketPtr writeback =
                 buildWritebackReq(regenerateBlkAddr(tag_ptr->tag, 0),
                                   tag_ptr->req->asid, tag_ptr->xc, blkSize,
                                   tag_ptr->data,
@@ -431,7 +431,7 @@ IIC::freeReplacementBlock(PacketList & writebacks)
 */
             Request *writebackReq = new Request(regenerateBlkAddr(tag_ptr->tag, 0),
                                            blkSize, 0);
-            Packet *writeback = new Packet(writebackReq, Packet::Writeback, -1);
+            PacketPtr writeback = new Packet(writebackReq, Packet::Writeback, -1);
             writeback->allocate();
             memcpy(writeback->getPtr<uint8_t>(), tag_ptr->data, blkSize);
 
@@ -711,8 +711,8 @@ IIC::invalidateBlk(Addr addr)
 }
 
 void
-IIC::readData(IICTag *blk, uint8_t *data){
-//    assert(cache->doData());
+IIC::readData(IICTag *blk, uint8_t *data)
+{
     assert(blk->size <= trivialSize || blk->numData > 0);
     int data_size = blk->size;
     if (data_size > trivialSize) {
@@ -729,8 +729,8 @@ IIC::readData(IICTag *blk, uint8_t *data){
 
 void
 IIC::writeData(IICTag *blk, uint8_t *write_data, int size,
-               PacketList & writebacks){
-//    assert(cache->doData());
+               PacketList & writebacks)
+{
     assert(size < blkSize || !blk->isCompressed());
     DPRINTF(IIC, "Writing %d bytes to %x\n", size,
             blk->tag<<tagShift);
@@ -750,10 +750,6 @@ IIC::writeData(IICTag *blk, uint8_t *write_data, int size,
         // can free data blocks
         for (int i=num_subs; i < blk->numData; ++i){
             // decrement reference count and compare to zero
-            /**
-             * @todo
-             * Make this work with copying.
-             */
             if (--dataReferenceCount[blk->data_ptr[i]] == 0) {
                 freeDataBlock(blk->data_ptr[i]);
             }
@@ -775,96 +771,6 @@ IIC::writeData(IICTag *blk, uint8_t *write_data, int size,
 }
 
 
-/**
- * @todo This code can break if the src is evicted to get a tag for the dest.
- */
-void
-IIC::doCopy(Addr source, Addr dest, PacketList &writebacks)
-{
-//Copy unsuported now
-#if 0
-    IICTag *dest_tag = findBlock(dest);
-
-    if (dest_tag) {
-        for (int i = 0; i < dest_tag->numData; ++i) {
-            if (--dataReferenceCount[dest_tag->data_ptr[i]] == 0) {
-                freeDataBlock(dest_tag->data_ptr[i]);
-            }
-        }
-        // Reset replacement entry
-    } else {
-        dest_tag = getFreeTag(hash(dest), writebacks);
-        dest_tag->re = (void*) repl->add(dest_tag - tagStore);
-        dest_tag->set = hash(dest);
-        dest_tag->tag = extractTag(dest);
-        dest_tag->status = BlkValid | BlkWritable;
-    }
-    // Find the source tag here since it might move if we need to find a
-    // tag for the destination.
-    IICTag *src_tag = findBlock(source);
-    assert(src_tag);
-    assert(!cache->doData() || src_tag->size <= trivialSize
-           || src_tag->numData > 0);
-    // point dest to source data and inc counter
-    for (int i = 0; i < src_tag->numData; ++i) {
-        dest_tag->data_ptr[i] = src_tag->data_ptr[i];
-        ++dataReferenceCount[dest_tag->data_ptr[i]];
-    }
-
-    // Maintain fast access data.
-    memcpy(dest_tag->data, src_tag->data, blkSize);
-
-    dest_tag->xc = src_tag->xc;
-    dest_tag->size = src_tag->size;
-    dest_tag->numData = src_tag->numData;
-    if (src_tag->numData == 0) {
-        // Data is stored in the trivial data, just copy it.
-        memcpy(dest_tag->trivialData, src_tag->trivialData, src_tag->size);
-    }
-
-    dest_tag->status |= BlkDirty;
-    if (dest_tag->size < blkSize) {
-        dest_tag->status |= BlkCompressed;
-    } else {
-        dest_tag->status &= ~BlkCompressed;
-    }
-#endif
-}
-
-void
-IIC::fixCopy(Packet * &pkt, PacketList &writebacks)
-{
-#if 0
-    // if reference counter is greater than 1, do copy
-    // else do write
-    Addr blk_addr = blkAlign(pkt->getAddr);
-    IICTag* blk = findBlock(blk_addr);
-
-    if (blk->numData > 0 && dataReferenceCount[blk->data_ptr[0]] != 1) {
-        // copy the data
-        // Mark the block as referenced so it doesn't get replaced.
-        blk->status |= BlkReferenced;
-        for (int i = 0; i < blk->numData; ++i){
-            unsigned long new_data = getFreeDataBlock(writebacks);
-            // Need to refresh pointer
-            /**
-             * @todo Remove this refetch once we change IIC to pointer based
-             */
-            blk = findBlock(blk_addr);
-            assert(blk);
-            if (cache->doData()) {
-                memcpy(&(dataBlks[new_data][0]),
-                       &(dataBlks[blk->data_ptr[i]][0]),
-                       subSize);
-            }
-            dataReferenceCount[blk->data_ptr[i]]--;
-            dataReferenceCount[new_data]++;
-            blk->data_ptr[i] = new_data;
-        }
-    }
-#endif
-}
-
 void
 IIC::cleanupRefs()
 {
diff --git a/src/mem/cache/tags/iic.hh b/src/mem/cache/tags/iic.hh
index 2357bdce3..92bd6da1d 100644
--- a/src/mem/cache/tags/iic.hh
+++ b/src/mem/cache/tags/iic.hh
@@ -458,7 +458,7 @@ class IIC : public BaseTags
      * @param lat The access latency.
      * @return A pointer to the block found, if any.
      */
-    IICTag* findBlock(Packet * &pkt, int &lat);
+    IICTag* findBlock(PacketPtr &pkt, int &lat);
 
     /**
      * Find the block, do not update the replacement data.
@@ -475,7 +475,7 @@ class IIC : public BaseTags
      * @param compress_blocks List of blocks to compress, for adaptive comp.
      * @return The block to place the replacement in.
      */
-    IICTag* findReplacement(Packet * &pkt, PacketList &writebacks,
+    IICTag* findReplacement(PacketPtr &pkt, PacketList &writebacks,
                             BlkList &compress_blocks);
 
     /**
@@ -498,22 +498,6 @@ class IIC : public BaseTags
                    PacketList & writebacks);
 
     /**
-     * Perform a block aligned copy from the source address to the destination.
-     * @param source The block-aligned source address.
-     * @param dest The block-aligned destination address.
-     * @param asid The address space DI.
-     * @param writebacks List for any generated writeback pktuests.
-     */
-    void doCopy(Addr source, Addr dest, PacketList &writebacks);
-
-    /**
-     * If a block is currently marked copy on write, copy it before writing.
-     * @param pkt The write request.
-     * @param writebacks List for any generated writeback pktuests.
-     */
-    void fixCopy(Packet * &pkt, PacketList &writebacks);
-
-    /**
      * Called at end of simulation to complete average block reference stats.
      */
     virtual void cleanupRefs();
diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc
index 976bbeff2..31d29aae6 100644
--- a/src/mem/cache/tags/lru.cc
+++ b/src/mem/cache/tags/lru.cc
@@ -184,7 +184,7 @@ LRU::findBlock(Addr addr, int &lat)
 }
 
 LRUBlk*
-LRU::findBlock(Packet * &pkt, int &lat)
+LRU::findBlock(PacketPtr &pkt, int &lat)
 {
     Addr addr = pkt->getAddr();
 
@@ -215,7 +215,7 @@ LRU::findBlock(Addr addr) const
 }
 
 LRUBlk*
-LRU::findReplacement(Packet * &pkt, PacketList &writebacks,
+LRU::findReplacement(PacketPtr &pkt, PacketList &writebacks,
                      BlkList &compress_blocks)
 {
     unsigned set = extractSet(pkt->getAddr());
@@ -246,56 +246,12 @@ LRU::invalidateBlk(Addr addr)
     if (blk) {
         blk->status = 0;
         blk->isTouched = false;
+        blk->clearLoadLocks();
         tagsInUse--;
     }
 }
 
 void
-LRU::doCopy(Addr source, Addr dest, PacketList &writebacks)
-{
-    assert(source == blkAlign(source));
-    assert(dest == blkAlign(dest));
-    LRUBlk *source_blk = findBlock(source);
-    assert(source_blk);
-    LRUBlk *dest_blk = findBlock(dest);
-    if (dest_blk == NULL) {
-        // Need to do a replacement
-        Request *search = new Request(dest,1,0);
-        Packet * pkt = new Packet(search, Packet::ReadReq, -1);
-        BlkList dummy_list;
-        dest_blk = findReplacement(pkt, writebacks, dummy_list);
-        if (dest_blk->isValid() && dest_blk->isModified()) {
-            // Need to writeback data.
-/*	    pkt = buildWritebackReq(regenerateBlkAddr(dest_blk->tag,
-                                                      dest_blk->set),
-                                    dest_blk->req->asid,
-                                    dest_blk->xc,
-                                    blkSize,
-                                    dest_blk->data,
-                                    dest_blk->size);
-*/
-            Request *writebackReq = new Request(regenerateBlkAddr(dest_blk->tag,
-                                                                  dest_blk->set),
-                                                blkSize, 0);
-            Packet *writeback = new Packet(writebackReq, Packet::Writeback, -1);
-            writeback->allocate();
-            memcpy(writeback->getPtr<uint8_t>(),dest_blk->data, blkSize);
-            writebacks.push_back(writeback);
-        }
-        dest_blk->tag = extractTag(dest);
-        delete search;
-        delete pkt;
-    }
-    /**
-     * @todo Can't assume the status once we have coherence on copies.
-     */
-
-    // Set this block as readable, writeable, and dirty.
-    dest_blk->status = 7;
-    memcpy(dest_blk->data, source_blk->data, blkSize);
-}
-
-void
 LRU::cleanupRefs()
 {
     for (int i = 0; i < numSets*assoc; ++i) {
diff --git a/src/mem/cache/tags/lru.hh b/src/mem/cache/tags/lru.hh
index a3a56a0e6..fed688283 100644
--- a/src/mem/cache/tags/lru.hh
+++ b/src/mem/cache/tags/lru.hh
@@ -174,7 +174,7 @@ public:
      * @param lat The access latency.
      * @return Pointer to the cache block if found.
      */
-    LRUBlk* findBlock(Packet * &pkt, int &lat);
+    LRUBlk* findBlock(PacketPtr &pkt, int &lat);
 
     /**
      * Finds the given address in the cache and update replacement data.
@@ -201,7 +201,7 @@ public:
      * @param compress_blocks List of blocks to compress, for adaptive comp.
      * @return The block to place the replacement in.
      */
-    LRUBlk* findReplacement(Packet * &pkt, PacketList &writebacks,
+    LRUBlk* findReplacement(PacketPtr &pkt, PacketList &writebacks,
                             BlkList &compress_blocks);
 
     /**
@@ -303,22 +303,6 @@ public:
     }
 
     /**
-     * Perform a block aligned copy from the source address to the destination.
-     * @param source The block-aligned source address.
-     * @param dest The block-aligned destination address.
-     * @param asid The address space DI.
-     * @param writebacks List for any generated writeback pktuests.
-     */
-    void doCopy(Addr source, Addr dest, PacketList &writebacks);
-
-    /**
-     * No impl.
-     */
-    void fixCopy(Packet * &pkt, PacketList &writebacks)
-    {
-    }
-
-    /**
      * Called at end of simulation to complete average block reference stats.
      */
     virtual void cleanupRefs();
diff --git a/src/mem/cache/tags/split.cc b/src/mem/cache/tags/split.cc
index 690eea22e..bc74f0e0f 100644
--- a/src/mem/cache/tags/split.cc
+++ b/src/mem/cache/tags/split.cc
@@ -267,7 +267,7 @@ Split::probe(Addr addr) const
 }
 
 SplitBlk*
-Split::findBlock(Packet * &pkt, int &lat)
+Split::findBlock(PacketPtr &pkt, int &lat)
 {
 
     Addr aligned = blkAlign(pkt->getAddr());
@@ -350,7 +350,7 @@ Split::findBlock(Addr addr) const
 }
 
 SplitBlk*
-Split::findReplacement(Packet * &pkt, PacketList &writebacks,
+Split::findReplacement(PacketPtr &pkt, PacketList &writebacks,
                      BlkList &compress_blocks)
 {
     SplitBlk *blk;
@@ -422,19 +422,6 @@ Split::invalidateBlk(Addr addr)
 }
 
 void
-Split::doCopy(Addr source, Addr dest, PacketList &writebacks)
-{
-    if (lru->probe( source))
-        lru->doCopy(source, dest, writebacks);
-    else {
-        if (lifo && lifo_net)
-            lifo_net->doCopy(source, dest, writebacks);
-        else if (lru_net)
-            lru_net->doCopy(source, dest, writebacks);
-    }
-}
-
-void
 Split::cleanupRefs()
 {
     lru->cleanupRefs();
diff --git a/src/mem/cache/tags/split.hh b/src/mem/cache/tags/split.hh
index f0091e971..748f6fb25 100644
--- a/src/mem/cache/tags/split.hh
+++ b/src/mem/cache/tags/split.hh
@@ -207,7 +207,7 @@ class Split : public BaseTags
      * @param lat The access latency.
      * @return Pointer to the cache block if found.
      */
-    SplitBlk* findBlock(Packet * &pkt, int &lat);
+    SplitBlk* findBlock(PacketPtr &pkt, int &lat);
 
     /**
      * Finds the given address in the cache, do not update replacement data.
@@ -224,7 +224,7 @@ class Split : public BaseTags
      * @param compress_blocks List of blocks to compress, for adaptive comp.
      * @return The block to place the replacement in.
      */
-    SplitBlk* findReplacement(Packet * &pkt, PacketList &writebacks,
+    SplitBlk* findReplacement(PacketPtr &pkt, PacketList &writebacks,
                             BlkList &compress_blocks);
 
 
@@ -311,22 +311,6 @@ class Split : public BaseTags
     }
 
     /**
-     * Perform a block aligned copy from the source address to the destination.
-     * @param source The block-aligned source address.
-     * @param dest The block-aligned destination address.
-     * @param asid The address space DI.
-     * @param writebacks List for any generated writeback pktuests.
-     */
-    void doCopy(Addr source, Addr dest, PacketList &writebacks);
-
-    /**
-     * No impl.
-     */
-    void fixCopy(Packet * &pkt, PacketList &writebacks)
-    {
-    }
-
-    /**
      * Called at end of simulation to complete average block reference stats.
      */
     virtual void cleanupRefs();
diff --git a/src/mem/cache/tags/split_lifo.cc b/src/mem/cache/tags/split_lifo.cc
index 6fcbf3597..302e2aaeb 100644
--- a/src/mem/cache/tags/split_lifo.cc
+++ b/src/mem/cache/tags/split_lifo.cc
@@ -255,7 +255,7 @@ SplitLIFO::findBlock(Addr addr, int &lat)
 }
 
 SplitBlk*
-SplitLIFO::findBlock(Packet * &pkt, int &lat)
+SplitLIFO::findBlock(PacketPtr &pkt, int &lat)
 {
     Addr addr = pkt->getAddr();
 
@@ -291,7 +291,7 @@ SplitLIFO::findBlock(Addr addr) const
 }
 
 SplitBlk*
-SplitLIFO::findReplacement(Packet * &pkt, PacketList &writebacks,
+SplitLIFO::findReplacement(PacketPtr &pkt, PacketList &writebacks,
                            BlkList &compress_blocks)
 {
     unsigned set = extractSet(pkt->getAddr());
@@ -347,52 +347,6 @@ SplitLIFO::invalidateBlk(Addr addr)
 }
 
 void
-SplitLIFO::doCopy(Addr source, Addr dest, PacketList &writebacks)
-{
-//Copy Unsuported for now
-#if 0
-    assert(source == blkAlign(source));
-    assert(dest == blkAlign(dest));
-    SplitBlk *source_blk = findBlock(source);
-    assert(source_blk);
-    SplitBlk *dest_blk = findBlock(dest);
-    if (dest_blk == NULL) {
-        // Need to do a replacement
-        Packet * pkt = new Packet();
-        pkt->paddr = dest;
-        BlkList dummy_list;
-        dest_blk = findReplacement(pkt, writebacks, dummy_list);
-        if (dest_blk->isValid() && dest_blk->isModified()) {
-            // Need to writeback data.
-            pkt = buildWritebackReq(regenerateBlkAddr(dest_blk->tag,
-                                                      dest_blk->set),
-                                    dest_blk->xc,
-                                    blkSize,
-                                    (cache->doData())?dest_blk->data:0,
-                                    dest_blk->size);
-            writebacks.push_back(pkt);
-        }
-        dest_blk->tag = extractTag(dest);
-        /**
-         * @todo Do we need to pass in the execution context, or can we
-         * assume its the same?
-         */
-        assert(source_blk->xc);
-        dest_blk->xc = source_blk->xc;
-    }
-    /**
-     * @todo Can't assume the status once we have coherence on copies.
-     */
-
-    // Set this block as readable, writeable, and dirty.
-    dest_blk->status = 7;
-    if (cache->doData()) {
-        memcpy(dest_blk->data, source_blk->data, blkSize);
-    }
-#endif
-}
-
-void
 SplitLIFO::cleanupRefs()
 {
     for (int i = 0; i < numBlks; ++i) {
diff --git a/src/mem/cache/tags/split_lifo.hh b/src/mem/cache/tags/split_lifo.hh
index 355a66162..6c3befe37 100644
--- a/src/mem/cache/tags/split_lifo.hh
+++ b/src/mem/cache/tags/split_lifo.hh
@@ -207,7 +207,7 @@ public:
      * @param lat The access latency.
      * @return Pointer to the cache block if found.
      */
-    SplitBlk* findBlock(Packet * &pkt, int &lat);
+    SplitBlk* findBlock(PacketPtr &pkt, int &lat);
 
     /**
      * Finds the given address in the cache, do not update replacement data.
@@ -224,7 +224,7 @@ public:
      * @param compress_blocks List of blocks to compress, for adaptive comp.
      * @return The block to place the replacement in.
      */
-    SplitBlk* findReplacement(Packet * &pkt, PacketList &writebacks,
+    SplitBlk* findReplacement(PacketPtr &pkt, PacketList &writebacks,
                             BlkList &compress_blocks);
 
     /**
@@ -326,22 +326,6 @@ public:
     }
 
     /**
-     * Perform a block aligned copy from the source address to the destination.
-     * @param source The block-aligned source address.
-     * @param dest The block-aligned destination address.
-     * @param asid The address space DI.
-     * @param writebacks List for any generated writeback pktuests.
-     */
-    void doCopy(Addr source, Addr dest, PacketList &writebacks);
-
-    /**
-     * No impl.
-     */
-    void fixCopy(Packet * &pkt, PacketList &writebacks)
-    {
-    }
-
-    /**
      * Called at end of simulation to complete average block reference stats.
      */
     virtual void cleanupRefs();
diff --git a/src/mem/cache/tags/split_lru.cc b/src/mem/cache/tags/split_lru.cc
index 4381923bd..11c9a5d64 100644
--- a/src/mem/cache/tags/split_lru.cc
+++ b/src/mem/cache/tags/split_lru.cc
@@ -203,7 +203,7 @@ SplitLRU::findBlock(Addr addr, int &lat)
 }
 
 SplitBlk*
-SplitLRU::findBlock(Packet * &pkt, int &lat)
+SplitLRU::findBlock(PacketPtr &pkt, int &lat)
 {
     Addr addr = pkt->getAddr();
 
@@ -234,7 +234,7 @@ SplitLRU::findBlock(Addr addr) const
 }
 
 SplitBlk*
-SplitLRU::findReplacement(Packet * &pkt, PacketList &writebacks,
+SplitLRU::findReplacement(PacketPtr &pkt, PacketList &writebacks,
                      BlkList &compress_blocks)
 {
     unsigned set = extractSet(pkt->getAddr());
@@ -272,52 +272,6 @@ SplitLRU::invalidateBlk(Addr addr)
 }
 
 void
-SplitLRU::doCopy(Addr source, Addr dest, PacketList &writebacks)
-{
-//Copy not supported for now
-#if 0
-    assert(source == blkAlign(source));
-    assert(dest == blkAlign(dest));
-    SplitBlk *source_blk = findBlock(source);
-    assert(source_blk);
-    SplitBlk *dest_blk = findBlock(dest);
-    if (dest_blk == NULL) {
-        // Need to do a replacement
-        Packet * pkt = new Packet();
-        pkt->paddr = dest;
-        BlkList dummy_list;
-        dest_blk = findReplacement(pkt, writebacks, dummy_list);
-        if (dest_blk->isValid() && dest_blk->isModified()) {
-            // Need to writeback data.
-            pkt = buildWritebackReq(regenerateBlkAddr(dest_blk->tag,
-                                                      dest_blk->set),
-                                    dest_blk->xc,
-                                    blkSize,
-                                    (cache->doData())?dest_blk->data:0,
-                                    dest_blk->size);
-            writebacks.push_back(pkt);
-        }
-        dest_blk->tag = extractTag(dest);
-        /**
-         * @todo Do we need to pass in the execution context, or can we
-         * assume its the same?
-         */
-        assert(source_blk->xc);
-        dest_blk->xc = source_blk->xc;
-    }
-    /**
-     * @todo Can't assume the status once we have coherence on copies.
-     */
-
-    // Set this block as readable, writeable, and dirty.
-    dest_blk->status = 7;
-    if (cache->doData()) {
-        memcpy(dest_blk->data, source_blk->data, blkSize);
-    }
-#endif
-}
-
-void
 SplitLRU::cleanupRefs()
 {
     for (int i = 0; i < numSets*assoc; ++i) {
diff --git a/src/mem/cache/tags/split_lru.hh b/src/mem/cache/tags/split_lru.hh
index 72aebac9c..6160d59e5 100644
--- a/src/mem/cache/tags/split_lru.hh
+++ b/src/mem/cache/tags/split_lru.hh
@@ -190,7 +190,7 @@ public:
      * @param lat The access latency.
      * @return Pointer to the cache block if found.
      */
-    SplitBlk* findBlock(Packet * &pkt, int &lat);
+    SplitBlk* findBlock(PacketPtr &pkt, int &lat);
 
     /**
      * Finds the given address in the cache, do not update replacement data.
@@ -207,7 +207,7 @@ public:
      * @param compress_blocks List of blocks to compress, for adaptive comp.
      * @return The block to place the replacement in.
      */
-    SplitBlk* findReplacement(Packet * &pkt, PacketList &writebacks,
+    SplitBlk* findReplacement(PacketPtr &pkt, PacketList &writebacks,
                             BlkList &compress_blocks);
 
     /**
@@ -309,22 +309,6 @@ public:
     }
 
     /**
-     * Perform a block aligned copy from the source address to the destination.
-     * @param source The block-aligned source address.
-     * @param dest The block-aligned destination address.
-     * @param asid The address space DI.
-     * @param writebacks List for any generated writeback pktuests.
-     */
-    void doCopy(Addr source, Addr dest, PacketList &writebacks);
-
-    /**
-     * No impl.
-     */
-    void fixCopy(Packet * &pkt, PacketList &writebacks)
-    {
-    }
-
-    /**
      * Called at end of simulation to complete average block reference stats.
      */
     virtual void cleanupRefs();
diff --git a/src/mem/dram.cc b/src/mem/dram.cc
index d7b955975..873ca5b97 100644
--- a/src/mem/dram.cc
+++ b/src/mem/dram.cc
@@ -359,7 +359,7 @@ DRAMMemory::regStats()
 static char *mem_access_output=NULL;
                 /* latency of access [CPU cycles]*/
 Tick
-DRAMMemory::calculateLatency(Packet *pkt)
+DRAMMemory::calculateLatency(PacketPtr pkt)
 {
 
   bool cmdIsRead = pkt->isRead();
diff --git a/src/mem/dram.hh b/src/mem/dram.hh
index 32d117596..9d2f60ee8 100644
--- a/src/mem/dram.hh
+++ b/src/mem/dram.hh
@@ -140,7 +140,7 @@ class DRAMMemory : public PhysicalMemory
 
 
   protected:
-    Tick calculateLatency(Packet *pkt);
+    Tick calculateLatency(PacketPtr pkt);
     int prechargeBanksAround(int bank);
 
   public:
diff --git a/src/mem/packet.cc b/src/mem/packet.cc
index 64c65dcca..fa8d82c46 100644
--- a/src/mem/packet.cc
+++ b/src/mem/packet.cc
@@ -36,9 +36,10 @@
  */
 
 #include <iostream>
+
 #include "base/misc.hh"
-#include "mem/packet.hh"
 #include "base/trace.hh"
+#include "mem/packet.hh"
 
 static const std::string ReadReqString("ReadReq");
 static const std::string WriteReqString("WriteReq");
@@ -51,6 +52,7 @@ static const std::string HardPFReqString("HardPFReq");
 static const std::string HardPFRespString("HardPFResp");
 static const std::string InvalidateReqString("InvalidateReq");
 static const std::string WriteInvalidateReqString("WriteInvalidateReq");
+static const std::string WriteInvalidateRespString("WriteInvalidateResp");
 static const std::string UpgradeReqString("UpgradeReq");
 static const std::string ReadExReqString("ReadExReq");
 static const std::string ReadExRespString("ReadExResp");
@@ -71,6 +73,7 @@ Packet::cmdString() const
       case HardPFResp:      return HardPFRespString;
       case InvalidateReq:   return InvalidateReqString;
       case WriteInvalidateReq:return WriteInvalidateReqString;
+      case WriteInvalidateResp:return WriteInvalidateRespString;
       case UpgradeReq:      return UpgradeReqString;
       case ReadExReq:       return ReadExReqString;
       case ReadExResp:      return ReadExRespString;
@@ -93,6 +96,7 @@ Packet::cmdIdxToString(Packet::Command idx)
       case HardPFResp:      return HardPFRespString;
       case InvalidateReq:   return InvalidateReqString;
       case WriteInvalidateReq:return WriteInvalidateReqString;
+      case WriteInvalidateResp:return WriteInvalidateRespString;
       case UpgradeReq:      return UpgradeReqString;
       case ReadExReq:       return ReadExReqString;
       case ReadExResp:      return ReadExRespString;
@@ -129,7 +133,7 @@ Packet::allocate()
 
 /** Do the packet modify the same addresses. */
 bool
-Packet::intersect(Packet *p)
+Packet::intersect(PacketPtr p)
 {
     Addr s1 = getAddr();
     Addr e1 = getAddr() + getSize() - 1;
@@ -140,14 +144,14 @@ Packet::intersect(Packet *p)
 }
 
 bool
-fixPacket(Packet *func, Packet *timing)
+fixPacket(PacketPtr func, PacketPtr timing)
 {
     Addr funcStart      = func->getAddr();
     Addr funcEnd        = func->getAddr() + func->getSize() - 1;
     Addr timingStart    = timing->getAddr();
     Addr timingEnd      = timing->getAddr() + timing->getSize() - 1;
 
-    assert(!(funcStart > timingEnd || timingStart < funcEnd));
+    assert(!(funcStart > timingEnd || timingStart > funcEnd));
 
     if (DTRACE(FunctionalAccess)) {
        DebugOut() << func;
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index 48b32ec47..d8ad49bdb 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -38,14 +38,15 @@
 #ifndef __MEM_PACKET_HH__
 #define __MEM_PACKET_HH__
 
+#include <cassert>
+#include <list>
+
 #include "mem/request.hh"
 #include "sim/host.hh"
 #include "sim/root.hh"
-#include <list>
-#include <cassert>
 
 struct Packet;
-typedef Packet* PacketPtr;
+typedef Packet *PacketPtr;
 typedef uint8_t* PacketDataPtr;
 typedef std::list<PacketPtr> PacketList;
 
@@ -102,7 +103,7 @@ class Packet
     /** Device address (e.g., bus ID) of the source of the
      *   transaction. The source is not responsible for setting this
      *   field; it is set implicitly by the interconnect when the
-     *   packet * is first sent.  */
+     *   packet is first sent.  */
     short src;
 
     /** Device address (e.g., bus ID) of the destination of the
@@ -202,7 +203,10 @@ class Packet
         HardPFResp      = IsRead  | IsResponse | IsHWPrefetch
                                     | NeedsResponse | HasData,
         InvalidateReq   = IsInvalidate | IsRequest,
-        WriteInvalidateReq = IsWrite | IsInvalidate | IsRequest | HasData,
+        WriteInvalidateReq = IsWrite | IsInvalidate | IsRequest
+                                   | HasData | NeedsResponse,
+        WriteInvalidateResp = IsWrite | IsInvalidate | NeedsResponse
+                                   | IsResponse, // a response: no IsRequest
         UpgradeReq      = IsInvalidate | IsRequest | IsUpgrade,
         ReadExReq       = IsRead | IsInvalidate | IsRequest | NeedsResponse,
         ReadExResp      = IsRead | IsInvalidate | IsResponse
@@ -339,10 +343,12 @@ class Packet
         srcValid = false;
     }
 
-    /** Take a request packet and modify it in place to be suitable
-     *   for returning as a response to that request.
+    /**
+     * Take a request packet and modify it in place to be suitable for
+     * returning as a response to that request.
      */
-    void makeAtomicResponse() {
+    void makeAtomicResponse()
+    {
         assert(needsResponse());
         assert(isRequest());
         int icmd = (int)cmd;
@@ -355,50 +361,90 @@ class Packet
         cmd = (Command)icmd;
     }
 
-    /** Take a request packet that has been returned as NACKED and modify it so
-     * that it can be sent out again. Only packets that need a response can be
-     * NACKED, so verify that that is true. */
-    void reinitNacked() {
+    /**
+     * Take a request packet that has been returned as NACKED and
+     * modify it so that it can be sent out again. Only packets that
+     * need a response can be NACKED, so verify that that is true.
+     */
+    void
+    reinitNacked()
+    {
         assert(needsResponse() && result == Nacked);
         dest =  Broadcast;
         result = Unknown;
     }
 
 
-    /** Set the data pointer to the following value that should not be freed. */
+    /**
+     * Set the data pointer to the following value that should not be
+     * freed.
+     */
     template <typename T>
-    void dataStatic(T *p);
+    void
+    dataStatic(T *p)
+    {
+        // Clear the dynamic flag outright; nothing is freed here.
+        dynamicData = false;
+        staticData = true;
+        data = (PacketDataPtr)p;
+    }
 
-    /** Set the data pointer to a value that should have delete [] called on it.
+    /**
+     * Set the data pointer to a value that should have delete []
+     * called on it.
      */
     template <typename T>
-    void dataDynamicArray(T *p);
+    void
+    dataDynamicArray(T *p)
+    {
+        assert(!(staticData || dynamicData));
+        // Mark the buffer as dynamic array data.
+        dynamicData = arrayData = true;
+        data = (PacketDataPtr)p;
+    }
 
-    /** set the data pointer to a value that should have delete called on it. */
+    /**
+     * set the data pointer to a value that should have delete called
+     * on it.
+     */
     template <typename T>
-    void dataDynamic(T *p);
+    void
+    dataDynamic(T *p)
+    {
+        assert(!(staticData || dynamicData));
+        arrayData = false;      // dynamic, but not array data
+        dynamicData = true;
+        data = (PacketDataPtr)p;
+    }
 
-    /** return the value of what is pointed to in the packet. */
+    /** Get the packet's data pointer, cast to T*. */
     template <typename T>
-    T get();
+    T*
+    getPtr()
+    {
+        assert(staticData || dynamicData);
+        return (T*)data;
+    }
 
-    /** get a pointer to the data ptr. */
+    /** return the value of what is pointed to in the packet. */
     template <typename T>
-    T* getPtr();
+    T get();
 
     /** set the value in the data pointer to v. */
     template <typename T>
     void set(T v);
 
-    /** delete the data pointed to in the data pointer. Ok to call to matter how
-     * data was allocted. */
+    /**
+     * Delete the data pointed to by the data pointer. OK to call no
+     * matter how the data was allocated.
+     */
     void deleteData();
 
     /** If there isn't data in the packet, allocate some. */
     void allocate();
 
     /** Do the packet modify the same addresses. */
-    bool intersect(Packet *p);
+    bool intersect(PacketPtr p);
 };
 
 
@@ -407,7 +453,7 @@ class Packet
  * in the timing packet. It returns if the functional packet should continue to
  * traverse the memory hierarchy or not.
  */
-bool fixPacket(Packet *func, Packet *timing);
+bool fixPacket(PacketPtr func, PacketPtr timing);
 
 std::ostream & operator<<(std::ostream &o, const Packet &p);
 
diff --git a/src/mem/packet_access.hh b/src/mem/packet_access.hh
new file mode 100644
index 000000000..aac0c3ae5
--- /dev/null
+++ b/src/mem/packet_access.hh
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Ali Saidi
+ *          Nathan Binkert
+ */
+
+#include "arch/isa_traits.hh"
+#include "mem/packet.hh"
+#include "sim/byteswap.hh"
+
+#ifndef __MEM_PACKET_ACCESS_HH__
+#define __MEM_PACKET_ACCESS_HH__
+// The memory system needs to have an endianness. This is the easiest
+// way to deal with it for now. At some point, we will have to remove
+// these functions and make the users do their own byte swapping since
+// the memory system does not in fact have an endianness.
+
+/** Read a T from the start of the packet data, via TheISA::gtoh(). */
+template <typename T>
+inline T
+Packet::get()
+{
+    assert(staticData || dynamicData);
+    assert(sizeof(T) <= size);
+    return TheISA::gtoh(*(T*)data);
+}
+
+/** Store v, via TheISA::htog(), at the start of the packet data. */
+template <typename T>
+inline void
+Packet::set(T v)
+{
+    assert((staticData || dynamicData) && sizeof(T) <= size);
+    *(T*)data = TheISA::htog(v);
+}
+
+#endif //__MEM_PACKET_ACCESS_HH__
diff --git a/src/mem/page_table.cc b/src/mem/page_table.cc
index 2b460306f..fe8094b88 100644
--- a/src/mem/page_table.cc
+++ b/src/mem/page_table.cc
@@ -27,6 +27,7 @@
  *
  * Authors: Steve Reinhardt
  *          Ron Dreslinski
+ *          Ali Saidi
  */
 
 /**
@@ -97,6 +98,8 @@ PageTable::allocate(Addr vaddr, int64_t size)
     // starting address must be page aligned
     assert(pageOffset(vaddr) == 0);
 
+    DPRINTF(MMU, "Allocating Page: %#x-%#x\n", vaddr, vaddr+ size);
+
     for (; size > 0; size -= pageSize, vaddr += pageSize) {
         m5::hash_map<Addr,Addr>::iterator iter = pTable.find(vaddr);
 
@@ -159,3 +162,41 @@ PageTable::translate(RequestPtr &req)
     req->setPaddr(paddr);
     return page_check(req->getPaddr(), req->getSize());
 }
+
+/**
+ * Write every vaddr/paddr mapping in pTable to the checkpoint stream.
+ */
+void
+PageTable::serialize(std::ostream &os)
+{
+    // The entry count goes first; unserialize() reads it to bound its loop.
+    paramOut(os, "ptable.size", pTable.size());
+
+    int count = 0;
+    for (m5::hash_map<Addr,Addr>::iterator iter = pTable.begin();
+         iter != pTable.end(); ++iter, ++count) {
+        paramOut(os, csprintf("ptable.entry%dvaddr", count), iter->first);
+        paramOut(os, csprintf("ptable.entry%dpaddr", count), iter->second);
+    }
+
+    assert(count == pTable.size());
+}
+
+/**
+ * Replace the page table contents with the entries saved in a checkpoint.
+ */
+void
+PageTable::unserialize(Checkpoint *cp, const std::string &section)
+{
+    int count;
+    paramIn(cp, section, "ptable.size", count);
+    pTable.clear();
+
+    Addr vaddr, paddr;
+    for (int i = 0; i < count; ++i) {
+        paramIn(cp, section, csprintf("ptable.entry%dvaddr", i), vaddr);
+        paramIn(cp, section, csprintf("ptable.entry%dpaddr", i), paddr);
+        pTable[vaddr] = paddr;
+    }
+}
+
diff --git a/src/mem/page_table.hh b/src/mem/page_table.hh
index fce063280..0e2b1f58c 100644
--- a/src/mem/page_table.hh
+++ b/src/mem/page_table.hh
@@ -95,6 +95,8 @@ class PageTable
      */
     Fault translate(RequestPtr &req);
 
+    void serialize(std::ostream &os);
+    void unserialize(Checkpoint *cp, const std::string &section);
 };
 
 #endif
diff --git a/src/mem/physical.cc b/src/mem/physical.cc
index f5a0ade15..0302f7351 100644
--- a/src/mem/physical.cc
+++ b/src/mem/physical.cc
@@ -39,21 +39,17 @@
 #include <iostream>
 #include <string>
 
-
+#include "arch/isa_traits.hh"
 #include "base/misc.hh"
 #include "config/full_system.hh"
-#include "mem/packet_impl.hh"
 #include "mem/physical.hh"
-#include "sim/host.hh"
 #include "sim/builder.hh"
 #include "sim/eventq.hh"
-#include "arch/isa_traits.hh"
-
+#include "sim/host.hh"
 
 using namespace std;
 using namespace TheISA;
 
-
 PhysicalMemory::PhysicalMemory(Params *p)
     : MemObject(p->name), pmemAddr(NULL), port(NULL), lat(p->latency), _params(p)
 {
@@ -105,7 +101,7 @@ PhysicalMemory::deviceBlockSize()
 }
 
 Tick
-PhysicalMemory::calculateLatency(Packet *pkt)
+PhysicalMemory::calculateLatency(PacketPtr pkt)
 {
     return lat;
 }
@@ -193,7 +189,7 @@ PhysicalMemory::checkLockedAddrList(Request *req)
 }
 
 void
-PhysicalMemory::doFunctionalAccess(Packet *pkt)
+PhysicalMemory::doFunctionalAccess(PacketPtr pkt)
 {
     assert(pkt->getAddr() + pkt->getSize() <= params()->addrRange.size());
 
@@ -281,14 +277,14 @@ PhysicalMemory::MemoryPort::deviceBlockSize()
 }
 
 Tick
-PhysicalMemory::MemoryPort::recvAtomic(Packet *pkt)
+PhysicalMemory::MemoryPort::recvAtomic(PacketPtr pkt)
 {
     memory->doFunctionalAccess(pkt);
     return memory->calculateLatency(pkt);
 }
 
 void
-PhysicalMemory::MemoryPort::recvFunctional(Packet *pkt)
+PhysicalMemory::MemoryPort::recvFunctional(PacketPtr pkt)
 {
     // Default implementation of SimpleTimingPort::recvFunctional()
     // calls recvAtomic() and throws away the latency; we can save a
diff --git a/src/mem/physical.hh b/src/mem/physical.hh
index 97bea2ec4..045e61612 100644
--- a/src/mem/physical.hh
+++ b/src/mem/physical.hh
@@ -57,9 +57,9 @@ class PhysicalMemory : public MemObject
 
       protected:
 
-        virtual Tick recvAtomic(Packet *pkt);
+        virtual Tick recvAtomic(PacketPtr pkt);
 
-        virtual void recvFunctional(Packet *pkt);
+        virtual void recvFunctional(PacketPtr pkt);
 
         virtual void recvStatusChange(Status status);
 
@@ -172,8 +172,8 @@ class PhysicalMemory : public MemObject
     unsigned int drain(Event *de);
 
   protected:
-    void doFunctionalAccess(Packet *pkt);
-    virtual Tick calculateLatency(Packet *pkt);
+    void doFunctionalAccess(PacketPtr pkt);
+    virtual Tick calculateLatency(PacketPtr pkt);
     void recvStatusChange(Port::Status status);
 
   public:
diff --git a/src/mem/port.cc b/src/mem/port.cc
index 17924b759..bbc98c160 100644
--- a/src/mem/port.cc
+++ b/src/mem/port.cc
@@ -35,7 +35,6 @@
 
 #include "base/chunk_generator.hh"
 #include "base/trace.hh"
-#include "mem/packet_impl.hh"
 #include "mem/port.hh"
 
 void
diff --git a/src/mem/port.hh b/src/mem/port.hh
index bb3bc1b1b..b6eeb9db3 100644
--- a/src/mem/port.hh
+++ b/src/mem/port.hh
@@ -128,13 +128,13 @@ class Port
      * called by a peer port, never directly by any outside object. */
 
     /** Called to recive a timing call from the peer port. */
-    virtual bool recvTiming(Packet *pkt) = 0;
+    virtual bool recvTiming(PacketPtr pkt) = 0;
 
     /** Called to recive a atomic call from the peer port. */
-    virtual Tick recvAtomic(Packet *pkt) = 0;
+    virtual Tick recvAtomic(PacketPtr pkt) = 0;
 
     /** Called to recive a functional call from the peer port. */
-    virtual void recvFunctional(Packet *pkt) = 0;
+    virtual void recvFunctional(PacketPtr pkt) = 0;
 
     /** Called to recieve a status change from the peer port. */
     virtual void recvStatusChange(Status status) = 0;
@@ -172,14 +172,14 @@ class Port
         case a cache has a higher priority request come in while waiting for
         the bus to arbitrate.
     */
-    bool sendTiming(Packet *pkt) { return peer->recvTiming(pkt); }
+    bool sendTiming(PacketPtr pkt) { return peer->recvTiming(pkt); }
 
     /** Function called by the associated device to send an atomic
      *   access, an access in which the data is moved and the state is
      *   updated in one cycle, without interleaving with other memory
      *   accesses.  Returns estimated latency of access.
      */
-    Tick sendAtomic(Packet *pkt)
+    Tick sendAtomic(PacketPtr pkt)
         { return peer->recvAtomic(pkt); }
 
     /** Function called by the associated device to send a functional access,
@@ -187,7 +187,7 @@ class Port
         memory system, without affecting the current state of any block or
         moving the block.
     */
-    void sendFunctional(Packet *pkt)
+    void sendFunctional(PacketPtr pkt)
         { return peer->recvFunctional(pkt); }
 
     /** Called by the associated device to send a status change to the device
@@ -252,9 +252,9 @@ class FunctionalPort : public Port
     {}
 
   protected:
-    virtual bool recvTiming(Packet *pkt) { panic("FuncPort is UniDir"); }
-    virtual Tick recvAtomic(Packet *pkt) { panic("FuncPort is UniDir"); }
-    virtual void recvFunctional(Packet *pkt) { panic("FuncPort is UniDir"); }
+    virtual bool recvTiming(PacketPtr pkt) { panic("FuncPort is UniDir"); }
+    virtual Tick recvAtomic(PacketPtr pkt) { panic("FuncPort is UniDir"); }
+    virtual void recvFunctional(PacketPtr pkt) { panic("FuncPort is UniDir"); }
     virtual void recvStatusChange(Status status) {}
 
   public:
diff --git a/src/mem/tport.cc b/src/mem/tport.cc
index 21907c0ca..55a461a8b 100644
--- a/src/mem/tport.cc
+++ b/src/mem/tport.cc
@@ -31,28 +31,30 @@
 #include "mem/tport.hh"
 
 void
-SimpleTimingPort::recvFunctional(Packet *pkt)
+SimpleTimingPort::recvFunctional(PacketPtr pkt)
 {
-    //First check queued events
-    std::list<Packet *>::iterator i = transmitList.begin();
-    std::list<Packet *>::iterator end = transmitList.end();
-    bool cont = true;
+    std::list<PacketPtr>::iterator i = transmitList.begin();
+    std::list<PacketPtr>::iterator end = transmitList.end();
 
-    while (i != end && cont) {
-        Packet * target = *i;
+    while (i != end) {
+        PacketPtr target = *i;
         // If the target contains data, and it overlaps the
         // probed request, need to update data
         if (target->intersect(pkt))
             fixPacket(pkt, target);
 
+        // Move on to the next queued response; without this the
+        // loop never terminates once transmitList is non-empty.
+        ++i;
     }
+
     //Then just do an atomic access and throw away the returned latency
-    if (cont)
+    if (pkt->result != Packet::Success)
         recvAtomic(pkt);
 }
 
 bool
-SimpleTimingPort::recvTiming(Packet *pkt)
+SimpleTimingPort::recvTiming(PacketPtr pkt)
 {
     // If the device is only a slave, it should only be sending
     // responses, which should never get nacked.  There used to be
@@ -66,6 +68,13 @@ SimpleTimingPort::recvTiming(Packet *pkt)
         pkt->makeTimingResponse();
         sendTimingLater(pkt, latency);
     }
+    else {
+        if (pkt->cmd != Packet::UpgradeReq)
+        {
+            delete pkt->req;
+            delete pkt;
+        }
+    }
     return true;
 }
 
diff --git a/src/mem/tport.hh b/src/mem/tport.hh
index df6d48196..fbe81c443 100644
--- a/src/mem/tport.hh
+++ b/src/mem/tport.hh
@@ -60,7 +60,7 @@ class SimpleTimingPort : public Port
   protected:
     /** A list of outgoing timing response packets that haven't been
      * serviced yet. */
-    std::list<Packet*> transmitList;
+    std::list<PacketPtr> transmitList;
 
     /**
      * This class is used to implemented sendTiming() with a delay. When
@@ -71,10 +71,10 @@ class SimpleTimingPort : public Port
     class SendEvent : public Event
     {
         SimpleTimingPort *port;
-        Packet *packet;
+        PacketPtr packet;
 
       public:
-        SendEvent(SimpleTimingPort *p, Packet *pkt, Tick t)
+        SendEvent(SimpleTimingPort *p, PacketPtr pkt, Tick t)
             : Event(&mainEventQueue), port(p), packet(pkt)
         { setFlags(AutoDelete); schedule(curTick + t); }
 
@@ -95,7 +95,7 @@ class SimpleTimingPort : public Port
     Event *drainEvent;
 
     /** Schedule a sendTiming() event to be called in the future. */
-    void sendTimingLater(Packet *pkt, Tick time)
+    void sendTimingLater(PacketPtr pkt, Tick time)
     { outTiming++; new SendEvent(this, pkt, time); }
 
     /** This function is notification that the device should attempt to send a
@@ -103,10 +103,10 @@ class SimpleTimingPort : public Port
     virtual void recvRetry();
 
     /** Implemented using recvAtomic(). */
-    void recvFunctional(Packet *pkt);
+    void recvFunctional(PacketPtr pkt);
 
     /** Implemented using recvAtomic(). */
-    bool recvTiming(Packet *pkt);
+    bool recvTiming(PacketPtr pkt);
 
     /**
      * Simple ports generally don't care about any status
diff --git a/src/python/m5/SimObject.py b/src/python/m5/SimObject.py
index 716f584b0..18b3fff55 100644
--- a/src/python/m5/SimObject.py
+++ b/src/python/m5/SimObject.py
@@ -730,9 +730,8 @@ class SimObject(object):
             # i don't know if there's a better way to do this - calling
             # setMemoryMode directly from self._ccObject results in calling
             # SimObject::setMemoryMode, not the System::setMemoryMode
-##            system_ptr = cc_main.convertToSystemPtr(self._ccObject)
-##            system_ptr.setMemoryMode(mode)
-            self._ccObject.setMemoryMode(mode)
+            system_ptr = cc_main.convertToSystemPtr(self._ccObject)
+            system_ptr.setMemoryMode(mode)
         for child in self._children.itervalues():
             child.changeTiming(mode)
 
diff --git a/src/python/m5/objects/AlphaConsole.py b/src/python/m5/objects/AlphaConsole.py
index 1c71493b1..f968aaa40 100644
--- a/src/python/m5/objects/AlphaConsole.py
+++ b/src/python/m5/objects/AlphaConsole.py
@@ -4,7 +4,7 @@ from Device import BasicPioDevice
 
 class AlphaConsole(BasicPioDevice):
     type = 'AlphaConsole'
-    cpu = Param.BaseCPU(Parent.any, "Processor")
+    cpu = Param.BaseCPU(Parent.cpu[0], "Processor")
     disk = Param.SimpleDisk("Simple Disk")
     sim_console = Param.SimConsole(Parent.any, "The Simulator Console")
     system = Param.AlphaSystem(Parent.any, "system object")
diff --git a/src/python/m5/objects/BaseCache.py b/src/python/m5/objects/BaseCache.py
index db58a177f..773a11bea 100644
--- a/src/python/m5/objects/BaseCache.py
+++ b/src/python/m5/objects/BaseCache.py
@@ -14,7 +14,6 @@ class BaseCache(MemObject):
         "This cache connects to a compressed memory")
     compression_latency = Param.Latency('0ns',
         "Latency in cycles of compression algorithm")
-    do_copy = Param.Bool(False, "perform fast copies in the cache")
     hash_delay = Param.Int(1, "time in cycles of hash access")
     lifo = Param.Bool(False,
         "whether this NIC partition should use LIFO repl. policy")
diff --git a/src/python/m5/objects/IntrControl.py b/src/python/m5/objects/IntrControl.py
index 95be0f4df..a7cf5cc84 100644
--- a/src/python/m5/objects/IntrControl.py
+++ b/src/python/m5/objects/IntrControl.py
@@ -3,4 +3,4 @@ from m5.params import *
 from m5.proxy import *
 class IntrControl(SimObject):
     type = 'IntrControl'
-    cpu = Param.BaseCPU(Parent.any, "the cpu")
+    cpu = Param.BaseCPU(Parent.cpu[0], "the cpu")
diff --git a/src/python/m5/objects/MemTest.py b/src/python/m5/objects/MemTest.py
index 83399be80..1219ddd4d 100644
--- a/src/python/m5/objects/MemTest.py
+++ b/src/python/m5/objects/MemTest.py
@@ -13,6 +13,7 @@ class MemTest(SimObject):
     percent_reads = Param.Percent(65, "target read percentage")
     percent_source_unaligned = Param.Percent(50,
         "percent of copy source address that are unaligned")
+    percent_functional = Param.Percent(50, "percent of access that are functional")
     percent_uncacheable = Param.Percent(10,
         "target uncacheable percentage")
     progress_interval = Param.Counter(1000000,
diff --git a/src/sim/eventq.hh b/src/sim/eventq.hh
index 537bfb918..fa65b08af 100644
--- a/src/sim/eventq.hh
+++ b/src/sim/eventq.hh
@@ -120,10 +120,22 @@ class Event : public Serializable, public FastAlloc
     /// priority; these values are used to control events that need to
     /// be ordered within a cycle.
     enum Priority {
-        /// Breakpoints should happen before anything else, so we
-        /// don't miss any action when debugging.
+        /// If we enable tracing on a particular cycle, do that as the
+        /// very first thing so we don't miss any of the events on
+        /// that cycle (even if we enter the debugger).
+        Trace_Enable_Pri        = -101,
+
+        /// Breakpoints should happen before anything else (except
+        /// enabling trace output), so we don't miss any action when
+        /// debugging.
         Debug_Break_Pri		= -100,
 
+        /// CPU switches schedule the new CPU's tick event for the
+        /// same cycle (after unscheduling the old CPU's tick event).
+        /// The switch needs to come before any tick events to make
+        /// sure we don't tick both CPUs in the same cycle.
+        CPU_Switch_Pri		=   -31,
+
         /// For some reason "delayed" inter-cluster writebacks are
         /// scheduled before regular writebacks (which have default
         /// priority).  Steve?
@@ -132,12 +144,6 @@ class Event : public Serializable, public FastAlloc
         /// Default is zero for historical reasons.
         Default_Pri		=    0,
 
-        /// CPU switches schedule the new CPU's tick event for the
-        /// same cycle (after unscheduling the old CPU's tick event).
-        /// The switch needs to come before any tick events to make
-        /// sure we don't tick both CPUs in the same cycle.
-        CPU_Switch_Pri		=   -31,
-
         /// Serailization needs to occur before tick events also, so
         /// that a serialize/unserialize is identical to an on-line
         /// CPU switch.
diff --git a/src/sim/main.cc b/src/sim/main.cc
index 8bb0d7aaa..133141e57 100644
--- a/src/sim/main.cc
+++ b/src/sim/main.cc
@@ -55,6 +55,7 @@
 #include "base/statistics.hh"
 #include "base/str.hh"
 #include "base/time.hh"
+#include "config/pythonhome.hh"
 #include "cpu/base.hh"
 #include "cpu/smt.hh"
 #include "mem/mem_object.hh"
@@ -145,6 +146,11 @@ main(int argc, char **argv)
     if (setenv("PYTHONPATH", pythonpath.c_str(), true) == -1)
         fatal("setenv: %s\n", strerror(errno));
 
+    char *python_home = getenv("PYTHONHOME");
+    if (!python_home)
+        python_home = PYTHONHOME;
+    Py_SetPythonHome(python_home);
+
     // initialize embedded Python interpreter
     Py_Initialize();
     PySys_SetArgv(argc, argv);
diff --git a/src/sim/process.cc b/src/sim/process.cc
index 46ccd2596..f3e289d41 100644
--- a/src/sim/process.cc
+++ b/src/sim/process.cc
@@ -240,6 +240,41 @@ Process::sim_fd(int tgt_fd)
     return fd_map[tgt_fd];
 }
 
+void
+Process::serialize(std::ostream &os)
+{
+    SERIALIZE_SCALAR(initialContextLoaded);
+    SERIALIZE_SCALAR(brk_point);
+    SERIALIZE_SCALAR(stack_base);
+    SERIALIZE_SCALAR(stack_size);
+    SERIALIZE_SCALAR(stack_min);
+    SERIALIZE_SCALAR(next_thread_stack_base);
+    SERIALIZE_SCALAR(mmap_start);
+    SERIALIZE_SCALAR(mmap_end);
+    SERIALIZE_SCALAR(nxm_start);
+    SERIALIZE_SCALAR(nxm_end);
+    SERIALIZE_ARRAY(fd_map, MAX_FD);
+
+    pTable->serialize(os);
+}
+
+void
+Process::unserialize(Checkpoint *cp, const std::string &section)
+{
+    UNSERIALIZE_SCALAR(initialContextLoaded);
+    UNSERIALIZE_SCALAR(brk_point);
+    UNSERIALIZE_SCALAR(stack_base);
+    UNSERIALIZE_SCALAR(stack_size);
+    UNSERIALIZE_SCALAR(stack_min);
+    UNSERIALIZE_SCALAR(next_thread_stack_base);
+    UNSERIALIZE_SCALAR(mmap_start);
+    UNSERIALIZE_SCALAR(mmap_end);
+    UNSERIALIZE_SCALAR(nxm_start);
+    UNSERIALIZE_SCALAR(nxm_end);
+    UNSERIALIZE_ARRAY(fd_map, MAX_FD);
+
+    pTable->unserialize(cp, section);
+}
 
 
 //
diff --git a/src/sim/process.hh b/src/sim/process.hh
index b2777170f..5c37f725e 100644
--- a/src/sim/process.hh
+++ b/src/sim/process.hh
@@ -162,6 +162,9 @@ class Process : public SimObject
     int sim_fd(int tgt_fd);
 
     virtual void syscall(int64_t callnum, ThreadContext *tc) = 0;
+
+    void serialize(std::ostream &os);
+    void unserialize(Checkpoint *cp, const std::string &section);
 };
 
 //
diff --git a/tests/configs/memtest.py b/tests/configs/memtest.py
index 116e71af6..2b990418c 100644
--- a/tests/configs/memtest.py
+++ b/tests/configs/memtest.py
@@ -53,7 +53,7 @@ class L2(BaseCache):
 
 #MAX CORES IS 8 with the fals sharing method
 nb_cores = 8
-cpus = [ MemTest(max_loads=1e12, percent_uncacheable=0, progress_interval=1000) for i in xrange(nb_cores) ]
+cpus = [ MemTest(atomic=False, max_loads=1e12, percent_uncacheable=10, progress_interval=1000) for i in xrange(nb_cores) ]
 
 # system simulated
 system = System(cpu = cpus, funcmem = PhysicalMemory(),
@@ -90,6 +90,6 @@ system.physmem.port = system.membus.port
 
 root = Root( system = system )
 root.system.mem_mode = 'timing'
-#root.trace.flags="Cache CachePort Bus"
-#root.trace.cycle=3810800
+#root.trace.flags="Cache CachePort MemoryAccess"
+#root.trace.cycle=1
 
diff --git a/tests/configs/tsunami-simple-atomic-dual.py b/tests/configs/tsunami-simple-atomic-dual.py
index f798213db..1e6c10243 100644
--- a/tests/configs/tsunami-simple-atomic-dual.py
+++ b/tests/configs/tsunami-simple-atomic-dual.py
@@ -31,9 +31,6 @@ from m5.objects import *
 m5.AddToPath('../configs/common')
 import FSConfig
 
-AlphaConsole.cpu = Parent.cpu[0]
-IntrControl.cpu = Parent.cpu[0]
-
 cpus = [ AtomicSimpleCPU(cpu_id=i) for i in xrange(2) ]
 system = FSConfig.makeLinuxAlphaSystem('atomic')
 system.cpu = cpus
diff --git a/tests/configs/tsunami-simple-timing-dual.py b/tests/configs/tsunami-simple-timing-dual.py
index bf94214fd..516495d18 100644
--- a/tests/configs/tsunami-simple-timing-dual.py
+++ b/tests/configs/tsunami-simple-timing-dual.py
@@ -31,9 +31,6 @@ from m5.objects import *
 m5.AddToPath('../configs/common')
 import FSConfig
 
-AlphaConsole.cpu = Parent.cpu[0]
-IntrControl.cpu = Parent.cpu[0]
-
 cpus = [ TimingSimpleCPU(cpu_id=i) for i in xrange(2) ]
 system = FSConfig.makeLinuxAlphaSystem('timing')
 system.cpu = cpus
diff --git a/util/term/term.c b/util/term/term.c
index 8a95480b1..597966159 100644
--- a/util/term/term.c
+++ b/util/term/term.c
@@ -60,46 +60,49 @@ void	usage(int);
 int
 main(int argc, char *argv[])
 {
-        int ch, s, ret;
-        char *host, *port, *endp;
-        struct addrinfo hints;
-        socklen_t len;
-
-        ret = 1;
-        s = 0;
-        host = NULL;
-        port = NULL;
-        endp = NULL;
-
-        strncpy(progname, argv[0], sizeof progname);
-
-        /* Cruft to make sure options are clean, and used properly. */
-        if (argc != 3 || !argv[1] || !argv[2])
-                usage(1);
-
+    int ch, s, ret;
+    char *host, *port, *endp;
+    struct addrinfo hints;
+    socklen_t len;
+
+    ret = 1;
+    s = 0;
+    host = NULL;
+    port = NULL;
+    endp = NULL;
+
+    strncpy(progname, argv[0], sizeof progname);
+
+    /* Cruft to make sure options are clean, and used properly. */
+    if (argc == 2) {
+        host = "localhost";
+        port = argv[1];
+    } else if (argc == 3) {
         host = argv[1];
         port = argv[2];
+    } else {
+        usage(1);
+    }
 
+    if (!isatty(STDIN_FILENO))
+        errx(1, "not attached to a terminal");
 
-        if (!isatty(STDIN_FILENO))
-                errx(1, "not attached to a terminal");
+    raw_term();
 
-        raw_term();
+    /* Initialize addrinfo structure */
+    memset(&hints, 0, sizeof(struct addrinfo));
+    hints.ai_family = AF_UNSPEC;
+    hints.ai_socktype = SOCK_STREAM;
+    hints.ai_protocol = IPPROTO_TCP;
 
-        /* Initialize addrinfo structure */
-        memset(&hints, 0, sizeof(struct addrinfo));
-        hints.ai_family = AF_UNSPEC;
-        hints.ai_socktype = SOCK_STREAM;
-        hints.ai_protocol = IPPROTO_TCP;
+    s = remote_connect(host, port, hints);
+    ret = 0;
+    readwrite(s);
 
-        s = remote_connect(host, port, hints);
-        ret = 0;
-        readwrite(s);
+    if (s)
+        close(s);
 
-        if (s)
-                close(s);
-
-        exit(ret);
+    exit(ret);
 }
 
 /*
@@ -110,28 +113,28 @@ main(int argc, char *argv[])
 int
 remote_connect(char *host, char *port, struct addrinfo hints)
 {
-        struct addrinfo *res, *res0;
-        int s, error;
+    struct addrinfo *res, *res0;
+    int s, error;
 
-        if ((error = getaddrinfo(host, port, &hints, &res)))
-                errx(1, "getaddrinfo: %s", gai_strerror(error));
+    if ((error = getaddrinfo(host, port, &hints, &res)))
+        errx(1, "getaddrinfo: %s", gai_strerror(error));
 
-        res0 = res;
-        do {
-                if ((s = socket(res0->ai_family, res0->ai_socktype,
-                    res0->ai_protocol)) < 0)
-                        continue;
+    res0 = res;
+    do {
+        if ((s = socket(res0->ai_family, res0->ai_socktype,
+                        res0->ai_protocol)) < 0)
+            continue;
 
-                if (connect(s, res0->ai_addr, res0->ai_addrlen) == 0)
-                        break;
+        if (connect(s, res0->ai_addr, res0->ai_addrlen) == 0)
+            break;
 
-                close(s);
-                s = -1;
-        } while ((res0 = res0->ai_next) != NULL);
+        close(s);
+        s = -1;
+    } while ((res0 = res0->ai_next) != NULL);
 
-        freeaddrinfo(res);
+    freeaddrinfo(res);
 
-        return (s);
+    return (s);
 }
 
 /*
@@ -141,79 +144,79 @@ remote_connect(char *host, char *port, struct addrinfo hints)
 void
 readwrite(int nfd)
 {
-        struct pollfd pfd[2];
-        char buf[BUFSIZ];
-        int wfd = fileno(stdin), n, ret;
-        int lfd = fileno(stdout);
-        int escape = 0;
-
-        /* Setup Network FD */
-        pfd[0].fd = nfd;
-        pfd[0].events = POLLIN;
-
-        /* Setup STDIN FD */
-        pfd[1].fd = wfd;
-        pfd[1].events = POLLIN;
-
-        while (pfd[0].fd != -1) {
-                if ((n = poll(pfd, 2, -1)) < 0) {
-                        close(nfd);
-                        err(1, "Polling Error");
-                }
+    struct pollfd pfd[2];
+    char buf[BUFSIZ];
+    int wfd = fileno(stdin), n, ret;
+    int lfd = fileno(stdout);
+    int escape = 0;
+
+    /* Setup Network FD */
+    pfd[0].fd = nfd;
+    pfd[0].events = POLLIN;
+
+    /* Setup STDIN FD */
+    pfd[1].fd = wfd;
+    pfd[1].events = POLLIN;
+
+    while (pfd[0].fd != -1) {
+        if ((n = poll(pfd, 2, -1)) < 0) {
+            close(nfd);
+            err(1, "Polling Error");
+        }
 
-                if (n == 0)
-                        return;
+        if (n == 0)
+            return;
+
+        if (pfd[0].revents & POLLIN) {
+            if ((n = read(nfd, buf, sizeof(buf))) < 0)
+                return;
+            else if (n == 0) {
+                shutdown(nfd, SHUT_RD);
+                pfd[0].fd = -1;
+                pfd[0].events = 0;
+            } else {
+                if ((ret = atomicio(write, lfd, buf, n)) != n)
+                    return;
+            }
+        }
 
-                if (pfd[0].revents & POLLIN) {
-                        if ((n = read(nfd, buf, sizeof(buf))) < 0)
-                                return;
-                        else if (n == 0) {
-                                shutdown(nfd, SHUT_RD);
-                                pfd[0].fd = -1;
-                                pfd[0].events = 0;
-                        } else {
-                                if ((ret = atomicio(write, lfd, buf, n)) != n)
-                                        return;
-                        }
+        if (pfd[1].revents & POLLIN) {
+            if ((n = read(wfd, buf, sizeof(buf))) < 0)
+                return;
+            else if (n == 0) {
+                shutdown(nfd, SHUT_WR);
+                pfd[1].fd = -1;
+                pfd[1].events = 0;
+            } else {
+                if (escape) {
+                    char buf2[] = "~";
+                    if (*buf == '.') {
+                        printf("quit!\n");
+                        return;
+                    }
+                    escape = 0;
+                    if (*buf != '~' &&
+                        (ret = atomicio(write, nfd, buf2, 1)) != 1)
+                        return;
+                } else {
+                    escape = (*buf == '~');
+                    if (escape)
+                        continue;
                 }
 
-                if (pfd[1].revents & POLLIN) {
-                        if ((n = read(wfd, buf, sizeof(buf))) < 0)
-                                return;
-                        else if (n == 0) {
-                                shutdown(nfd, SHUT_WR);
-                                pfd[1].fd = -1;
-                                pfd[1].events = 0;
-                        } else {
-                                if (escape) {
-                                        char buf2[] = "~";
-                                        if (*buf == '.') {
-                                                printf("quit!\n");
-                                                return;
-                                        }
-                                        escape = 0;
-                                        if (*buf != '~' &&
-                                            (ret = atomicio(write, nfd, buf2, 1)) != n)
-                                                return;
-                                } else {
-                                        escape = (*buf == '~');
-                                        if (escape)
-                                                continue;
-                                }
-
-                                if ((ret = atomicio(write, nfd, buf, n)) != n)
-                                        return;
-                        }
-                }
+                if ((ret = atomicio(write, nfd, buf, n)) != n)
+                    return;
+            }
         }
+    }
 }
 
 void
 usage(int ret)
 {
-        fprintf(stderr, "usage: %s hostname port\n", progname);
-        if (ret)
-                exit(1);
+    fprintf(stderr, "usage: %s [hostname] port\n", progname);
+    if (ret)
+        exit(1);
 }
 
 /*
 
 /*
@@ -247,22 +250,22 @@ usage(int ret)
 ssize_t
 atomicio(ssize_t (*f) (), int fd, void *_s, size_t n)
 {
-        char *s = _s;
-        ssize_t res, pos = 0;
-
-        while (n > pos) {
-                res = (f) (fd, s + pos, n - pos);
-                switch (res) {
-                case -1:
-                        if (errno == EINTR || errno == EAGAIN)
-                                continue;
-                case 0:
-                        return (res);
-                default:
-                        pos += res;
-                }
+    char *s = _s;
+    ssize_t res, pos = 0;
+
+    while (n > pos) {
+        res = (f) (fd, s + pos, n - pos);
+        switch (res) {
+          case -1:
+            if (errno == EINTR || errno == EAGAIN)
+                continue;
+          case 0:
+            return (res);
+          default:
+            pos += res;
         }
-        return (pos);
+    }
+    return (pos);
 }
 
 /*
@@ -284,28 +287,28 @@ atomicio(ssize_t (*f) (), int fd, void *_s, size_t n)
 void
 raw_term()
 {
-        struct termios ios;
+    struct termios ios;
 
-        if (tcgetattr(STDIN_FILENO, &ios) < 0)
-            errx(1, "tcgetagttr\n");
+    if (tcgetattr(STDIN_FILENO, &ios) < 0)
+        errx(1, "tcgetattr\n");
 
-        memcpy(&saved_ios, &ios, sizeof(struct termios));
+    memcpy(&saved_ios, &ios, sizeof(struct termios));
 
-        ios.c_iflag &= ~(ISTRIP|ICRNL|IGNCR|ICRNL|IXOFF|IXON);
-        ios.c_oflag &= ~(OPOST);
-        ios.c_oflag &= (ONLCR);
-        ios.c_lflag &= ~(ISIG|ICANON|ECHO);
-        ios.c_cc[VMIN] = 1;
-        ios.c_cc[VTIME] = 0;
+    ios.c_iflag &= ~(ISTRIP|ICRNL|IGNCR|ICRNL|IXOFF|IXON);
+    ios.c_oflag &= ~(OPOST);
+    ios.c_oflag &= (ONLCR);
+    ios.c_lflag &= ~(ISIG|ICANON|ECHO);
+    ios.c_cc[VMIN] = 1;
+    ios.c_cc[VTIME] = 0;
 
-        if (tcsetattr(STDIN_FILENO, TCSANOW, &ios) < 0)
-            errx(1, "tcsetattr\n");
+    if (tcsetattr(STDIN_FILENO, TCSANOW, &ios) < 0)
+        errx(1, "tcsetattr\n");
 
-        atexit(restore_term);
+    atexit(restore_term);
 }
 
 void
 restore_term()
 {
-        tcsetattr(STDIN_FILENO, TCSANOW, &saved_ios);
+    tcsetattr(STDIN_FILENO, TCSANOW, &saved_ios);
 }